]> git.proxmox.com Git - mirror_iproute2.git/commitdiff
(Logical change 1.3)
authorosdl.org!shemminger <osdl.org!shemminger>
Thu, 15 Apr 2004 20:56:59 +0000 (20:56 +0000)
committerosdl.org!shemminger <osdl.org!shemminger>
Thu, 15 Apr 2004 20:56:59 +0000 (20:56 +0000)
137 files changed:
Config
Makefile
Modules/Catalogue
Modules/tcp_diag.c
Patches/Catalogue
Patches/af_unix.dif
Patches/pidentd-3.0.12.dif
Patches/rt_cache_stat.dif
Patches/symbol_exports.dif
README
README.decnet
README.iproute2+tc
RELNOTES
doc/Makefile
doc/Plan
doc/SNAPSHOT.tex
doc/api-ip6-flowlabels.tex
doc/arpd.sgml
doc/do-psnup
doc/ip-cref.tex
doc/ip-tunnels.tex
doc/nstat.sgml
doc/preamble.tex
doc/rtstat.sgml
doc/ss.sgml
etc/iproute2/rt_dsfield
etc/iproute2/rt_protos
etc/iproute2/rt_realms
etc/iproute2/rt_scopes
etc/iproute2/rt_tables
examples/SYN-DoS.rate.limit
examples/cbqinit.eth1
examples/dhcp-client-script
examples/diffserv/Edge1
examples/diffserv/Edge2
examples/diffserv/Edge31-ca-u32
examples/diffserv/Edge31-cb-chains
examples/diffserv/Edge32-ca-u32
examples/diffserv/Edge32-cb-chains
examples/diffserv/Edge32-cb-u32
examples/diffserv/README
examples/diffserv/afcbq
examples/diffserv/ef-prio
examples/diffserv/efcbq
examples/diffserv/regression-testing
include-glibc/bits/sockunion.h
include-glibc/db.h
include-glibc/glibc-bugs.h
include-glibc/netinet/in.h
include-glibc/netinet/ip.h
include-glibc/socketbits.h
include/SNAPSHOT.h
include/libnetlink.h
include/ll_map.h
include/rt_names.h
include/rtm_map.h
include/tcp_diag.h
include/utils.h
ip/Makefile
ip/ifcfg
ip/ip.c
ip/ip_common.h
ip/ipaddress.c
ip/iplink.c
ip/ipmaddr.c
ip/ipmonitor.c
ip/ipmroute.c
ip/ipneigh.c
ip/iproute.c
ip/iprule.c
ip/iptunnel.c
ip/routef
ip/routel
ip/rtm_map.c
ip/rtmon.c
ip/rtpr
lib/Makefile
lib/dnet_ntop.c
lib/dnet_pton.c
lib/inet_ntop.c
lib/inet_proto.c
lib/inet_pton.c
lib/ipx_ntop.c
lib/ipx_pton.c
lib/libnetlink.c
lib/ll_addr.c
lib/ll_map.c
lib/ll_proto.c
lib/ll_types.c
lib/rt_names.c
lib/utils.c
misc/Makefile
misc/arpd.c
misc/ifstat.c
misc/netbug
misc/nstat.c
misc/rtacct.c
misc/rtstat.c
misc/ss.c
misc/ssfilter.h
misc/ssfilter.y
tc/Makefile
tc/README.last
tc/f_fw.c
tc/f_route.c
tc/f_rsvp.c
tc/f_tcindex.c
tc/f_u32.c
tc/m_estimator.c
tc/m_police.c
tc/q_atm.c
tc/q_cbq.c
tc/q_csz.c
tc/q_dsmark.c
tc/q_fifo.c
tc/q_gred.c
tc/q_hfsc.c
tc/q_hpfq.c
tc/q_ingress.c
tc/q_prio.c
tc/q_red.c
tc/q_sfq.c
tc/q_tbf.c
tc/tc.c
tc/tc_cbq.c
tc/tc_cbq.h
tc/tc_class.c
tc/tc_common.h
tc/tc_core.c
tc/tc_core.h
tc/tc_estimator.c
tc/tc_filter.c
tc/tc_qdisc.c
tc/tc_red.c
tc/tc_red.h
tc/tc_util.c
tc/tc_util.h

diff --git a/Config b/Config
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..ca6cdcea700f5a7b183bc929633146be94a0dd1a 100644 (file)
--- a/Config
+++ b/Config
@@ -0,0 +1,2 @@
+TC_CONFIG_DIFFSERV=n
+TC_CONFIG_ATM=n
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..05063e775462c495012498b1b8322254eb61dffc 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -0,0 +1,77 @@
+# Path to parent kernel include files directory
+DESTDIR=
+SBINDIR=/sbin
+CONFDIR=/etc/iproute2
+DOCDIR=/usr/doc/iproute2
+
+KERNEL_INCLUDE=/usr/src/linux/include
+LIBC_INCLUDE=/usr/include
+
+DEFINES= -DRESOLVE_HOSTNAMES
+
+#options if you have a bind>=4.9.4 libresolv (or, maybe, glibc)
+LDLIBS=-lresolv
+ADDLIB=
+
+#options if you compile with libc5, and without a bind>=4.9.4 libresolv
+#LDLIBS=
+#ADDLIB=inet_ntop.o inet_pton.o
+
+#options for decnet
+ADDLIB+=dnet_ntop.o dnet_pton.o
+
+#options for ipx
+ADDLIB+=ipx_ntop.o ipx_pton.o
+
+ifeq ($(LIBC_INCLUDE)/socketbits.h,$(wildcard $(LIBC_INCLUDE)/socketbits.h))
+  ifeq ($(LIBC_INCLUDE)/net/if_packet.h,$(wildcard $(LIBC_INCLUDE)/net/if_packet.h))
+    GLIBCFIX=-I../include-glibc -include ../include-glibc/glibc-bugs.h
+  endif
+endif
+ifeq ($(LIBC_INCLUDE)/bits/socket.h,$(wildcard $(LIBC_INCLUDE)/bits/socket.h))
+  GLIBCFIX=-I../include-glibc -I/usr/include/db3 -include ../include-glibc/glibc-bugs.h
+endif
+
+
+CC = gcc
+CCOPTS = -D_GNU_SOURCE -O2 -Wstrict-prototypes -Wall -g
+CFLAGS = $(CCOPTS) $(GLIBCFIX) -I$(KERNEL_INCLUDE) -I../include $(DEFINES)
+
+LDLIBS += -L../lib -lnetlink -lutil
+
+SUBDIRS=lib ip tc misc
+
+LIBNETLINK=../lib/libnetlink.a ../lib/libutil.a
+
+all: check-kernel
+       @set -e; \
+       for i in $(SUBDIRS); \
+       do $(MAKE) -C $$i; done
+
+check-kernel:
+ifeq ($(KERNEL_INCLUDE),)
+       @echo "Please, set correct KERNEL_INCLUDE"; false
+else
+       @set -e; \
+       if [ ! -r $(KERNEL_INCLUDE)/linux/autoconf.h ]; then \
+               echo "Please, compile the kernel first"; false; fi
+endif
+
+install: all
+       install -m 0755 -d $(DESTDIR)$(SBINDIR)
+       install -m 0755 -d $(DESTDIR)$(CONFDIR)
+       install -m 0755 -d $(DESTDIR)$(DOCDIR)/examples
+       install -m 0755 -d $(DESTDIR)$(DOCDIR)/examples/diffserv
+       install -m 0644 README.iproute2+tc $(shell find examples -type f -maxdepth 1) $(DESTDIR)$(DOCDIR)/examples
+       install -m 0644 $(shell echo examples/diffserv/*) $(DESTDIR)$(DOCDIR)/examples/diffserv
+       @for i in $(SUBDIRS) doc; do $(MAKE) -C $$i install; done
+       @cd etc/iproute2; for i in *; do \
+               if [ ! -e $(DESTDIR)$(CONFDIR)/$$i ]; then \
+                       echo install -m 0644 $$i $(DESTDIR)$(CONFDIR); \
+                       install -m 0644 $$i $(DESTDIR)$(CONFDIR); fi; done
+
+clean:
+       for i in $(SUBDIRS) doc; \
+       do $(MAKE) -C $$i clean; done
+
+.EXPORT_ALL_VARIABLES:
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e5d2d0f25d167ae4cf4e390d1544ec86c59c568d 100644 (file)
@@ -0,0 +1,7 @@
+File:          tcp_diag.c
+Status:                desired for kernels < 2.4.17
+               not needed for kernels >= 2.4.17
+Description:   adds tcpdiag facility to kernel to accelerate ss utility
+               and pidentd
+Side effects:  none
\ No newline at end of file
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e11e221d0ac68ea6b701e1f1d6e445b38e6cf214 100644 (file)
@@ -0,0 +1,623 @@
+/*
+ * tcp_diag.c  Module for monitoring TCP sockets.
+ *
+ * Version:    $
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ *     This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/random.h>
+#include <linux/cache.h>
+#include <linux/init.h>
+
+#include <net/icmp.h>
+#include <net/tcp.h>
+#include <net/ipv6.h>
+#include <net/inet_common.h>
+
+#include <linux/inet.h>
+#include <linux/stddef.h>
+
+#include "tcp_diag.h"
+
+static struct sock *tcpnl;
+
+/*
+ * TCPDIAG_PUT - append one rtattr of type @attrtype with room for @attrlen
+ * payload bytes to the tail of @skb.
+ *
+ * Evaluates (GNU statement expression) to a pointer to the attribute payload,
+ * i.e. RTA_DATA() of the new attribute. Only the attribute header (rta_type,
+ * rta_len) is filled in here; the caller writes the payload.
+ *
+ * If @skb has less tailroom than the aligned attribute needs, control jumps
+ * to a label named "nlmsg_failure", which the enclosing function must define.
+ */
+#define TCPDIAG_PUT(skb, attrtype, attrlen) \
+({ int rtalen = RTA_LENGTH(attrlen);        \
+   struct rtattr *rta;                      \
+   if (skb_tailroom(skb) < RTA_ALIGN(rtalen)) goto nlmsg_failure; \
+   rta = (void*)__skb_put(skb, RTA_ALIGN(rtalen)); \
+   rta->rta_type = attrtype;                \
+   rta->rta_len = rtalen;                   \
+   RTA_DATA(rta); })
+/*
+ * tcpdiag_fill - append one TCPDIAG_GETSOCK netlink message describing @sk
+ * to @skb.
+ *
+ * @ext is a bitmask: bit (TCPDIAG_MEMINFO-1) requests a tcpdiag_meminfo
+ * attribute and bit (TCPDIAG_INFO-1) a tcp_info attribute. Both are only
+ * emitted when the socket is not in TCP_TIME_WAIT.
+ * @pid and @seq are handed to NLMSG_PUT for the netlink header.
+ *
+ * Returns skb->len on success. When an attribute does not fit (the
+ * nlmsg_failure label, reached from TCPDIAG_PUT and presumably from
+ * NLMSG_PUT as well) the skb is trimmed back to where this message started
+ * and -1 is returned.
+ */
+static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk,
+                       int ext, u32 pid, u32 seq)
+{
+       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+       struct tcpdiagmsg *r;
+       struct nlmsghdr  *nlh;
+       struct tcp_info  *info = NULL;
+       struct tcpdiag_meminfo  *minfo = NULL;
+       unsigned char    *b = skb->tail;
+       /* 'b' marks the start of this message: used for nlmsg_len and rollback. */
+       nlh = NLMSG_PUT(skb, pid, seq, TCPDIAG_GETSOCK, sizeof(*r));
+       r = NLMSG_DATA(nlh);
+       if (sk->state != TCP_TIME_WAIT) {
+               if (ext & (1<<(TCPDIAG_MEMINFO-1)))
+                       minfo = TCPDIAG_PUT(skb, TCPDIAG_MEMINFO, sizeof(*minfo));
+               if (ext & (1<<(TCPDIAG_INFO-1)))
+                       info = TCPDIAG_PUT(skb, TCPDIAG_INFO, sizeof(*info));
+       }
+       r->tcpdiag_family = sk->family;
+       r->tcpdiag_state = sk->state;
+       r->tcpdiag_timer = 0;
+       r->tcpdiag_retrans = 0;
+       /* Connection identity; the IPv4 addresses go into word 0 of src/dst. */
+       r->id.tcpdiag_sport = sk->sport;
+       r->id.tcpdiag_dport = sk->dport;
+       r->id.tcpdiag_src[0] = sk->rcv_saddr;
+       r->id.tcpdiag_dst[0] = sk->daddr;
+       r->id.tcpdiag_if = sk->bound_dev_if;
+       *((struct sock **)&r->id.tcpdiag_cookie) = sk;
+       /* NOTE(review): the cookie is the raw socket pointer; if the cookie
+        * array is wider than a pointer the remaining bytes are not written
+        * here - confirm against the tcpdiag_cookie declaration.
+        * Time-wait entries are handled as struct tcp_tw_bucket and return early. */
+       if (r->tcpdiag_state == TCP_TIME_WAIT) {
+               struct tcp_tw_bucket *tw = (struct tcp_tw_bucket*)sk;
+               long tmo = tw->ttd - jiffies;
+               if (tmo < 0)
+                       tmo = 0;
+
+               r->tcpdiag_state = tw->substate;
+               r->tcpdiag_timer = 3;
+               r->tcpdiag_expires = (tmo*1000+HZ-1)/HZ;
+               r->tcpdiag_rqueue = 0;
+               r->tcpdiag_wqueue = 0;
+               r->tcpdiag_uid = 0;
+               r->tcpdiag_inode = 0;
+#ifdef CONFIG_IPV6
+               if (r->tcpdiag_family == AF_INET6) {
+                       memcpy(r->id.tcpdiag_src, &tw->v6_rcv_saddr, 16);
+                       memcpy(r->id.tcpdiag_dst, &tw->v6_daddr, 16);
+               }
+#endif
+               nlh->nlmsg_len = skb->tail - b;
+               return skb->len;
+       }
+
+#ifdef CONFIG_IPV6
+       if (r->tcpdiag_family == AF_INET6) {
+               memcpy(r->id.tcpdiag_src, &sk->net_pinfo.af_inet6.rcv_saddr, 16);
+               memcpy(r->id.tcpdiag_dst, &sk->net_pinfo.af_inet6.daddr, 16);
+       }
+#endif
+       /* Distance from now (jiffies) to 'tmo', scaled by 1000/HZ and rounded up. */
+#define EXPIRES_IN_MS(tmo)  ((tmo-jiffies)*1000+HZ-1)/HZ
+       /* tcpdiag_timer: 1 = TCP_TIME_RETRANS pending, 4 = TCP_TIME_PROBE0
+        * pending, 2 = sk->timer pending, 0 = none of these. */
+       if (tp->pending == TCP_TIME_RETRANS) {
+               r->tcpdiag_timer = 1;
+               r->tcpdiag_retrans = tp->retransmits;
+               r->tcpdiag_expires = EXPIRES_IN_MS(tp->timeout);
+       } else if (tp->pending == TCP_TIME_PROBE0) {
+               r->tcpdiag_timer = 4;
+               r->tcpdiag_retrans = tp->probes_out;
+               r->tcpdiag_expires = EXPIRES_IN_MS(tp->timeout);
+       } else if (timer_pending(&sk->timer)) {
+               r->tcpdiag_timer = 2;
+               r->tcpdiag_retrans = tp->probes_out;
+               r->tcpdiag_expires = EXPIRES_IN_MS(sk->timer.expires);
+       } else {
+               r->tcpdiag_timer = 0;
+               r->tcpdiag_expires = 0;
+       }
+#undef EXPIRES_IN_MS
+       /* Queue sizes are derived from sequence-number differences. */
+       r->tcpdiag_rqueue = tp->rcv_nxt - tp->copied_seq;
+       r->tcpdiag_wqueue = tp->write_seq - tp->snd_una;
+       r->tcpdiag_uid = sock_i_uid(sk);
+       r->tcpdiag_inode = sock_i_ino(sk);
+       /* Optional attributes: filled only if they were reserved above. */
+       if (minfo) {
+               minfo->tcpdiag_rmem = atomic_read(&sk->rmem_alloc);
+               minfo->tcpdiag_wmem = sk->wmem_queued;
+               minfo->tcpdiag_fmem = sk->forward_alloc;
+               minfo->tcpdiag_tmem = atomic_read(&sk->wmem_alloc);
+       }
+
+       if (info) {
+               u32 now = tcp_time_stamp;
+
+               info->tcpi_state = sk->state;
+               info->tcpi_ca_state = tp->ca_state;
+               info->tcpi_retransmits = tp->retransmits;
+               info->tcpi_probes = tp->probes_out;
+               info->tcpi_backoff = tp->backoff;
+               info->tcpi_options = 0;
+               if (tp->tstamp_ok)
+                       info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
+               if (tp->sack_ok)
+                       info->tcpi_options |= TCPI_OPT_SACK;
+               if (tp->wscale_ok) {
+                       info->tcpi_options |= TCPI_OPT_WSCALE;
+                       info->tcpi_snd_wscale = tp->snd_wscale;
+                       info->tcpi_rcv_wscale = tp->rcv_wscale;
+               } else {
+                       info->tcpi_snd_wscale = 0;
+                       info->tcpi_rcv_wscale = 0;
+               }
+#ifdef CONFIG_INET_ECN
+               if (tp->ecn_flags&TCP_ECN_OK)
+                       info->tcpi_options |= TCPI_OPT_ECN;
+#endif
+               /* Scaled by 1000000/HZ (presumably jiffies -> microseconds). */
+               info->tcpi_rto = (1000000*tp->rto)/HZ;
+               info->tcpi_ato = (1000000*tp->ack.ato)/HZ;
+               info->tcpi_snd_mss = tp->mss_cache;
+               info->tcpi_rcv_mss = tp->ack.rcv_mss;
+
+               info->tcpi_unacked = tp->packets_out;
+               info->tcpi_sacked = tp->sacked_out;
+               info->tcpi_lost = tp->lost_out;
+               info->tcpi_retrans = tp->retrans_out;
+               info->tcpi_fackets = tp->fackets_out;
+               /* Ages relative to 'now', scaled by 1000/HZ; last_ack_sent is always 0. */
+               info->tcpi_last_data_sent = ((now - tp->lsndtime)*1000)/HZ;
+               info->tcpi_last_ack_sent = 0;
+               info->tcpi_last_data_recv = ((now - tp->ack.lrcvtime)*1000)/HZ;
+               info->tcpi_last_ack_recv = ((now - tp->rcv_tstamp)*1000)/HZ;
+
+               info->tcpi_pmtu = tp->pmtu_cookie;
+               info->tcpi_rcv_ssthresh = tp->rcv_ssthresh;
+               info->tcpi_rtt = ((1000000*tp->srtt)/HZ)>>3;
+               info->tcpi_rttvar = ((1000000*tp->mdev)/HZ)>>2;
+               info->tcpi_snd_ssthresh = tp->snd_ssthresh;
+               info->tcpi_snd_cwnd = tp->snd_cwnd;
+               info->tcpi_advmss = tp->advmss;
+               info->tcpi_reordering = tp->reordering;
+       }
+
+       nlh->nlmsg_len = skb->tail - b;
+       return skb->len;
+
+nlmsg_failure:
+       skb_trim(skb, b - skb->data);
+       return -1;
+}
+
+extern struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif);
+#ifdef CONFIG_IPV6
+extern struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
+                                 struct in6_addr *daddr, u16 dport,
+                                 int dif);
+#endif
+/*
+ * tcpdiag_get_exact - answer a non-dump request for one specific socket.
+ *
+ * Looks the socket up from the address/port/interface tuple in the request
+ * (AF_INET, plus AF_INET6 when CONFIG_IPV6 is set), optionally checks the
+ * caller-supplied cookie against the socket pointer, builds one reply with
+ * tcpdiag_fill() and unicasts it to the requesting pid.
+ *
+ * Returns 0 on success, -EINVAL for an unsupported family, -ENOENT when no
+ * socket matches, -ESTALE on a cookie mismatch, -ENOMEM when the reply skb
+ * cannot be allocated, otherwise the non-positive result of
+ * netlink_unicast().
+ */
+static int tcpdiag_get_exact(struct sk_buff *in_skb, struct nlmsghdr *nlh)
+{
+       int err;
+       struct sock *sk;
+       struct tcpdiagreq *req = NLMSG_DATA(nlh);
+       struct sk_buff *rep;
+       /* Note the order: the request's dst/dport are passed as the lookup's saddr/sport. */
+       if (req->tcpdiag_family == AF_INET) {
+               sk = tcp_v4_lookup(req->id.tcpdiag_dst[0], req->id.tcpdiag_dport,
+                                  req->id.tcpdiag_src[0], req->id.tcpdiag_sport,
+                                  req->id.tcpdiag_if);
+       }
+#ifdef CONFIG_IPV6
+       else if (req->tcpdiag_family == AF_INET6) {
+               sk = tcp_v6_lookup((struct in6_addr*)req->id.tcpdiag_dst, req->id.tcpdiag_dport,
+                                  (struct in6_addr*)req->id.tcpdiag_src, req->id.tcpdiag_sport,
+                                  req->id.tcpdiag_if);
+       }
+#endif
+       else {
+               return -EINVAL;
+       }
+
+       if (sk == NULL)
+               return -ENOENT;
+       /* A cookie whose two words are both TCPDIAG_NOCOOKIE disables the check. */
+       err = -ESTALE;
+       if ((req->id.tcpdiag_cookie[0] != TCPDIAG_NOCOOKIE ||
+            req->id.tcpdiag_cookie[1] != TCPDIAG_NOCOOKIE) &&
+           sk != *((struct sock **)&req->id.tcpdiag_cookie[0]))
+               goto out;
+
+       err = -ENOMEM;
+       rep = alloc_skb(NLMSG_SPACE(sizeof(struct tcpdiagmsg)+
+                                   sizeof(struct tcpdiag_meminfo)+
+                                   sizeof(struct tcp_info)+64), GFP_KERNEL);
+       if (!rep)
+               goto out;
+       /* The skb has room for both optional attributes plus slack, so a fill failure is a bug. */
+       if (tcpdiag_fill(rep, sk, req->tcpdiag_ext,
+                        NETLINK_CB(in_skb).pid,
+                        nlh->nlmsg_seq) <= 0)
+               BUG();
+
+       err = netlink_unicast(tcpnl, rep, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
+       if (err > 0)
+               err = 0;
+       /* Release the socket (reference presumably taken by the lookup); time-wait entries use tcp_tw_put(). */
+out:
+       if (sk) {
+               if (sk->state == TCP_TIME_WAIT)
+                       tcp_tw_put((struct tcp_tw_bucket*)sk);
+               else
+                       sock_put(sk);
+       }
+       return err;
+}
+/*
+ * bitstring_match - compare the leading @bits bits of two 32-bit word arrays.
+ *
+ * Whole words are compared with memcmp(); a trailing partial word is compared
+ * under a mask built with htonl(), so the bits are counted from the most
+ * significant end of the word as laid out in network byte order.
+ *
+ * Returns 1 when the prefixes are equal, 0 otherwise.
+ */
+int bitstring_match(u32 *a1, u32 *a2, int bits)
+{
+       int nwords = bits >> 5;
+       int nbits = bits & 0x1f;
+
+       /* Every complete leading word must be identical. */
+       if (nwords && memcmp(a1, a2, nwords << 2))
+               return 0;
+
+       /* Then the top 'nbits' bits of the following word, if any. */
+       if (nbits) {
+               __u32 mask = htonl((0xffffffff) << (32 - nbits));
+               __u32 diff = a1[nwords] ^ a2[nwords];
+
+               if (diff & mask)
+                       return 0;
+       }
+
+       return 1;
+}
+
+/*
+ * tcpdiag_bc_run - evaluate filter bytecode @bc (@len bytes) against @sk.
+ *
+ * Each instruction is a struct tcpdiag_bc_op. When its condition holds,
+ * execution advances by op->yes bytes, otherwise by op->no bytes. Port
+ * comparisons take their operand from the 'no' field of the next op slot
+ * (op[1]); host conditions carry a struct tcpdiag_hostcond right after the op.
+ * Unknown opcodes fall through the switch with the condition left true.
+ *
+ * Returns 1 when the program finishes with exactly zero bytes remaining,
+ * 0 otherwise. No bounds checks are made here; tcpdiag_rcv_msg() runs
+ * tcpdiag_bc_audit() on the bytecode before a dump is started.
+ */
+int tcpdiag_bc_run(char *bc, int len, struct sock *sk)
+{
+       while (len > 0) {
+               int yes = 1;
+               struct tcpdiag_bc_op *op = (struct tcpdiag_bc_op*)bc;
+
+               switch (op->code) {
+               case TCPDIAG_BC_NOP:
+                       break;
+               case TCPDIAG_BC_JMP:
+                       yes = 0;        /* always take the 'no' offset */
+                       break;
+               case TCPDIAG_BC_S_GE:
+                       yes = (sk->num >= op[1].no);
+                       break;
+               case TCPDIAG_BC_S_LE:
+                       yes = (sk->num <= op[1].no);
+                       break;
+               case TCPDIAG_BC_D_GE:
+                       yes = (ntohs(sk->dport) >= op[1].no);
+                       break;
+               case TCPDIAG_BC_D_LE:
+                       yes = (ntohs(sk->dport) <= op[1].no);
+                       break;
+               case TCPDIAG_BC_AUTO:
+                       yes = !(sk->userlocks&SOCK_BINDPORT_LOCK);
+                       break;
+               case TCPDIAG_BC_S_COND:
+               case TCPDIAG_BC_D_COND:
+               {
+                       struct tcpdiag_hostcond *cond = (struct tcpdiag_hostcond*)(op+1);
+                       u32 *addr;
+
+                       if (cond->port != -1 &&
+                           cond->port != (op->code == TCPDIAG_BC_S_COND ? sk->num : ntohs(sk->dport))) {
+                               yes = 0;
+                               break;
+                       }
+                       /* A zero prefix length matches any address. */
+                       if (cond->prefix_len == 0)
+                               break;
+
+                       if (sk->family == AF_INET6) {
+                               if (op->code == TCPDIAG_BC_S_COND)
+                                       addr = (u32*)&sk->net_pinfo.af_inet6.rcv_saddr;
+                               else
+                                       addr = (u32*)&sk->net_pinfo.af_inet6.daddr;
+                       } else {
+                               if (op->code == TCPDIAG_BC_S_COND)
+                                       addr = &sk->rcv_saddr;
+                               else
+                                       addr = &sk->daddr;
+                       }
+                       /* Direct prefix match first; then, for an AF_INET condition on an
+                        * AF_INET6 socket, match against the last word of a 0:0:ffff:x address. */
+                       if (bitstring_match(addr, cond->addr, cond->prefix_len))
+                               break;
+                       if (sk->family == AF_INET6 && cond->family == AF_INET) {
+                               if (addr[0] == 0 && addr[1] == 0 &&
+                                   addr[2] == __constant_htonl(0xffff) &&
+                                   bitstring_match(addr+3, cond->addr, cond->prefix_len))
+                                       break;
+                       }
+                       yes = 0;
+                       break;
+               }
+               }
+
+               if (yes) { 
+                       len -= op->yes;
+                       bc += op->yes;
+               } else {
+                       len -= op->no;
+                       bc += op->no;
+               }
+       }
+       return (len == 0);
+}
+/*
+ * valid_cc - check that a jump target falls on an instruction boundary.
+ *
+ * Walks the program from @bc (@len bytes left) following each op's 'yes'
+ * offset. @cc is the number of bytes that should remain at the target.
+ *
+ * Returns 1 if the walk reaches a point with exactly @cc bytes remaining,
+ * 0 if it steps past that point, meets an op with yes < 4, or runs out.
+ */
+int valid_cc(char *bc, int len, int cc)
+{
+       struct tcpdiag_bc_op *op;
+
+       for (; len >= 0; len -= op->yes, bc += op->yes) {
+               op = (struct tcpdiag_bc_op*)bc;
+
+               /* Reached or overshot the target: only an exact hit is valid. */
+               if (cc >= len)
+                       return cc == len;
+               if (op->yes < 4)
+                       return 0;
+       }
+       return 0;
+}
+/*
+ * tcpdiag_bc_audit - validate user-supplied filter bytecode before it is run.
+ *
+ * Walks the program along the 'yes' offsets and checks, for every op:
+ *   - the opcode is known;
+ *   - 'yes' is at least 4 and at most len+4 (checked for EVERY opcode,
+ *     including TCPDIAG_BC_JMP, because the walk below always advances by
+ *     'yes' - an unchecked yes == 0 would make this loop spin forever);
+ *   - for conditional ops and jumps, 'no' is in the same range and, when it
+ *     points inside the program, lands on an op boundary (valid_cc()).
+ *
+ * Returns 0 if the program is acceptable and ends exactly at @bytecode_len,
+ * -EINVAL otherwise.
+ */
+int tcpdiag_bc_audit(char *bytecode, int bytecode_len)
+{
+       char *bc = bytecode;
+       int  len = bytecode_len;
+
+       while (len > 0) {
+               struct tcpdiag_bc_op *op = (struct tcpdiag_bc_op*)bc;
+
+               switch (op->code) {
+               case TCPDIAG_BC_AUTO:
+               case TCPDIAG_BC_S_COND:
+               case TCPDIAG_BC_D_COND:
+               case TCPDIAG_BC_S_GE:
+               case TCPDIAG_BC_S_LE:
+               case TCPDIAG_BC_D_GE:
+               case TCPDIAG_BC_D_LE:
+               case TCPDIAG_BC_JMP:
+                       if (op->no < 4 || op->no > len+4)
+                               return -EINVAL;
+                       if (op->no < len &&
+                           !valid_cc(bytecode, bytecode_len, len-op->no))
+                               return -EINVAL;
+                       break;
+               case TCPDIAG_BC_NOP:
+                       break;
+               default:
+                       return -EINVAL;
+               }
+
+               /* The walk advances by 'yes' for all opcodes, so validate it for all. */
+               if (op->yes < 4 || op->yes > len+4)
+                       return -EINVAL;
+               bc += op->yes;
+               len -= op->yes;
+       }
+       return len == 0 ? 0 : -EINVAL;
+}
+
+/*
+ * tcpdiag_dump - netlink dump callback: emit one message per matching socket.
+ *
+ * Resume state is kept in cb->args: args[0] is 0 while the listening hash
+ * still has to be walked and 1 afterwards; args[1] is the bucket index and
+ * args[2] the position inside that bucket at which to continue.
+ *
+ * Sockets are filtered by the request's state mask, by non-zero source and
+ * destination ports, and by optional bytecode that follows the request.
+ * When tcpdiag_fill() reports failure the current position is saved and the
+ * function returns so the dump can be resumed later.
+ *
+ * Always returns skb->len.
+ */
+int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       int i, num;
+       int s_i, s_num;
+       struct tcpdiagreq *r = NLMSG_DATA(cb->nlh);
+       struct rtattr *bc = NULL;
+       /* Anything beyond the fixed request is taken to be the bytecode attribute. */
+       if (cb->nlh->nlmsg_len > 4+NLMSG_SPACE(sizeof(struct tcpdiagreq)))
+               bc = (struct rtattr*)(r+1);
+
+       s_i = cb->args[1];
+       s_num = num = cb->args[2];
+       /* Phase 0: the listening hash, walked under tcp_listen_lock(). */
+       if (cb->args[0] == 0) {
+               if (!(r->tcpdiag_states&(TCPF_LISTEN|TCPF_SYN_RECV)))
+                       goto skip_listen_ht;
+               tcp_listen_lock();
+               for (i = s_i; i < TCP_LHTABLE_SIZE; i++) {
+                       struct sock *sk = tcp_listening_hash[i];
+
+                       if (i > s_i)
+                               s_num = 0;
+
+                       for (sk = tcp_listening_hash[i], num = 0;
+                            sk != NULL;
+                            sk = sk->next, num++) {
+                               if (num < s_num)
+                                       continue;
+                               if (!(r->tcpdiag_states&TCPF_LISTEN) ||
+                                   r->id.tcpdiag_dport)
+                                       continue;
+                               if (r->id.tcpdiag_sport != sk->sport && r->id.tcpdiag_sport)
+                                       continue;
+                               if (bc && !tcpdiag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), sk))
+                                       continue;
+                               if (tcpdiag_fill(skb, sk, r->tcpdiag_ext,
+                                                NETLINK_CB(cb->skb).pid,
+                                                cb->nlh->nlmsg_seq) <= 0) {
+                                       tcp_listen_unlock();
+                                       goto done;
+                               }
+                       }
+               }
+               tcp_listen_unlock();
+skip_listen_ht:
+               cb->args[0] = 1;
+               s_i = num = s_num = 0;
+       }
+       /* Phase 1: the established hash; nothing to do if only LISTEN/SYN_RECV were requested. */
+       if (!(r->tcpdiag_states&~(TCPF_LISTEN|TCPF_SYN_RECV)))
+               return skb->len;
+
+       for (i = s_i; i < tcp_ehash_size; i++) {
+               struct tcp_ehash_bucket *head = &tcp_ehash[i];
+               struct sock *sk;
+
+               if (i > s_i)
+                       s_num = 0;
+
+               read_lock_bh(&head->lock);
+
+               for (sk = head->chain, num = 0;
+                    sk != NULL;
+                    sk = sk->next, num++) {
+                       if (num < s_num)
+                               continue;
+                       if (!(r->tcpdiag_states&(1<<sk->state)))
+                               continue;
+                       if (r->id.tcpdiag_sport != sk->sport && r->id.tcpdiag_sport)
+                               continue;
+                       if (r->id.tcpdiag_dport != sk->dport && r->id.tcpdiag_dport)
+                               continue;
+                       if (bc && !tcpdiag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), sk))
+                               continue;
+                       if (tcpdiag_fill(skb, sk, r->tcpdiag_ext,
+                                        NETLINK_CB(cb->skb).pid,
+                                        cb->nlh->nlmsg_seq) <= 0) {
+                               read_unlock_bh(&head->lock);
+                               goto done;
+                       }
+               }
+               /* The chain at index i+tcp_ehash_size is walked under the same
+                * bucket lock, with 'num' continuing from the loop above.
+                * NOTE(review): the state test below reads sk->zapped, not
+                * sk->state; presumably that field overlays the time-wait
+                * bucket's substate - confirm against the struct layouts. */
+               if (r->tcpdiag_states&TCPF_TIME_WAIT) {
+                       for (sk = tcp_ehash[i+tcp_ehash_size].chain;
+                            sk != NULL;
+                            sk = sk->next, num++) {
+                               if (num < s_num)
+                                       continue;
+                               if (!(r->tcpdiag_states&(1<<sk->zapped)))
+                                       continue;
+                               if (r->id.tcpdiag_sport != sk->sport && r->id.tcpdiag_sport)
+                                       continue;
+                               if (r->id.tcpdiag_dport != sk->dport && r->id.tcpdiag_dport)
+                                       continue;
+                               if (bc && !tcpdiag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), sk))
+                                       continue;
+                               if (tcpdiag_fill(skb, sk, r->tcpdiag_ext,
+                                                NETLINK_CB(cb->skb).pid,
+                                                cb->nlh->nlmsg_seq) <= 0) {
+                                       read_unlock_bh(&head->lock);
+                                       goto done;
+                               }
+                       }
+               }
+               read_unlock_bh(&head->lock);
+       }
+       /* Save the position so the next callback invocation resumes here. */
+done:
+       cb->args[1] = i;
+       cb->args[2] = num;
+       return skb->len;
+}
+/*
+ * tcpdiag_dump_done - completion hook handed to netlink_dump_start().
+ * Does nothing and reports success.
+ */
+static int
+tcpdiag_dump_done(struct netlink_callback *cb)
+{
+       return 0;
+}
+
+/*
+ * tcpdiag_rcv_msg - dispatch one received netlink request.
+ *
+ * Messages without NLM_F_REQUEST are ignored (return 0). A type other than
+ * TCPDIAG_GETSOCK, or an skb shorter than a tcpdiagreq message, yields
+ * -EINVAL. An NLM_F_DUMP request may carry one attribute after the fixed
+ * request; it must be TCPDIAG_REQ_BYTECODE, pass the length checks and pass
+ * tcpdiag_bc_audit() before the dump is started. All other requests are
+ * handled by tcpdiag_get_exact().
+ *
+ * Returns 0, a negative errno, or whatever netlink_dump_start() /
+ * tcpdiag_get_exact() return.
+ */
+static __inline__ int
+tcpdiag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+       if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
+               return 0;
+
+       if (nlh->nlmsg_type != TCPDIAG_GETSOCK)
+               goto err_inval;
+
+       if (NLMSG_LENGTH(sizeof(struct tcpdiagreq)) > skb->len)
+               goto err_inval;
+
+       if (nlh->nlmsg_flags&NLM_F_DUMP) {
+               if (nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(struct tcpdiagreq))) {
+                       struct rtattr *rta = (struct rtattr*)(NLMSG_DATA(nlh) + sizeof(struct tcpdiagreq));
+                       if (rta->rta_type != TCPDIAG_REQ_BYTECODE ||
+                           rta->rta_len < 8 ||
+                           rta->rta_len > nlh->nlmsg_len - NLMSG_SPACE(sizeof(struct tcpdiagreq)))
+                               goto err_inval;
+                       if (tcpdiag_bc_audit(RTA_DATA(rta), RTA_PAYLOAD(rta)))
+                               goto err_inval;
+               }
+               return netlink_dump_start(tcpnl, skb, nlh,
+                                         tcpdiag_dump,
+                                         tcpdiag_dump_done);
+       } else {
+               return tcpdiag_get_exact(skb, nlh);
+       }
+
+err_inval:
+       return -EINVAL;
+}
+
+/*
+ * tcpdiag_rcv_skb - process the netlink request at the start of @skb.
+ *
+ * Buffers too short to hold a netlink header are ignored, as are messages
+ * whose nlmsg_len is smaller than the header or larger than the skb.
+ * Otherwise the message is passed to tcpdiag_rcv_msg() and, if that returns
+ * non-zero, the value is reported back through netlink_ack().
+ *
+ * Declared static rather than "extern __inline__": with GNU89 inline
+ * semantics an extern inline definition never produces an out-of-line copy,
+ * so the sole caller in this file would reference an undefined symbol
+ * whenever the compiler chose not to inline.
+ */
+static __inline__ void tcpdiag_rcv_skb(struct sk_buff *skb)
+{
+       int err;
+       struct nlmsghdr * nlh;
+
+       if (skb->len >= NLMSG_SPACE(0)) {
+               nlh = (struct nlmsghdr *)skb->data;
+               if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
+                       return;
+               err = tcpdiag_rcv_msg(skb, nlh);
+               if (err)
+                       netlink_ack(skb, nlh, err);
+       }
+}
+/*
+ * tcpdiag_rcv - input callback registered with netlink_kernel_create().
+ * Drains @sk's receive queue, handling and then freeing each skb in turn.
+ * @len is not used.
+ */
+static void tcpdiag_rcv(struct sock *sk, int len)
+{
+       struct sk_buff *skb;
+
+       for (;;) {
+               skb = skb_dequeue(&sk->receive_queue);
+               if (skb == NULL)
+                       break;
+               tcpdiag_rcv_skb(skb);
+               kfree_skb(skb);
+       }
+}
+/*
+ * tcpdiag_init - module entry point: create the NETLINK_TCPDIAG kernel
+ * socket with tcpdiag_rcv() as its input handler.
+ * Returns 0 on success, -EBUSY if the socket could not be created.
+ */
+static int __init tcpdiag_init(void)
+{
+       tcpnl = netlink_kernel_create(NETLINK_TCPDIAG, tcpdiag_rcv);
+       return tcpnl ? 0 : -EBUSY;
+}
+/*
+ * tcpdiag_exit - module exit point: log a warning and release the netlink
+ * socket if one was created.
+ */
+static void __exit tcpdiag_exit(void)
+{
+       printk(KERN_INFO "Caution: unloading tcp_diag is not very well supported. Nothing to worry, but yet.\n");
+       if (tcpnl == NULL)
+               return;
+       sock_release(tcpnl->socket);
+}
+
+module_init(tcpdiag_init);
+module_exit(tcpdiag_exit);
+
+/*
+ * Local variables:
+ * compile-command: "gcc -DMOPS -DMODULE -D__KERNEL__ -I../include -I/usr/src/linux/include -Wall -Wstrict-prototypes -O2 -c tcp_diag.c"
+ * End:
+ */
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..8e1927914b5701ec46d6f2152225e46f0d4662ec 100644 (file)
@@ -0,0 +1,46 @@
+File:          rt_cache_stat.dif
+Apply to:      kernel < 2.4.7
+Status:                recommended for kernels < 2.4.7.
+               already present in >= 2.4.7
+Description:   tracing efficiency of routing cache
+Side effects:  none
+
+File:          pidentd-3.0.12.dif
+Apply to:      pidentd-3.0.12 tree, e.g. from a Red Hat rpm
+Status:                highly recommended
+Description:   Patch to pidentd allowing to use tcpdiag facility and fixing
+               some bugs in original pident.
+Side effects:  none. Does not break anything not depending on kernel version,
+               even if tcpdiag is absent.
+Advice:                not related to this patch but should be said yet.
+               Do NOT configure pidentd to use threads! Use option
+               "--without-threads" when doing "configure".
+               pidentd is typical example of application where
+               threading results in nothing but collapse of performance.
+               Apparently author learned thread programming and decided
+               to apply new knowledge to the first victim.
+
+File:          symbol_exports.dif 
+Apply to:      kernel < 2.4.17
+Status:                desired for kernels < 2.4.17
+               not needed for kernels >= 2.4.17
+Description:   exports symbols required to load tcpdiag module
+               tcpdiag is builtin since 2.4.17, hence the exports
+               are redundant.
+Side effects:  none
+
+File:          af_unix.dif
+Apply to:      kernel
+Status:                recommended
+Description:   implements fragmented skb for unix sockets reducing
+               vm pressure for datagram sockets and adds to /proc/net/unix
+               columns allowing to monitor recv/send memory and identify
+               peer of connected sockets.
+Side effects:  "lsof" blames something about unix sockets.
+               Not a big loss, lsof is not able to tell anything more
+               clever than "can't identify protocol" for sockets anyway. 
+Note:          the patch touches an area where one or two lines changed
+               several times during 2.4. It does not depend on those changes,
+               but unfortunately may be rejected. It applies cleanly to
+               2.4.17.
+
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0e48a172b79d88e013b6b886d9102618dc2e573d 100644 (file)
@@ -0,0 +1,401 @@
+diff -ur ../vger3-011229/linux/net/unix/af_unix.c linux/net/unix/af_unix.c
+--- ../vger3-011229/linux/net/unix/af_unix.c   Mon Dec  3 20:24:03 2001
++++ linux/net/unix/af_unix.c   Sat Jan  5 04:30:19 2002
+@@ -112,6 +112,7 @@
+ #include <asm/checksum.h>
+ int sysctl_unix_max_dgram_qlen = 10;
++int sysctl_unix_stream_pages = MAX_SKB_FRAGS;
+ unix_socket *unix_socket_table[UNIX_HASH_SIZE+1];
+ rwlock_t unix_table_lock = RW_LOCK_UNLOCKED;
+@@ -1123,9 +1124,6 @@
+       struct scm_cookie scm;
+       memset(&scm, 0, sizeof(scm));
+       unix_detach_fds(&scm, skb);
+-
+-      /* Alas, it calls VFS */
+-      /* So fscking what? fput() had been SMP-safe since the last Summer */
+       scm_destroy(&scm);
+       sock_wfree(skb);
+ }
+@@ -1140,6 +1138,67 @@
+       scm->fp = NULL;
+ }
++int datagram_copy_fromiovec(struct iovec *iov, struct sk_buff *skb, int size)
++{
++      struct sock *sk;
++      struct sk_buff **tail, *skb1;
++      int copy = min_t(int, size, skb_tailroom(skb));
++
++      if (memcpy_fromiovec(skb_put(skb, copy), iov, copy))
++              goto do_fault;
++
++      if ((size -= copy) == 0)
++              return 0;
++
++      sk = skb->sk;
++      skb1 = skb;
++      tail = &skb_shinfo(skb)->frag_list;
++
++      do {
++              struct page *page;
++              int i = skb_shinfo(skb1)->nr_frags;
++
++              if (i == MAX_SKB_FRAGS) {
++                      skb1 = alloc_skb(0, sk->allocation);
++                      if (skb1 == NULL)
++                              goto do_oom;
++                      *tail = skb1;
++                      tail = &skb1->next;
++                      i = 0;
++                      skb->truesize += skb1->truesize;
++                      atomic_add(skb1->truesize, &sk->wmem_alloc);
++              }
++
++              page = alloc_pages(sk->allocation, 0);
++              if (page == NULL)
++                      goto do_oom;
++
++              copy = min_t(int, size, PAGE_SIZE);
++              skb_shinfo(skb1)->nr_frags=i+1;
++              skb_shinfo(skb1)->frags[i].page = page;
++              skb_shinfo(skb1)->frags[i].page_offset = 0;
++              skb_shinfo(skb1)->frags[i].size = copy;
++
++              skb1->len += copy;
++              skb1->data_len += copy;
++              if (skb != skb1) {
++                      skb->len += copy;
++                      skb->data_len += copy;
++              }
++              skb->truesize += PAGE_SIZE;
++              atomic_add(PAGE_SIZE, &sk->wmem_alloc);
++              if (memcpy_fromiovec(page_address(page), iov, copy))
++                      goto do_fault;
++      } while ((size -= copy) > 0);
++      return 0;
++
++do_oom:
++      return -ENOMEM;
++
++do_fault:
++      return -EFAULT;
++}
++
+ /*
+  *    Send AF_UNIX data.
+  */
+@@ -1155,6 +1214,7 @@
+       unsigned hash;
+       struct sk_buff *skb;
+       long timeo;
++      int alloc;
+       err = -EOPNOTSUPP;
+       if (msg->msg_flags&MSG_OOB)
+@@ -1178,10 +1238,14 @@
+               goto out;
+       err = -EMSGSIZE;
+-      if ((unsigned)len > sk->sndbuf - 32)
++      if ((unsigned)len > sk->sndbuf)
+               goto out;
+-      skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
++      alloc = len;
++      if (alloc > SKB_MAX_HEAD(0))
++              alloc = SKB_MAX_HEAD(0);
++
++      skb = sock_alloc_send_skb(sk, alloc, msg->msg_flags&MSG_DONTWAIT, &err);
+       if (skb==NULL)
+               goto out;
+@@ -1190,7 +1254,7 @@
+               unix_attach_fds(scm, skb);
+       skb->h.raw = skb->data;
+-      err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
++      err = datagram_copy_fromiovec(msg->msg_iov, skb, len);
+       if (err)
+               goto out_free;
+@@ -1275,74 +1339,57 @@
+       return err;
+ }
+-              
+ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, int len,
+                              struct scm_cookie *scm)
+ {
+       struct sock *sk = sock->sk;
+       unix_socket *other = NULL;
+-      struct sockaddr_un *sunaddr=msg->msg_name;
+-      int err,size;
+       struct sk_buff *skb;
++      int err;
+       int sent=0;
+       err = -EOPNOTSUPP;
+       if (msg->msg_flags&MSG_OOB)
+               goto out_err;
+-      if (msg->msg_namelen) {
+-              err = (sk->state==TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP);
++      err = -ENOTCONN;
++      other = unix_peer_get(sk);
++      if (!other)
+               goto out_err;
+-      } else {
+-              sunaddr = NULL;
+-              err = -ENOTCONN;
+-              other = unix_peer_get(sk);
+-              if (!other)
+-                      goto out_err;
+-      }
+       if (sk->shutdown&SEND_SHUTDOWN)
+               goto pipe_err;
+-      while(sent < len)
+-      {
+-              /*
+-               *      Optimisation for the fact that under 0.01% of X messages typically
+-               *      need breaking up.
+-               */
++      while(sent < len) {
++              int size, alloc;
+-              size=len-sent;
++              size = len-sent;
+               /* Keep two messages in the pipe so it schedules better */
+-              if (size > sk->sndbuf/2 - 64)
+-                      size = sk->sndbuf/2 - 64;
++              if (size > sk->sndbuf/2)
++                      size = sk->sndbuf/2;
+-              if (size > SKB_MAX_ALLOC)
+-                      size = SKB_MAX_ALLOC;
+-                      
+               /*
+                *      Grab a buffer
+                */
+-               
+-              skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
++              alloc = size;
++
++              if (size > SKB_MAX_HEAD(0)) {
++                      alloc = SKB_MAX_HEAD(0);
++                      if (size > alloc + sysctl_unix_stream_pages*PAGE_SIZE)
++                              size = alloc + sysctl_unix_stream_pages*PAGE_SIZE;
++              }
++
++              skb=sock_alloc_send_skb(sk,alloc,msg->msg_flags&MSG_DONTWAIT, &err);
+               if (skb==NULL)
+                       goto out_err;
+-              /*
+-               *      If you pass two values to the sock_alloc_send_skb
+-               *      it tries to grab the large buffer with GFP_NOFS
+-               *      (which can fail easily), and if it fails grab the
+-               *      fallback size buffer which is under a page and will
+-               *      succeed. [Alan]
+-               */
+-              size = min_t(int, size, skb_tailroom(skb));
+-
+               memcpy(UNIXCREDS(skb), &scm->creds, sizeof(struct ucred));
+               if (scm->fp)
+                       unix_attach_fds(scm, skb);
+-              if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
++              if ((err = datagram_copy_fromiovec(msg->msg_iov, skb, size)) != 0) {
+                       kfree_skb(skb);
+                       goto out_err;
+               }
+@@ -1418,13 +1465,10 @@
+       scm->creds = *UNIXCREDS(skb);
+-      if (!(flags & MSG_PEEK))
+-      {
++      if (!(flags & MSG_PEEK)) {
+               if (UNIXCB(skb).fp)
+                       unix_detach_fds(scm, skb);
+-      }
+-      else 
+-      {
++      } else {
+               /* It is questionable: on PEEK we could:
+                  - do not return fds - good, but too simple 8)
+                  - return fds, and do not return them on read (old strategy,
+@@ -1483,13 +1527,10 @@
+       return timeo;
+ }
+-
+-
+ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, int size,
+                              int flags, struct scm_cookie *scm)
+ {
+       struct sock *sk = sock->sk;
+-      struct sockaddr_un *sunaddr=msg->msg_name;
+       int copied = 0;
+       int check_creds = 0;
+       int target;
+@@ -1515,21 +1556,18 @@
+       down(&sk->protinfo.af_unix.readsem);
+-      do
+-      {
++      do {
+               int chunk;
+               struct sk_buff *skb;
+               skb=skb_dequeue(&sk->receive_queue);
+-              if (skb==NULL)
+-              {
++              if (skb==NULL) {
+                       if (copied >= target)
+                               break;
+                       /*
+                        *      POSIX 1003.1g mandates this order.
+                        */
+-                       
+                       if ((err = sock_error(sk)) != 0)
+                               break;
+                       if (sk->shutdown & RCV_SHUTDOWN)
+@@ -1551,60 +1589,44 @@
+               if (check_creds) {
+                       /* Never glue messages from different writers */
+-                      if (memcmp(UNIXCREDS(skb), &scm->creds, sizeof(scm->creds)) != 0) {
+-                              skb_queue_head(&sk->receive_queue, skb);
+-                              break;
+-                      }
++                      if (memcmp(UNIXCREDS(skb), &scm->creds, sizeof(scm->creds)) != 0)
++                              goto out_put_back;
+               } else {
+                       /* Copy credentials */
+                       scm->creds = *UNIXCREDS(skb);
+                       check_creds = 1;
+               }
+-              /* Copy address just once */
+-              if (sunaddr)
+-              {
+-                      unix_copy_addr(msg, skb->sk);
+-                      sunaddr = NULL;
+-              }
++              chunk = min_t(int, skb->len - sk->protinfo.af_unix.copied, size);
++              err = skb_copy_datagram_iovec(skb, sk->protinfo.af_unix.copied, msg->msg_iov, chunk);
++              if (err)
++                      goto out_put_back;
+-              chunk = min_t(unsigned int, skb->len, size);
+-              if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
+-                      skb_queue_head(&sk->receive_queue, skb);
+-                      if (copied == 0)
+-                              copied = -EFAULT;
+-                      break;
+-              }
+               copied += chunk;
+               size -= chunk;
+               /* Mark read part of skb as used */
+-              if (!(flags & MSG_PEEK))
+-              {
+-                      skb_pull(skb, chunk);
+-
++              if (!(flags & MSG_PEEK)) {
+                       if (UNIXCB(skb).fp)
+                               unix_detach_fds(scm, skb);
+                       /* put the skb back if we didn't use it up.. */
+-                      if (skb->len)
+-                      {
+-                              skb_queue_head(&sk->receive_queue, skb);
+-                              break;
+-                      }
++                      if ((sk->protinfo.af_unix.copied += chunk) < skb->len)
++                              goto out_put_back;
++
++                      sk->protinfo.af_unix.copied = 0;
+                       kfree_skb(skb);
+                       if (scm->fp)
+                               break;
+-              }
+-              else
+-              {
++              } else {
+                       /* It is questionable, see note in unix_dgram_recvmsg.
+                        */
+                       if (UNIXCB(skb).fp)
+                               scm->fp = scm_fp_dup(UNIXCB(skb).fp);
++out_put_back:
+                       /* put message back and return */
+                       skb_queue_head(&sk->receive_queue, skb);
+                       break;
+@@ -1676,10 +1698,12 @@
+                               break;
+                       }
++                      down(&sk->protinfo.af_unix.readsem);
+                       spin_lock(&sk->receive_queue.lock);
+                       if((skb=skb_peek(&sk->receive_queue))!=NULL)
+-                              amount=skb->len;
++                              amount=skb->len - sk->protinfo.af_unix.copied;
+                       spin_unlock(&sk->receive_queue.lock);
++                      up(&sk->protinfo.af_unix.readsem);
+                       err = put_user(amount, (int *)arg);
+                       break;
+               }
+@@ -1734,7 +1758,7 @@
+       int i;
+       unix_socket *s;
+       
+-      len+= sprintf(buffer,"Num       RefCount Protocol Flags    Type St "
++      len+= sprintf(buffer,"Peer      RcvQueue WMem     Flags    Type St "
+           "Inode Path\n");
+       read_lock(&unix_table_lock);
+@@ -1742,10 +1766,10 @@
+       {
+               unix_state_rlock(s);
+-              len+=sprintf(buffer+len,"%p: %08X %08X %08X %04X %02X %5ld",
+-                      s,
+-                      atomic_read(&s->refcnt),
+-                      0,
++              len+=sprintf(buffer+len,"%08lX: %08X %08X %08X %04X %02X %5ld",
++                      unix_peer(s) ? sock_i_ino(unix_peer(s)) : 0,
++                      skb_queue_len(&s->receive_queue),
++                      atomic_read(&s->wmem_alloc),
+                       s->state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
+                       s->type,
+                       s->socket ?
+diff -ur ../vger3-011229/linux/net/unix/sysctl_net_unix.c linux/net/unix/sysctl_net_unix.c
+--- ../vger3-011229/linux/net/unix/sysctl_net_unix.c   Tue Jan 30 21:20:16 2001
++++ linux/net/unix/sysctl_net_unix.c   Sat Jan  5 04:10:58 2002
+@@ -13,10 +13,14 @@
+ #include <linux/sysctl.h>
+ extern int sysctl_unix_max_dgram_qlen;
++extern int sysctl_unix_stream_pages;
+ ctl_table unix_table[] = {
+       {NET_UNIX_MAX_DGRAM_QLEN, "max_dgram_qlen",
+       &sysctl_unix_max_dgram_qlen, sizeof(int), 0600, NULL, 
++       &proc_dointvec },
++      {NET_UNIX_STREAM_PAGES, "stream_pages",
++      &sysctl_unix_stream_pages, sizeof(int), 0600, NULL, 
+        &proc_dointvec },
+       {0}
+ };
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..6e54e9362f6185797174415f8946e1fe4262f307 100644 (file)
@@ -0,0 +1,270 @@
+diff -ur ../pidentd-3.0.12-orig/src/k_linux.c ./src/k_linux.c
+--- ../pidentd-3.0.12-orig/src/k_linux.c       Sat Jan 12 00:44:05 2002
++++ ./src/k_linux.c    Sat Nov  3 07:51:28 2001
+@@ -26,12 +26,65 @@
+ #include "pidentd.h"
++#define NETLINK_TCPDIAG 4
++#define TCPDIAG_GETSOCK 18
++
++#include <linux/uio.h>
++#include <linux/netlink.h>
++
++/* Socket identity */
++struct tcpdiag_sockid
++{
++      __u16   tcpdiag_sport;
++      __u16   tcpdiag_dport;
++      __u32   tcpdiag_src[4];
++      __u32   tcpdiag_dst[4];
++      __u32   tcpdiag_if;
++      __u32   tcpdiag_cookie[2];
++#define TCPDIAG_NOCOOKIE (~0U)
++};
++
++/* Request structure */
++
++struct tcpdiagreq
++{
++      __u8    tcpdiag_family;         /* Family of addresses. */
++      __u8    tcpdiag_src_len;
++      __u8    tcpdiag_dst_len;
++      __u8    tcpdiag_ext;            /* Query extended information */
++
++      struct tcpdiag_sockid id;
++
++      __u32   tcpdiag_states;         /* States to dump */
++      __u32   tcpdiag_dbs;            /* Tables to dump (NI) */
++};
++
++struct tcpdiagmsg
++{
++      __u8    tcpdiag_family;
++      __u8    tcpdiag_state;
++      __u8    tcpdiag_timer;
++      __u8    tcpdiag_retrans;
++
++      struct tcpdiag_sockid id;
++
++      __u32   tcpdiag_expires;
++      __u32   tcpdiag_rqueue;
++      __u32   tcpdiag_wqueue;
++      __u32   tcpdiag_uid;
++      __u32   tcpdiag_inode;
++};
++
++
++int tcpdiag_fd = -1;
++
+ /*
+ ** Make sure we are running on a supported OS version
+ */
+ int
+ ka_init(void)
+ {
++    tcpdiag_fd = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_TCPDIAG);
+     return 0; /* We always succeed */
+ }
+@@ -56,6 +109,144 @@
+ }
++
++int k_lookup_tcpdiag(struct kernel *kp)
++{
++      struct sockaddr_nl nladdr;
++      struct {
++              struct nlmsghdr nlh;
++              struct tcpdiagreq r;
++      } req;
++      struct msghdr msg;
++      char    buf[8192];
++      struct  iovec iov[1];
++      struct  tcpdiagmsg *r;
++      static  unsigned seqno = 123456;
++
++      memset(&nladdr, 0, sizeof(nladdr));
++      nladdr.nl_family = AF_NETLINK;
++
++      req.nlh.nlmsg_len = sizeof(req);
++      req.nlh.nlmsg_type = TCPDIAG_GETSOCK;
++      req.nlh.nlmsg_flags = NLM_F_REQUEST;
++      req.nlh.nlmsg_pid = 0;
++      req.nlh.nlmsg_seq = ++seqno;
++      memset(&req.r, 0, sizeof(req.r));
++      req.r.tcpdiag_family = AF_INET;
++      req.r.tcpdiag_states = ~0;
++
++      req.r.id.tcpdiag_dport = kp->remote.sin_port;
++      req.r.id.tcpdiag_sport = kp->local.sin_port;
++      req.r.id.tcpdiag_dst[0] = kp->remote.sin_addr.s_addr;
++      req.r.id.tcpdiag_src[0] = kp->local.sin_addr.s_addr;
++      req.r.id.tcpdiag_cookie[0] = TCPDIAG_NOCOOKIE;
++      req.r.id.tcpdiag_cookie[1] = TCPDIAG_NOCOOKIE;
++      kp->ruid = NO_UID;
++
++      iov[0] = (struct iovec){ &req, sizeof(req) };
++
++      msg = (struct msghdr) {
++              (void*)&nladdr, sizeof(nladdr),
++              iov,    1,
++              NULL,   0,
++              0
++      };
++
++      if (sendmsg(tcpdiag_fd, &msg, 0) < 0) {
++              if (errno == ECONNREFUSED) {
++                      close(tcpdiag_fd);
++                      tcpdiag_fd = -1;
++                      return 0;
++              }
++              syslog(LOG_ERR, "system error on tcpdiag sendmsg: %m");
++              return -1;
++      }
++
++      iov[0] = (struct iovec){ buf, sizeof(buf) };
++
++      while (1) {
++              int status;
++              struct nlmsghdr *h;
++
++              msg = (struct msghdr) {
++                      (void*)&nladdr, sizeof(nladdr),
++                      iov,    1,
++                      NULL,   0,
++                      0
++              };
++
++              status = recvmsg(tcpdiag_fd, &msg, 0);
++
++              if (status < 0) {
++                      if (errno == EINTR || errno == EAGAIN)
++                              continue;
++                      return -1;
++              }
++              if (status == 0) {
++                      return -1;
++              }
++
++              h = (struct nlmsghdr*)buf;
++              while (NLMSG_OK(h, status)) {
++                      int err;
++
++                      if (/*h->nlmsg_pid != rth->local.nl_pid ||*/
++                          h->nlmsg_seq != seqno)
++                              goto skip_it;
++
++                      if (h->nlmsg_type == NLMSG_DONE)
++                              return -1;
++                      if (h->nlmsg_type == NLMSG_ERROR) {
++                              struct nlmsgerr *err = (struct nlmsgerr*)NLMSG_DATA(h);
++                              if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr))) {
++                                      return -1;
++                              } else {
++                                      errno = -err->error;
++                                      if (errno == ECONNREFUSED) {
++                                              close(tcpdiag_fd);
++                                              tcpdiag_fd = -1;
++                                              return 0;
++                                      }
++                                      if (errno != ENOENT)
++                                              syslog(LOG_ERR, "tcpdiag answers: %m");
++                              }
++                              return -1;
++                      }
++
++                      r = NLMSG_DATA(h);
++
++                      /* Lookup _may_ return listening socket, if no
++                       * better matches are found. */
++                      if (r->id.tcpdiag_dport == kp->remote.sin_port &&
++                          r->id.tcpdiag_dst[0] == kp->remote.sin_addr.s_addr) {
++                              kp->ruid = r->tcpdiag_uid;
++                              if (!r->tcpdiag_inode && !r->tcpdiag_uid) {
++                                      /* _NEVER_ return "root" for closed
++                                       * sockets. Otherwise people think
++                                       * that it is sysadmin who abuses their
++                                       * poor ircd. :-) */
++                                      syslog(LOG_NOTICE,
++                                             "Req for stale socket(%d) %d from %x/%d",
++                                             r->tcpdiag_state, ntohs(r->id.tcpdiag_sport),
++                                             r->id.tcpdiag_dst[0], ntohs(r->id.tcpdiag_dport));
++                                      return -1;
++                              }
++                              return 1;
++                      }
++
++                      return -1;
++
++skip_it:
++                      h = NLMSG_NEXT(h, status);
++              }
++              if ((msg.msg_flags & MSG_TRUNC) || status) {
++                      syslog(LOG_ERR, "truncated tcp_diag message");
++                      return -1;
++              }
++      }
++}
++
++
+ int 
+ ka_lookup(void *vp, struct kernel *kp)
+ {
+@@ -64,16 +255,23 @@
+     long r_laddr, r_raddr, myladdr, myraddr;
+     int r_lport, r_rport, mylport, myrport;
+     int euid;
+-    
+-    
++
++    if (tcpdiag_fd >= 0) {
++          int res;
++          if ((res = k_lookup_tcpdiag(kp)) != 0)
++                  return res;
++          syslog(LOG_ERR, "tcp_diag is not loaded, fallback to proc");
++    }
++
++
+     r_rport = ntohs(kp->remote.sin_port);
+     r_lport = ntohs(kp->local.sin_port);
+     r_raddr = kp->remote.sin_addr.s_addr;
+     r_laddr = kp->local.sin_addr.s_addr;
++    kp->ruid = NO_UID;
+     fp = (FILE *) vp;
+-    kp->ruid = NO_UID;
+     rewind(fp);
+     /* eat header */
+@@ -82,13 +280,26 @@
+     while (fgets(buf, sizeof(buf)-1, fp) != NULL)
+     {
+-      if (sscanf(buf, "%*d: %lx:%x %lx:%x %*x %*x:%*x %*x:%*x %*x %d %*d %*d",
+-                 &myladdr, &mylport, &myraddr, &myrport, &euid) == 5)
++        int state, ino;
++      if (sscanf(buf, "%*d: %x:%x %x:%x %x %*x:%*x %*x:%*x %*x %d %*d %u",
++                 &myladdr, &mylport, &myraddr, &myrport,
++                 &state, &euid, &ino) == 7)
+       {
+           if (myladdr == r_laddr && mylport == r_lport &&
+               myraddr == r_raddr && myrport == r_rport)
+           {
+               kp->euid = euid;
++                if (ino == 0 && euid == 0)
++              {
++                      /* _NEVER_ return "root" for closed
++                       * sockets. Otherwise people think
++                       * that it is sysadmin who abuses their
++                       * poor ircd. :-) */
++                  syslog(LOG_NOTICE,
++                         "Req for stale socket(%d) %d from %x/%d",
++                         state, r_rport, r_raddr, r_lport);
++                  return -1;
++              }
+               return 1;
+           }
+       }
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..a03ddf226c8bbafd9fa75e9e6e18c2c560cec3e6 100644 (file)
@@ -0,0 +1,230 @@
+--- linux/include/net/route.h.orig     Tue Apr 17 07:25:48 2001
++++ linux/include/net/route.h  Tue Jul 10 23:35:18 2001
+@@ -14,6 +14,7 @@
+  *            Alan Cox        :       Support for TCP parameters.
+  *            Alexey Kuznetsov:       Major changes for new routing code.
+  *            Mike McLagan    :       Routing by source
++ *            Robert Olsson   :       Added rt_cache statistics
+  *
+  *            This program is free software; you can redistribute it and/or
+  *            modify it under the terms of the GNU General Public License
+@@ -90,6 +91,20 @@
+       __u32   o_packets;
+       __u32   i_bytes;
+       __u32   i_packets;
++};
++
++struct rt_cache_stat 
++{
++        unsigned in_hit;
++        unsigned in_slow_tot;
++        unsigned in_slow_mc;
++        unsigned in_no_route;
++        unsigned in_brd;
++        unsigned in_martian_dst;
++        unsigned in_martian_src;
++        unsigned out_hit;
++        unsigned out_slow_tot;
++        unsigned out_slow_mc;
+ };
+ extern struct ip_rt_acct *ip_rt_acct;
+--- linux/net/ipv4/route.c.orig        Wed Mar 28 22:01:15 2001
++++ linux/net/ipv4/route.c     Tue Jul 10 23:27:51 2001
+@@ -52,6 +52,7 @@
+  *    Tobias Ringstrom        :       Uninitialized res.type in ip_route_output_slow.
+  *    Vladimir V. Ivanov      :       IP rule info (flowid) is really useful.
+  *            Marc Boucher    :       routing by fwmark
++ *    Robert Olsson           :       Added rt_cache statistics
+  *
+  *            This program is free software; you can redistribute it and/or
+  *            modify it under the terms of the GNU General Public License
+@@ -201,6 +202,8 @@
+ static unsigned                       rt_hash_mask;
+ static int                    rt_hash_log;
++struct rt_cache_stat rt_cache_stat[NR_CPUS];
++
+ static int rt_intern_hash(unsigned hash, struct rtable * rth, struct rtable ** res);
+ static __inline__ unsigned rt_hash_code(u32 daddr, u32 saddr, u8 tos)
+@@ -270,6 +273,44 @@
+               len = length;
+       return len;
+ }
++
++
++#ifdef CONFIG_PROC_FS
++static int rt_cache_stat_get_info(char *buffer, char **start, off_t offset, int length)
++{
++      int i, lcpu;
++        int len=0;
++      unsigned int dst_entries = atomic_read(&ipv4_dst_ops.entries);
++
++        for (lcpu=0; lcpu<smp_num_cpus; lcpu++) {
++                i = cpu_logical_map(lcpu);
++
++              len += sprintf(buffer+len, "%08x  %08x %08x %08x %08x %08x %08x %08x  %08x %08x %08x\n",
++                             dst_entries,                    
++                             rt_cache_stat[i].in_hit,
++                             rt_cache_stat[i].in_slow_tot,
++                             rt_cache_stat[i].in_slow_mc,
++                             rt_cache_stat[i].in_no_route,
++                             rt_cache_stat[i].in_brd,
++                             rt_cache_stat[i].in_martian_dst,
++                             rt_cache_stat[i].in_martian_src,
++
++                             rt_cache_stat[i].out_hit,
++                             rt_cache_stat[i].out_slow_tot,
++                             rt_cache_stat[i].out_slow_mc
++                      );
++      }
++      len -= offset;
++
++      if (len > length)
++              len = length;
++      if (len < 0)
++              len = 0;
++
++      *start = buffer + offset;
++      return len;
++}
++#endif
+   
+ static __inline__ void rt_free(struct rtable *rt)
+ {
+@@ -1163,6 +1204,8 @@
+       u32 spec_dst;
+       struct in_device *in_dev = in_dev_get(dev);
+       u32 itag = 0;
++      int cpu = smp_processor_id();
++
+       /* Primary sanity checks. */
+@@ -1221,6 +1264,7 @@
+       if (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev))
+               rth->u.dst.input = ip_mr_input;
+ #endif
++      rt_cache_stat[cpu].in_slow_mc++;
+       in_dev_put(in_dev);
+       hash = rt_hash_code(daddr, saddr^(dev->ifindex<<5), tos);
+@@ -1259,6 +1303,7 @@
+       u32             spec_dst;
+       int             err = -EINVAL;
+       int             free_res = 0;
++      int cpu = smp_processor_id();
+       /*
+        *      IP on this device is disabled.
+@@ -1308,6 +1353,8 @@
+       }
+       free_res = 1;
++      rt_cache_stat[cpu].in_slow_tot++;
++
+ #ifdef CONFIG_IP_ROUTE_NAT
+       /* Policy is applied before mapping destination,
+          but rerouting after map should be made with old source.
+@@ -1455,6 +1502,7 @@
+       }
+       flags |= RTCF_BROADCAST;
+       res.type = RTN_BROADCAST;
++      rt_cache_stat[cpu].in_brd++;
+ local_input:
+       rth = dst_alloc(&ipv4_dst_ops);
+@@ -1498,6 +1546,7 @@
+       goto intern;
+ no_route:
++      rt_cache_stat[cpu].in_no_route++;
+       spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
+       res.type = RTN_UNREACHABLE;
+       goto local_input;
+@@ -1506,6 +1555,7 @@
+        *      Do not cache martian addresses: they should be logged (RFC1812)
+        */
+ martian_destination:
++      rt_cache_stat[cpu].in_martian_dst++;
+ #ifdef CONFIG_IP_ROUTE_VERBOSE
+       if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
+               printk(KERN_WARNING "martian destination %u.%u.%u.%u from %u.%u.%u.%u, dev %s\n",
+@@ -1520,6 +1570,8 @@
+       goto done;
+ martian_source:
++
++      rt_cache_stat[cpu].in_martian_src++;
+ #ifdef CONFIG_IP_ROUTE_VERBOSE
+       if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
+               /*
+@@ -1550,6 +1602,7 @@
+       struct rtable * rth;
+       unsigned        hash;
+       int iif = dev->ifindex;
++      int cpu = smp_processor_id();
+       tos &= IPTOS_RT_MASK;
+       hash = rt_hash_code(daddr, saddr^(iif<<5), tos);
+@@ -1567,6 +1620,7 @@
+                       rth->u.dst.lastuse = jiffies;
+                       dst_hold(&rth->u.dst);
+                       rth->u.dst.__use++;
++                      rt_cache_stat[cpu].in_hit++;
+                       read_unlock(&rt_hash_table[hash].lock);
+                       skb->dst = (struct dst_entry*)rth;
+                       return 0;
+@@ -1621,6 +1675,7 @@
+       int free_res = 0;
+       int err;
+       u32 tos;
++      int cpu = smp_processor_id();
+       tos = oldkey->tos & (IPTOS_RT_MASK|RTO_ONLINK);
+       key.dst = oldkey->dst;
+@@ -1847,14 +1902,18 @@
+       rth->u.dst.output=ip_output;
++      rt_cache_stat[cpu].out_slow_tot++;
++
+       if (flags&RTCF_LOCAL) {
+               rth->u.dst.input = ip_local_deliver;
+               rth->rt_spec_dst = key.dst;
+       }
+       if (flags&(RTCF_BROADCAST|RTCF_MULTICAST)) {
+               rth->rt_spec_dst = key.src;
+-              if (flags&RTCF_LOCAL && !(dev_out->flags&IFF_LOOPBACK))
++              if (flags&RTCF_LOCAL && !(dev_out->flags&IFF_LOOPBACK)) {
+                       rth->u.dst.output = ip_mc_output;
++                      rt_cache_stat[cpu].out_slow_mc++;
++              }
+ #ifdef CONFIG_IP_MROUTE
+               if (res.type == RTN_MULTICAST) {
+                       struct in_device *in_dev = in_dev_get(dev_out);
+@@ -1894,6 +1953,7 @@
+ {
+       unsigned hash;
+       struct rtable *rth;
++      int cpu = smp_processor_id();
+       hash = rt_hash_code(key->dst, key->src^(key->oif<<5), key->tos);
+@@ -1912,6 +1972,7 @@
+                       rth->u.dst.lastuse = jiffies;
+                       dst_hold(&rth->u.dst);
+                       rth->u.dst.__use++;
++                      rt_cache_stat[cpu].out_hit++;
+                       read_unlock_bh(&rt_hash_table[hash].lock);
+                       *rp = rth;
+                       return 0;
+@@ -2339,6 +2400,7 @@
+       add_timer(&rt_periodic_timer);
+       proc_net_create ("rt_cache", 0, rt_cache_get_info);
++      proc_net_create ("rt_cache_stat", 0, rt_cache_stat_get_info);
+ #ifdef CONFIG_NET_CLS_ROUTE
+       create_proc_read_entry("net/rt_acct", 0, 0, ip_rt_acct_read, NULL);
+ #endif
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..519ea7cd1034526fd5795c0f8d01ad3cc9a3f998 100644 (file)
@@ -0,0 +1,56 @@
+diff -ur ../vger3-010830/linux/net/ipv6/tcp_ipv6.c linux/net/ipv6/tcp_ipv6.c
+--- ../vger3-010830/linux/net/ipv6/tcp_ipv6.c  Wed Jun 13 21:14:05 2001
++++ linux/net/ipv6/tcp_ipv6.c  Fri Oct 12 06:59:07 2001
+@@ -339,13 +339,18 @@
+       return tcp_v6_lookup_listener(daddr, hnum, dif);
+ }
+-#define tcp_v6_lookup(sa, sp, da, dp, dif) \
+-({    struct sock *___sk; \
+-      local_bh_disable(); \
+-      ___sk = __tcp_v6_lookup((sa),(sp),(da),ntohs(dp),(dif)); \
+-      local_bh_enable(); \
+-      ___sk; \
+-})
++__inline__ struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
++                                    struct in6_addr *daddr, u16 dport,
++                                    int dif)
++{
++      struct sock *sk;
++
++      local_bh_disable();
++      sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
++      local_bh_enable();
++
++      return sk;
++}
+ /*
+diff -ur ../vger3-010830/linux/net/netsyms.c linux/net/netsyms.c
+--- ../vger3-010830/linux/net/netsyms.c        Sun Aug 19 22:01:45 2001
++++ linux/net/netsyms.c        Fri Oct 12 07:59:17 2001
+@@ -72,6 +72,11 @@
+ extern int netdev_finish_unregister(struct net_device *dev);
++extern struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
++                                struct in6_addr *daddr, u16 dport,
++                                int dif);
++extern struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif);
++
+ #include <linux/rtnetlink.h>
+ #ifdef CONFIG_IPX_MODULE
+@@ -284,7 +289,11 @@
+ EXPORT_SYMBOL(ndisc_mc_map);
+ EXPORT_SYMBOL(register_inet6addr_notifier);
+ EXPORT_SYMBOL(unregister_inet6addr_notifier);
++EXPORT_SYMBOL(tcp_v6_lookup);
+ #endif
++EXPORT_SYMBOL(tcp_v4_lookup);
++EXPORT_SYMBOL(tcp_timewait_cachep);
++EXPORT_SYMBOL(tcp_hashinfo);
+ #if defined (CONFIG_IPV6_MODULE) || defined (CONFIG_KHTTPD) || defined (CONFIG_KHTTPD_MODULE)
+ /* inet functions common to v4 and v6 */
+ EXPORT_SYMBOL(inet_release);
diff --git a/README b/README
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..53a45c2e9d2881849302b265a62f1bc13bd415a6 100644 (file)
--- a/README
+++ b/README
@@ -0,0 +1,65 @@
+Primary FTP site is:
+
+       ftp://ftp.inr.ac.ru/ip-routing/
+
+Mirrors are:
+
+       ftp://linux.wauug.org/pub/net
+       ftp://ftp.nc.ras.ru/pub/mirrors/ftp.inr.ac.ru/ip-routing/
+       ftp://ftp.gts.cz/MIRRORS/ftp.inr.ac.ru/
+       ftp://ftp.funet.fi/pub/mirrors/ftp.inr.ac.ru/ip-routing/ (STM1 to USA)
+       ftp://sunsite.icm.edu.pl/pub/Linux/iproute/
+       ftp://ftp.sunet.se/pub/Linux/ip-routing/
+       ftp://ftp.nvg.ntnu.no/pub/linux/ip-routing/
+       ftp://ftp.crc.ca/pub/systems/linux/ip-routing/
+       ftp://ftp.proxad.net/mirrors/ftp.inr.ac.ru/ip-routing/
+       ftp://donlug.dn.ua/pub/mirrors/ip-routing/
+       ftp://omni.rk.tusur.ru/mirrors/ftp.inr.ac.ru/ip-routing/
+       ftp://ftp.src.uchicago.edu/pub/linux/ip-routing/
+       http://www.asit.ro/ip-routing/
+       ftp://ftp.infoscience.co.jp/pub/linux/ip-routing/ (Japan)
+       ftp://ftp.sucs.swan.ac.uk/pub/mirrors/ftp.inr.ac.ru/ip-routing
+       http://mirror.schell.de/ftp.inr.ac.ru/ip-routing/ (Germany)
+       ftp://ftp.gin.cz/MIRRORS/ftp.inr.ac.ru/ip-routing
+       ftp://mirror.aarnet.edu.au/pub/ip-routing/  (Australia)
+       http://mirror.aarnet.edu.au/pub/ip-routing/ (Australia)
+
+RPMs are available at:
+       ftp://omni.rk.tusur.ru/Tango/
+       ftp://ftp4.dgtu.donetsk.ua/pub/BlackCat/6.0/contrib/SRPMS/i[35]86/
+
+
+
+How to compile this.
+--------------------
+
+
+1. Look at start of Makefile and set correct values for:
+
+KERNEL_INCLUDE should point to correct linux kernel include directory.
+Default (/usr/src/linux/include) is right as rule.
+
+ADDLIB should contain inet_* functions, if your libc contains
+obsolete resolver library (<4.9.4) and you have no correct libresolv.
+ADDLIB should also contain dnet_* functions if you don't have a
+libdnet with support for them. If your libdnet does have support,
+then comment out that line and uncomment the line to add -ldnet to
+LDLIBS.
+
+LDLIBS should be empty, if you have no libresolv.
+
+
+2. make
+
+Utilities "ip" and "rtmon" are in ip/ directory now,
+"tc" is in tc/. That's all.
+
+3. To make documentation, cd to doc/ directory , then
+   look at start of Makefile and set correct values for
+   PAGESIZE=a4         , ie: a4 , letter ...   (string)
+   PAGESPERPAGE=2      , ie: 1 , 2 ...         (numeric)
+   and make there. It assumes, that latex, dvips and psnup
+   are in your path.
+
+Alexey Kuznetsov
+kuznet@ms2.inr.ac.ru
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..4d7453aa308b27adfef8b216ce81a78b75e9f930 100644 (file)
@@ -0,0 +1,41 @@
+
+Here are a few quick points about DECnet support...
+
+ o No name resolution is available as yet, all addresses must be
+   entered numerically.
+
+ o The neighbour cache may well list every entry as having the address
+   0.170. This is due to a problem that I need to sort out kernel side.
+   It is harmless (but don't try and use neigh add yet) just look in
+   /proc/net/decnet_neigh to see the real addresses for now.
+
+ o The rtnetlink support in the kernel is rather experimental, expect a
+   few odd things to happen for the next few DECnet kernel releases.
+
+ o Whilst you can use ip addr add to add more than one DECnet address to an
+   interface, don't expect addresses which are not the same as the
+   kernel's node address to work properly. i.e. You will break the DECnet
+   protocol if you do add anything other than the automatically generated
+   interface addresses to ethernet cards. This option is there for future
+   link layer support, where the device will have to be configed for
+   DECnet explicitly.
+
+ o The DECnet support is currently self contained. You do not need the
+   libdnet library to use it. In fact until I've sent the dnet_pton and
+   dnet_ntop functions to Patrick to add, you can't use libdnet.
+
+ o If you are not using the very latest 2.3.xx series kernels, don't
+   try and list DECnet routes if you've got IPv6 compiled into the
+   kernel. It will oops.
+
+ o My main reason for writing the DECnet support for iproute2 was to
+   check out the DECnet routing code, so the route get and
+   route show cache commands are likely to be the most debugged out of
+   all of them.
+
+ o If you find bugs in the DECnet support, please send them to me in the
+   first instance, and then I'll send Alexey a patch to fix it. IPv4/6
+   bugs should be sent to Alexey as before.
+
+Steve Whitehouse <SteveW@ACM.org>
+
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..edd79c0ec1e696ed8f3a8e175ad17c337f428d47 100644 (file)
@@ -0,0 +1,119 @@
+iproute2+tc*
+
+It's the first release of Linux traffic control engine.
+
+
+NOTES.
+* csz scheduler is inoperational at the moment, and probably
+  never will be repaired but replaced with h-pfq scheduler.
+* To use "fw" classifier you will need ipfwchains patch.
+* No manual available. Ask me, if you have problems (only try to guess
+  answer yourself at first 8)).
+
+
+Micro-manual how to start it the first time
+-------------------------------------------
+
+A. Attach CBQ to eth1:
+
+tc qdisc add dev eth1 root handle 1: cbq bandwidth 10Mbit allot 1514 cell 8 \
+avpkt 1000 mpu 64
+
+B. Add root class:
+
+tc class add dev eth1 parent 1:0 classid 1:1 cbq bandwidth 10Mbit rate 10Mbit \
+allot 1514 cell 8 weight 1Mbit prio 8 maxburst 20 avpkt 1000
+
+C. Add default interactive class:
+
+tc class add dev eth1 parent 1:1 classid 1:2 cbq bandwidth 10Mbit rate 1Mbit \
+allot 1514 cell 8 weight 100Kbit prio 3 maxburst 20 avpkt 1000 split 1:0 \
+defmap c0
+
+D. Add default class:
+
+tc class add dev eth1 parent 1:1 classid 1:3 cbq bandwidth 10Mbit rate 8Mbit \
+allot 1514 cell 8 weight 800Kbit prio 7 maxburst 20 avpkt 1000 split 1:0 \
+defmap 3f
+
+etc. etc. etc. Well, it is enough to start 8) The rest can be guessed 8)
+Look also at more elaborated example, ready to start rsvpd,
+in rsvp/cbqinit.eth1.
+
+
+Terminology and advice about setting CBQ parameters may be found in Sally Floyd's
+papers.
+
+
+Pairs X:Y are class handles, X:0 are qdisc handles.
+weight should be proportional to rate for leaf classes
+(I chose it ten times less, but it is not necessary)
+
+defmap is bitmap of logical priorities served by this class.
+
+E. Another qdiscs are simpler. F.e. let's join TBF on class 1:2
+
+tc qdisc add dev eth1 parent 1:2 tbf rate 64Kbit buffer 5Kb/8 limit 10Kb
+
+F. Look at all that we created:
+
+tc qdisc ls dev eth1
+tc class ls dev eth1
+
+G. Install "route" classifier on root of cbq and map destination from realm
+1 to class 1:2
+
+tc filter add dev eth1 parent 1:0 protocol ip prio 100 route to 1 classid 1:2
+
+H. Assign routes to 10.11.12.0/24 to realm 1
+
+ip route add 10.11.12.0/24 dev eth1 via whatever realm 1
+
+etc. The same thing can be made with rules.
+I still did not test ipchains, but they should work too.
+
+Setup of rsvp and u32 classifiers is more hairy.
+If you read RSVP specs, you will understand how rsvp classifier
+works easily. What's about u32... That's example:
+
+
+
+#! /bin/sh
+
+TC=/home/root/tc
+
+# Setup classifier root on eth1 root (it is cbq)
+$TC filter add dev eth1 parent 1:0 prio 5 protocol ip u32
+
+# Create hash table of 256 slots with ID 1:
+$TC filter add dev eth1 parent 1:0 prio 5 handle 1: u32 divisor 256
+
+# Add to 6th slot of hash table rule to select tcp/telnet to 193.233.7.75
+# direct it to class 1:4 and prescribe to fall to best effort,
+# if traffic violate TBF (32kbit,5K)
+$TC filter add dev eth1 parent 1:0 prio 5 u32 ht 1:6: \
+       match ip dst 193.233.7.75 \
+       match tcp dst 0x17 0xffff \
+       flowid 1:4 \
+       police rate 32kbit buffer 5kb/8 mpu 64 mtu 1514 index 1
+
+# Add to 1st slot of hash table rule to select icmp to 193.233.7.75
+# direct it to class 1:4 and prescribe to fall to best effort,
+# if traffic violate TBF (10kbit,5K)
+$TC filter add dev eth1 parent 1:0 prio 5 u32 ht 1:: \
+       sample ip protocol 1 0xff \
+       match ip dst 193.233.7.75 \
+       flowid 1:4 \
+       police rate 10kbit buffer 5kb/8 mpu 64 mtu 1514 index 2
+
+# Lookup hash table, if it is not fragmented frame
+# Use protocol as hash key
+$TC filter add dev eth1 parent 1:0 prio 5 handle ::1 u32 ht 800:: \
+       match ip nofrag \
+       offset mask 0x0F00 shift 6 \
+       hashkey mask 0x00ff0000 at 8 \
+       link 1:
+
+
+Alexey Kuznetsov
+kuznet@ms2.inr.ac.ru
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..17f00111cf3d0a1c55b2ba2f551fe14dd70a1937 100644 (file)
--- a/RELNOTES
+++ b/RELNOTES
@@ -0,0 +1,168 @@
+[020116]
+! 1. Compile with rh-7.2
+! 2. What the hell some people blame on socklen_t defined in unistd.h? Check.
+ * Kim Woelders <kim@woelders.dk>, various useful fixups: compilation
+   with old kernels, cross-compiling, "all" == "any" in prefix spec. 
+ * Collected from my disk, cleaned and packed to directory iproute2/misc/
+   several utilities: ss, nstat, ifstat, rtacct, arpd and module tcp_diag.
+   Writing some docs. me.
+ * prepared patchlet for pidentd to use tcp_diag.
+ * David Miller: 64bit (and even worse 64bit kernel/32 bit user :-) fixes
+   to above. tcp_diag is merged to main tree.
+ * Alexandr D. Kanevskiy <kad@blackcatlinux.com>: various flaws in ss
+ * Alexandr D. Kanevskiy <kad@blackcatlinux.com>: oops, more aggressive caching
+   of names opened old bugs: ip started to print garbage in some places.
+ * Robert Olsson, rt_cache_stat. Renamed to rtstat.
+ * An old bug in "ip maddr ls": redundant empty lines in output.
+   Seeing this crap for ages but lucky match of desire/ability to repair
+   and a huff about this happened only today. :-)
+ * "Mr. James W. Laferriere" <babydr@baby-dragons.com>
+   doc: option to produce ps output for non-a4 and not only 2 pages/sheet. 
+ * Jamal's patch for ingress qdisc.
+ * Bernd Eckenfels <ecki@lina.inka.de>: deleted orphaned bogus #include
+   in include/utils.h.
+ * Julian Anastasov <ja@ssi.bg>: uninitialized fields in nexthop
+   producing funny "dead" nexthops in multipath routes.
+   Stupid me, look at the first line in [010803]... Was it difficult to guess
+   this that time? People blame for several months. :-)
+   Special thanks to bert hubert <ahu@ds9a.nl> who raised the issue in netdev.
+   Thanks and apologies to Terry Schmidt <terry@nycwireless.net>,
+   Ruben Puettmann <ruben.puettmann@freenet-ag.de>,
+   Mark Ivens <mivens@clara.net>.
+ * willy tarreau <wtarreau@yahoo.fr>: "make install" target.
+ * Tunable limit for sch_sfq. Patch to kernel activating this
+   is about to be submitted. Reminded by Adi Nugroho <Adi@iNterNUX.co.id>.
+
+[010824]
+ * ip address add sets scope of loopback addresses to "host".
+   Advised by David Miller.
+ * ZIP! <zip@killerlabs.com> and David Ford <david@blue-labs.org>
+   Some strcpy's changed to strncpy's.
+ * David Ford <david@blue-labs.org>, test for compilation with gcc3.
+ * David Ford <david@blue-labs.org>. Damn, I broke rtnl_talk in previous
+   snapshot.
+
+[010803]
+ * If "dev" is not specified in multipath route, ifindex remained
+   uninitialized. Grr. Thanks to Kunihiro Ishiguro <kunihiro@zebra.org>.
+ * Rafal Maszkowski <rzm@icm.edu.pl>, batch mode tc. The most old patch.
+ * Updates list of data protocol ids.
+   Lots of reporters. I bring my apologies.
+ * Jan Rekorajski <baggins@sith.mimuw.edu.pl>. Updated list of datalink types. 
+ * Christina Chen <chenchristina@cwc.nus.edu.sg>. Bug in parsing IPv6 address match in u32. 
+ * Pekka Savola <pekkas@netcore.fi>. ip -6 route flush dev lo stuck
+   on deleting root of the table.
+ * Werner. dsmark fixes.
+ * Alexander Demenshin <aldem-reply@aldem.net>. Old miraculous bug
+   in ip monitor. It was puzzle, people permanently blame that
+   it prints some crap.
+ * Rui Prior <rprior@inescporto.pt>. f_route failed to resolve fromif.
+   Werner also noticed this and sent patch. Bad place... [RETHINK]
+ * Kim Woelders <kim@woelders.dk>. 
+   - changes in Makefile for cross-compile
+   - understand "all" as alias for "any"
+   - bug in iprule.c
+!  [ NB. Also he sent patch for kernel. Do not forget! ]
+ * Werner. Fix to tc core files: wrong exits etc.
+ * Bernd Jendrissek <berndj@prism.co.za>. Some sanitizations of tc.c
+!* Marian Jancar <marian.jancar@infonet.cz>. He say q_tbf prints wrong latency!
+!  Seems, he is wrong.
+ * Werner (and Nikolai Vladychevski <niko@isl.net.mx>) check ->print_copts
+   to avoid segfault.
+
+[001007]
+  * Compiles under rh-7.0
+
+[000928]
+  * Sorry. I have lost all the CVS with changes made since 000305.
+    If someone sent me a patch after this date, please, resubmit.
+    Restored from the last backup and mailboxes:
+
+  * Edit ip-cref.tex by raf <raf2@zip.com.au>.
+  * RTAX_REORDERING support.
+  * IFLA_MASTER support.
+  * Bug in rtnl_talk(), libnetlink.c. Reported by David P. Olshfski
+       <olshef@us.ibm.com>
+
+[000305]
+  * Bugs in RESOLVE_HOSTNAMES. Bratislav Ilich <bilik@@zepter.ru>
+  * ARPHRD_IEEE802_TR
+
+[000225]
+  * ECN in q_red.c.
+
+[000221]
+  * diffserv update from Jamal Hadi Salim
+  * Some bits of IPX from Steve Whitehouse.
+  * ATM qdisc from Werner Almesberger
+  * Support for new attributes on routes in linux-2.3.
+
+[991023]
+  No news, only several bugs are fixed.
+  * Since ss990630 "ip rule list" printed wrong prefix length.
+      Vladimir V. Ivanov <vlad@alis.tusur.ru>
+  * "ip rule" parsed >INT_MAX values of metric incorrectly.
+      Matthew G. Marsh <mgm@paktronix.com>
+  * Some improvements in doc/Makefile advised by
+      Andi Kleen and Werner Almesberger.
+
+[990824]
+  * new attributes in "ip route": rtt, rttvar, cwnd, ssthresh and advmss.
+  * some updates in documentation to reflect new status.
+
+[990630]
+  * DiffServ support.
+       Werner Almesberger <almesber@lrc.di.epfl.ch>
+       Jamal Hadi Salim <hadi@nortelnetworks.com> 
+  * DECnet support.
+       Steve Whitehouse <SteveW@ACM.org>
+  * Some minor tweaks in docs and code.
+
+[990530]
+  * routel script. Stephen R. van den Berg <srb@cuci.nl>
+  * Bug in tc/q_prio.c resetting priomap. Reported by
+       Ole Husgaard <sparre@login.dknet.dk> and
+       Jan Kasprzak <kas@informatics.muni.cz>
+  * IP command reference manual is published (ip-cref.tex).
+    I am sorry, but tc-cref.tex is still not ready, to be more
+    exact the draft does not describe current tc 8-)
+  * ip, rtmon, rtacct utilities are updated according to manual 8-)
+    Lots of changes:
+       - (MAIN) "flush" command for addr, neigh and route.
+       - error messages are sanitized; now it does not print
+         usage() page on each error.
+       - output format is improved.
+       - "oneline" mode is added.
+       - etc.
+  * Name databases; resolution ascii <-> numeric is split out to lib/*
+  * scripts ifcfg, ifone and rtpr.
+  * examples/dhcp-client-script is copied from my patch to ISC dhcp.
+  * Makefile in doc/ directory.
+
+[990417]
+  * "pmtudisc" flag to "ip tunnel". Phil Karn <karn@ka9q.ampr.org>
+  * bug in tc/q_tbf.c preventing setting peak_rate, Martin Mares <mj@ucw.cz>
+  * doc/flowlabels.tex
+
+[990329]
+
+  * This snapshot fixes some compatibility problems, which I introduced
+    occasionally to previous snapshots.
+  * Namely, "allot" to "tc qdisc add ... cbq" is accepted but ignored.
+  * Another changes are supposed to be shown in the next snapshot, but
+    because of troubles with "allot" I am forced to release premature
+    version. Namely, "cell", "prio", "weight" etc. are optional now.
+  * doc/ip-tunnels.tex
+
+[990327]
+  * History was not recorded.
+
+[981002]
+  * Rani Assaf <rani@magic.metawire.com> contributed resolving
+    addresses to names.
+       BEWARE! DO NOT USE THIS OPTION, WHEN REPORTING BUGS IN
+       IPROUTE OR IN KERNEL. ALL THE BUG REPORTS MUST CONTAIN
+       ONLY NUMERIC ADDRESSES.
+
+[981101]
+  * now it should compile for any libc.
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..636b32885668c19bca686c155f0a6376b565ec97 100644 (file)
@@ -0,0 +1,57 @@
+PSFILES=ip-cref.ps ip-tunnels.ps api-ip6-flowlabels.ps ss.ps nstat.ps arpd.ps rtstat.ps
+# tc-cref.ps
+# api-rtnl.tex api-pmtudisc.tex api-news.tex
+# iki-netdev.ps iki-neighdst.ps
+
+
+LATEX=latex
+DVIPS=dvips
+SGML2DVI=sgml2latex --output=dvi
+SGML2HTML=sgml2html -s 0
+LPR=lpr -Zsduplex
+SHELL=bash
+PAGESIZE=a4
+PAGESPERPAGE=2
+
+HTMLFILES=$(subst .sgml,.html,$(shell echo *.sgml))
+DVIFILES=$(subst .ps,.dvi,$(PSFILES))
+
+
+all: pstwocol
+
+pstwocol: $(PSFILES)
+
+html: $(HTMLFILES)
+
+dvi: $(DVIFILES)
+
+print: $(PSFILES)
+       $(LPR) $(PSFILES)
+
+%.dvi: %.sgml
+       $(SGML2DVI) $<
+
+%.dvi: %.tex
+       @set -e; pass=2; echo "Running LaTeX $<"; \
+       while [ `$(LATEX) $< </dev/null 2>&1 | \
+                grep -c '^\(LaTeX Warning: Label(s) may\|No file \|! Emergency stop\)'` -ge 1 ]; do \
+               if [ $$pass -gt 3 ]; then \
+                       echo "Seems, something is wrong. Try by hands." ; exit 1 ; \
+               fi; \
+               echo "Re-running LaTeX $<, $${pass}d pass"; pass=$$[$$pass + 1]; \
+       done
+
+%.ps: %.dvi
+       $(DVIPS) $< -o $@.tmp
+       ./do-psnup $@.tmp $@ $(PAGESIZE) $(PAGESPERPAGE)
+       rm -f $@.tmp
+
+%.html: %.sgml
+       $(SGML2HTML) $<
+
+install:
+       install -m 0644 $(shell echo *.tex) $(DESTDIR)$(DOCDIR)
+       install -m 0644 $(shell echo *.sgml) $(DESTDIR)$(DOCDIR)
+
+clean:
+       rm -f *.aux *.log *.toc $(PSFILES) $(DVIFILES) *.html
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..55f478ea347a430908e489fd79018164ad34e65f 100644 (file)
--- a/doc/Plan
+++ b/doc/Plan
@@ -0,0 +1,16 @@
+Partially finished work.
+
+1.  User Reference manuals.
+1.1 IP Command reference (ip-cref.tex, published)
+1.2 TC Command reference (tc-cref.tex)
+1.3 IP tunnels (ip-tunnels.tex, published)
+
+2.  Linux-2.2 Networking API
+2.1 RTNETLINK (api-rtnl.tex)
+2.2 Path MTU Discovery (api-pmtudisc.tex)
+2.3 IPv6 Flow Labels (api-ip6-flowlabels.tex, published)
+2.4 Miscellaneous extensions (api-misc.tex)
+
+3.  Linux-2.2 Networking Intra-Kernel Interfaces
+3.1 NetDev --- Networking Devices and netdev... (iki-netdev.tex)
+3.2 Neighbour cache and destination cache. (iki-neighdst.tex)
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..7ed0298402e1d1ad6e70c05363a6b5d889a1d97a 100644 (file)
@@ -0,0 +1 @@
+\def\Draft{020116}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..aa34e94735101d44fa07bbce6d28faa5e14dd78b 100644 (file)
@@ -0,0 +1,429 @@
+\documentstyle[12pt,twoside]{article}
+\def\TITLE{IPv6 Flow Labels}
+\input preamble
+\begin{center}
+\Large\bf IPv6 Flow Labels in Linux-2.2.
+\end{center}
+
+
+\begin{center}
+{ \large Alexey~N.~Kuznetsov } \\
+\em Institute for Nuclear Research, Moscow \\
+\verb|kuznet@ms2.inr.ac.ru| \\
+\rm April 11, 1999
+\end{center}
+
+\vspace{5mm}
+
+\tableofcontents
+
+\section{Introduction.}
+
+Every IPv6 packet carries 28 bits of flow information. RFC2460 splits
+these bits to two fields: 8 bits of traffic class (or DS field, if you
+prefer this term) and 20 bits of flow label. Currently there exist
+no well-defined API to manage IPv6 flow information. In this document
+I describe an attempt to design the API for Linux-2.2 IPv6 stack.
+
+\vskip 1mm
+
+The API must solve the following tasks:
+
+\begin{enumerate}
+
+\item To allow user to set traffic class bits.
+
+\item To allow user to read traffic class bits of received packets.
+This feature is not so useful as the first one, however it will be
+necessary f.e.\ to implement ECN [RFC2481] for datagram oriented services
+or to implement receiver side of SRP or another end-to-end protocol
+using traffic class bits.
+
+\item To assign flow labels to packets sent by user.
+
+\item To get flow labels of received packets. I do not know
+any applications of this feature, but it is possible that receiver will
+want to use flow labels to distinguish sub-flows.
+
+\item To allocate flow labels in the way, compliant to RFC2460. Namely:
+
+\begin{itemize}
+\item
+Flow labels must be uniformly distributed (pseudo-)random numbers,
+so that any subset of 20 bits can be used as hash key.
+
+\item
+Flows with coinciding source address and flow label must have identical
+destination address and not-fragmentable extensions headers (i.e.\ 
+hop by hop options and all the headers up to and including routing header,
+if it is present.)
+
+\begin{NB}
+There is a hole in specs: some hop-by-hop options can be
+defined only on per-packet base (f.e.\  jumbo payload option).
+Essentially, it means that such options cannot be present in packets
+with flow labels.
+\end{NB}
+\begin{NB}
+NB notes here and below reflect only my personal opinion,
+they should be read with smile or should not be read at all :-).
+\end{NB}
+
+
+\item
+Flow labels have finite lifetime and source is not allowed to reuse
+flow label for another flow until the maximal lifetime has expired,
+so that intermediate nodes will be able to invalidate flow state before
+the label is taken over by another flow.
+Flow state, including lifetime, is propagated along datagram path
+by some application specific methods
+(f.e.\ in RSVP PATH messages or in some hop-by-hop option).
+
+
+\end{itemize}
+
+\end{enumerate}
+
+\section{Sending/receiving flow information.}
+
+\paragraph{Discussion.}
+\addcontentsline{toc}{subsection}{Discussion}
+It was proposed (Where? I do not remember any explicit statement)
+to solve the first four tasks using
+\verb|sin6_flowinfo| field added to \verb|struct| \verb|sockaddr_in6|
+(see RFC2553).
+
+\begin{NB}
+       This method is difficult to consider as reasonable, because it
+       puts additional overhead to all the services, despite of only
+       very small subset of them (none, to be more exact) really use it.
+       It contradicts both to IETF spirit and the letter. Before RFC2553
+       one justification existed, IPv6 address alignment left 4 byte
+       hole in \verb|sockaddr_in6| in any case. Now it has no justification.
+\end{NB}
+
+We have two problems with this method. The first one is common for all OSes:
+if \verb|recvmsg()| initializes \verb|sin6_flowinfo| to flow info
+of received packet, we lose one very important property of BSD socket API,
+namely, we are not allowed to use received address for reply directly
+and have to mangle it, even if we are not interested in flowinfo subtleties.
+
+\begin{NB}
+       RFC2553 adds new requirement: to clear \verb|sin6_flowinfo|.
+       Certainly, it is not solution but rather attempt to force applications
+       to make unnecessary work. Well, as usually, one mistake in design
+       is followed by attempts to patch the hole and more mistakes...
+\end{NB}
+
+Another problem is Linux specific. Historically Linux IPv6 did not
+initialize \verb|sin6_flowinfo| at all, so that, if kernel does not
+support flow labels, this field is not zero, but a random number.
+Some applications also did not take care about it. 
+
+\begin{NB}
+Following RFC2553 such applications can be considered as broken,
+but I still think that they are right: clearing all the address
+before filling known fields is robust but stupid solution.
+Useless wasting CPU cycles and
+memory bandwidth is not a good idea. Such patches are acceptable
+as temporary hacks, but not as standard of the future.
+\end{NB}
+
+
+\paragraph{Implementation.}
+\addcontentsline{toc}{subsection}{Implementation}
+By default Linux IPv6 does not read \verb|sin6_flowinfo| field
+assuming that common applications are not obliged to initialize it
+and are permitted to consider it as pure alignment padding.
+In order to tell kernel that application
+is aware of this field, it is necessary to set socket option
+\verb|IPV6_FLOWINFO_SEND|.
+
+\begin{verbatim}
+  int on = 1;
+  setsockopt(sock, SOL_IPV6, IPV6_FLOWINFO_SEND,
+             (void*)&on, sizeof(on));
+\end{verbatim}
+
+Linux kernel never fills \verb|sin6_flowinfo| field, when passing
+message to user space, though the kernels which support flow labels
+initialize it to zero. If user wants to get received flowinfo, he
+will set option \verb|IPV6_FLOWINFO| and after this he will receive
+flowinfo as ancillary data object of type \verb|IPV6_FLOWINFO|
+(cf.\ RFC2292).
+
+\begin{verbatim}
+  int on = 1;
+  setsockopt(sock, SOL_IPV6, IPV6_FLOWINFO, (void*)&on, sizeof(on));
+\end{verbatim}
+
+Flowinfo received and latched by a connected TCP socket also may be fetched
+with \verb|getsockopt()| \verb|IPV6_PKTOPTIONS| together with
+another optional information.
+
+Besides that, in the spirit of RFC2292 the option \verb|IPV6_FLOWINFO|
+may be used as alternative way to send flowinfo with \verb|sendmsg()| or
+to latch it with \verb|IPV6_PKTOPTIONS|.
+
+\paragraph{Note about IPv6 options and destination address.}
+\addcontentsline{toc}{subsection}{IPv6 options and destination address}
+If \verb|sin6_flowinfo| contains a non-zero flow label,
+destination address in \verb|sin6_addr| and non-fragmentable
+extension headers are ignored. Instead, kernel uses the values
+cached at flow setup (see below). However, for connected sockets
+kernel prefers the values set at connection time.
+
+\paragraph{Example.}
+\addcontentsline{toc}{subsection}{Example}
+After setting socket option \verb|IPV6_FLOWINFO|
+flowlabel and DS field are received as ancillary data object
+of type \verb|IPV6_FLOWINFO| and level \verb|SOL_IPV6|.
+In the cases when it is convenient to use \verb|recvfrom(2)|,
+it is possible to replace library variant with your own one,
+sort of:
+
+\begin{verbatim}
+#include <sys/socket.h>
+#include <netinet/in6.h>
+
+size_t recvfrom(int fd, char *buf, size_t len, int flags,
+                struct sockaddr *addr, int *addrlen)
+{
+  size_t cc;
+  char cbuf[128];
+  struct cmsghdr *c;
+  struct iovec iov = { buf, len };
+  struct msghdr msg = { addr, *addrlen,
+                        &iov,  1,
+                        cbuf, sizeof(cbuf),
+                        0 };
+
+  cc = recvmsg(fd, &msg, flags);
+  if (cc < 0)
+    return cc;
+  ((struct sockaddr_in6*)addr)->sin6_flowinfo = 0;
+  *addrlen = msg.msg_namelen;
+  for (c=CMSG_FIRSTHDR(&msg); c; c = CMSG_NEXTHDR(&msg, c)) {
+    if (c->cmsg_level != SOL_IPV6 ||
+      c->cmsg_type != IPV6_FLOWINFO)
+        continue;
+    ((struct sockaddr_in6*)addr)->sin6_flowinfo = *(__u32*)CMSG_DATA(c);
+  }
+  return cc;
+}
+\end{verbatim}
+
+
+
+\section{Flow label management.}
+
+\paragraph{Discussion.}
+\addcontentsline{toc}{subsection}{Discussion}
+Requirements of RFC2460 are pretty tough. Particularly, lifetimes
+longer than boot time require to store allocated labels at stable
+storage, so that the full implementation necessarily includes user space flow
+label manager. There are at least three different approaches:
+
+\begin{enumerate}
+\item {\bf ``Cooperative''. } We could leave flow label allocation wholly
+to user space. When user needs label he requests manager directly. The approach
+is valid, but as any ``cooperative'' approach it suffers of security problems.
+
+\begin{NB}
+One idea is to disallow not privileged user to allocate flow
+labels, but instead to pass the socket to manager via \verb|SCM_RIGHTS|
+control message, so that it will allocate label and assign it to socket
+itself. Hmm... the idea is interesting.
+\end{NB}
+
+\item {\bf ``Indirect''.} Kernel redirects requests to user level daemon
+and does not install label until the daemon acknowledged the request.
+The approach is the most promising, it is especially pleasant to recognize
+parallel with IPsec API [RFC2367,Craig]. Actually, it may share API with
+IPsec.
+
+\item {\bf ``Stupid''.} To allocate labels in kernel space. It is the simplest
+method, but it suffers of two serious flaws: the first,
+we cannot lease labels with lifetimes longer than boot time, the second, 
+it is sensitive to DoS attacks. Kernel has to remember all the obsolete
+labels until their expiration and a malicious user may quickly eat up all the
+flow label space.
+
+\end{enumerate}
+
+Certainly, I choose the most ``stupid'' method. It is the cheapest one
+for implementor (i.e.\ me), and taking into account that flow labels
+still have no serious applications it is not useful to work on more
+advanced API, especially, taking into account that eventually we
+will get it for no fee together with IPsec.
+
+
+\paragraph{Implementation.}
+\addcontentsline{toc}{subsection}{Implementation}
+Socket option \verb|IPV6_FLOWLABEL_MGR| allows to
+request flow label manager to allocate new flow label, to reuse
+already allocated one or to delete old flow label.
+Its argument is \verb|struct| \verb|in6_flowlabel_req|:
+
+\begin{verbatim}
+struct in6_flowlabel_req
+{
+        struct in6_addr flr_dst;
+        __u32           flr_label;
+        __u8            flr_action;
+        __u8            flr_share;
+        __u16           flr_flags;
+        __u16           flr_expires;
+        __u16           flr_linger;
+        __u32         __flr_reserved;
+        /* Options in format of IPV6_PKTOPTIONS */
+};
+\end{verbatim}
+
+\begin{itemize}
+
+\item \verb|dst| is IPv6 destination address associated with the label.
+
+\item \verb|label| is flow label value in network byte order. If it is zero,
+kernel will allocate new pseudo-random number. Otherwise, kernel will try
+to lease flow label ordered by user. In this case, it is user task to provide
+necessary flow label randomness.
+
+\item \verb|action| is requested operation. Currently, only three operations
+are defined:
+
+\begin{verbatim}
+#define IPV6_FL_A_GET   0   /* Get flow label */
+#define IPV6_FL_A_PUT   1   /* Release flow label */
+#define IPV6_FL_A_RENEW 2   /* Update expire time */
+\end{verbatim}
+
+\item \verb|flags| are optional modifiers. Currently
+only \verb|IPV6_FL_A_GET| has modifiers:
+
+\begin{verbatim}
+#define IPV6_FL_F_CREATE 1   /* Allowed to create new label */
+#define IPV6_FL_F_EXCL   2   /* Do not create new label */
+\end{verbatim}
+
+
+\item \verb|share| defines who is allowed to reuse the same flow label.
+
+\begin{verbatim}
+#define IPV6_FL_S_NONE    0   /* Not defined */
+#define IPV6_FL_S_EXCL    1   /* Label is private */
+#define IPV6_FL_S_PROCESS 2   /* May be reused by this process */
+#define IPV6_FL_S_USER    3   /* May be reused by this user */
+#define IPV6_FL_S_ANY     255 /* Anyone may reuse it */
+\end{verbatim}
+
+\item \verb|linger| is time in seconds. After the last user releases flow
+label, it will not be reused with different destination and options at least
+during this time. If \verb|share| is not \verb|IPV6_FL_S_EXCL| the label
+still can be shared by another sockets. Current implementation does not allow
+unprivileged user to set linger longer than 60 sec.
+
+\item \verb|expires| is time in seconds. Flow label will be kept at least
+for this time, but it will not be destroyed before user released it explicitly
+or closed all the sockets using it. Current implementation does not allow
+unprivileged user to set timeout longer than 60 sec. Privileged applications
+MAY set longer lifetimes, but in this case they MUST save allocated
+labels at stable storage and restore them back after reboot before the first
+application allocates new flow.
+
+\end{itemize}
+
+This structure is followed by optional extension headers associated
+with this flow label in format of \verb|IPV6_PKTOPTIONS|. Only
+\verb|IPV6_HOPOPTS|, \verb|IPV6_RTHDR| and, if \verb|IPV6_RTHDR| is present,
+\verb|IPV6_DSTOPTS| are allowed.
+
+\paragraph{Example.}
+\addcontentsline{toc}{subsection}{Example}
+ The function \verb|get_flow_label| allocates
+private flow label.
+
+\begin{verbatim}
+int get_flow_label(int fd, struct sockaddr_in6 *dst, __u32 fl)
+{
+        int on = 1;
+        struct in6_flowlabel_req freq;
+
+        memset(&freq, 0, sizeof(freq));
+        freq.flr_label = htonl(fl);
+        freq.flr_action = IPV6_FL_A_GET;
+        freq.flr_flags = IPV6_FL_F_CREATE | IPV6_FL_F_EXCL;
+        freq.flr_share = IPV6_FL_S_EXCL;
+        memcpy(&freq.flr_dst, &dst->sin6_addr, 16);
+        if (setsockopt(fd, SOL_IPV6, IPV6_FLOWLABEL_MGR,
+                       &freq, sizeof(freq)) == -1) {
+                perror ("can't lease flowlabel");
+                return -1;
+        }
+        dst->sin6_flowinfo |= freq.flr_label;
+
+        if (setsockopt(fd, SOL_IPV6, IPV6_FLOWINFO_SEND,
+                       &on, sizeof(on)) == -1) {
+                perror ("can't send flowinfo");
+
+                freq.flr_action = IPV6_FL_A_PUT;
+                setsockopt(fd, SOL_IPV6, IPV6_FLOWLABEL_MGR,
+                           &freq, sizeof(freq));
+                return -1;
+        }
+        return 0;
+}
+\end{verbatim}
+
+A bit more complicated example using routing header can be found
+in \verb|ping6| utility (\verb|iputils| package). Linux rsvpd backend
+contains an example of using operation \verb|IPV6_FL_A_RENEW|.
+
+\paragraph{Listing flow labels.} 
+\addcontentsline{toc}{subsection}{Listing flow labels}
+List of currently allocated
+flow labels may be read from \verb|/proc/net/ip6_flowlabel|.
+
+\begin{verbatim}
+Label S Owner Users Linger Expires Dst                              Opt
+A1BE5 1 0     0     6      3       3ffe2400000000010a0020fffe71fb30 0
+\end{verbatim}
+
+\begin{itemize}
+\item \verb|Label| is hexadecimal flow label value.
+\item \verb|S| is sharing style.
+\item \verb|Owner| is ID of creator, it is zero, pid or uid, depending on
+               sharing style.
+\item \verb|Users| is number of applications using the label now.
+\item \verb|Linger| is \verb|linger| of this label in seconds.
+\item \verb|Expires| is time until expiration of the label in seconds. It may
+       be negative, if the label is in use.
+\item \verb|Dst| is IPv6 destination address.
+\item \verb|Opt| is length of options, associated with the label. Option
+       data are not accessible.
+\end{itemize}
+
+
+\paragraph{Flow labels and RSVP.} 
+\addcontentsline{toc}{subsection}{Flow labels and RSVP}
+RSVP daemon supports IPv6 flow labels
+without any modifications to standard ISI RAPI. Sender must allocate
+flow label, fill corresponding sender template and submit it to local rsvp
+daemon. rsvpd will check the label and start to announce it in PATH
+messages. Rsvpd on sender node will renew the flow label, so that it will not
+be reused before path state expires and all the intermediate
+routers and receiver purge flow state.
+
+\verb|rtap| utility is modified to parse flow labels. F.e.\ if user allocated
+flow label \verb|0xA1234|, he may write:
+
+\begin{verbatim}
+RTAP> sender 3ffe:2400::1/FL0xA1234 <Tspec>
+\end{verbatim}
+
+Receiver makes reservation with command:
+\begin{verbatim}
+RTAP> reserve ff 3ffe:2400::1/FL0xA1234 <Flowspec>
+\end{verbatim}
+
+\end{document}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0ab79c6090fd40cc127b06f80f85095873d1bae5 100644 (file)
@@ -0,0 +1,130 @@
+<!doctype linuxdoc system>
+
+<article>
+
+<title>ARPD Daemon
+<author>Alexey Kuznetsov, <tt/kuznet@ms2.inr.ac.ru/
+<date>some_negative_number, 20 Sep 2001
+<abstract>
+<tt/arpd/ is daemon collecting gratuitous ARP information, saving
+it on local disk and feeding it to kernel on demand to avoid
+redundant broadcasting due to limited size of kernel ARP cache. 
+</abstract>
+
+
+<p><bf/Description/
+
+<p>The format of the command is:
+
+<tscreen><verb>
+       arpd OPTIONS [ INTERFACE [ INTERFACE ... ] ]
+</verb></tscreen>
+
+<p> <tt/OPTIONS/ are:
+
+<itemize>
+
+<item><tt/-l/ - dump <tt/arpd/ database to stdout and exit. Output consists
+of three columns: interface index, IP address and MAC address.
+Negative entries for dead hosts are also shown, in this case MAC address
+is replaced by word <tt/FAILED/ followed by colon and time when the fact
+that host is dead was proven the last time.
+
+<item><tt/-f FILE/  - read and load <tt/arpd/ database from <tt/FILE/
+in a text format similar to that dumped by option <tt/-l/. Exit after load,
+probably listing resulting database, if option <tt/-l/ is also given.
+If <tt/FILE/ is <tt/-/, <tt/stdin/ is read to get ARP table.
+<item><tt/-b DATABASE/  - location of database file. Default location is
+<tt>/var/lib/arpd/arpd.db</tt>.
+
+<item><tt/-a NUMBER/ - <tt/arpd/ not only passively listens for ARP on the wire, but
+also sends broadcast queries itself. <tt/NUMBER/ is the number of such queries
+to make before destination is considered as dead. When <tt/arpd/ is started
+as kernel helper (i.e. with <tt/app_solicit/ enabled in <tt/sysctl/
+or even with option <tt/-k/) without this option and still did not learn enough
+information, you can observe 1 second gaps in service. Not fatal, but
+not good.
+
+<item><tt/-k/ - suppress sending broadcast queries by kernel. It makes
+sense together with option <tt/-a/.
+
+<item><tt/-n TIME/ - timeout of negative cache. When resolution fails <tt/arpd/
+suppresses further attempts to resolve for this period. It makes sense
+only together with option <tt/-k/. This timeout should not be too much
+longer than boot time of a typical host not supporting gratuitous ARP.
+Default value is 60 seconds.
+
+<item><tt/-R RATE/ - maximal steady rate of broadcasts sent by <tt/arpd/
+in packets per second. Default value is 1.
+
+<item><tt/-B NUMBER/ - number of broadcasts sent by <tt/arpd/ back to back.
+Default value is 3. Together with option <tt/-R/ this option allows
+to police broadcasting not to exceed <tt/B+R*T/ over any interval
+of time <tt/T/.
+
+</itemize>
+
+<p><tt/INTERFACE/ is the name of a networking interface to watch.
+If no interfaces given, <tt/arpd/ monitors all the interfaces.
+In this case <tt/arpd/ does not adjust <tt/sysctl/ parameters,
+it is supposed user does this himself after <tt/arpd/ is started.
+
+
+<p> Signals
+
+<p> <tt/arpd/ exits gracefully syncing database and restoring adjusted
+<tt/sysctl/ parameters, when receives <tt/SIGINT/ or <tt/SIGTERM/.
+<tt/SIGHUP/ syncs database to disk. <tt/SIGUSR1/ sends some statistics
+to <tt/syslog/. The effect of other signals is undefined; they may corrupt
+database and leave <tt/sysctl/ parameters in an unpredictable state.
+
+<p> Note
+
+<p> In order for <tt/arpd/ to be able to serve as ARP resolver, kernel must be
+compiled with the option <tt/CONFIG_ARPD/ and, in the case when interface list
+is not given on command line, variable <tt/app_solicit/
+on interfaces of interest should be set in <tt>/proc/sys/net/ipv4/neigh/*</tt>.
+If this is not done, <tt/arpd/ still collects gratuitous ARP information
+in its database.
+
+<p> Examples
+
+<enum>
+<item> Start <tt/arpd/ to collect gratuitous ARP, but not messing
+with kernel functionality:
+
+<tscreen><verb>
+   arpd -b /var/tmp/arpd.db
+</verb></tscreen>
+
+<item> Look at result after some time:
+
+<tscreen><verb>
+   killall arpd
+   arpd -l -b /var/tmp/arpd.db
+</verb></tscreen>
+
+<item> To enable kernel helper, leaving leading role to kernel:
+
+<tscreen><verb>
+   arpd -b /var/tmp/arpd.db -a 1 eth0 eth1
+</verb></tscreen>
+
+<item> Completely replace kernel resolution on interfaces <tt/eth0/
+and <tt/eth1/. In this case kernel still does unicast probing to
+validate entries, but all the broadcast activity is suppressed
+and made under authority of <tt/arpd/: 
+
+<tscreen><verb>
+   arpd -b /var/tmp/arpd.db -a 3 -k eth0 eth1
+</verb></tscreen>
+
+This is the mode in which <tt/arpd/ is supposed to work normally.
+It is not the default just to prevent accidental enabling of too aggressive
+a mode.
+
+</enum>
+
+</article>
+
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..2dce848e267debbf83fda00e31845e6e776182fe 100644 (file)
@@ -0,0 +1,16 @@
+#! /bin/bash
+# $1 = Temporary file (the input that is read) . "string"
+# $2 = Output file (the processed result is written here) . "string"
+# $3 = Page size . ie: a4 , letter ... "string"
+# $4 = Number of pages to fit on a single sheet . "numeric"
+
+if type psnup >&/dev/null; then
+       echo "psnup -$4 -p$3 $1 $2"
+       psnup -$4 -p$3 $1 $2
+elif type psmulti >&/dev/null; then
+       echo "psmulti $1 > $2"
+       psmulti $1 > $2
+else
+       echo "cp $1 $2"
+       cp $1 $2
+fi
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..5eaa4a896eb15ebf743d71b5b091c570c0234d47 100644 (file)
+\documentstyle[12pt,twoside]{article}
+\def\TITLE{IP Command Reference}
+\input preamble
+\begin{center}
+\Large\bf IP Command Reference.
+\end{center}
+
+
+\begin{center}
+{ \large Alexey~N.~Kuznetsov } \\
+\em Institute for Nuclear Research, Moscow \\
+\verb|kuznet@ms2.inr.ac.ru| \\
+\rm April 14, 1999
+\end{center}
+
+\vspace{5mm}
+
+\tableofcontents
+
+\newpage
+
+\section{About this document}
+
+This document presents a comprehensive description of the \verb|ip| utility
+from the \verb|iproute2| package. It is not a tutorial or user's guide.
+It is a {\em dictionary\/}, not explaining terms,
+but translating them into other terms, which may also be unknown to the reader.
+However, the document is self-contained and the reader, provided they have a
+basic networking background, will find enough information
+and examples to understand and configure Linux-2.2 IP and IPv6
+networking.
+
+This document is split into sections explaining \verb|ip| commands
+and options, decrypting \verb|ip| output and containing a few examples.
+More voluminous examples and some topics, which require more elaborate
+discussion, are in the appendix.
+
+The paragraphs beginning with NB contain side notes, warnings about
+bugs and design drawbacks. They may be skipped at the first reading.
+
+\section{{\tt ip} --- command syntax}
+
+The generic form of an \verb|ip| command is:
+\begin{verbatim}
+ip [ OPTIONS ] OBJECT [ COMMAND [ ARGUMENTS ]]
+\end{verbatim}
+where \verb|OPTIONS| is a set of optional modifiers affecting the
+general behaviour of the \verb|ip| utility or changing its output. All options
+begin with the character \verb|'-'| and may be used in either long or abbreviated 
+forms. Currently, the following options are available:
+
+\begin{itemize}
+\item \verb|-V|, \verb|-Version|
+
+--- print the version of the \verb|ip| utility and exit.
+
+
+\item \verb|-s|, \verb|-stats|, \verb|-statistics|
+
+--- output more information. If the option
+appears twice or more, the amount of information increases.
+As a rule, the information is statistics or some time values.
+
+
+\item \verb|-f|, \verb|-family| followed by a protocol family
+identifier: \verb|inet|, \verb|inet6| or \verb|link|.
+
+--- enforce the protocol family to use. If the option is not present,
+the protocol family is guessed from other arguments. If the rest of the command
+line does not give enough information to guess the family, \verb|ip| falls back to the default
+one, usually \verb|inet| or \verb|any|. \verb|link| is a special family
+identifier meaning that no networking protocol is involved.
+
+\item \verb|-4|
+
+--- shortcut for \verb|-family inet|.
+
+\item \verb|-6|
+
+--- shortcut for \verb|-family inet6|.
+
+\item \verb|-0|
+
+--- shortcut for \verb|-family link|.
+
+
+\item \verb|-o|, \verb|-oneline|
+
+--- output each record on a single line, replacing line feeds
+with the \verb|'\'| character. This is convenient when you want to
+count records with \verb|wc| or to \verb|grep| the output. The trivial
+script \verb|rtpr| converts the output back into readable form.
+
+\item \verb|-r|, \verb|-resolve|
+
+--- use the system's name resolver to print DNS names instead of
+host addresses.
+
+\begin{NB}
+ Do not use this option when reporting bugs or asking for advice.
+\end{NB}
+\begin{NB}
+ \verb|ip| never uses DNS to resolve names to addresses.
+\end{NB}
+
+\end{itemize}
+
+\verb|OBJECT| is the object to manage or to get information about.
+The object types currently understood by \verb|ip| are:
+
+\begin{itemize}
+\item \verb|link| --- network device
+\item \verb|address| --- protocol (IP or IPv6) address on a device
+\item \verb|neighbour| --- ARP or NDISC cache entry
+\item \verb|route| --- routing table entry
+\item \verb|rule| --- rule in routing policy database
+\item \verb|maddress| --- multicast address
+\item \verb|mroute| --- multicast routing cache entry
+\item \verb|tunnel| --- tunnel over IP
+\end{itemize}
+
+Again, the names of all objects may be written in full or
+abbreviated form, f.e.\ \verb|address| is abbreviated as \verb|addr|
+or just \verb|a|.
+
+\verb|COMMAND| specifies the action to perform on the object.
+The set of possible actions depends on the object type.
+As a rule, it is possible to \verb|add|, \verb|delete| and
+\verb|show| (or \verb|list|) objects, but some objects
+do not allow all of these operations or have some additional commands.
+The \verb|help| command is available for all objects. It prints
+out a list of available commands and argument syntax conventions.
+
+If no command is given, some default command is assumed.
+Usually it is \verb|list| or, if the objects of this class
+cannot be listed, \verb|help|.
+
+\verb|ARGUMENTS| is a list of arguments to the command.
+The arguments depend on the command and object. There are two types of arguments:
+{\em flags\/}, consisting of a single keyword, and {\em parameters\/},
+consisting of a keyword followed by a value. For convenience,
+each command has some {\em default parameter\/}
+which may be omitted. F.e.\ parameter \verb|dev| is the default
+for the {\tt ip link} command, so {\tt ip link ls eth0} is equivalent
+to {\tt ip link ls dev eth0}.
+In the command descriptions below such parameters
+are distinguished with the marker: ``(default)''.
+
+Almost all keywords may be abbreviated with several first (or even single)
+letters. The shortcuts are convenient when \verb|ip| is used interactively,
+but they are not recommended in scripts or when reporting bugs
+or asking for advice. ``Officially'' allowed abbreviations are listed
+in the document body.
+
+
+
+\section{{\tt ip} --- error messages}
+
+\verb|ip| may fail for one of the following reasons:
+
+\begin{itemize}
+\item
+A syntax error on the command line: an unknown keyword, incorrectly formatted
+IP address {\em et al\/}. In this case \verb|ip| prints an error message
+and exits. As a rule, the error message will contain information
+about the reason for the failure. Sometimes it also prints a help page.
+
+\item
+The arguments did not pass verification for self-consistency.
+
+\item
+\verb|ip| failed to compile a kernel request from the arguments
+because the user didn't give enough information.
+
+\item
+The kernel returned an error to some syscall. In this case \verb|ip|
+prints the error message, as it is output with \verb|perror(3)|,
+prefixed with a comment and a syscall identifier.
+
+\item
+The kernel returned an error to some RTNETLINK request.
+In this case \verb|ip| prints the error message, as it is output
+with \verb|perror(3)| prefixed with ``RTNETLINK answers:''.
+
+\end{itemize}
+
+All the operations are atomic, i.e.\ 
+if the \verb|ip| utility fails, it does not change anything
+in the system. One harmful exception is \verb|ip link| command
+(Sec.\ref{IP-LINK}, p.\pageref{IP-LINK}),
+which may change only some of the device parameters given
+on command line.
+
+It is difficult to list all the error messages (especially
+syntax errors). However, as a rule, their meaning is clear
+from the context of the command.
+
+The most common mistakes are:
+
+\begin{enumerate}
+\item Netlink is not configured in the kernel. The message is:
+\begin{verbatim}
+Cannot open netlink socket: Invalid value
+\end{verbatim}
+
+\item RTNETLINK is not configured in the kernel. In this case
+one of the following messages may be printed, depending on the command:
+\begin{verbatim}
+Cannot talk to rtnetlink: Connection refused
+Cannot send dump request: Connection refused
+\end{verbatim}
+
+\item The \verb|CONFIG_IP_MULTIPLE_TABLES| option was not selected
+when configuring the kernel. In this case any attempt to use the
+\verb|ip| \verb|rule| command will fail, f.e.
+\begin{verbatim}
+kuznet@kaiser $ ip rule list
+RTNETLINK error: Invalid argument
+dump terminated
+\end{verbatim}
+
+\end{enumerate}
+
+
+\section{{\tt ip link} --- network device configuration}
+\label{IP-LINK}
+
+\paragraph{Object:} A \verb|link| is a network device and the corresponding
+commands display and change the state of devices.
+
+\paragraph{Commands:} \verb|set| and \verb|show| (or \verb|list|).
+
+\subsection{{\tt ip link set} --- change device attributes}
+
+\paragraph{Abbreviations:} \verb|set|, \verb|s|.
+
+\paragraph{Arguments:}
+
+\begin{itemize}
+\item \verb|dev NAME| (default)
+
+--- \verb|NAME| specifies the network device on which to operate.
+
+\item \verb|up| and \verb|down|
+
+--- change the state of the device to \verb|UP| or \verb|DOWN|.
+
+\item \verb|arp on| or \verb|arp off|
+
+--- change the \verb|NOARP| flag on the device.
+
+\begin{NB}
+This operation is {\em not allowed\/} if the device is in state \verb|UP|.
+However, neither the \verb|ip| utility nor the kernel checks for this condition.
+You can get unpredictable results changing this flag while the
+device is running.
+\end{NB}
+
+\item \verb|multicast on| or \verb|multicast off|
+
+--- change the \verb|MULTICAST| flag on the device.
+
+\item \verb|dynamic on| or \verb|dynamic off|
+
+--- change the \verb|DYNAMIC| flag on the device.
+
+\item \verb|name NAME|
+
+--- change the name of the device. This operation is not
+recommended if the device is running or has some addresses
+already configured.
+
+\item \verb|txqueuelen NUMBER| or \verb|txqlen NUMBER|
+
+--- change the transmit queue length of the device.
+
+\item \verb|mtu NUMBER|
+
+--- change the MTU of the device.
+
+\item \verb|address LLADDRESS|
+
+--- change the station address of the interface.
+
+\item \verb|broadcast LLADDRESS|, \verb|brd LLADDRESS| or \verb|peer LLADDRESS|
+
+--- change the link layer broadcast address or the peer address when
+the interface is \verb|POINTOPOINT|.
+
+\vskip 1mm
+\begin{NB}
+For most devices (f.e.\ for Ethernet) changing the link layer
+broadcast address will break networking.
+Do not use it, if you do not understand what this operation really does.
+\end{NB}
+
+\end{itemize}
+
+\vskip 1mm
+\begin{NB}
+The {\tt ip} utility does not change the \verb|PROMISC| 
+or \verb|ALLMULTI| flags. These flags are considered
+obsolete and should not be changed administratively.
+\end{NB}
+
+\paragraph{Warning:} If multiple parameter changes are requested,
+\verb|ip| aborts immediately after any of the changes have failed.
+This is the only case when \verb|ip| can move the system to
+an unpredictable state. The solution is to avoid changing
+several parameters with one {\tt ip link set} call.
+
+\paragraph{Examples:}
+\begin{itemize}
+\item \verb|ip link set dummy address 00:00:00:00:00:01|
+
+--- change the station address of the interface \verb|dummy|.
+
+\item \verb|ip link set dummy up|
+
+--- start the interface \verb|dummy|.
+
+\end{itemize}
+
+
+\subsection{{\tt ip link show} --- display device attributes}
+\label{IP-LINK-SHOW}
+
+\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|lst|, \verb|sh|, \verb|ls|,
+\verb|l|.
+
+\paragraph{Arguments:}
+\begin{itemize}
+\item \verb|dev NAME| (default)
+
+--- \verb|NAME| specifies the network device to show.
+If this argument is omitted all devices are listed.
+
+\item \verb|up|
+
+--- only display running interfaces.
+
+\end{itemize}
+
+
+\paragraph{Output format:}
+
+\begin{verbatim}
+kuznet@alisa:~ $ ip link ls eth0
+3: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc cbq qlen 100
+    link/ether 00:a0:cc:66:18:78 brd ff:ff:ff:ff:ff:ff
+kuznet@alisa:~ $ ip link ls sit0
+5: sit0@NONE: <NOARP,UP> mtu 1480 qdisc noqueue
+    link/sit 0.0.0.0 brd 0.0.0.0
+kuznet@alisa:~ $ ip link ls dummy
+2: dummy: <BROADCAST,NOARP> mtu 1500 qdisc noop
+    link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff
+kuznet@alisa:~ $ 
+\end{verbatim}
+
+
+The number before each colon is an {\em interface index\/} or {\em ifindex\/}.
+This number uniquely identifies the interface. This is followed by the {\em interface name\/}
+(\verb|eth0|, \verb|sit0| etc.). The interface name is also
+unique at every given moment. However, the interface may disappear from the
+list (f.e.\ when the corresponding driver module is unloaded) and another
+one with the same name may be created later. Besides that,
+the administrator may change the name of any device with
+\verb|ip| \verb|link| \verb|set| \verb|name|
+to make it more intelligible.
+
+The interface name may have another name or \verb|NONE| appended 
+after the \verb|@| sign. This means that this device is bound to some other
+device,
+i.e.\ packets sent through it are encapsulated and sent via the ``master''
+device. If the name is \verb|NONE|, the master is unknown.
+
+Then we see the interface {\em mtu\/} (``maximum transmission unit''). This determines
+the maximal size of data which can be sent as a single packet over this interface.
+
+{\em qdisc\/} (``queuing discipline'') shows the queuing algorithm used
+on the interface. Particularly, \verb|noqueue| means that this interface
+does not queue anything and \verb|noop| means that the interface is in blackhole
+mode i.e.\ all packets sent to it are immediately discarded.
+{\em qlen\/} is the default transmit queue length of the device measured
+in packets.
+
+The interface flags are summarized in the angle brackets.
+
+\begin{itemize}
+\item \verb|UP| --- the device is turned on. It is ready to accept
+packets for transmission and it may inject into the kernel packets received
+from other nodes on the network.
+
+\item \verb|LOOPBACK| --- the interface does not communicate with other
+hosts. All packets sent through it will be returned
+and nothing but bounced packets can be received.
+
+\item \verb|BROADCAST| --- the device has the facility to send packets
+to all hosts sharing the same link. A typical example is an Ethernet link.
+
+\item \verb|POINTOPOINT| --- the link has only two ends with one node
+attached to each end. All packets sent to this link will reach the peer
+and all packets received by us came from this single peer.
+
+If neither \verb|LOOPBACK| nor \verb|BROADCAST| nor \verb|POINTOPOINT|
+are set, the interface is assumed to be NBMA (Non-Broadcast Multi-Access).
+This is the most generic type of device and the most complicated one, because
+the host attached to a NBMA link has no means to send to anyone
+without additionally configured information.
+
+\item \verb|MULTICAST| --- is an advisory flag indicating that the interface
+is aware of multicasting i.e.\ sending packets to some subset of neighbouring
+nodes. Broadcasting is a particular case of multicasting, where the multicast
+group consists of all nodes on the link. It is important to emphasize
+that software {\em must not\/} interpret the absence of this flag as the inability
+to use multicasting on this interface. Any \verb|POINTOPOINT| and
+\verb|BROADCAST| link is multicasting by definition, because we have
+direct access to all the neighbours and, hence, to any part of them.
+Certainly, the use of high bandwidth multicast transfers is not recommended
+on broadcast-only links because of high expense, but it is not strictly
+prohibited.
+
+\item \verb|PROMISC| --- the device listens to and feeds to the kernel all
+traffic on the link even if it is not destined for us, not broadcasted
+and not destined for a multicast group of which we are a member. Usually
+this mode exists only on broadcast links and is used by bridges and for network
+monitoring.
+
+\item \verb|ALLMULTI| --- the device receives all multicast packets
+wandering on the link. This mode is used by multicast routers.
+
+\item \verb|NOARP| --- this flag is different from the other ones. It has
+no invariant value and its interpretation depends on the network protocols
+involved. As a rule, it indicates that the device needs no address
+resolution and that the software or hardware knows how to deliver packets
+without any help from the protocol stacks.
+
+\item \verb|DYNAMIC| --- is an advisory flag indicating that the interface is
+dynamically created and destroyed.
+
+\item \verb|SLAVE| --- this interface is bonded to some other interfaces
+to share link capacities.
+
+\end{itemize}
+
+\vskip 1mm
+\begin{NB}
+There are other flags but they are either obsolete (\verb|NOTRAILERS|)
+or not implemented (\verb|DEBUG|) or specific to some devices
+(\verb|MASTER|, \verb|AUTOMEDIA| and \verb|PORTSEL|). We do not discuss
+them here.
+\end{NB}
+\begin{NB}
+The values of \verb|PROMISC| and \verb|ALLMULTI| flags
+shown by the \verb|ifconfig| utility and by the \verb|ip| utility
+are {\em different\/}. \verb|ip link ls| shows the true device state,
+while \verb|ifconfig| shows the virtual state which was set with
+\verb|ifconfig| itself.
+\end{NB}
+
+
+The second line contains information on the link layer addresses
+associated with the device. The first word (\verb|ether|, \verb|sit|)
+defines the interface hardware type. This type determines the format and semantics
+of the addresses and is logically part of the address.
+The default format of the station address and the broadcast address
+(or the peer address for pointopoint links) is a
+sequence of hexadecimal bytes separated by colons, but some link
+types may have their natural address format, f.e.\ addresses
+of tunnels over IP are printed as dotted-quad IP addresses.
+
+\vskip 1mm
+\begin{NB}
+  NBMA links have no well-defined broadcast or peer address,
+  however this field may contain useful information, f.e.\
+  about the address of broadcast relay or about the address of the ARP server.
+\end{NB}
+\begin{NB}
+Multicast addresses are not shown by this command, see
+\verb|ip maddr ls| in~Sec.\ref{IP-MADDR} (p.\pageref{IP-MADDR} of this
+document).
+\end{NB}
+
+
+\paragraph{Statistics:} With the \verb|-statistics| option, \verb|ip| also
+prints interface statistics:
+
+\begin{verbatim}
+kuznet@alisa:~ $ ip -s link ls eth0
+3: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc cbq qlen 100
+    link/ether 00:a0:cc:66:18:78 brd ff:ff:ff:ff:ff:ff
+    RX: bytes  packets  errors  dropped overrun mcast   
+    2449949362 2786187  0       0       0       0      
+    TX: bytes  packets  errors  dropped carrier collsns 
+    178558497  1783945  332     0       332     35172  
+kuznet@alisa:~ $
+\end{verbatim}
+\verb|RX:| and \verb|TX:| lines summarize receiver and transmitter
+statistics. They contain:
+\begin{itemize}
+\item \verb|bytes| --- the total number of bytes received or transmitted
+on the interface. This number wraps when the maximal length of the data type
+natural for the architecture is exceeded, so continuous monitoring requires
+a user level daemon snapping it periodically.
+\item \verb|packets| --- the total number of packets received or transmitted
+on the interface.
+\item \verb|errors| --- the total number of receiver or transmitter errors.
+\item \verb|dropped| --- the total number of packets dropped due to lack
+of resources.
+\item \verb|overrun| --- the total number of receiver overruns resulting
+in dropped packets. As a rule, if the interface is overrun, it means
+serious problems in the kernel or that your machine is too slow
+for this interface.
+\item \verb|mcast| --- the total number of received multicast packets. This option
+is only supported by a few devices.
+\item \verb|carrier| --- total number of link media failures f.e.\ because
+of lost carrier.
+\item \verb|collsns| --- the total number of collision events
+on Ethernet-like media. This number may have a different sense on other
+link types.
+\item \verb|compressed| --- the total number of compressed packets. This is
+available only for links using VJ header compression.
+\end{itemize}
+
+
+If the \verb|-s| option is entered twice or more,
+\verb|ip| prints more detailed statistics on receiver
+and transmitter errors.
+
+\begin{verbatim}
+kuznet@alisa:~ $ ip -s -s link ls eth0
+3: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc cbq qlen 100
+    link/ether 00:a0:cc:66:18:78 brd ff:ff:ff:ff:ff:ff
+    RX: bytes  packets  errors  dropped overrun mcast   
+    2449949362 2786187  0       0       0       0      
+    RX errors: length   crc     frame   fifo    missed
+               0        0       0       0       0      
+    TX: bytes  packets  errors  dropped carrier collsns 
+    178558497  1783945  332     0       332     35172  
+    TX errors: aborted  fifo    window  heartbeat
+               0        0       0       332    
+kuznet@alisa:~ $
+\end{verbatim}
+These error names are pure Ethernetisms. Other devices
+may have non zero values in these fields but they may be
+interpreted differently.
+
+
+\section{{\tt ip address} --- protocol address management}
+
+\paragraph{Abbreviations:} \verb|address|, \verb|addr|, \verb|a|.
+
+\paragraph{Object:} The \verb|address| is a protocol (IP or IPv6) address attached
+to a network device. Each device must have at least one address
+to use the corresponding protocol. It is possible to have several
+different addresses attached to one device. These addresses are not
+discriminated, so that the term {\em alias\/} is not quite appropriate
+for them and we do not use it in this document.
+
+The \verb|ip addr| command displays addresses and their properties,
+adds new addresses and deletes old ones.
+
+\paragraph{Commands:} \verb|add|, \verb|delete|, \verb|flush| and \verb|show|
+(or \verb|list|).
+
+
+\subsection{{\tt ip address add} --- add a new protocol address}
+\label{IP-ADDR-ADD}
+
+\paragraph{Abbreviations:} \verb|add|, \verb|a|.
+
+\paragraph{Arguments:}
+
+\begin{itemize}
+\item \verb|dev NAME|
+
+\noindent--- the name of the device to add the address to.
+
+\item \verb|local ADDRESS| (default)
+
+--- the address of the interface. The format of the address depends
+on the protocol. It is a dotted quad for IP and a sequence of hexadecimal halfwords
+separated by colons for IPv6. The \verb|ADDRESS| may be followed by
+a slash and a decimal number which encodes the network prefix length.
+
+
+\item \verb|peer ADDRESS|
+
+--- the address of the remote endpoint for pointopoint interfaces.
+Again, the \verb|ADDRESS| may be followed by a slash and a decimal number,
+encoding the network prefix length. If a peer address is specified,
+the local address {\em cannot\/} have a prefix length. The network prefix is associated
+with the peer rather than with the local address.
+
+
+\item \verb|broadcast ADDRESS|
+
+--- the broadcast address on the interface.
+
+It is possible to use the special symbols \verb|'+'| and \verb|'-'|
+instead of the broadcast address. In this case, the broadcast address
+is derived by setting/resetting the host bits of the interface prefix.
+
+\vskip 1mm
+\begin{NB}
+Unlike \verb|ifconfig|, the \verb|ip| utility {\em does not\/} set any broadcast
+address unless explicitly requested.
+\end{NB}
+
+
+\item \verb|label NAME|
+
+--- Each address may be tagged with a label string.
+In order to preserve compatibility with Linux-2.0 net aliases,
+this string must coincide with the name of the device or must be prefixed
+with the device name followed by colon.
+
+
+\item \verb|scope SCOPE_VALUE|
+
+--- the scope of the area where this address is valid.
+The available scopes are listed in file \verb|/etc/iproute2/rt_scopes|.
+Predefined scope values are:
+
+ \begin{itemize}
+       \item \verb|global| --- the address is globally valid.
+       \item \verb|site| --- (IPv6 only) the address is site local,
+       i.e.\ it is valid inside this site.
+       \item \verb|link| --- the address is link local, i.e.\ 
+       it is valid only on this device.
+       \item \verb|host| --- the address is valid only inside this host.
+ \end{itemize}
+
+Appendix~\ref{ADDR-SEL} (p.\pageref{ADDR-SEL} of this document)
+contains more details on address scopes.
+
+\end{itemize}
+
+\paragraph{Examples:}
+\begin{itemize}
+\item \verb|ip addr add 127.0.0.1/8 dev lo brd + scope host|
+
+--- add the usual loopback address to the loopback device.
+
+\item \verb|ip addr add 10.0.0.1/24 brd + dev eth0 label eth0:Alias|
+
+--- add the address 10.0.0.1 with prefix length 24 (i.e.\ netmask
+\verb|255.255.255.0|), standard broadcast and label \verb|eth0:Alias|
+to the interface \verb|eth0|.
+\end{itemize}
+
+
+\subsection{{\tt ip address delete} --- delete a protocol address}
+
+\paragraph{Abbreviations:} \verb|delete|, \verb|del|, \verb|d|.
+
+\paragraph{Arguments:} coincide with the arguments of \verb|ip addr add|.
+The device name is a required argument. The rest are optional.
+If no arguments are given, the first address is deleted.
+
+\paragraph{Examples:}
+\begin{itemize}
+\item \verb|ip addr del 127.0.0.1/8 dev lo|
+
+--- deletes the loopback address from the loopback device.
+It would be best not to repeat this experiment.
+
+\item Disable IP on the interface \verb|eth0|:
+\begin{verbatim}
+  while ip -f inet addr del dev eth0; do
+    : nothing
+  done
+\end{verbatim}
+Another method to disable IP on an interface using {\tt ip addr flush}
+may be found in Sec.\ref{IP-ADDR-FLUSH}, p.\pageref{IP-ADDR-FLUSH}.
+
+\end{itemize}
+
+
+\subsection{{\tt ip address show} --- display protocol addresses}
+
+\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|lst|, \verb|sh|, \verb|ls|,
+\verb|l|.
+
+\paragraph{Arguments:}
+
+\begin{itemize}
+\item \verb|dev NAME| (default)
+
+--- the name of the device.
+
+\item \verb|scope SCOPE_VAL|
+
+--- only list addresses with this scope.
+
+\item \verb|to PREFIX|
+
+--- only list addresses matching this prefix.
+
+\item \verb|label PATTERN|
+
+--- only list addresses with labels matching the \verb|PATTERN|.
+\verb|PATTERN| is a usual shell style pattern.
+
+
+\item \verb|dynamic| and \verb|permanent|
+
+--- (IPv6 only) only list addresses installed due to stateless
+address configuration or only list permanent (not dynamic) addresses.
+
+\item \verb|tentative|
+
+--- (IPv6 only) only list addresses which did not pass duplicate
+address detection.
+
+\item \verb|deprecated|
+
+--- (IPv6 only) only list deprecated addresses.
+
+
+\item  \verb|primary| and \verb|secondary|
+
+--- only list primary (or secondary) addresses.
+
+\end{itemize}
+
+
+\paragraph{Output format:}
+
+\begin{verbatim}
+kuznet@alisa:~ $ ip addr ls eth0
+3: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc cbq qlen 100
+    link/ether 00:a0:cc:66:18:78 brd ff:ff:ff:ff:ff:ff
+    inet 193.233.7.90/24 brd 193.233.7.255 scope global eth0
+    inet6 3ffe:2400:0:1:2a0:ccff:fe66:1878/64 scope global dynamic 
+       valid_lft forever preferred_lft 604746sec
+    inet6 fe80::2a0:ccff:fe66:1878/10 scope link 
+kuznet@alisa:~ $ 
+\end{verbatim}
+
+The first two lines coincide with the output of \verb|ip link ls|.
+It is natural to interpret link layer addresses
+as addresses of the protocol family \verb|AF_PACKET|.
+
+Then the list of IP and IPv6 addresses follows, accompanied by
+additional address attributes: scope value (see Sec.\ref{IP-ADDR-ADD},
+p.\pageref{IP-ADDR-ADD} above), flags and the address label.
+
+Address flags are set by the kernel and cannot be changed
+administratively. Currently, the following flags are defined:
+
+\begin{enumerate}
+\item \verb|secondary|
+
+--- the address is not used when selecting the default source address
+of outgoing packets (cf.\ Appendix~\ref{ADDR-SEL}, p.\pageref{ADDR-SEL}).
+An IP address becomes secondary if another address with the same
+prefix bits already exists. The first address is primary.
+It is the leader of the group of all secondary addresses. When the leader
+is deleted, all secondaries are purged too.
+
+
+\item \verb|dynamic|
+
+--- the address was created due to stateless autoconfiguration~\cite{RFC-ADDRCONF}.
+In this case the output also shows the lifetimes during which
+the address is still valid. After \verb|preferred_lft| expires the address is
+moved to the deprecated state. After \verb|valid_lft| expires the address
+is finally invalidated.
+
+\item \verb|deprecated|
+
+--- the address is deprecated, i.e.\ it is still valid, but cannot
+be used by newly created connections.
+
+\item \verb|tentative|
+
+--- the address is not used because duplicate address detection~\cite{RFC-ADDRCONF}
+is still not complete or failed.
+
+\end{enumerate}
+
+
+\subsection{{\tt ip address flush} --- flush protocol addresses}
+\label{IP-ADDR-FLUSH}
+
+\paragraph{Abbreviations:} \verb|flush|, \verb|f|.
+
+\paragraph{Description:}This command flushes the protocol addresses
+selected by some criteria.
+
+\paragraph{Arguments:} This command has the same arguments as \verb|show|.
+The difference is that it does not run when no arguments are given.
+
+\paragraph{Warning:} This command (and other \verb|flush| commands
+described below) is pretty dangerous. If you make a mistake, it will
+not forgive it, but will cruelly purge all the addresses.
+
+\paragraph{Statistics:} With the \verb|-statistics| option, the command
+becomes verbose. It prints out the number of deleted addresses and the number
+of rounds made to flush the address list. If this option is given
+twice, \verb|ip addr flush| also dumps all the deleted addresses
+in the format described in the previous subsection.
+
+\paragraph{Example:} Delete all the addresses from the private network
+10.0.0.0/8:
+\begin{verbatim}
+netadm@amber:~ # ip -s -s a f to 10/8
+2: dummy    inet 10.7.7.7/16 brd 10.7.255.255 scope global dummy
+3: eth0    inet 10.10.7.7/16 brd 10.10.255.255 scope global eth0
+4: eth1    inet 10.8.7.7/16 brd 10.8.255.255 scope global eth1
+
+*** Round 1, deleting 3 addresses ***
+*** Flush is complete after 1 round ***
+netadm@amber:~ # 
+\end{verbatim}
+Another instructive example is disabling IP on all the Ethernets:
+\begin{verbatim}
+netadm@amber:~ # ip -4 addr flush label "eth*"
+\end{verbatim}
+And the last example shows how to flush all the IPv6 addresses
+acquired by the host from stateless address autoconfiguration
+after you enabled forwarding or disabled autoconfiguration.
+\begin{verbatim}
+netadm@amber:~ # ip -6 addr flush dynamic
+\end{verbatim}
+
+
+
+\section{{\tt ip neighbour} --- neighbour/arp tables management}
+
+\paragraph{Abbreviations:} \verb|neighbour|, \verb|neighbor|, \verb|neigh|,
+\verb|n|.
+
+\paragraph{Object:} \verb|neighbour| objects establish bindings between protocol
+addresses and link layer addresses for hosts sharing the same link.
+Neighbour entries are organized into tables. The IPv4 neighbour table
+is known by another name --- the ARP table.
+
+The corresponding commands display neighbour bindings
+and their properties, add new neighbour entries and delete old ones.
+
+\paragraph{Commands:} \verb|add|, \verb|change|, \verb|replace|,
+\verb|delete|, \verb|flush| and \verb|show| (or \verb|list|).
+
+\paragraph{See also:} Appendix~\ref{PROXY-NEIGH}, p.\pageref{PROXY-NEIGH}
+describes how to manage proxy ARP/NDISC with the \verb|ip| utility.
+
+
+\subsection{{\tt ip neighbour add} --- add a new neighbour entry\\
+       {\tt ip neighbour change} --- change an existing entry\\
+       {\tt ip neighbour replace} --- add a new entry or change an existing one}
+
+\paragraph{Abbreviations:} \verb|add|, \verb|a|; \verb|change|, \verb|chg|;
+\verb|replace|,        \verb|repl|.
+
+\paragraph{Description:} These commands create new neighbour records
+or update existing ones.
+
+\paragraph{Arguments:}
+
+\begin{itemize}
+\item \verb|to ADDRESS| (default)
+
+--- the protocol address of the neighbour. It is either an IPv4 or IPv6 address.
+
+\item \verb|dev NAME|
+
+--- the interface to which this neighbour is attached.
+
+
+\item \verb|lladdr LLADDRESS|
+
+--- the link layer address of the neighbour. \verb|LLADDRESS| can also be
+\verb|null|. 
+
+\item \verb|nud NUD_STATE|
+
+--- the state of the neighbour entry. \verb|nud| is an abbreviation for ``Neighbour
+Unreachability Detection''. The state can take one of the following values:
+
+\begin{enumerate}
+\item \verb|permanent| --- the neighbour entry is valid forever and can only be removed
+administratively.
+\item \verb|noarp| --- the neighbour entry is valid. No attempts to validate
+this entry will be made but it can be removed when its lifetime expires.
+\item \verb|reachable| --- the neighbour entry is valid until the reachability
+timeout expires.
+\item \verb|stale| --- the neighbour entry is valid but suspicious.
+This option to \verb|ip neigh| does not change the neighbour state if
+it was valid and the address is not changed by this command.
+\end{enumerate}
+
+\end{itemize}
+
+\paragraph{Examples:}
+\begin{itemize}
+\item \verb|ip neigh add 10.0.0.3 lladdr 0:0:0:0:0:1 dev eth0 nud perm|
+
+--- add a permanent ARP entry for the neighbour 10.0.0.3 on the device \verb|eth0|.
+
+\item \verb|ip neigh chg 10.0.0.3 dev eth0 nud reachable|
+
+--- change its state to \verb|reachable|.
+\end{itemize}
+
+
+\subsection{{\tt ip neighbour delete} --- delete a neighbour entry}
+
+\paragraph{Abbreviations:} \verb|delete|, \verb|del|, \verb|d|.
+
+\paragraph{Description:} This command invalidates a neighbour entry.
+
+\paragraph{Arguments:} The arguments are the same as with \verb|ip neigh add|,
+except that \verb|lladdr| and \verb|nud| are ignored.
+
+
+\paragraph{Example:}
+\begin{itemize}
+\item \verb|ip neigh del 10.0.0.3 dev eth0|
+
+--- invalidate an ARP entry for the neighbour 10.0.0.3 on the device \verb|eth0|.
+
+\end{itemize}
+
+\begin{NB}
+ The deleted neighbour entry will not disappear from the tables
+ immediately. If it is in use it cannot be deleted until the last
+ client releases it. Otherwise it will be destroyed during
+ the next garbage collection.
+\end{NB}
+
+
+\paragraph{Warning:} Attempts to delete or manually change
+a \verb|noarp| entry created by the kernel may result in unpredictable behaviour.
+Particularly, the kernel may try to resolve this address even
+on a \verb|NOARP| interface or if the address is multicast or broadcast.
+
+
+\subsection{{\tt ip neighbour show} --- list neighbour entries}
+
+\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|sh|, \verb|ls|.
+
+\paragraph{Description:}This command displays neighbour tables.
+
+\paragraph{Arguments:}
+
+\begin{itemize}
+
+\item \verb|to ADDRESS| (default)
+
+--- the prefix selecting the neighbours to list.
+
+\item \verb|dev NAME|
+
+--- only list the neighbours attached to this device.
+
+\item \verb|unused|
+
+--- only list neighbours which are not currently in use.
+
+\item \verb|nud NUD_STATE|
+
+--- only list neighbour entries in this state. \verb|NUD_STATE| takes
+values listed below or the special value \verb|all| which means all states.
+This option may occur more than once. If this option is absent, \verb|ip|
+lists all entries except for \verb|none| and \verb|noarp|.
+
+\end{itemize}
+
+
+\paragraph{Output format:}
+
+\begin{verbatim}
+kuznet@alisa:~ $ ip neigh ls
+:: dev lo lladdr 00:00:00:00:00:00 nud noarp
+fe80::200:cff:fe76:3f85 dev eth0 lladdr 00:00:0c:76:3f:85 router \
+    nud stale
+0.0.0.0 dev lo lladdr 00:00:00:00:00:00 nud noarp
+193.233.7.254 dev eth0 lladdr 00:00:0c:76:3f:85 nud reachable
+193.233.7.85 dev eth0 lladdr 00:e0:1e:63:39:00 nud stale
+kuznet@alisa:~ $ 
+\end{verbatim}
+
+The first word of each line is the protocol address of the neighbour.
+Then the device name follows. The rest of the line describes the contents of
+the neighbour entry identified by the pair (device, address).
+
+\verb|lladdr| is the link layer address of the neighbour.
+
+\verb|nud| is the state of the ``neighbour unreachability detection'' machine
+for this entry. The detailed description of the neighbour
+state machine can be found in~\cite{RFC-NDISC}. Here is the full list
+of the states with short descriptions:
+
+\begin{enumerate}
+\item\verb|none| --- the state of the neighbour is void.
+\item\verb|incomplete| --- the neighbour is in the process of resolution.
+\item\verb|reachable| --- the neighbour is valid and apparently reachable.
+\item\verb|stale| --- the neighbour is valid, but is probably already
+unreachable, so the kernel will try to check it at the first transmission.
+\item\verb|delay| --- a packet has been sent to the stale neighbour and the kernel is waiting
+for confirmation.
+\item\verb|probe| --- the delay timer expired but no confirmation was received.
+The kernel has started to probe the neighbour with ARP/NDISC messages.
+\item\verb|failed| --- resolution has failed.
+\item\verb|noarp| --- the neighbour is valid. No attempts to check the entry
+will be made.
+\item\verb|permanent| --- it is a \verb|noarp| entry, but only the administrator
+may remove the entry from the neighbour table.
+\end{enumerate}
+
+The link layer address is valid in all states except for \verb|none|,
+\verb|failed| and \verb|incomplete|.
+
+IPv6 neighbours can be marked with the additional flag \verb|router|
+which means that the neighbour introduced itself as an IPv6 router~\cite{RFC-NDISC}.
+
+\paragraph{Statistics:} The \verb|-statistics| option displays some usage
+statistics, f.e.\
+
+\begin{verbatim}
+kuznet@alisa:~ $ ip -s n ls 193.233.7.254
+193.233.7.254 dev eth0 lladdr 00:00:0c:76:3f:85 ref 5 used 12/13/20 \
+    nud reachable
+kuznet@alisa:~ $ 
+\end{verbatim}
+
+Here \verb|ref| is the number of users of this entry
+and \verb|used| is a triplet of time intervals in seconds
+separated by slashes. In this case they show that:
+
+\begin{enumerate}
+\item the entry was used 12 seconds ago.
+\item the entry was confirmed 13 seconds ago.
+\item the entry was updated 20 seconds ago.
+\end{enumerate}
+
+\subsection{{\tt ip neighbour flush} --- flush neighbour entries}
+
+\paragraph{Abbreviations:} \verb|flush|, \verb|f|.
+
+\paragraph{Description:}This command flushes neighbour tables, selecting
+entries to flush by some criteria.
+
+\paragraph{Arguments:} This command has the same arguments as \verb|show|.
+The differences are that it does not run when no arguments are given,
+and that the default neighbour states to be flushed do not include
+\verb|permanent| and \verb|noarp|.
+
+
+\paragraph{Statistics:} With the \verb|-statistics| option, the command
+becomes verbose. It prints out the number of deleted neighbours and the number
+of rounds made to flush the neighbour table. If the option is given
+twice, \verb|ip neigh flush| also dumps all the deleted neighbours
+in the format described in the previous subsection.
+
+\paragraph{Example:}
+\begin{verbatim}
+netadm@alisa:~ # ip -s -s n f 193.233.7.254
+193.233.7.254 dev eth0 lladdr 00:00:0c:76:3f:85 ref 5 used 12/13/20 \
+    nud reachable
+
+*** Round 1, deleting 1 entries ***
+*** Flush is complete after 1 round ***
+netadm@alisa:~ # 
+\end{verbatim}
+
+
+\section{{\tt ip route} --- routing table management}
+\label{IP-ROUTE}
+
+\paragraph{Abbreviations:} \verb|route|, \verb|ro|, \verb|r|.
+
+\paragraph{Object:} \verb|route| entries in the kernel routing tables keep
+information about paths to other networked nodes.
+
+Each route entry has a {\em key\/} consisting of a {\em prefix\/}
+(i.e.\ a pair containing a network address and the length of its mask) and,
+optionally, the TOS value. An IP packet matches the route if the highest
+bits of its destination address are equal to the route prefix at least
+up to the prefix length and if the TOS of the route is zero or equal to
+the TOS of the packet.
+If several routes match the packet, the following pruning rules
+are used to select the best one (see~\cite{RFC1812}):
+\begin{enumerate}
+\item The longest matching prefix is selected. All shorter ones
+are dropped.
+
+\item If the TOS of some route with the longest prefix is equal to the TOS
+of the packet, the routes with different TOS are dropped.
+
+If no exact TOS match was found and routes with TOS=0 exist,
+the rest of the routes are pruned.
+
+Otherwise, the route lookup fails.
+
+\item If several routes remain after the previous steps, then
+the routes with the best preference values are selected.
+
+\item If we still have several routes, then the {\em first\/} of them
+is selected.
+
+\begin{NB}
+ Note the ambiguity of the last step. Unfortunately, Linux
+ historically allows such a bizarre situation. The sense of the
+word ``first'' depends on the order of route additions and it is practically
+impossible to maintain a bundle of such routes in this order.
+\end{NB}
+
+For simplicity we will limit ourselves to the case where such a situation
+is impossible and routes are uniquely identified by the triplet
+\{prefix, tos, preference\}. Actually, it is impossible to create
+non-unique routes with \verb|ip| commands described in this section.
+
+One useful exception to this rule is the default route on non-forwarding
+hosts. It is ``officially'' allowed to have several fallback routes
+when several routers are present on directly connected networks.
+In this case, Linux-2.2 makes ``dead gateway detection''~\cite{RFC1122}
+controlled by neighbour unreachability detection and by advice
+from transport protocols to select a working router, so the order
+of the routes is not essential. However, in this case,
+fiddling with default routes manually is not recommended. Use the Router Discovery
+protocol (see Appendix~\ref{EXAMPLE-SETUP}, p.\pageref{EXAMPLE-SETUP})
+instead. Actually, Linux-2.2 IPv6 does not give user level applications
+any access to default routes.
+\end{enumerate}
+
+Certainly, the steps above are not performed exactly
+in this sequence. Instead, the routing table in the kernel is kept
+in some data structure to achieve the final result
+with minimal cost. However, regardless of the particular
+routing algorithm implemented in the kernel, we can summarize
+the statements above as: a route is identified by the triplet
+\{prefix, tos, preference\}. This {\em key\/} lets us locate
+the route in the routing table.
+
+\paragraph{Route attributes:} Each route key refers to a routing
+information record containing
+the data required to deliver IP packets (f.e.\ output device and
+next hop router) and some optional attributes (f.e.\ the path MTU or
+the preferred source address when communicating with this destination).
+These attributes are described in the following subsection.
+
+\paragraph{Route types:} \label{IP-ROUTE-TYPES}
+It is important that the set
+of required and optional attributes depends on the route {\em type\/}.
+The most important route type
+is \verb|unicast|. It describes real paths to other hosts.
+As a rule, common routing tables contain only such routes. However,
+there are other types of routes with different semantics. The
+full list of types understood by Linux-2.2 is:
+\begin{itemize}
+\item \verb|unicast| --- the route entry describes real paths to the
+destinations covered by the route prefix.
+\item \verb|unreachable| --- these destinations are unreachable. Packets
+are discarded and the ICMP message {\em host unreachable\/} is generated.
+The local senders get an \verb|EHOSTUNREACH| error.
+\item \verb|blackhole| --- these destinations are unreachable. Packets
+are discarded silently. The local senders get an \verb|EINVAL| error.
+\item \verb|prohibit| --- these destinations are unreachable. Packets
+are discarded and the ICMP message {\em communication administratively
+prohibited\/} is generated. The local senders get an \verb|EACCES| error.
+\item \verb|local| --- the destinations are assigned to this
+host. The packets are looped back and delivered locally.
+\item \verb|broadcast| --- the destinations are broadcast addresses.
+The packets are sent as link broadcasts.
+\item \verb|throw| --- a special control route used together with policy
+rules (see sec.\ref{IP-RULE}, p.\pageref{IP-RULE}). If such a route is selected, lookup
+in this table is terminated pretending that no route was found.
+Without policy routing it is equivalent to the absence of the route in the routing
+table. The packets are dropped and the ICMP message {\em net unreachable\/}
+is generated. The local senders get an \verb|ENETUNREACH| error.
+\item \verb|nat| --- a special NAT route. Destinations covered by the prefix
+are considered to be dummy (or external) addresses which require translation
+to real (or internal) ones before forwarding. The addresses to translate to
+are selected with the attribute \verb|via|. More about NAT is
+in Appendix~\ref{ROUTE-NAT}, p.\pageref{ROUTE-NAT}.
+\item \verb|anycast| --- ({\em not implemented\/}) the destinations are
+{\em anycast\/} addresses assigned to this host. They are mainly equivalent
+to \verb|local| with one difference: such addresses are invalid when used
+as the source address of any packet.
+\item \verb|multicast| --- a special type used for multicast routing.
+It is not present in normal routing tables.
+\end{itemize}
+
+\paragraph{Route tables:} Linux-2.2 can pack routes into several routing
+tables identified by a number in the range from 1 to 255 or by
+name from the file \verb|/etc/iproute2/rt_tables|. By default all normal
+routes are inserted into the \verb|main| table (ID 254) and the kernel only uses
+this table when calculating routes.
+
+Actually, one other table always exists, which is invisible but
+even more important. It is the \verb|local| table (ID 255). This table
+consists of routes for local and broadcast addresses. The kernel maintains
+this table automatically and the administrator usually need not modify it
+or even look at it.
+
+The multiple routing tables enter the game when {\em policy routing\/}
+is used. See sec.\ref{IP-RULE}, p.\pageref{IP-RULE}.
+In this case, the table identifier effectively becomes
+one more parameter, which should be added to the triplet
+\{prefix, tos, preference\} to uniquely identify the route.
+
+
+\subsection{{\tt ip route add} --- add a new route\\
+       {\tt ip route change} --- change a route\\
+       {\tt ip route replace} --- change a route or add a new one}
+\label{IP-ROUTE-ADD}
+
+\paragraph{Abbreviations:} \verb|add|, \verb|a|; \verb|change|, \verb|chg|;
+       \verb|replace|, \verb|repl|.
+
+
+\paragraph{Arguments:}
+\begin{itemize}
+\item \verb|to PREFIX| or \verb|to TYPE PREFIX| (default)
+
+--- the destination prefix of the route. If \verb|TYPE| is omitted,
+\verb|ip| assumes type \verb|unicast|. Other values of \verb|TYPE|
+are listed above. \verb|PREFIX| is an IP or IPv6 address optionally followed
+by a slash and the prefix length. If the length of the prefix is missing,
+\verb|ip| assumes a full-length host route. There is also a special
+\verb|PREFIX| --- \verb|default| --- which is equivalent to IP \verb|0/0| or
+to IPv6 \verb|::/0|.
+
+\item \verb|tos TOS| or \verb|dsfield TOS|
+
+--- the Type Of Service (TOS) key. This key has no associated mask and
+the longest match is understood as: First, compare the TOS
+of the route and of the packet. If they are not equal, then the packet
+may still match a route with a zero TOS. \verb|TOS| is either an 8 bit hexadecimal
+number or an identifier from {\tt /etc/iproute2/rt\_dsfield}.
+
+
+\item \verb|metric NUMBER| or \verb|preference NUMBER|
+
+--- the preference value of the route. \verb|NUMBER| is an arbitrary 32bit number.
+
+\item \verb|table TABLEID|
+
+--- the table to add this route to.
+\verb|TABLEID| may be a number or a string from the file
+\verb|/etc/iproute2/rt_tables|. If this parameter is omitted,
+\verb|ip| assumes the \verb|main| table, with the exception of
+\verb|local|, \verb|broadcast| and \verb|nat| routes, which are
+put into the \verb|local| table by default.
+
+\item \verb|dev NAME|
+
+--- the output device name.
+
+\item \verb|via ADDRESS|
+
+--- the address of the nexthop router. Actually, the sense of this field depends
+on the route type. For normal \verb|unicast| routes it is either the true nexthop
+router or, if it is a direct route installed in BSD compatibility mode,
+it can be a local address of the interface.
+For NAT routes it is the first address of the block of translated IP destinations.
+
+\item \verb|src ADDRESS|
+
+--- the source address to prefer when sending to the destinations
+covered by the route prefix.
+
+\item \verb|realm REALMID|
+
+--- the realm to which this route is assigned.
+\verb|REALMID| may be a number or a string from the file
+\verb|/etc/iproute2/rt_realms|. Sec.\ref{RT-REALMS} (p.\pageref{RT-REALMS})
+contains more information on realms.
+
+\item \verb|mtu MTU| or \verb|mtu lock MTU|
+
+--- the MTU along the path to the destination. If the modifier \verb|lock| is
+not used, the MTU may be updated by the kernel due to Path MTU Discovery.
+If the modifier \verb|lock| is used, no path MTU discovery will be tried,
+all packets will be sent without the DF bit in the IPv4 case
+or fragmented to MTU for IPv6.
+
+\item \verb|window NUMBER|
+
+--- the maximal window for TCP to advertise to these destinations,
+measured in bytes. It limits maximal data bursts that our TCP
+peers are allowed to send to us.
+
+\item \verb|rtt NUMBER|
+
+--- the initial RTT (``Round Trip Time'') estimate.
+
+
+\item \verb|rttvar NUMBER|
+
+--- \threeonly the initial RTT variance estimate.
+
+
+\item \verb|ssthresh NUMBER|
+
+--- \threeonly an estimate for the initial slow start threshold.
+
+
+\item \verb|cwnd NUMBER|
+
+--- \threeonly the clamp for congestion window. It is ignored if the \verb|lock|
+    flag is not used.
+
+
+\item \verb|advmss NUMBER|
+
+--- \threeonly the MSS (``Maximal Segment Size'') to advertise to these
+    destinations when establishing TCP connections. If it is not given,
+    Linux uses a default value calculated from the first hop device MTU.
+
+\begin{NB}
+  If the path to these destinations is asymmetric, this guess may be wrong.
+\end{NB}
+
+\item \verb|reordering NUMBER|
+
+--- \threeonly Maximal reordering on the path to this destination.
+    If it is not given, Linux uses the value selected with \verb|sysctl|
+    variable \verb|net/ipv4/tcp_reordering|.
+
+
+
+\item \verb|nexthop NEXTHOP|
+
+--- the nexthop of a multipath route. \verb|NEXTHOP| is a complex value
+with its own syntax similar to the top level argument lists:
+\begin{itemize}
+\item \verb|via ADDRESS| is the nexthop router.
+\item \verb|dev NAME| is the output device.
+\item \verb|weight NUMBER| is a weight for this element of a multipath
+route reflecting its relative bandwidth or quality.
+\end{itemize}
+
+\item \verb|scope SCOPE_VAL|
+
+--- the scope of the destinations covered by the route prefix.
+\verb|SCOPE_VAL| may be a number or a string from the file
+\verb|/etc/iproute2/rt_scopes|.
+If this parameter is omitted,
+\verb|ip| assumes scope \verb|global| for all gatewayed \verb|unicast|
+routes, scope \verb|link| for direct \verb|unicast| and \verb|broadcast| routes
+and scope \verb|host| for \verb|local| routes.
+
+\item \verb|protocol RTPROTO|
+
+--- the routing protocol identifier of this route.
+\verb|RTPROTO| may be a number or a string from the file
+\verb|/etc/iproute2/rt_protos|. If the routing protocol ID is
+not given, \verb|ip| assumes protocol \verb|boot| (i.e.\
+it assumes the route was added by someone who doesn't
+understand what they are doing). Several protocol values have a fixed interpretation.
+Namely:
+\begin{itemize}
+\item \verb|redirect| --- the route was installed due to an ICMP redirect.
+\item \verb|kernel| --- the route was installed by the kernel during
+autoconfiguration.
+\item \verb|boot| --- the route was installed during the bootup sequence.
+If a routing daemon starts, it will purge all of them.
+\item \verb|static| --- the route was installed by the administrator
+to override dynamic routing. Routing daemons will respect such routes
+and, probably, even advertise them to their peers.
+\item \verb|ra| --- the route was installed by Router Discovery protocol.
+\end{itemize}
+The rest of the values are not reserved and the administrator is free
+to assign (or not to assign) protocol tags. At least, routing
+daemons should take care of setting some unique protocol values,
+f.e.\ as they are assigned in \verb|rtnetlink.h| or in \verb|rt_protos|
+database.
+
+
+\item \verb|onlink|
+
+--- pretend that the nexthop is directly attached to this link,
+even if it does not match any interface prefix. One application of this
+option may be found in~\cite{IP-TUNNELS}.
+
+\item \verb|equalize|
+
+--- allow packet by packet randomization on multipath routes.
+Without this modifier, the route will be frozen to one selected
+nexthop, so that load splitting will only occur on a per-flow basis.
+\verb|equalize| only works if the kernel is patched.
+
+
+\end{itemize}
+
+
+\begin{NB}
+  Actually there are more commands: \verb|prepend| does the same
+  thing as classic \verb|route add|, i.e.\ adds a route, even if another
+  route to the same destination exists. Its opposite case is \verb|append|,
+  which adds the route to the end of the list. Avoid these
+  features.
+\end{NB}
+\begin{NB}
+  More sad news, IPv6 only understands the \verb|append| command correctly.
+  All the others are translated into \verb|append| commands. Certainly,
+  this will change in the future.
+\end{NB}
+
+\paragraph{Examples:}
+\begin{itemize}
+\item add a plain route to network 10.0.0/24 via gateway 193.233.7.65
+\begin{verbatim}
+  ip route add 10.0.0/24 via 193.233.7.65
+\end{verbatim}
+\item change it to a direct route via the \verb|dummy| device
+\begin{verbatim}
+  ip ro chg 10.0.0/24 dev dummy
+\end{verbatim}
+\item add a default multipath route splitting the load between \verb|ppp0|
+and \verb|ppp1|
+\begin{verbatim}
+  ip route add default scope global nexthop dev ppp0 \
+                                    nexthop dev ppp1
+\end{verbatim}
+Note the scope value. It is not necessary but it informs the kernel
+that this route is gatewayed rather than direct. Actually, if you
+know the addresses of remote endpoints it would be better to use the
+\verb|via| parameter.
+\item announce that the address 192.203.80.144 is not a real one, but
+should be translated to 193.233.7.83 before forwarding
+\begin{verbatim}
+  ip route add nat 192.203.80.144 via 193.233.7.83
+\end{verbatim}
+Backward translation is set up with policy rules described
+in the following section (sec.\ref{IP-RULE}, p.\pageref{IP-RULE}).
+\end{itemize}
+
+\subsection{{\tt ip route delete} --- delete a route}
+
+\paragraph{Abbreviations:} \verb|delete|, \verb|del|, \verb|d|.
+
+\paragraph{Arguments:} \verb|ip route del| has the same arguments as
+\verb|ip route add|, but their semantics are a bit different.
+
+Key values (\verb|to|, \verb|tos|, \verb|preference| and \verb|table|)
+select the route to delete. If optional attributes are present, \verb|ip|
+verifies that they coincide with the attributes of the route to delete.
+If no route with the given key and attributes was found, \verb|ip route del|
+fails.
+\begin{NB}
+Linux-2.0 had the option to delete a route selected only by prefix address,
+ignoring its length (i.e.\ netmask). This option no longer exists
+because it was ambiguous. However, look at {\tt ip route flush}
+(sec.\ref{IP-ROUTE-FLUSH}, p.\pageref{IP-ROUTE-FLUSH}) which
+provides similar and even richer functionality.
+\end{NB}
+
+\paragraph{Example:}
+\begin{itemize}
+\item delete the multipath route created by the command in the previous subsection
+\begin{verbatim}
+  ip route del default scope global nexthop dev ppp0 \
+                                    nexthop dev ppp1
+\end{verbatim}
+\end{itemize}
+
+
+
+\subsection{{\tt ip route show} --- list routes}
+
+\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|sh|, \verb|ls|, \verb|l|.
+
+\paragraph{Description:} The command displays the contents of the routing tables
+or the route(s) selected by some criteria.
+
+
+\paragraph{Arguments:}
+\begin{itemize}
+\item \verb|to SELECTOR| (default)
+
+--- only select routes from the given range of destinations. \verb|SELECTOR|
+consists of an optional modifier (\verb|root|, \verb|match| or \verb|exact|)
+and a prefix. \verb|root PREFIX| selects routes with prefixes not shorter
+than \verb|PREFIX|. F.e.\ \verb|root 0/0| selects the entire routing table.
+\verb|match PREFIX| selects routes with prefixes not longer than
+\verb|PREFIX|. F.e.\ \verb|match 10.0/16| selects \verb|10.0/16|,
+\verb|10/8| and \verb|0/0|, but it does not select \verb|10.1/16| and
+\verb|10.0.0/24|. And \verb|exact PREFIX| (or just \verb|PREFIX|)
+selects routes with this exact prefix. If none of these options
+is present, \verb|ip| assumes \verb|root 0/0|, i.e.\ it lists the entire table.
+
+
+\item \verb|tos TOS| or \verb|dsfield TOS|
+
+ --- only select routes with the given TOS.
+
+
+\item \verb|table TABLEID|
+
+ --- show the routes from this table(s). The default setting is to show
+\verb|table| \verb|main|. \verb|TABLEID| may either be the ID of a real table
+or one of the special values:
+  \begin{itemize}
+  \item \verb|all| --- list all of the tables.
+  \item \verb|cache| --- dump the routing cache.
+  \end{itemize}
+\begin{NB}
+  IPv6 has a single table. However, splitting it into \verb|main|, \verb|local|
+  and \verb|cache| is emulated by the \verb|ip| utility.
+\end{NB}
+
+\item \verb|cloned| or \verb|cached|
+
+--- list cloned routes i.e.\ routes which were dynamically forked from
+other routes because some route attribute (f.e.\ MTU) was updated.
+Actually, it is equivalent to \verb|table cache|.
+
+\item \verb|from SELECTOR|
+
+--- the same syntax as for \verb|to|, but it binds the source address range
+rather than destinations. Note that the \verb|from| option only works with
+cloned routes.
+
+\item \verb|protocol RTPROTO|
+
+--- only list routes of this protocol.
+
+
+\item \verb|scope SCOPE_VAL|
+
+--- only list routes with this scope.
+
+\item \verb|type TYPE|
+
+--- only list routes of this type.
+
+\item \verb|dev NAME|
+
+--- only list routes going via this device.
+
+\item \verb|via PREFIX|
+
+--- only list routes going via the nexthop routers selected by \verb|PREFIX|.
+
+\item \verb|src PREFIX|
+
+--- only list routes with preferred source addresses selected
+by \verb|PREFIX|.
+
+\item \verb|realm REALMID| or \verb|realms FROMREALM/TOREALM|
+
+--- only list routes with these realms.
+
+\end{itemize}
+
+\paragraph{Examples:} Let us count routes of protocol \verb|gated/bgp|
+on a router:
+\begin{verbatim}
+kuznet@amber:~ $ ip ro ls proto gated/bgp | wc
+   1413    9891    79010
+kuznet@amber:~ $
+\end{verbatim}
+To count the size of the routing cache, we have to use the \verb|-o| option
+because cached attributes can take more than one line of output:
+\begin{verbatim}
+kuznet@amber:~ $ ip -o ro ls cloned | wc
+   159    2543    18707
+kuznet@amber:~ $
+\end{verbatim}
+
+
+\paragraph{Output format:} The output of this command consists
+of per route records separated by line feeds.
+However, some records may consist
+of more than one line: particularly, this is the case when the route
+is cloned or you requested additional statistics. If the
+\verb|-o| option was given, then line feeds separating lines inside
+records are replaced with the backslash sign.
+
+The output has the same syntax as arguments given to {\tt ip route add},
+so that it can be understood easily. F.e.\
+\begin{verbatim}
+kuznet@amber:~ $ ip ro ls 193.233.7/24
+193.233.7.0/24 dev eth0  proto gated/conn  scope link \
+    src 193.233.7.65 realms inr.ac 
+kuznet@amber:~ $
+\end{verbatim}
+
+If you list cloned entries, the output contains other attributes which
+are evaluated during route calculation and updated during route
+lifetime. An example of the output is:
+\begin{verbatim}
+kuznet@amber:~ $ ip ro ls 193.233.7.82 tab cache
+193.233.7.82 from 193.233.7.82 dev eth0  src 193.233.7.65 \
+  realms inr.ac/inr.ac 
+    cache <src-direct,redirect>  mtu 1500 rtt 300 iif eth0
+193.233.7.82 dev eth0  src 193.233.7.65 realms inr.ac 
+    cache  mtu 1500 rtt 300
+kuznet@amber:~ $
+\end{verbatim}
+\begin{NB}
+  \label{NB-strange-route}
+  The route looks a bit strange, doesn't it? Did you notice that
+  it is a path from 193.233.7.82 back to 193.233.7.82? Well, you will
+  see in the section on \verb|ip route get| (p.\pageref{NB-nature-of-strangeness})
+  how it appeared.
+\end{NB}
+The second line, starting with the word \verb|cache|, shows
+additional attributes which normal routes do not possess.
+Cached flags are summarized in angle brackets:
+\begin{itemize}
+\item \verb|local| --- packets are delivered locally.
+It stands for loopback unicast routes, for broadcast routes
+and for multicast routes, if this host is a member of the corresponding
+group.
+
+\item \verb|reject| --- the path is bad. Any attempt to use it results
+in an error. See attribute \verb|error| below (p.\pageref{IP-ROUTE-GET-error}).
+
+\item \verb|mc| --- the destination is multicast.
+
+\item \verb|brd| --- the destination is broadcast.
+
+\item \verb|src-direct| --- the source is on a directly connected
+interface.
+
+\item \verb|redirected| --- the route was created by an ICMP Redirect.
+
+\item \verb|redirect| --- packets going via this route will 
+trigger an ICMP redirect.
+
+\item \verb|fastroute| --- the route is eligible to be used for fastroute.
+
+\item \verb|equalize| --- make packet by packet randomization
+along this path.
+
+\item \verb|dst-nat| --- the destination address requires translation.
+
+\item \verb|src-nat| --- the source address requires translation.
+
+\item \verb|masq| --- the source address requires masquerading.
+This feature disappeared in linux-2.4.
+
+\item \verb|notify| --- ({\em not implemented}) change/deletion
+of this route will trigger RTNETLINK notification.
+\end{itemize}
+
+Then some optional attributes follow:
+\begin{itemize}
+\item \verb|error| --- on \verb|reject| routes it is the error code
+returned to local senders when they try to use this route.
+These error codes are translated into ICMP error codes, sent to remote
+senders, according to the rules described above in the subsection
+devoted to route types (p.\pageref{IP-ROUTE-TYPES}).
+\label{IP-ROUTE-GET-error}
+
+\item \verb|expires| --- this entry will expire after this timeout.
+
+\item \verb|iif| --- the packets for this path are expected to arrive
+on this interface.
+\end{itemize}
+
+\paragraph{Statistics:} With the \verb|-statistics| option, more
+information about this route is shown:
+\begin{itemize}
+\item \verb|users| --- the number of users of this entry.
+\item \verb|age| --- shows when this route was last used.
+\item \verb|used| --- the number of lookups of this route since its creation.
+\end{itemize}
+
+
+\subsection{{\tt ip route flush} --- flush routing tables}
+\label{IP-ROUTE-FLUSH}
+
+\paragraph{Abbreviations:} \verb|flush|, \verb|f|.
+
+\paragraph{Description:} this command flushes routes selected
+by some criteria.
+
+\paragraph{Arguments:} the arguments have the same syntax and semantics
+as the arguments of \verb|ip route show|, but the selected routes are
+purged rather than listed. The only other difference is the default action: \verb|show|
+dumps the entire IP main routing table but \verb|flush| prints the helper page.
+The reason for this difference does not require any explanation, does it?
+
+
+\paragraph{Statistics:} With the \verb|-statistics| option, the command
+becomes verbose. It prints out the number of deleted routes and the number
+of rounds made to flush the routing table. If the option is given
+twice, \verb|ip route flush| also dumps all the deleted routes
+in the format described in the previous subsection.
+
+\paragraph{Examples:} The first example flushes all the
+gatewayed routes from the main table (f.e.\ after a routing daemon crash).
+\begin{verbatim}
+netadm@amber:~ # ip -4 ro flush scope global type unicast
+\end{verbatim}
+This option deserves to be put into a scriptlet \verb|routef|.
+\begin{NB}
+This option was described in the \verb|route(8)| man page borrowed
+from BSD, but was never implemented in Linux.
+\end{NB}
+
+The second example flushes all IPv6 cloned routes:
+\begin{verbatim}
+netadm@amber:~ # ip -6 -s -s ro flush cache
+3ffe:2400::220:afff:fef4:c5d1 via 3ffe:2400::220:afff:fef4:c5d1 \
+  dev eth0  metric 0 
+    cache  used 2 age 12sec mtu 1500 rtt 300
+3ffe:2400::280:adff:feb7:8034 via 3ffe:2400::280:adff:feb7:8034 \
+  dev eth0  metric 0 
+    cache  used 2 age 15sec mtu 1500 rtt 300
+3ffe:2400::280:c8ff:fe59:5bcc via 3ffe:2400::280:c8ff:fe59:5bcc \
+  dev eth0  metric 0 
+    cache  users 1 used 1 age 23sec mtu 1500 rtt 300
+3ffe:2400:0:1:2a0:ccff:fe66:1878 via 3ffe:2400:0:1:2a0:ccff:fe66:1878 \
+  dev eth1  metric 0 
+    cache  used 2 age 20sec mtu 1500 rtt 300
+3ffe:2400:0:1:a00:20ff:fe71:fb30 via 3ffe:2400:0:1:a00:20ff:fe71:fb30 \
+  dev eth1  metric 0 
+    cache  used 2 age 33sec mtu 1500 rtt 300
+ff02::1 via ff02::1 dev eth1  metric 0 
+    cache  users 1 used 1 age 45sec mtu 1500 rtt 300
+
+*** Round 1, deleting 6 entries ***
+*** Flush is complete after 1 round ***
+netadm@amber:~ # ip -6 -s -s ro flush cache
+Nothing to flush.
+netadm@amber:~ #
+\end{verbatim}
+
+The third example flushes BGP routing tables after a \verb|gated|
+death.
+\begin{verbatim}
+netadm@amber:~ # ip ro ls proto gated/bgp | wc
+   1408    9856    78730
+netadm@amber:~ # ip -s ro f proto gated/bgp
+
+*** Round 1, deleting 1408 entries ***
+*** Flush is complete after 1 round ***
+netadm@amber:~ # ip ro f proto gated/bgp
+Nothing to flush.
+netadm@amber:~ # ip ro ls proto gated/bgp
+netadm@amber:~ #
+\end{verbatim}
+
+
+\subsection{{\tt ip route get} --- get a single route}
+\label{IP-ROUTE-GET}
+
+\paragraph{Abbreviations:} \verb|get|, \verb|g|.
+
+\paragraph{Description:} this command gets a single route to a destination
+and prints its contents exactly as the kernel sees it.
+
+\paragraph{Arguments:} 
+\begin{itemize}
+\item \verb|to ADDRESS| (default)
+
+--- the destination address.
+
+\item \verb|from ADDRESS|
+
+--- the source address.
+
+\item \verb|tos TOS| or \verb|dsfield TOS|
+
+--- the Type Of Service.
+
+\item \verb|iif NAME|
+
+--- the device from which this packet is expected to arrive.
+
+\item \verb|oif NAME|
+
+--- force the output device on which this packet will be routed.
+
+\item \verb|connected|
+
+--- if no source address (option \verb|from|) was given, relookup
+the route with the source set to the preferred address received from the first lookup.
+If policy routing is used, it may be a different route.
+
+\end{itemize}
+
+Note that this operation is not equivalent to \verb|ip route show|.
+\verb|show| shows existing routes. \verb|get| resolves them and
+creates new clones if necessary. Essentially, \verb|get|
+is equivalent to sending a packet along this path.
+If the \verb|iif| argument is not given, the kernel creates a route
+to output packets towards the requested destination.
+This is equivalent to pinging the destination
+with a subsequent {\tt ip route ls cache}, however, no packets are
+actually sent. With the \verb|iif| argument, the kernel pretends
+that a packet arrived from this interface and searches for
+a path to forward the packet.
+
+\paragraph{Output format:} This command outputs routes in the same
+format as \verb|ip route ls|.
+
+\paragraph{Examples:} 
+\begin{itemize}
+\item Find a route to output packets to 193.233.7.82:
+\begin{verbatim}
+kuznet@amber:~ $ ip route get 193.233.7.82
+193.233.7.82 dev eth0  src 193.233.7.65 realms inr.ac
+    cache  mtu 1500 rtt 300
+kuznet@amber:~ $
+\end{verbatim}
+
+\item Find a route to forward packets arriving on \verb|eth0|
+from 193.233.7.82 and destined for 193.233.7.82:
+\begin{verbatim}
+kuznet@amber:~ $ ip r g 193.233.7.82 from 193.233.7.82 iif eth0
+193.233.7.82 from 193.233.7.82 dev eth0  src 193.233.7.65 \
+  realms inr.ac/inr.ac 
+    cache <src-direct,redirect>  mtu 1500 rtt 300 iif eth0
+kuznet@amber:~ $
+\end{verbatim}
+\begin{NB}
+  \label{NB-nature-of-strangeness}
+  This is the command that created the funny route from 193.233.7.82
+  looped back to 193.233.7.82 (cf.\ NB on~p.\pageref{NB-strange-route}).
+  Note the \verb|redirect| flag on it.
+\end{NB}
+
+\item Find a multicast route for packets arriving on \verb|eth0|
+from host 193.233.7.82 and destined for multicast group 224.2.127.254
+(it is assumed that a multicast routing daemon is running.
+In this case, it is \verb|pimd|):
+\begin{verbatim}
+kuznet@amber:~ $ ip r g 224.2.127.254 from 193.233.7.82 iif eth0
+multicast 224.2.127.254 from 193.233.7.82 dev lo  \
+  src 193.233.7.65 realms inr.ac/cosmos 
+    cache <mc> iif eth0 Oifs: eth1 pimreg
+kuznet@amber:~ $
+\end{verbatim}
+This route differs from the ones seen before. It contains a ``normal'' part
+and a ``multicast'' part. The normal part is used to deliver (or not to
+deliver) the packet to local IP listeners. In this case the router
+is not a member
+of this group, so that route has no \verb|local| flag and only
+forwards packets. The output device for such entries is always loopback.
+The multicast part consists of an additional \verb|Oifs:| list showing
+the output interfaces.
+\end{itemize}
+
+
+It is time for a more complicated example. Let us add an invalid
+gatewayed route for a destination which is really directly connected:
+\begin{verbatim}
+netadm@alisa:~ # ip route add 193.233.7.98 via 193.233.7.254
+netadm@alisa:~ # ip route get 193.233.7.98
+193.233.7.98 via 193.233.7.254 dev eth0  src 193.233.7.90
+    cache  mtu 1500 rtt 3072
+netadm@alisa:~ #
+\end{verbatim}
+and probe it with ping:
+\begin{verbatim}
+netadm@alisa:~ # ping -n 193.233.7.98
+PING 193.233.7.98 (193.233.7.98) from 193.233.7.90 : 56 data bytes
+From 193.233.7.254: Redirect Host(New nexthop: 193.233.7.98)
+64 bytes from 193.233.7.98: icmp_seq=0 ttl=255 time=3.5 ms
+From 193.233.7.254: Redirect Host(New nexthop: 193.233.7.98)
+64 bytes from 193.233.7.98: icmp_seq=1 ttl=255 time=2.2 ms
+64 bytes from 193.233.7.98: icmp_seq=2 ttl=255 time=0.4 ms
+64 bytes from 193.233.7.98: icmp_seq=3 ttl=255 time=0.4 ms
+64 bytes from 193.233.7.98: icmp_seq=4 ttl=255 time=0.4 ms
+^C
+--- 193.233.7.98 ping statistics ---
+5 packets transmitted, 5 packets received, 0% packet loss
+round-trip min/avg/max = 0.4/1.3/3.5 ms
+netadm@alisa:~ #
+\end{verbatim}
+What happened? Router 193.233.7.254 understood that we have a much
+better path to the destination and sent us an ICMP redirect message.
+We may retry \verb|ip route get| to see what we have in the routing
+tables now:
+\begin{verbatim}
+netadm@alisa:~ # ip route get 193.233.7.98
+193.233.7.98 dev eth0  src 193.233.7.90 
+    cache <redirected>  mtu 1500 rtt 3072
+netadm@alisa:~ #
+\end{verbatim}
+
+
+
+\section{{\tt ip rule} --- routing policy database management}
+\label{IP-RULE}
+
+\paragraph{Abbreviations:} \verb|rule|, \verb|ru|.
+
+\paragraph{Object:} \verb|rule|s in the routing policy database control
+the route selection algorithm.
+
+Classic routing algorithms used in the Internet make routing decisions
+based only on the destination address of packets (and in theory,
+but not in practice, on the TOS field). The seminal review of classic
+routing algorithms and their modifications can be found in~\cite{RFC1812}.
+
+In some circumstances we want to route packets differently depending not only
+on destination addresses, but also on other packet fields: source address,
+IP protocol, transport protocol ports or even packet payload.
+This task is called ``policy routing''.
+
+\begin{NB}
+  ``policy routing'' $\neq$ ``routing policy''.
+
+\noindent      ``policy routing'' $=$ ``cunning routing''.
+
+\noindent      ``routing policy'' $=$ ``routing tactics'' or ``routing plan''.
+\end{NB}
+
+To solve this task, the conventional destination based routing table, ordered
+according to the longest match rule, is replaced with a ``routing policy
+database'' (or RPDB), which selects routes
+by executing some set of rules. The rules may have lots of keys of different
+natures and therefore they have no natural ordering, but one imposed
+by the administrator. Linux-2.2 RPDB is a linear list of rules
+ordered by numeric priority value.
+RPDB explicitly allows matching a few packet fields:
+
+\begin{itemize}
+\item packet source address.
+\item packet destination address.
+\item TOS.
+\item incoming interface (which is packet metadata, rather than a packet field).
+\end{itemize}
+
+Matching IP protocols and transport ports is also possible,
+indirectly, via \verb|ipchains|, by exploiting their ability
+to mark some classes of packets with \verb|fwmark|. Therefore,
+\verb|fwmark| is also included in the set of keys checked by rules.
+
+Each policy routing rule consists of a {\em selector\/} and an {\em action\/}
+predicate. The RPDB is scanned in the order of increasing priority. The selector
+of each rule is applied to \{source address, destination address, incoming
+interface, tos, fwmark\} and, if the selector matches the packet,
+the action is performed.  The action predicate may return with success.
+In this case, it will either give a route or failure indication
+and the RPDB lookup is terminated. Otherwise, the RPDB program
+continues on the next rule.
+
+What is the action, semantically? The natural action is to select the
+nexthop and the output device. This is what
+Cisco IOS~\cite{IOS} does. Let us call it ``match \& set''.
+The Linux-2.2 approach is more flexible. The action includes
+lookups in destination-based routing tables and selecting
+a route from these tables according to the classic longest match algorithm.
+The ``match \& set'' approach is the simplest case of the Linux one. It is realized
+when a second level routing table contains a single default route.
+Recall that Linux-2.2 supports multiple tables
+managed with the \verb|ip route| command, described in the previous section.
+
+At startup time the kernel configures the default RPDB consisting of three
+rules:
+
+\begin{enumerate}
+\item Priority: 0, Selector: match anything, Action: lookup routing
+table \verb|local| (ID 255).
+The \verb|local| table is a special routing table containing
+high priority control routes for local and broadcast addresses.
+
+Rule 0 is special. It cannot be deleted or overridden.
+
+
+\item Priority: 32766, Selector: match anything, Action: lookup routing
+table \verb|main| (ID 254).
+The \verb|main| table is the normal routing table containing all non-policy
+routes. This rule may be deleted and/or overridden with other
+ones by the administrator.
+
+\item Priority: 32767, Selector: match anything, Action: lookup routing
+table \verb|default| (ID 253).
+The \verb|default| table is empty. It is reserved for some
+post-processing if no previous default rules selected the packet.
+This rule may also be deleted.
+
+\end{enumerate}
+
+Do not confuse routing tables with rules: rules point to routing tables,
+several rules may refer to one routing table and some routing tables
+may have no rules pointing to them. If the administrator deletes all the rules
+referring to a table, the table is not used, but it still exists
+and will disappear only after all the routes contained in it are deleted.
+
+
+\paragraph{Rule attributes:} Each RPDB entry has additional
+attributes. F.e.\ each rule has a pointer to some routing
+table. NAT and masquerading rules have an attribute to select a new IP
+address to translate/masquerade. Besides that, rules have some
+optional attributes, which routes have, namely \verb|realms|.
+These values do not override those contained in the routing tables. They
+are only used if the route did not select any attributes.
+
+
+\paragraph{Rule types:} The RPDB may contain rules of the following
+types:
+\begin{itemize}
+\item \verb|unicast| --- the rule prescribes to return the route found
+in the routing table referenced by the rule.
+\item \verb|blackhole| --- the rule prescribes to silently drop the packet.
+\item \verb|unreachable| --- the rule prescribes to generate a ``Network
+is unreachable'' error.
+\item \verb|prohibit| --- the rule prescribes to generate a
+``Communication is administratively prohibited'' error.
+\item \verb|nat| --- the rule prescribes to translate the source address
+of the IP packet into some other value. More about NAT is
+in Appendix~\ref{ROUTE-NAT}, p.\pageref{ROUTE-NAT}.
+\end{itemize}
+
+
+\paragraph{Commands:} \verb|add|, \verb|delete| and \verb|show|
+(or \verb|list|).
+
+\subsection{{\tt ip rule add} --- insert a new rule\\
+       {\tt ip rule delete} --- delete a rule}
+\label{IP-RULE-ADD}
+
+\paragraph{Abbreviations:} \verb|add|, \verb|a|; \verb|delete|, \verb|del|,
+       \verb|d|.
+
+\paragraph{Arguments:}
+
+\begin{itemize}
+\item \verb|type TYPE| (default)
+
+--- the type of this rule. The list of valid types was given in the previous
+subsection.
+
+\item \verb|from PREFIX|
+
+--- select the source prefix to match.
+
+\item \verb|to PREFIX|
+
+--- select the destination prefix to match.
+
+\item \verb|iif NAME|
+
+--- select the incoming device to match. If the interface is loopback,
+the rule only matches packets originating from this host. This means that you
+may create separate routing tables for forwarded and local packets and,
+hence, completely segregate them.
+
+\item \verb|tos TOS| or \verb|dsfield TOS|
+
+--- select the TOS value to match.
+
+\item \verb|fwmark MARK|
+
+--- select the \verb|fwmark| value to match.
+
+\item \verb|priority PREFERENCE|
+
+--- the priority of this rule. Each rule should have an explicitly
+set {\em unique\/} priority value.
+\begin{NB}
+  Really, for historical reasons \verb|ip rule add| does not require a
+  priority value and allows them to be non-unique.
+  If the user does not supply a priority, it is selected by the kernel.
+  If the user creates a rule with a priority value that
+  already exists, the kernel does not reject the request. It adds
+  the new rule before all old rules of the same priority.
+
+  It is a mistake in design, no more. And it will be fixed one day,
+  so do not rely on this feature. Use explicit priorities.
+\end{NB}
+
+
+\item \verb|table TABLEID|
+
+--- the routing table identifier to lookup if the rule selector matches.
+
+\item \verb|realms FROM/TO|
+
+--- Realms to select if the rule matched and the routing table lookup
+succeeded. Realm \verb|TO| is only used if the route did not select
+any realm.
+
+\item \verb|nat ADDRESS|
+
+--- The base of the IP address block to translate (for source addresses).
+The \verb|ADDRESS| may be either the start of the block of NAT addresses
+(selected by NAT routes) or in linux-2.2 a local host address (or even zero).
+In the last case the router does not translate the packets,
+but masquerades them to this address; this feature disappeared in 2.4.
+More about NAT is in Appendix~\ref{ROUTE-NAT},
+p.\pageref{ROUTE-NAT}.
+
+\end{itemize}
+
+\paragraph{Warning:} Changes to the RPDB made with these commands
+do not become active immediately. It is assumed that after
+a script finishes a batch of updates, it flushes the routing cache
+with \verb|ip route flush cache|.
+
+\paragraph{Examples:}
+\begin{itemize}
+\item Route packets with source addresses from 192.203.80/24
+according to routing table \verb|inr.ruhep|:
+\begin{verbatim}
+ip ru add from 192.203.80.0/24 table inr.ruhep prio 220
+\end{verbatim}
+
+\item Translate packet source address 193.233.7.83 into 192.203.80.144
+and route it according to table \#1 (actually, it is \verb|inr.ruhep|):
+\begin{verbatim}
+ip ru add from 193.233.7.83 nat 192.203.80.144 table 1 prio 320
+\end{verbatim}
+
+\item Delete the unused default rule:
+\begin{verbatim}
+ip ru del prio 32767
+\end{verbatim}
+
+\end{itemize}
+
+
+
+\subsection{{\tt ip rule show} --- list rules}
+\label{IP-RULE-SHOW}
+
+\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|sh|, \verb|ls|, \verb|l|.
+
+
+\paragraph{Arguments:} Good news, this is one command that has no arguments.
+
+\paragraph{Output format:}
+
+\begin{verbatim}
+kuznet@amber:~ $ ip ru ls
+0:     from all lookup local 
+200:   from 192.203.80.0/24 to 193.233.7.0/24 lookup main
+210:   from 192.203.80.0/24 to 192.203.80.0/24 lookup main
+220:   from 192.203.80.0/24 lookup inr.ruhep realms inr.ruhep/radio-msu
+300:   from 193.233.7.83 to 193.233.7.0/24 lookup main
+310:   from 193.233.7.83 to 192.203.80.0/24 lookup main
+320:   from 193.233.7.83 lookup inr.ruhep map-to 192.203.80.144
+32766: from all lookup main 
+kuznet@amber:~ $
+\end{verbatim}
+
+In the first column is the rule priority value followed
+by a colon. Then the selectors follow. Each key is prefixed
+with the same keyword that was used to create the rule.
+
+The keyword \verb|lookup| is followed by a routing table identifier,
+as it is recorded in the file \verb|/etc/iproute2/rt_tables|.
+
+If the rule does NAT (f.e.\ rule \#320), it is shown by the keyword
+\verb|map-to| followed by the start of the block of addresses to map.
+
+The sense of this example is pretty simple. The prefixes
+192.203.80.0/24 and 193.233.7.0/24 form the internal network, but
+they are routed differently when the packets leave it.
+Besides that, the host 193.233.7.83 is translated into
+another prefix to look like 192.203.80.144 when talking
+to the outer world.
+
+
+
+\section{{\tt ip maddress} --- multicast addresses management}
+\label{IP-MADDR}
+
+\paragraph{Object:} \verb|maddress| objects are multicast addresses.
+
+\paragraph{Commands:} \verb|add|, \verb|delete|, \verb|show| (or \verb|list|).
+
+\subsection{{\tt ip maddress show} --- list multicast addresses}
+
+\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|sh|, \verb|ls|, \verb|l|.
+
+\paragraph{Arguments:}
+
+\begin{itemize}
+
+\item \verb|dev NAME| (default)
+
+--- the device name.
+
+\end{itemize}
+
+\paragraph{Output format:}
+
+\begin{verbatim}
+kuznet@alisa:~ $ ip maddr ls dummy
+2:  dummy
+    link  33:33:00:00:00:01
+    link  01:00:5e:00:00:01
+    inet  224.0.0.1 users 2
+    inet6 ff02::1
+kuznet@alisa:~ $ 
+\end{verbatim}
+
+The first line of the output shows the interface index and its name.
+Then the multicast address list follows. Each line starts with the
+protocol identifier. The word \verb|link| denotes a link layer
+multicast address.
+
+If a multicast address has more than one user, the number
+of users is shown after the \verb|users| keyword.
+
+One additional feature not present in the example above
+is the \verb|static| flag, which indicates that the address was joined
+with \verb|ip maddr add|. See the following subsection.
+
+
+
+\subsection{{\tt ip maddress add} --- add a multicast address\\
+           {\tt ip maddress delete} --- delete a multicast address}
+
+\paragraph{Abbreviations:} \verb|add|, \verb|a|; \verb|delete|, \verb|del|, \verb|d|.
+
+\paragraph{Description:} these commands attach/detach
+a static link layer multicast address to listen on the interface.
+Note that it is impossible to join protocol multicast groups
+statically. This command only manages link layer addresses.
+
+
+\paragraph{Arguments:}
+
+\begin{itemize}
+\item \verb|address LLADDRESS| (default)
+
+--- the link layer multicast address.
+
+\item \verb|dev NAME|
+
+--- the device to join/leave this multicast address.
+
+\end{itemize}
+
+
+\paragraph{Example:} Let us continue with the example from the previous subsection.
+
+\begin{verbatim}
+netadm@alisa:~ # ip maddr add 33:33:00:00:00:01 dev dummy
+netadm@alisa:~ # ip -0 maddr ls dummy
+2:  dummy
+    link  33:33:00:00:00:01 users 2 static
+    link  01:00:5e:00:00:01
+netadm@alisa:~ # ip maddr del 33:33:00:00:00:01 dev dummy
+\end{verbatim}
+
+\begin{NB}
+ Neither \verb|ip| nor the kernel check for multicast address validity.
+ Particularly, this means that you can try to load a unicast address
+ instead of a multicast address. Most drivers will ignore such addresses,
+ but several (f.e.\ Tulip) will intern it to their on-board filter.
+ The effects may be strange. Namely, the addresses become additional
+ local link addresses and, if you loaded the address of another host
+ to the router, wait for duplicated packets on the wire.
+ It is not a bug, but rather a hole in the API and intra-kernel interfaces.
+ This feature is really more useful for traffic monitoring, but using it
+ with Linux-2.2 you {\em have to\/} be sure that the host is not
+ a router and, especially, that it is not a transparent proxy or masquerading
+ agent.
+\end{NB}
+
+
+
+\section{{\tt ip mroute} --- multicast routing cache management}
+\label{IP-MROUTE}
+
+\paragraph{Abbreviations:} \verb|mroute|, \verb|mr|.
+
+\paragraph{Object:} \verb|mroute| objects are multicast routing cache
+entries created by a user level mrouting daemon
+(f.e.\ \verb|pimd| or \verb|mrouted|).
+
+Due to the limitations of the current interface to the multicast routing
+engine, it is impossible to change \verb|mroute| objects administratively,
+so we may only display them. This limitation will be removed
+in the future.
+
+\paragraph{Commands:} \verb|show| (or \verb|list|).
+
+
+\subsection{{\tt ip mroute show} --- list mroute cache entries}
+
+\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|sh|, \verb|ls|, \verb|l|.
+
+\paragraph{Arguments:}
+
+\begin{itemize}
+\item \verb|to PREFIX| (default)
+
+--- the prefix selecting the destination multicast addresses to list.
+
+
+\item \verb|iif NAME|
+
+--- the interface on which multicast packets are received.
+
+
+\item \verb|from PREFIX|
+
+--- the prefix selecting the IP source addresses of the multicast route.
+
+
+\end{itemize}
+
+\paragraph{Output format:}
+
+\begin{verbatim}
+kuznet@amber:~ $ ip mroute ls
+(193.232.127.6, 224.0.1.39)      Iif: unresolved 
+(193.232.244.34, 224.0.1.40)     Iif: unresolved 
+(193.233.7.65, 224.66.66.66)     Iif: eth0       Oifs: pimreg 
+kuznet@amber:~ $ 
+\end{verbatim}
+
+Each line shows one (S,G) entry in the multicast routing cache,
+where S is the source address and G is the multicast group. \verb|Iif| is
+the interface on which multicast packets are expected to arrive.
+If the word \verb|unresolved| is there instead of the interface name,
+it means that the routing daemon still hasn't resolved this entry.
+The keyword \verb|Oifs| is followed by a list of output interfaces, separated
+by spaces. If a multicast routing entry is created with non-trivial
+TTL scope, administrative distances are appended to the device names
+in the \verb|Oifs| list.
+
+\paragraph{Statistics:} The \verb|-statistics| option also prints the
+number of packets and bytes forwarded along this route and
+the number of packets that arrived on the wrong interface, if this number is not zero.
+
+\begin{verbatim}
+kuznet@amber:~ $ ip -s mr ls 224.66/16
+(193.233.7.65, 224.66.66.66)     Iif: eth0       Oifs: pimreg 
+  9383 packets, 300256 bytes
+kuznet@amber:~ $
+\end{verbatim}
+
+
+\section{{\tt ip tunnel} --- tunnel configuration}
+\label{IP-TUNNEL}
+
+\paragraph{Abbreviations:} \verb|tunnel|, \verb|tunl|.
+
+\paragraph{Object:} \verb|tunnel| objects are tunnels, encapsulating
+packets in IPv4 packets and then sending them over the IP infrastructure.
+
+\paragraph{Commands:} \verb|add|, \verb|delete|, \verb|change|, \verb|show|
+(or \verb|list|).
+
+\paragraph{See also:} A more informal discussion of tunneling
+over IP and the \verb|ip tunnel| command can be found in~\cite{IP-TUNNELS}.
+
+\subsection{{\tt ip tunnel add} --- add a new tunnel\\
+       {\tt ip tunnel change} --- change an existing tunnel\\
+       {\tt ip tunnel delete} --- destroy a tunnel}
+
+\paragraph{Abbreviations:} \verb|add|, \verb|a|; \verb|change|, \verb|chg|;
+\verb|delete|, \verb|del|, \verb|d|.
+
+
+\paragraph{Arguments:}
+
+\begin{itemize}
+
+\item \verb|name NAME| (default)
+
+--- select the tunnel device name.
+
+\item \verb|mode MODE|
+
+--- set the tunnel mode. Three modes are currently available:
+       \verb|ipip|, \verb|sit| and \verb|gre|.
+
+\item \verb|remote ADDRESS|
+
+--- set the remote endpoint of the tunnel.
+
+\item \verb|local ADDRESS|
+
+--- set the fixed local address for tunneled packets.
+It must be an address on another interface of this host.
+
+\item \verb|ttl N|
+
+--- set a fixed TTL \verb|N| on tunneled packets.
+       \verb|N| is a number in the range 1--255. 0 is a special value
+       meaning that packets inherit the TTL value. 
+               The default value is: \verb|inherit|.
+
+\item \verb|tos T| or \verb|dsfield T|
+
+--- set a fixed TOS \verb|T| on tunneled packets.
+               The default value is: \verb|inherit|.
+
+
+
+\item \verb|dev NAME| 
+
+--- bind the tunnel to the device \verb|NAME| so that
+       tunneled packets will only be routed via this device and will
+       not be able to escape to another device when the route to endpoint changes.
+
+\item \verb|nopmtudisc|
+
+--- disable Path MTU Discovery on this tunnel.
+       It is enabled by default. Note that a fixed ttl is incompatible
+       with this option: tunnelling with a fixed ttl always performs PMTU discovery.
+
+\item \verb|key K|, \verb|ikey K|, \verb|okey K|
+
+--- (only GRE tunnels) use keyed GRE with key \verb|K|. \verb|K| is
+       either a number or an IP address-like dotted quad.
+   The \verb|key| parameter sets the key to use in both directions.
+   The \verb|ikey| and \verb|okey| parameters set different keys for input and output.
+   
+
+\item \verb|csum|, \verb|icsum|, \verb|ocsum|
+
+--- (only GRE tunnels) generate/require checksums for tunneled packets.
+   The \verb|ocsum| flag calculates checksums for outgoing packets.
+   The \verb|icsum| flag requires that all input packets have the correct
+   checksum. The \verb|csum| flag is equivalent to the combination
+  ``\verb|icsum| \verb|ocsum|''.
+
+\item \verb|seq|, \verb|iseq|, \verb|oseq|
+
+--- (only GRE tunnels) serialize packets.
+   The \verb|oseq| flag enables sequencing of outgoing packets.
+   The \verb|iseq| flag requires that all input packets are serialized.
+   The \verb|seq| flag is equivalent to the combination ``\verb|iseq| \verb|oseq|''.
+
+\begin{NB}
+ I think this option does not
+       work. At least, I did not test it, did not debug it and
+       do not even understand how it is supposed to work or for what
+       purpose Cisco planned to use it. Do not use it.
+\end{NB}
+
+
+\end{itemize}
+
+\paragraph{Example:} Create a pointopoint IPv6 tunnel with maximal TTL of 32.
+\begin{verbatim}
+netadm@amber:~ # ip tunl add Cisco mode sit remote 192.31.7.104 \
+    local 192.203.80.142 ttl 32 
+\end{verbatim}
+
+\subsection{{\tt ip tunnel show} --- list tunnels}
+
+\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|sh|, \verb|ls|, \verb|l|.
+
+
+\paragraph{Arguments:} None.
+
+\paragraph{Output format:}
+\begin{verbatim}
+kuznet@amber:~ $ ip tunl ls Cisco
+Cisco: ipv6/ip  remote 192.31.7.104  local 192.203.80.142  ttl 32 
+kuznet@amber:~ $ 
+\end{verbatim}
+The line starts with the tunnel device name followed by a colon.
+Then the tunnel mode follows. The parameters of the tunnel are listed
+with the same keywords that were used when creating the tunnel.
+
+\paragraph{Statistics:}
+
+\begin{verbatim}
+kuznet@amber:~ $ ip -s tunl ls Cisco
+Cisco: ipv6/ip  remote 192.31.7.104  local 192.203.80.142  ttl 32 
+RX: Packets    Bytes        Errors CsumErrs OutOfSeq Mcasts
+    12566      1707516      0      0        0        0       
+TX: Packets    Bytes        Errors DeadLoop NoRoute  NoBufs
+    13445      1879677      0      0        0        0     
+kuznet@amber:~ $ 
+\end{verbatim}
+Essentially, these numbers are the same as the numbers
+printed with {\tt ip -s link show}
+(sec.\ref{IP-LINK-SHOW}, p.\pageref{IP-LINK-SHOW}) but the tags are different
+to reflect that they are tunnel specific.
+\begin{itemize}
+\item \verb|CsumErrs| --- the total number of packets dropped
+because of checksum failures for a GRE tunnel with checksumming enabled.
+\item \verb|OutOfSeq| --- the total number of packets dropped
+because they arrived out of sequence for a GRE tunnel with
+serialization enabled.
+\item \verb|Mcasts| --- the total number of multicast packets
+received on a broadcast GRE tunnel.
+\item \verb|DeadLoop| --- the total number of packets which were not
+transmitted because the tunnel is looped back to itself.
+\item \verb|NoRoute| --- the total number of packets which were not
+transmitted because there is no IP route to the remote endpoint.
+\item \verb|NoBufs| --- the total number of packets which were not
+transmitted because the kernel failed to allocate a buffer.
+\end{itemize}
+
+
+\section{{\tt ip monitor} and {\tt rtmon} --- state monitoring}
+\label{IP-MONITOR}
+
+The \verb|ip| utility can monitor the state of devices, addresses
+and routes continuously. This option has a slightly different format.
+Namely,
+the \verb|monitor| command is the first in the command line and then
+the object list follows:
+\begin{verbatim}
+  ip monitor [ file FILE ] [ all | OBJECT-LIST ]
+\end{verbatim}
+\verb|OBJECT-LIST| is the list of object types that we want to monitor.
+It may contain \verb|link|, \verb|address| and \verb|route|.
+If no \verb|file| argument is given, \verb|ip| opens RTNETLINK,
+listens on it and dumps state changes in the format described
+in previous sections.
+
+If a file name is given, it does not listen on RTNETLINK,
+but opens the file containing RTNETLINK messages saved in binary format
+and dumps them. Such a history file can be generated with the
+\verb|rtmon| utility. This utility has a command line syntax similar to
+\verb|ip monitor|.
+Ideally, \verb|rtmon| should be started before
+the first network configuration command is issued. F.e.\ if
+you insert:
+\begin{verbatim}
+  rtmon file /var/log/rtmon.log
+\end{verbatim}
+in a startup script, you will be able to view the full history
+later.
+
+Certainly, it is possible to start \verb|rtmon| at any time.
+It prepends the history with the state snapshot dumped at the moment
+of starting.
+
+
+\section{Route realms and policy propagation, {\tt rtacct}}
+\label{RT-REALMS}
+
+On routers using OSPF ASE or, especially, the BGP protocol, routing
+tables may be huge. If we want to classify or to account for the packets
+per route, we will have to keep lots of information. Even worse, if we
+want to distinguish the packets not only by their destination, but
+also by their source, the task gets quadratic complexity and its solution
+is physically impossible.
+
+One approach to propagating the policy from routing protocols
+to the forwarding engine has been proposed in~\cite{IOS-BGP-PP}.
+Essentially, Cisco Policy Propagation via BGP is based on the fact
+that dedicated routers all have the RIB (Routing Information Base)
+close to the forwarding engine, so policy routing rules can
+check all the route attributes, including ASPATH information
+and community strings.
+
+The Linux architecture, splitting the RIB (maintained by a user level
+daemon) and the kernel based FIB (Forwarding Information Base),
+does not allow such a simple approach.
+
+This is fortunate, because there is another solution
+which allows even more flexible policy and richer semantics.
+
+Namely, routes can be clustered together in user space, based on their
+attributes.  F.e.\ a BGP router knows route ASPATH, its community;
+an OSPF router knows the route tag or its area. The administrator, when adding
+routes manually, also knows their nature. Providing that the number of such
+aggregates (we call them {\em realms\/}) is low, the task of full
+classification both by source and destination becomes quite manageable.
+
+So each route may be assigned to a realm. It is assumed that
+this identification is made by a routing daemon, but static routes
+can also be handled manually with \verb|ip route| (see sec.\ref{IP-ROUTE},
+p.\pageref{IP-ROUTE}).
+\begin{NB}
+  There is a patch to \verb|gated|, allowing classification of routes
+  to realms with all the set of policy rules implemented in \verb|gated|:
+  by prefix, by ASPATH, by origin, by tag etc.
+\end{NB}
+
+To facilitate the construction (f.e.\ in case the routing
+daemon is not aware of realms), missing realms may be completed
+with routing policy rules, see sec.~\ref{IP-RULE}, p.\pageref{IP-RULE}.
+
+For each packet the kernel calculates a tuple of realms: source realm
+and destination realm, using the following algorithm:
+
+\begin{enumerate}
+\item If the route has a realm, the destination realm of the packet is set to it.
+\item If the rule has a source realm, the source realm of the packet is set to it.
+If the destination realm was not inherited from the route and the rule has a destination realm,
+it is also set.
+\item If at least one of the realms is still unknown, the kernel finds
+the reversed route to the source of the packet.
+\item If the source realm is still unknown, get it from the reversed route.
+\item If one of the realms is still unknown, swap the realms of reversed
+routes and apply step 2 again.
+\end{enumerate}
+
+After this procedure is completed we know what realm the packet
+arrived from and the realm where it is going to propagate to.
+If some of the realms are unknown, they are initialized to zero
+(or realm \verb|unknown|).
+
+The main application of realms is the TC \verb|route| classifier~\cite{TC-CREF},
+where they are used to help assign packets to traffic classes,
+to account, police and schedule them according to this
+classification.
+
+A much simpler but still very useful application is incoming packet
+accounting by realms. The kernel gathers a packet statistics summary
+which can be viewed with the \verb|rtacct| utility.
+\begin{verbatim}
+kuznet@amber:~ $ rtacct russia
+Realm      BytesTo    PktsTo     BytesFrom  PktsFrom   
+russia     20576778   169176     47080168   153805     
+kuznet@amber:~ $
+\end{verbatim}
+This shows that this router received 153805 packets from
+the realm \verb|russia| and forwarded 169176 packets to \verb|russia|.
+The realm \verb|russia| consists of routes with ASPATHs not leaving
+Russia.
+
+Note that locally originating packets are not accounted here,
+\verb|rtacct| shows incoming packets only. Using the \verb|route|
+classifier (see~\cite{TC-CREF}) you can get even more detailed
+accounting information about outgoing packets, optionally
+summarizing traffic not only by source or destination, but
+by any pair of source and destination realms.
+
+
+\begin{thebibliography}{99}
+\addcontentsline{toc}{section}{References}
+\bibitem{RFC-NDISC} T.~Narten, E.~Nordmark, W.~Simpson.
+``Neighbor Discovery for IP Version 6 (IPv6)'', RFC-2461.
+
+\bibitem{RFC-ADDRCONF} S.~Thomson, T.~Narten.
+``IPv6 Stateless Address Autoconfiguration'', RFC-2462.
+
+\bibitem{RFC1812} F.~Baker.
+``Requirements for IP Version 4 Routers'', RFC-1812.
+
+\bibitem{RFC1122} R.~T.~Braden.
+``Requirements for Internet hosts --- communication layers'', RFC-1122.
+
+\bibitem{IOS} ``Cisco IOS Release 12.0 Network Protocols
+Command Reference, Part 1'' and
+``Cisco IOS Release 12.0 Quality of Service Solutions
+Configuration Guide: Configuring Policy-Based Routing'',\\
+http://www.cisco.com/univercd/cc/td/doc/product/software/ios120.
+
+\bibitem{IP-TUNNELS} A.~N.~Kuznetsov.
+``Tunnels over IP in Linux-2.2'', \\
+In: {\tt ftp://ftp.inr.ac.ru/ip-routing/iproute2-current.tar.gz}.
+
+\bibitem{TC-CREF} A.~N.~Kuznetsov. ``TC Command Reference'',\\
+In: {\tt ftp://ftp.inr.ac.ru/ip-routing/iproute2-current.tar.gz}.
+
+\bibitem{IOS-BGP-PP} ``Cisco IOS Release 12.0 Quality of Service Solutions
+Configuration Guide: Configuring QoS Policy Propagation via
+Border Gateway Protocol'',\\
+http://www.cisco.com/univercd/cc/td/doc/product/software/ios120.
+
+\bibitem{RFC-DHCP} R.~Droms.
+``Dynamic Host Configuration Protocol'', RFC-2131.
+
+\end{thebibliography}
+
+
+
+
+\appendix
+\addcontentsline{toc}{section}{Appendix}
+
+\section{Source address selection}
+\label{ADDR-SEL}
+
+When a host creates an IP packet, it must select some source
+address. Correct source address selection is a critical procedure,
+because it gives the receiver the information needed to deliver a
+reply. If the source is selected incorrectly, in the best case,
+the backward path may appear different to the forward one which
+is harmful for performance. In the worst case, when the addresses
+are administratively scoped, the reply may be lost entirely.
+
+Linux-2.2 selects source addresses using the following algorithm:
+
+\begin{itemize}
+\item
+The application may select a source address explicitly with \verb|bind(2)|
+syscall or supplying it to \verb|sendmsg(2)| via the ancillary data object
+\verb|IP_PKTINFO|. In this case the kernel only checks the validity
+of the address and never tries to ``improve'' an incorrect user choice,
+generating an error instead.
+\begin{NB}
+ Never say ``Never''. The sysctl option \verb|ip_dynaddr| breaks
+ this axiom. It has been made deliberately with the purpose
+ of automatically reselecting the address on hosts with dynamic dial-out interfaces.
+ However, this hack {\em must not\/} be used on multihomed hosts
+ and especially on routers: it would break them.
+\end{NB}
+
+
+\item Otherwise, IP routing tables can contain an explicit source
+address hint for this destination. The hint is set with the \verb|src| parameter
+to the \verb|ip route| command, sec.\ref{IP-ROUTE}, p.\pageref{IP-ROUTE}.
+
+
+\item Otherwise, the kernel searches through the list of addresses
+attached to the interface through which the packets will be routed.
+The search strategies are different for IP and IPv6. Namely:
+
+\begin{itemize}
+\item IPv6 searches for the first valid, not deprecated address
+with the same scope as the destination.
+
+\item IP searches for the first valid address with a scope wider
+than the scope of the destination but it prefers addresses
+which fall into the same subnet as the nexthop of the route
+to the destination. Unlike IPv6, the scopes of IPv4 destinations
+are not encoded in their addresses but are supplied
+in routing tables instead (the \verb|scope| parameter to the \verb|ip route| command,
+sec.\ref{IP-ROUTE}, p.\pageref{IP-ROUTE}).
+
+\end{itemize}
+
+
+\item Otherwise, if the scope of the destination is \verb|link| or \verb|host|,
+the algorithm fails and returns a zero source address.
+
+\item Otherwise, all interfaces are scanned to search for an address
+with an appropriate scope. The loopback device \verb|lo| is always the first
+in the search list, so that if an address with global scope (not 127.0.0.1!)
+is configured on loopback, it is always preferred.
+
+\end{itemize}
+
+
+\section{Proxy ARP/NDISC}
+\label{PROXY-NEIGH}
+
+Routers may answer ARP/NDISC solicitations on behalf of other hosts.
+In Linux-2.2 proxy ARP on an interface may be enabled
+by setting the kernel \verb|sysctl| variable 
+\verb|/proc/sys/net/ipv4/conf/<dev>/proxy_arp| to 1. After this, the router
+starts to answer ARP requests on the interface \verb|<dev>|, provided
+the route to the requested destination does {\em not\/} go back via the same
+device.
+
+The variable \verb|/proc/sys/net/ipv4/conf/all/proxy_arp| enables proxy
+ARP on all the IP devices.
+
+However, this approach fails in the case of IPv6 because the router
+must join the solicited node multicast address to listen for the corresponding
+NDISC queries. It means that proxy NDISC is possible only on a per destination
+basis.
+
+Logically, proxy ARP/NDISC is not a kernel task. It can easily be implemented
+in user space. However, similar functionality was present in BSD kernels
+and in Linux-2.0, so we have to preserve it at least to the extent that
+is standardized in BSD.
+\begin{NB}
+  Linux-2.0 ARP had a feature called {\em subnet\/} proxy ARP.
+  It is replaced with the sysctl flag in Linux-2.2.
+\end{NB}
+
+
+The \verb|ip| utility provides a way to manage proxy ARP/NDISC
+with the \verb|ip neigh| command, namely:
+\begin{verbatim}
+  ip neigh add proxy ADDRESS [ dev NAME ]
+\end{verbatim}
+adds a new proxy ARP/NDISC record and
+\begin{verbatim}
+  ip neigh del proxy ADDRESS [ dev NAME ]
+\end{verbatim}
+deletes it.
+
+If the name of the device is not given, the router will answer solicitations
+for address \verb|ADDRESS| on all devices, otherwise it will only serve
+the device \verb|NAME|. Even if the proxy entry is created with
+\verb|ip neigh|, the router {\em will not\/} answer a query if the route
+to the destination goes back via the interface from which the solicitation
+was received.
+
+It is important to emphasize that proxy entries have {\em no\/}
+parameters other than these (IP/IPv6 address and optional device).
+Particularly, the entry does not store any link layer address.
+It always advertises the station address of the interface
+on which it sends advertisements (i.e. its own station address).
+
+\section{Route NAT status}
+\label{ROUTE-NAT}
+
+NAT (or ``Network Address Translation'') remaps some parts
+of the IP address space into other ones. Linux-2.2 route NAT is supposed
+to be used to facilitate policy routing by rewriting addresses
+to other routing domains or to help while renumbering sites
+to another prefix.
+
+\paragraph{What it is not:}
+It is necessary to emphasize that {\em it is not supposed\/}
+to be used to compress address space or to split load.
+This is not missing functionality but a design principle.
+Route NAT is {\em stateless\/}. It does not hold any state
+about translated sessions. This means that it handles any number
+of sessions flawlessly. But it also means that it is {\em static\/}.
+It cannot detect the moment when the last TCP client stops
+using an address. For the same reason, it will not help to split
+load between several servers.
+\begin{NB}
+It is a pretty commonly held belief that it is useful to split load between
+several servers with NAT. This is a mistake. All you get from this
+is the requirement that the router keep the state of all the TCP connections
+going via it. Well, if the router is so powerful, run apache on it. 8)
+\end{NB}
+
+The second feature: it does not touch packet payload,
+does not try to ``improve'' broken protocols by looking
+through its data and mangling it. It mangles IP addresses,
+only IP addresses and nothing but IP addresses.
+This, too, is not missing functionality.
+
+To summarize: if you need to compress address space or keep
+active FTP clients happy, your choice is not route NAT but masquerading,
+port forwarding, NAPT etc. 
+\begin{NB}
+By the way, you may also want to look at
+http://www.suse.com/\~{}mha/HyperNews/get/linux-ip-nat.html
+\end{NB}
+
+
+\paragraph{How it works.}
+Some part of the address space is reserved for dummy addresses
+which will look for all the world like some host addresses
+inside your network. No other hosts may use these addresses,
+however other routers may also be configured to translate them.
+\begin{NB}
+A great advantage of route NAT is that it may be used not
+only in stub networks but in environments with arbitrarily complicated
+structure. It does not firewall, it {\em forwards.}
+\end{NB}
+These addresses are selected by the \verb|ip route| command
+(sec.\ref{IP-ROUTE-ADD}, p.\pageref{IP-ROUTE-ADD}). F.e.\
+\begin{verbatim}
+  ip route add nat 192.203.80.144 via 193.233.7.83
+\end{verbatim}
+states that the single address 192.203.80.144 is a dummy NAT address.
+For all the world it looks like a host address inside our network.
+For neighbouring hosts and routers it looks like the local address
+of the translating router. The router answers ARP for it, advertises
+this address as routed via it, {\em et al\/}. When the router
+receives a packet destined for 192.203.80.144, it replaces 
+this address with 193.233.7.83 which is the address of some real
+host and forwards the packet. If you need to remap
+blocks of addresses, you may use a command like:
+\begin{verbatim}
+  ip route add nat 192.203.80.192/26 via 193.233.7.64
+\end{verbatim}
+This command will map a block of 64 addresses 192.203.80.192-255 to
+193.233.7.64-127.
+
+When an internal host (193.233.7.83 in the example above)
+sends something to the outer world and these packets are forwarded
+by our router, it should translate the source address 193.233.7.83
+into 192.203.80.144. This task is solved by setting a special
+policy rule (sec.\ref{IP-RULE-ADD}, p.\pageref{IP-RULE-ADD}):
+\begin{verbatim}
+  ip rule add prio 320 from 193.233.7.83 nat 192.203.80.144
+\end{verbatim}
+This rule says that the source address 193.233.7.83
+should be translated into 192.203.80.144 before forwarding.
+It is important that the address after the \verb|nat| keyword
+is some NAT address, declared by {\tt ip route add nat}.
+If it is just a random address the router will not map to it.
+\begin{NB}
+The exception is when the address is a local address of this
+router (or 0.0.0.0) and masquerading is configured in the linux-2.2
+kernel. In this case the router will masquerade the packets as this address.
+If 0.0.0.0 is selected, the result is equivalent to one
+obtained with firewalling rules. Otherwise, you have the way
+to order Linux to masquerade to this fixed address.
+The NAT mechanism used in linux-2.4 is more flexible than
+masquerading, so this feature has lost its meaning and is disabled.
+\end{NB}
+
+If the network has non-trivial internal structure, it is
+useful and even necessary to add rules disabling translation
+when a packet does not leave this network. Let us return to the
+example from sec.\ref{IP-RULE-SHOW} (p.\pageref{IP-RULE-SHOW}).
+\begin{verbatim}
+300:   from 193.233.7.83 to 193.233.7.0/24 lookup main
+310:   from 193.233.7.83 to 192.203.80.0/24 lookup main
+320:   from 193.233.7.83 lookup inr.ruhep map-to 192.203.80.144
+\end{verbatim}
+This block of rules causes normal forwarding when
+packets from 193.233.7.83 do not leave networks 193.233.7/24
+and 192.203.80/24. Also, if the \verb|inr.ruhep| table does not
+contain a route to the destination (which means that the routing
+domain owning addresses from 192.203.80/24 is dead), no translation
+will occur. Otherwise, the packets are translated.
+
+\paragraph{How to only translate selected ports:}
+If you only want to translate selected ports (f.e.\ http)
+and leave the rest intact, you may use \verb|ipchains|
+to \verb|fwmark| a class of packets.
+Suppose you did and all the packets from 193.233.7.83
+destined for port 80 are marked with marker 0x1234 in input fwchain.
+In this case you may replace rule \#320 with:
+\begin{verbatim}
+320:   from 193.233.7.83 fwmark 1234 lookup main map-to 192.203.80.144
+\end{verbatim}
+and translation will only be enabled for outgoing http requests.
+
+\section{Example: minimal host setup}
+\label{EXAMPLE-SETUP}
+
+The following script gives an example of a fault safe
+setup of IP (and IPv6, if it is compiled into the kernel)
+in the common case of a node attached to a single broadcast
+network. A more advanced script, which may be used both on multihomed
+hosts and on routers, is described in the following
+section.
+
+The utilities used in the script may be found in the
+directory ftp://ftp.inr.ac.ru/ip-routing/:
+\begin{enumerate}
+\item \verb|ip| --- package \verb|iproute2|.
+\item \verb|arping| --- package \verb|iputils|.
+\item \verb|rdisc| --- package \verb|iputils|.
+\end{enumerate}
+\begin{NB}
+It also refers to a DHCP client, \verb|dhcpcd|. I should refrain from
+recommending a good DHCP client to use. All that I can
+say is that ISC \verb|dhcp-2.0b1pl6| patched with the patch that
+can be found in the \verb|dhcp.bootp.rarp| subdirectory of
+the same ftp site {\em does\/} work,
+at least on Ethernet and Token Ring.
+\end{NB}
+
+\begin{verbatim}
+#! /bin/bash
+\end{verbatim}
+\begin{flushleft}
+\# {\bf Usage: \verb|ifone ADDRESS[/PREFIX-LENGTH] [DEVICE]|}\\
+\# {\bf Parameters:}\\
+\# \$1 --- Static IP address, optionally followed by prefix length.\\
+\# \$2 --- Device name. If it is missing, \verb|eth0| is assumed.\\
+\# F.e. \verb|ifone 193.233.7.90|
+\end{flushleft}
+\begin{verbatim}
+dev=$2
+: ${dev:=eth0}
+ipaddr=
+\end{verbatim}
+\# Parse IP address, splitting prefix length.
+\begin{verbatim}
+if [ "$1" != "" ]; then
+  ipaddr=${1%/*}
+  if [ "$1" != "$ipaddr" ]; then
+    pfxlen=${1#*/}
+  fi
+  : ${pfxlen:=24}
+fi
+pfx="${ipaddr}/${pfxlen}"
+\end{verbatim}
+
+\begin{flushleft}
+\# {\bf Step 0} --- enable loopback.\\
+\#\\
+\# This step is necessary on any networked box before attempting\\
+\# to configure any other device.\\
+\end{flushleft}
+\begin{verbatim}
+ip link set up dev lo
+ip addr add 127.0.0.1/8 dev lo brd + scope host
+\end{verbatim}
+\begin{flushleft}
+\# IPv6 autoconfigures itself on loopback.\\
+\#\\
+\# If user gave loopback as device, we add the address as alias and exit.
+\end{flushleft}
+\begin{verbatim}
+if [ "$dev" = "lo" ]; then
+  if [ "$ipaddr" != "" -a  "$ipaddr" != "127.0.0.1" ]; then
+    ip address add $ipaddr dev $dev
+    exit $?
+  fi
+  exit 0
+fi
+\end{verbatim}
+
+\noindent\# {\bf Step 1} --- enable device \verb|$dev|
+
+\begin{verbatim}
+if ! ip link set up dev $dev ; then
+  echo "Cannot enable interface $dev. Aborting." 1>&2
+  exit 1
+fi
+\end{verbatim}
+\begin{flushleft}
+\# The interface is \verb|UP|. IPv6 started stateless autoconfiguration itself,\\
+\# and its configuration finishes here. However,\\
+\# IP still needs some static preconfigured address.
+\end{flushleft}
+\begin{verbatim}
+if [ "$ipaddr" = "" ]; then
+  echo "No address for $dev is configured, trying DHCP..." 1>&2
+  dhcpcd
+  exit $?
+fi
+\end{verbatim}
+
+\begin{flushleft}
+\# {\bf Step 2} --- IP Duplicate Address Detection~\cite{RFC-DHCP}.\\
+\# Send two probes and wait for result for 3 seconds.\\
+\# If the interface opens slower f.e.\ due to long media detection,\\
+\# you want to increase the timeout.\\
+\end{flushleft}
+\begin{verbatim}
+if ! arping -q -c 2 -w 3 -D -I $dev $ipaddr ; then
+  echo "Address $ipaddr is busy, trying DHCP..." 1>&2
+  dhcpcd
+  exit $?
+fi
+\end{verbatim}
+\begin{flushleft}
+\# OK, the address is unique, we may add it on the interface.\\
+\#\\
+\# {\bf Step 3} --- Configure the address on the interface.
+\end{flushleft}
+
+\begin{verbatim}
+if ! ip address add $pfx brd + dev $dev; then
+  echo "Failed to add $pfx on $dev, trying DHCP..." 1>&2
+  dhcpcd
+  exit $?
+fi
+\end{verbatim}
+
+\noindent\# {\bf Step 4} --- Announce our presence on the link.
+\begin{verbatim}
+arping -A -c 1 -I $dev $ipaddr
+noarp=$?
+( sleep 2;
+  arping -U -c 1 -I $dev $ipaddr ) >& /dev/null </dev/null &
+\end{verbatim}
+
+\begin{flushleft}
+\# {\bf Step 5} (optional) --- Add some control routes.\\
+\#\\
+\# 1. Prohibit link local multicast addresses.\\
+\# 2. Prohibit link local (alias, limited) broadcast.\\
+\# 3. Add default multicast route.
+\end{flushleft}
+\begin{verbatim}
+ip route add unreachable 224.0.0.0/24 
+ip route add unreachable 255.255.255.255
+if [ `ip link ls $dev | grep -c MULTICAST` -ge 1 ]; then
+  ip route add 224.0.0.0/4 dev $dev scope global
+fi
+\end{verbatim}
+
+\begin{flushleft}
+\# {\bf Step 6} --- Add fallback default route with huge metric.\\
+\# If a proxy ARP server is present on the interface, we will be\\
+\# able to talk to all the Internet without further configuration.\\
+\# It is not so cheap though and we still hope that this route\\
+\# will be overridden by a more correct one from rdisc.\\
+\# Do not make this step if the device is not ARPable,\\
+\# because dead nexthop detection does not work on them.
+\end{flushleft}
+\begin{verbatim}
+if [ "$noarp" = "0" ]; then
+  ip ro add default dev $dev metric 30000 scope global
+fi
+\end{verbatim}
+
+\begin{flushleft}
+\# {\bf Step 7} --- Restart router discovery and exit.
+\end{flushleft}
+\begin{verbatim}
+killall -HUP rdisc || rdisc -fs
+exit 0
+\end{verbatim}
+
+
+\section{Example: {\protect\tt ifcfg} --- interface address management}
+\label{EXAMPLE-IFCFG}
+
+This is a simplistic script replacing one option of \verb|ifconfig|,
+namely, IP address management. It not only adds
+addresses, but also carries out Duplicate Address Detection~\cite{RFC-DHCP},
+sends unsolicited ARP to update the caches of other hosts sharing
+the interface, adds some control routes and restarts Router Discovery
+when it is necessary.
+
+I strongly recommend using it {\em instead\/} of \verb|ifconfig| both
+on hosts and on routers.
+
+\begin{verbatim}
+#! /bin/bash
+\end{verbatim}
+\begin{flushleft}
+\# {\bf Usage: \verb?ifcfg DEVICE[:ALIAS] [add|del] ADDRESS[/LENGTH] [PEER]?}\\
+\# {\bf Parameters:}\\
+\# ---Device name. It may have alias suffix, separated by colon.\\
+\# ---Command: add, delete or stop.\\
+\# ---IP address, optionally followed by prefix length.\\
+\# ---Optional peer address for pointopoint interfaces.\\
+\# F.e. \verb|ifcfg eth0 193.233.7.90/24|
+
+\noindent\# This function determines whether this machine is a router or a host.\\
+\# It returns 0 if the host is apparently not a router.
+\end{flushleft}
+\begin{verbatim}
+CheckForwarding () {
+  local sbase fwd
+  sbase=/proc/sys/net/ipv4/conf
+  fwd=0
+  if [ -d $sbase ]; then
+    for dir in $sbase/*/forwarding; do
+      fwd=$[$fwd + `cat $dir`]
+    done
+  else
+    fwd=2
+  fi
+  return $fwd
+}
+\end{verbatim}
+\begin{flushleft}
+\# This function restarts Router Discovery.\\
+\end{flushleft}
+\begin{verbatim}
+RestartRDISC () {
+  killall -HUP rdisc || rdisc -fs
+}
+\end{verbatim}
+\begin{flushleft}
+\# Calculate ABC "natural" mask length\\
+\# Arg: \$1 = dotquad address
+\end{flushleft}
+\begin{verbatim}
+ABCMaskLen () {
+  local class;
+  class=${1%%.*}
+  if [ $class -eq 0 -o $class -ge 224 ]; then return 0
+  elif [ $class -ge 192 ]; then return 24
+  elif [ $class -ge 128 ]; then return 16
+  else  return 8 ; fi
+}
+\end{verbatim}
+
+
+\begin{flushleft}
+\# {\bf MAIN()}\\
+\#\\
+\# Strip alias suffix separated by colon.
+\end{flushleft}
+\begin{verbatim}
+label="label $1"
+ldev=$1
+dev=${1%:*}
+if [ "$dev" = "" -o "$1" = "help" ]; then
+  echo "Usage: ifcfg DEV [[add|del [ADDR[/LEN]] [PEER] | stop]" 1>&2
+  echo "       add - add new address" 1>&2
+  echo "       del - delete address" 1>&2
+  echo "       stop - completely disable IP" 1>&2
+  exit 1
+fi
+shift
+
+CheckForwarding
+fwd=$?
+\end{verbatim}
+\begin{flushleft}
+\# Parse command. If it is ``stop'', flush and exit.
+\end{flushleft}
+\begin{verbatim}
+deleting=0
+case "$1" in
+add) shift ;;
+stop)
+  if [ "$ldev" != "$dev" ]; then
+    echo "Cannot stop alias $ldev" 1>&2
+    exit 1;
+  fi
+  ip -4 addr flush dev $dev $label || exit 1
+  if [ $fwd -eq 0 ]; then RestartRDISC; fi
+  exit 0 ;;
+del*)
+  deleting=1; shift ;;
+*)
+esac
+\end{verbatim}
+\begin{flushleft}
+\# Parse prefix, split prefix length, separated by slash.
+\end{flushleft}
+\begin{verbatim}
+ipaddr=
+pfxlen=
+if [ "$1" != "" ]; then
+  ipaddr=${1%/*}
+  if [ "$1" != "$ipaddr" ]; then
+    pfxlen=${1#*/}
+  fi
+  if [ "$ipaddr" = "" ]; then
+    echo "$1 is bad IP address." 1>&2
+    exit 1
+  fi
+fi
+shift
+\end{verbatim}
+\begin{flushleft}
+\# If peer address is present, prefix length is 32.\\
+\# Otherwise, if prefix length was not given, guess it.
+\end{flushleft}
+\begin{verbatim}
+peer=$1
+if [ "$peer" != "" ]; then
+  if [ "$pfxlen" != "" -a "$pfxlen" != "32" ]; then
+    echo "Peer address with non-trivial netmask." 1>&2
+    exit 1
+  fi
+  pfx="$ipaddr peer $peer"
+else
+  if [ "$pfxlen" = "" ]; then
+    ABCMaskLen $ipaddr
+    pfxlen=$?
+  fi
+  pfx="$ipaddr/$pfxlen"
+fi
+if [ "$ldev" = "$dev" -a "$ipaddr" != "" ]; then
+  label=
+fi
+\end{verbatim}
+\begin{flushleft}
+\# If deletion was requested, delete the address and restart RDISC
+\end{flushleft}
+\begin{verbatim}
+if [ $deleting -ne 0 ]; then
+  ip addr del $pfx dev $dev $label || exit 1
+  if [ $fwd -eq 0 ]; then RestartRDISC; fi
+  exit 0
+fi
+\end{verbatim}
+\begin{flushleft}
+\# Start interface initialization.\\
+\#\\
+\# {\bf Step 0} --- enable device \verb|$dev|
+\end{flushleft}
+\begin{verbatim}
+if ! ip link set up dev $dev ; then
+  echo "Error: cannot enable interface $dev." 1>&2
+  exit 1
+fi
+if [ "$ipaddr" = "" ]; then exit 0; fi
+\end{verbatim}
+\begin{flushleft}
+\# {\bf Step 1} --- IP Duplicate Address Detection~\cite{RFC-DHCP}.\\
+\# Send two probes and wait for result for 3 seconds.\\
+\# If the interface opens slower f.e.\ due to long media detection,\\
+\# you want to increase the timeout.\\
+\end{flushleft}
+\begin{verbatim}
+if ! arping -q -c 2 -w 3 -D -I $dev $ipaddr ; then
+  echo "Error: some host already uses address $ipaddr on $dev." 1>&2
+  exit 1
+fi
+\end{verbatim}
+\begin{flushleft}
+\# OK, the address is unique. We may add it to the interface.\\
+\#\\
+\# {\bf Step 2} --- Configure the address on the interface.
+\end{flushleft}
+\begin{verbatim}
+if ! ip address add $pfx brd + dev $dev $label; then
+  echo "Error: failed to add $pfx on $dev." 1>&2
+  exit 1
+fi
+\end{verbatim}
+\noindent\# {\bf Step 3} --- Announce our presence on the link
+\begin{verbatim}
+arping -q -A -c 1 -I $dev $ipaddr
+noarp=$?
+( sleep 2 ;
+  arping -q -U -c 1 -I $dev $ipaddr ) >& /dev/null </dev/null &
+\end{verbatim}
+\begin{flushleft}
+\# {\bf Step 4} (optional) --- Add some control routes.\\
+\#\\
+\# 1. Prohibit link local multicast addresses.\\
+\# 2. Prohibit link local (alias, limited) broadcast.\\
+\# 3. Add default multicast route.
+\end{flushleft}
+\begin{verbatim}
+ip route add unreachable 224.0.0.0/24 >& /dev/null 
+ip route add unreachable 255.255.255.255 >& /dev/null
+if [ `ip link ls $dev | grep -c MULTICAST` -ge 1 ]; then
+  ip route add 224.0.0.0/4 dev $dev scope global >& /dev/null
+fi
+\end{verbatim}
+\begin{flushleft}
+\# {\bf Step 5} --- Add fallback default route with huge metric.\\
+\# If a proxy ARP server is present on the interface, we will be\\
+\# able to talk to all the Internet without further configuration.\\
+\# Do not make this step on a router or if the device is not ARPable,\\
+\# because dead nexthop detection does not work on them.
+\end{flushleft}
+\begin{verbatim}
+if [ $fwd -eq 0 ]; then
+  if [ $noarp -eq 0 ]; then
+    ip ro append default dev $dev metric 30000 scope global
+  elif [ "$peer" != "" ]; then
+    if ping -q -c 2 -w 4 $peer ; then
+      ip ro append default via $peer dev $dev metric 30001
+    fi
+  fi
+  RestartRDISC
+fi
+
+exit 0
+\end{verbatim}
+\begin{flushleft}
+\# End of {\bf MAIN()}
+\end{flushleft}
+
+
+\end{document}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0a8c930cb50580efadd9fec0216038b8e8687df8 100644 (file)
@@ -0,0 +1,469 @@
+\documentstyle[12pt,twoside]{article}
+\def\TITLE{Tunnels over IP}
+\input preamble
+\begin{center}
+\Large\bf Tunnels over IP in Linux-2.2
+\end{center}
+
+
+\begin{center}
+{ \large Alexey~N.~Kuznetsov } \\
+\em Institute for Nuclear Research, Moscow \\
+\verb|kuznet@ms2.inr.ac.ru| \\
+\rm March 17, 1999
+\end{center}
+
+\vspace{5mm}
+
+\tableofcontents
+
+
+\section{Instead of introduction: micro-FAQ.}
+
+\begin{itemize}
+
+\item
+Q: In linux-2.0.36 I used:
+\begin{verbatim} 
+    ifconfig tunl1 10.0.0.1 pointopoint 193.233.7.65
+\end{verbatim} 
+to create tunnel. It does not work in 2.2.0!
+
+A: You are right, it does not work. The command written above is split into two commands.
+\begin{verbatim}
+    ip tunnel add MY-TUNNEL mode ipip remote 193.233.7.65
+\end{verbatim} 
+will create tunnel device with name \verb|MY-TUNNEL|. Now you may configure
+it with:
+\begin{verbatim} 
+    ifconfig MY-TUNNEL 10.0.0.1
+\end{verbatim} 
+Certainly, if you prefer name \verb|tunl1| to \verb|MY-TUNNEL|,
+you still may use it.
+
+\item
+Q: In linux-2.0.36 I used:
+\begin{verbatim} 
+    ifconfig tunl0 10.0.0.1
+    route add -net 10.0.0.0 gw 193.233.7.65 dev tunl0
+\end{verbatim} 
+to tunnel net 10.0.0.0 via router 193.233.7.65. It does not
+work in 2.2.0! Moreover, \verb|route| prints a funny error sort of
+``network unreachable'' and after this I found a strange direct route
+to 10.0.0.0 via \verb|tunl0| in routing table.
+
+A: Yes, in 2.2 the rule that {\em normal} gateway must reside on directly
+connected network has not any exceptions. You may tell kernel, that
+this particular route is {\em abnormal}:
+\begin{verbatim} 
+  ifconfig tunl0 10.0.0.1 netmask 255.255.255.255
+  ip route add 10.0.0.0/8 via 193.233.7.65 dev tunl0 onlink
+\end{verbatim}
+Note keyword \verb|onlink|, it is the magic key that orders kernel
+not to check for consistency of gateway address.
+Probably, after this explanation you have already guessed another method
+to cheat kernel:
+\begin{verbatim} 
+  ifconfig tunl0 10.0.0.1 netmask 255.255.255.255
+  route add -host 193.233.7.65 dev tunl0
+  route add -net 10.0.0.0 netmask 255.0.0.0 gw 193.233.7.65
+  route del -host 193.233.7.65 dev tunl0
+\end{verbatim}
+Well, if you like such tricks, nobody may prohibit you to use them.
+Only do not forget
+that between \verb|route add| and \verb|route del| host 193.233.7.65 is
+unreachable.
+
+\item
+Q: In 2.0.36 I used to load \verb|tunnel| device module and \verb|ipip| module.
+I cannot find any \verb|tunnel| in 2.2!
+
+A: Linux-2.2 has single module \verb|ipip| for both directions of tunneling
+and for all IPIP tunnel devices.
+
+\item
+Q: \verb|traceroute| does not work over tunnel! Well, stop... It works,
+     only skips some number of hops.
+
+A: Yes. By default tunnel driver copies \verb|ttl| value from
+inner packet to outer one. It means that path traversed by tunneled
+packets to another endpoint is not hidden. If you dislike this, or if you
+are going to use some routing protocol expecting that packets
+with ttl 1 will reach peering host (f.e.\ RIP, OSPF or EBGP)
+and you are not afraid of
+tunnel loops, you may append option \verb|ttl 64|, when creating tunnel
+with \verb|ip tunnel add|.
+
+\item
+Q: ... Well, list of things, which 2.0 was able to do finishes.
+
+\end{itemize}
+
+\paragraph{Summary of differences between 2.2 and 2.0.}
+
+\begin{itemize}
+
+\item {\bf In 2.0} you could compile tunnel device into kernel
+       and got set of 4 devices \verb|tunl0| ... \verb|tunl3| or,
+       alternatively, compile it as module and load new module
+       for each new tunnel. Also, module \verb|ipip| was necessary
+       to receive tunneled packets.
+
+      {\bf 2.2} has {\em one\/} module \verb|ipip|. Loading it you get base
+       tunnel device \verb|tunl0| and another tunnels may be created with command
+       \verb|ip tunnel add|. These new devices may have arbitrary names.
+
+
+\item {\bf In 2.0} you set remote tunnel endpoint address with
+       the command \verb|ifconfig| ... \verb|pointopoint A|.
+
+       {\bf In 2.2} this command has the same semantics on all
+       the interfaces, namely it sets not tunnel endpoint,
+       but address of peering host, which is directly reachable
+       via this tunnel,
+       rather than via Internet. Actual tunnel endpoint address \verb|A|
+       should be set with \verb|ip tunnel add ... remote A|.
+
+\item {\bf In 2.0} you create tunnel routes with the command:
+\begin{verbatim}
+    route add -net 10.0.0.0 gw A dev tunl0
+\end{verbatim}
+
+       {\bf 2.2} interprets this command equally for all device
+       kinds and gateway is required to be directly reachable via this tunnel,
+       rather than via Internet. You still may use \verb|ip route add ... onlink|
+       to override this behaviour.
+
+\end{itemize}
+
+
+\section{Tunnel setup: basics}
+
+Standard Linux-2.2 kernel supports three flavors of tunnels,
+listed in the following table:
+\vspace{2mm}
+
+\begin{tabular}{lll}
+\vrule depth 0.8ex width 0pt\relax
+Mode & Description  & Base device \\
+ipip & IP over IP & tunl0 \\
+sit & IPv6 over IP & sit0 \\
+gre & ANY over GRE over IP & gre0
+\end{tabular}
+
+\vspace{2mm}
+
+\noindent All the kinds of tunnels are created with one command:
+\begin{verbatim}
+  ip tunnel add <NAME> mode <MODE> [ local <S> ] [ remote <D> ]
+\end{verbatim}
+
+This command creates new tunnel device with name \verb|<NAME>|.
+The \verb|<NAME>| is an arbitrary string. Particularly,
+it may be even \verb|eth0|. The rest of parameters set
+different tunnel characteristics.
+
+\begin{itemize}
+
+\item
+\verb|mode <MODE>| sets tunnel mode. Three modes are available now
+       \verb|ipip|, \verb|sit| and \verb|gre|.
+
+\item
+\verb|remote <D>| sets remote endpoint of the tunnel to IP
+       address \verb|<D>|.
+\item
+\verb|local <S>| sets fixed local address for tunneled
+       packets. It must be an address on another interface of this host.
+
+\end{itemize}
+
+\let\thefootnote\oldthefootnote
+
+Both \verb|remote| and \verb|local| may be omitted. In this case we
+say that they are zero or wildcard. Two tunnels of one mode cannot
+have the same \verb|remote| and \verb|local|. Particularly it means
+that base device or fallback tunnel cannot be replicated.\footnote{
+This restriction is relaxed for keyed GRE tunnels.}
+
+Tunnels are divided into two classes: {\bf pointopoint} tunnels, which
+have some not wildcard \verb|remote| address and deliver all the packets
+to this destination, and {\bf NBMA} (i.e. Non-Broadcast Multi-Access) tunnels,
+which have no \verb|remote|. Particularly, base devices (f.e.\ \verb|tunl0|)
+are NBMA, because they have neither \verb|remote| nor
+\verb|local| addresses.
+
+
+After tunnel device is created you should configure it as you did
+it with another devices. Certainly, the configuration of tunnels has
+some features related to the fact that they work over existing Internet
+routing infrastructure and simultaneously create new virtual links,
+which changes this infrastructure. The danger that not enough careful
+tunnel setup will result in formation of tunnel loops,
+collapse of routing or flooding network with exponentially
+growing number of tunneled fragments is very real.
+
+
+Protocol setup on pointopoint tunnels does not differ from configuration
+of other devices. You should set a protocol address with \verb|ifconfig|
+and add routes with \verb|route| utility.
+
+NBMA tunnels are different. To route something via NBMA tunnel
+you have to explain to driver, where it should deliver packets to.
+The only way to make it is to create special routes with gateway
+address pointing to desired endpoint. F.e.\ 
+\begin{verbatim}
+    ip route add 10.0.0.0/24 via <A> dev tunl0 onlink
+\end{verbatim}
+It is important to use option \verb|onlink|, otherwise
+kernel will refuse request to create route via gateway not directly
+reachable over device \verb|tunl0|. With IPv6 the situation is much simpler:
+when you start device \verb|sit0|, it automatically configures itself
+with all IPv4 addresses mapped to IPv6 space, so that all IPv4
+Internet is {\em really reachable} via \verb|sit0|! Excellent, the command
+\begin{verbatim}
+    ip route add 3FFE::/16 via ::193.233.7.65 dev sit0
+\end{verbatim}
+will route \verb|3FFE::/16| via \verb|sit0|, sending all the packets
+destined to this prefix to 193.233.7.65.
+
+\section{Tunnel setup: options}
+
+Command \verb|ip tunnel add| has several additional options.
+\begin{itemize}
+
+\item \verb|ttl N| --- set fixed TTL \verb|N| on tunneled packets.
+       \verb|N| is number in the range 1--255. 0 is special value,
+       meaning that packets inherit TTL value. 
+               Default value is: \verb|inherit|.
+
+\item \verb|tos T| --- set fixed tos \verb|T| on tunneled packets.
+               Default value is: \verb|inherit|.
+
+\item \verb|dev DEV| --- bind tunnel to device \verb|DEV|, so that
+       tunneled packets will be routed only via this device and will
+       not be able to escape to another device, when route to endpoint changes.
+
+\item \verb|nopmtudisc| --- disable Path MTU Discovery on this tunnel.
+       It is enabled by default. Note that fixed ttl is incompatible
+       with this option: tunnels with fixed ttl always make pmtu discovery.
+
+\end{itemize}
+
+\verb|ipip| and \verb|sit| tunnels have no more options. \verb|gre|
+tunnels are more complicated:
+
+\begin{itemize}
+
+\item \verb|key K| --- use keyed GRE with key \verb|K|. \verb|K| is
+       either number or IP address-like dotted quad.
+
+\item \verb|csum| --- checksum tunneled packets.
+
+\item \verb|seq| --- serialize packets.
+\begin{NB}
+       I think this option does not
+       work. At least, I did not test it, did not debug it and
+       even do not understand, how it is supposed to work and for what
+       purpose Cisco planned to use it.
+\end{NB}
+
+\end{itemize}
+
+
+Actually, these GRE options can be set separately for input and
+output directions by prefixing corresponding keywords with letter
+\verb|i| or \verb|o|. F.e.\ \verb|icsum| orders to accept only
+packets with correct checksum and \verb|ocsum| means, that
+our host will calculate and send checksum.
+
+Command \verb|ip tunnel add| is not the only operation,
+which can be made with tunnels. Certainly, you may get short help page
+with:
+\begin{verbatim}
+    ip tunnel help
+\end{verbatim}
+
+Besides that, you may view list of installed tunnels with the help of command:
+\begin{verbatim}
+    ip tunnel ls
+\end{verbatim}
+Also you may look at statistics:
+\begin{verbatim}
+    ip -s tunnel ls Cisco
+\end{verbatim}
+where \verb|Cisco| is name of tunnel device. Command
+\begin{verbatim}
+    ip tunnel del Cisco
+\end{verbatim}
+destroys tunnel \verb|Cisco|. And, finally,
+\begin{verbatim}
+    ip tunnel change Cisco mode sit local ME remote HE ttl 32
+\end{verbatim}
+changes its parameters.
+
+\section{Differences between 2.2 and 2.0 tunnels revisited.}
+
+Now we can discuss more subtle differences between tunneling in 2.0
+and 2.2.
+
+\begin{itemize}
+
+\item In 2.0 all tunneled packets were received promiscuously
+as soon as you loaded module \verb|ipip|. 2.2 tries to select the best
+tunnel device and packet looks as received on this. F.e.\ if host
+received \verb|ipip| packet from host \verb|D| destined to our
+local address \verb|S|, kernel searches for matching tunnels
+in order:
+
+\begin{tabular}{ll}
+1 & \verb|remote| is \verb|D| and \verb|local| is \verb|S| \\
+2 & \verb|remote| is \verb|D| and \verb|local| is wildcard \\
+3 & \verb|remote| is wildcard and \verb|local| is \verb|S| \\
+4 & \verb|tunl0|
+\end{tabular}
+
+If tunnel exists, but it is not in \verb|UP| state, the tunnel is ignored.
+Note, that if \verb|tunl0| is \verb|UP| it receives all the IPIP packets,
+not acknowledged by more specific tunnels.
+Be careful, it means that without carefully installed firewall rules
+anyone on the Internet may inject to your network any packets with
+source addresses indistinguishable from local ones. It is not so bad idea
+to design tunnels in the way enforcing maximal route symmetry
+and to enable reversed path filter (\verb|rp_filter| sysctl option) on
+tunnel devices.
+
+\item In 2.2 you can monitor and debug tunnels with \verb|tcpdump|.
+F.e.\ \verb|tcpdump| \verb|-i Cisco| \verb|-nvv| will dump packets,
+which kernel output, via tunnel \verb|Cisco| and the packets received on it
+from kernel viewpoint.
+
+\end{itemize}
+
+
+\section{Linux and Cisco IOS tunnels.}
+
+Among another tunnels Cisco IOS supports IPIP and GRE.
+Essentially, Cisco setup is subset of options, available for Linux.
+Let us consider the simplest example:
+
+\begin{verbatim}
+interface Tunnel0
+ tunnel mode gre ip
+ tunnel source 10.10.14.1
+ tunnel destination 10.10.13.2
+\end{verbatim}
+
+
+This command set translates to:
+
+\begin{verbatim}
+    ip tunnel add Tunnel0 \
+        mode gre \
+        local 10.10.14.1 \
+        remote 10.10.13.2
+\end{verbatim}
+
+Any questions? No questions.
+
+\section{Interaction of IPIP tunnels and DVMRP.}
+
+DVMRP exploits IPIP tunnels to route multicasts via Internet.
+\verb|mrouted| creates
+IPIP tunnels listed in its configuration file automatically.
+From kernel and user viewpoints there are no differences between
+tunnels, created in this way, and tunnels created by \verb|ip tunnel|.
+I.e.\ if \verb|mrouted| created some tunnel, it may be used to
+route unicast packets, provided appropriate routes are added.
+And vice versa, if administrator has already created a tunnel,
+it will be reused by \verb|mrouted|, if it requests DVMRP
+tunnel with the same local and remote addresses.
+
+Do not wonder, if your manually configured tunnel is
+destroyed, when mrouted exits.
+
+
+\section{Broadcast GRE ``tunnels''.}
+
+It is possible to set \verb|remote| for GRE tunnel to a multicast
+address. Such tunnel becomes {\bf broadcast} tunnel (though word
+tunnel is not quite appropriate in this case, it is rather virtual network).
+\begin{verbatim}
+  ip tunnel add Universe local 193.233.7.65 \
+                         remote 224.66.66.66 ttl 16
+  ip addr add 10.0.0.1/16 dev Universe
+  ip link set Universe up
+\end{verbatim}
+This tunnel is true broadcast network and broadcast packets are
+sent to multicast group 224.66.66.66. By default such tunnel starts
+to resolve both IP and IPv6 addresses via ARP/NDISC, so that
+if multicast routing is supported in surrounding network, all GRE nodes
+will find one another automatically and will form virtual Ethernet-like
+broadcast network. If multicast routing does not work, it is unpleasant
+but not fatal flaw. The tunnel becomes NBMA rather than broadcast network.
+You may disable dynamic ARPing by:
+\begin{verbatim}
+  echo 0 > /proc/sys/net/ipv4/neigh/Universe/mcast_solicit
+\end{verbatim}
+and to add required information to ARP tables manually:
+\begin{verbatim}
+  ip neigh add 10.0.0.2 lladdr 128.6.190.2 dev Universe nud permanent
+\end{verbatim}
+In this case packets sent to 10.0.0.2 will be encapsulated in GRE
+and sent to 128.6.190.2. It is possible to facilitate address resolution
+using methods typical for another NBMA networks f.e.\ to start user
+level \verb|arpd| daemon, which will maintain database of hosts attached
+to GRE virtual network or ask for information from a
+dedicated ARP or NHRP server.
+
+
+Actually, such setup is the most natural for tunneling,
+it is really flexible, scalable and easily manageable, so that
+it is strongly recommended to be used with GRE tunnels instead of ugly
+hack with NBMA mode and \verb|onlink| modifier. Unfortunately,
+by historical reasons broadcast mode is not supported by IPIP tunnels,
+but this probably will change in future.
+
+
+
+\section{Traffic control issues.}
+
+Tunnels are devices, hence all the power of Linux traffic control
+applies to them. The simplest (and the most useful in practice)
+example is limiting tunnel bandwidth. The following command:
+\begin{verbatim}
+    tc qdisc add dev tunl0 root tbf \
+        rate 128Kbit burst 4K limit 10K
+\end{verbatim}
+will limit tunneled traffic to 128Kbit with maximal burst size of 4K
+and queuing not more than 10K.
+
+However, you should remember, that tunnels are {\em virtual} devices
+implemented in software and true queue management is impossible for them
+just because they have no queues. Instead, it is better to create classes
+on real physical interfaces and to map tunneled packets to them.
+In general case of dynamic routing you should create such classes
+on all outgoing interfaces, or, alternatively,
+to use option \verb|dev DEV| to bind tunnel to a fixed physical device.
+In the last case packets will be routed only via specified device
+and you need to setup corresponding classes only on it.
+Though you have to pay for this convenience,
+if routing will change, your tunnel will fail.
+
+Suppose that CBQ class \verb|1:ABC| has been created on device \verb|eth0| 
+specially for tunnel \verb|Cisco| with endpoints \verb|S| and \verb|D|.
+Now you can select IPIP packets with addresses \verb|S| and \verb|D|
+with some classifier and map them to class \verb|1:ABC|. F.e.\ 
+it is easy to make with \verb|rsvp| classifier:
+\begin{verbatim}
+    tc filter add dev eth0 pref 100 proto ip rsvp \
+        session D ipproto ipip filter S \
+        classid 1:ABC
+\end{verbatim}
+
+If you want to make more detailed classification of sub-flows
+transmitted via tunnel, you can build CBQ subtree,
+rooted at \verb|1:ABC| and attach to subroot set of rules parsing
+IPIP packets more deeply.
+
+\end{document}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..be9d8bcc74d44458dbf01cd17eccc990fa9cb953 100644 (file)
@@ -0,0 +1,110 @@
+<!doctype linuxdoc system>
+
+<article>
+
+<title>NSTAT, IFSTAT and RTACCT Utilities
+<author>Alexey Kuznetsov, <tt/kuznet@ms2.inr.ac.ru/
+<date>some_negative_number, 20 Sep 2001
+<abstract>
+<tt/nstat/, <tt/ifstat/ and <tt/rtacct/ are simple tools helping
+to monitor kernel snmp counters and network interface statistics.
+</abstract>
+
+<p> These utilities are very similar, so that I describe
+them simultaneously, using name <tt/Xstat/ in the places which apply
+to all of them.
+
+<p>The format of the command is:
+
+<tscreen><verb>
+       Xstat [ OPTIONS ] [ PATTERN [ PATTERN ... ] ]
+</verb></tscreen>
+
+<p>
+<tt/PATTERN/ is shell style pattern, selecting identifier
+of SNMP variables or interfaces to show. Variable is displayed
+if one of patterns matches its name. If no patterns are given,
+<tt/Xstat/ assumes that user wants to see all the variables.  
+
+<p> <tt/OPTIONS/ is list of single letter options, using common unix
+conventions.
+
+<itemize>
+<item><tt/-h/  - show help page
+<item><tt/-?/  - the same, of course
+<item><tt/-v/, <tt/-V/  - print version of <tt/Xstat/ and exit
+<item><tt/-z/ - dump zero counters too. By default they are not shown.
+<item><tt/-a/ - dump absolute values of counters. By default <tt/Xstat/
+                calculates increments since the previous use.
+<item><tt/-s/ - do not update history, so that the next time you will
+                see counters including values accumulated to the moment
+                of this measurement too.
+<item><tt/-n/ - do not display anything, only update history.
+<item><tt/-r/ - reset history.
+<item><tt/-d INTERVAL/ - <tt/Xstat/ is run in daemon mode collecting
+                statistics. <tt/INTERVAL/ is interval between measurements
+                in seconds.
+<item><tt/-t INTERVAL/ - time interval to average rates. Default value
+                is 60 seconds. 
+<item><tt/-e/ - display extended information about errors (<tt/ifstat/ only).
+</itemize>
+
+<p>
+History is just dump saved in file <tt>/tmp/.Xstat.uUID</tt>
+or in file given by environment variables <tt/NSTAT_HISTORY/,
+<tt/IFSTAT_HISTORY/ and <tt/RTACCT_HISTORY/.
+Each time when you use <tt/Xstat/ values there are updated.
+If you use patterns, only the values which you _really_ see
+are updated. If you want to skip an uninteresting period,
+use option <tt/-n/, or just output to <tt>/dev/null</tt>.
+
+<p>
+<tt/Xstat/ understands when history is invalidated by system reboot
+or source of information switched between different instances
+of daemonic <tt/Xstat/ and kernel SNMP tables and does not
+use invalid history.
+
+<p> Beware, <tt/Xstat/ will not produce sane output,
+when many processes use it simultaneously. If several processes
+under single user need this utility they should use environment
+variables to put their history in safe places
+or to use it with options <tt/-a -s/.
+
+<p>
+Well, that's all. The utility is very simple, but nevertheless
+very handy.
+
+<p> <bf/Output of XSTAT/
+<p> The first line of output is <tt/#/ followed by identifier
+of source of information, it may be word <tt/kernel/, when <tt/Xstat/
+gets information from kernel or some dotted decimal number followed
+by parameters, when it obtains information from running <tt/Xstat/ daemon.
+
+<p>In the case of <tt/nstat/ the rest of output consists of three columns:
+SNMP MIB identifier,
+its value (or increment since previous measurement) and average
+rate of increase of the counter per second. <tt/ifstat/ outputs
+interface name followed by pairs of counter and rate of its change.
+
+<p> <bf/Daemonic Xstat/
+<p> <tt/Xstat/ may be started as daemon by any user. This makes sense
+to avoid wrapped counters and to obtain reasonable long counters
+for large time. Also <tt/Xstat/ daemon calculates average rates.
+For the first goal sampling interval (option <tt/-d/) may be large enough,
+f.e. for gigabit rates byte counters overflow not more frequently than
+each 40 seconds and you may select interval of 20 seconds.
+On the other hand, when <tt/Xstat/ is used for estimating rates
+interval should be less than averaging period (option <tt/-t/), otherwise
+estimation loses in quality.
+
+Client <tt/Xstat/, before trying to get information from the kernel,
+contacts daemon started by this user, then it tries system wide
+daemon, which is supposed to be started by superuser. And only if
+none of them replied it gets information from kernel.
+
+<p> <bf/Environment/
+<p> <tt/NSTAT_HISTORY/ - name of history file for <tt/nstat/.
+<p> <tt/IFSTAT_HISTORY/ - name of history file for <tt/ifstat/.
+<p> <tt/RTACCT_HISTORY/ - name of history file for <tt/rtacct/.
+
+</article>
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..80ca5087bcec6081c6f53ce0264f658300d70ca9 100644 (file)
@@ -0,0 +1,26 @@
+\textwidth   6.0in
+\textheight  8.5in
+
+\input SNAPSHOT
+
+\pagestyle{myheadings}
+\markboth{\protect\TITLE}{}
+\markright{{\protect\sc iproute2-ss\Draft}}
+
+% To print it in compact form: both sides on one sheet (psnup -2)
+\evensidemargin=\oddsidemargin
+
+\newenvironment{NB}{\bgroup \vskip 1mm\leftskip 1cm \footnotesize \noindent NB.
+}{\par\egroup \vskip 1mm}
+
+\def\threeonly{[2.3.15+ only] }
+
+\begin{document}
+
+\makeatletter
+\renewcommand{\@oddhead}{{\protect\sc iproute2-ss\Draft} \hfill \protect\arabic{page}}
+\makeatother
+\let\oldthefootnote\thefootnote
+\def\thefootnote{}
+\footnotetext{Copyright \copyright~1999 A.N.Kuznetsov}
+
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..07391c39d04bb42a4f791c90b35bdcea51ccb4c9 100644 (file)
@@ -0,0 +1,52 @@
+<!doctype linuxdoc system>
+
+<article>
+
+<title>RTSTAT Utility
+<author>Robert Olsson
+<date>some_negative_number, 20 Dec 2001
+
+<p>
+Here is some code for monitoring the route cache. For systems handling high
+network load, servers, routers, firewalls etc the route cache and its garbage
+collection is crucial. Linux has a solid implementation.
+
+<p>
+The kernel patch (not required since linux-2.4.7) adds statistics counters
+from route cache process into 
+/proc/net/rt_cache_stat. A companion user mode program presents the statistics
+in a vmstat or iostat manner. The ratio between cache hits and misses gives 
+the flow length.
+
+<p>
+Hopefully it can help understanding performance and DoS and other related
+issues.
+
+<p> An URL where newer versions of this utility can be (probably) found
+is ftp://robur.slu.se/pub/Linux/net-development/rt_cache_stat/
+
+
+<p><bf/Description/
+
+<p>The format of the command is:
+
+<tscreen><verb>
+       rtstat [ OPTIONS ]
+</verb></tscreen>
+
+<p> <tt/OPTIONS/ are:
+
+<itemize>
+
+<item><tt/-h/, <tt/-help/ - show help page and version of the utility.
+
+<item><tt/-i INTERVAL/ - interval between snapshots, default value is
+2 seconds.
+
+<item><tt/-s NUMBER/ - whether to print header line. 0 inhibits header line,
+1 prescribes to print it once and 2 (this is default setting) forces header
+line each 20 lines. 
+
+</itemize>
+
+</article>
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0b1b53353f12d8ebf172f13d7ec5d2cdb377505c 100644 (file)
@@ -0,0 +1,525 @@
+<!doctype linuxdoc system>
+
+<article>
+
+<title>SS Utility: Quick Intro
+<author>Alexey Kuznetsov, <tt/kuznet@ms2.inr.ac.ru/
+<date>some_negative_number, 20 Sep 2001
+<abstract>
+<tt/ss/ is yet another utility to investigate sockets.
+Functionally it is NOT better than <tt/netstat/ combined
+with some perl/awk scripts and though it is surely faster
+it is not enough to make it much better. :-)
+So, stop reading this now and do not waste your time.
+Well, certainly, it proposes some functionality, which current
+netstat is still not able to do, but surely will soon.
+</abstract>
+
+<sect>Why?
+
+<p> <tt>/proc</tt> interface is inadequate, unfortunately.
+When amount of sockets is enough large, <tt/netstat/ or even
+plain <tt>cat /proc/net/tcp/</tt> cause nothing but pains and curses.
+In linux-2.4 the disease became worse: even if amount
+of sockets is small reading <tt>/proc/net/tcp/</tt> is slow enough.
+
+This utility presents a new approach, which is supposed to scale
+well. I am not going to describe technical details here and
+will concentrate on description of the command.
+The only important thing to say is that it is not so bad idea
+to load module <tt/tcp_diag/, which can be found in directory
+<tt/Modules/ of <tt/iproute2/. If you do not make this <tt/ss/
+will work, but it falls back to <tt>/proc</tt> and becomes slow
+like <tt/netstat/, well, a bit faster yet (see section "Some numbers"). 
+
+<sect>Old news
+
+<p>
+In the simplest form <tt/ss/ is equivalent to netstat
+with some small deviations.
+
+<itemize>
+<item><tt/ss -t -a/ dumps all TCP sockets
+<item><tt/ss -u -a/ dumps all UDP sockets
+<item><tt/ss -w -a/ dumps all RAW sockets
+<item><tt/ss -x -a/ dumps all UNIX sockets
+</itemize>
+
+<p>
+Option <tt/-o/ shows TCP timers state.
+Option <tt/-e/ shows some extended information.
+Etc. etc. etc. Seems, all the options of netstat related to sockets
+are supported. Though not AX.25 and other bizarres. :-)
+If someone wants, he can make support for decnet and ipx.
+Some rudimentary support for them is already present in iproute2 libutils,
+and I will be glad to see these new members.
+
+<p>
+However, standard functionality is a bit different:
+
+<p>
+The first: without option <tt/-a/ sockets in states
+<tt/TIME-WAIT/ and <tt/SYN-RECV/ are skipped too.
+It is more reasonable default, I think.
+
+<p>
+The second: format of UNIX sockets is different. It coincides
+with tcp/udp. Though standard kernel still does not allow to
+see write/read queues and peer address of connected UNIX sockets,
+the patch doing this exists.
+
+<p>
+The third: default is to dump only TCP sockets, rather than all of the types.
+
+<p>
+The next: by default it does not resolve numeric host addresses (like <tt/ip/)!
+Resolving is enabled with option <tt/-r/. Service names, usually stored
+in local files, are resolved by default. Also, if service database
+does not contain references to a port, <tt/ss/ queries system
+<tt/rpcbind/. RPC services are prefixed with <tt/rpc./
+Resolution of services may be suppressed with option <tt/-n/.
+
+<p>
+It does not accept "long" options (I dislike them, sorry).
+So, address family is given with family identifier following
+option <tt/-f/ to be aligned to iproute2 conventions.
+Mostly, it is to allow option parser to parse
+addresses correctly, but as side effect it really limits dumping
+to sockets supporting only given family. Option <tt/-A/ followed
+by list of socket tables to dump is also supported.
+Logically, id of socket table is different from _address_ family, which is
+another point of incompatibility. So, id is one of
+<tt/all/, <tt/tcp/, <tt/udp/,
+<tt/raw/, <tt/inet/, <tt/unix/, <tt/packet/, <tt/netlink/. See?
+Well, <tt/inet/ is just abbreviation for <tt/tcp|udp|raw/
+and it is not difficult to guess that <tt/packet/ allows
+to look at packet sockets. Actually, there are also some other abbreviations,
+f.e. <tt/unix_dgram/ selects only datagram UNIX sockets.
+
+<p>
+The next: well, I still do not know. :-)
+
+
+
+
+<sect>Time to talk about new functionality.
+
+<p>It is builtin filtering of socket lists. 
+
+<sect1> Filtering by state.
+
+<p>
+<tt/ss/ allows to filter socket states, using keywords
+<tt/state/ and <tt/exclude/, followed by some state
+identifier.
+
+<p>
+State identifier are standard TCP state names (not listed,
+they are useless for you if you already do not know them)
+or abbreviations:
+
+<itemize>
+<item><tt/all/        - for all the states
+<item><tt/bucket/     - for TCP minisockets (<tt/TIME-WAIT|SYN-RECV/)
+<item><tt/big/       - all except for minisockets
+<item><tt/connected/  - not closed and not listening
+<item><tt/synchronized/ - connected and not <tt/SYN-SENT/
+</itemize>
+
+<p>
+   F.e. to dump all tcp sockets except <tt/SYN-RECV/:
+
+<tscreen><verb>
+   ss exclude SYN-RECV
+</verb></tscreen>
+
+<p>
+   If neither <tt/state/ nor <tt/exclude/ directives
+   are present,
+   state filter defaults to <tt/all/ with option <tt/-a/
+   or to <tt/all/,
+   excluding listening, syn-recv, time-wait and closed sockets.
+
+<sect1> Filtering by addresses and ports.
+
+<p>
+Option list may contain address/port filter.
+It is boolean expression which consists of boolean operation
+<tt/or/, <tt/and/, <tt/not/ and predicates. 
+Actually, all the flavors of names for boolean operations are eaten:
+<tt/&amp/, <tt/&amp&amp/, <tt/|/, <tt/||/, <tt/!/, but do not forget
+about special sense given to these symbols by unix shells and escape
+them correctly, when used from command line.
+
+<p>
+Predicates may be of the following kinds:
+
+<itemize>
+<item>A. Address/port match, where address is checked against mask
+      and port is either wildcard or exact. It is one of:
+<tscreen><verb>
+       dst prefix:port
+       src prefix:port
+       src unix:STRING
+       src link:protocol:ifindex
+       src nl:channel:pid
+</verb></tscreen>
+
+      Both prefix and port may be absent or replaced with <tt/*/,
+      which means wildcard. UNIX socket use more powerful scheme
+      matching to socket names by shell wildcards. Also, prefixes
+      unix: and link: may be omitted, if address family is evident
+      from context (with option <tt/-x/ or with <tt/-f unix/
+      or with <tt/unix/ keyword) 
+
+<p>
+      F.e.
+
+<tscreen><verb>
+       dst 10.0.0.1
+       dst 10.0.0.1:
+       dst 10.0.0.1/32:
+       dst 10.0.0.1:*
+</verb></tscreen>
+   are equivalent and mean socket connected to
+                        any port on host 10.0.0.1
+
+<tscreen><verb>
+       dst 10.0.0.0/24:22
+</verb></tscreen>
+   sockets connected to port 22 on network
+                          10.0.0.0...255.
+
+<p>
+      Note that port is separated from address with a colon, which creates
+      troubles with IPv6 addresses. Generally, we interpret the last
+      colon as splitting port. To allow to give IPv6 addresses,
+      trick like used in IPv6 HTTP URLs may be used:
+
+<tscreen><verb>
+      dst [::1]
+</verb></tscreen>
+       are sockets connected to ::1 on any port
+
+<p>
+      Another way is <tt/dst ::1/128/. / helps to understand that
+      colon is part of IPv6 address.
+
+<p>
+      Now we can add another alias for <tt/dst 10.0.0.1/:
+      <tt/dst [10.0.0.1]/. :-)
+
+<p>   Address may be a DNS name. In this case all the addresses are looked
+      up (in all the address families, if it is not limited by option <tt/-f/
+      or special address prefix <tt/inet:/, <tt/inet6:/) and the resulting
+      expression is <tt/or/ over all of them.
+
+<item>   B. Port expressions:
+<tscreen><verb>
+      dport &gt= :1024
+      dport != :22
+      sport &lt :32000
+</verb></tscreen>
+      etc.
+
+      All the relations: <tt/&lt/, <tt/&gt/, <tt/&lt=/, <tt/&gt=/, <tt/=/, <tt/==/,
+      <tt/!=/, <tt/eq/, <tt/ge/, <tt/lt/, <tt/ne/...
+      Use the variant which you like more, but do not forget to escape special
+      characters when typing them on the command line. :-)
+
+      Note that the port number syntactically coincides with case A!
+      You may even add an IP address, but it will not participate
+      in the comparison, except for <tt/==/ and <tt/!=/, which are equivalent
+      to the corresponding predicates of type A. F.e.
+<p>
+<tt/dst 10.0.0.1:22/
+    is equivalent to  <tt/dport eq 10.0.0.1:22/
+      and
+      <tt/not dst 10.0.0.1:22/     is equivalent to
+ <tt/dport neq 10.0.0.1:22/
+
+<item>C. Keyword <tt/autobound/. It matches to sockets bound automatically
+      on local system.
+
+</itemize>
+
+
+<sect> Examples
+
+<p>
+<itemize>
+<item>1. List all the tcp sockets in state <tt/FIN-WAIT-1/ for our apache
+   to network 193.233.7/24 and look at their timers:
+
+<tscreen><verb>
+   ss -o state fin-wait-1 \( sport = :http or sport = :https \) \
+                          dst 193.233.7/24
+</verb></tscreen>
+
+   Oops, forgot to say that missing logical operation is
+   equivalent to <tt/and/.
+
+<item> 2. Well, now look at the rest...
+
+<tscreen><verb>
+   ss -o excl fin-wait-1
+   ss state fin-wait-1 \( sport neq :http and sport neq :https \) \
+                       or not dst 193.233.7/24
+</verb></tscreen>
+
+   Note that we have to do _two_ calls of ss to do this.
+   The state match is always ANDed with the address/port match.
+   The reason for this is purely technical: ss quickly skips
+   non-matching states before parsing addresses, and I consider the
+   ability to quickly skip gobs of time-wait and syn-recv sockets
+   more important than logical generality.
+
+<item> 3. So, let's look at all our sockets using autobound ports:
+
+<tscreen><verb>
+   ss -a -A all autobound
+</verb></tscreen>
+
+
+<item> 4. And eventually find all the local processes connected
+   to local X servers:
+
+<tscreen><verb>
+   ss -xp dst "/tmp/.X11-unix/*"
+</verb></tscreen>
+
+   Pardon, this does not work with current kernel, patching is required.
+   But we still can look at server side:
+   
+<tscreen><verb>
+   ss -x src "/tmp/.X11-unix/*"
+</verb></tscreen>
+
+</itemize>
+
+
+<sect> Returning to ground: real manual  
+
+<p>
+<sect1> Command arguments
+
+<p> General format of arguments to <tt/ss/ is:
+
+<tscreen><verb>
+       ss [ OPTIONS ] [ STATE-FILTER ] [ ADDRESS-FILTER ]
+</verb></tscreen>
+
+<sect2><tt/OPTIONS/
+<p> <tt/OPTIONS/ is list of single letter options, using common unix
+conventions.
+
+<itemize>
+<item><tt/-h/  - show help page
+<item><tt/-?/  - the same, of course
+<item><tt/-v/, <tt/-V/  - print version of <tt/ss/ and exit
+<item><tt/-s/  - print summary statistics. This option does not parse
+socket lists obtaining summary from various sources. It is useful
+when amount of sockets is so huge that parsing <tt>/proc/net/tcp</tt>
+is painful.
+<item><tt/-D FILE/  - do not display anything, just dump raw information
+about TCP sockets to <tt/FILE/ after applying filters. If <tt/FILE/ is <tt/-/,
+<tt/stdout/ is used.
+<item><tt/-F FILE/  - read continuation of filter from <tt/FILE/.
+Each line of <tt/FILE/ is interpreted like a single command line option.
+If <tt/FILE/ is <tt/-/, <tt/stdin/ is used.
+<item><tt/-r/  - try to resolve numeric address/ports
+<item><tt/-n/  - do not try to resolve ports
+<item><tt/-o/  - show some optional information, f.e. TCP timers
+<item><tt/-i/  - show some information specific to TCP (RTO, congestion
+window, slow start threshold etc.)
+<item><tt/-e/  - show even more optional information
+<item><tt/-m/  - show extended information on memory used by the socket.
+It is available only with <tt/tcp_diag/ enabled.
+<item><tt/-p/  - show list of processes owning the socket
+<item><tt/-f FAMILY/ - default address family used for parsing addresses.
+                 Also this option limits listing to sockets supporting
+                 given address family. Currently the following families
+                 are supported: <tt/unix/, <tt/inet/, <tt/inet6/, <tt/link/,
+                 <tt/netlink/.
+<item><tt/-4/ - alias for <tt/-f inet/
+<item><tt/-6/ - alias for <tt/-f inet6/
+<item><tt/-0/ - alias for <tt/-f link/
+<item><tt/-A LIST-OF-TABLES/ - list of socket tables to dump, separated
+                 by commas. The following identifiers are understood:
+                 <tt/all/, <tt/inet/, <tt/tcp/, <tt/udp/, <tt/raw/,
+                 <tt/unix/, <tt/packet/, <tt/netlink/, <tt/unix_dgram/,
+                 <tt/unix_stream/, <tt/packet_raw/, <tt/packet_dgram/.
+<item><tt/-x/ - alias for <tt/-A unix/
+<item><tt/-t/ - alias for <tt/-A tcp/
+<item><tt/-u/ - alias for <tt/-A udp/
+<item><tt/-w/ - alias for <tt/-A raw/
+<item><tt/-a/ - show sockets of all the states. By default sockets
+                in states <tt/LISTEN/, <tt/TIME-WAIT/, <tt/SYN_RECV/
+                and <tt/CLOSE/ are skipped.
+<item><tt/-l/ - show only sockets in state <tt/LISTEN/ 
+</itemize>
+
+<sect2><tt/STATE-FILTER/
+
+<p><tt/STATE-FILTER/ allows to construct arbitrary set of
+states to match. Its syntax is sequence of keywords <tt/state/
+and <tt/exclude/ followed by identifier of state.
+Available identifiers are:
+
+<p>
+<itemize>
+<item> All standard TCP states: <tt/established/, <tt/syn-sent/,
+<tt/syn-recv/, <tt/fin-wait-1/, <tt/fin-wait-2/, <tt/time-wait/,
+<tt/closed/, <tt/close-wait/, <tt/last-ack/, <tt/listen/ and <tt/closing/.
+
+<item><tt/all/ - for all the states 
+<item><tt/connected/ - all the states except for <tt/listen/ and <tt/closed/ 
+<item><tt/synchronized/ - all the <tt/connected/ states except for 
+<tt/syn-sent/
+<item><tt/bucket/ - states, which are maintained as minisockets, i.e.
+<tt/time-wait/ and <tt/syn-recv/.
+<item><tt/big/ - opposite to <tt/bucket/
+</itemize>
+
+<sect2><tt/ADDRESS_FILTER/
+
+<p><tt/ADDRESS_FILTER/ is boolean expression with operations <tt/and/, <tt/or/
+and <tt/not/, which can be abbreviated in C style f.e. as <tt/&amp/,
+<tt/&amp&amp/.
+
+<p>
+Predicates check socket addresses, both local and remote.
+There are the following kinds of predicates:
+
+<itemize>
+<item> <tt/dst ADDRESS_PATTERN/ - matches remote address and port
+<item> <tt/src ADDRESS_PATTERN/ - matches local address and port
+<item> <tt/dport RELOP PORT/    - compares remote port to a number
+<item> <tt/sport RELOP PORT/    - compares local port to a number
+<item> <tt/autobound/           - checks that socket is bound to an ephemeral
+                                  port
+</itemize>
+
+<p><tt/RELOP/ is one of <tt/&lt=/, <tt/&gt=/, <tt/==/ etc.
+To make this more convenient for use in a unix shell, alphabetic
+FORTRAN-like notations <tt/le/, <tt/gt/ etc. are accepted as well.
+
+<p>The format and semantics of <tt/ADDRESS_PATTERN/ depends on address
+family.
+
+<itemize>
+<item><tt/inet/ - <tt/ADDRESS_PATTERN/ consists of IP prefix, optionally
+followed by colon and port. If prefix or port part is absent or replaced
+with <tt/*/, this means wildcard match.
+<item><tt/inet6/ - The same as <tt/inet/, only the prefix refers to an IPv6
+address. Unlike <tt/inet/, the colon becomes ambiguous, so <tt/ss/ allows
+the scheme used in URLs, where the address is surrounded with
+<tt/[/ ... <tt/]/.
+<item><tt/unix/ - <tt/ADDRESS_PATTERN/ is shell-style wildcard.
+<item><tt/packet/ - format looks like <tt/inet/, only interface index
+stays instead of port and link layer protocol id instead of address.
+<item><tt/netlink/ - format looks like <tt/inet/, only socket pid
+stays instead of port and netlink channel instead of address.
+</itemize>
+
+<p><tt/PORT/ is syntactically <tt/ADDRESS_PATTERN/ with wildcard
+address part. Certainly, it is undefined for UNIX sockets. 
+
+<sect1> Environment variables
+
+<p>
+<tt/ss/ allows to change source of information using various
+environment variables:
+
+<p>
+<itemize>
+<item> <tt/PROC_SLABINFO/  to override <tt>/proc/slabinfo</tt>
+<item> <tt/PROC_NET_TCP/  to override <tt>/proc/net/tcp</tt>
+<item> <tt/PROC_NET_UDP/  to override <tt>/proc/net/udp</tt>
+<item> etc.
+</itemize> 
+
+<p>
+Variable <tt/PROC_ROOT/ allows to change root of all the <tt>/proc/</tt>
+hierarchy.
+
+<p>
+Variable <tt/TCPDIAG_FILE/ prescribes to open a file instead of
+requesting kernel to dump information about TCP sockets.
+
+
+<p> This option is used mainly to investigate bug reports,
+when dumps of files usually found in <tt>/proc/</tt> are received
+by e-mail.
+
+<sect1> Output format
+
+<p>Six columns. The first is <tt/Netid/, it denotes socket type and
+transport protocol, when it is ambiguous: <tt/tcp/, <tt/udp/, <tt/raw/,
+<tt/u_str/ is abbreviation for <tt/unix_stream/, <tt/u_dgr/ for UNIX
+datagram sockets, <tt/nl/ for netlink, <tt/p_raw/ and <tt/p_dgr/ for
+raw and datagram packet sockets. This column is optional, it will
+be hidden, if filter selects an unique netid.
+
+<p>
+The second column is <tt/State/. Socket state is displayed here.
+The names are standard TCP names, except for <tt/UNCONN/, which
+cannot happen for TCP, but normal for not connected sockets
+of another types. Again, this column can be hidden.
+
+<p>
+Then two columns (<tt/Recv-Q/ and <tt/Send-Q/) showing amount of data
+queued for receive and transmit.
+
+<p>
+And the last two columns display local address and port of the socket
+and its peer address, if the socket is connected.
+
+<p>
+If options <tt/-o/, <tt/-e/ or <tt/-p/ were given, options are
+displayed not in fixed positions but as space-separated pairs
+<tt/option:value/. If the value is not a single number, it is presented
+as a list of values, enclosed in <tt/(/ ... <tt/)/ and separated with
+commas. F.e.
+
+<tscreen><verb>
+   timer:(keepalive,111min,0)
+</verb></tscreen>
+is typical format for TCP timer (option <tt/-o/).
+
+<tscreen><verb>
+   users:((X,113,3))
+</verb></tscreen>
+is typical for list of users (option <tt/-p/).
+
+
+<sect>Some numbers
+
+<p>
+Well, let us use <tt/pidentd/ and a tool <tt/ibench/ to measure
+its performance. It is 30 requests per second here. Nothing to test,
+it is too slow. OK, let us patch pidentd with patch from directory
+Patches. After this it handles about 4300 requests per second
+and becomes handy tool to pollute socket tables with lots of timewait
+buckets.
+
+<p>
+So, each test starts by polluting the tables with 30000 sockets
+and then does a full dump of the table piped to wc, measuring
+timings with time:
+
+<p>Results:
+
+<itemize>
+<item> <tt/netstat -at/ - 15.6 seconds
+<item> <tt/ss -atr/, but without <tt/tcp_diag/     - 5.4 seconds
+<item> <tt/ss -atr/ with <tt/tcp_diag/     - 0.47 seconds
+</itemize>
+
+No comments. Though one comment is necessary, most of time
+without <tt/tcp_diag/ is wasted inside kernel with completely
+blocked networking. More than 10 seconds, yes. <tt/tcp_diag/
+does the same work for 100 milliseconds of system time.
+
+</article>
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..110061a8c08a3d5b22c2de4c9a19bf53aac9bbee 100644 (file)
@@ -0,0 +1,13 @@
+0x10   lowdelay
+0x08   throughput
+0x04   reliability
+# This value overlaps with ECT, do not use it!
+0x02   mincost
+# These values do not seem to want to die; Cisco likes them for some strange reason.
+0x20   priority
+0x40   immediate
+0x60   flash
+0x80   flash-override
+0xa0   critical
+0xc0   internet
+0xe0   network
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..8c985d795c7e06c83836776015b6f00057763373 100644 (file)
@@ -0,0 +1,25 @@
+#
+# Reserved protocols.
+#
+0      unspec
+1      redirect
+2      kernel
+3      boot
+4      static
+8      gated
+9      ra
+10     mrt
+11     zebra
+12     bird
+#
+#      Used by me for gated
+#
+254    gated/aggr
+253    gated/bgp
+252    gated/ospf
+251    gated/ospfase
+250    gated/rip
+249    gated/static
+248    gated/conn
+247    gated/inet
+246    gated/default
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..eedd76d23ffdbfff1d87a6516f3039e0829d4b1b 100644 (file)
@@ -0,0 +1,13 @@
+#
+# reserved values
+#
+0      cosmos
+#
+# local
+#
+#1     inr.ac
+#2     inr.ruhep
+#3     freenet
+#4     radio-msu
+#5     russia
+#6     internet
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..8514bc11a0c52af1c5e5f929ce662f5589bb88a0 100644 (file)
@@ -0,0 +1,11 @@
+#
+# reserved values
+#
+0      global
+255    nowhere
+254    host
+253    link
+#
+# pseudo-reserved
+#
+200    site
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..541abfd27256d9160e75ae71138fd2716abb3fd2 100644 (file)
@@ -0,0 +1,11 @@
+#
+# reserved values
+#
+255    local
+254    main
+253    default
+0      unspec
+#
+# local
+#
+#1     inr.ruhep
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..8766b679ce36d1868c19d54963181c5c7fa30acf 100644 (file)
@@ -0,0 +1,49 @@
+#! /bin/sh -x
+#
+# sample script on using the ingress capabilities
+# this script shows how one can rate limit incoming SYNs
+# Useful for TCP-SYN attack protection. You can use
+# IPchains to have more powerful additions to the SYN (eg 
+# in addition the subnet)
+#
+#path to various utilities;
+#change to reflect yours.
+#
+IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
+TC=$IPROUTE/tc/tc
+IP=$IPROUTE/ip/ip
+IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
+INDEV=eth2
+#
+# tag all incoming SYN packets through $INDEV as mark value 1
+############################################################ 
+$IPCHAINS -A input -i $INDEV -y -m 1
+############################################################ 
+#
+# install the ingress qdisc on the ingress interface
+############################################################ 
+$TC qdisc add dev $INDEV handle ffff: ingress
+############################################################ 
+
+#
+# 
+# SYN packets are 40 bytes (320 bits) so three SYNs equal
+# 960 bits (approximately 1kbit); so we rate limit below
+# the incoming SYNs to 3/sec (not very useful really, but
+# it serves to show the point) - JHS
+############################################################ 
+$TC filter add dev $INDEV parent ffff: protocol ip prio 50 handle 1 fw \
+police rate 1kbit burst 40 mtu 9k drop flowid :1
+############################################################ 
+
+
+#
+echo "---- qdisc parameters Ingress  ----------"
+$TC qdisc ls dev $INDEV
+echo "---- Class parameters Ingress  ----------"
+$TC class ls dev $INDEV
+echo "---- filter parameters Ingress ----------"
+$TC filter ls dev $INDEV parent ffff:
+
+#deleting the ingress qdisc
+#$TC qdisc del dev $INDEV ingress
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..226ec1c54072541797b7caf33de435d4c1333207 100644 (file)
@@ -0,0 +1,76 @@
+#! /bin/sh
+
+TC=/home/root/tc
+IP=/home/root/ip
+DEVICE=eth1
+BANDWIDTH="bandwidth 10Mbit"
+
+# Attach CBQ on $DEVICE. It will have handle 1:.
+#   $BANDWIDTH is real $DEVICE bandwidth (10Mbit).
+#   avpkt is average packet size.
+#   mpu is minimal packet size.
+
+$TC qdisc add dev $DEVICE  root  handle 1:  cbq \
+$BANDWIDTH avpkt 1000 mpu 64
+
+# Create root class with classid 1:1. This step is not necessary.
+#   bandwidth is the same as on CBQ itself.
+#   rate == all the bandwidth
+#   allot is MTU + MAC header
+#   maxburst measure allowed class burstiness (please,read S.Floyd and VJ papers)
+#   est 1sec 8sec means, that kernel will evaluate average rate
+#                 on this class with period 1sec and time constant 8sec.
+#                 This rate is viewed with "tc -s class ls dev $DEVICE"
+
+$TC class add dev $DEVICE parent 1:0 classid :1 est 1sec 8sec cbq \
+$BANDWIDTH rate 10Mbit allot 1514 maxburst 50 avpkt 1000
+
+# Bulk.
+#    New parameters are: 
+#    weight, which is set to be proportional to
+#            "rate". It is not necessary, weight=1 will work as well.
+#    defmap and split say that best effort traffic, not classified
+#            by another means will fall to this class.
+
+$TC class add dev $DEVICE parent 1:1 classid :2 est 1sec 8sec cbq \
+$BANDWIDTH rate 4Mbit allot 1514 weight 500Kbit \
+prio 6 maxburst 50 avpkt 1000 split 1:0 defmap ff3d
+
+# OPTIONAL.
+# Attach "sfq" qdisc to this class, quantum is MTU, perturb
+# gives period of hash function perturbation in seconds.
+#
+$TC qdisc add dev $DEVICE parent 1:2 sfq quantum 1514b perturb 15
+
+# Interactive-burst class
+
+$TC class add dev $DEVICE parent 1:1 classid :3 est 2sec 16sec cbq \
+$BANDWIDTH rate 1Mbit allot 1514 weight 100Kbit \
+prio 2 maxburst 100 avpkt 1000 split 1:0 defmap c0
+
+$TC qdisc add dev $DEVICE parent 1:3 sfq quantum 1514b perturb 15
+
+# Background.
+
+$TC class add dev $DEVICE parent 1:1 classid :4 est 1sec 8sec cbq \
+  $BANDWIDTH rate 100Kbit allot 1514 weight 10Mbit \
+  prio 7 maxburst 10 avpkt 1000 split 1:0 defmap 2
+
+$TC qdisc add dev $DEVICE parent 1:4 sfq quantum 1514b perturb 15
+
+# Realtime class for RSVP
+
+$TC class add dev $DEVICE parent 1:1 classid 1:7FFE cbq \
+rate 5Mbit $BANDWIDTH allot 1514b avpkt 1000 \
+maxburst 20
+
+# Reclassified realtime traffic
+#
+# New element: split is not 1:0, but 1:7FFE. It means,
+#     that only real-time packets, which violated policing filters
+#     or exceeded reshaping buffers will fall to it.
+
+$TC class add dev $DEVICE parent 1:7FFE classid 1:7FFF  est 4sec 32sec cbq \
+rate 1Mbit $BANDWIDTH allot 1514b avpkt 1000 weight 10Kbit \
+prio 6 maxburst 10 split 1:7FFE defmap ffff
+
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..7207b57d2f46944556354748b673d9a50c7eb1de 100644 (file)
@@ -0,0 +1,446 @@
+#!/bin/bash
+#
+# dhclient-script for Linux.
+#
+#              This program is free software; you can redistribute it and/or
+#              modify it under the terms of the GNU General Public License
+#              as published by the Free Software Foundation; either version
+#              2 of the License, or (at your option) any later version.
+#
+# Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+#
+# Probably, I did not understand, what this funny feature as "alias"
+# means exactly. For now I suppose, that it is a static address, which
+# we should install and preserve.
+#
+
+exec >> /tmp/DHS.log 2>&1
+
+echo dhc-script $* reason=$reason
+set | grep "^\(old_\|new_\|check_\)"
+
+LOG () {
+    echo LOG $* ;
+}
+
+# convert 8bit mask to length
+# arg: $1 = mask
+#
+Mask8ToLen() {
+       local l=0;
+
+       while [ $l -le 7 ]; do
+               if [ $[ ( 1 << $l ) + $1 ] -eq 256 ]; then
+                       return  $[ 8 - $l ]
+               fi
+               l=$[ $l + 1 ]
+       done
+       return 0;
+}
+
+# convert inet dotted quad mask to length
+# arg: $1 = dotquad mask
+#
+MaskToLen() {
+ local masklen=0
+ local mask8=$1
+
+ case $1 in
+ 0.0.0.0)
+       return 0;
+       ;;
+ 255.*.0.0)
+       masklen=8
+       mask8=${mask8#255.}
+       mask8=${mask8%.0.0}
+       ;;
+ 255.255.*.0)
+       masklen=16
+       mask8=${mask8#255.255.}
+       mask8=${mask8%.0}
+       ;;
+ 255.255.255.*)
+       masklen=24
+       mask8=${mask8#255.255.255.}
+       ;;
+ *)
+       return 255
+       ;;
+ esac
+ Mask8ToLen $mask8
+ return $[ $? + $masklen ]
+}
+
+# calculate ABC "natural" mask
+# arg: $1 = dotquad address
+#
+ABCMask () {
+ local class;
+
+ class=${1%%.*}
+
+ if [ "$1" = "255.255.255.255" ]; then
+    echo $1
+ elif [ "$1" = "0.0.0.0" ]; then
+    echo $1
+ elif [ $class -ge 224 ]; then
+    echo 240.0.0.0
+ elif [ $class -ge 192 ]; then
+    echo 255.255.255.0
+ elif [ $class -ge 128 ]; then
+    echo 255.255.0.0
+ else
+    echo 255.0.0.0
+ fi
+}
+
+# calculate ABC "natural" mask length
+# arg: $1 = dotquad address
+#
+ABCMaskLen () {
+ local class;
+
+ class=${1%%.*}
+
+ if [ "$1" = "255.255.255.255" ]; then
+    return 32
+ elif [ "$1" = "0.0.0.0" ]; then
+    return 0
+ elif [ $class -ge 224 ]; then
+    return 4;
+ elif [ $class -ge 192 ]; then
+    return 24;
+ elif [ $class -ge 128 ]; then
+    return 16;
+ else
+    return 8;
+ fi
+}
+
+# Delete IP address
+# args: $1 = interface
+#       $2 = address
+#       $3 = mask
+#       $4 = broadcast
+#       $5 = label
+#
+DelINETAddr () {
+  local masklen=32
+  local addrid=$1
+
+  LOG DelINETAddr $*
+
+  if [ "$5" ]; then
+    addrid=$addrid:$5
+  fi
+  LOG ifconfig $addrid down
+  ifconfig $addrid down
+}
+
+# Add IP address
+# args: $1 = interface
+#       $2 = address
+#       $3 = mask
+#       $4 = broadcast
+#       $5 = label
+#
+AddINETAddr () {
+  local mask_arg
+  local brd_arg
+  local addrid=$1
+
+  LOG AddINETAddr $*
+
+  if [ "$5" ]; then
+    addrid=$addrid:$5
+  fi
+  if [ "$3" ]; then
+    mask_arg="netmask $3"
+  fi
+  if [ "$4" ]; then
+    brd_arg="broadcast $4"
+  fi
+
+  LOG ifconfig $addrid $2 $mask_arg $brd_arg up
+  ifconfig $addrid $2 $mask_arg $brd_arg up
+}
+
+# Add default routes
+# args: $1 = routers list
+#
+AddDefaultRoutes() {
+    local router
+
+    if [ "$1" ]; then
+      LOG AddDefaultRoutes $*
+      for router in $1; do
+        LOG route add default gw $router
+        route add default gw $router
+      done ;
+    fi
+}
+
+# Delete default routes
+# args: $1 = routers list
+#
+DelDefaultRoutes() {
+    local router
+
+    if [ "$1" ]; then
+      LOG DelDefaultRoutes $*
+
+      for router in $1; do
+        LOG route del default gw $router
+        route del default gw $router
+      done
+    fi
+}
+
+# ping a host
+# args: $1 = dotquad address of the host
+#
+PingNode() {
+    LOG PingNode $*
+    if ping -q -c 1 -w 2 $1 ; then
+       return 0;
+    fi
+    return 1;
+}
+
+# Check (and add route, if alive) default routers
+# args: $1 = routers list
+# returns: 0 if at least one router is alive.
+#
+CheckRouterList() {
+    local router
+    local succeed=1
+
+    LOG CheckRouterList $*
+
+    for router in $1; do
+      if PingNode $router ; then
+       succeed=0
+        route add default gw $router
+      fi
+    done
+    return $succeed
+}
+
+# Delete/create static routes.
+# args: $1 = operation (del/add)
+#       $2 = routes list in format "dst1 nexthop1 dst2 ..."
+#
+# BEWARE: this feature of DHCP is obsolete, because does not
+#         support subnetting.
+#
+X-StaticRouteList() {
+    local op=$1
+    local lst="$2"
+    local masklen
+
+    LOG X-StaticRouteList $*
+
+    if [ "$lst" ]; then
+      set $lst
+      while [ $# -gt 1 ]; do
+       route $op -net $1 netmask `ABCMask "$1"` gw $2
+       shift; shift;
+      done
+   fi
+}
+
+# Create static routes.
+# arg: $1 = routes list in format "dst1 nexthop1 dst2 ..."
+#
+AddStaticRouteList() {
+    LOG AddStaticRouteList $*
+    X-StaticRouteList add "$1"
+}
+
+# Delete static routes.
+# arg: $1 = routes list in format "dst1 nexthop1 dst2 ..."
+#
+DelStaticRouteList() {
+    LOG DelStaticRouteList $*
+    X-StaticRouteList del "$1"
+}
+
+# Broadcast unsolicited ARP to update neighbours' caches.
+# args: $1 = interface
+#       $2 = address
+#
+UnsolicitedARP() {
+    if [ -f /sbin/arping ]; then
+       /sbin/arping -A -c 1 -I "$1" "$2" &
+       (sleep 2 ; /sbin/arping -U -c 1 -I "$1" "$2" ) &
+    fi
+}
+
+# Duplicate address detection.
+# args: $1 = interface
+#       $2 = test address
+# returns: 0, if DAD succeeded.
+DAD() {
+  if [ -f /sbin/arping ]; then
+       /sbin/arping -c 2 -w 3 -D -I "$1" "$2"
+       return $?
+  fi
+  return 0
+}
+
+
+# Setup resolver.
+# args: NO
+#       domain and nameserver list are passed in global variables.
+#
+# NOTE: we try to be careful and not to break user supplied resolv.conf.
+#       The script mangles it, only if it has dhcp magic signature.
+#
+UpdateDNS() {
+    local nameserver
+    local idstring="#### Generated by DHCPCD"
+
+    LOG UpdateDNS $*
+
+    if [ "$new_domain_name" = "" -a "$new_domain_name_servers" = "" ]; then
+       return 0;
+    fi
+
+    echo $idstring > /etc/resolv.conf.dhcp
+    if [ "$new_domain_name" ]; then
+       echo search $new_domain_name >> /etc/resolv.conf.dhcp
+    fi
+    echo options ndots:1 >> /etc/resolv.conf.dhcp
+
+    if [ "$new_domain_name_servers" ]; then
+       for nameserver in $new_domain_name_servers; do
+           echo nameserver $nameserver >> /etc/resolv.conf.dhcp
+       done
+    else
+       echo nameserver 127.0.0.1 >> /etc/resolv.conf.dhcp
+    fi
+
+    if [ -f /etc/resolv.conf ]; then
+       if [ "`head -1 /etc/resolv.conf`" != "$idstring" ]; then
+           return 0
+       fi
+       if [ "$old_domain_name" = "$new_domain_name" -a
+            "$new_domain_name_servers" = "$old_domain_name_servers" ]; then
+            return 0
+       fi
+    fi
+    mv /etc/resolv.conf.dhcp /etc/resolv.conf
+}
+
+# Dispatch on $reason. It is not assigned anywhere in this script, so it
+# is presumably exported by the DHCP client that invokes it (TODO confirm).
+# Addresses are kept on the alias interfaces "$interface:dhcp" (the lease)
+# and "$interface:dhcp1" (the alias address).
+case $reason in
+NBI)
+  exit 1
+  ;;
+
+MEDIUM)
+  exit 0
+  ;;
+
+PREINIT)
+  # Take both alias interfaces down before bringing :dhcp up again.
+  ifconfig $interface:dhcp down
+  ifconfig $interface:dhcp1 down
+  if [ -d /proc/sys/net/ipv4/conf/$interface ]; then
+    ifconfig $interface:dhcp 10.10.10.10 netmask 255.255.255.255
+    ifconfig $interface:dhcp down
+    # NOTE(review): this repeats the outer test, so the message is logged
+    # unless the directory vanished in between -- confirm the intent.
+    if [ -d /proc/sys/net/ipv4/conf/$interface ]; then
+       LOG The interface $interface already configured.
+    fi
+  fi
+  ifconfig $interface:dhcp up
+  exit 0
+  ;;
+
+ARPSEND)
+  exit 0
+  ;;
+
+ARPCHECK)
+  # Succeed only if duplicate address detection passes for the address.
+  if DAD "$interface" "$check_ip_address" ; then
+    exit 0
+  fi
+  exit 1
+  ;;
+
+BOUND|RENEW|REBIND|REBOOT)
+  # Drop the alias address when it differs from the old lease address.
+  if [ "$old_ip_address" -a "$alias_ip_address" -a \
+       "$alias_ip_address" != "$old_ip_address" ]; then
+    DelINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
+  fi
+  # The lease address changed: remove the old address and its routes.
+  if [ "$old_ip_address" -a "$old_ip_address" != "$new_ip_address" ]; then
+    DelINETAddr "$interface" "$old_ip_address" "$old_subnet_mask" "$old_broadcast_address" dhcp
+    DelDefaultRoutes "$old_routers"
+    DelStaticRouteList "$old_static_routes"
+  fi
+  # Install the new address and routes when there was no old address, the
+  # address changed, or the reason is BOUND or REBOOT.
+  if [ "$old_ip_address" = "" -o "$old_ip_address" != "$new_ip_address" -o \
+       "$reason" = "BOUND" -o "$reason" = "REBOOT" ]; then
+    AddINETAddr "$interface" "$new_ip_address" "$new_subnet_mask" "$new_broadcast_address" dhcp
+    AddStaticRouteList "$new_static_routes"
+    AddDefaultRoutes "$new_routers"
+    UnsolicitedARP "$interface" "$new_ip_address"
+  fi
+  # (Re)install the alias unless it coincides with the new lease address.
+  if [ "$new_ip_address" != "$alias_ip_address" -a "$alias_ip_address" ]; then
+    AddINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
+  fi
+  UpdateDNS
+  exit 0
+  ;;
+
+EXPIRE|FAIL)
+  # Remove the alias and the old lease configuration, then put the alias
+  # address back.
+  if [ "$alias_ip_address" ]; then
+    DelINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
+  fi
+  if [ "$old_ip_address" ]; then
+    DelINETAddr "$interface" "$old_ip_address" "$old_subnet_mask" "$old_broadcast_address" dhcp
+    DelDefaultRoutes "$old_routers"
+    DelStaticRouteList "$old_static_routes"
+  fi
+  if [ "$alias_ip_address" ]; then
+    AddINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
+  fi
+  exit 0
+  ;;
+
+TIMEOUT)
+  if [ "$alias_ip_address" ]; then
+    DelINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
+  fi
+# Seems, <null address> means, that no more old leases found.
+# Or does it mean bug in dhcpcd? 8) Fail for now.
+  if [ "$new_ip_address" = "<null address>" ]; then
+    if [ "$old_ip_address" ]; then
+       DelINETAddr "$interface" "$old_ip_address" "$old_subnet_mask" "$old_broadcast_address" dhcp
+    fi
+    if [ "$alias_ip_address" ]; then
+        AddINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
+    fi
+    exit 1
+  fi
+  # Configure the offered address only if DAD passes, and keep it only if
+  # at least one router from $new_routers answers a ping.
+  if DAD "$interface" "$new_ip_address" ; then
+    AddINETAddr "$interface" "$new_ip_address" "$new_subnet_mask" "$new_broadcast_address" dhcp
+    UnsolicitedARP "$interface" "$new_ip_address"
+    if [ "$alias_ip_address" -a "$alias_ip_address" != "$new_ip_address" ]; then
+      AddINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
+      UnsolicitedARP "$interface" "$alias_ip_address"
+    fi
+    if CheckRouterList "$new_routers" ; then
+       AddStaticRouteList "$new_static_routes"
+       UpdateDNS
+       exit 0
+    fi
+  fi
+  # Reached when DAD or the router check failed: undo the configuration,
+  # restore the alias address and report failure.
+  DelINETAddr "$interface" "$new_ip_address" "$new_subnet_mask" "$new_broadcast_address" dhcp
+  DelDefaultRoutes "$old_routers"
+  DelStaticRouteList "$old_static_routes"
+  if [ "$alias_ip_address" ]; then
+    AddINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
+  fi
+  exit 1
+  ;;
+esac
+
+# Any other reason value is accepted silently.
+exit 0
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..4ddffdd195f15ba6ebe535a05da6a20fa8961bb0 100644 (file)
@@ -0,0 +1,68 @@
+#! /bin/sh -x
+#
+# sample script on using the ingress capabilities
+# This script just tags on the ingress interface using Ipchains
+# the result is used for fast classification and re-marking
+# on the egress interface
+#
+#path to various utilities;
+#change to reflect yours.
+#
+IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
+TC=$IPROUTE/tc/tc
+IP=$IPROUTE/ip/ip
+IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
+INDEV=eth2
+EGDEV="dev eth1"
+#
+# tag all incoming packets from host 10.2.0.24 to value 1
+# tag all incoming packets from host 10.2.0.3 to value 2
+# tag the rest of incoming packets from subnet 10.2.0.0/24 to value 3
+#These values are used in the egress
+#
+############################################################ 
+$IPCHAINS -A input -s 10.2.0.4/24 -m 3
+$IPCHAINS -A input -i $INDEV -s 10.2.0.24 -m 1
+$IPCHAINS -A input -i $INDEV -s 10.2.0.3 -m 2
+
+######################## Egress side ########################
+
+
+# attach a dsmarker
+#
+$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64 set_tc_index
+#
+# values of the DSCP to change depending on the class
+#
+#becomes EF
+$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
+       value 0xb8
+#becomes AF11
+$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
+       value 0x28
+#becomes AF21
+$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
+       value 0x48
+#
+#
+# The class mapping
+#
+$TC filter add $EGDEV parent 1:0 protocol ip prio 4 handle 1 fw classid 1:1
+$TC filter add $EGDEV parent 1:0 protocol ip prio 4 handle 2 fw classid 1:2
+$TC filter add $EGDEV parent 1:0 protocol ip prio 4 handle 3 fw classid 1:3
+#
+
+#
+echo "---- qdisc parameters Ingress  ----------"
+$TC qdisc ls dev $INDEV
+echo "---- Class parameters Ingress  ----------"
+$TC class ls dev $INDEV
+echo "---- filter parameters Ingress ----------"
+$TC filter ls dev $INDEV parent 1:0
+
+echo "---- qdisc parameters Egress  ----------"
+$TC qdisc ls $EGDEV
+echo "---- Class parameters Egress  ----------"
+$TC class ls $EGDEV
+echo "---- filter parameters Egress ----------"
+$TC filter ls $EGDEV parent 1:0
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..2f78da24e1e77aa4af5c5015c7fd864dc12082b4 100644 (file)
@@ -0,0 +1,87 @@
+#! /bin/sh -x
+#
+# sample script on using the ingress capabilities
+# This script tags the fwmark on the ingress interface using IPchains
+# the result is used first for policing on the Ingress interface then
+# for fast classification and re-marking
+# on the egress interface
+#
+#path to various utilities;
+#change to reflect yours.
+#
+IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
+TC=$IPROUTE/tc/tc
+IP=$IPROUTE/ip/ip
+IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
+INDEV=eth2
+EGDEV="dev eth1"
+#
+# tag all incoming packets from host 10.2.0.24 to value 1
+# tag all incoming packets from host 10.2.0.3 to value 2
+# tag the rest of incoming packets from subnet 10.2.0.0/24 to value 3
+#These values are used in the egress
+############################################################ 
+$IPCHAINS -A input -s 10.2.0.0/24 -m 3
+$IPCHAINS -A input -i $INDEV -s 10.2.0.24 -m 1
+$IPCHAINS -A input -i $INDEV -s 10.2.0.3 -m 2
+############################################################ 
+#
+# install the ingress qdisc on the ingress interface
+############################################################ 
+$TC qdisc add dev $INDEV handle ffff: ingress
+############################################################ 
+
+#
+# attach a fw classifier to the ingress which polices anything marked
+# by ipchains to tag value 3 (The rest of the subnet packets -- not
+# tag 1 or 2) to not go beyond 1.5Mbps
+# Allow up to at least 60 packets to burst (assuming maximum packet
+# size of 1.5 KB) in the long run and up to about 6 packets in the
+# short run
+
+############################################################ 
+$TC filter add dev $INDEV parent ffff: protocol ip prio 50 handle 3 fw \
+police rate 1500kbit burst 90k mtu 9k drop flowid :1
+############################################################ 
+
+######################## Egress side ########################
+
+
+# attach a dsmarker
+#
+$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
+#
+# values of the DSCP to change depending on the class
+#
+$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
+       value 0xb8
+$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
+       value 0x28
+$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
+       value 0x48
+#
+#
+# The class mapping
+#
+$TC filter add $EGDEV parent 1:0 protocol ip prio 4 handle 1 fw classid 1:1
+$TC filter add $EGDEV parent 1:0 protocol ip prio 4 handle 2 fw classid 1:2
+$TC filter add $EGDEV parent 1:0 protocol ip prio 4 handle 3 fw classid 1:3
+#
+
+#
+echo "---- qdisc parameters Ingress  ----------"
+$TC qdisc ls dev $INDEV
+echo "---- Class parameters Ingress  ----------"
+$TC class ls dev $INDEV
+echo "---- filter parameters Ingress ----------"
+$TC filter ls dev $INDEV parent ffff:
+
+echo "---- qdisc parameters Egress  ----------"
+$TC qdisc ls $EGDEV
+echo "---- Class parameters Egress  ----------"
+$TC class ls $EGDEV
+echo "---- filter parameters Egress ----------"
+$TC filter ls $EGDEV parent 1:0
+#
+#deleting the ingress qdisc
+#$TC qdisc del dev $INDEV ingress
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..25e6c0b1039993029e8e2e6f1ea8d5c2d9596e17 100644 (file)
@@ -0,0 +1,170 @@
+#! /bin/sh -x
+#
+# sample script on using the ingress capabilities using u32 classifier
+# This script tags tcindex based on metering on the ingress 
+# interface the result is used for fast classification and re-marking
+# on the egress interface
+# This is an example of a color aware mode marker with PIR configured
+# based on draft-wahjak-mcm-00.txt (section 3.1)
+#
+# The colors are defined using the Diffserv Fields
+#path to various utilities;
+#change to reflect yours.
+#
+IPROUTE=/usr/src/iproute2-current
+TC=$IPROUTE/tc/tc
+IP=$IPROUTE/ip/ip
+INDEV=eth0
+EGDEV="dev eth1"
+CIR1=1500kbit
+CIR2=1000kbit
+
+#The CBS is about 60 MTU sized packets
+CBS1=90k
+CBS2=90k
+
+############################################################ 
+#
+# install the ingress qdisc on the ingress interface
+$TC qdisc add dev $INDEV handle ffff: ingress
+############################################################ 
+#
+# Create u32 filters 
+$TC filter add dev $INDEV parent ffff: protocol ip prio 4 handle 1: u32 \
+divisor 1
+############################################################ 
+
+# The meters: Note that we have shared meters in this case as identified
+# by the index parameter
+meter1=" police index 1 rate $CIR1 burst $CBS1 "
+meter2=" police index 2 rate $CIR2 burst $CBS1 "
+meter3=" police index 3 rate $CIR2 burst $CBS2 "
+meter4=" police index 4 rate $CIR1 burst $CBS2 "
+meter5=" police index 5 rate $CIR1 burst $CBS2 "
+
+# All packets are marked with a tcindex value which is used on the egress
+# tcindex 1 maps to AF41, 2->AF42, 3->AF43, 4->BE
+
+# *********************** AF41 *************************** 
+#AF41 (DSCP 0x22) is passed on with a tcindex value 1
+#if it doesnt exceed its CIR/CBS 
+#policer 1  is used.
+#
+$TC filter add dev $INDEV parent ffff: protocol ip prio 4 u32 \
+match ip tos 0x88 0xfc \
+$meter1 \
+continue flowid :1
+#
+# if it exceeds the above but not the extra rate/burst below, it gets a 
+# tcindex value  of 2
+# policer 2 is used
+#
+$TC filter add dev $INDEV parent ffff: protocol ip prio 5 u32 \
+match ip tos 0x88 0xfc \
+$meter2 \
+continue flowid :2
+#
+# if it exceeds the above but not the rule below, it gets a tcindex value
+# of 3 (policer 3)
+#
+$TC filter add dev $INDEV parent ffff: protocol ip prio 6 u32 \
+match ip tos 0x88 0xfc \
+$meter3 \
+drop flowid :3
+#
+
+# *********************** AF42 *************************** 
+#AF42 (DSCP 0x24) is passed on with a tcindex value 2
+#if it doesnt exceed its CIR/CBS 
+#policer 2 is used. Note that this is shared with the AF41
+#
+#
+$TC filter add dev $INDEV parent ffff: protocol ip prio 5 u32 \
+match ip tos 0x90 0xfc \
+$meter2 \
+continue flowid :2
+#
+# if it exceeds the above but not the rule below, it gets a tcindex value
+# of 3 (policer 3)
+#
+$TC filter add dev $INDEV parent ffff: protocol ip prio 6 u32 \
+match ip tos 0x90 0xfc \
+$meter3 \
+drop flowid :3
+#
+# *********************** AF43 *************************** 
+#
+#AF43 (DSCP 0x26) is passed on with a tcindex value 3
+#if it doesnt exceed its CIR/CBS
+#policer 3 is used. Note that this is shared with the AF41 and AF42
+#
+$TC filter add dev $INDEV parent ffff: protocol ip prio 6 u32 \
+match ip tos 0x98 0xfc \
+$meter3 \
+drop flowid :3
+#
+# *********************** BE *************************** 
+#
+# Anything else (not from the AF4*) gets discarded if it 
+# exceeds 1Mbps and by default goes to BE if it doesnt
+# Note that the BE class is also used by the AF4* in the worst
+# case
+#
+# Catch-all BE filter. Keep a blank before every trailing backslash so the
+# joined words stay separate regardless of how $meter4 is written.
+$TC filter add dev $INDEV parent ffff: protocol ip prio 7 u32 \
+match ip src 0/0 \
+$meter4 \
+drop flowid :4
+
+######################## Egress side ########################
+
+# attach a dsmarker
+#
+$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
+#
+# values of the DSCP to change depending on the class
+#note that the ECN bits are masked out
+#
+#AF41 (0x88 is 0x22 shifted to the left by two bits)
+#
+$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
+       value 0x88
+#AF42
+$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
+       value 0x90
+#AF43
+$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
+       value 0x98
+#BE
+$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
+       value 0x0
+#
+#
+# The class mapping
+#
+$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
+          handle 1 tcindex classid 1:1
+$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
+          handle 2 tcindex  classid 1:2
+$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
+          handle 3 tcindex  classid 1:3
+$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
+          handle 4 tcindex  classid 1:4
+#
+
+#
+echo "---- qdisc parameters Ingress  ----------"
+$TC qdisc ls dev $INDEV
+echo "---- Class parameters Ingress  ----------"
+$TC class ls dev $INDEV
+echo "---- filter parameters Ingress ----------"
+$TC filter ls dev $INDEV parent ffff:
+
+echo "---- qdisc parameters Egress  ----------"
+$TC qdisc ls $EGDEV
+echo "---- Class parameters Egress  ----------"
+$TC class ls $EGDEV
+echo "---- filter parameters Egress ----------"
+$TC filter ls $EGDEV parent 1:0
+#
+#deleting the ingress qdisc
+#$TC qdisc del dev $INDEV ingress
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..d7faae987ecec1b22006df77971d6b2a78a1591a 100644 (file)
@@ -0,0 +1,132 @@
+#! /bin/sh -x
+#
+# sample script on using the ingress capabilities
+# This script fwmark tags(IPchains) based on metering on the ingress 
+# interface the result is used for fast classification and re-marking
+# on the egress interface
+# This is an example of a color blind mode marker with no PIR configured
+# based on draft-wahjak-mcm-00.txt (section 3.1)
+#
+#path to various utilities;
+#change to reflect yours.
+#
+IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
+TC=$IPROUTE/tc/tc
+IP=$IPROUTE/ip/ip
+IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
+INDEV=eth2
+EGDEV="dev eth1"
+CIR1=1500kbit
+CIR2=1000kbit
+
+#The CBS is about 60 MTU sized packets
+CBS1=90k
+CBS2=90k
+
+meter1="police rate $CIR1 burst $CBS1 "
+meter2="police rate $CIR1 burst $CBS2 "
+meter3="police rate $CIR2 burst $CBS1 "
+meter4="police rate $CIR2 burst $CBS2 "
+meter5="police rate $CIR2 burst $CBS2 "
+#
+# tag all incoming packets from subnet 10.2.0.0/24 to fw value 1
+# tag all incoming packets from any other subnet to fw tag 2
+############################################################ 
+$IPCHAINS -A input -i $INDEV -s 0/0 -m 2
+$IPCHAINS -A input -i $INDEV -s 10.2.0.0/24 -m 1
+#
+############################################################ 
+# install the ingress qdisc on the ingress interface
+$TC qdisc add dev $INDEV handle ffff: ingress
+#
+############################################################ 
+
+# All packets are marked with a tcindex value which is used on the egress
+# tcindex 1 maps to AF41, 2->AF42, 3->AF43, 4->BE
+#
+############################################################ 
+# 
+# anything with fw tag of 1 is passed on with a tcindex value 1
+#if it doesnt exceed its allocated rate (CIR/CBS)
+# 
+$TC filter add dev $INDEV parent ffff: protocol ip prio 4 handle 1 fw \
+$meter1 \
+continue flowid 4:1
+#
+# if it exceeds the above but not the extra rate/burst below, it gets a 
+#tcindex value  of 2
+#
+$TC filter add dev $INDEV parent ffff: protocol ip prio 5 handle 1 fw \
+$meter2 \
+continue flowid 4:2
+#
+# if it exceeds the above but not the rule below, it gets a tcindex value
+# of 3
+#
+$TC filter add dev $INDEV parent ffff: protocol ip prio 6 handle 1 fw \
+$meter3 \
+drop flowid 4:3
+#
+# Anything else (not from the subnet 10.2.0.0/24) gets discarded if it
+# exceeds 1Mbps and by default goes to BE if it doesn't
+#
+$TC filter add dev $INDEV parent ffff: protocol ip prio 6 handle 2 fw \
+$meter5 \
+drop flowid 4:4
+
+
+######################## Egress side ########################
+
+
+# attach a dsmarker
+#
+$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
+#
+# values of the DSCP to change depending on the class
+#note that the ECN bits are masked out
+#
+#AF41 (0x88 is 0x22 shifted to the left by two bits)
+#
+$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
+       value 0x88
+#AF42
+$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
+       value 0x90
+#AF43
+$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
+       value 0x98
+#BE
+$TC class change $EGDEV classid 1:4 dsmark mask 0x3 \
+       value 0x0
+#
+#
+# The class mapping (using tcindex; could easily have
+# replaced it with the fw classifier instead)
+#
+$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
+          handle 1 tcindex classid 1:1
+$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
+          handle 2 tcindex  classid 1:2
+$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
+          handle 3 tcindex  classid 1:3
+$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
+          handle 4 tcindex  classid 1:4
+#
+
+#
+echo "---- qdisc parameters Ingress  ----------"
+$TC qdisc ls dev $INDEV
+echo "---- Class parameters Ingress  ----------"
+$TC class ls dev $INDEV
+echo "---- filter parameters Ingress ----------"
+$TC filter ls dev $INDEV parent ffff:
+
+echo "---- qdisc parameters Egress  ----------"
+$TC qdisc ls $EGDEV
+echo "---- Class parameters Egress  ----------"
+$TC class ls $EGDEV
+echo "---- filter parameters Egress ----------"
+$TC filter ls $EGDEV parent 1:0
+#
+#deleting the ingress qdisc
+#$TC qdisc del dev $INDEV ingress
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..edf21e4308fa984ee8a837902154c587844671eb 100644 (file)
@@ -0,0 +1,198 @@
+#! /bin/sh -x
+#
+# sample script on using the ingress capabilities using u32 classifier
+# This script tags tcindex based on metering on the ingress 
+# interface the result is used for fast classification and re-marking
+# on the egress interface
+# This is an example of a color aware mode marker with PIR configured
+# based on draft-wahjak-mcm-00.txt (section 3.2)
+#
+# The colors are defined using the Diffserv Fields
+#path to various utilities;
+#change to reflect yours.
+#
+IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
+TC=$IPROUTE/tc/tc
+IP=$IPROUTE/ip/ip
+IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
+INDEV=eth2
+EGDEV="dev eth1"
+CIR1=1000kbit
+CIR2=500kbit
+# the PIR is what is in excess of the CIR
+PIR1=1000kbit
+PIR2=500kbit
+
+#The CBS is about 60 MTU sized packets
+CBS1=90k
+CBS2=90k
+#the EBS is about 20 max sized packets
+EBS1=30k
+EBS2=30k
+
+# The meters: Note that we have shared meters in this case as identified
+# by the index parameter
+meter1=" police index 1 rate $CIR1 burst $CBS1 "
+meter1a=" police index 2 rate $PIR1 burst $EBS1 "
+meter2=" police index 3 rate $CIR2 burst $CBS1 "
+meter2a=" police index 4 rate $PIR2 burst $EBS1 "
+meter3=" police index 5 rate $CIR2 burst $CBS2 "
+meter3a=" police index 6 rate $PIR2 burst $EBS2 "
+meter4=" police index 7 rate $CIR1 burst $CBS2 "
+
+############################################################ 
+#
+# install the ingress qdisc on the ingress interface
+$TC qdisc add dev $INDEV handle ffff: ingress
+############################################################ 
+#
+# All packets are marked with a tcindex value which is used on the egress
+# tcindex 1 maps to AF41, 2->AF42, 3->AF43, 4->BE
+#
+# *********************** AF41 *************************** 
+#AF41 (DSCP 0x22) is passed on with a tcindex value 1
+#if it doesnt exceed its CIR/CBS + PIR/EBS
+#policer 1  is used.
+#
+$TC filter add dev $INDEV parent ffff: protocol ip prio 1 u32 \
+match ip tos 0x88 0xfc \
+$meter1 \
+continue flowid :1
+$TC filter add dev $INDEV parent ffff: protocol ip prio 2 u32 \
+match ip tos 0x88 0xfc \
+$meter1a \
+continue flowid :1
+#
+# if it exceeds the above but not the extra rate/burst below, it gets a 
+# tcindex value  of 2
+# policer 2 is used
+#
+$TC filter add dev $INDEV parent ffff: protocol ip prio 3 u32 \
+match ip tos 0x88 0xfc \
+$meter2 \
+continue flowid :2
+$TC filter add dev $INDEV parent ffff: protocol ip prio 4 u32 \
+match ip tos 0x88 0xfc \
+$meter2a \
+continue flowid :2
+#
+# if it exceeds the above but not the rule below, it gets a tcindex value
+# of 3 (policer 3)
+#
+$TC filter add dev $INDEV parent ffff: protocol ip prio 5 u32 \
+match ip tos 0x88 0xfc \
+$meter3 \
+continue flowid :3
+$TC filter add dev $INDEV parent ffff: protocol ip prio 6 u32 \
+match ip tos 0x88 0xfc \
+$meter3a \
+drop flowid :3
+#
+# *********************** AF42 *************************** 
+#AF42 (DSCP 0x24) is passed on with a tcindex value 2
+#if it doesnt exceed its CIR/CBS + PIR/EBS
+#policer 2 is used. Note that this is shared with the AF41
+#
+#
+$TC filter add dev $INDEV parent ffff: protocol ip prio 8 u32 \
+match ip tos 0x90 0xfc \
+$meter2 \
+continue flowid :2
+$TC filter add dev $INDEV parent ffff: protocol ip prio 9 u32 \
+match ip tos 0x90 0xfc \
+$meter2a \
+continue flowid :2
+#
+# if it exceeds the above but not the rule below, it gets a tcindex value
+# of 3 (policer 3)
+#
+$TC filter add dev $INDEV parent ffff: protocol ip prio 10 u32 \
+match ip tos 0x90 0xfc \
+$meter3 \
+continue flowid :3
+$TC filter add dev $INDEV parent ffff: protocol ip prio 11 u32 \
+match ip tos 0x90 0xfc \
+$meter3a \
+drop flowid :3
+
+#
+# *********************** AF43 *************************** 
+#
+#AF43 (DSCP 0x26) is passed on with a tcindex value 3
+#if it doesnt exceed its CIR/CBS + PIR/EBS
+#policer 3 is used. Note that this is shared with the AF41 and AF42
+#
+$TC filter add dev $INDEV parent ffff: protocol ip prio 13 u32 \
+match ip tos 0x98 0xfc \
+$meter3 \
+continue flowid :3
+$TC filter add dev $INDEV parent ffff: protocol ip prio 14 u32 \
+match ip tos 0x98 0xfc \
+$meter3a \
+drop flowid :3
+#
+## *********************** BE *************************** 
+##
+## Anything else (not from the AF4*) gets discarded if it 
+## exceeds 1Mbps and by default goes to BE if it doesnt
+## Note that the BE class is also used by the AF4* in the worst
+## case
+##
+# Catch-all BE filter. Keep a blank before every trailing backslash so the
+# joined words stay separate regardless of how $meter4 is written.
+$TC filter add dev $INDEV parent ffff: protocol ip prio 16 u32 \
+match ip src 0/0 \
+$meter4 \
+drop flowid :4
+
+######################## Egress side ########################
+
+# attach a dsmarker
+#
+$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
+#
+# values of the DSCP to change depending on the class
+#note that the ECN bits are masked out
+#
+#AF41 (0x88 is 0x22 shifted to the left by two bits)
+#
+$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
+       value 0x88
+#AF42
+$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
+       value 0x90
+#AF43
+$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
+       value 0x98
+#BE
+$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
+       value 0x0
+#
+#
+# The class mapping
+#
+$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
+          handle 1 tcindex classid 1:1
+$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
+          handle 2 tcindex  classid 1:2
+$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
+          handle 3 tcindex  classid 1:3
+$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
+          handle 4 tcindex  classid 1:4
+#
+
+#
+echo "---- qdisc parameters Ingress  ----------"
+$TC qdisc ls dev $INDEV
+echo "---- Class parameters Ingress  ----------"
+$TC class ls dev $INDEV
+echo "---- filter parameters Ingress ----------"
+$TC filter ls dev $INDEV parent ffff:
+
+echo "---- qdisc parameters Egress  ----------"
+$TC qdisc ls $EGDEV
+echo "---- Class parameters Egress  ----------"
+$TC class ls $EGDEV
+echo "---- filter parameters Egress ----------"
+$TC filter ls $EGDEV parent 1:0
+#
+#deleting the ingress qdisc
+#$TC qdisc del dev $INDEV ingress
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..804fad196e652d7a4805d09f8247892c5b40242e 100644 (file)
@@ -0,0 +1,144 @@
+#! /bin/sh -x
+#
+# sample script on using the ingress capabilities
+# This script fwmark tags(IPchains) based on metering on the ingress 
+# interface the result is used for fast classification and re-marking
+# on the egress interface
+# This is an example of a color blind mode marker with no PIR configured
+# based on draft-wahjak-mcm-00.txt (section 3.1)
+#
+#path to various utilities;
+#change to reflect yours.
+#
+IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
+TC=$IPROUTE/tc/tc
+IP=$IPROUTE/ip/ip
+IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
+INDEV=eth2
+EGDEV="dev eth1"
+CIR1=1500kbit
+CIR2=500kbit
+
+#The CBS is about 60 MTU sized packets
+CBS1=90k
+CBS2=90k
+
+meter1="police rate $CIR1 burst $CBS1 "
+meter1a="police rate $CIR2 burst $CBS1 "
+meter2="police rate $CIR1 burst $CBS2 "
+meter2a="police rate $CIR2 burst $CBS2 "
+meter3="police rate $CIR2 burst $CBS1 "
+meter3a="police rate $CIR2 burst $CBS1 "
+meter4="police rate $CIR2 burst $CBS2 "
+meter5="police rate $CIR1 burst $CBS2 "
+#
+# tag all incoming packets from subnet 10.2.0.0/24 to fw value 1
+# tag all incoming packets from any other subnet to fw tag 2
+############################################################ 
+$IPCHAINS -A input -i $INDEV -s 0/0 -m 2
+$IPCHAINS -A input -i $INDEV -s 10.2.0.0/24 -m 1
+#
+############################################################ 
+# install the ingress qdisc on the ingress interface
+$TC qdisc add dev $INDEV handle ffff: ingress
+#
+############################################################ 
+
+# All packets are marked with a tcindex value which is used on the egress
+# tcindex 1 maps to AF41, 2->AF42, 3->AF43, 4->BE
+#
+############################################################ 
+# 
+# anything with fw tag of 1 is passed on with a tcindex value 1
+#if it doesnt exceed its allocated rate (CIR/CBS)
+# 
+$TC filter add dev $INDEV parent ffff: protocol ip prio 1 handle 1 fw \
+$meter1 \
+continue flowid 4:1
+$TC filter add dev $INDEV parent ffff: protocol ip prio 2 handle 1 fw \
+$meter1a \
+continue flowid 4:1
+#
+# if it exceeds the above but not the extra rate/burst below, it gets a 
+#tcindex value  of 2
+#
+$TC filter add dev $INDEV parent ffff: protocol ip prio 3 handle 1 fw \
+$meter2 \
+continue flowid 4:2
+$TC filter add dev $INDEV parent ffff: protocol ip prio 4 handle 1 fw \
+$meter2a \
+continue flowid 4:2
+#
+# if it exceeds the above but not the rule below, it gets a tcindex value
+# of 3
+#
+$TC filter add dev $INDEV parent ffff: protocol ip prio 5 handle 1 fw \
+$meter3 \
+continue flowid 4:3
+$TC filter add dev $INDEV parent ffff: protocol ip prio 6 handle 1 fw \
+$meter3a \
+drop flowid 4:3
+#
+# Anything else (not from the subnet 10.2.0.0/24) gets discarded if it
+# exceeds the rate of meter5 and by default goes to BE if it doesn't
+#
+$TC filter add dev $INDEV parent ffff: protocol ip prio 7 handle 2 fw \
+$meter5 \
+drop flowid 4:4
+
+
+######################## Egress side ########################
+
+
+# attach a dsmarker
+#
+$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
+#
+# values of the DSCP to change depending on the class
+#note that the ECN bits are masked out
+#
+#AF41 (0x88 is 0x22 shifted to the left by two bits)
+#
+$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
+       value 0x88
+#AF42
+$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
+       value 0x90
+#AF43
+$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
+       value 0x98
+#BE
+$TC class change $EGDEV classid 1:4 dsmark mask 0x3 \
+       value 0x0
+#
+#
+# The class mapping (using tcindex; could easily have
+# replaced it with the fw classifier instead)
+#
+$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
+          handle 1 tcindex classid 1:1
+$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
+          handle 2 tcindex  classid 1:2
+$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
+          handle 3 tcindex  classid 1:3
+$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
+          handle 4 tcindex  classid 1:4
+#
+
+#
+echo "---- qdisc parameters Ingress  ----------"
+$TC qdisc ls dev $INDEV
+echo "---- Class parameters Ingress  ----------"
+$TC class ls dev $INDEV
+echo "---- filter parameters Ingress ----------"
+$TC filter ls dev $INDEV parent ffff:
+
+echo "---- qdisc parameters Egress  ----------"
+$TC qdisc ls $EGDEV
+echo "---- Class parameters Egress  ----------"
+$TC class ls $EGDEV
+echo "---- filter parameters Egress ----------"
+$TC filter ls $EGDEV parent 1:0
+#
+#deleting the ingress qdisc
+#$TC qdisc del dev $INDEV ingress
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..cc2ebb40a74841f8bfc25483d922f8a1d2f99165 100644 (file)
@@ -0,0 +1,145 @@
+#! /bin/sh 
+#
+# sample script on using the ingress capabilities using u32 classifier
+# This script tags tcindex based on metering on the ingress 
+# interface the result is used for fast classification and re-marking
+# on the egress interface
+# This is an example of a color blind mode marker with PIR configured
+# based on draft-wahjak-mcm-00.txt (section 3.2)
+#
+#path to various utilities;
+#change to reflect yours.
+#
+IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
+TC=$IPROUTE/tc/tc
+IP=$IPROUTE/ip/ip
+INDEV=eth2
+EGDEV="dev eth1"
+CIR1=1000kbit
+CIR2=1000kbit
+# The PIR is the excess (in addition to the CIR i.e if always
+# going to the PIR --> average rate is CIR+PIR)
+PIR1=1000kbit
+PIR2=500kbit
+
+#The CBS is about 60 MTU sized packets
+CBS1=90k
+CBS2=90k
+#the EBS is about 10 max sized packets
+EBS1=15k
+EBS2=15k
+# The meters
+meter1=" police rate $CIR1 burst $CBS1 "
+meter1a=" police rate $PIR1 burst $EBS1 "
+meter2=" police rate $CIR2 burst $CBS1 "
+meter2a="police rate $PIR2 burst $CBS1 "
+meter3=" police rate $CIR2 burst $CBS2 "
+meter3a=" police rate $PIR2 burst $EBS2 "
+meter4=" police rate $CIR1 burst $CBS2 "
+meter5=" police rate $CIR1 burst $CBS2 "
+
+
+# install the ingress qdisc on the ingress interface
+############################################################ 
+$TC qdisc add dev $INDEV handle ffff: ingress
+############################################################ 
+#
+############################################################ 
+
+# All packets are marked with a tcindex value which is used on the egress
+# NOTE: tcindex 1 maps to AF41, 2->AF42, 3->AF43, 4->BE
+# 
+#anything from subnet 10.2.0.0/24 is passed on with a tcindex value 1
+#if it doesnt exceed its CIR/CBS + PIR/EBS
+# 
+$TC filter add dev $INDEV parent ffff: protocol ip prio 1 u32 \
+match ip src 10.2.0.0/24 $meter1 \
+continue flowid :1
+$TC filter add dev $INDEV parent ffff: protocol ip prio 2 u32 \
+match ip src 10.2.0.0/24 $meter1a \
+continue flowid :1
+
+#
+# if it exceeds the above but not the extra rate/burst below, it gets a 
+#tcindex value  of 2
+#
+$TC filter add dev $INDEV parent ffff: protocol ip prio 3 u32 \
+match ip src 10.2.0.0/24 $meter2 \
+continue flowid :2
+$TC filter add dev $INDEV parent ffff: protocol ip prio 4 u32 \
+match ip src 10.2.0.0/24 $meter2a \
+continue flowid :2
+#
+# if it exceeds the above but not the rule below, it gets a tcindex value
+# of 3
+#
+$TC filter add dev $INDEV parent ffff: protocol ip prio 5 u32 \
+match ip src 10.2.0.0/24 $meter3 \
+continue flowid :3
+$TC filter add dev $INDEV parent ffff: protocol ip prio 6 u32 \
+match ip src 10.2.0.0/24 $meter3a \
+drop flowid :3
+#
+#
+# Anything else (not from the subnet 10.2.0.0/24) gets discarded if it
+# exceeds 1Mbps and by default goes to BE if it doesn't
+#
+$TC filter add dev $INDEV parent ffff: protocol ip prio 7 u32 \
+match ip src 0/0 $meter5 \
+drop flowid :4
+
+
+######################## Egress side ########################
+
+
+# attach a dsmarker
+#
+$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
+#
+# values of the DSCP to change depending on the class
+#note that the ECN bits are masked out
+#
+#AF41 (0x88 is 0x22 shifted to the left by two bits)
+#
+$TC class change $EGDEV classid 1:1 dsmark mask 0x3 \
+       value 0x88
+#AF42
+$TC class change $EGDEV classid 1:2 dsmark mask 0x3 \
+       value 0x90
+#AF43
+$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
+       value 0x98
+#BE
+$TC class change $EGDEV classid 1:3 dsmark mask 0x3 \
+       value 0x0
+#
+#
+# The class mapping
+#
+$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
+          handle 1 tcindex classid 1:1
+$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
+          handle 2 tcindex  classid 1:2
+$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
+          handle 3 tcindex  classid 1:3
+$TC filter add $EGDEV parent 1:0 protocol ip prio 1 \
+          handle 4 tcindex  classid 1:4
+#
+
+#
+echo "---- qdisc parameters Ingress  ----------"
+$TC qdisc ls dev $INDEV
+echo "---- Class parameters Ingress  ----------"
+$TC class ls dev $INDEV
+echo "---- filter parameters Ingress ----------"
+$TC filter ls dev $INDEV parent ffff:
+
+echo "---- qdisc parameters Egress  ----------"
+$TC qdisc ls $EGDEV
+echo "---- Class parameters Egress  ----------"
+$TC class ls $EGDEV
+echo "---- filter parameters Egress ----------"
+$TC filter ls $EGDEV parent 1:0
+#
+#deleting the ingress qdisc
+#$TC qdisc del dev $INDEV ingress
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..ec91d632e0c95ec5283b43318d7db86db6ab07e6 100644 (file)
@@ -0,0 +1,98 @@
+
+Note all these are mere examples which can be customized to your needs
+
+AFCBQ
+-----
+AF PHB built using CBQ, DSMARK, GRED (default in GRIO mode), RED for BE
+and the tcindex classifier with some algorithmic mapping
+
+EFCBQ
+-----
+EF PHB built using CBQ (for rate control and prioritization), 
+DSMARK( to remark DSCPs), tcindex  classifier and  RED for the BE
+traffic.
+
+EFPRIO
+------
+EF PHB using the PRIO scheduler, Token Bucket to rate control EF,
+tcindex classifier, DSMARK to remark, and RED for the BE traffic
+
+EDGE scripts
+==============
+
+CB-3(1|2)-(u32/chains)
+======================
+
+
+The major differences are that the classifier is u32 on -u32 extension
+and IPchains on the chains extension. CB stands for color Blind
+and 31 is for the mode where only a CIR and CBS are defined whereas
+32 stands for a mode where a CIR/CBS + PIR/EBS are defined.
+
+Color Blind (CB)
+================
+For simplicity, we look at one special subnet that we are interested in
+to demonstrate the capability. We send the packets from that
+subnet to AF4*, BE or end up dropping depending on the metering results. 
+
+
+The algorithm overview is as follows:
+
+*classify:
+
+**case: subnet X
+----------------
+  if !exceed meter1 tag as AF41
+       else
+           if !exceed meter2  tag as AF42
+               else
+                 if !exceed meter 3 tag as AF43
+                     else 
+                        drop 
+
+default case: Any other subnet
+-------------------------------
+  if !exceed meter 5 tag as BE
+      else
+        drop 
+
+
+On the Egress side, change the DSCPs of the packets to reflect AF4* and BE
+based on the tags from the ingress.
+
+-------------------------------------------------------------
+
+Color Aware
+===========
+
+Define some meters with policing and give them IDs, e.g.
+
+meter1=police index 1 rate $CIR1 burst $CBS1  
+meter2=police index 2 rate $CIR2 burst $CBS2   etc 
+
+General overview:
+classify based on the DSCPs and use the policer ids to decide tagging
+
+
+*classify on ingress:
+
+switch (dscp) {
+    case AF41: /* tos&0xfc == 0x88 */
+       if (!exceed meter1) {
+           tag as AF41;
+           break;
+       }
+    case AF42: /* tos&0xfc == 0x90 */
+       if (!exceed meter2) {
+           tag as AF42;
+           break;
+       }
+    case AF43: /* tos&0xfc == 0x98 */
+       if (!exceed meter3) {
+           tag as AF43;
+           break;
+       } else
+         drop;
+    default:
+       if (!exceed meter4) tag as BE;
+       else drop;
+}
+
+On the Egress side mark the proper AF tags
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..10d6d934486f19d357f2976b46b112d4c48ed9d7 100644 (file)
@@ -0,0 +1,105 @@
+#!/usr/bin/perl
+#
+#
+# AF using CBQ for a single interface eth0 
+# 4 AF classes using GRED and one BE using RED
+# Things you might want to change:
+#      - the device bandwidth (set at 10Mbits)
+#      - the bandwidth allocated for each AF class and the BE class    
+#      - the drop probability associated with each AF virtual queue
+#
+# AF DSCP values used (based on AF draft 04)
+# -----------------------------------------
+# AF DSCP values
+# AF1 1. 0x0a 2. 0x0c 3. 0x0e
+# AF2 1. 0x12 2. 0x14 3. 0x16
+# AF3 1. 0x1a 2. 0x1c 3. 0x1e
+# AF4 1. 0x22 2. 0x24 3. 0x26
+
+#
+# 
+# A simple DSCP-class relationship formula used to generate
+# values in the for loop of this script; $drop stands for the
+# DP
+#      $dscp = ($class*8+$drop*2)
+#
+#  if you use GRIO buffer sharing, then GRED priority is set as follows:
+#  $gprio=$drop+1; 
+#
+
+$TC = "/usr/src/iproute2-current/tc/tc";
+$DEV = "dev lo";
+$DEV = "dev eth1";
+$DEV = "dev eth0";
+# the BE-class number
+$beclass = "5";  
+
+#GRIO buffer sharing on or off?
+$GRIO = "";
+$GRIO = "grio";
+# The bandwidth of your device
+$linerate="10Mbit";
+# The BE and AF rates
+%rate_table=();
+$berate="1500Kbit";
+$rate_table{"AF1rate"}="1500Kbit";
+$rate_table{"AF2rate"}="1500Kbit";
+$rate_table{"AF3rate"}="1500Kbit";
+$rate_table{"AF4rate"}="1500Kbit";
+#
+#
+#
+print "\n# --- General setup  ---\n";
+print "$TC qdisc add $DEV handle 1:0 root dsmark indices 64 set_tc_index\n";
+print "$TC filter add $DEV parent 1:0 protocol ip prio 1 tcindex mask 0xfc " .
+   "shift 2 pass_on\n";
+   #"shift 2\n";
+print "$TC qdisc add $DEV parent 1:0 handle 2:0 cbq bandwidth $linerate ".
+  "cell 8 avpkt 1000 mpu 64\n";
+print "$TC filter add $DEV parent 2:0 protocol ip prio 1 tcindex ".
+  "mask 0xf0 shift 4 pass_on\n";
+for $class (1..4) {
+    print "\n# --- AF Class $class specific setup---\n";
+    $AFrate=sprintf("AF%drate",$class);
+    print "$TC class add $DEV parent 2:0 classid 2:$class cbq ".
+      "bandwidth $linerate rate $rate_table{$AFrate} avpkt 1000 prio ".
+      (6-$class)." bounded allot 1514 weight 1 maxburst 21\n";
+    print "$TC filter add $DEV parent 2:0 protocol ip prio 1 handle $class ".
+      "tcindex classid 2:$class\n";
+    print "$TC qdisc add $DEV parent 2:$class gred setup DPs 3 default 2 ".
+      "$GRIO\n";
+# 
+# per DP setup
+#
+    for $drop (1..3) {
+    print "\n# --- AF Class $class DP $drop---\n";
+       $dscp = $class*8+$drop*2;
+       $tcindex = sprintf("1%x%x",$class,$drop);
+       print "$TC filter add $DEV parent 1:0 protocol ip prio 1 ".
+         "handle $dscp tcindex classid 1:$tcindex\n";
+       $prob = $drop*0.02;
+        if ($GRIO) {
+       $gprio = $drop+1;
+       print "$TC qdisc change $DEV parent 2:$class gred limit 60KB min 15KB ".
+         "max 45KB burst 20 avpkt 1000 bandwidth $linerate DP $drop ".
+         "probability $prob ".
+          "prio $gprio\n";
+        } else {
+       print "$TC qdisc change $DEV parent 2:$class gred limit 60KB min 15KB ".
+         "max 45KB burst 20 avpkt 1000 bandwidth $linerate DP $drop ".
+         "probability $prob \n";
+       }
+    }
+}
+# BE queue: CBQ class 2:$beclass with a RED qdisc.  The class number is taken
+# from $beclass everywhere so the class, its filter and its qdisc stay in step.
+print "\n#------BE Queue setup------\n";
+print "$TC filter add $DEV parent 1:0 protocol ip prio 2 ".
+          "handle 0 tcindex mask 0 classid 1:1\n";
+print "$TC class add $DEV parent 2:0 classid 2:$beclass cbq ".
+      "bandwidth $linerate rate $berate avpkt 1000 prio 6 " .
+      "bounded allot 1514 weight 1 maxburst 21 \n";
+print "$TC filter add $DEV parent 2:0 protocol ip prio 1 handle 0 tcindex ".
+  "classid 2:$beclass\n";
+print "$TC qdisc add $DEV parent 2:$beclass red limit 60KB min 15KB max 45KB ".
+  "burst 20 avpkt 1000 bandwidth $linerate probability 0.4\n";
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..48611bdd65e840c0349168b197b06fb746ac3b14 100644 (file)
@@ -0,0 +1,25 @@
+#!/usr/bin/perl
+$TC = "/root/DS-6-beta/iproute2-990530-dsing/tc/tc";
+$DEV = "dev eth1";
+$efrate="1.5Mbit";
+$MTU="1.5kB";
+print "$TC qdisc add $DEV handle 1:0 root dsmark indices 64 set_tc_index\n";
+print "$TC filter add $DEV parent 1:0 protocol ip prio 1 tcindex ".
+  "mask 0xfc shift 2\n";
+print "$TC qdisc add $DEV parent 1:0 handle 2:0 prio\n";
+#
+# EF class: Maximum about one MTU sized packet allowed on the queue
+#
+print "$TC qdisc add $DEV parent 2:1 tbf rate $efrate burst $MTU limit 1.6kB\n";
+print "$TC filter add $DEV parent 2:0 protocol ip prio 1 ".
+         "handle 0x2e tcindex classid 2:1 pass_on\n";
+#
+# BE class
+#
+print "#BE class(2:2) \n";
+print "$TC qdisc add $DEV parent 2:2 red limit 60KB ".
+         "min 15KB max 45KB burst 20 avpkt 1000 bandwidth 10Mbit ".
+         "probability 0.4\n";
+#
+print "$TC filter add $DEV parent 2:0 protocol ip prio 2 ".
+         "handle 0 tcindex mask 0 classid 2:2 pass_on\n";
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..bcc437b3d2296678d5b6abc83ad358994756ba3c 100644 (file)
@@ -0,0 +1,31 @@
+#!/usr/bin/perl
+#
+$TC = "/root/DS-6-beta/iproute2-990530-dsing/tc/tc";
+$DEV = "dev eth1";
+print "$TC qdisc add $DEV handle 1:0 root dsmark indices 64 set_tc_index\n";
+print "$TC filter add $DEV parent 1:0 protocol ip prio 1 tcindex ".
+  "mask 0xfc shift 2\n";
+print "$TC qdisc add $DEV parent 1:0 handle 2:0 cbq bandwidth ".
+       "10Mbit cell 8 avpkt 1000 mpu 64\n";
+#
+# EF class
+#
+print "$TC class add $DEV parent 2:0 classid 2:1 cbq bandwidth ". 
+       "10Mbit rate 1500Kbit avpkt 1000 prio 1 bounded isolated ".
+       "allot 1514 weight 1 maxburst 10 \n";
+# packet fifo for EF?
+print "$TC qdisc add $DEV parent 2:1 pfifo limit 5\n";
+print "$TC filter add $DEV parent 2:0 protocol ip prio 1 ".
+         "handle 0x2e tcindex classid 2:1 pass_on\n";
+#
+# BE class
+#
+print "#BE class(2:2) \n";
+print "$TC class add $DEV parent 2:0 classid 2:2 cbq bandwidth ". 
+       "10Mbit rate 5Mbit avpkt 1000 prio 7 allot 1514 weight 1 ".
+       "maxburst 21 borrow split 2:0 defmap 0xffff \n";
+print "$TC qdisc add $DEV parent 2:2 red limit 60KB ".
+         "min 15KB max 45KB burst 20 avpkt 1000 bandwidth 10Mbit ".
+         "probability 0.4\n";
+print "$TC filter add $DEV parent 2:0 protocol ip prio 2 ".
+         "handle 0 tcindex mask 0 classid 2:2 pass_on\n";
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0ec705c09129a089cbc6616bec44a28b5dd48b9e 100644 (file)
@@ -0,0 +1,125 @@
+
+These were the tests done to validate the Diffserv scripts.
+This document will be updated continuously. If you do more
+thorough validation testing please post the details to the
+diffserv mailing list. 
+Nevertheless, these tests should serve for basic validation.
+
+AFCBQ, EFCBQ, EFPRIO
+----------------------
+
+generate all possible DSCPs and observe that they 
+get sent to the proper classes. In the case of AF also
+to the correct Virtual Queues.
+
+Edge1
+-----
+generate TOS values 0x0,0x10,0xbb each with IP addresses
+10.2.0.24 (mark 1), 10.2.0.3 (mark2) and 10.2.0.30 (mark 3)
+and observe that they get marked as expected.
+
+Edge2
+-----
+
+-Repeat the tests in Edge1
+-ftp with data direction from 10.2.0.2
+       *observe that the metering/policing works correctly (and the marking
+       as well). In this case the mark used will be 3
+
+Edge31-cb-chains
+----------------
+
+-ftp with data direction from 10.2.0.2
+
+       *observe that the metering/policing works correctly (and the marking
+       as well). In this case the mark used will be 1. 
+
+       Metering: The data throughput should not exceed 2*CIR1 + 2*CIR2
+       which is roughly: 5mbps
+
+       Marking: there should be a variation of marked packets:
+       AF41(TOS=0x88) AF42(0x90) AF43(0x98) and BE (0x0)
+
+More tests required to see the interaction of several sources (other
+than subnet 10.2.0.0/24).
+
+Edge31-ca-u32
+--------------
+
+Generate data using modified tcpblast from 10.2.0.2 (behind eth2) to the 
+discard port of 10.1.0.2 (behind eth1)
+
+1) generate with src tos = 0x88
+       Metering: Allocated throughput should not exceed 2*CIR1 + 2*CIR2
+       approximately 5mbps
+       Marking: Should vary between 0x88,0x90,0x98 and 0x0
+
+2) generate with src tos = 0x90
+       Metering: Allocated throughput should not exceed CIR1 + 2*CIR2
+       approximately 3.5mbps
+       Marking: Should vary between 0x90,0x98 and 0x0
+
+3) generate with src tos = 0x98
+       Metering: Allocated throughput should not exceed CIR1 + CIR2
+       approximately 2.5mbps
+       Marking: Should vary between 0x98 and 0x0
+
+4) generate with src tos any other than the above
+       Metering: Allocated throughput should not exceed CIR1 
+       approximately 1.5mbps
+       Marking: Should be consistent at 0x0
+
+TODO: Testing on how each color shares when all 4 types of packets
+are going through the edge device
+
+Edge32-cb-u32, Edge32-cb-chains
+-------------------------------
+
+-ftp with data direction from 10.2.0.2
+
+       *observe that the metering/policing works correctly (and the marking
+       as well). 
+
+       Metering: 
+        The data throughput should not exceed 2*CIR1 + 2*CIR2
+       + 2*PIR2 + PIR1 for u32 which is roughly: 6mbps
+        The data throughput should not exceed 2*CIR1 + 5*CIR2
+       for chains which is roughly: 6mbps
+
+       Marking: there should be a variation of marked packets:
+       AF41(TOS=0x88) AF42(0x90) AF43(0x98) and BE (0x0)
+
+TODO:
+-More tests required to see the interaction of several sources (other
+than subnet 10.2.0.0/24).
+-More tests needed to capture stats on how many times the CIR was exceeded
+but the data was not remarked etc.
+
+Edge32-ca-u32
+--------------
+
+Generate data using modified tcpblast from 10.2.0.2 (behind eth2) to the 
+discard port of 10.1.0.2 (behind eth1)
+
+1) generate with src tos = 0x88
+       Metering: Allocated throughput should not exceed 2*CIR1 + 2*CIR2
+       +PIR1 -- approximately 4mbps
+       Marking: Should vary between 0x88,0x90,0x98 and 0x0
+
+2) generate with src tos = 0x90
+       Metering: Allocated throughput should not exceed CIR1 + 2*CIR2
+       + 2* PIR2 approximately 3mbps
+       Marking: Should vary between 0x90,0x98 and 0x0
+
+3) generate with src tos = 0x98
+       Metering: Allocated throughput should not exceed PIR1+ CIR1 + CIR2
+       approximately 2.5mbps
+       Marking: Should vary between 0x98 and 0x0
+
+4) generate with src tos any other than the above
+       Metering: Allocated throughput should not exceed CIR1 
+       approximately 1mbps
+       Marking: Should be consistent at 0x0
+
+TODO: Testing on how each color shares when all 4 types of packets
+are going through the edge device
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..b83add82db689f0548eec9f29e2e48c45c030b44 100644 (file)
@@ -0,0 +1,25 @@
+
+/* I cannot describe, how I laughed, when saw, that now sys/socket.h
+   includes ALL OF networking include files. 8)8)8)
+
+   Bravo! Aah, they forgot sockaddr_ll, sockaddr_pkt and sockaddr_nl...
+   Not a big problem, we only start the way to single UNIVERSAL include file:
+
+   #include <GNU-Gnu_is_Not_Unix.h>.
+
+   Jokes apart, it is full crap. Removed.
+   --ANK
+
+ */
+
+/* Union of all sockaddr types (required by IPv6 Basic API).  This is
+   somewhat evil.  */
+/* 8)8) Well, ipngwg really does strange things sometimes, but
+   not in such extent! It is removed long ago --ANK
+ */
+
+union sockaddr_union
+  {
+    struct sockaddr sa;
+    char __maxsize[128];
+  };
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..296584c255fa620537e66e10a8497b5c0062d49e 100644 (file)
@@ -0,0 +1,10 @@
+/* Mess with various libdb in various glibcs is something...
+ * Crooked hands of hackers can result in amazing results making
+ * incompatibility at all the levels without any reasons.
+ *
+ * The simplest trick which I was able to invent is to write fake
+ * db.h including db_185.h and adding -I/usr/include/db3 to CFLAGS.
+ * Looks ugly but compiles everywhere.
+ */
+
+#include <db_185.h>
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..65e3d8ad6eac229573fad3d3ddde145596e04fdc 100644 (file)
@@ -0,0 +1,20 @@
+#ifndef __GLIBC_BUGS_H__
+#define __GLIBC_BUGS_H__ 1
+
+#include <features.h>
+#include <sys/types.h>
+
+#if defined(__GLIBC__) && __GLIBC__ >= 2
+
+#ifndef __KERNEL_STRICT_NAMES
+#define __KERNEL_STRICT_NAMES 1
+#endif
+
+#include <linux/types.h>
+
+typedef __u16 in_port_t;
+typedef __u32 in_addr_t;
+
+#endif
+
+#endif
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..784a66ca994948a8242af8c37938faabb885b0b0 100644 (file)
@@ -0,0 +1,11 @@
+#ifndef        _NETINET_IN_H
+#define        _NETINET_IN_H   1
+
+#include "glibc-bugs.h"
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <linux/in.h>
+
+#define SOL_IP 0
+
+#endif /* netinet/in.h */
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..8812e6764a23afa7ac1d27302f3150511b1f5ed2 100644 (file)
@@ -0,0 +1,9 @@
+#ifndef __NETINET_IP_H
+#define __NETINET_IP_H 1
+
+#include <glibc-bugs.h>
+#include <netinet/in.h>
+
+#include <linux/ip.h>
+
+#endif /* netinet/ip.h */
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..5421d6b88e858ebb366d7ce909e440d7401b3e5e 100644 (file)
@@ -0,0 +1,270 @@
+/* System-specific socket constants and types.  Linux version.
+   Copyright (C) 1991, 92, 94, 95, 96, 97, 98 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#ifndef        _SOCKETBITS_H
+
+#define        _SOCKETBITS_H   1
+#include <features.h>
+
+#define        __need_size_t
+#define __need_NULL
+#include <stddef.h>
+
+
+__BEGIN_DECLS
+
+/* Type for length arguments in socket calls.  */
+#ifndef __socklen_t_defined
+typedef unsigned int socklen_t;
+# define __socklen_t_defined
+#endif
+
+/* Types of sockets.  */
+enum __socket_type
+{
+  SOCK_STREAM = 1,             /* Sequenced, reliable, connection-based
+                                  byte streams.  */
+#define SOCK_STREAM SOCK_STREAM
+  SOCK_DGRAM = 2,              /* Connectionless, unreliable datagrams
+                                  of fixed maximum length.  */
+#define SOCK_DGRAM SOCK_DGRAM
+  SOCK_RAW = 3,                        /* Raw protocol interface.  */
+#define SOCK_RAW SOCK_RAW
+  SOCK_RDM = 4,                        /* Reliably-delivered messages.  */
+#define SOCK_RDM SOCK_RDM
+  SOCK_SEQPACKET = 5,          /* Sequenced, reliable, connection-based,
+                                  datagrams of fixed maximum length.  */
+#define SOCK_SEQPACKET SOCK_SEQPACKET
+  SOCK_PACKET = 10             /* Linux specific way of getting packets
+                                  at the dev level.  For writing rarp and
+                                  other similar things on the user level. */
+#define SOCK_PACKET SOCK_PACKET
+};
+
+/* Protocol families.  */
+#define        PF_UNSPEC       0       /* Unspecified.  */
+#define        PF_LOCAL        1       /* Local to host (pipes and file-domain).  */
+#define        PF_UNIX         PF_LOCAL /* Old BSD name for PF_LOCAL.  */
+#define        PF_FILE         PF_LOCAL /* POSIX name for PF_LOCAL.  */
+#define        PF_INET         2       /* IP protocol family.  */
+#define        PF_AX25         3       /* Amateur Radio AX.25.  */
+#define        PF_IPX          4       /* Novell Internet Protocol.  */
+#define        PF_APPLETALK    5       /* Don't use this.  */
+#define        PF_NETROM       6       /* Amateur radio NetROM.  */
+#define        PF_BRIDGE       7       /* Multiprotocol bridge.  */
+#define        PF_AAL5         8       /* Reserved for Werner's ATM.  */
+#define        PF_X25          9       /* Reserved for X.25 project.  */
+#define        PF_INET6        10      /* IP version 6.  */
+#define        PF_ROSE         11      /* Amateur Radio X.25 PLP       */
+#define        PF_DECnet       12      /* Reserved for DECnet project  */
+#define        PF_NETBEUI      13      /* Reserved for 802.2LLC project*/
+#define        PF_SECURITY     14      /* Security callback pseudo AF */
+#define        PF_KEY          15      /* PF_KEY key management API */
+#define        PF_NETLINK      16
+#define        PF_ROUTE        PF_NETLINK /* Alias to emulate 4.4BSD */
+#define        PF_PACKET       17      /* Packet family                */
+#define        PF_MAX          32      /* For now.. */
+
+/* Address families.  */
+#define        AF_UNSPEC       PF_UNSPEC
+#define        AF_LOCAL        PF_LOCAL
+#define        AF_UNIX         PF_UNIX
+#define        AF_FILE         PF_FILE
+#define        AF_INET         PF_INET
+#define        AF_AX25         PF_AX25
+#define        AF_IPX          PF_IPX
+#define        AF_APPLETALK    PF_APPLETALK
+#define        AF_NETROM       PF_NETROM
+#define        AF_BRIDGE       PF_BRIDGE
+#define        AF_AAL5         PF_AAL5
+#define        AF_X25          PF_X25
+#define        AF_INET6        PF_INET6
+#define        AF_ROSE         PF_ROSE
+#define        AF_DECnet       PF_DECnet
+#define        AF_NETBEUI      PF_NETBEUI
+#define        AF_SECURITY     PF_SECURITY
+#define        AF_KEY          PF_KEY  /* Address-family alias of PF_KEY above.  */
+#define        AF_NETLINK      PF_NETLINK
+#define        AF_ROUTE        PF_ROUTE
+#define        AF_PACKET       PF_PACKET
+#define        AF_MAX          PF_MAX
+
+/* Socket level values.  Others are defined in the appropriate headers.
+
+   XXX These definitions also should go into the appropriate headers as
+   far as they are available.  */
+#define SOL_IPV6        41
+#define SOL_ICMPV6     58
+#define SOL_RAW                255
+#define SOL_AX25        257
+#define SOL_ATALK      258
+#define SOL_NETROM     259
+#define SOL_ROSE       260
+#define SOL_DECNET     261
+#define SOL_X25                262
+
+/* Maximum queue length specifiable by listen.  */
+#define SOMAXCONN      128
+
+/* Get the definition of the macro to define the common sockaddr members.  */
+#if __GLIBC_MINOR__  >= 1
+#include <bits/sockaddr.h>
+#else
+#include <sockaddrcom.h>
+#endif
+
+/* Structure describing a generic socket address.  */
+struct sockaddr
+  {
+    __SOCKADDR_COMMON (sa_);   /* Common data: address family and length.  */
+    char sa_data[14];          /* Address data.  */
+  };
+
+
+/* Bits in the FLAGS argument to `send', `recv', et al.  */
+enum
+  {
+    MSG_OOB            = 0x01, /* Process out-of-band data.  */
+#define MSG_OOB                MSG_OOB
+    MSG_PEEK           = 0x02, /* Peek at incoming messages.  */
+#define MSG_PEEK       MSG_PEEK
+    MSG_DONTROUTE      = 0x04, /* Don't use local routing.  */
+#define MSG_DONTROUTE  MSG_DONTROUTE
+    MSG_CTRUNC         = 0x08, /* Control data lost before delivery.  */
+#define MSG_CTRUNC     MSG_CTRUNC
+    MSG_PROXY          = 0x10, /* Supply or ask second address.  */
+#define MSG_PROXY      MSG_PROXY
+    MSG_TRUNC          = 0x20,
+#define MSG_TRUNC      MSG_TRUNC
+    MSG_DONTWAIT       = 0x40,
+#define MSG_DONTWAIT   MSG_DONTWAIT
+    MSG_WAITALL                = 0x100,
+#define MSG_WAITALL    MSG_WAITALL
+    MSG_ERRQUEUE       = 0x2000,
+#define MSG_ERRQUEUE   MSG_ERRQUEUE
+    MSG_NOSIGNAL       = 0x4000,
+#define MSG_NOSIGNAL   MSG_NOSIGNAL
+  };
+
+
+/* Structure describing messages sent by
+   `sendmsg' and received by `recvmsg'.  */
+struct msghdr
+  {
+    __ptr_t msg_name;          /* Address to send to/receive from.  */
+    socklen_t msg_namelen;     /* Length of address data.  */
+
+    struct iovec *msg_iov;     /* Vector of data to send/receive into.  */
+    size_t msg_iovlen;         /* Number of elements in the vector.  */
+
+    __ptr_t msg_control;       /* Ancillary data (eg BSD filedesc passing). */
+    size_t msg_controllen;     /* Ancillary data buffer length.  */
+
+    int msg_flags;             /* Flags on received message.  */
+  };
+
+/* Structure used for storage of ancillary data object information.  */
+struct cmsghdr
+  {
+    size_t cmsg_len;           /* Length of data in cmsg_data plus length
+                                  of cmsghdr structure.  */
+    int cmsg_level;            /* Originating protocol.  */
+    int cmsg_type;             /* Protocol specific type.  */
+#if !defined __STRICT_ANSI__ && defined __GNUC__ && __GNUC__ >= 2
+    unsigned char __cmsg_data[0]; /* Ancillary data.  */
+#endif
+  };
+
+/* Ancillary data object manipulation macros.  */
+#if !defined __STRICT_ANSI__ && defined __GNUC__ && __GNUC__ >= 2
+# define CMSG_DATA(cmsg) ((cmsg)->__cmsg_data)
+#else
+# define CMSG_DATA(cmsg) ((unsigned char *) ((struct cmsghdr *) (cmsg) + 1))
+#endif
+#define CMSG_NXTHDR(mhdr, cmsg) __cmsg_nxthdr (mhdr, cmsg)
+#define CMSG_FIRSTHDR(mhdr) \
+  ((size_t) (mhdr)->msg_controllen >= sizeof (struct cmsghdr)                \
+   ? (struct cmsghdr *) (mhdr)->msg_control : (struct cmsghdr *) NULL)
+#define CMSG_ALIGN(len) ( ((len)+sizeof(long)-1) & ~(sizeof(long)-1) )
+#define CMSG_SPACE(len) (CMSG_ALIGN(sizeof(struct cmsghdr)) + CMSG_ALIGN(len))
+#define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr)) + (len))
+
+
+#ifndef _EXTERN_INLINE
+# define _EXTERN_INLINE extern __inline
+#endif
+extern struct cmsghdr *__cmsg_nxthdr __P ((struct msghdr *__mhdr,
+                                          struct cmsghdr *__cmsg));
+/* Return the control message following __CMSG in __MHDR, or NULL if none.  */
+_EXTERN_INLINE struct cmsghdr *
+__cmsg_nxthdr (struct msghdr *__mhdr, struct cmsghdr *__cmsg)
+{
+  if ((size_t) __cmsg->cmsg_len < sizeof (struct cmsghdr))
+         /* The kernel header does this so there may be a reason.  */
+         return NULL;
+  /* Step over the current header and its payload, rounded up by CMSG_ALIGN.  */
+  __cmsg = (struct cmsghdr *)
+         ((unsigned char *) __cmsg + CMSG_ALIGN(__cmsg->cmsg_len));
+  /* Use `>': a header ending exactly at the buffer end is still an entry.  */
+  if ((unsigned char *) (__cmsg + 1) >
+          (unsigned char *) __mhdr->msg_control + __mhdr->msg_controllen)
+         return NULL;
+  return __cmsg;
+}
+
+/* Socket level message types.  This must match the definitions in
+   <linux/socket.h>.  */
+enum
+  {
+    SCM_RIGHTS = 0x01,         /* Data array contains access rights.  */
+#define SCM_RIGHTS SCM_RIGHTS
+    SCM_CREDENTIALS = 0x02,  /* Data array is `struct ucred'.  */
+#define SCM_CREDENTIALS SCM_CREDENTIALS
+  };
+
+
+
+/* Get socket manipulation related informations from kernel headers.  */
+#ifdef THIS_IS_CRAP
+#ifndef _LINUX_TYPES_H
+# define _LINUX_TYPES_H
+#endif
+#endif
+
+#include <asm/socket.h>
+#include <asm/types.h>
+
+struct ucred
+{
+       __u32   pid;
+       __u32   uid;
+       __u32   gid;
+};
+
+
+/* Structure used to manipulate the SO_LINGER option.  */
+struct linger
+  {
+    int l_onoff;               /* Nonzero to linger on close.  */
+    int l_linger;              /* Time to linger.  */
+  };
+
+__END_DECLS
+
+#endif /* socketbits.h */
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e8107edf92ecd5ad9d43b21df8c5b68b9d74cd94 100644 (file)
@@ -0,0 +1 @@
+static char SNAPSHOT[] = "020116";
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..45d3ad2bca3acba6e91753ff1ee7e7238be73746 100644 (file)
@@ -0,0 +1,46 @@
+#ifndef __LIBNETLINK_H__
+#define __LIBNETLINK_H__ 1
+
+#include <asm/types.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+
+struct rtnl_handle
+{
+       int                     fd;
+       struct sockaddr_nl      local;
+       struct sockaddr_nl      peer;
+       __u32                   seq;
+       __u32                   dump;
+};
+
+extern int rtnl_open(struct rtnl_handle *rth, unsigned subscriptions);
+extern void rtnl_close(struct rtnl_handle *rth);
+extern int rtnl_wilddump_request(struct rtnl_handle *rth, int fam, int type);
+extern int rtnl_dump_request(struct rtnl_handle *rth, int type, void *req, int len);
+extern int rtnl_dump_filter(struct rtnl_handle *rth,
+                           int (*filter)(struct sockaddr_nl *, struct nlmsghdr *n, void *),
+                           void *arg1,
+                           int (*junk)(struct sockaddr_nl *,struct nlmsghdr *n, void *),
+                           void *arg2);
+extern int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer,
+                    unsigned groups, struct nlmsghdr *answer,
+                    int (*junk)(struct sockaddr_nl *,struct nlmsghdr *n, void *),
+                    void *jarg);
+extern int rtnl_send(struct rtnl_handle *rth, char *buf, int);
+
+
+extern int addattr32(struct nlmsghdr *n, int maxlen, int type, __u32 data);
+extern int addattr_l(struct nlmsghdr *n, int maxlen, int type, void *data, int alen);
+extern int rta_addattr32(struct rtattr *rta, int maxlen, int type, __u32 data);
+extern int rta_addattr_l(struct rtattr *rta, int maxlen, int type, void *data, int alen);
+
+extern int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len);
+
+extern int rtnl_listen(struct rtnl_handle *, int (*handler)(struct sockaddr_nl *,struct nlmsghdr *n, void *),
+                      void *jarg);
+extern int rtnl_from_file(FILE *, int (*handler)(struct sockaddr_nl *,struct nlmsghdr *n, void *),
+                      void *jarg);
+
+#endif /* __LIBNETLINK_H__ */
+
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..739f157e75814f41cb7e4e800095f4dce00fb1ae 100644 (file)
@@ -0,0 +1,12 @@
+#ifndef __LL_MAP_H__
+#define __LL_MAP_H__ 1
+
+extern int ll_remember_index(struct sockaddr_nl *who, struct nlmsghdr *n, void *arg);
+extern int ll_init_map(struct rtnl_handle *rth);
+extern int ll_name_to_index(char *name);
+extern const char *ll_index_to_name(int idx);
+extern const char *ll_idx_n2a(int idx, char *buf);
+extern int ll_index_to_type(int idx);
+extern unsigned ll_index_to_flags(int idx);
+
+#endif /* __LL_MAP_H__ */
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..6ac29204fa7c8e3775d09c2acfee3c194cb9c64a 100644 (file)
@@ -0,0 +1,28 @@
+#ifndef RT_NAMES_H_
+#define RT_NAMES_H_ 1
+
+const char* rtnl_rtprot_n2a(int id, char *buf, int len);
+const char* rtnl_rtscope_n2a(int id, char *buf, int len);
+const char* rtnl_rttable_n2a(int id, char *buf, int len);
+const char* rtnl_rtrealm_n2a(int id, char *buf, int len);
+const char* rtnl_dsfield_n2a(int id, char *buf, int len);
+int rtnl_rtprot_a2n(int *id, char *arg);
+int rtnl_rtscope_a2n(int *id, char *arg);
+int rtnl_rttable_a2n(int *id, char *arg);
+int rtnl_rtrealm_a2n(__u32 *id, char *arg);
+int rtnl_dsfield_a2n(__u32 *id, char *arg);
+
+const char *inet_proto_n2a(int proto, char *buf, int len);
+int inet_proto_a2n(char *buf);
+
+
+const char * ll_type_n2a(int type, char *buf, int len);
+
+const char *ll_addr_n2a(unsigned char *addr, int alen, int type, char *buf, int blen);
+int ll_addr_a2n(unsigned char *lladdr, int len, char *arg);
+
+const char * ll_proto_n2a(unsigned short id, char *buf, int len);
+int ll_proto_a2n(unsigned short *id, char *buf);
+
+
+#endif
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..70bda7d0513a06d432f1f9bc2c45e0ce1b7f542f 100644 (file)
@@ -0,0 +1,10 @@
+#ifndef __RTM_MAP_H__
+#define __RTM_MAP_H__ 1
+
+char *rtnl_rtntype_n2a(int id, char *buf, int len);
+int rtnl_rtntype_a2n(int *id, char *arg);
+
+int get_rt_realms(__u32 *realms, char *arg);
+
+
+#endif /* __RTM_MAP_H__ */
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..23014df3a4503ea9446cbecaa0366e984b14aab2 100644 (file)
@@ -0,0 +1,119 @@
+#ifndef _TCP_DIAG_H_
+#define _TCP_DIAG_H_ 1
+
+/* Replace with dynamically allocated value */
+#define NETLINK_TCPDIAG 4
+
+/* Just some random number */
+#define TCPDIAG_GETSOCK 18
+
+/* Socket identity */
+struct tcpdiag_sockid
+{
+       __u16   tcpdiag_sport;
+       __u16   tcpdiag_dport;
+       __u32   tcpdiag_src[4];
+       __u32   tcpdiag_dst[4];
+       __u32   tcpdiag_if;
+       __u32   tcpdiag_cookie[2];
+#define TCPDIAG_NOCOOKIE (~0U)
+};
+
+/* Request structure */
+
+struct tcpdiagreq
+{
+       __u8    tcpdiag_family;         /* Family of addresses. */
+       __u8    tcpdiag_src_len;
+       __u8    tcpdiag_dst_len;
+       __u8    tcpdiag_ext;            /* Query extended information */
+
+       struct tcpdiag_sockid id;
+
+       __u32   tcpdiag_states;         /* States to dump */
+       __u32   tcpdiag_dbs;            /* Tables to dump (NI) */
+};
+
+enum
+{
+       TCPDIAG_REQ_NONE,
+       TCPDIAG_REQ_BYTECODE,
+};
+
+#define TCPDIAG_REQ_MAX TCPDIAG_REQ_BYTECODE
+
+/* Bytecode is sequence of 4 byte commands followed by variable arguments.
+ * All the commands identified by "code" are conditional jumps forward:
+ * to offset cc+"yes" or to offset cc+"no". "yes" is supposed to be
+ * length of the command and its arguments.
+ */
+struct tcpdiag_bc_op
+{
+       unsigned char   code;
+       unsigned char   yes;
+       unsigned short  no;
+};
+
+enum
+{
+       TCPDIAG_BC_NOP,
+       TCPDIAG_BC_JMP,
+       TCPDIAG_BC_S_GE,
+       TCPDIAG_BC_S_LE,
+       TCPDIAG_BC_D_GE,
+       TCPDIAG_BC_D_LE,
+       TCPDIAG_BC_AUTO,
+       TCPDIAG_BC_S_COND,
+       TCPDIAG_BC_D_COND,
+};
+
+struct tcpdiag_hostcond
+{
+       __u8    family;
+       __u8    prefix_len;
+       int     port;
+       __u32   addr[0];
+};
+
+/* Base info structure. It contains socket identity (addrs/ports/cookie)
+ * and, alas, the information shown by netstat. */
+struct tcpdiagmsg
+{
+       __u8    tcpdiag_family;
+       __u8    tcpdiag_state;
+       __u8    tcpdiag_timer;
+       __u8    tcpdiag_retrans;
+
+       struct tcpdiag_sockid id;
+
+       __u32   tcpdiag_expires;
+       __u32   tcpdiag_rqueue;
+       __u32   tcpdiag_wqueue;
+       __u32   tcpdiag_uid;
+       __u32   tcpdiag_inode;
+};
+
+/* Extensions */
+
+enum
+{
+       TCPDIAG_NONE,
+       TCPDIAG_MEMINFO,
+       TCPDIAG_INFO,
+};
+
+#define TCPDIAG_MAX TCPDIAG_INFO
+
+
+/* TCPDIAG_MEM */
+
+struct tcpdiag_meminfo
+{
+       __u32   tcpdiag_rmem;
+       __u32   tcpdiag_wmem;
+       __u32   tcpdiag_fmem;
+       __u32   tcpdiag_tmem;
+};
+
+#endif /* _TCP_DIAG_H_ */
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e9ba5a3833fb08fb0ece43bbbc76f22723ff499f 100644 (file)
@@ -0,0 +1,104 @@
+#ifndef __UTILS_H__
+#define __UTILS_H__ 1
+
+#include <asm/types.h>
+#include <resolv.h>
+
+#include "libnetlink.h"
+#include "ll_map.h"
+#include "rtm_map.h"
+
+extern int preferred_family;
+extern int show_stats;
+extern int show_details;
+extern int show_raw;
+extern int resolve_hosts;
+extern int oneline;
+extern char * _SL_;
+
+#ifndef IPPROTO_ESP
+#define IPPROTO_ESP    50
+#endif
+#ifndef IPPROTO_AH
+#define IPPROTO_AH     51
+#endif
+
+#define SPRINT_BSIZE 64
+#define SPRINT_BUF(x)  char x[SPRINT_BSIZE]
+
+extern void incomplete_command(void) __attribute__((noreturn));
+
+#define NEXT_ARG() do { argv++; if (--argc <= 0) incomplete_command(); } while(0)
+
+typedef struct
+{
+       __u8 family;
+       __u8 bytelen;
+       __s16 bitlen;
+       __u32 data[4];
+} inet_prefix;
+
+#define DN_MAXADDL 20
+#ifndef AF_DECnet
+#define AF_DECnet 12
+#endif
+
+struct dn_naddr 
+{
+        unsigned short          a_len;
+        unsigned char a_addr[DN_MAXADDL];
+};
+
+#define IPX_NODE_LEN 6
+
+struct ipx_addr {
+       u_int32_t ipx_net;
+       u_int8_t  ipx_node[IPX_NODE_LEN];
+};
+
+extern __u32 get_addr32(char *name);
+extern int get_addr_1(inet_prefix *dst, char *arg, int family);
+extern int get_prefix_1(inet_prefix *dst, char *arg, int family);
+extern int get_addr(inet_prefix *dst, char *arg, int family);
+extern int get_prefix(inet_prefix *dst, char *arg, int family);
+
+extern int get_integer(int *val, char *arg, int base);
+extern int get_unsigned(unsigned *val, char *arg, int base);
+#define get_byte get_u8
+#define get_ushort get_u16
+#define get_short get_s16
+extern int get_u32(__u32 *val, char *arg, int base);
+extern int get_u16(__u16 *val, char *arg, int base);
+extern int get_s16(__s16 *val, char *arg, int base);
+extern int get_u8(__u8 *val, char *arg, int base);
+extern int get_s8(__s8 *val, char *arg, int base);
+
+extern __u8* hexstring_n2a(const __u8 *str, int len, __u8 *buf, int blen);
+extern __u8* hexstring_a2n(const __u8 *str, __u8 *buf, int blen);
+
+extern const char *format_host(int af, int len, void *addr, char *buf, int buflen);
+extern const char *rt_addr_n2a(int af, int len, void *addr, char *buf, int buflen);
+
+void invarg(char *, char *) __attribute__((noreturn));
+void duparg(char *, char *) __attribute__((noreturn));
+void duparg2(char *, char *) __attribute__((noreturn));
+int matches(char *arg, char *pattern);
+extern int inet_addr_match(inet_prefix *a, inet_prefix *b, int bits);
+
+const char *dnet_ntop(int af, const void *addr, char *str, size_t len);
+int dnet_pton(int af, const char *src, void *addr);
+
+const char *ipx_ntop(int af, const void *addr, char *str, size_t len);
+int ipx_pton(int af, const char *src, void *addr);
+
+extern int __iproute2_hz_internal;
+extern int __get_hz(void);
+
+static __inline__ int get_hz(void)
+{
+       if (__iproute2_hz_internal == 0)
+               __iproute2_hz_internal = __get_hz();
+       return __iproute2_hz_internal;
+}
+
+#endif /* __UTILS_H__ */
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..2aa0051827a6ad6c57f4a1347e7b0935e2339617 100644 (file)
@@ -0,0 +1,22 @@
+# Objects that are prerequisites of the "ip" program.
+IPOBJ=ip.o ipaddress.o iproute.o iprule.o \
+    rtm_map.o iptunnel.o ipneigh.o iplink.o ipmaddr.o \
+    ipmonitor.o ipmroute.o
+
+# Objects that are prerequisites of the "rtmon" program.
+RTMONOBJ=rtmon.o
+
+# Everything "clean" has to remove besides the programs themselves.
+ALLOBJ=$(IPOBJ) $(RTMONOBJ)
+TARGETS=ip rtmon
+
+all: $(TARGETS)
+
+# Neither program rule carries a recipe, and LIBNETLINK / LIBUTIL are not
+# set in this file.
+# NOTE(review): presumably both come from the parent Makefile and the link
+# step is make's implicit rule - confirm.
+ip: $(IPOBJ) $(LIBNETLINK) $(LIBUTIL)
+
+rtmon: $(RTMONOBJ) $(LIBNETLINK)
+
+# The binaries are installed with -s (stripped); the routel and routef
+# scripts are installed as they are.  Both go to $(DESTDIR)$(SBINDIR).
+install: all
+       install -m 0755 -s $(TARGETS) $(DESTDIR)$(SBINDIR)
+       install -m 0755 routel routef $(DESTDIR)$(SBINDIR)
+
+clean:
+       rm -f $(ALLOBJ) $(TARGETS)
+
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..ed6960f7a4b4b55b12cb42d6c3c30a7662167617 100644 (file)
--- a/ip/ifcfg
+++ b/ip/ifcfg
@@ -0,0 +1,145 @@
+#! /bin/bash
+
+CheckForwarding () {
+  local sbase fwd
+  sbase=/proc/sys/net/ipv4/conf
+  fwd=0
+  if [ -d $sbase ]; then
+    for dir in $sbase/*/forwarding; do
+      fwd=$[$fwd + `cat $dir`]
+    done
+  else
+    fwd=2
+  fi
+  return $fwd
+}
+
+RestartRDISC () {
+  killall -HUP rdisc || rdisc -fs
+}
+
+ABCMaskLen () {
+  local class;
+
+  class=${1%%.*}
+  if [ "$1" = "" -o $class -eq 0 -o $class -ge 224 ]; then return 0
+  elif [ $class -ge 224 ]; then return 0
+  elif [ $class -ge 192 ]; then return 24
+  elif [ $class -ge 128 ]; then return 16
+  else return 8; fi
+}
+
+label="label $1"
+ldev="$1"
+dev=${1%:*}
+if [ "$dev" = "" -o "$1" = "help" ]; then
+  echo "Usage: ifcfg DEV [[add|del [ADDR[/LEN]] [PEER] | stop]" 1>&2
+  echo "       add - add new address" 1>&2
+  echo "       del - delete address" 1>&2
+  echo "       stop - completely disable IP" 1>&2
+  exit 1
+fi
+shift
+
+# fwd is the status of CheckForwarding; rdisc is only (re)started further
+# down when it is 0.
+CheckForwarding
+fwd=$?
+if [ $fwd -ne 0 ]; then
+  echo "Forwarding is ON or its state is unknown ($fwd). OK, No RDISC." 1>&2
+fi
+
+
+# Optional verb:
+#   add  - just consumed
+#   stop - refused for an alias; otherwise flush the IPv4 addresses of the
+#          device and finish
+#   del* - remember that the address is to be deleted
+#   anything else is left in place and parsed as the address
+deleting=0
+case "$1" in
+add) shift ;;
+stop)
+  if [ "$ldev" != "$dev" ]; then
+    echo "Cannot stop alias $ldev" 1>&2
+    exit 1;
+  fi
+  ip -4 addr flush dev $dev $label || exit 1
+  if [ $fwd -eq 0 ]; then RestartRDISC; fi
+  exit 0 ;;
+del*)
+  deleting=1; shift ;;
+*)
+esac
+
+# Split ADDR[/LEN] into ipaddr and pfxlen; pfxlen stays empty when no
+# "/LEN" part was given.
+ipaddr=
+pfxlen=
+if [ "$1" != "" ]; then
+  ipaddr=${1%/*}
+  if [ "$1" != "$ipaddr" ]; then
+    pfxlen=${1#*/}
+  fi
+  if [ "$ipaddr" = "" ]; then
+    echo "$1 is bad IP address." 1>&2
+    exit 1
+  fi
+fi
+shift
+
+# With a peer the prefix length must be absent or 32; without one a
+# missing length defaults to the value returned by ABCMaskLen.
+peer=$1
+if [ "$peer" != "" ]; then
+  if [ "$pfxlen" != "" -a "$pfxlen" != "32" ]; then
+    echo "Peer address with non-trivial netmask." 1>&2
+    exit 1
+  fi
+  pfx="$ipaddr peer $peer"
+else
+  if [ "$pfxlen" = "" ]; then
+    ABCMaskLen $ipaddr
+    pfxlen=$?
+  fi
+  pfx="$ipaddr/$pfxlen"
+fi
+
+# No alias suffix was given and an address is present: drop the label
+# argument.
+if [ "$ldev" = "$dev" -a "$ipaddr" != "" ]; then
+  label=
+fi
+
+if [ $deleting -ne 0 ]; then
+  ip addr del $pfx dev $dev $label || exit 1
+  if [ $fwd -eq 0 ]; then RestartRDISC; fi
+  exit 0
+fi
+
+
+# Adding: bring the link up first; without an address that is all.
+if ! ip link set up dev $dev ; then
+  echo "Error: cannot enable interface $dev." 1>&2
+  exit 1
+fi
+if [ "$ipaddr" = "" ]; then exit 0; fi
+
+# A failing arping here is reported as "address already in use".
+if ! arping -q -c 2 -w 3 -D -I $dev $ipaddr ; then
+  echo "Error: some host already uses address $ipaddr on $dev." 1>&2
+  exit 1
+fi
+
+if ! ip address add $pfx brd + dev $dev $label; then
+  echo "Error: failed to add $pfx on $dev." 1>&2
+  exit 1
+fi
+
+# noarp keeps the status of the first announcement; a second one is sent
+# from a background subshell two seconds later.
+# NOTE(review): -A / -U presumably select the ARP reply / request
+# announcement modes of arping - confirm against its manual.
+arping -q -A -c 1 -I $dev $ipaddr
+noarp=$?
+( sleep 2 ;
+  arping -q -U -c 1 -I $dev $ipaddr ) >& /dev/null </dev/null &
+
+# Best-effort routes; failures are discarded.
+ip route add unreachable 224.0.0.0/24 >& /dev/null 
+ip route add unreachable 255.255.255.255 >& /dev/null
+if [ `ip link ls $dev | grep -c MULTICAST` -ge 1 ]; then
+  ip route add 224.0.0.0/4 dev $dev scope global >& /dev/null
+fi
+
+# Only when forwarding is off: append a default route (directly on the
+# device when noarp is 0, otherwise via the peer if it answers ping) and
+# restart rdisc.
+if [ $fwd -eq 0 ]; then
+  if [ $noarp -eq 0 ]; then
+    ip ro append default dev $dev metric 30000 scope global
+  elif [ "$peer" != "" ]; then
+    if ping -q -c 2 -w 4 $peer ; then
+      ip ro append default via $peer dev $dev metric 30001
+    fi
+  fi
+  RestartRDISC
+fi
+
+exit 0
diff --git a/ip/ip.c b/ip/ip.c
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..fe379926acccee2a77df0dddf00f8048a4096573 100644 (file)
--- a/ip/ip.c
+++ b/ip/ip.c
@@ -0,0 +1,167 @@
+/*
+ * ip.c                "ip" utility frontend.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ *
+ * Changes:
+ *
+ * Rani Assaf <rani@magic.metawire.com> 980929:        resolve addresses
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <string.h>
+
+#include "SNAPSHOT.h"
+#include "utils.h"
+#include "ip_common.h"
+
+int preferred_family = AF_UNSPEC;
+int show_stats = 0;
+int resolve_hosts = 0;
+int oneline = 0;
+char * _SL_ = NULL;
+
+static void usage(void) __attribute__((noreturn));
+
+/* Write the command synopsis to stderr and terminate with exit(-1). */
+static void usage(void)
+{
+       static const char synopsis[] =
+"Usage: ip [ OPTIONS ] OBJECT { COMMAND | help }\n"
+"where  OBJECT := { link | addr | route | rule | neigh | tunnel |\n"
+"                   maddr | mroute | monitor }\n"
+"       OPTIONS := { -V[ersion] | -s[tatistics] | -r[esolve] |\n"
+"                    -f[amily] { inet | inet6 | ipx | dnet | link } | -o[neline] }\n";
+
+       fputs(synopsis, stderr);
+       exit(-1);
+}
+
+/*
+ * Entry point of the "ip" front end.
+ *
+ * Consumes the leading options (setting preferred_family, show_stats,
+ * resolve_hosts and oneline), then hands the remaining arguments to one
+ * of the do_*() handlers.  The handler is chosen by the name the program
+ * was invoked under or, failing that, by the first non-option argument.
+ * Its return value becomes the return value of main(); every error path
+ * here ends in usage() or exit(-1).
+ */
+int main(int argc, char **argv)
+{
+       char *basename;
+
+       /* Program name without any leading directory part. */
+       basename = strrchr(argv[0], '/');
+       if (basename == NULL)
+               basename = argv[0];
+       else
+               basename++;
+       
+       /*
+        * Option loop.  The argv[0] slot is kept: each consumed word shifts
+        * argv by one, so the next word to look at is always argv[1].
+        */
+       while (argc > 1) {
+               char *opt = argv[1];
+               if (strcmp(opt,"--") == 0) {
+                       argc--; argv++;
+                       break;
+               }
+               if (opt[0] != '-')
+                       break;
+               /* Treat "--option" like "-option". */
+               if (opt[1] == '-')
+                       opt++;
+               /*
+                * matches() is defined elsewhere; a zero result is taken as
+                * "opt selects this option".  The order of the tests decides
+                * which option wins when more than one would match.
+                */
+               if (matches(opt, "-family") == 0) {
+                       argc--;
+                       argv++;
+                       if (argc <= 1)
+                               usage();
+                       if (strcmp(argv[1], "inet") == 0)
+                               preferred_family = AF_INET;
+                       else if (strcmp(argv[1], "inet6") == 0)
+                               preferred_family = AF_INET6;
+                       else if (strcmp(argv[1], "dnet") == 0)
+                               preferred_family = AF_DECnet;
+                       else if (strcmp(argv[1], "link") == 0)
+                               preferred_family = AF_PACKET;
+                       else if (strcmp(argv[1], "ipx") == 0)
+                               preferred_family = AF_IPX;
+                       else if (strcmp(argv[1], "help") == 0)
+                               usage();
+                       else
+                               invarg(argv[1], "invalid protocol family");
+               } else if (strcmp(opt, "-4") == 0) {
+                       preferred_family = AF_INET;
+               } else if (strcmp(opt, "-6") == 0) {
+                       preferred_family = AF_INET6;
+               } else if (strcmp(opt, "-0") == 0) {
+                       preferred_family = AF_PACKET;
+               } else if (strcmp(opt, "-I") == 0) {
+                       preferred_family = AF_IPX;
+               } else if (strcmp(opt, "-D") == 0) {
+                       preferred_family = AF_DECnet;
+               } else if (matches(opt, "-stats") == 0 ||
+                          matches(opt, "-statistics") == 0) {
+                       ++show_stats;
+               } else if (matches(opt, "-resolve") == 0) {
+                       ++resolve_hosts;
+               } else if (matches(opt, "-oneline") == 0) {
+                       ++oneline;
+#if 0
+               } else if (matches(opt, "-numeric") == 0) {
+                       rtnl_names_numeric++;
+#endif
+               } else if (matches(opt, "-Version") == 0) {
+                       printf("ip utility, iproute2-ss%s\n", SNAPSHOT);
+                       exit(0);
+               } else if (matches(opt, "-help") == 0) {
+                       usage();
+               } else {
+                       fprintf(stderr, "Option \"%s\" is unknown, try \"ip -help\".\n", opt);
+                       exit(-1);
+               }
+               argc--; argv++;
+       }
+
+       /* Record separator used by the printers: a backslash in one-line mode. */
+       _SL_ = oneline ? "\\" : "\n" ;
+
+       /*
+        * Invoked under an object-specific name: everything after the
+        * options already belongs to the handler.
+        */
+       if (strcmp(basename, "ipaddr") == 0)
+               return do_ipaddr(argc-1, argv+1);
+       if (strcmp(basename, "ipmaddr") == 0)
+               return do_multiaddr(argc-1, argv+1);
+       if (strcmp(basename, "iproute") == 0)
+               return do_iproute(argc-1, argv+1);
+       if (strcmp(basename, "iprule") == 0)
+               return do_iprule(argc-1, argv+1);
+       if (strcmp(basename, "ipneigh") == 0)
+               return do_ipneigh(argc-1, argv+1);
+       if (strcmp(basename, "iplink") == 0)
+               return do_iplink(argc-1, argv+1);
+       if (strcmp(basename, "iptunnel") == 0)
+               return do_iptunnel(argc-1, argv+1);
+       if (strcmp(basename, "ipmonitor") == 0)
+               return do_ipmonitor(argc-1, argv+1);
+
+       /* Otherwise argv[1] names the object and the handler gets the rest. */
+       if (argc > 1) {
+               if (matches(argv[1], "address") == 0)
+                       return do_ipaddr(argc-2, argv+2);
+               if (matches(argv[1], "maddress") == 0)
+                       return do_multiaddr(argc-2, argv+2);
+               if (matches(argv[1], "route") == 0)
+                       return do_iproute(argc-2, argv+2);
+               if (matches(argv[1], "rule") == 0)
+                       return do_iprule(argc-2, argv+2);
+               if (matches(argv[1], "mroute") == 0)
+                       return do_multiroute(argc-2, argv+2);
+               if (matches(argv[1], "neighbor") == 0 ||
+                   matches(argv[1], "neighbour") == 0)
+                       return do_ipneigh(argc-2, argv+2);
+               if (matches(argv[1], "link") == 0)
+                       return do_iplink(argc-2, argv+2);
+               if (matches(argv[1], "tunnel") == 0 ||
+                   strcmp(argv[1], "tunl") == 0)
+                       return do_iptunnel(argc-2, argv+2);
+               if (matches(argv[1], "monitor") == 0)
+                       return do_ipmonitor(argc-2, argv+2);
+               if (matches(argv[1], "help") == 0)
+                       usage();
+               fprintf(stderr, "Object \"%s\" is unknown, try \"ip help\".\n", argv[1]);
+               exit(-1);
+       }
+       /* No object at all; usage() does not return. */
+       usage();
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..5ac43218e2ffbabe1e022f592c20f53cdcb94a4a 100644 (file)
@@ -0,0 +1,20 @@
+extern int print_linkinfo(struct sockaddr_nl *who, struct nlmsghdr *n, void *arg);
+extern int print_addrinfo(struct sockaddr_nl *who, struct nlmsghdr *n, void *arg);
+extern int print_neigh(struct sockaddr_nl *who, struct nlmsghdr *n, void *arg);
+extern int ipaddr_list(int argc, char **argv);
+extern int ipaddr_list_link(int argc, char **argv);
+extern int iproute_monitor(int argc, char **argv);
+extern void iplink_usage(void) __attribute__((noreturn));
+extern void iproute_reset_filter(void);
+extern void ipaddr_reset_filter(int);
+extern void ipneigh_reset_filter(void);
+extern int print_route(struct sockaddr_nl *who, struct nlmsghdr *n, void *arg);
+extern int do_ipaddr(int argc, char **argv);
+extern int do_iproute(int argc, char **argv);
+extern int do_iprule(int argc, char **argv);
+extern int do_ipneigh(int argc, char **argv);
+extern int do_iptunnel(int argc, char **argv);
+extern int do_iplink(int argc, char **argv);
+extern int do_ipmonitor(int argc, char **argv);
+extern int do_multiaddr(int argc, char **argv);
+extern int do_multiroute(int argc, char **argv);
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0d00280c8c6b5a8a0553acb2587ee2451ce2b1cc 100644 (file)
@@ -0,0 +1,898 @@
+/*
+ * ipaddress.c         "ip address".
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Changes:
+ *     Laszlo Valko <valko@linux.karinthy.hu> 990223: address label must be zero terminated
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/sockios.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+#include <fnmatch.h>
+
+#include "rt_names.h"
+#include "utils.h"
+#include "ll_map.h"
+#include "ip_common.h"
+
+/*
+ * Selection criteria and flush state shared by the link and address
+ * printers in this file.
+ */
+static struct
+{
+       int ifindex;            /* non-zero: only this interface index */
+       int family;             /* non-zero: only this address family */
+       int oneline;            /* prefix each address with "index: name" */
+       int showqueue;          /* call print_queuelen() for each link */
+       inet_prefix pfx;        /* pfx.family set: local address is checked with inet_addr_match() */
+       int scope, scopemask;   /* ifa_scope must equal scope on the bits of scopemask */
+       int flags, flagmask;    /* ifa_flags must equal flags on the bits of flagmask */
+       int up;                 /* non-zero: skip links without IFF_UP */
+       char *label;            /* fnmatch() pattern for the interface name / address label */
+       int flushed;            /* incremented for every deletion queued */
+       char *flushb;           /* flush buffer; non-NULL switches print_addrinfo() to flush mode */
+       int flushp;             /* bytes currently queued in flushb */
+       int flushe;             /* limit that flushp plus a new message is checked against */
+       struct rtnl_handle *rth;        /* handle the queued requests are sent through */
+} filter;
+
+/* Non-zero: usage() defers to iplink_usage() and print_linkinfo() may print statistics. */
+static int do_link;
+
+static void usage(void) __attribute__((noreturn));
+
+/*
+ * Print the "ip addr" synopsis on stderr and exit(-1).  While do_link is
+ * set the call is handed to iplink_usage(), which does not return.
+ */
+static void usage(void)
+{
+       static const char synopsis[] =
+               "Usage: ip addr {add|del} IFADDR dev STRING\n"
+               "       ip addr {show|flush} [ dev STRING ] [ scope SCOPE-ID ]\n"
+               "                            [ to PREFIX ] [ FLAG-LIST ] [ label PATTERN ]\n"
+               "IFADDR := PREFIX | ADDR peer PREFIX\n"
+               "          [ broadcast ADDR ] [ anycast ADDR ]\n"
+               "          [ label STRING ] [ scope SCOPE-ID ]\n"
+               "SCOPE-ID := [ host | link | global | NUMBER ]\n"
+               "FLAG-LIST := [ FLAG-LIST ] FLAG\n"
+               "FLAG  := [ permanent | dynamic | secondary | primary |\n"
+               "           tentative | deprecated ]\n";
+
+       if (do_link)
+               iplink_usage();
+
+       fputs(synopsis, stderr);
+       exit(-1);
+}
+
+/*
+ * Write the interface flags to fp as "<NAME,NAME,...> ".
+ *
+ * IFF_RUNNING is never shown.  Known flags are printed by name in the
+ * order of the table below; bits left over afterwards are printed as one
+ * hex number.  A non-zero mdown appends ",M-DOWN" before the closing
+ * bracket.
+ */
+void print_link_flags(FILE *fp, unsigned flags, unsigned mdown)
+{
+       static const struct {
+               unsigned bit;
+               const char *name;
+       } known[] = {
+               { IFF_LOOPBACK,    "LOOPBACK" },
+               { IFF_BROADCAST,   "BROADCAST" },
+               { IFF_POINTOPOINT, "POINTOPOINT" },
+               { IFF_MULTICAST,   "MULTICAST" },
+               { IFF_NOARP,       "NOARP" },
+               { IFF_ALLMULTI,    "ALLMULTI" },
+               { IFF_PROMISC,     "PROMISC" },
+               { IFF_MASTER,      "MASTER" },
+               { IFF_SLAVE,       "SLAVE" },
+               { IFF_DEBUG,       "DEBUG" },
+               { IFF_DYNAMIC,     "DYNAMIC" },
+               { IFF_AUTOMEDIA,   "AUTOMEDIA" },
+               { IFF_PORTSEL,     "PORTSEL" },
+               { IFF_NOTRAILERS,  "NOTRAILERS" },
+               { IFF_UP,          "UP" },
+       };
+       unsigned i;
+
+       fprintf(fp, "<");
+       flags &= ~IFF_RUNNING;
+
+       for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
+               if (!(flags & known[i].bit))
+                       continue;
+               /* Clear the bit first so the comma depends on what is still left. */
+               flags &= ~known[i].bit;
+               fprintf(fp, "%s%s", known[i].name, flags ? "," : "");
+       }
+
+       if (flags)
+               fprintf(fp, "%x", flags);
+       if (mdown)
+               fprintf(fp, ",M-DOWN");
+       fprintf(fp, "> ");
+}
+
+/*
+ * Print "qlen N" on stdout for the interface called name, where N is the
+ * value the SIOCGIFTXQLEN ioctl reports; nothing is printed when N is 0.
+ *
+ * A failing socket() is silent; a failing ioctl() is reported with
+ * perror().  Nothing is returned in either case.
+ */
+void print_queuelen(char *name)
+{
+       struct ifreq ifr;
+       int s;
+
+       s = socket(AF_INET, SOCK_STREAM, 0);
+       if (s < 0)
+               return;
+
+       memset(&ifr, 0, sizeof(ifr));
+       /* Bounded copy: ifr_name is a fixed-size field, name is not. */
+       snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", name);
+       if (ioctl(s, SIOCGIFTXQLEN, &ifr) < 0) {
+               perror("SIOCGIFTXQLEN");
+               close(s);
+               return;
+       }
+       close(s);
+
+       if (ifr.ifr_qlen)
+               printf("qlen %d", ifr.ifr_qlen);
+}
+
+/*
+ * Print one RTM_NEWLINK / RTM_DELLINK message.
+ *
+ * who  - not used here.
+ * n    - the netlink message.
+ * arg  - the FILE * to print to.
+ *
+ * Returns 0 when the message was printed, was of another type, or was
+ * rejected by filter; -1 when it is shorter than a struct ifinfomsg or
+ * carries no IFLA_IFNAME attribute.
+ */
+int print_linkinfo(struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
+{
+       FILE *fp = (FILE*)arg;
+       struct ifinfomsg *ifi = NLMSG_DATA(n);
+       struct rtattr * tb[IFLA_MAX+1];
+       int len = n->nlmsg_len;
+       unsigned m_flag = 0;
+
+       if (n->nlmsg_type != RTM_NEWLINK && n->nlmsg_type != RTM_DELLINK)
+               return 0;
+
+       /* What remains in len is the attribute area after the ifinfomsg. */
+       len -= NLMSG_LENGTH(sizeof(*ifi));
+       if (len < 0)
+               return -1;
+
+       if (filter.ifindex && ifi->ifi_index != filter.ifindex)
+               return 0;
+       if (filter.up && !(ifi->ifi_flags&IFF_UP))
+               return 0;
+
+       memset(tb, 0, sizeof(tb));
+       parse_rtattr(tb, IFLA_MAX, IFLA_RTA(ifi), len);
+       if (tb[IFLA_IFNAME] == NULL) {
+               fprintf(stderr, "BUG: nil ifname\n");
+               return -1;
+       }
+       /* The label pattern only applies to the name when no other family is selected. */
+       if (filter.label &&
+           (!filter.family || filter.family == AF_PACKET) &&
+           fnmatch(filter.label, RTA_DATA(tb[IFLA_IFNAME]), 0))
+               return 0;
+
+       if (n->nlmsg_type == RTM_DELLINK)
+               fprintf(fp, "Deleted ");
+
+       fprintf(fp, "%d: %s", ifi->ifi_index,
+               tb[IFLA_IFNAME] ? (char*)RTA_DATA(tb[IFLA_IFNAME]) : "<nil>");
+
+       /*
+        * IFLA_LINK: print "@name" of the referenced interface; m_flag ends
+        * up 1 when that interface lacks IFF_UP, which print_link_flags()
+        * shows as ",M-DOWN".
+        */
+       if (tb[IFLA_LINK]) {
+               SPRINT_BUF(b1);
+               int iflink = *(int*)RTA_DATA(tb[IFLA_LINK]);
+               if (iflink == 0)
+                       fprintf(fp, "@NONE: ");
+               else {
+                       fprintf(fp, "@%s: ", ll_idx_n2a(iflink, b1));
+                       m_flag = ll_index_to_flags(iflink);
+                       m_flag = !(m_flag & IFF_UP);
+               }
+       } else {
+               fprintf(fp, ": ");
+       }
+       print_link_flags(fp, ifi->ifi_flags, m_flag);
+
+       if (tb[IFLA_MTU])
+               fprintf(fp, "mtu %u ", *(int*)RTA_DATA(tb[IFLA_MTU]));
+       if (tb[IFLA_QDISC])
+               fprintf(fp, "qdisc %s ", (char*)RTA_DATA(tb[IFLA_QDISC]));
+#ifdef IFLA_MASTER
+       if (tb[IFLA_MASTER]) {
+               SPRINT_BUF(b1);
+               fprintf(fp, "master %s ", ll_idx_n2a(*(int*)RTA_DATA(tb[IFLA_MASTER]), b1));
+       }
+#endif
+       if (filter.showqueue)
+               print_queuelen((char*)RTA_DATA(tb[IFLA_IFNAME]));
+       
+       /* Link-layer line: type, address and broadcast (or peer) address. */
+       if (!filter.family || filter.family == AF_PACKET) {
+               SPRINT_BUF(b1);
+               fprintf(fp, "%s", _SL_);
+               fprintf(fp, "    link/%s ", ll_type_n2a(ifi->ifi_type, b1, sizeof(b1)));
+
+               if (tb[IFLA_ADDRESS]) {
+                       fprintf(fp, "%s", ll_addr_n2a(RTA_DATA(tb[IFLA_ADDRESS]),
+                                                     RTA_PAYLOAD(tb[IFLA_ADDRESS]),
+                                                     ifi->ifi_type,
+                                                     b1, sizeof(b1)));
+               }
+               if (tb[IFLA_BROADCAST]) {
+                       if (ifi->ifi_flags&IFF_POINTOPOINT)
+                               fprintf(fp, " peer ");
+                       else
+                               fprintf(fp, " brd ");
+                       fprintf(fp, "%s", ll_addr_n2a(RTA_DATA(tb[IFLA_BROADCAST]),
+                                                     RTA_PAYLOAD(tb[IFLA_BROADCAST]),
+                                                     ifi->ifi_type,
+                                                     b1, sizeof(b1)));
+               }
+       }
+       /* Counters: only in link mode with show_stats set; the error breakdown needs show_stats > 1. */
+       if (do_link && tb[IFLA_STATS] && show_stats) {
+               struct net_device_stats slocal;
+               struct net_device_stats *s = RTA_DATA(tb[IFLA_STATS]);
+               /* Work on a local copy when the attribute is not aligned for unsigned long. */
+               if (((unsigned long)s) & (sizeof(unsigned long)-1)) {
+                       memcpy(&slocal, s, sizeof(slocal));
+                       s = &slocal;
+               }
+               fprintf(fp, "%s", _SL_);
+               fprintf(fp, "    RX: bytes  packets  errors  dropped overrun mcast   %s%s",
+                       s->rx_compressed ? "compressed" : "", _SL_);
+               fprintf(fp, "    %-10lu %-8lu %-7lu %-7lu %-7lu %-7lu",
+                       s->rx_bytes, s->rx_packets, s->rx_errors,
+                       s->rx_dropped, s->rx_over_errors,
+                       s->multicast
+                       );
+               if (s->rx_compressed)
+                       fprintf(fp, " %-7lu", s->rx_compressed);
+               if (show_stats > 1) {
+                       fprintf(fp, "%s", _SL_);
+                       fprintf(fp, "    RX errors: length  crc     frame   fifo    missed%s", _SL_);
+                       fprintf(fp, "               %-7lu  %-7lu %-7lu %-7lu %-7lu",
+                               s->rx_length_errors,
+                               s->rx_crc_errors,
+                               s->rx_frame_errors,
+                               s->rx_fifo_errors,
+                               s->rx_missed_errors
+                               );
+               }
+               fprintf(fp, "%s", _SL_);
+               fprintf(fp, "    TX: bytes  packets  errors  dropped carrier collsns %s%s",
+                       s->tx_compressed ? "compressed" : "", _SL_);
+               fprintf(fp, "    %-10lu %-8lu %-7lu %-7lu %-7lu %-7lu",
+                       s->tx_bytes, s->tx_packets, s->tx_errors,
+                       s->tx_dropped, s->tx_carrier_errors, s->collisions);
+               if (s->tx_compressed)
+                       fprintf(fp, " %-7lu", s->tx_compressed);
+               if (show_stats > 1) {
+                       fprintf(fp, "%s", _SL_);
+                       fprintf(fp, "    TX errors: aborted fifo    window  heartbeat%s", _SL_);
+                       fprintf(fp, "               %-7lu  %-7lu %-7lu %-7lu",
+                               s->tx_aborted_errors,
+                               s->tx_fifo_errors,
+                               s->tx_window_errors,
+                               s->tx_heartbeat_errors
+                               );
+               }
+       }
+       fprintf(fp, "\n");
+       fflush(fp);
+       return 0;
+}
+
+/*
+ * Send the filter.flushp bytes queued in filter.flushb through
+ * filter.rth and mark the buffer empty again.
+ *
+ * Returns 0 on success, -1 when rtnl_send() reports a failure (the
+ * buffer is then left as it was).
+ */
+static int flush_update(void)
+{
+       if (rtnl_send(filter.rth, filter.flushb, filter.flushp) < 0) {
+               /* No "\n" here: perror() appends ": <error text>\n" itself. */
+               perror("Failed to send flush request");
+               return -1;
+       }
+       filter.flushp = 0;
+       return 0;
+}
+
+/*
+ * Print one RTM_NEWADDR / RTM_DELADDR message or, while filter.flushb is
+ * set, queue an RTM_DELADDR request for it in the flush buffer.
+ *
+ * who  - not used here.
+ * n    - the netlink message; ifa_flags inside it are cleared bit by bit
+ *        as they are printed.
+ * arg  - the FILE * to print to.
+ *
+ * Returns 0 when the message was handled, was of another type, or was
+ * rejected by filter; -1 when it is shorter than a struct ifaddrmsg or a
+ * full flush buffer could not be sent.
+ */
+int print_addrinfo(struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
+{
+       FILE *fp = (FILE*)arg;
+       struct ifaddrmsg *ifa = NLMSG_DATA(n);
+       int len = n->nlmsg_len;
+       struct rtattr * rta_tb[IFA_MAX+1];
+       char abuf[256];
+       SPRINT_BUF(b1);
+
+       if (n->nlmsg_type != RTM_NEWADDR && n->nlmsg_type != RTM_DELADDR)
+               return 0;
+       len -= NLMSG_LENGTH(sizeof(*ifa));
+       if (len < 0) {
+               fprintf(stderr, "BUG: wrong nlmsg len %d\n", len);
+               return -1;
+       }
+
+       /* In flush mode only existing addresses are of interest. */
+       if (filter.flushb && n->nlmsg_type != RTM_NEWADDR)
+               return 0;
+
+       memset(rta_tb, 0, sizeof(rta_tb));
+       parse_rtattr(rta_tb, IFA_MAX, IFA_RTA(ifa), len);
+
+       /* Let IFA_LOCAL and IFA_ADDRESS stand in for each other when one is missing. */
+       if (!rta_tb[IFA_LOCAL])
+               rta_tb[IFA_LOCAL] = rta_tb[IFA_ADDRESS];
+       if (!rta_tb[IFA_ADDRESS])
+               rta_tb[IFA_ADDRESS] = rta_tb[IFA_LOCAL];
+
+       if (filter.ifindex && filter.ifindex != ifa->ifa_index)
+               return 0;
+       if ((filter.scope^ifa->ifa_scope)&filter.scopemask)
+               return 0;
+       if ((filter.flags^ifa->ifa_flags)&filter.flagmask)
+               return 0;
+       if (filter.label) {
+               const char *label;
+               /* Without an explicit label the interface name is matched instead. */
+               if (rta_tb[IFA_LABEL])
+                       label = RTA_DATA(rta_tb[IFA_LABEL]);
+               else
+                       label = ll_idx_n2a(ifa->ifa_index, b1);
+               if (fnmatch(filter.label, label, 0) != 0)
+                       return 0;
+       }
+       if (filter.pfx.family) {
+               if (rta_tb[IFA_LOCAL]) {
+                       inet_prefix dst;
+                       size_t alen = RTA_PAYLOAD(rta_tb[IFA_LOCAL]);
+
+                       memset(&dst, 0, sizeof(dst));
+                       dst.family = ifa->ifa_family;
+                       /* Never copy more than the prefix can hold. */
+                       if (alen > sizeof(dst.data))
+                               alen = sizeof(dst.data);
+                       memcpy(&dst.data, RTA_DATA(rta_tb[IFA_LOCAL]), alen);
+                       if (inet_addr_match(&dst, &filter.pfx, filter.pfx.bitlen))
+                               return 0;
+               }
+       }
+
+       if (filter.flushb) {
+               struct nlmsghdr *fn;
+               /* Send what is queued when this message would not fit any more. */
+               if (NLMSG_ALIGN(filter.flushp) + n->nlmsg_len > filter.flushe) {
+                       if (flush_update())
+                               return -1;
+               }
+               /* Queue a copy of the message, rewritten into a delete request. */
+               fn = (struct nlmsghdr*)(filter.flushb + NLMSG_ALIGN(filter.flushp));
+               memcpy(fn, n, n->nlmsg_len);
+               fn->nlmsg_type = RTM_DELADDR;
+               fn->nlmsg_flags = NLM_F_REQUEST;
+               fn->nlmsg_seq = ++filter.rth->seq;
+               filter.flushp = (((char*)fn) + n->nlmsg_len) - filter.flushb;
+               filter.flushed++;
+               /* The address is only printed as well when show_stats is 2 or more. */
+               if (show_stats < 2)
+                       return 0;
+       }
+
+       if (n->nlmsg_type == RTM_DELADDR)
+               fprintf(fp, "Deleted ");
+
+       if (filter.oneline || filter.flushb)
+               fprintf(fp, "%u: %s", ifa->ifa_index, ll_index_to_name(ifa->ifa_index));
+       if (ifa->ifa_family == AF_INET)
+               fprintf(fp, "    inet ");
+       else if (ifa->ifa_family == AF_INET6)
+               fprintf(fp, "    inet6 ");
+       else if (ifa->ifa_family == AF_DECnet)
+               fprintf(fp, "    dnet ");
+       else if (ifa->ifa_family == AF_IPX)
+               fprintf(fp, "     ipx ");
+       else
+               fprintf(fp, "    family %d ", ifa->ifa_family);
+
+       if (rta_tb[IFA_LOCAL]) {
+               fprintf(fp, "%s", rt_addr_n2a(ifa->ifa_family,
+                                             RTA_PAYLOAD(rta_tb[IFA_LOCAL]),
+                                             RTA_DATA(rta_tb[IFA_LOCAL]),
+                                             abuf, sizeof(abuf)));
+
+               /*
+                * Local and peer are the same address only when their whole
+                * payloads agree; a fixed 4-byte compare is wrong for IPv6
+                * and reads past the end of shorter addresses.
+                */
+               if (rta_tb[IFA_ADDRESS] == NULL ||
+                   (RTA_PAYLOAD(rta_tb[IFA_ADDRESS]) == RTA_PAYLOAD(rta_tb[IFA_LOCAL]) &&
+                    memcmp(RTA_DATA(rta_tb[IFA_ADDRESS]), RTA_DATA(rta_tb[IFA_LOCAL]),
+                           RTA_PAYLOAD(rta_tb[IFA_LOCAL])) == 0)) {
+                       fprintf(fp, "/%d ", ifa->ifa_prefixlen);
+               } else {
+                       fprintf(fp, " peer %s/%d ",
+                               rt_addr_n2a(ifa->ifa_family,
+                                           RTA_PAYLOAD(rta_tb[IFA_ADDRESS]),
+                                           RTA_DATA(rta_tb[IFA_ADDRESS]),
+                                           abuf, sizeof(abuf)),
+                               ifa->ifa_prefixlen);
+               }
+       }
+
+       if (rta_tb[IFA_BROADCAST]) {
+               fprintf(fp, "brd %s ",
+                       rt_addr_n2a(ifa->ifa_family,
+                                   RTA_PAYLOAD(rta_tb[IFA_BROADCAST]),
+                                   RTA_DATA(rta_tb[IFA_BROADCAST]),
+                                   abuf, sizeof(abuf)));
+       }
+       if (rta_tb[IFA_ANYCAST]) {
+               fprintf(fp, "any %s ",
+                       rt_addr_n2a(ifa->ifa_family,
+                                   RTA_PAYLOAD(rta_tb[IFA_ANYCAST]),
+                                   RTA_DATA(rta_tb[IFA_ANYCAST]),
+                                   abuf, sizeof(abuf)));
+       }
+       fprintf(fp, "scope %s ", rtnl_rtscope_n2a(ifa->ifa_scope, b1, sizeof(b1)));
+
+       /* Each named flag is cleared once printed; what is left is shown in hex. */
+       if (ifa->ifa_flags&IFA_F_SECONDARY) {
+               ifa->ifa_flags &= ~IFA_F_SECONDARY;
+               fprintf(fp, "secondary ");
+       }
+       if (ifa->ifa_flags&IFA_F_TENTATIVE) {
+               ifa->ifa_flags &= ~IFA_F_TENTATIVE;
+               fprintf(fp, "tentative ");
+       }
+       if (ifa->ifa_flags&IFA_F_DEPRECATED) {
+               ifa->ifa_flags &= ~IFA_F_DEPRECATED;
+               fprintf(fp, "deprecated ");
+       }
+       if (!(ifa->ifa_flags&IFA_F_PERMANENT)) {
+               fprintf(fp, "dynamic ");
+       } else
+               ifa->ifa_flags &= ~IFA_F_PERMANENT;
+       if (ifa->ifa_flags)
+               fprintf(fp, "flags %02x ", ifa->ifa_flags);
+       if (rta_tb[IFA_LABEL])
+               fprintf(fp, "%s", (char*)RTA_DATA(rta_tb[IFA_LABEL]));
+       if (rta_tb[IFA_CACHEINFO]) {
+               struct ifa_cacheinfo *ci = RTA_DATA(rta_tb[IFA_CACHEINFO]);
+               char buf[128];
+               fprintf(fp, "%s", _SL_);
+               /* The lifetimes are unsigned; 0xFFFFFFFF is shown as "forever". */
+               if (ci->ifa_valid == 0xFFFFFFFFU)
+                       sprintf(buf, "valid_lft forever");
+               else
+                       sprintf(buf, "valid_lft %usec", ci->ifa_valid);
+               if (ci->ifa_prefered == 0xFFFFFFFFU)
+                       sprintf(buf+strlen(buf), " preferred_lft forever");
+               else
+                       sprintf(buf+strlen(buf), " preferred_lft %usec", ci->ifa_prefered);
+               fprintf(fp, "       %s", buf);
+       }
+       fprintf(fp, "\n");
+       fflush(fp);
+       return 0;
+}
+
+
+/*
+ * Node of a singly linked list of stored netlink messages.  h is only the
+ * start of the message: store_nlmsg() allocates nlmsg_len extra bytes and
+ * copies the whole message to &h, so its payload lies beyond the end of
+ * this struct.
+ */
+struct nlmsg_list
+{
+       struct nlmsg_list *next;
+       struct nlmsghdr   h;
+};
+
+/*
+ * Print, through print_addrinfo(), every RTM_NEWADDR entry of the list
+ * ainfo that belongs to interface ifindex and, when filter.family is
+ * set, to that address family.
+ *
+ * Returns 0 after the whole list has been walked, or -1 as soon as an
+ * entry is too short to hold a struct ifaddrmsg.
+ */
+int print_selected_addrinfo(int ifindex, struct nlmsg_list *ainfo, FILE *fp)
+{
+       for ( ;ainfo ;  ainfo = ainfo->next) {
+               struct nlmsghdr *n = &ainfo->h;
+               struct ifaddrmsg *ifa = NLMSG_DATA(n);
+
+               if (n->nlmsg_type != RTM_NEWADDR)
+                       continue;
+
+               /* sizeof(*ifa), not sizeof(ifa): the payload, not the pointer, must fit. */
+               if (n->nlmsg_len < NLMSG_LENGTH(sizeof(*ifa)))
+                       return -1;
+
+               if (ifa->ifa_index != ifindex ||
+                   (filter.family && filter.family != ifa->ifa_family))
+                       continue;
+
+               print_addrinfo(NULL, n, fp);
+       }
+       return 0;
+}
+
+
+/*
+ * Append a copy of the message n to the list whose head pointer arg
+ * points to, then pass the message on to ll_remember_index().
+ *
+ * Returns 0 on success, -1 when no memory could be allocated (nothing is
+ * stored in that case).
+ */
+int store_nlmsg(struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
+{
+       struct nlmsg_list **tail = (struct nlmsg_list**)arg;
+       struct nlmsg_list *node = malloc(n->nlmsg_len+sizeof(void*));
+
+       if (node == NULL)
+               return -1;
+
+       node->next = NULL;
+       memcpy(&node->h, n, n->nlmsg_len);
+
+       /* Advance to the terminating NULL link and hook the new node in there. */
+       while (*tail)
+               tail = &(*tail)->next;
+       *tail = node;
+
+       ll_remember_index(who, n, NULL);
+       return 0;
+}
+
+int ipaddr_list_or_flush(int argc, char **argv, int flush)
+{
+       struct nlmsg_list *linfo = NULL;
+       struct nlmsg_list *ainfo = NULL;
+       struct nlmsg_list *l;
+       struct rtnl_handle rth;
+       char *filter_dev = NULL;
+       int no_link = 0;
+
+       ipaddr_reset_filter(oneline);
+       filter.showqueue = 1;
+
+       if (filter.family == AF_UNSPEC)
+               filter.family = preferred_family;
+
+       if (flush) {
+               if (argc <= 0) {
+                       fprintf(stderr, "Flush requires arguments.\n");
+                       return -1;
+               }
+               if (filter.family == AF_PACKET) {
+                       fprintf(stderr, "Cannot flush link addresses.\n");
+                       return -1;
+               }
+       }
+
+       while (argc > 0) {
+               if (strcmp(*argv, "to") == 0) {
+                       NEXT_ARG();
+                       get_prefix(&filter.pfx, *argv, filter.family);
+                       if (filter.family == AF_UNSPEC)
+                               filter.family = filter.pfx.family;
+               } else if (strcmp(*argv, "scope") == 0) {
+                       int scope = 0;
+                       NEXT_ARG();
+                       filter.scopemask = -1;
+                       if (rtnl_rtscope_a2n(&scope, *argv)) {
+                               if (strcmp(*argv, "all") != 0)
+                                       invarg("invalid \"scope\"\n", *argv);
+                               scope = RT_SCOPE_NOWHERE;
+                               filter.scopemask = 0;
+                       }
+                       filter.scope = scope;
+               } else if (strcmp(*argv, "up") == 0) {
+                       filter.up = 1;
+               } else if (strcmp(*argv, "dynamic") == 0) {
+                       filter.flags &= ~IFA_F_PERMANENT;
+                       filter.flagmask |= IFA_F_PERMANENT;
+               } else if (strcmp(*argv, "permanent") == 0) {
+                       filter.flags |= IFA_F_PERMANENT;
+                       filter.flagmask |= IFA_F_PERMANENT;
+               } else if (strcmp(*argv, "secondary") == 0) {
+                       filter.flags |= IFA_F_SECONDARY;
+                       filter.flagmask |= IFA_F_SECONDARY;
+               } else if (strcmp(*argv, "primary") == 0) {
+                       filter.flags &= ~IFA_F_SECONDARY;
+                       filter.flagmask |= IFA_F_SECONDARY;
+               } else if (strcmp(*argv, "tentative") == 0) {
+                       filter.flags |= IFA_F_TENTATIVE;
+                       filter.flagmask |= IFA_F_TENTATIVE;
+               } else if (strcmp(*argv, "deprecated") == 0) {
+                       filter.flags |= IFA_F_DEPRECATED;
+                       filter.flagmask |= IFA_F_DEPRECATED;
+               } else if (strcmp(*argv, "label") == 0) {
+                       NEXT_ARG();
+                       filter.label = *argv;
+               } else {
+                       if (strcmp(*argv, "dev") == 0) {
+                               NEXT_ARG();
+                       }
+                       if (matches(*argv, "help") == 0)
+                               usage();
+                       if (filter_dev)
+                               duparg2("dev", *argv);
+                       filter_dev = *argv;
+               }
+               argv++; argc--;
+       }
+
+       if (rtnl_open(&rth, 0) < 0)
+               exit(1);
+
+       if (rtnl_wilddump_request(&rth, preferred_family, RTM_GETLINK) < 0) {
+               perror("Cannot send dump request");
+               exit(1);
+       }
+
+       if (rtnl_dump_filter(&rth, store_nlmsg, &linfo, NULL, NULL) < 0) {
+               fprintf(stderr, "Dump terminated\n");
+               exit(1);
+       }
+
+       if (filter_dev) {
+               filter.ifindex = ll_name_to_index(filter_dev);
+               if (filter.ifindex <= 0) {
+                       fprintf(stderr, "Device \"%s\" does not exist.\n", filter_dev);
+                       return -1;
+               }
+       }
+
+       if (flush) {
+               int round = 0;
+               char flushb[4096-512];
+
+               filter.flushb = flushb;
+               filter.flushp = 0;
+               filter.flushe = sizeof(flushb);
+               filter.rth = &rth;
+
+               for (;;) {
+                       if (rtnl_wilddump_request(&rth, filter.family, RTM_GETADDR) < 0) {
+                               perror("Cannot send dump request");
+                               exit(1);
+                       }
+                       filter.flushed = 0;
+                       if (rtnl_dump_filter(&rth, print_addrinfo, stdout, NULL, NULL) < 0) {
+                               fprintf(stderr, "Flush terminated\n");
+                               exit(1);
+                       }
+                       if (filter.flushed == 0) {
+                               if (round == 0) {
+                                       fprintf(stderr, "Nothing to flush.\n");
+                               } else if (show_stats)
+                                       printf("*** Flush is complete after %d round%s ***\n", round, round>1?"s":"");
+                               fflush(stdout);
+                               return 0;
+                       }
+                       round++;
+                       if (flush_update() < 0)
+                               exit(1);
+                       if (show_stats) {
+                               printf("\n*** Round %d, deleting %d addresses ***\n", round, filter.flushed);
+                               fflush(stdout);
+                       }
+               }
+       }
+
+       if (filter.family != AF_PACKET) {
+               if (rtnl_wilddump_request(&rth, filter.family, RTM_GETADDR) < 0) {
+                       perror("Cannot send dump request");
+                       exit(1);
+               }
+
+               if (rtnl_dump_filter(&rth, store_nlmsg, &ainfo, NULL, NULL) < 0) {
+                       fprintf(stderr, "Dump terminated\n");
+                       exit(1);
+               }
+       }
+
+
+       if (filter.family && filter.family != AF_PACKET) {
+               struct nlmsg_list **lp;
+               lp=&linfo;
+
+               if (filter.oneline)
+                       no_link = 1;
+
+               while ((l=*lp)!=NULL) {
+                       int ok = 0;
+                       struct ifinfomsg *ifi = NLMSG_DATA(&l->h);
+                       struct nlmsg_list *a;
+
+                       for (a=ainfo; a; a=a->next) {
+                               struct nlmsghdr *n = &a->h;
+                               struct ifaddrmsg *ifa = NLMSG_DATA(n);
+
+                               if (ifa->ifa_index != ifi->ifi_index || 
+                                   (filter.family && filter.family != ifa->ifa_family))
+                                       continue;
+                               if ((filter.scope^ifa->ifa_scope)&filter.scopemask)
+                                       continue;
+                               if ((filter.flags^ifa->ifa_flags)&filter.flagmask)
+                                       continue;
+                               if (filter.pfx.family || filter.label) {
+                                       struct rtattr *tb[IFA_MAX+1];
+                                       memset(tb, 0, sizeof(tb));
+                                       parse_rtattr(tb, IFA_MAX, IFA_RTA(ifa), IFA_PAYLOAD(n));
+                                       if (!tb[IFA_LOCAL])
+                                               tb[IFA_LOCAL] = tb[IFA_ADDRESS];
+
+                                       if (filter.pfx.family && tb[IFA_LOCAL]) {
+                                               inet_prefix dst;
+                                               memset(&dst, 0, sizeof(dst));
+                                               dst.family = ifa->ifa_family;
+                                               memcpy(&dst.data, RTA_DATA(tb[IFA_LOCAL]), RTA_PAYLOAD(tb[IFA_LOCAL]));
+                                               if (inet_addr_match(&dst, &filter.pfx, filter.pfx.bitlen))
+                                                       continue;
+                                       }
+                                       if (filter.label) {
+                                               SPRINT_BUF(b1);
+                                               const char *label;
+                                               if (tb[IFA_LABEL])
+                                                       label = RTA_DATA(tb[IFA_LABEL]);
+                                               else
+                                                       label = ll_idx_n2a(ifa->ifa_index, b1);
+                                               if (fnmatch(filter.label, label, 0) != 0)
+                                                       continue;
+                                       }
+                               }
+
+                               ok = 1;
+                               break;
+                       }
+                       if (!ok)
+                               *lp = l->next;
+                       else
+                               lp = &l->next;
+               }
+       }
+
+       for (l=linfo; l; l = l->next) {
+               if (no_link || print_linkinfo(NULL, &l->h, stdout) == 0) {
+                       struct ifinfomsg *ifi = NLMSG_DATA(&l->h);
+                       if (filter.family != AF_PACKET)
+                               print_selected_addrinfo(ifi->ifi_index, ainfo, stdout);
+               }
+               fflush(stdout);
+       }
+
+       exit(0);
+}
+
+/*
+ * Entry point used by "ip link show": selects AF_PACKET as the preferred
+ * family, raises the do_link flag and hands the remaining arguments to
+ * ipaddr_list_or_flush() in list (non-flush) mode.
+ */
+int ipaddr_list_link(int argc, char **argv)
+{
+       const int flush = 0;
+
+       do_link = 1;
+       preferred_family = AF_PACKET;
+
+       return ipaddr_list_or_flush(argc, argv, flush);
+}
+
+/*
+ * Clear every field of the file-level filter and then record the
+ * caller's oneline setting in it.
+ */
+void ipaddr_reset_filter(int oneline)
+{
+       memset(&filter, 0, sizeof filter);
+
+       filter.oneline = oneline;
+}
+
+/*
+ * Choose the scope used when the user gave none: an IPv4 address whose
+ * first byte is 127 gets RT_SCOPE_HOST, everything else gets 0.
+ */
+int default_scope(inet_prefix *lcl)
+{
+       const __u8 *first_octet;
+
+       if (lcl->family != AF_INET)
+               return 0;
+       if (lcl->bytelen < 1)
+               return 0;
+
+       first_octet = (const __u8 *)&lcl->data;
+       return (*first_octet == 127) ? RT_SCOPE_HOST : 0;
+}
+
+/*
+ * Build and send one RTM_NEWADDR / RTM_DELADDR request ("ip addr add|del").
+ *
+ * cmd   - netlink message type placed in the request header.
+ * argc,
+ * argv  - remaining command line: [local] PREFIX, peer/remote PREFIX,
+ *         broadcast/brd ADDR|+|-, anycast ADDR, scope SCOPE, dev NAME,
+ *         label NAME.
+ *
+ * Returns -1 when "dev" is missing, the device is unknown or an automatic
+ * broadcast is requested for a non-IPv4 address; otherwise the function
+ * does not return and terminates the process via exit() (0 on success).
+ */
+int ipaddr_modify(int cmd, int argc, char **argv)
+{
+       struct rtnl_handle rth;
+       struct {
+               struct nlmsghdr         n;
+               struct ifaddrmsg        ifa;
+               char                    buf[256];
+       } req;
+       char  *d = NULL;
+       char  *l = NULL;
+       inet_prefix lcl;
+       inet_prefix peer;
+       int local_len = 0;
+       int peer_len = 0;
+       int brd_len = 0;
+       int any_len = 0;
+       int scoped = 0;
+
+       memset(&req, 0, sizeof(req));
+       /*
+        * lcl and peer are read below even when the user supplied neither
+        * (prefix length, default scope, automatic broadcast), so they must
+        * start out in a defined state.
+        */
+       memset(&lcl, 0, sizeof(lcl));
+       memset(&peer, 0, sizeof(peer));
+
+       req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
+       req.n.nlmsg_flags = NLM_F_REQUEST;
+       req.n.nlmsg_type = cmd;
+       req.ifa.ifa_family = preferred_family;
+
+       while (argc > 0) {
+               if (strcmp(*argv, "peer") == 0 ||
+                   strcmp(*argv, "remote") == 0) {
+                       NEXT_ARG();
+
+                       if (peer_len)
+                               duparg("peer", *argv);
+                       get_prefix(&peer, *argv, req.ifa.ifa_family);
+                       peer_len = peer.bytelen;
+                       if (req.ifa.ifa_family == AF_UNSPEC)
+                               req.ifa.ifa_family = peer.family;
+                       addattr_l(&req.n, sizeof(req), IFA_ADDRESS, &peer.data, peer.bytelen);
+                       req.ifa.ifa_prefixlen = peer.bitlen;
+               } else if (matches(*argv, "broadcast") == 0 ||
+                          strcmp(*argv, "brd") == 0) {
+                       inet_prefix addr;
+                       NEXT_ARG();
+                       if (brd_len)
+                               duparg("broadcast", *argv);
+                       /* "+" and "-" defer the computation until the prefix is known */
+                       if (strcmp(*argv, "+") == 0)
+                               brd_len = -1;
+                       else if (strcmp(*argv, "-") == 0)
+                               brd_len = -2;
+                       else {
+                               get_addr(&addr, *argv, req.ifa.ifa_family);
+                               if (req.ifa.ifa_family == AF_UNSPEC)
+                                       req.ifa.ifa_family = addr.family;
+                               addattr_l(&req.n, sizeof(req), IFA_BROADCAST, &addr.data, addr.bytelen);
+                               brd_len = addr.bytelen;
+                       }
+               } else if (strcmp(*argv, "anycast") == 0) {
+                       inet_prefix addr;
+                       NEXT_ARG();
+                       if (any_len)
+                               duparg("anycast", *argv);
+                       get_addr(&addr, *argv, req.ifa.ifa_family);
+                       if (req.ifa.ifa_family == AF_UNSPEC)
+                               req.ifa.ifa_family = addr.family;
+                       addattr_l(&req.n, sizeof(req), IFA_ANYCAST, &addr.data, addr.bytelen);
+                       any_len = addr.bytelen;
+               } else if (strcmp(*argv, "scope") == 0) {
+                       int scope = 0;
+                       NEXT_ARG();
+                       /* message first, offending argument second, as elsewhere in this file */
+                       if (rtnl_rtscope_a2n(&scope, *argv))
+                               invarg("invalid scope value.", *argv);
+                       req.ifa.ifa_scope = scope;
+                       scoped = 1;
+               } else if (strcmp(*argv, "dev") == 0) {
+                       NEXT_ARG();
+                       d = *argv;
+               } else if (strcmp(*argv, "label") == 0) {
+                       NEXT_ARG();
+                       l = *argv;
+                       addattr_l(&req.n, sizeof(req), IFA_LABEL, l, strlen(l)+1);
+               } else {
+                       /* anything else is the local prefix; the "local" keyword is optional */
+                       if (strcmp(*argv, "local") == 0) {
+                               NEXT_ARG();
+                       }
+                       if (matches(*argv, "help") == 0)
+                               usage();
+                       if (local_len)
+                               duparg2("local", *argv);
+                       get_prefix(&lcl, *argv, req.ifa.ifa_family);
+                       if (req.ifa.ifa_family == AF_UNSPEC)
+                               req.ifa.ifa_family = lcl.family;
+                       addattr_l(&req.n, sizeof(req), IFA_LOCAL, &lcl.data, lcl.bytelen);
+                       local_len = lcl.bytelen;
+               }
+               argc--; argv++;
+       }
+       if (d == NULL) {
+               fprintf(stderr, "Not enough information: \"dev\" argument is required.\n");
+               return -1;
+       }
+       if (l && matches(d, l) != 0) {
+               fprintf(stderr, "\"dev\" (%s) must match \"label\" (%s).\n", d, l);
+               exit(1);
+       }
+
+       /* without an explicit peer, an added local address doubles as IFA_ADDRESS */
+       if (peer_len == 0 && local_len && cmd != RTM_DELADDR) {
+               peer = lcl;
+               addattr_l(&req.n, sizeof(req), IFA_ADDRESS, &lcl.data, lcl.bytelen);
+       }
+       if (req.ifa.ifa_prefixlen == 0)
+               req.ifa.ifa_prefixlen = lcl.bitlen;
+
+       /* brd_len < 0: derive the broadcast address from the peer prefix */
+       if (brd_len < 0 && cmd != RTM_DELADDR) {
+               inet_prefix brd;
+               int i;
+               if (req.ifa.ifa_family != AF_INET) {
+                       fprintf(stderr, "Broadcast can be set only for IPv4 addresses\n");
+                       return -1;
+               }
+               brd = peer;
+               if (brd.bitlen <= 30) {
+                       /* -1 ("+") sets every host bit, -2 ("-") clears them */
+                       for (i=31; i>=brd.bitlen; i--) {
+                               if (brd_len == -1)
+                                       brd.data[0] |= htonl(1<<(31-i));
+                               else
+                                       brd.data[0] &= ~htonl(1<<(31-i));
+                       }
+                       addattr_l(&req.n, sizeof(req), IFA_BROADCAST, &brd.data, brd.bytelen);
+                       brd_len = brd.bytelen;
+               }
+       }
+       if (!scoped && cmd != RTM_DELADDR)
+               req.ifa.ifa_scope = default_scope(&lcl);
+
+       if (rtnl_open(&rth, 0) < 0)
+               exit(1);
+
+       ll_init_map(&rth);
+
+       if ((req.ifa.ifa_index = ll_name_to_index(d)) == 0) {
+               fprintf(stderr, "Cannot find device \"%s\"\n", d);
+               return -1;
+       }
+
+       if (rtnl_talk(&rth, &req.n, 0, 0, NULL, NULL, NULL) < 0)
+               exit(2);
+
+       exit(0);
+}
+
+/*
+ * Dispatcher for "ip address": with no arguments it lists addresses,
+ * otherwise the first word selects add / delete / list / flush / help.
+ * An unrecognised word is reported on stderr and the process exits.
+ */
+int do_ipaddr(int argc, char **argv)
+{
+       char *cmd;
+       char **rest;
+       int nrest;
+
+       if (argc < 1)
+               return ipaddr_list_or_flush(0, NULL, 0);
+
+       cmd = *argv;
+       rest = argv + 1;
+       nrest = argc - 1;
+
+       if (matches(cmd, "add") == 0)
+               return ipaddr_modify(RTM_NEWADDR, nrest, rest);
+
+       if (matches(cmd, "delete") == 0)
+               return ipaddr_modify(RTM_DELADDR, nrest, rest);
+
+       if (matches(cmd, "list") == 0 ||
+           matches(cmd, "show") == 0 ||
+           matches(cmd, "lst") == 0)
+               return ipaddr_list_or_flush(nrest, rest, 0);
+
+       if (matches(cmd, "flush") == 0)
+               return ipaddr_list_or_flush(nrest, rest, 1);
+
+       if (matches(cmd, "help") == 0)
+               usage();
+
+       fprintf(stderr, "Command \"%s\" is unknown, try \"ip address help\".\n", cmd);
+       exit(-1);
+}
+
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..1fc3dcfd381a07638134e7c071cf8ad7cf15eab3 100644 (file)
@@ -0,0 +1,397 @@
+/*
+ * iplink.c            "ip link".
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/socket.h>
+#include <linux/if.h>
+#include <linux/if_packet.h>
+#include <linux/if_ether.h>
+#include <linux/sockios.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/sockios.h>
+
+#include "rt_names.h"
+#include "utils.h"
+#include "ip_common.h"
+
+
+static void usage(void) __attribute__((noreturn));
+
+/*
+ * Print the "ip link" synopsis on stderr and terminate the process with
+ * exit(-1).
+ */
+void iplink_usage(void)
+{
+       static const char *const help[] = {
+               "Usage: ip link set DEVICE { up | down | arp { on | off } |\n",
+               "                            dynamic { on | off } |\n",
+               "                            multicast { on | off } | txqueuelen PACKETS |\n",
+               "                            name NEWNAME |\n",
+               "                            address LLADDR | broadcast LLADDR |\n",
+               "                            mtu MTU }\n",
+               "       ip link show [ DEVICE ]\n",
+       };
+       unsigned int i;
+
+       for (i = 0; i < sizeof(help) / sizeof(help[0]); i++)
+               fputs(help[i], stderr);
+
+       exit(-1);
+}
+
+/* File-local alias so the parsers below can simply call usage(). */
+static void usage(void)
+{
+       iplink_usage();
+}
+
+/*
+ * Complain on stderr that the keyword named by msg needs "on" or "off".
+ * Always returns -1 so callers can write "return on_off(...)".
+ */
+static int on_off(char *msg)
+{
+       const int failed = -1;
+
+       fprintf(stderr,
+               "Error: argument of \"%s\" must be \"on\" or \"off\"\n",
+               msg);
+
+       return failed;
+}
+
+/*
+ * Open a datagram socket to issue interface ioctls on.  PF_INET is tried
+ * first, then PF_PACKET, then PF_INET6.  If all three fail, errno from
+ * the first (PF_INET) attempt is restored, the failure is reported with
+ * perror() and -1 is returned.
+ */
+static int get_ctl_fd(void)
+{
+       static const int domains[] = { PF_INET, PF_PACKET, PF_INET6 };
+       int first_errno = 0;
+       unsigned int i;
+
+       for (i = 0; i < sizeof(domains) / sizeof(domains[0]); i++) {
+               int sock = socket(domains[i], SOCK_DGRAM, 0);
+
+               if (sock >= 0)
+                       return sock;
+               if (i == 0)
+                       first_errno = errno;
+       }
+
+       errno = first_errno;
+       perror("Cannot create control socket");
+       return -1;
+}
+
+/*
+ * Change the interface flags of dev.
+ *
+ * dev   - interface name; must fit in ifr_name (shorter than IFNAMSIZ).
+ * flags - desired values for the bits selected by mask.
+ * mask  - which flag bits are to be changed.
+ *
+ * The current flags are fetched with SIOCGIFFLAGS and written back with
+ * SIOCSIFFLAGS only when a masked bit actually differs.  Returns 0 on
+ * success and a negative value on failure (after printing a diagnostic).
+ */
+static int do_chflags(char *dev, __u32 flags, __u32 mask)
+{
+       struct ifreq ifr;
+       int fd;
+       int err;
+
+       /* ifr_name is a fixed IFNAMSIZ buffer: refuse names that cannot fit */
+       if (strlen(dev) >= IFNAMSIZ) {
+               fprintf(stderr, "Device name \"%s\" is too long.\n", dev);
+               return -1;
+       }
+       memset(&ifr, 0, sizeof(ifr));
+       strcpy(ifr.ifr_name, dev);
+       fd = get_ctl_fd();
+       if (fd < 0)
+               return -1;
+       err = ioctl(fd, SIOCGIFFLAGS, &ifr);
+       if (err) {
+               perror("SIOCGIFFLAGS");
+               close(fd);
+               return -1;
+       }
+       if ((ifr.ifr_flags^flags)&mask) {
+               ifr.ifr_flags &= ~mask;
+               ifr.ifr_flags |= mask&flags;
+               err = ioctl(fd, SIOCSIFFLAGS, &ifr);
+               if (err)
+                       perror("SIOCSIFFLAGS");
+       }
+       close(fd);
+       return err;
+}
+
+/*
+ * Rename interface dev to newdev with the SIOCSIFNAME ioctl.
+ *
+ * Both names are copied into fixed IFNAMSIZ buffers of struct ifreq, so a
+ * name that does not fit is rejected up front instead of overflowing.
+ * Returns 0 on success, -1 on failure (after printing a diagnostic).
+ */
+static int do_changename(char *dev, char *newdev)
+{
+       struct ifreq ifr;
+       int fd;
+       int err;
+
+       if (strlen(dev) >= IFNAMSIZ) {
+               fprintf(stderr, "Device name \"%s\" is too long.\n", dev);
+               return -1;
+       }
+       if (strlen(newdev) >= IFNAMSIZ) {
+               fprintf(stderr, "Device name \"%s\" is too long.\n", newdev);
+               return -1;
+       }
+       memset(&ifr, 0, sizeof(ifr));
+       strcpy(ifr.ifr_name, dev);
+       strcpy(ifr.ifr_newname, newdev);
+       fd = get_ctl_fd();
+       if (fd < 0)
+               return -1;
+       err = ioctl(fd, SIOCSIFNAME, &ifr);
+       if (err) {
+               perror("SIOCSIFNAME");
+               close(fd);
+               return -1;
+       }
+       close(fd);
+       return err;
+}
+
+/*
+ * Set the transmit queue length of dev to qlen via SIOCSIFTXQLEN.
+ * Returns 0 on success, -1 on failure (after printing a diagnostic).
+ */
+static int set_qlen(char *dev, int qlen)
+{
+       struct ifreq ifr;
+       int s;
+
+       /* ifr_name is a fixed IFNAMSIZ buffer: refuse names that cannot fit */
+       if (strlen(dev) >= IFNAMSIZ) {
+               fprintf(stderr, "Device name \"%s\" is too long.\n", dev);
+               return -1;
+       }
+
+       s = get_ctl_fd();
+       if (s < 0)
+               return -1;
+
+       memset(&ifr, 0, sizeof(ifr));
+       strcpy(ifr.ifr_name, dev);
+       ifr.ifr_qlen = qlen;
+       if (ioctl(s, SIOCSIFTXQLEN, &ifr) < 0) {
+               /* tag the error with the ioctl that was actually issued */
+               perror("SIOCSIFTXQLEN");
+               close(s);
+               return -1;
+       }
+       close(s);
+
+       return 0;
+}
+
+/*
+ * Set the MTU of dev to mtu via SIOCSIFMTU.
+ * Returns 0 on success, -1 on failure (after printing a diagnostic).
+ */
+static int set_mtu(char *dev, int mtu)
+{
+       struct ifreq ifr;
+       int s;
+
+       /* ifr_name is a fixed IFNAMSIZ buffer: refuse names that cannot fit */
+       if (strlen(dev) >= IFNAMSIZ) {
+               fprintf(stderr, "Device name \"%s\" is too long.\n", dev);
+               return -1;
+       }
+
+       s = get_ctl_fd();
+       if (s < 0)
+               return -1;
+
+       memset(&ifr, 0, sizeof(ifr));
+       strcpy(ifr.ifr_name, dev);
+       ifr.ifr_mtu = mtu;
+       if (ioctl(s, SIOCSIFMTU, &ifr) < 0) {
+               perror("SIOCSIFMTU");
+               close(s);
+               return -1;
+       }
+       close(s);
+
+       return 0;
+}
+
+/*
+ * Query the link-layer address format of dev.
+ *
+ * A PF_PACKET socket is bound to the interface and getsockname() is used
+ * to read back the sockaddr_ll describing it.
+ *
+ * dev   - interface name; must fit in ifr_name (shorter than IFNAMSIZ).
+ * htype - out: hardware type (sll_hatype) of the interface.
+ *
+ * Returns the hardware address length (sll_halen), or -1 on failure
+ * (after printing a diagnostic).
+ */
+static int get_address(char *dev, int *htype)
+{
+       struct ifreq ifr;
+       struct sockaddr_ll me;
+       socklen_t alen;         /* getsockname() takes a socklen_t *, not int * */
+       int s;
+
+       if (strlen(dev) >= IFNAMSIZ) {
+               fprintf(stderr, "Device name \"%s\" is too long.\n", dev);
+               return -1;
+       }
+
+       s = socket(PF_PACKET, SOCK_DGRAM, 0);
+       if (s < 0) {
+               perror("socket(PF_PACKET)");
+               return -1;
+       }
+
+       memset(&ifr, 0, sizeof(ifr));
+       strcpy(ifr.ifr_name, dev);
+       if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {
+               perror("SIOCGIFINDEX");
+               close(s);
+               return -1;
+       }
+
+       memset(&me, 0, sizeof(me));
+       me.sll_family = AF_PACKET;
+       me.sll_ifindex = ifr.ifr_ifindex;
+       me.sll_protocol = htons(ETH_P_LOOP);
+       if (bind(s, (struct sockaddr*)&me, sizeof(me)) == -1) {
+               perror("bind");
+               close(s);
+               return -1;
+       }
+
+       alen = sizeof(me);
+       if (getsockname(s, (struct sockaddr*)&me, &alen) == -1) {
+               perror("getsockname");
+               close(s);
+               return -1;
+       }
+       close(s);
+       *htype = me.sll_hatype;
+       return me.sll_halen;
+}
+
+/*
+ * Prepare an ifreq carrying a new hardware address for dev.
+ *
+ * dev    - interface name; must fit in ifr_name (shorter than IFNAMSIZ).
+ * hatype - hardware type stored in ifr_hwaddr.sa_family.
+ * halen  - address length the parsed address has to match exactly.
+ * lla    - textual link-layer address, converted by ll_addr_a2n().
+ * ifr    - out: zeroed and filled in with name, type and address.
+ *
+ * Returns 0 on success, -1 if the name is too long, the address cannot
+ * be parsed or its length differs from halen.
+ */
+static int parse_address(char *dev, int hatype, int halen, char *lla, struct ifreq *ifr)
+{
+       int alen;
+
+       if (strlen(dev) >= IFNAMSIZ) {
+               fprintf(stderr, "Device name \"%s\" is too long.\n", dev);
+               return -1;
+       }
+
+       memset(ifr, 0, sizeof(*ifr));
+       strcpy(ifr->ifr_name, dev);
+       ifr->ifr_hwaddr.sa_family = hatype;
+       /* bound the conversion by the real size of the destination buffer */
+       alen = ll_addr_a2n(ifr->ifr_hwaddr.sa_data,
+                          sizeof(ifr->ifr_hwaddr.sa_data), lla);
+       if (alen < 0)
+               return -1;
+       if (alen != halen) {
+               fprintf(stderr, "Wrong address (%s) length: expected %d bytes\n", lla, halen);
+               return -1;
+       }
+       return 0;
+}
+
+/*
+ * Apply a prepared hardware-address request.  A non-zero brd selects
+ * SIOCSIFHWBROADCAST, zero selects SIOCSIFHWADDR.  Returns 0 on success,
+ * -1 on failure (after printing a diagnostic).
+ */
+static int set_address(struct ifreq *ifr, int brd)
+{
+       const char *tag;
+       unsigned long request;
+       int fd;
+
+       fd = get_ctl_fd();
+       if (fd < 0)
+               return -1;
+
+       if (brd) {
+               request = SIOCSIFHWBROADCAST;
+               tag = "SIOCSIFHWBROADCAST";
+       } else {
+               request = SIOCSIFHWADDR;
+               tag = "SIOCSIFHWADDR";
+       }
+
+       if (ioctl(fd, request, ifr) < 0) {
+               perror(tag);
+               close(fd);
+               return -1;
+       }
+
+       close(fd);
+       return 0;
+}
+
+
+static int do_set(int argc, char **argv)
+{
+       char *dev = NULL;
+       __u32 mask = 0;
+       __u32 flags = 0;
+       int qlen = -1;
+       int mtu = -1;
+       char *newaddr = NULL;
+       char *newbrd = NULL;
+       struct ifreq ifr0, ifr1;
+       char *newname = NULL;
+       int htype, halen;
+
+       while (argc > 0) {
+               if (strcmp(*argv, "up") == 0) {
+                       mask |= IFF_UP;
+                       flags |= IFF_UP;
+               } else if (strcmp(*argv, "down") == 0) {
+                       mask |= IFF_UP;
+                       flags &= ~IFF_UP;
+               } else if (strcmp(*argv, "name") == 0) {
+                       NEXT_ARG();
+                       newname = *argv;
+               } else if (matches(*argv, "address") == 0) {
+                       NEXT_ARG();
+                       newaddr = *argv;
+               } else if (matches(*argv, "broadcast") == 0 ||
+                          strcmp(*argv, "brd") == 0) {
+                       NEXT_ARG();
+                       newbrd = *argv;
+               } else if (matches(*argv, "txqueuelen") == 0 ||
+                          strcmp(*argv, "qlen") == 0 ||
+                          matches(*argv, "txqlen") == 0) {
+                       NEXT_ARG();
+                       if (qlen != -1)
+                               duparg("txqueuelen", *argv);
+                       if (get_integer(&qlen,  *argv, 0))
+                               invarg("Invalid \"txqueuelen\" value\n", *argv);
+               } else if (strcmp(*argv, "mtu") == 0) {
+                       NEXT_ARG();
+                       if (mtu != -1)
+                               duparg("mtu", *argv);
+                       if (get_integer(&mtu, *argv, 0))
+                               invarg("Invalid \"mtu\" value\n", *argv);
+               } else if (strcmp(*argv, "multicast") == 0) {
+                       NEXT_ARG();
+                       mask |= IFF_MULTICAST;
+                       if (strcmp(*argv, "on") == 0) {
+                               flags |= IFF_MULTICAST;
+                       } else if (strcmp(*argv, "off") == 0) {
+                               flags &= ~IFF_MULTICAST;
+                       } else
+                               return on_off("multicast");
+               } else if (strcmp(*argv, "arp") == 0) {
+                       NEXT_ARG();
+                       mask |= IFF_NOARP;
+                       if (strcmp(*argv, "on") == 0) {
+                               flags &= ~IFF_NOARP;
+                       } else if (strcmp(*argv, "off") == 0) {
+                               flags |= IFF_NOARP;
+                       } else
+                               return on_off("noarp");
+#ifdef IFF_DYNAMIC
+               } else if (matches(*argv, "dynamic") == 0) {
+                       NEXT_ARG();
+                       mask |= IFF_DYNAMIC;
+                       if (strcmp(*argv, "on") == 0) {
+                               flags |= IFF_DYNAMIC;
+                       } else if (strcmp(*argv, "off") == 0) {
+                               flags &= ~IFF_DYNAMIC;
+                       } else
+                               return on_off("dynamic");
+#endif
+               } else {
+                        if (strcmp(*argv, "dev") == 0) {
+                               NEXT_ARG();
+                       }
+                       if (matches(*argv, "help") == 0)
+                               usage();
+                       if (dev)
+                               duparg2("dev", *argv);
+                       dev = *argv;
+               }
+               argc--; argv++;
+       }
+
+       if (!dev) {
+               fprintf(stderr, "Not enough of information: \"dev\" argument is required.\n");
+               exit(-1);
+       }
+
+       if (newaddr || newbrd) {
+               halen = get_address(dev, &htype);
+               if (halen < 0)
+                       return -1;
+               if (newaddr) {
+                       if (parse_address(dev, htype, halen, newaddr, &ifr0) < 0)
+                               return -1;
+               }
+               if (newbrd) {
+                       if (parse_address(dev, htype, halen, newbrd, &ifr1) < 0)
+                               return -1; 
+               }
+       }
+
+       if (newname && strcmp(dev, newname)) {
+               if (do_changename(dev, newname) < 0)
+                       return -1;
+               dev = newname;
+       }
+       if (qlen != -1) { 
+               if (set_qlen(dev, qlen) < 0)
+                       return -1; 
+       }
+       if (mtu != -1) { 
+               if (set_mtu(dev, mtu) < 0)
+                       return -1; 
+       }
+       if (newaddr || newbrd) {
+               if (newbrd) {
+                       if (set_address(&ifr1, 1) < 0)
+                               return -1; 
+               }
+               if (newaddr) {
+                       if (set_address(&ifr0, 0) < 0)
+                               return -1;
+               }
+       }
+       if (mask)
+               return do_chflags(dev, flags, mask);
+       return 0;
+}
+
+/*
+ * Dispatcher for "ip link": with no arguments it lists links, otherwise
+ * the first word selects set / show / help.  An unrecognised word is
+ * reported on stderr and the process exits.
+ */
+int do_iplink(int argc, char **argv)
+{
+       char *cmd;
+
+       if (argc <= 0)
+               return ipaddr_list_link(0, NULL);
+
+       cmd = *argv;
+
+       if (matches(cmd, "set") == 0)
+               return do_set(argc - 1, argv + 1);
+
+       if (matches(cmd, "show") == 0 ||
+           matches(cmd, "lst") == 0 ||
+           matches(cmd, "list") == 0)
+               return ipaddr_list_link(argc - 1, argv + 1);
+
+       if (matches(cmd, "help") == 0)
+               usage();
+
+       fprintf(stderr, "Command \"%s\" is unknown, try \"ip link help\".\n", cmd);
+       exit(-1);
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..b2c4adc0e9e2b15261bd98fb54838f4e5cde73e5 100644 (file)
@@ -0,0 +1,342 @@
+/*
+ * ipmaddr.c           "ip maddress".
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <linux/netdevice.h>
+#include <linux/if.h>
+#include <linux/if_arp.h>
+#include <linux/sockios.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "rt_names.h"
+#include "utils.h"
+
+/*
+ * Selection applied while listing multicast addresses.  An unset member
+ * (NULL / 0) places no restriction on the output.
+ */
+static struct {
+       /* interface name an entry must carry to be listed, or NULL */
+       char *dev;
+       /* address family to read; 0 reads every supported family */
+       int  family;
+} filter;
+
+static void usage(void) __attribute__((noreturn));
+
+/* Print the "ip maddr" synopsis on stderr and exit with status -1. */
+static void usage(void)
+{
+       fputs("Usage: ip maddr [ add | del ] MULTIADDR dev STRING\n"
+             "       ip maddr show [ dev STRING ]\n", stderr);
+       exit(-1);
+}
+
+/*
+ * Decode a string of hexadecimal digit pairs into raw bytes.
+ *
+ * str  - NUL-terminated text, two hex digits per output byte
+ * addr - output buffer; no size is passed in, so the caller has to make
+ *        sure it can hold strlen(str)/2 bytes
+ *
+ * Returns the number of bytes stored, or -1 when the string has an odd
+ * number of characters or a pair does not scan as hex.
+ */
+static int parse_hex(char *str, unsigned char *addr)
+{
+       int len = 0;
+
+       while (*str) {
+               /* %x stores through an unsigned int pointer. */
+               unsigned int byte;
+
+               /* A single trailing character cannot form a byte. */
+               if (str[1] == 0)
+                       return -1;
+               if (sscanf(str, "%02x", &byte) != 1)
+                       return -1;
+               addr[len] = byte;
+               len++;
+               str += 2;
+       }
+       return len;
+}
+
+/*
+ * One multicast membership as read from a /proc/net file.  Entries are
+ * chained into a list that maddr_ins() keeps ordered by interface index.
+ */
+struct ma_info
+{
+       /* next entry in the list, NULL at the end */
+       struct ma_info *next;
+       /* interface index; print_mlist() emits a heading when it changes */
+       int             index;
+       /* user count; printed as "users N" unless it equals 1 */
+       int             users;
+       /* optional tag printed after the address (e.g. "static"), or NULL */
+       char            *features;
+       /* interface name */
+       char            name[IFNAMSIZ];
+       /* the multicast address; addr.family selects how it is printed */
+       inet_prefix     addr;
+};
+
+/*
+ * Link entry m into the list headed by *lst so that the list stays sorted
+ * by ascending interface index.  m is placed after existing entries that
+ * have the same index.
+ */
+void maddr_ins(struct ma_info **lst, struct ma_info *m)
+{
+       struct ma_info **link = lst;
+
+       /* Walk to the first link whose target has a larger index. */
+       while (*link != NULL && (*link)->index <= m->index)
+               link = &(*link)->next;
+
+       m->next = *link;
+       *link = m;
+}
+
+/*
+ * Read link-layer multicast memberships from /proc/net/dev_mcast and add
+ * every entry that passes filter.dev to the list at *result_p.
+ *
+ * Each line is scanned as: interface index, interface name, user count,
+ * a flag that marks the entry "static" when non-zero, and the address as
+ * a hex string.  Lines that do not scan completely, or whose address
+ * would not fit into the entry, are skipped.  If the file cannot be
+ * opened nothing is added.
+ */
+void read_dev_mcast(struct ma_info **result_p)
+{
+       char buf[256];
+       FILE *fp = fopen("/proc/net/dev_mcast", "r");
+
+       if (!fp)
+               return;
+
+       while (fgets(buf, sizeof(buf), fp)) {
+               char hexa[256];
+               char name[256];
+               struct ma_info m;
+               struct ma_info *ma;
+               int len;
+               int st;
+
+               memset(&m, 0, sizeof(m));
+               /*
+                * All five fields are required: on a short match hexa and
+                * st would otherwise be read uninitialized below.  name and
+                * hexa are as large as buf, so %s cannot overrun them.
+                */
+               if (sscanf(buf, "%d%s%d%d%s", &m.index, name, &m.users, &st,
+                          hexa) != 5)
+                       continue;
+               /* m is zeroed, so the truncated copy stays NUL-terminated. */
+               strncpy(m.name, name, sizeof(m.name) - 1);
+
+               if (filter.dev && strcmp(filter.dev, m.name))
+                       continue;
+
+               /* parse_hex() is unbounded; refuse what cannot fit. */
+               if (strlen(hexa) > 2 * sizeof(m.addr.data))
+                       continue;
+
+               m.addr.family = AF_PACKET;
+
+               len = parse_hex(hexa, (unsigned char*)&m.addr.data);
+               if (len < 0)
+                       continue;
+
+               ma = malloc(sizeof(m));
+               if (!ma) {
+                       /* Out of memory: keep what was collected so far. */
+                       perror("malloc");
+                       break;
+               }
+               memcpy(ma, &m, sizeof(m));
+               ma->addr.bytelen = len;
+               ma->addr.bitlen = len<<3;
+               if (st)
+                       ma->features = "static";
+               maddr_ins(result_p, ma);
+       }
+       fclose(fp);
+}
+
+/*
+ * Read IPv4 multicast memberships from /proc/net/igmp and add every entry
+ * that passes filter.dev to the list at *result_p.
+ *
+ * The first line is read and discarded.  After it, lines that do not start
+ * with a tab name a device (index and name); lines that start with a tab
+ * carry a group address and user count for the most recent device line.
+ * If the file cannot be opened nothing is added.
+ */
+void read_igmp(struct ma_info **result_p)
+{
+       struct ma_info m;
+       char buf[256];
+       int have_dev = 0;
+       FILE *fp = fopen("/proc/net/igmp", "r");
+
+       if (!fp)
+               return;
+       memset(&m, 0, sizeof(m));
+
+       m.addr.family = AF_INET;
+       m.addr.bitlen = 32;
+       m.addr.bytelen = 4;
+
+       /* Drop the first line; an empty file yields no entries. */
+       if (!fgets(buf, sizeof(buf), fp)) {
+               fclose(fp);
+               return;
+       }
+
+       while (fgets(buf, sizeof(buf), fp)) {
+               struct ma_info *ma;
+
+               if (buf[0] != '\t') {
+                       char name[256];
+
+                       /*
+                        * Device line.  name is as large as buf, so %s
+                        * cannot overrun it; the copy into the fixed-size
+                        * field is truncated and stays NUL-terminated
+                        * because the last byte is never written.
+                        */
+                       have_dev = (sscanf(buf, "%d%s", &m.index, name) == 2);
+                       if (have_dev)
+                               strncpy(m.name, name, sizeof(m.name) - 1);
+                       continue;
+               }
+
+               /* Group lines after an unparsable device line are dropped
+                * rather than attributed to the previous device. */
+               if (!have_dev)
+                       continue;
+
+               if (filter.dev && strcmp(filter.dev, m.name))
+                       continue;
+
+               if (sscanf(buf, "%08x%d", (__u32*)&m.addr.data, &m.users) != 2)
+                       continue;
+
+               /*
+                * Allocate only once the entry is known to be kept; an
+                * allocation made before the checks above would be lost on
+                * every skipped line.
+                */
+               ma = malloc(sizeof(m));
+               if (!ma) {
+                       /* Out of memory: keep what was collected so far. */
+                       perror("malloc");
+                       break;
+               }
+               memcpy(ma, &m, sizeof(m));
+               maddr_ins(result_p, ma);
+       }
+       fclose(fp);
+}
+
+
+/*
+ * Read IPv6 multicast memberships from /proc/net/igmp6 and add every entry
+ * that passes filter.dev to the list at *result_p.
+ *
+ * Each line is scanned as: interface index, interface name, the address
+ * as a hex string, and the user count.  Lines that do not scan completely,
+ * or whose address would not fit into the entry, are skipped.  If the file
+ * cannot be opened nothing is added.
+ */
+void read_igmp6(struct ma_info **result_p)
+{
+       char buf[256];
+       FILE *fp = fopen("/proc/net/igmp6", "r");
+
+       if (!fp)
+               return;
+
+       while (fgets(buf, sizeof(buf), fp)) {
+               char hexa[256];
+               char name[256];
+               struct ma_info m;
+               struct ma_info *ma;
+               int len;
+
+               memset(&m, 0, sizeof(m));
+               /*
+                * All four fields are required: on a short match hexa would
+                * otherwise be read uninitialized below.  name and hexa are
+                * as large as buf, so %s cannot overrun them.
+                */
+               if (sscanf(buf, "%d%s%s%d", &m.index, name, hexa, &m.users) != 4)
+                       continue;
+               /* m is zeroed, so the truncated copy stays NUL-terminated. */
+               strncpy(m.name, name, sizeof(m.name) - 1);
+
+               if (filter.dev && strcmp(filter.dev, m.name))
+                       continue;
+
+               /* parse_hex() is unbounded; refuse what cannot fit. */
+               if (strlen(hexa) > 2 * sizeof(m.addr.data))
+                       continue;
+
+               m.addr.family = AF_INET6;
+
+               len = parse_hex(hexa, (unsigned char*)&m.addr.data);
+               if (len < 0)
+                       continue;
+
+               ma = malloc(sizeof(m));
+               if (!ma) {
+                       /* Out of memory: keep what was collected so far. */
+                       perror("malloc");
+                       break;
+               }
+               memcpy(ma, &m, sizeof(m));
+
+               ma->addr.bytelen = len;
+               ma->addr.bitlen = len<<3;
+               maddr_ins(result_p, ma);
+       }
+       fclose(fp);
+}
+
+/*
+ * Write one membership line to fp: a tab, the family label and address,
+ * then " users N" when the count is not 1 and the feature tag when set.
+ */
+static void print_maddr(FILE *fp, struct ma_info *list)
+{
+       int family = list->addr.family;
+
+       fprintf(fp, "\t");
+
+       if (family != AF_PACKET) {
+               char abuf[256];
+
+               /* Label first, then the formatted address. */
+               if (family == AF_INET)
+                       fprintf(fp, "inet  ");
+               else if (family == AF_INET6)
+                       fprintf(fp, "inet6 ");
+               else
+                       fprintf(fp, "family %d ", family);
+
+               fprintf(fp, "%s",
+                       format_host(family, -1, list->addr.data,
+                                   abuf, sizeof(abuf)));
+       } else {
+               SPRINT_BUF(b1);
+
+               /* Link-layer address. */
+               fprintf(fp, "link  %s",
+                       ll_addr_n2a((unsigned char*)list->addr.data,
+                                   list->addr.bytelen, 0,
+                                   b1, sizeof(b1)));
+       }
+
+       if (list->users != 1)
+               fprintf(fp, " users %d", list->users);
+       if (list->features != NULL)
+               fprintf(fp, " %s", list->features);
+       fprintf(fp, "\n");
+}
+
+/*
+ * Print the whole list to fp.  In one-line mode every entry is prefixed
+ * with "index:<tab>name" followed by _SL_; otherwise that heading is
+ * printed on its own line each time the interface index changes.
+ */
+static void print_mlist(FILE *fp, struct ma_info *list)
+{
+       struct ma_info *m = list;
+       int shown_index = 0;
+
+       while (m != NULL) {
+               if (oneline) {
+                       shown_index = m->index;
+                       fprintf(fp, "%d:\t%s%s", shown_index, m->name, _SL_);
+               } else if (m->index != shown_index) {
+                       shown_index = m->index;
+                       fprintf(fp, "%d:\t%s\n", shown_index, m->name);
+               }
+               print_maddr(fp, m);
+               m = m->next;
+       }
+}
+
+/*
+ * "ip maddr show [ [ dev ] NAME ]": collect the memberships of the
+ * selected address families and print them on stdout.  Returns 0.
+ */
+static int multiaddr_list(int argc, char **argv)
+{
+       struct ma_info *list = NULL;
+       int family;
+
+       if (filter.family == 0)
+               filter.family = preferred_family;
+
+       for (; argc > 0; argc--, argv++) {
+               /* The "dev" keyword is optional in front of the name. */
+               if (!strcmp(*argv, "dev")) {
+                       NEXT_ARG();
+               }
+               if (!matches(*argv, "help"))
+                       usage();
+               if (filter.dev != NULL)
+                       duparg2("dev", *argv);
+               filter.dev = *argv;
+       }
+
+       /* Family 0 means "every family". */
+       family = filter.family;
+       if (family == 0 || family == AF_PACKET)
+               read_dev_mcast(&list);
+       if (family == 0 || family == AF_INET)
+               read_igmp(&list);
+       if (family == 0 || family == AF_INET6)
+               read_igmp6(&list);
+
+       print_mlist(stdout, list);
+       return 0;
+}
+
+/*
+ * "ip maddr add|del": add or remove a link-layer multicast address on a
+ * device through the SIOCADDMULTI / SIOCDELMULTI ioctl.
+ *
+ * cmd  - RTM_NEWADDR selects add; any other value selects delete
+ * argv - "dev NAME" and "[ address ] LLADDR", in any order
+ *
+ * Never returns to the caller: exits with 0 on success and with a
+ * non-zero status on any error.
+ */
+int multiaddr_modify(int cmd, int argc, char **argv)
+{
+       struct ifreq ifr;
+       int fd;
+
+       memset(&ifr, 0, sizeof(ifr));
+
+       if (cmd == RTM_NEWADDR)
+               cmd = SIOCADDMULTI;
+       else
+               cmd = SIOCDELMULTI;
+
+       while (argc > 0) {
+               if (strcmp(*argv, "dev") == 0) {
+                       NEXT_ARG();
+                       if (ifr.ifr_name[0])
+                               duparg("dev", *argv);
+                       /*
+                        * Copy at most IFNAMSIZ-1 bytes: ifr was zeroed, so
+                        * the last byte remains the terminating NUL even for
+                        * an over-long name.
+                        */
+                       strncpy(ifr.ifr_name, *argv, IFNAMSIZ - 1);
+               } else {
+                       /* The "address" keyword is optional. */
+                       if (matches(*argv, "address") == 0) {
+                               NEXT_ARG();
+                       }
+                       if (matches(*argv, "help") == 0)
+                               usage();
+                       if (ifr.ifr_hwaddr.sa_data[0])
+                               duparg("address", *argv);
+                       if (ll_addr_a2n(ifr.ifr_hwaddr.sa_data, 14, *argv) < 0) {
+                               fprintf(stderr, "Error: \"%s\" is not a legal ll address.\n", *argv);
+                               exit(1);
+                       }
+               }
+               argc--; argv++;
+       }
+       if (ifr.ifr_name[0] == 0) {
+               fprintf(stderr, "Not enough information: \"dev\" is required.\n");
+               exit(-1);
+       }
+
+       /* Any socket will do; it only serves as a handle for the ioctl. */
+       fd = socket(AF_INET, SOCK_DGRAM, 0);
+       if (fd < 0) {
+               perror("Cannot create socket");
+               exit(1);
+       }
+       if (ioctl(fd, cmd, (char*)&ifr) != 0) {
+               perror("ioctl");
+               exit(1);
+       }
+       close(fd);
+
+       exit(0);
+}
+
+
+/*
+ * Entry point for "ip maddr": dispatch on the first command word.
+ * Without any argument the memberships are listed.
+ */
+int do_multiaddr(int argc, char **argv)
+{
+       char *cmd;
+
+       if (argc < 1)
+               return multiaddr_list(0, NULL);
+
+       cmd = *argv;
+
+       if (!matches(cmd, "add"))
+               return multiaddr_modify(RTM_NEWADDR, argc-1, argv+1);
+
+       if (!matches(cmd, "delete"))
+               return multiaddr_modify(RTM_DELADDR, argc-1, argv+1);
+
+       if (!matches(cmd, "list") ||
+           !matches(cmd, "show") ||
+           !matches(cmd, "lst"))
+               return multiaddr_list(argc-1, argv+1);
+
+       if (!matches(cmd, "help"))
+               usage();
+
+       fprintf(stderr, "Command \"%s\" is unknown, try \"ip maddr help\".\n", cmd);
+       exit(-1);
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..9ed6bbafecb7b94f0b3e6ef50c0ac45b7d2f34f5 100644 (file)
@@ -0,0 +1,152 @@
+/*
+ * ipmonitor.c         "ip monitor".
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+#include <time.h>
+
+#include "utils.h"
+#include "ip_common.h"
+
+static void usage(void) __attribute__((noreturn));
+
+/* Print the "ip monitor" synopsis on stderr and exit with status -1. */
+static void usage(void)
+{
+       fputs("Usage: ip monitor [ all | LISTofOBJECTS ]\n", stderr);
+       exit(-1);
+}
+
+
+/*
+ * Netlink callback used by "ip monitor": hand the message to the printer
+ * that matches its type.
+ *
+ * who - sender address, passed through to the printers
+ * n   - the netlink message
+ * arg - output stream (a FILE *)
+ *
+ * Qdisc, class and filter messages are dropped silently.  Any other type
+ * that is not NLMSG_ERROR, NLMSG_NOOP or NLMSG_DONE is reported as
+ * unknown.  Always returns 0.
+ */
+int accept_msg(struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
+{
+       FILE *fp = (FILE*)arg;
+
+       if (n->nlmsg_type == RTM_NEWROUTE || n->nlmsg_type == RTM_DELROUTE) {
+               print_route(who, n, arg);
+               return 0;
+       }
+       if (n->nlmsg_type == RTM_NEWLINK || n->nlmsg_type == RTM_DELLINK) {
+               /* Update the index/name map before printing the link. */
+               ll_remember_index(who, n, NULL);
+               print_linkinfo(who, n, arg);
+               return 0;
+       }
+       if (n->nlmsg_type == RTM_NEWADDR || n->nlmsg_type == RTM_DELADDR) {
+               print_addrinfo(who, n, arg);
+               return 0;
+       }
+       if (n->nlmsg_type == RTM_NEWNEIGH || n->nlmsg_type == RTM_DELNEIGH) {
+               print_neigh(who, n, arg);
+               return 0;
+       }
+       /*
+        * NOTE(review): type 15 has no symbolic name here.  Its payload is
+        * read as two 32-bit words, seconds and microseconds; presumably a
+        * timestamp record found in saved monitor files - confirm.
+        */
+       if (n->nlmsg_type == 15) {
+               char *tstr;
+               time_t secs = ((__u32*)NLMSG_DATA(n))[0];
+               /* unsigned long to agree with the %lu conversion below */
+               unsigned long usecs = ((__u32*)NLMSG_DATA(n))[1];
+               tstr = asctime(localtime(&secs));
+               /* asctime() ends its result with a newline; cut it off. */
+               tstr[strlen(tstr)-1] = 0;
+               fprintf(fp, "Timestamp: %s %lu us\n", tstr, usecs);
+               return 0;
+       }
+       if (n->nlmsg_type == RTM_NEWQDISC ||
+           n->nlmsg_type == RTM_DELQDISC ||
+           n->nlmsg_type == RTM_NEWTCLASS ||
+           n->nlmsg_type == RTM_DELTCLASS ||
+           n->nlmsg_type == RTM_NEWTFILTER ||
+           n->nlmsg_type == RTM_DELTFILTER)
+               return 0;
+       if (n->nlmsg_type != NLMSG_ERROR && n->nlmsg_type != NLMSG_NOOP &&
+           n->nlmsg_type != NLMSG_DONE) {
+               fprintf(fp, "Unknown message: %08x %08x %08x\n",
+                       n->nlmsg_len, n->nlmsg_type, n->nlmsg_flags);
+       }
+       return 0;
+}
+
+/*
+ * "ip monitor": print rtnetlink events as they arrive, or replay them
+ * from a file given with "file NAME".
+ *
+ * The subscription starts as every group except RTMGRP_TC.  Each object
+ * word (link, address, route) clears the mask and requests only the
+ * matching groups; for address and route, preferred_family limits the
+ * request to IPv4 or IPv6.  "all" restores the initial mask.
+ */
+int do_ipmonitor(int argc, char **argv)
+{
+       struct rtnl_handle rth;
+       unsigned groups = ~RTMGRP_TC;
+       char *file = NULL;
+       int llink = 0, laddr = 0, lroute = 0;
+       int want_v4, want_v6;
+
+       ipaddr_reset_filter(1);
+       iproute_reset_filter();
+       ipneigh_reset_filter();
+
+       for (; argc > 0; argc--, argv++) {
+               if (!matches(*argv, "file")) {
+                       NEXT_ARG();
+                       file = *argv;
+               } else if (!matches(*argv, "link")) {
+                       llink = 1;
+                       groups = 0;
+               } else if (!matches(*argv, "address")) {
+                       laddr = 1;
+                       groups = 0;
+               } else if (!matches(*argv, "route")) {
+                       lroute = 1;
+                       groups = 0;
+               } else if (!strcmp(*argv, "all")) {
+                       groups = ~RTMGRP_TC;
+               } else if (!matches(*argv, "help")) {
+                       usage();
+               } else {
+                       fprintf(stderr, "Argument \"%s\" is unknown, try \"ip monitor help\".\n", *argv);
+                       exit(-1);
+               }
+       }
+
+       /* No preferred family means both families are wanted. */
+       want_v4 = (!preferred_family || preferred_family == AF_INET);
+       want_v6 = (!preferred_family || preferred_family == AF_INET6);
+
+       if (llink)
+               groups |= RTMGRP_LINK;
+       if (laddr && want_v4)
+               groups |= RTMGRP_IPV4_IFADDR;
+       if (laddr && want_v6)
+               groups |= RTMGRP_IPV6_IFADDR;
+       if (lroute && want_v4)
+               groups |= RTMGRP_IPV4_ROUTE;
+       if (lroute && want_v6)
+               groups |= RTMGRP_IPV6_ROUTE;
+
+       /* Replay mode: feed saved messages through the same callback. */
+       if (file != NULL) {
+               FILE *fp = fopen(file, "r");
+
+               if (fp == NULL) {
+                       perror("Cannot fopen");
+                       exit(-1);
+               }
+               return rtnl_from_file(fp, accept_msg, (void*)stdout);
+       }
+
+       if (rtnl_open(&rth, groups) < 0)
+               exit(1);
+
+       ll_init_map(&rth);
+
+       if (rtnl_listen(&rth, accept_msg, (void*)stdout) < 0)
+               exit(2);
+
+       exit(0);
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..01e876bcc159d3baadb017e97511c90621ba3785 100644 (file)
@@ -0,0 +1,204 @@
+/*
+ * ipmroute.c          "ip mroute".
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <linux/netdevice.h>
+#include <linux/if.h>
+#include <linux/if_arp.h>
+#include <linux/sockios.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "utils.h"
+
+/* Input interface name a cache entry must have to be listed; "" = any. */
+char filter_dev[16];
+/* NOTE(review): not referenced in the visible part of this file. */
+int  filter_family;
+
+static void usage(void) __attribute__((noreturn));
+
+/* Print the "ip mroute" synopsis on stderr and exit with status -1. */
+static void usage(void)
+{
+       fputs("Usage: ip mroute show [ PREFIX ] [ from PREFIX ] [ iif DEVICE ]\n", stderr);
+#if 0
+       fprintf(stderr, "Usage: ip mroute [ add | del ] DESTINATION from SOURCE [ iif DEVICE ] [ oif DEVICE ]\n");
+#endif
+       exit(-1);
+}
+
+/*
+ * Interface name per multicast VIF index, filled by read_viftable().
+ * Slots for which no line was read stay NULL.
+ */
+char *viftable[32];
+
+/*
+ * Prefix filters for the cache listing.  A member whose family is 0 is
+ * not applied.
+ */
+struct rtfilter
+{
+       /* multicast group (destination) prefix */
+       inet_prefix mdst;
+       /* source prefix, set by "from" */
+       inet_prefix msrc;
+} filter;
+
+/*
+ * Fill viftable[] from /proc/net/ip_mr_vif: each line after the first
+ * supplies a VIF index and the device name stored (as a strdup() copy)
+ * in that slot.  Lines that do not scan or whose index is outside 0..31
+ * are ignored.  If the file cannot be opened the table is left alone.
+ */
+void read_viftable(void)
+{
+       char line[256];
+       FILE *fp;
+
+       fp = fopen("/proc/net/ip_mr_vif", "r");
+       if (fp == NULL)
+               return;
+
+       /* The first line is read and discarded. */
+       fgets(line, sizeof(line), fp);
+
+       while (fgets(line, sizeof(line), fp) != NULL) {
+               char dev[256];
+               int vifi;
+
+               if (sscanf(line, "%d%s", &vifi, dev) < 2 ||
+                   vifi < 0 || vifi > 31)
+                       continue;
+
+               viftable[vifi] = strdup(dev);
+       }
+       fclose(fp);
+}
+
+/* Name stored for VIF index vifi, or NULL if out of range or not known. */
+static const char *vif_name(int vifi)
+{
+       if (vifi < 0 || vifi >= (int)(sizeof(viftable) / sizeof(viftable[0])))
+               return NULL;
+       return viftable[vifi];
+}
+
+/*
+ * Print the multicast forwarding cache from /proc/net/ip_mr_cache to ofp.
+ *
+ * Each line after the first is scanned as: group, source (both hex),
+ * input VIF index, packet count, byte count, wrong-interface count and an
+ * optional list of "vif:ttl" output entries.  Entries are dropped when
+ * they fail the filter_dev / filter.mdst / filter.msrc filters.  A VIF
+ * without a recorded name is shown as "(unknown)".  Counters are printed
+ * only when show_stats is set and the byte count is non-zero.
+ */
+void read_mroute_list(FILE *ofp)
+{
+       char buf[256];
+       FILE *fp = fopen("/proc/net/ip_mr_cache", "r");
+
+       if (!fp)
+               return;
+
+       /* Drop the first line; an empty file yields no output. */
+       if (!fgets(buf, sizeof(buf), fp)) {
+               fclose(fp);
+               return;
+       }
+
+       while (fgets(buf, sizeof(buf), fp)) {
+               inet_prefix maddr, msrc;
+               unsigned pkts, b, w;
+               int vifi;
+               const char *iif;
+               char oiflist[256];
+               char sbuf[256];
+               char mbuf[256];
+               char obuf[256];
+
+               /* Only data[0] is scanned; keep the rest well defined. */
+               memset(&maddr, 0, sizeof(maddr));
+               memset(&msrc, 0, sizeof(msrc));
+
+               oiflist[0] = 0;
+               /*
+                * The output list holds several blank-separated entries, so
+                * take the rest of the line; %s would stop after the first
+                * one.  oiflist is as large as buf and cannot be overrun.
+                */
+               if (sscanf(buf, "%x%x%d%u%u%u %[^\n]", maddr.data, msrc.data, &vifi,
+                          &pkts, &b, &w, oiflist) < 6)
+                       continue;
+
+               if (vifi!=-1 && (vifi < 0 || vifi>31))
+                       continue;
+
+               /* NULL for vifi == -1 and for a VIF without a name. */
+               iif = vif_name(vifi);
+
+               if (filter_dev[0] && (iif == NULL || strcmp(filter_dev, iif)))
+                       continue;
+               if (filter.mdst.family && inet_addr_match(&maddr, &filter.mdst, filter.mdst.bitlen))
+                       continue;
+               if (filter.msrc.family && inet_addr_match(&msrc, &filter.msrc, filter.msrc.bitlen))
+                       continue;
+
+               snprintf(obuf, sizeof(obuf), "(%s, %s)",
+                        format_host(AF_INET, 4, &msrc.data[0], sbuf, sizeof(sbuf)),
+                        format_host(AF_INET, 4, &maddr.data[0], mbuf, sizeof(mbuf)));
+
+               fprintf(ofp, "%-32s Iif: ", obuf);
+
+               if (vifi == -1)
+                       fprintf(ofp, "unresolved ");
+               else
+                       fprintf(ofp, "%-10s ", iif ? iif : "(unknown)");
+
+               if (oiflist[0]) {
+                       char *next = NULL;
+                       char *p = oiflist;
+                       int ovifi, ottl;
+
+                       fprintf(ofp, "Oifs: ");
+
+                       while (p) {
+                               const char *oif;
+
+                               /* Cut the list at the next blank. */
+                               next = strchr(p, ' ');
+                               if (next) {
+                                       *next = 0;
+                                       next++;
+                               }
+                               /* Empty pieces between blanks fail here. */
+                               if (sscanf(p, "%d:%d", &ovifi, &ottl)<2) {
+                                       p = next;
+                                       continue;
+                               }
+                               p = next;
+
+                               /* Bounds-checked lookup; ovifi is file data. */
+                               oif = vif_name(ovifi);
+                               fprintf(ofp, "%s", oif ? oif : "(unknown)");
+                               if (ottl>1)
+                                       fprintf(ofp, "(ttl %d) ", ottl);
+                               else
+                                       fprintf(ofp, " ");
+                       }
+               }
+
+               if (show_stats && b) {
+                       fprintf(ofp, "%s  %u packets, %u bytes", _SL_, pkts, b);
+                       if (w)
+                               fprintf(ofp, ", %u arrived on wrong iif.", w);
+               }
+               fprintf(ofp, "\n");
+       }
+       fclose(fp);
+}
+
+
+/*
+ * "ip mroute show [ [ to ] PREFIX ] [ from PREFIX ] [ iif DEVICE ]":
+ * record the filters, load the VIF table and print the cache on stdout.
+ * Returns 0.
+ */
+static int mroute_list(int argc, char **argv)
+{
+       for (; argc > 0; argc--, argv++) {
+               if (!strcmp(*argv, "iif")) {
+                       NEXT_ARG();
+                       strncpy(filter_dev, *argv, sizeof(filter_dev)-1);
+                       continue;
+               }
+               if (!matches(*argv, "from")) {
+                       NEXT_ARG();
+                       get_prefix(&filter.msrc, *argv, AF_INET);
+                       continue;
+               }
+
+               /* Anything else is the group prefix; "to" is optional. */
+               if (!strcmp(*argv, "to")) {
+                       NEXT_ARG();
+               }
+               if (!matches(*argv, "help"))
+                       usage();
+               get_prefix(&filter.mdst, *argv, AF_INET);
+       }
+
+       read_viftable();
+       read_mroute_list(stdout);
+       return 0;
+}
+
+/*
+ * Entry point for "ip mroute": dispatch on the first command word.
+ * Without any argument the cache is listed.
+ */
+int do_multiroute(int argc, char **argv)
+{
+       char *cmd;
+
+       if (argc < 1)
+               return mroute_list(0, NULL);
+
+       cmd = *argv;
+#if 0
+       if (matches(*argv, "add") == 0)
+               return mroute_modify(RTM_NEWADDR, argc-1, argv+1);
+       if (matches(*argv, "delete") == 0)
+               return mroute_modify(RTM_DELADDR, argc-1, argv+1);
+       if (matches(*argv, "get") == 0)
+               return mroute_get(argc-1, argv+1);
+#endif
+       if (!matches(cmd, "list") ||
+           !matches(cmd, "show") ||
+           !matches(cmd, "lst"))
+               return mroute_list(argc-1, argv+1);
+
+       if (!matches(cmd, "help"))
+               usage();
+
+       fprintf(stderr, "Command \"%s\" is unknown, try \"ip mroute help\".\n", cmd);
+       exit(-1);
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..f8c27900d5abadc76199a0c5870dd9f3dea93812 100644 (file)
@@ -0,0 +1,484 @@
+/*
+ * ipneigh.c           "ip neigh".
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ *
+ * Changes:
+ *
+ * Rani Assaf <rani@magic.metawire.com> 980929:        resolve addresses
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/time.h>
+#include <net/if.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+
+#include "rt_names.h"
+#include "utils.h"
+#include "ip_common.h"
+
+#define NUD_VALID      (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE|NUD_PROBE|NUD_STALE|NUD_DELAY)
+
+/*
+ * Selection and flush state shared by the neighbour listing code.
+ * print_neigh() consults it for every message it is handed.
+ */
+static struct
+{
+       /* address family an entry must have; 0 accepts any */
+       int family;
+       /* interface index an entry must have; 0 accepts any */
+        int index;
+       /* mask of accepted NUD_* states; bit 0x100 admits entries whose
+        * state is 0 */
+       int state;
+       /* when set, entries with a non-zero reference count are skipped */
+       int unused_only;
+       /* destination prefix an entry must match; unused if family is 0 */
+       inet_prefix pfx;
+       /* number of entries queued for deletion so far */
+       int flushed;
+       /* buffer collecting delete requests; non-NULL selects flush mode */
+       char *flushb;
+       /* number of bytes currently used in flushb */
+       int flushp;
+       /* limit that flushp plus the next request must not exceed */
+       int flushe;
+       /* netlink handle used to send the queued requests */
+       struct rtnl_handle *rth;
+} filter;
+
+static void usage(void) __attribute__((noreturn));
+
+/* Print the "ip neigh" synopsis on stderr and exit with status -1. */
+static void usage(void)
+{
+       fputs("Usage: ip neigh { add | del | change | replace } { ADDR [ lladdr LLADDR ]\n"
+             "          [ nud { permanent | noarp | stale | reachable } ]\n"
+             "          | proxy ADDR } [ dev DEV ]\n"
+             "       ip neigh {show|flush} [ to PREFIX ] [ dev DEV ] [ nud STATE ]\n",
+             stderr);
+       exit(-1);
+}
+
+/*
+ * Translate a neighbour state given as text into its NUD_* value.
+ *
+ * state - receives the value
+ * arg   - a state name, or a number
+ *
+ * A number is accepted only if it is below 0x100 and has at most one bit
+ * set.  Returns 0 on success and -1 if arg is neither.
+ */
+int nud_state_a2n(unsigned *state, char *arg)
+{
+       /*
+        * Names are tried in this order.  Entries with abbrev set are
+        * compared with matches(), the others must be equal per strcmp().
+        */
+       static const struct {
+               char *name;
+               unsigned value;
+               int abbrev;
+       } known[] = {
+               { "permanent",  NUD_PERMANENT,  1 },
+               { "reachable",  NUD_REACHABLE,  1 },
+               { "noarp",      NUD_NOARP,      0 },
+               { "none",       NUD_NONE,       0 },
+               { "stale",      NUD_STALE,      0 },
+               { "incomplete", NUD_INCOMPLETE, 0 },
+               { "delay",      NUD_DELAY,      0 },
+               { "probe",      NUD_PROBE,      0 },
+               { "failed",     NUD_FAILED,     1 },
+       };
+       unsigned i;
+
+       for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
+               int miss;
+
+               if (known[i].abbrev)
+                       miss = matches(arg, known[i].name);
+               else
+                       miss = strcmp(arg, known[i].name);
+
+               if (miss == 0) {
+                       *state = known[i].value;
+                       return 0;
+               }
+       }
+
+       /* Not a known name: try it as a number. */
+       if (get_unsigned(state, arg, 0))
+               return -1;
+       if (*state >= 0x100 || (*state & (*state - 1)))
+               return -1;
+       return 0;
+}
+
+/*
+ * Translate a NUD_* state value into text.
+ *
+ * Known single states yield a constant string.  Any other value is
+ * formatted in hex into buf (len bytes) and buf is returned.
+ */
+char * nud_state_n2a(__u8 state, char *buf, int len)
+{
+       char *text = NULL;
+
+       switch (state) {
+       case NUD_NONE:          text = "none";          break;
+       case NUD_INCOMPLETE:    text = "incomplete";    break;
+       case NUD_REACHABLE:     text = "reachable";     break;
+       case NUD_STALE:         text = "stale";         break;
+       case NUD_DELAY:         text = "delay";         break;
+       case NUD_PROBE:         text = "probe";         break;
+       case NUD_FAILED:        text = "failed";        break;
+       case NUD_NOARP:         text = "noarp";         break;
+       case NUD_PERMANENT:     text = "permanent";     break;
+       default:                                        break;
+       }
+
+       if (text != NULL)
+               return text;
+
+       /* No name for this value: show the raw bits. */
+       snprintf(buf, len, "%x", state);
+       return buf;
+}
+
+/*
+ * Send the delete requests collected in filter.flushb (filter.flushp
+ * bytes) over filter.rth and mark the buffer empty.
+ *
+ * Returns 0 on success, -1 if sending failed; the buffer is left
+ * untouched in that case.
+ */
+static int flush_update(void)
+{
+       if (rtnl_send(filter.rth, filter.flushb, filter.flushp) < 0) {
+               /*
+                * No trailing newline: perror() appends ": <error text>"
+                * and a newline of its own.
+                */
+               perror("Failed to send flush request");
+               return -1;
+       }
+       filter.flushp = 0;
+       return 0;
+}
+
+
+/*
+ * "ip neigh add|del|change|replace": build one neighbour request from
+ * the command line and send it to the kernel.
+ *
+ * cmd   - netlink message type of the request
+ * flags - NLM_F_* flags added to NLM_F_REQUEST
+ * argv  - "[ to ] ADDR", "proxy ADDR", "lladdr LLADDR", "nud STATE" and
+ *         "dev DEV" in any order; device and address are mandatory
+ *
+ * Returns -1 if the link-layer address is malformed or the device is not
+ * known.  Otherwise it does not return: it exits with 0 on success and
+ * with a non-zero status on failure.
+ */
+static int ipneigh_modify(int cmd, int flags, int argc, char **argv)
+{
+       struct rtnl_handle rth;
+       struct {
+               struct nlmsghdr         n;
+               struct ndmsg            ndm;
+               char                    buf[256];
+       } req;
+       char  *d = NULL;
+       int dst_ok = 0;
+       int lladdr_ok = 0;
+       char * lla = NULL;
+       inet_prefix dst;
+
+       memset(&req, 0, sizeof(req));
+
+       req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
+       req.n.nlmsg_flags = NLM_F_REQUEST|flags;
+       req.n.nlmsg_type = cmd;
+       req.ndm.ndm_family = preferred_family;
+       /* Default state unless "nud" says otherwise. */
+       req.ndm.ndm_state = NUD_PERMANENT;
+
+       while (argc > 0) {
+               if (matches(*argv, "lladdr") == 0) {
+                       NEXT_ARG();
+                       if (lladdr_ok)
+                               duparg("lladdr", *argv);
+                       lla = *argv;
+                       lladdr_ok = 1;
+               } else if (strcmp(*argv, "nud") == 0) {
+                       unsigned state;
+                       NEXT_ARG();
+                       if (nud_state_a2n(&state, *argv))
+                               invarg("nud state is bad", *argv);
+                       req.ndm.ndm_state = state;
+               } else if (matches(*argv, "proxy") == 0) {
+                       NEXT_ARG();
+                       if (matches(*argv, "help") == 0)
+                               usage();
+                       if (dst_ok)
+                               duparg("address", *argv);
+                       get_addr(&dst, *argv, preferred_family);
+                       dst_ok = 1;
+                       req.ndm.ndm_flags |= NTF_PROXY;
+               } else if (strcmp(*argv, "dev") == 0) {
+                       NEXT_ARG();
+                       d = *argv;
+               } else {
+                       /* Plain address; the "to" keyword is optional. */
+                       if (strcmp(*argv, "to") == 0) {
+                               NEXT_ARG();
+                       }
+                       /* "help" prints the synopsis, as in the proxy
+                        * branch above; it is not an address. */
+                       if (matches(*argv, "help") == 0)
+                               usage();
+                       if (dst_ok)
+                               duparg2("to", *argv);
+                       get_addr(&dst, *argv, preferred_family);
+                       dst_ok = 1;
+               }
+               argc--; argv++;
+       }
+       if (d == NULL || !dst_ok || dst.family == AF_UNSPEC) {
+               fprintf(stderr, "Device and destination are required arguments.\n");
+               exit(-1);
+       }
+       req.ndm.ndm_family = dst.family;
+       addattr_l(&req.n, sizeof(req), NDA_DST, &dst.data, dst.bytelen);
+
+       /* "lladdr null" means: send no link-layer address attribute. */
+       if (lla && strcmp(lla, "null")) {
+               __u8 llabuf[16];
+               int l;
+
+               l = ll_addr_a2n(llabuf, sizeof(llabuf), lla);
+               /* A negative result must not be used as attribute length. */
+               if (l < 0) {
+                       fprintf(stderr, "Error: \"%s\" is not a legal ll address.\n", lla);
+                       return -1;
+               }
+               addattr_l(&req.n, sizeof(req), NDA_LLADDR, llabuf, l);
+       }
+
+       if (rtnl_open(&rth, 0) < 0)
+               exit(1);
+
+       /* Load the interface map so the device name can be resolved. */
+       ll_init_map(&rth);
+
+       if ((req.ndm.ndm_ifindex = ll_name_to_index(d)) == 0) {
+               fprintf(stderr, "Cannot find device \"%s\"\n", d);
+               return -1;
+       }
+
+       if (rtnl_talk(&rth, &req.n, 0, 0, NULL, NULL, NULL) < 0)
+               exit(2);
+
+       exit(0);
+}
+
+
+/*
+ * Netlink callback for neighbour messages: apply the filter, then either
+ * print the entry or, in flush mode, queue a delete request for it.
+ *
+ * who - sender address (not used here)
+ * n   - the netlink message
+ * arg - output stream (a FILE *)
+ *
+ * Flush mode is active while filter.flushb is set.  A matching entry is
+ * then copied into the flush buffer as an RTM_DELNEIGH request, and it is
+ * printed as well only when show_stats is at least 2.
+ *
+ * Returns 0 normally, -1 for a message shorter than its header or when
+ * sending a full flush buffer failed.
+ */
+int print_neigh(struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
+{
+       FILE *fp = (FILE*)arg;
+       struct ndmsg *r = NLMSG_DATA(n);
+       int len = n->nlmsg_len;
+       struct rtattr * tb[NDA_MAX+1];
+       char abuf[256];
+
+       if (n->nlmsg_type != RTM_NEWNEIGH && n->nlmsg_type != RTM_DELNEIGH) {
+               fprintf(stderr, "Not RTM_NEWNEIGH: %08x %08x %08x\n",
+                       n->nlmsg_len, n->nlmsg_type, n->nlmsg_flags);
+
+               return 0;
+       }
+       len -= NLMSG_LENGTH(sizeof(*r));
+       if (len < 0) {
+               fprintf(stderr, "BUG: wrong nlmsg len %d\n", len);
+               return -1;
+       }
+
+       /* While flushing, only existing entries are of interest. */
+       if (filter.flushb && n->nlmsg_type != RTM_NEWNEIGH)
+               return 0;
+
+       if (filter.family && filter.family != r->ndm_family)
+               return 0;
+       if (filter.index && filter.index != r->ndm_ifindex)
+               return 0;
+       /*
+        * State filter: drop the entry unless its state is selected, or it
+        * has no state and bit 0x100 is selected.  DECnet entries always
+        * pass.
+        */
+       if (!(filter.state&r->ndm_state) &&
+           (r->ndm_state || !(filter.state&0x100)) &&
+             (r->ndm_family != AF_DECnet))
+               return 0;
+
+       memset(tb, 0, sizeof(tb));
+       parse_rtattr(tb, NDA_MAX, NDA_RTA(r), n->nlmsg_len - NLMSG_LENGTH(sizeof(*r)));
+
+       if (tb[NDA_DST]) {
+               if (filter.pfx.family) {
+                       inet_prefix dst;
+                       memset(&dst, 0, sizeof(dst));
+                       dst.family = r->ndm_family;
+                       memcpy(&dst.data, RTA_DATA(tb[NDA_DST]), RTA_PAYLOAD(tb[NDA_DST]));
+                       if (inet_addr_match(&dst, &filter.pfx, filter.pfx.bitlen))
+                               return 0;
+               }
+       }
+       if (filter.unused_only && tb[NDA_CACHEINFO]) {
+               struct nda_cacheinfo *ci = RTA_DATA(tb[NDA_CACHEINFO]);
+               if (ci->ndm_refcnt)
+                       return 0;
+       }
+
+       if (filter.flushb) {
+               struct nlmsghdr *fn;
+               /* Send what is queued if this request would not fit. */
+               if (NLMSG_ALIGN(filter.flushp) + n->nlmsg_len > filter.flushe) {
+                       if (flush_update())
+                               return -1;
+               }
+               /* Reuse the received message as the delete request. */
+               fn = (struct nlmsghdr*)(filter.flushb + NLMSG_ALIGN(filter.flushp));
+               memcpy(fn, n, n->nlmsg_len);
+               fn->nlmsg_type = RTM_DELNEIGH;
+               fn->nlmsg_flags = NLM_F_REQUEST;
+               fn->nlmsg_seq = ++filter.rth->seq;
+               filter.flushp = (((char*)fn) + n->nlmsg_len) - filter.flushb;
+               filter.flushed++;
+               if (show_stats < 2)
+                       return 0;
+       }
+
+       if (tb[NDA_DST]) {
+               fprintf(fp, "%s ",
+                       format_host(r->ndm_family,
+                                   RTA_PAYLOAD(tb[NDA_DST]),
+                                   RTA_DATA(tb[NDA_DST]),
+                                   abuf, sizeof(abuf)));
+       }
+       /* The device is omitted when the listing is limited to one. */
+       if (!filter.index && r->ndm_ifindex)
+               fprintf(fp, "dev %s ", ll_index_to_name(r->ndm_ifindex));
+       if (tb[NDA_LLADDR]) {
+               SPRINT_BUF(b1);
+               fprintf(fp, "lladdr %s", ll_addr_n2a(RTA_DATA(tb[NDA_LLADDR]),
+                                             RTA_PAYLOAD(tb[NDA_LLADDR]),
+                                             ll_index_to_type(r->ndm_ifindex),
+                                             b1, sizeof(b1)));
+       }
+       if (r->ndm_flags & NTF_ROUTER) {
+               fprintf(fp, " router");
+       }
+       if (tb[NDA_CACHEINFO] && show_stats) {
+               static int hz;
+               struct nda_cacheinfo *ci = RTA_DATA(tb[NDA_CACHEINFO]);
+               /* Looked up once and kept for later calls. */
+               if (!hz)
+                       hz = get_hz();
+               /* Written to fp like the rest of the line, not to stdout. */
+               if (ci->ndm_refcnt)
+                       fprintf(fp, " ref %d", ci->ndm_refcnt);
+               fprintf(fp, " used %d/%d/%d", ci->ndm_used/hz,
+                      ci->ndm_confirmed/hz, ci->ndm_updated/hz);
+       }
+
+       if (r->ndm_state) {
+               SPRINT_BUF(b1);
+               fprintf(fp, " nud %s", nud_state_n2a(r->ndm_state, b1, sizeof(b1)));
+       }
+       fprintf(fp, "\n");
+
+       fflush(fp);
+       return 0;
+}
+
+/*
+ * Put the neighbour filter back into its default state: every member is
+ * zeroed and the NUD state mask is then opened up to all ones.
+ */
+void ipneigh_reset_filter()
+{
+       memset(&filter, 0, sizeof filter);
+       filter.state = ~0;
+}
+
+/*
+ * Common implementation of "ip neigh show" and "ip neigh flush".
+ *
+ * argc/argv hold the selector words that follow the sub-command:
+ * "dev NAME", "unused", "nud STATE" (repeatable, OR-ed together, "all"
+ * accepted) and an optional "to"-prefixed address prefix.  When 'flush'
+ * is non-zero the matching entries are deleted in repeated dump/delete
+ * rounds until a dump queues nothing; otherwise they are printed on stdout.
+ *
+ * Returns 0 on success and -1 when flush is requested without arguments
+ * or the named device is unknown.  Netlink failures terminate the process.
+ */
+int do_show_or_flush(int argc, char **argv, int flush)
+{
+       struct rtnl_handle rth;
+       char flushb[4096-512];
+       char *dev_name = NULL;
+       int nud_seen = 0;
+       int round = 0;
+
+       ipneigh_reset_filter();
+
+       if (filter.family == 0)
+               filter.family = preferred_family;
+
+       /* Default state mask; replaced as soon as a "nud" argument is seen. */
+       if (!flush) {
+               filter.state = 0xFF & ~NUD_NOARP;
+       } else {
+               if (argc <= 0) {
+                       fprintf(stderr, "Flush requires arguments.\n");
+                       return -1;
+               }
+               filter.state = ~(NUD_PERMANENT|NUD_NOARP);
+       }
+
+       for (; argc > 0; argc--, argv++) {
+               if (strcmp(*argv, "dev") == 0) {
+                       NEXT_ARG();
+                       if (dev_name != NULL)
+                               duparg("dev", *argv);
+                       dev_name = *argv;
+               } else if (strcmp(*argv, "unused") == 0) {
+                       filter.unused_only = 1;
+               } else if (strcmp(*argv, "nud") == 0) {
+                       unsigned nud;
+
+                       NEXT_ARG();
+                       /* The first explicit state discards the default mask. */
+                       if (!nud_seen) {
+                               nud_seen = 1;
+                               filter.state = 0;
+                       }
+                       if (nud_state_a2n(&nud, *argv) != 0) {
+                               if (strcmp(*argv, "all") != 0)
+                                       invarg("nud state is bad", *argv);
+                               nud = ~0;
+                               if (flush)
+                                       nud &= ~NUD_NOARP;
+                       }
+                       /* A state of 0 is recorded as the 0x100 marker bit. */
+                       if (nud == 0)
+                               nud = 0x100;
+                       filter.state |= nud;
+               } else {
+                       if (strcmp(*argv, "to") == 0) {
+                               NEXT_ARG();
+                       }
+                       if (matches(*argv, "help") == 0)
+                               usage();
+                       get_prefix(&filter.pfx, *argv, filter.family);
+                       if (filter.family == AF_UNSPEC)
+                               filter.family = filter.pfx.family;
+               }
+       }
+
+       if (rtnl_open(&rth, 0) < 0)
+               exit(1);
+
+       ll_init_map(&rth);
+
+       if (dev_name != NULL) {
+               filter.index = ll_name_to_index(dev_name);
+               if (filter.index == 0) {
+                       fprintf(stderr, "Cannot find device \"%s\"\n", dev_name);
+                       return -1;
+               }
+       }
+
+       if (!flush) {
+               /* Plain listing: one dump, printed by print_neigh(). */
+               if (rtnl_wilddump_request(&rth, filter.family, RTM_GETNEIGH) < 0) {
+                       perror("Cannot send dump request");
+                       exit(1);
+               }
+
+               if (rtnl_dump_filter(&rth, print_neigh, stdout, NULL, NULL) < 0) {
+                       fprintf(stderr, "Dump terminated\n");
+                       exit(1);
+               }
+
+               return 0;
+       }
+
+       /* Flush: print_neigh() queues delete requests into flushb. */
+       filter.flushb = flushb;
+       filter.flushp = 0;
+       filter.flushe = sizeof(flushb);
+       filter.rth = &rth;
+       filter.state &= ~NUD_FAILED;
+
+       for (;;) {
+               if (rtnl_wilddump_request(&rth, filter.family, RTM_GETNEIGH) < 0) {
+                       perror("Cannot send dump request");
+                       exit(1);
+               }
+               filter.flushed = 0;
+               if (rtnl_dump_filter(&rth, print_neigh, stdout, NULL, NULL) < 0) {
+                       fprintf(stderr, "Flush terminated\n");
+                       exit(1);
+               }
+               /* A dump that queued nothing means there is nothing left. */
+               if (filter.flushed == 0) {
+                       if (round == 0) {
+                               fprintf(stderr, "Nothing to flush.\n");
+                       } else if (show_stats)
+                               printf("*** Flush is complete after %d round%s ***\n", round, round>1?"s":"");
+                       fflush(stdout);
+                       return 0;
+               }
+               round++;
+               if (flush_update() < 0)
+                       exit(1);
+               if (show_stats) {
+                       printf("\n*** Round %d, deleting %d entries ***\n", round, filter.flushed);
+                       fflush(stdout);
+               }
+       }
+}
+
+/*
+ * Entry point of "ip neigh".  The first word selects the sub-command and
+ * the remaining words are handed to its handler.  With no arguments the
+ * neighbour table is listed.  An unrecognised sub-command is reported on
+ * stderr and terminates the process.
+ */
+int do_ipneigh(int argc, char **argv)
+{
+       char *cmd;
+       char **rest;
+       int nrest;
+
+       if (argc <= 0)
+               return do_show_or_flush(0, NULL, 0);
+
+       cmd = *argv;
+       rest = argv + 1;
+       nrest = argc - 1;
+
+       if (matches(cmd, "add") == 0)
+               return ipneigh_modify(RTM_NEWNEIGH, NLM_F_CREATE|NLM_F_EXCL, nrest, rest);
+       if (matches(cmd, "change") == 0 || strcmp(cmd, "chg") == 0)
+               return ipneigh_modify(RTM_NEWNEIGH, NLM_F_REPLACE, nrest, rest);
+       if (matches(cmd, "replace") == 0)
+               return ipneigh_modify(RTM_NEWNEIGH, NLM_F_CREATE|NLM_F_REPLACE, nrest, rest);
+       if (matches(cmd, "delete") == 0)
+               return ipneigh_modify(RTM_DELNEIGH, 0, nrest, rest);
+       if (matches(cmd, "get") == 0) {
+               fprintf(stderr, "Sorry, \"neigh get\" is not implemented :-(\n");
+               return -1;
+       }
+       if (matches(cmd, "show") == 0 ||
+           matches(cmd, "lst") == 0 ||
+           matches(cmd, "list") == 0)
+               return do_show_or_flush(nrest, rest, 0);
+       if (matches(cmd, "flush") == 0)
+               return do_show_or_flush(nrest, rest, 1);
+       if (matches(cmd, "help") == 0)
+               usage();
+
+       fprintf(stderr, "Command \"%s\" is unknown, try \"ip neigh help\".\n", cmd);
+       exit(-1);
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..404f8e0e3e120734ab900781ecbd5edc735d19a7 100644 (file)
+/*
+ * iproute.c           "ip route".
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ *
+ * Changes:
+ *
+ * Rani Assaf <rani@magic.metawire.com> 980929:        resolve addresses
+ * Kunihiro Ishiguro <kunihiro@zebra.org> 001102: rtnh_ifindex was not initialized
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <arpa/inet.h>
+#include <linux/in_route.h>
+
+#include "rt_names.h"
+#include "utils.h"
+#include "ip_common.h"
+
+#ifndef RTAX_RTTVAR
+#define RTAX_RTTVAR RTAX_HOPS
+#endif
+
+
+static void usage(void) __attribute__((noreturn));
+
+/*
+ * Print the "ip route" command synopsis on stderr and terminate the
+ * process with exit status -1.  Never returns.
+ */
+static void usage(void)
+{
+       fprintf(stderr, "Usage: ip route { list | flush } SELECTOR\n");
+       fprintf(stderr, "       ip route get ADDRESS [ from ADDRESS iif STRING ]\n");
+       fprintf(stderr, "                            [ oif STRING ]  [ tos TOS ]\n");
+       fprintf(stderr, "       ip route { add | del | change | append | replace | monitor } ROUTE\n");
+       fprintf(stderr, "SELECTOR := [ root PREFIX ] [ match PREFIX ] [ exact PREFIX ]\n");
+       fprintf(stderr, "            [ table TABLE_ID ] [ proto RTPROTO ]\n");
+       fprintf(stderr, "            [ type TYPE ] [ scope SCOPE ]\n");
+       fprintf(stderr, "ROUTE := NODE_SPEC [ INFO_SPEC ]\n");
+       fprintf(stderr, "NODE_SPEC := [ TYPE ] PREFIX [ tos TOS ]\n");
+       fprintf(stderr, "             [ table TABLE_ID ] [ proto RTPROTO ]\n");
+       fprintf(stderr, "             [ scope SCOPE ] [ metric METRIC ]\n");
+       fprintf(stderr, "INFO_SPEC := NH OPTIONS FLAGS [ nexthop NH ]...\n");
+       fprintf(stderr, "NH := [ via ADDRESS ] [ dev STRING ] [ weight NUMBER ] NHFLAGS\n");
+       fprintf(stderr, "OPTIONS := FLAGS [ mtu NUMBER ] [ advmss NUMBER ]\n");
+       fprintf(stderr, "           [ rtt NUMBER ] [ rttvar NUMBER ]\n");
+       /* ssthresh is a numeric metric, not a realm. */
+       fprintf(stderr, "           [ window NUMBER ] [ cwnd NUMBER ] [ ssthresh NUMBER ]\n");
+       fprintf(stderr, "           [ realms REALM ]\n");
+       fprintf(stderr, "TYPE := [ unicast | local | broadcast | multicast | throw |\n");
+       fprintf(stderr, "          unreachable | prohibit | blackhole | nat ]\n");
+       fprintf(stderr, "TABLE_ID := [ local | main | default | all | NUMBER ]\n");
+       fprintf(stderr, "SCOPE := [ host | link | global | NUMBER ]\n");
+       fprintf(stderr, "FLAGS := [ equalize ]\n");
+       fprintf(stderr, "NHFLAGS := [ onlink | pervasive ]\n");
+       fprintf(stderr, "RTPROTO := [ kernel | boot | static | NUMBER ]\n");
+       exit(-1);
+}
+
+
+/*
+ * Selection criteria applied by print_route(), plus the state used while
+ * flushing.  For every value/mask pair a route is rejected when
+ * (value ^ route_field) & mask is non-zero.
+ */
+static struct
+{
+       /* Table selector; for AF_INET6 a negative value keeps only cloned routes. */
+       int tb;
+       /* Number of delete requests queued since the counter was last reset. */
+       int flushed;
+       /* Buffer collecting delete requests; non-NULL selects flush mode. */
+       char *flushb;
+       /* Bytes of flushb currently in use. */
+       int flushp;
+       /* Capacity limit that flushp is checked against. */
+       int flushe;
+       /* Netlink handle used to send queued requests and to number them. */
+       struct rtnl_handle *rth;
+       int protocol;
+       int protocolmask;
+       int scope;
+       int scopemask;
+       int type;
+       int typemask;
+       int tos;
+       int tosmask;
+       int iif;
+       int iifmask;
+       int oif;
+       int oifmask;
+       int realm;
+       int realmmask;
+       /* Compared against RTA_PREFSRC. */
+       inet_prefix rprefsrc;
+       /* Compared against RTA_GATEWAY. */
+       inet_prefix rvia;
+       /* Destination filters: rdst rejects shorter routes, mdst longer ones. */
+       inet_prefix rdst;
+       inet_prefix mdst;
+       /* Source filters, with the same meaning as rdst/mdst. */
+       inet_prefix rsrc;
+       inet_prefix msrc;
+} filter;
+
+/*
+ * Send the delete requests accumulated in filter.flushb (filter.flushp
+ * bytes) over filter.rth and mark the buffer empty again.
+ *
+ * Returns 0 on success.  On a send failure the error is reported, -1 is
+ * returned and the buffer is left as it was.
+ */
+static int flush_update(void)
+{
+       if (rtnl_send(filter.rth, filter.flushb, filter.flushp) < 0) {
+               /* No trailing newline: perror() appends ": <errno text>\n" itself. */
+               perror("Failed to send flush request");
+               return -1;
+       }
+       filter.flushp = 0;
+       return 0;
+}
+
+int print_route(struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
+{
+       FILE *fp = (FILE*)arg;
+       struct rtmsg *r = NLMSG_DATA(n);
+       int len = n->nlmsg_len;
+       struct rtattr * tb[RTA_MAX+1];
+       char abuf[256];
+       inet_prefix dst;
+       inet_prefix src;
+       inet_prefix prefsrc;
+       inet_prefix via;
+       int host_len = -1;
+       SPRINT_BUF(b1);
+       
+
+       if (n->nlmsg_type != RTM_NEWROUTE && n->nlmsg_type != RTM_DELROUTE) {
+               fprintf(stderr, "Not a route: %08x %08x %08x\n",
+                       n->nlmsg_len, n->nlmsg_type, n->nlmsg_flags);
+               return 0;
+       }
+       if (filter.flushb && n->nlmsg_type != RTM_NEWROUTE)
+               return 0;
+       len -= NLMSG_LENGTH(sizeof(*r));
+       if (len < 0) {
+               fprintf(stderr, "BUG: wrong nlmsg len %d\n", len);
+               return -1;
+       }
+
+       if (r->rtm_family == AF_INET6)
+               host_len = 128;
+       else if (r->rtm_family == AF_INET)
+               host_len = 32;
+       else if (r->rtm_family == AF_DECnet)
+               host_len = 16;
+       else if (r->rtm_family == AF_IPX)
+               host_len = 80;
+
+       if (r->rtm_family == AF_INET6) {
+               if (filter.tb) {
+                       if (filter.tb < 0) {
+                               if (!(r->rtm_flags&RTM_F_CLONED))
+                                       return 0;
+                       } else {
+                               if (r->rtm_flags&RTM_F_CLONED)
+                                       return 0;
+                               if (filter.tb == RT_TABLE_LOCAL) {
+                                       if (r->rtm_type != RTN_LOCAL)
+                                               return 0;
+                               } else if (filter.tb == RT_TABLE_MAIN) {
+                                       if (r->rtm_type == RTN_LOCAL)
+                                               return 0;
+                               } else {
+                                       return 0;
+                               }
+                       }
+               }
+       } else {
+               if (filter.tb > 0 && filter.tb != r->rtm_table)
+                       return 0;
+       }
+       if ((filter.protocol^r->rtm_protocol)&filter.protocolmask)
+               return 0;
+       if ((filter.scope^r->rtm_scope)&filter.scopemask)
+               return 0;
+       if ((filter.type^r->rtm_type)&filter.typemask)
+               return 0;
+       if ((filter.tos^r->rtm_tos)&filter.tosmask)
+               return 0;
+       if (filter.rdst.family &&
+           (r->rtm_family != filter.rdst.family || filter.rdst.bitlen > r->rtm_dst_len))
+               return 0;
+       if (filter.mdst.family &&
+           (r->rtm_family != filter.mdst.family ||
+            (filter.mdst.bitlen >= 0 && filter.mdst.bitlen < r->rtm_dst_len)))
+               return 0;
+       if (filter.rsrc.family &&
+           (r->rtm_family != filter.rsrc.family || filter.rsrc.bitlen > r->rtm_src_len))
+               return 0;
+       if (filter.msrc.family &&
+           (r->rtm_family != filter.msrc.family ||
+            (filter.msrc.bitlen >= 0 && filter.msrc.bitlen < r->rtm_src_len)))
+               return 0;
+       if (filter.rvia.family && r->rtm_family != filter.rvia.family)
+               return 0;
+       if (filter.rprefsrc.family && r->rtm_family != filter.rprefsrc.family)
+               return 0;
+
+
+       memset(tb, 0, sizeof(tb));
+       parse_rtattr(tb, RTA_MAX, RTM_RTA(r), len);
+
+       memset(&dst, 0, sizeof(dst));
+       dst.family = r->rtm_family;
+       if (tb[RTA_DST])
+               memcpy(&dst.data, RTA_DATA(tb[RTA_DST]), (r->rtm_dst_len+7)/8);
+       if (filter.rsrc.family || filter.msrc.family) {
+               memset(&src, 0, sizeof(src));
+               src.family = r->rtm_family;
+               if (tb[RTA_SRC])
+                       memcpy(&src.data, RTA_DATA(tb[RTA_SRC]), (r->rtm_src_len+7)/8);
+       }
+       if (filter.rvia.bitlen>0) {
+               memset(&via, 0, sizeof(via));
+               via.family = r->rtm_family;
+               if (tb[RTA_GATEWAY])
+                       memcpy(&via.data, RTA_DATA(tb[RTA_GATEWAY]), host_len);
+       }
+       if (filter.rprefsrc.bitlen>0) {
+               memset(&prefsrc, 0, sizeof(prefsrc));
+               prefsrc.family = r->rtm_family;
+               if (tb[RTA_PREFSRC])
+                       memcpy(&prefsrc.data, RTA_DATA(tb[RTA_PREFSRC]), host_len);
+       }
+
+       if (filter.rdst.family && inet_addr_match(&dst, &filter.rdst, filter.rdst.bitlen))
+               return 0;
+       if (filter.mdst.family && filter.mdst.bitlen >= 0 &&
+           inet_addr_match(&dst, &filter.mdst, r->rtm_dst_len))
+               return 0;
+
+       if (filter.rsrc.family && inet_addr_match(&src, &filter.rsrc, filter.rsrc.bitlen))
+               return 0;
+       if (filter.msrc.family && filter.msrc.bitlen >= 0 &&
+           inet_addr_match(&src, &filter.msrc, r->rtm_src_len))
+               return 0;
+
+       if (filter.rvia.family && inet_addr_match(&via, &filter.rvia, filter.rvia.bitlen))
+               return 0;
+       if (filter.rprefsrc.family && inet_addr_match(&prefsrc, &filter.rprefsrc, filter.rprefsrc.bitlen))
+               return 0;
+       if (filter.realmmask) {
+               __u32 realms = 0;
+               if (tb[RTA_FLOW])
+                       realms = *(__u32*)RTA_DATA(tb[RTA_FLOW]);
+               if ((realms^filter.realm)&filter.realmmask)
+                       return 0;
+       }
+       if (filter.iifmask) {
+               int iif = 0;
+               if (tb[RTA_IIF])
+                       iif = *(int*)RTA_DATA(tb[RTA_IIF]);
+               if ((iif^filter.iif)&filter.iifmask)
+                       return 0;
+       }
+       if (filter.oifmask) {
+               int oif = 0;
+               if (tb[RTA_OIF])
+                       oif = *(int*)RTA_DATA(tb[RTA_OIF]);
+               if ((oif^filter.oif)&filter.oifmask)
+                       return 0;
+       }
+       if (filter.flushb && 
+           r->rtm_family == AF_INET6 &&
+           r->rtm_dst_len == 0 &&
+           r->rtm_type == RTN_UNREACHABLE &&
+           tb[RTA_PRIORITY] &&
+           *(int*)RTA_DATA(tb[RTA_PRIORITY]) == -1)
+               return 0;
+
+       if (filter.flushb) {
+               struct nlmsghdr *fn;
+               if (NLMSG_ALIGN(filter.flushp) + n->nlmsg_len > filter.flushe) {
+                       if (flush_update())
+                               return -1;
+               }
+               fn = (struct nlmsghdr*)(filter.flushb + NLMSG_ALIGN(filter.flushp));
+               memcpy(fn, n, n->nlmsg_len);
+               fn->nlmsg_type = RTM_DELROUTE;
+               fn->nlmsg_flags = NLM_F_REQUEST;
+               fn->nlmsg_seq = ++filter.rth->seq;
+               filter.flushp = (((char*)fn) + n->nlmsg_len) - filter.flushb;
+               filter.flushed++;
+               if (show_stats < 2)
+                       return 0;
+       }
+
+       if (n->nlmsg_type == RTM_DELROUTE)
+               fprintf(fp, "Deleted ");
+       if (r->rtm_type != RTN_UNICAST && !filter.type)
+               fprintf(fp, "%s ", rtnl_rtntype_n2a(r->rtm_type, b1, sizeof(b1)));
+
+       if (tb[RTA_DST]) {
+               if (r->rtm_dst_len != host_len) {
+                       fprintf(fp, "%s/%u ", rt_addr_n2a(r->rtm_family,
+                                                        RTA_PAYLOAD(tb[RTA_DST]),
+                                                        RTA_DATA(tb[RTA_DST]),
+                                                        abuf, sizeof(abuf)),
+                               r->rtm_dst_len
+                               );
+               } else {
+                       fprintf(fp, "%s ", format_host(r->rtm_family,
+                                                      RTA_PAYLOAD(tb[RTA_DST]),
+                                                      RTA_DATA(tb[RTA_DST]),
+                                                      abuf, sizeof(abuf))
+                               );
+               }
+       } else if (r->rtm_dst_len) {
+               fprintf(fp, "0/%d ", r->rtm_dst_len);
+       } else {
+               fprintf(fp, "default ");
+       }
+       if (tb[RTA_SRC]) {
+               if (r->rtm_src_len != host_len) {
+                       fprintf(fp, "from %s/%u ", rt_addr_n2a(r->rtm_family,
+                                                        RTA_PAYLOAD(tb[RTA_SRC]),
+                                                        RTA_DATA(tb[RTA_SRC]),
+                                                        abuf, sizeof(abuf)),
+                               r->rtm_src_len
+                               );
+               } else {
+                       fprintf(fp, "from %s ", format_host(r->rtm_family,
+                                                      RTA_PAYLOAD(tb[RTA_SRC]),
+                                                      RTA_DATA(tb[RTA_SRC]),
+                                                      abuf, sizeof(abuf))
+                               );
+               }
+       } else if (r->rtm_src_len) {
+               fprintf(fp, "from 0/%u ", r->rtm_src_len);
+       }
+       if (r->rtm_tos && filter.tosmask != -1) {
+               SPRINT_BUF(b1);
+               fprintf(fp, "tos %s ", rtnl_dsfield_n2a(r->rtm_tos, b1, sizeof(b1)));
+       }
+       if (tb[RTA_GATEWAY] && filter.rvia.bitlen != host_len) {
+               fprintf(fp, "via %s ", 
+                       format_host(r->rtm_family,
+                                   RTA_PAYLOAD(tb[RTA_GATEWAY]),
+                                   RTA_DATA(tb[RTA_GATEWAY]),
+                                   abuf, sizeof(abuf)));
+       }
+       if (tb[RTA_OIF] && filter.oifmask != -1)
+               fprintf(fp, "dev %s ", ll_index_to_name(*(int*)RTA_DATA(tb[RTA_OIF])));
+
+       if (!(r->rtm_flags&RTM_F_CLONED)) {
+               if (r->rtm_table != RT_TABLE_MAIN && !filter.tb)
+                       fprintf(fp, " table %s ", rtnl_rttable_n2a(r->rtm_table, b1, sizeof(b1)));
+               if (r->rtm_protocol != RTPROT_BOOT && filter.protocolmask != -1)
+                       fprintf(fp, " proto %s ", rtnl_rtprot_n2a(r->rtm_protocol, b1, sizeof(b1)));
+               if (r->rtm_scope != RT_SCOPE_UNIVERSE && filter.scopemask != -1)
+                       fprintf(fp, " scope %s ", rtnl_rtscope_n2a(r->rtm_scope, b1, sizeof(b1)));
+       }
+       if (tb[RTA_PREFSRC] && filter.rprefsrc.bitlen != host_len) {
+               /* Do not use format_host(). It is our local addr
+                  and symbolic name will not be useful.
+                */
+               fprintf(fp, " src %s ", 
+                       rt_addr_n2a(r->rtm_family,
+                                   RTA_PAYLOAD(tb[RTA_PREFSRC]),
+                                   RTA_DATA(tb[RTA_PREFSRC]),
+                                   abuf, sizeof(abuf)));
+       }
+       if (tb[RTA_PRIORITY])
+               fprintf(fp, " metric %d ", *(__u32*)RTA_DATA(tb[RTA_PRIORITY]));
+       if (r->rtm_flags & RTNH_F_DEAD)
+               fprintf(fp, "dead ");
+       if (r->rtm_flags & RTNH_F_ONLINK)
+               fprintf(fp, "onlink ");
+       if (r->rtm_flags & RTNH_F_PERVASIVE)
+               fprintf(fp, "pervasive ");
+       if (r->rtm_flags & RTM_F_EQUALIZE)
+               fprintf(fp, "equalize ");
+       if (r->rtm_flags & RTM_F_NOTIFY)
+               fprintf(fp, "notify ");
+
+       if (tb[RTA_FLOW] && filter.realmmask != ~0U) {
+               __u32 to = *(__u32*)RTA_DATA(tb[RTA_FLOW]);
+               __u32 from = to>>16;
+               to &= 0xFFFF;
+               fprintf(fp, "realm%s ", from ? "s" : "");
+               if (from) {
+                       fprintf(fp, "%s/",
+                               rtnl_rtrealm_n2a(from, b1, sizeof(b1)));
+               }
+               fprintf(fp, "%s ",
+                       rtnl_rtrealm_n2a(to, b1, sizeof(b1)));
+       }
+       if ((r->rtm_flags&RTM_F_CLONED) && r->rtm_family == AF_INET) {
+               __u32 flags = r->rtm_flags&~0xFFFF;
+               int first = 1;
+
+               fprintf(fp, "%s    cache ", _SL_);
+
+#define PRTFL(fl,flname) if (flags&RTCF_##fl) { \
+  flags &= ~RTCF_##fl; \
+  fprintf(fp, "%s" flname "%s", first ? "<" : "", flags ? "," : "> "); \
+  first = 0; }
+               PRTFL(LOCAL, "local");
+               PRTFL(REJECT, "reject");
+               PRTFL(MULTICAST, "mc");
+               PRTFL(BROADCAST, "brd");
+               PRTFL(DNAT, "dst-nat");
+               PRTFL(SNAT, "src-nat");
+               PRTFL(MASQ, "masq");
+               PRTFL(DIRECTDST, "dst-direct");
+               PRTFL(DIRECTSRC, "src-direct");
+               PRTFL(REDIRECTED, "redirected");
+               PRTFL(DOREDIRECT, "redirect");
+               PRTFL(FAST, "fastroute");
+               PRTFL(NOTIFY, "notify");
+               PRTFL(TPROXY, "proxy");
+#ifdef RTCF_EQUALIZE
+               PRTFL(EQUALIZE, "equalize");
+#endif
+               if (flags)
+                       fprintf(fp, "%s%x> ", first ? "<" : "", flags);
+               if (tb[RTA_CACHEINFO]) {
+                       struct rta_cacheinfo *ci = RTA_DATA(tb[RTA_CACHEINFO]);
+                       static int hz;
+                       if (!hz)
+                               hz = get_hz();
+                       if (ci->rta_expires != 0)
+                               fprintf(fp, " expires %dsec", ci->rta_expires/hz);
+                       if (ci->rta_error != 0)
+                               fprintf(fp, " error %d", ci->rta_error);
+                       if (show_stats) {
+                               if (ci->rta_clntref)
+                                       fprintf(fp, " users %d", ci->rta_clntref);
+                               if (ci->rta_used != 0)
+                                       fprintf(fp, " used %d", ci->rta_used);
+                               if (ci->rta_lastuse != 0)
+                                       fprintf(fp, " age %dsec", ci->rta_lastuse/hz);
+                       }
+#ifdef RTNETLINK_HAVE_PEERINFO
+                       if (ci->rta_id)
+                               fprintf(fp, " ipid 0x%04x", ci->rta_id);
+                       if (ci->rta_ts || ci->rta_tsage)
+                               fprintf(fp, " ts 0x%x tsage %dsec", ci->rta_ts, ci->rta_tsage);
+#endif
+               }
+       } else if (r->rtm_family == AF_INET6) {
+               struct rta_cacheinfo *ci = NULL;
+               if (tb[RTA_CACHEINFO])
+                       ci = RTA_DATA(tb[RTA_CACHEINFO]);
+               if ((r->rtm_flags & RTM_F_CLONED) || (ci && ci->rta_expires)) {
+                       static int hz;
+                       if (!hz)
+                               hz = get_hz();
+                       if (r->rtm_flags & RTM_F_CLONED)
+                               fprintf(fp, "%s    cache ", _SL_);
+                       if (ci->rta_expires)
+                               fprintf(fp, " expires %dsec", ci->rta_expires/hz);
+                       if (ci->rta_error != 0)
+                               fprintf(fp, " error %d", ci->rta_error);
+                       if (show_stats) {
+                               if (ci->rta_clntref)
+                                       fprintf(fp, " users %d", ci->rta_clntref);
+                               if (ci->rta_used != 0)
+                                       fprintf(fp, " used %d", ci->rta_used);
+                               if (ci->rta_lastuse != 0)
+                                       fprintf(fp, " age %dsec", ci->rta_lastuse/hz);
+                       }
+               } else if (ci) {
+                       if (ci->rta_error != 0)
+                               fprintf(fp, " error %d", ci->rta_error);
+               }
+       }
+       if (tb[RTA_METRICS]) {
+               int i;
+               unsigned mxlock = 0;
+               struct rtattr *mxrta[RTAX_MAX+1];
+
+               memset(mxrta, 0, sizeof(mxrta));
+
+               parse_rtattr(mxrta, RTAX_MAX, RTA_DATA(tb[RTA_METRICS]),
+                           RTA_PAYLOAD(tb[RTA_METRICS]));
+               if (mxrta[RTAX_LOCK])
+                       mxlock = *(unsigned*)RTA_DATA(mxrta[RTAX_LOCK]);
+
+               for (i=2; i<=RTAX_MAX; i++) {
+                       static char *mx_names[] = 
+                       {
+                               "mtu",
+                               "window",
+                               "rtt",
+                               "rttvar",
+                               "ssthresh",
+                               "cwnd",
+                               "advmss",
+                               "reordering",
+                       };
+                       static int hz;
+                       if (mxrta[i] == NULL)
+                               continue;
+                       if (!hz)
+                               hz = get_hz();
+                       if (i-2 < sizeof(mx_names)/sizeof(char*))
+                               fprintf(fp, " %s", mx_names[i-2]);
+                       else
+                               fprintf(fp, " metric%d", i);
+                       if (mxlock & (1<<i))
+                               fprintf(fp, " lock");
+
+                       if (i != RTAX_RTT && i != RTAX_RTTVAR)
+                               fprintf(fp, " %u", *(unsigned*)RTA_DATA(mxrta[i]));
+                       else {
+                               unsigned val = *(unsigned*)RTA_DATA(mxrta[i]);
+
+                               val *= 1000;
+                               if (i == RTAX_RTT)
+                                       val /= 8;
+                               else
+                                       val /= 4;
+                               if (val >= hz)
+                                       fprintf(fp, " %ums", val/hz);
+                               else
+                                       fprintf(fp, " %.2fms", (float)val/hz);
+                       }
+               }
+       }
+       if (tb[RTA_IIF] && filter.iifmask != -1) {
+               fprintf(fp, " iif %s", ll_index_to_name(*(int*)RTA_DATA(tb[RTA_IIF])));
+       }
+       if (tb[RTA_MULTIPATH]) {
+               struct rtnexthop *nh = RTA_DATA(tb[RTA_MULTIPATH]);
+               int first = 0;
+
+               len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
+
+               for (;;) {
+                       if (len < sizeof(*nh))
+                               break;
+                       if (nh->rtnh_len > len)
+                               break;
+                       if (r->rtm_flags&RTM_F_CLONED && r->rtm_type == RTN_MULTICAST) {
+                               if (first)
+                                       fprintf(fp, " Oifs:");
+                               else
+                                       fprintf(fp, " ");
+                       } else
+                               fprintf(fp, "%s\tnexthop", _SL_);
+                       if (nh->rtnh_len > sizeof(*nh)) {
+                               memset(tb, 0, sizeof(tb));
+                               parse_rtattr(tb, RTA_MAX, RTNH_DATA(nh), nh->rtnh_len - sizeof(*nh));
+                               if (tb[RTA_GATEWAY]) {
+                                       fprintf(fp, " via %s ", 
+                                               format_host(r->rtm_family,
+                                                           RTA_PAYLOAD(tb[RTA_GATEWAY]),
+                                                           RTA_DATA(tb[RTA_GATEWAY]),
+                                                           abuf, sizeof(abuf)));
+                               }
+                       }
+                       if (r->rtm_flags&RTM_F_CLONED && r->rtm_type == RTN_MULTICAST) {
+                               fprintf(fp, " %s", ll_index_to_name(nh->rtnh_ifindex));
+                               if (nh->rtnh_hops != 1)
+                                       fprintf(fp, "(ttl>%d)", nh->rtnh_hops);
+                       } else {
+                               fprintf(fp, " dev %s", ll_index_to_name(nh->rtnh_ifindex));
+                               fprintf(fp, " weight %d", nh->rtnh_hops+1);
+                       }
+                       if (nh->rtnh_flags & RTNH_F_DEAD)
+                               fprintf(fp, " dead");
+                       if (nh->rtnh_flags & RTNH_F_ONLINK)
+                               fprintf(fp, " onlink");
+                       if (nh->rtnh_flags & RTNH_F_PERVASIVE)
+                               fprintf(fp, " pervasive");
+                       len -= NLMSG_ALIGN(nh->rtnh_len);
+                       nh = RTNH_NEXT(nh);
+               }
+       }
+       fprintf(fp, "\n");
+       fflush(fp);
+       return 0;
+}
+
+
+/*
+ * parse_one_nh - parse the options of a single "nexthop" clause.
+ *
+ * rta    RTA_MULTIPATH attribute under construction; a gateway given with
+ *        "via" is appended to it as an RTA_GATEWAY attribute.  The buffer
+ *        behind it is bounded at 4096 bytes here.
+ * rtnh   nexthop header being filled in (ifindex, hops, flags, length).
+ * argcp  in/out argument count.
+ * argvp  in/out argument vector; the first word is skipped before parsing
+ *        starts, and on return it points at the first unrecognised word.
+ *
+ * Recognised options are "via ADDR", "dev NAME", "weight N" (1..256) and
+ * "onlink".  Returns 0.  An unknown device name, or a gateway that no
+ * longer fits into the attribute buffer, terminates the program.
+ */
+int parse_one_nh(struct rtattr *rta, struct rtnexthop *rtnh, int *argcp, char ***argvp)
+{
+       int argc = *argcp;
+       char **argv = *argvp;
+
+       while (++argv, --argc > 0) {
+               if (strcmp(*argv, "via") == 0) {
+                       NEXT_ARG();
+                       /*
+                        * Only account for the gateway in rtnh_len once it has
+                        * really been stored; otherwise the nexthop length and
+                        * the length of the enclosing attribute would disagree.
+                        */
+                       if (rta_addattr32(rta, 4096, RTA_GATEWAY, get_addr32(*argv)) < 0) {
+                               fprintf(stderr, "Error: no room left for gateway \"%s\"\n", *argv);
+                               exit(-1);
+                       }
+                       rtnh->rtnh_len += sizeof(struct rtattr) + 4;
+               } else if (strcmp(*argv, "dev") == 0) {
+                       NEXT_ARG();
+                       if ((rtnh->rtnh_ifindex = ll_name_to_index(*argv)) == 0) {
+                               fprintf(stderr, "Cannot find device \"%s\"\n", *argv);
+                               exit(1);
+                       }
+               } else if (strcmp(*argv, "weight") == 0) {
+                       unsigned w;
+                       NEXT_ARG();
+                       if (get_unsigned(&w, *argv, 0) || w == 0 || w > 256)
+                               invarg("\"weight\" is invalid\n", *argv);
+                       /* rtnh_hops stores weight-1, so 1..256 maps to 0..255 */
+                       rtnh->rtnh_hops = w - 1;
+               } else if (strcmp(*argv, "onlink") == 0) {
+                       rtnh->rtnh_flags |= RTNH_F_ONLINK;
+               } else
+                       break;
+       }
+       *argcp = argc;
+       *argvp = argv;
+       return 0;
+}
+
+/*
+ * parse_nexthops - turn a sequence of "nexthop ..." clauses into one
+ * RTA_MULTIPATH attribute and append it to the netlink message n.
+ *
+ * n     message that receives the attribute (bounded at 1024 bytes here).
+ * r     not used by this function.
+ * argc  number of remaining words; argv must start at a "nexthop" word.
+ *
+ * Returns 0.  A word other than "nexthop" where a clause should start, a
+ * trailing bare "nexthop", or more nexthops than fit into the buffers
+ * terminates the program with an error message.
+ */
+int parse_nexthops(struct nlmsghdr *n, struct rtmsg *r, int argc, char **argv)
+{
+       /*
+        * parse_one_nh() bounds the attributes it appends at 4096 bytes, so
+        * the scratch buffer must be at least that large.
+        */
+       char buf[4096];
+       struct rtattr *rta = (void*)buf;
+       struct rtnexthop *rtnh;
+
+       rta->rta_type = RTA_MULTIPATH;
+       rta->rta_len = RTA_LENGTH(0);
+       rtnh = RTA_DATA(rta);
+
+       while (argc > 0) {
+               if (strcmp(*argv, "nexthop") != 0) {
+                       fprintf(stderr, "Error: \"nexthop\" or end of line is expected instead of \"%s\"\n", *argv);
+                       exit(-1);
+               }
+               if (argc <= 1) {
+                       fprintf(stderr, "Error: unexpected end of line after \"nexthop\"\n");
+                       exit(-1);
+               }
+               /* The next header is written at offset rta_len; make sure it fits. */
+               if (rta->rta_len + sizeof(*rtnh) > sizeof(buf)) {
+                       fprintf(stderr, "Error: too many nexthops\n");
+                       exit(-1);
+               }
+               memset(rtnh, 0, sizeof(*rtnh));
+               rtnh->rtnh_len = sizeof(*rtnh);
+               rta->rta_len += rtnh->rtnh_len;
+               parse_one_nh(rta, rtnh, &argc, &argv);
+               rtnh = RTNH_NEXT(rtnh);
+       }
+
+       /* Do not send a request that silently lost its nexthops. */
+       if (rta->rta_len > RTA_LENGTH(0) &&
+           addattr_l(n, 1024, RTA_MULTIPATH, RTA_DATA(rta), RTA_PAYLOAD(rta)) < 0) {
+               fprintf(stderr, "Error: nexthops do not fit into the request\n");
+               exit(-1);
+       }
+       return 0;
+}
+
+/*
+ * iproute_modify - build one route request from the command line and send it.
+ *
+ * cmd is stored in nlmsg_type and flags are OR-ed with NLM_F_REQUEST into
+ * nlmsg_flags.  argc/argv hold the words that follow the sub-command.
+ *
+ * The option loop fills the rtmsg header and appends attributes to the
+ * request.  Metrics (mtu, rtt, window, ...) are first gathered in mxbuf and
+ * added as a single RTA_METRICS attribute after the loop.  A "nexthop" word
+ * ends the loop; the remaining words are handed to parse_nexthops().
+ *
+ * Returns 0 after rtnl_talk() succeeds and -1 when the "dev" name cannot be
+ * resolved.  Failure of rtnl_open() or rtnl_talk() exits the process.
+ */
+int iproute_modify(int cmd, unsigned flags, int argc, char **argv)
+{
+       struct rtnl_handle rth;
+       struct {
+               struct nlmsghdr         n;
+               struct rtmsg            r;
+               char                    buf[1024];
+       } req;
+       char  mxbuf[256];
+       struct rtattr * mxrta = (void*)mxbuf;
+       unsigned mxlock = 0;    /* one (1<<RTAX_x) bit per metric given with "lock" */
+       char  *d = NULL;        /* device name from "dev"/"oif", resolved after the loop */
+       int gw_ok = 0;          /* set when "via" is seen */
+       int dst_ok = 0;         /* set once a destination prefix has been parsed */
+       int nhs_ok = 0;         /* set when "nexthop" is seen */
+       int scope_ok = 0;       /* set when "scope" is given explicitly */
+       int table_ok = 0;       /* set when "table" is given explicitly */
+       int proto_ok = 0;       /* set by "protocol"; not read in this function */
+       int type_ok = 0;        /* set when a route type word is parsed; not read in this function */
+
+       memset(&req, 0, sizeof(req));
+
+       req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
+       req.n.nlmsg_flags = NLM_F_REQUEST|flags;
+       req.n.nlmsg_type = cmd;
+       req.r.rtm_family = preferred_family;
+       req.r.rtm_table = RT_TABLE_MAIN;
+       req.r.rtm_scope = RT_SCOPE_NOWHERE;     /* overwritten just below unless cmd is RTM_DELROUTE */
+
+       if (cmd != RTM_DELROUTE) {
+               req.r.rtm_protocol = RTPROT_BOOT;
+               req.r.rtm_scope = RT_SCOPE_UNIVERSE;
+               req.r.rtm_type = RTN_UNICAST;
+       }
+
+       mxrta->rta_type = RTA_METRICS;
+       mxrta->rta_len = RTA_LENGTH(0);         /* empty until a metric option is parsed */
+
+       while (argc > 0) {
+               if (strcmp(*argv, "src") == 0) {
+                       inet_prefix addr;
+                       NEXT_ARG();
+                       get_addr(&addr, *argv, req.r.rtm_family);
+                       if (req.r.rtm_family == AF_UNSPEC)
+                               req.r.rtm_family = addr.family;
+                       addattr_l(&req.n, sizeof(req), RTA_PREFSRC, &addr.data, addr.bytelen);
+               } else if (strcmp(*argv, "via") == 0) {
+                       inet_prefix addr;
+                       gw_ok = 1;
+                       NEXT_ARG();
+                       get_addr(&addr, *argv, req.r.rtm_family);
+                       if (req.r.rtm_family == AF_UNSPEC)
+                               req.r.rtm_family = addr.family;
+                       addattr_l(&req.n, sizeof(req), RTA_GATEWAY, &addr.data, addr.bytelen);
+               } else if (strcmp(*argv, "from") == 0) {
+                       inet_prefix addr;
+                       NEXT_ARG();
+                       get_prefix(&addr, *argv, req.r.rtm_family);
+                       if (req.r.rtm_family == AF_UNSPEC)
+                               req.r.rtm_family = addr.family;
+                       if (addr.bytelen)
+                               addattr_l(&req.n, sizeof(req), RTA_SRC, &addr.data, addr.bytelen);
+                       req.r.rtm_src_len = addr.bitlen;
+               } else if (strcmp(*argv, "tos") == 0 ||
+                          matches(*argv, "dsfield") == 0) {
+                       __u32 tos;
+                       NEXT_ARG();
+                       if (rtnl_dsfield_a2n(&tos, *argv))
+                               invarg("\"tos\" value is invalid\n", *argv);
+                       req.r.rtm_tos = tos;
+               } else if (matches(*argv, "metric") == 0 ||
+                          matches(*argv, "priority") == 0 ||
+                          matches(*argv, "preference") == 0) {
+                       __u32 metric;
+                       NEXT_ARG();
+                       if (get_u32(&metric, *argv, 0))
+                               invarg("\"metric\" value is invalid\n", *argv);
+                       addattr32(&req.n, sizeof(req), RTA_PRIORITY, metric);
+               } else if (strcmp(*argv, "scope") == 0) {
+                       int scope = 0;
+                       NEXT_ARG();
+                       if (rtnl_rtscope_a2n(&scope, *argv))
+                               invarg("invalid \"scope\" value\n", *argv);
+                       req.r.rtm_scope = scope;
+                       scope_ok = 1;
+               } else if (strcmp(*argv, "mtu") == 0) {
+                       /* Each metric option below accepts an optional "lock" word before its value. */
+                       unsigned mtu;
+                       NEXT_ARG();
+                       if (strcmp(*argv, "lock") == 0) {
+                               mxlock |= (1<<RTAX_MTU);
+                               NEXT_ARG();
+                       }
+                       if (get_unsigned(&mtu, *argv, 0))
+                               invarg("\"mtu\" value is invalid\n", *argv);
+                       rta_addattr32(mxrta, sizeof(mxbuf), RTAX_MTU, mtu);
+#ifdef RTAX_ADVMSS
+               } else if (strcmp(*argv, "advmss") == 0) {
+                       unsigned mss;
+                       NEXT_ARG();
+                       if (strcmp(*argv, "lock") == 0) {
+                               mxlock |= (1<<RTAX_ADVMSS);
+                               NEXT_ARG();
+                       }
+                       if (get_unsigned(&mss, *argv, 0))
+                               invarg("\"mss\" value is invalid\n", *argv);
+                       rta_addattr32(mxrta, sizeof(mxbuf), RTAX_ADVMSS, mss);
+#endif
+#ifdef RTAX_REORDERING
+               } else if (matches(*argv, "reordering") == 0) {
+                       unsigned reord;
+                       NEXT_ARG();
+                       if (strcmp(*argv, "lock") == 0) {
+                               mxlock |= (1<<RTAX_REORDERING);
+                               NEXT_ARG();
+                       }
+                       if (get_unsigned(&reord, *argv, 0))
+                               invarg("\"reordering\" value is invalid\n", *argv);
+                       rta_addattr32(mxrta, sizeof(mxbuf), RTAX_REORDERING, reord);
+#endif
+               } else if (strcmp(*argv, "rtt") == 0) {
+                       unsigned rtt;
+                       NEXT_ARG();
+                       if (strcmp(*argv, "lock") == 0) {
+                               mxlock |= (1<<RTAX_RTT);
+                               NEXT_ARG();
+                       }
+                       if (get_unsigned(&rtt, *argv, 0))
+                               invarg("\"rtt\" value is invalid\n", *argv);
+                       rta_addattr32(mxrta, sizeof(mxbuf), RTAX_RTT, rtt);
+               } else if (matches(*argv, "window") == 0) {
+                       unsigned win;
+                       NEXT_ARG();
+                       if (strcmp(*argv, "lock") == 0) {
+                               mxlock |= (1<<RTAX_WINDOW);
+                               NEXT_ARG();
+                       }
+                       if (get_unsigned(&win, *argv, 0))
+                               invarg("\"window\" value is invalid\n", *argv);
+                       rta_addattr32(mxrta, sizeof(mxbuf), RTAX_WINDOW, win);
+               } else if (matches(*argv, "cwnd") == 0) {
+                       unsigned win;
+                       NEXT_ARG();
+                       if (strcmp(*argv, "lock") == 0) {
+                               mxlock |= (1<<RTAX_CWND);
+                               NEXT_ARG();
+                       }
+                       if (get_unsigned(&win, *argv, 0))
+                               invarg("\"cwnd\" value is invalid\n", *argv);
+                       rta_addattr32(mxrta, sizeof(mxbuf), RTAX_CWND, win);
+               } else if (matches(*argv, "rttvar") == 0) {
+                       unsigned win;
+                       NEXT_ARG();
+                       if (strcmp(*argv, "lock") == 0) {
+                               mxlock |= (1<<RTAX_RTTVAR);
+                               NEXT_ARG();
+                       }
+                       if (get_unsigned(&win, *argv, 0))
+                               invarg("\"rttvar\" value is invalid\n", *argv);
+                       rta_addattr32(mxrta, sizeof(mxbuf), RTAX_RTTVAR, win);
+               } else if (matches(*argv, "ssthresh") == 0) {
+                       unsigned win;
+                       NEXT_ARG();
+                       if (strcmp(*argv, "lock") == 0) {
+                               mxlock |= (1<<RTAX_SSTHRESH);
+                               NEXT_ARG();
+                       }
+                       if (get_unsigned(&win, *argv, 0))
+                               invarg("\"ssthresh\" value is invalid\n", *argv);
+                       rta_addattr32(mxrta, sizeof(mxbuf), RTAX_SSTHRESH, win);
+               } else if (matches(*argv, "realms") == 0) {
+                       __u32 realm;
+                       NEXT_ARG();
+                       if (get_rt_realms(&realm, *argv))
+                               invarg("\"realm\" value is invalid\n", *argv);
+                       addattr32(&req.n, sizeof(req), RTA_FLOW, realm);
+               } else if (strcmp(*argv, "onlink") == 0) {
+                       req.r.rtm_flags |= RTNH_F_ONLINK;
+               } else if (matches(*argv, "equalize") == 0 ||
+                          strcmp(*argv, "eql") == 0) {
+                       req.r.rtm_flags |= RTM_F_EQUALIZE;
+               } else if (strcmp(*argv, "nexthop") == 0) {
+                       nhs_ok = 1;
+                       break;  /* argc/argv still point at "nexthop" for parse_nexthops() */
+               } else if (matches(*argv, "protocol") == 0) {
+                       int prot;
+                       NEXT_ARG();
+                       if (rtnl_rtprot_a2n(&prot, *argv))
+                               invarg("\"protocol\" value is invalid\n", *argv);
+                       req.r.rtm_protocol = prot;
+                       proto_ok =1;
+               } else if (matches(*argv, "table") == 0) {
+                       int tid;
+                       NEXT_ARG();
+                       if (rtnl_rttable_a2n(&tid, *argv))
+                               invarg("\"table\" value is invalid\n", *argv);
+                       req.r.rtm_table = tid;
+                       table_ok = 1;
+               } else if (strcmp(*argv, "dev") == 0 ||
+                          strcmp(*argv, "oif") == 0) {
+                       NEXT_ARG();
+                       d = *argv;
+               } else {
+                       /* Anything else: optional "to", optional route type word, then the destination. */
+                       int type;
+                       inet_prefix dst;
+
+                       if (strcmp(*argv, "to") == 0) {
+                               NEXT_ARG();
+                       }
+                       /* words starting with a digit are never tried as a route type */
+                       if ((**argv < '0' || **argv > '9') &&
+                           rtnl_rtntype_a2n(&type, *argv) == 0) {
+                               NEXT_ARG();
+                               req.r.rtm_type = type;
+                               type_ok = 1;
+                       }
+
+                       if (matches(*argv, "help") == 0)
+                               usage();
+                       if (dst_ok)     /* a second destination is reported as a duplicate "to" */
+                               duparg2("to", *argv);
+                       get_prefix(&dst, *argv, req.r.rtm_family);
+                       if (req.r.rtm_family == AF_UNSPEC)
+                               req.r.rtm_family = dst.family;
+                       req.r.rtm_dst_len = dst.bitlen;
+                       dst_ok = 1;
+                       if (dst.bytelen)
+                               addattr_l(&req.n, sizeof(req), RTA_DST, &dst.data, dst.bytelen);
+               }
+               argc--; argv++;
+       }
+
+       if (rtnl_open(&rth, 0) < 0)
+               exit(1);
+
+       /* The interface map is only loaded when a device name has to be resolved. */
+       if (d || nhs_ok)  {
+               int idx;
+
+               ll_init_map(&rth);
+
+               if (d) {
+                       if ((idx = ll_name_to_index(d)) == 0) {
+                               fprintf(stderr, "Cannot find device \"%s\"\n", d);
+                               return -1;
+                       }
+                       addattr32(&req.n, sizeof(req), RTA_OIF, idx);
+               }
+       }
+
+       /* Emit the collected metrics, with the lock bitmap appended when any "lock" was given. */
+       if (mxrta->rta_len > RTA_LENGTH(0)) {
+               if (mxlock)
+                       rta_addattr32(mxrta, sizeof(mxbuf), RTAX_LOCK, mxlock);
+               addattr_l(&req.n, sizeof(req), RTA_METRICS, RTA_DATA(mxrta), RTA_PAYLOAD(mxrta));
+       }
+
+       if (nhs_ok)
+               parse_nexthops(&req.n, &req.r, argc, argv);
+
+       /* No explicit table: these route types are placed in RT_TABLE_LOCAL. */
+       if (!table_ok) {
+               if (req.r.rtm_type == RTN_LOCAL ||
+                   req.r.rtm_type == RTN_BROADCAST ||
+                   req.r.rtm_type == RTN_NAT ||
+                   req.r.rtm_type == RTN_ANYCAST)
+                       req.r.rtm_table = RT_TABLE_LOCAL;
+       }
+       /* No explicit scope: derive it from the route type. */
+       if (!scope_ok) {
+               if (req.r.rtm_type == RTN_LOCAL ||
+                   req.r.rtm_type == RTN_NAT)
+                       req.r.rtm_scope = RT_SCOPE_HOST;
+               else if (req.r.rtm_type == RTN_BROADCAST ||
+                        req.r.rtm_type == RTN_MULTICAST ||
+                        req.r.rtm_type == RTN_ANYCAST)
+                       req.r.rtm_scope = RT_SCOPE_LINK;
+               else if (req.r.rtm_type == RTN_UNICAST ||
+                        req.r.rtm_type == RTN_UNSPEC) {
+                       if (cmd == RTM_DELROUTE)
+                               req.r.rtm_scope = RT_SCOPE_NOWHERE;
+                       else if (!gw_ok && !nhs_ok)     /* neither gateway nor nexthops given */
+                               req.r.rtm_scope = RT_SCOPE_LINK;
+               }
+       }
+
+       if (req.r.rtm_family == AF_UNSPEC)      /* no option fixed the family */
+               req.r.rtm_family = AF_INET;
+
+       if (rtnl_talk(&rth, &req.n, 0, 0, NULL, NULL, NULL) < 0)
+               exit(2);
+
+       return 0;
+}
+
+/*
+ * rtnl_rtcache_request - send an RTM_GETROUTE root request with
+ * RTM_F_CLONED set in rtm_flags for the given address family.
+ *
+ * The handle's sequence number is advanced and recorded in rth->dump.
+ * Returns whatever sendto() returns.
+ */
+static int rtnl_rtcache_request(struct rtnl_handle *rth, int family)
+{
+       struct sockaddr_nl kernel;
+       struct {
+               struct nlmsghdr nlh;
+               struct rtmsg rtm;
+       } msg;
+
+       /* Destination address: only the family is set, the rest stays zero. */
+       memset(&kernel, 0, sizeof(kernel));
+       kernel.nl_family = AF_NETLINK;
+
+       /* Zeroing the message also leaves nlmsg_pid at 0. */
+       memset(&msg, 0, sizeof(msg));
+       msg.rtm.rtm_family = family;
+       msg.rtm.rtm_flags = RTM_F_CLONED;
+       msg.nlh.nlmsg_type = RTM_GETROUTE;
+       msg.nlh.nlmsg_flags = NLM_F_ROOT|NLM_F_REQUEST;
+       msg.nlh.nlmsg_len = sizeof(msg);
+
+       rth->dump = ++rth->seq;
+       msg.nlh.nlmsg_seq = rth->dump;
+
+       return sendto(rth->fd, (void*)&msg, sizeof(msg), 0,
+                     (struct sockaddr*)&kernel, sizeof(kernel));
+}
+
+/*
+ * iproute_flush_cache - request a routing cache flush by writing "-1" to
+ * ROUTE_FLUSH_PATH.
+ *
+ * Returns 0 on success, or -1 after printing a message to stderr when the
+ * file cannot be opened or the write is short.  The descriptor is closed
+ * on every path once it has been opened.
+ */
+static int iproute_flush_cache(void)
+{
+#define ROUTE_FLUSH_PATH "/proc/sys/net/ipv4/route/flush"
+
+       int len;
+       int flush_fd = open (ROUTE_FLUSH_PATH, O_WRONLY);
+       const char *buffer = "-1";
+
+       if (flush_fd < 0) {
+               fprintf (stderr, "Cannot open \"%s\"\n", ROUTE_FLUSH_PATH);
+               return -1;
+       }
+
+       len = strlen (buffer);
+
+       if ((write (flush_fd, buffer, len)) < len) {
+               fprintf (stderr, "Cannot flush routing cache\n");
+               close(flush_fd);        /* do not leak the descriptor on failure */
+               return -1;
+       }
+       close(flush_fd);
+       return 0;
+}
+
+/*
+ * iproute_list_or_flush - common body of "ip route list" and "ip route flush".
+ *
+ * The arguments are parsed into the file-level 'filter' (reset first, table
+ * defaulting to RT_TABLE_MAIN).  filter.tb is set to 0 for "table all" and
+ * to -1 for "table cache" / "cached" / "cloned".
+ *
+ * flush == 0: one dump is requested and passed through print_route(); the
+ * function then calls exit(0) instead of returning.
+ * flush != 0: dumps are repeated, each followed by flush_update(), until a
+ * round leaves filter.flushed at 0; the function then returns 0.
+ * NOTE(review): print_route() presumably queues matching routes in flushb
+ * and counts them in filter.flushed -- its body is not visible here.
+ *
+ * Returns -1 when flush is requested without arguments or a device name
+ * cannot be resolved.  Netlink failures exit with status 1.
+ */
+static int iproute_list_or_flush(int argc, char **argv, int flush)
+{
+       int do_ipv6 = preferred_family; /* address family used for prefixes and dump requests */
+       struct rtnl_handle rth;
+       char *id = NULL;        /* "iif" device name */
+       char *od = NULL;        /* "dev"/"oif" device name */
+
+       iproute_reset_filter();
+       filter.tb = RT_TABLE_MAIN;
+
+       if (flush && argc <= 0) {
+               fprintf(stderr, "\"ip route flush\" requires arguments.\n");
+               return -1;
+       }
+
+       while (argc > 0) {
+               if (matches(*argv, "table") == 0) {
+                       int tid;
+                       NEXT_ARG();
+                       /* words rtnl_rttable_a2n() rejects may still be the special names below */
+                       if (rtnl_rttable_a2n(&tid, *argv)) {
+                               if (strcmp(*argv, "all") == 0) {
+                                       tid = 0;
+                               } else if (strcmp(*argv, "cache") == 0) {
+                                       tid = -1;
+                               } else if (strcmp(*argv, "help") == 0) {
+                                       usage();
+                               } else {
+                                       invarg("table id value is invalid\n", *argv);
+                               }
+                       }
+                       filter.tb = tid;
+               } else if (matches(*argv, "cached") == 0 ||
+                          matches(*argv, "cloned") == 0) {
+                       filter.tb = -1;
+               } else if (strcmp(*argv, "tos") == 0 ||
+                          matches(*argv, "dsfield") == 0) {
+                       __u32 tos;
+                       NEXT_ARG();
+                       if (rtnl_dsfield_a2n(&tos, *argv))
+                               invarg("TOS value is invalid\n", *argv);
+                       filter.tos = tos;
+                       filter.tosmask = -1;
+               } else if (matches(*argv, "protocol") == 0) {
+                       int prot = 0;
+                       NEXT_ARG();
+                       filter.protocolmask = -1;
+                       if (rtnl_rtprot_a2n(&prot, *argv)) {
+                               if (strcmp(*argv, "all") != 0)
+                                       invarg("invalid \"protocol\"\n", *argv);
+                               prot = 0;
+                               filter.protocolmask = 0;        /* "all": mask cleared again */
+                       }
+                       filter.protocol = prot;
+               } else if (matches(*argv, "scope") == 0) {
+                       int scope = 0;
+                       NEXT_ARG();
+                       filter.scopemask = -1;
+                       if (rtnl_rtscope_a2n(&scope, *argv)) {
+                               if (strcmp(*argv, "all") != 0)
+                                       invarg("invalid \"scope\"\n", *argv);
+                               scope = RT_SCOPE_NOWHERE;
+                               filter.scopemask = 0;           /* "all": mask cleared again */
+                       }
+                       filter.scope = scope;
+               } else if (matches(*argv, "type") == 0) {
+                       int type;
+                       NEXT_ARG();
+                       filter.typemask = -1;
+                       if (rtnl_rtntype_a2n(&type, *argv))
+                               invarg("node type value is invalid\n", *argv);
+                       filter.type = type;
+               } else if (strcmp(*argv, "dev") == 0 ||
+                          strcmp(*argv, "oif") == 0) {
+                       NEXT_ARG();
+                       od = *argv;
+               } else if (strcmp(*argv, "iif") == 0) {
+                       NEXT_ARG();
+                       id = *argv;
+               } else if (strcmp(*argv, "via") == 0) {
+                       NEXT_ARG();
+                       get_prefix(&filter.rvia, *argv, do_ipv6);
+               } else if (strcmp(*argv, "src") == 0) {
+                       NEXT_ARG();
+                       get_prefix(&filter.rprefsrc, *argv, do_ipv6);
+               } else if (matches(*argv, "realms") == 0) {
+                       __u32 realm;
+                       NEXT_ARG();
+                       if (get_rt_realms(&realm, *argv))
+                               invarg("invalid realms\n", *argv);
+                       filter.realm = realm;
+                       filter.realmmask = ~0U;
+                       /* low half is zero and the word ends in '/': drop the low 16 bits from the mask */
+                       if ((filter.realm&0xFFFF) == 0 &&
+                           (*argv)[strlen(*argv) - 1] == '/')
+                               filter.realmmask &= ~0xFFFF;
+                       /* high half is zero and the word has no '/' or starts with one: drop the high 16 bits */
+                       if ((filter.realm&0xFFFF0000U) == 0 &&
+                           (strchr(*argv, '/') == NULL ||
+                            (*argv)[0] == '/'))
+                               filter.realmmask &= ~0xFFFF0000U;
+               } else if (matches(*argv, "from") == 0) {
+                       /* "from [root|match|exact] PREFIX": root fills rsrc, match fills msrc, exact/default fills both */
+                       NEXT_ARG();
+                       if (matches(*argv, "root") == 0) {
+                               NEXT_ARG();
+                               get_prefix(&filter.rsrc, *argv, do_ipv6);
+                       } else if (matches(*argv, "match") == 0) {
+                               NEXT_ARG();
+                               get_prefix(&filter.msrc, *argv, do_ipv6);
+                       } else {
+                               if (matches(*argv, "exact") == 0) {
+                                       NEXT_ARG();
+                               }
+                               get_prefix(&filter.msrc, *argv, do_ipv6);
+                               filter.rsrc = filter.msrc;
+                       }
+               } else {
+                       /* same three forms for the destination, with an optional leading "to" */
+                       if (matches(*argv, "to") == 0) {
+                               NEXT_ARG();
+                       }
+                       if (matches(*argv, "root") == 0) {
+                               NEXT_ARG();
+                               get_prefix(&filter.rdst, *argv, do_ipv6);
+                       } else if (matches(*argv, "match") == 0) {
+                               NEXT_ARG();
+                               get_prefix(&filter.mdst, *argv, do_ipv6);
+                       } else {
+                               if (matches(*argv, "exact") == 0) {
+                                       NEXT_ARG();
+                               }
+                               get_prefix(&filter.mdst, *argv, do_ipv6);
+                               filter.rdst = filter.mdst;
+                       }
+               }
+               argc--; argv++;
+       }
+
+       /* family still unspecified and filter.tb non-zero: fall back to AF_INET */
+       if (do_ipv6 == AF_UNSPEC && filter.tb)
+               do_ipv6 = AF_INET;
+
+       if (rtnl_open(&rth, 0) < 0)
+               exit(1);
+
+       ll_init_map(&rth);
+
+       /* Resolve the interface names collected above into indices for the filter. */
+       if (id || od)  {
+               int idx;
+
+               if (id) {
+                       if ((idx = ll_name_to_index(id)) == 0) {
+                               fprintf(stderr, "Cannot find device \"%s\"\n", id);
+                               return -1;
+                       }
+                       filter.iif = idx;
+                       filter.iifmask = -1;
+               }
+               if (od) {
+                       if ((idx = ll_name_to_index(od)) == 0) {
+                               fprintf(stderr, "Cannot find device \"%s\"\n", od);
+                               return -1;
+                       }
+                       filter.oif = idx;
+                       filter.oifmask = -1;
+               }
+       }
+
+       if (flush) {
+               int round = 0;
+               char flushb[4096-512];
+
+               /* Cache selected: flush through iproute_flush_cache() unless the family is AF_INET6. */
+               if (filter.tb == -1) {
+                       if (do_ipv6 != AF_INET6) {
+                               iproute_flush_cache();  /* return value is not checked */
+                               if (show_stats)
+                                       printf("*** IPv4 routing cache is flushed.\n");
+                       }
+                       if (do_ipv6 == AF_INET)
+                               return 0;
+               }
+
+               filter.flushb = flushb;
+               filter.flushp = 0;
+               filter.flushe = sizeof(flushb);
+               filter.rth = &rth;
+
+               /* Repeat dump + flush_update() until a round leaves filter.flushed at 0. */
+               for (;;) {
+                       if (rtnl_wilddump_request(&rth, do_ipv6, RTM_GETROUTE) < 0) {
+                               perror("Cannot send dump request");
+                               exit(1);
+                       }
+                       filter.flushed = 0;
+                       if (rtnl_dump_filter(&rth, print_route, stdout, NULL, NULL) < 0) {
+                               fprintf(stderr, "Flush terminated\n");
+                               exit(1);
+                       }
+                       if (filter.flushed == 0) {
+                               if (round == 0) {
+                                       if (filter.tb != -1 || do_ipv6 == AF_INET6)
+                                               fprintf(stderr, "Nothing to flush.\n");
+                               } else if (show_stats)
+                                       printf("*** Flush is complete after %d round%s ***\n", round, round>1?"s":"");
+                               fflush(stdout);
+                               return 0;
+                       }
+                       round++;
+                       if (flush_update() < 0)
+                               exit(1);
+                       if (show_stats) {
+                               printf("\n*** Round %d, deleting %d entries ***\n", round, filter.flushed);
+                               fflush(stdout);
+                       }
+               }
+       }
+
+       /* Listing: a plain dump request, or the RTM_F_CLONED request when filter.tb is -1. */
+       if (filter.tb != -1) {
+               if (rtnl_wilddump_request(&rth, do_ipv6, RTM_GETROUTE) < 0) {
+                       perror("Cannot send dump request");
+                       exit(1);
+               }
+       } else {
+               if (rtnl_rtcache_request(&rth, do_ipv6) < 0) {
+                       perror("Cannot send dump request");
+                       exit(1);
+               }
+       }
+
+       if (rtnl_dump_filter(&rth, print_route, stdout, NULL, NULL) < 0) {
+               fprintf(stderr, "Dump terminated\n");
+               exit(1);
+       }
+
+       exit(0);
+}
+
+/*
+ * iproute_get - send one RTM_GETROUTE request for a destination and print
+ * the reply with print_route().
+ *
+ * Options parsed: "tos"/"dsfield", "from PREFIX", "iif DEV", "oif"/"dev DEV",
+ * "notify", "connected"; any other word (optionally preceded by "to") is
+ * taken as the destination prefix.  A destination with non-zero prefix
+ * length is required.
+ *
+ * rtnl_talk() is given &req.n as its fifth argument and the code afterwards
+ * treats req.n as the reply.  With "connected" and no "from", that reply is
+ * printed, edited in place (RTA_PREFSRC retyped to RTA_SRC; gateway and any
+ * oif/iif the user did not ask for retyped to 0) and sent again as a second
+ * RTM_GETROUTE, whose reply is printed as well.
+ *
+ * Calls exit(0) on success.  Returns -1 when a device name cannot be
+ * resolved or the first reply is unusable for the "connected" step.
+ */
+int iproute_get(int argc, char **argv)
+{
+       struct rtnl_handle rth;
+       struct {
+               struct nlmsghdr         n;
+               struct rtmsg            r;
+               char                    buf[1024];
+       } req;
+       char  *idev = NULL;     /* "iif" device name */
+       char  *odev = NULL;     /* "oif"/"dev" device name */
+       int connected = 0;      /* set by the "connected" option */
+       int from_ok = 0;        /* set when "from" supplied a source */
+
+       memset(&req, 0, sizeof(req));
+
+       iproute_reset_filter();
+
+       req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
+       req.n.nlmsg_flags = NLM_F_REQUEST;
+       req.n.nlmsg_type = RTM_GETROUTE;
+       req.r.rtm_family = preferred_family;
+       req.r.rtm_table = 0;
+       req.r.rtm_protocol = 0;
+       req.r.rtm_scope = 0;
+       req.r.rtm_type = 0;
+       req.r.rtm_src_len = 0;
+       req.r.rtm_dst_len = 0;
+       req.r.rtm_tos = 0;
+       
+       while (argc > 0) {
+               if (strcmp(*argv, "tos") == 0 ||
+                   matches(*argv, "dsfield") == 0) {
+                       __u32 tos;
+                       NEXT_ARG();
+                       if (rtnl_dsfield_a2n(&tos, *argv))
+                               invarg("TOS value is invalid\n", *argv);
+                       req.r.rtm_tos = tos;
+               } else if (matches(*argv, "from") == 0) {
+                       inet_prefix addr;
+                       NEXT_ARG();
+                       if (matches(*argv, "help") == 0)
+                               usage();
+                       from_ok = 1;
+                       get_prefix(&addr, *argv, req.r.rtm_family);
+                       if (req.r.rtm_family == AF_UNSPEC)
+                               req.r.rtm_family = addr.family;
+                       if (addr.bytelen)
+                               addattr_l(&req.n, sizeof(req), RTA_SRC, &addr.data, addr.bytelen);
+                       req.r.rtm_src_len = addr.bitlen;
+               } else if (matches(*argv, "iif") == 0) {
+                       NEXT_ARG();
+                       idev = *argv;
+               } else if (matches(*argv, "oif") == 0 ||
+                          strcmp(*argv, "dev") == 0) {
+                       NEXT_ARG();
+                       odev = *argv;
+               } else if (matches(*argv, "notify") == 0) {
+                       req.r.rtm_flags |= RTM_F_NOTIFY;
+               } else if (matches(*argv, "connected") == 0) {
+                       connected = 1;
+               } else {
+                       /* anything else is the destination, optionally introduced by "to" */
+                       inet_prefix addr;
+                       if (strcmp(*argv, "to") == 0) {
+                               NEXT_ARG();
+                       }
+                       if (matches(*argv, "help") == 0)
+                               usage();
+                       get_prefix(&addr, *argv, req.r.rtm_family);
+                       if (req.r.rtm_family == AF_UNSPEC)
+                               req.r.rtm_family = addr.family;
+                       if (addr.bytelen)
+                               addattr_l(&req.n, sizeof(req), RTA_DST, &addr.data, addr.bytelen);
+                       req.r.rtm_dst_len = addr.bitlen;
+               }
+               argc--; argv++;
+       }
+
+       /* rtm_dst_len is only set by the destination branch above */
+       if (req.r.rtm_dst_len == 0) {
+               fprintf(stderr, "need at least destination address\n");
+               exit(1);
+       }
+
+       if (rtnl_open(&rth, 0) < 0)
+               exit(1);
+
+       ll_init_map(&rth);
+
+       /* Resolve the device names into RTA_IIF / RTA_OIF attributes. */
+       if (idev || odev)  {
+               int idx;
+
+               if (idev) {
+                       if ((idx = ll_name_to_index(idev)) == 0) {
+                               fprintf(stderr, "Cannot find device \"%s\"\n", idev);
+                               return -1;
+                       }
+                       addattr32(&req.n, sizeof(req), RTA_IIF, idx);
+               }
+               if (odev) {
+                       if ((idx = ll_name_to_index(odev)) == 0) {
+                               fprintf(stderr, "Cannot find device \"%s\"\n", odev);
+                               return -1;
+                       }
+                       addattr32(&req.n, sizeof(req), RTA_OIF, idx);
+               }
+       }
+
+       if (req.r.rtm_family == AF_UNSPEC)
+               req.r.rtm_family = AF_INET;
+
+       /* req.n is passed both as the request and as the fifth (answer) argument */
+       if (rtnl_talk(&rth, &req.n, 0, 0, &req.n, NULL, NULL) < 0)
+               exit(2);
+
+       /* "connected" without "from": turn the first reply into a second request. */
+       if (connected && !from_ok) {
+               struct rtmsg *r = NLMSG_DATA(&req.n);
+               int len = req.n.nlmsg_len;
+               struct rtattr * tb[RTA_MAX+1];
+
+               if (print_route(NULL, &req.n, (void*)stdout) < 0) {
+                       fprintf(stderr, "An error :-)\n");
+                       exit(1);
+               }
+
+               if (req.n.nlmsg_type != RTM_NEWROUTE) {
+                       fprintf(stderr, "Not a route?\n");
+                       return -1;
+               }
+               len -= NLMSG_LENGTH(sizeof(*r));
+               if (len < 0) {
+                       fprintf(stderr, "Wrong len %d\n", len);
+                       return -1;
+               }
+
+               memset(tb, 0, sizeof(tb));
+               parse_rtattr(tb, RTA_MAX, RTM_RTA(r), len);
+
+               /* reuse the preferred source as RTA_SRC, with prefix length = payload bytes * 8 */
+               if (tb[RTA_PREFSRC]) {
+                       tb[RTA_PREFSRC]->rta_type = RTA_SRC;
+                       r->rtm_src_len = 8*RTA_PAYLOAD(tb[RTA_PREFSRC]);
+               } else if (!tb[RTA_SRC]) {
+                       fprintf(stderr, "Failed to connect the route\n");
+                       return -1;
+               }
+               /* attributes the user did not request are retyped to 0 in place */
+               if (!odev && tb[RTA_OIF])
+                       tb[RTA_OIF]->rta_type = 0;
+               if (tb[RTA_GATEWAY])
+                       tb[RTA_GATEWAY]->rta_type = 0;
+               if (!idev && tb[RTA_IIF])
+                       tb[RTA_IIF]->rta_type = 0;
+               req.n.nlmsg_flags = NLM_F_REQUEST;
+               req.n.nlmsg_type = RTM_GETROUTE;
+
+               if (rtnl_talk(&rth, &req.n, 0, 0, &req.n, NULL, NULL) < 0)
+                       exit(2);
+       }
+
+       if (print_route(NULL, &req.n, (void*)stdout) < 0) {
+               fprintf(stderr, "An error :-)\n");
+               exit(1);
+       }
+
+       exit(0);
+}
+
+/*
+ * iproute_reset_filter - clear the file-level route filter.
+ *
+ * Every field is zeroed, then the bitlen of the two "match" prefixes
+ * (msrc and mdst) is set to -1.
+ */
+void iproute_reset_filter()
+{
+       memset(&filter, 0, sizeof(filter));
+       filter.msrc.bitlen = -1;
+       filter.mdst.bitlen = -1;
+}
+
+/*
+ * do_iproute - dispatch on the first word after "ip route".
+ *
+ * With no words at all the route list is shown.  Otherwise the word is
+ * compared against the known sub-commands in a fixed order and the first
+ * hit wins; the remaining words are passed on to the handler.  An unknown
+ * word prints an error and exits with status -1.
+ */
+int do_iproute(int argc, char **argv)
+{
+       /* Sub-commands that all end up in iproute_modify(), in match order. */
+       static const struct {
+               char *word;
+               int exact;      /* non-zero: compare with strcmp() instead of matches() */
+               int cmd;
+               unsigned flags;
+       } modify_cmds[] = {
+               { "add",     0, RTM_NEWROUTE, NLM_F_CREATE|NLM_F_EXCL },
+               { "change",  0, RTM_NEWROUTE, NLM_F_REPLACE },
+               { "chg",     1, RTM_NEWROUTE, NLM_F_REPLACE },
+               { "replace", 0, RTM_NEWROUTE, NLM_F_CREATE|NLM_F_REPLACE },
+               { "prepend", 0, RTM_NEWROUTE, NLM_F_CREATE },
+               { "append",  0, RTM_NEWROUTE, NLM_F_CREATE|NLM_F_APPEND },
+               { "test",    0, RTM_NEWROUTE, NLM_F_EXCL },
+               { "delete",  0, RTM_DELROUTE, 0 },
+       };
+       unsigned i;
+
+       if (argc < 1)
+               return iproute_list_or_flush(0, NULL, 0);
+
+       for (i = 0; i < sizeof(modify_cmds)/sizeof(modify_cmds[0]); i++) {
+               int hit;
+
+               if (modify_cmds[i].exact)
+                       hit = strcmp(*argv, modify_cmds[i].word) == 0;
+               else
+                       hit = matches(*argv, modify_cmds[i].word) == 0;
+               if (hit)
+                       return iproute_modify(modify_cmds[i].cmd, modify_cmds[i].flags,
+                                             argc-1, argv+1);
+       }
+
+       if (matches(*argv, "list") == 0 ||
+           matches(*argv, "show") == 0 ||
+           matches(*argv, "lst") == 0)
+               return iproute_list_or_flush(argc-1, argv+1, 0);
+       if (matches(*argv, "get") == 0)
+               return iproute_get(argc-1, argv+1);
+       if (matches(*argv, "flush") == 0)
+               return iproute_list_or_flush(argc-1, argv+1, 1);
+       if (matches(*argv, "help") == 0)
+               usage();
+
+       fprintf(stderr, "Command \"%s\" is unknown, try \"ip route help\".\n", *argv);
+       exit(-1);
+}
+
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..457864f8dec87d332842830e91b49c081e211526 100644 (file)
@@ -0,0 +1,323 @@
+/*
+ * iprule.c            "ip rule".
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ *
+ * Changes:
+ *
+ * Rani Assaf <rani@magic.metawire.com> 980929:        resolve addresses
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "rt_names.h"
+#include "utils.h"
+
+static void usage(void) __attribute__((noreturn));
+
+/* Print the "ip rule" synopsis on stderr and terminate with exit(-1). */
+static void usage(void)
+{
+       fprintf(stderr,
+               "Usage: ip rule [ list | add | del ] SELECTOR ACTION\n"
+               "SELECTOR := [ from PREFIX ] [ to PREFIX ] [ tos TOS ] [ fwmark FWMARK ]\n"
+               "            [ dev STRING ] [ pref NUMBER ]\n"
+               "ACTION := [ table TABLE_ID ] [ nat ADDRESS ]\n"
+               "          [ prohibit | reject | unreachable ]\n"
+               "          [ realms [SRCREALM/]DSTREALM ]\n"
+               "TABLE_ID := [ local | main | default | NUMBER ]\n");
+       exit(-1);
+}
+
+/*
+ * Print one routing policy rule as a single text line.
+ *
+ * who - netlink address of the sender (not used here)
+ * n   - netlink message; anything other than RTM_NEWRULE is ignored
+ * arg - the FILE * to print to
+ *
+ * Returns 0 when the message was printed or ignored, -1 when the
+ * message is too short to hold a struct rtmsg.
+ */
+int print_rule(struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
+{
+       FILE *fp = (FILE*)arg;
+       struct rtmsg *r = NLMSG_DATA(n);
+       int len = n->nlmsg_len;
+       int host_len = -1;
+       struct rtattr * tb[RTA_MAX+1];
+       char abuf[256];
+       SPRINT_BUF(b1);
+
+       if (n->nlmsg_type != RTM_NEWRULE)
+               return 0;
+
+       len -= NLMSG_LENGTH(sizeof(*r));
+       if (len < 0)
+               return -1;
+
+       memset(tb, 0, sizeof(tb));
+       parse_rtattr(tb, RTA_MAX, RTM_RTA(r), len);
+
+       /* Prefix length that denotes a single host in this family. */
+       if (r->rtm_family == AF_INET)
+               host_len = 32;
+       else if (r->rtm_family == AF_INET6)
+               host_len = 128;
+       else if (r->rtm_family == AF_DECnet)
+               host_len = 16;
+       else if (r->rtm_family == AF_IPX)
+               host_len = 80;
+
+       if (tb[RTA_PRIORITY])
+               fprintf(fp, "%u:\t", *(unsigned*)RTA_DATA(tb[RTA_PRIORITY]));
+       else
+               fprintf(fp, "0:\t");
+
+       /* Source selector; "from all" when neither address nor length is set. */
+       if (tb[RTA_SRC]) {
+               if (r->rtm_src_len != host_len) {
+                       fprintf(fp, "from %s/%u ", rt_addr_n2a(r->rtm_family,
+                                                        RTA_PAYLOAD(tb[RTA_SRC]),
+                                                        RTA_DATA(tb[RTA_SRC]),
+                                                        abuf, sizeof(abuf)),
+                               r->rtm_src_len
+                               );
+               } else {
+                       fprintf(fp, "from %s ", format_host(r->rtm_family,
+                                                      RTA_PAYLOAD(tb[RTA_SRC]),
+                                                      RTA_DATA(tb[RTA_SRC]),
+                                                      abuf, sizeof(abuf))
+                               );
+               }
+       } else if (r->rtm_src_len) {
+               fprintf(fp, "from 0/%d ", r->rtm_src_len);
+       } else {
+               fprintf(fp, "from all ");
+       }
+
+       /* Destination selector; nothing is printed when it is unset. */
+       if (tb[RTA_DST]) {
+               if (r->rtm_dst_len != host_len) {
+                       fprintf(fp, "to %s/%u ", rt_addr_n2a(r->rtm_family,
+                                                        RTA_PAYLOAD(tb[RTA_DST]),
+                                                        RTA_DATA(tb[RTA_DST]),
+                                                        abuf, sizeof(abuf)),
+                               r->rtm_dst_len
+                               );
+               } else {
+                       fprintf(fp, "to %s ", format_host(r->rtm_family,
+                                                      RTA_PAYLOAD(tb[RTA_DST]),
+                                                      RTA_DATA(tb[RTA_DST]),
+                                                      abuf, sizeof(abuf)));
+               }
+       } else if (r->rtm_dst_len) {
+               fprintf(fp, "to 0/%d ", r->rtm_dst_len);
+       }
+
+       if (r->rtm_tos)
+               fprintf(fp, "tos %s ", rtnl_dsfield_n2a(r->rtm_tos, b1, sizeof(b1)));
+
+       /* The fwmark is carried in the RTA_PROTOINFO attribute. */
+       if (tb[RTA_PROTOINFO]) {
+               fprintf(fp, "fwmark %8x ", *(__u32*)RTA_DATA(tb[RTA_PROTOINFO]));
+       }
+
+       if (tb[RTA_IIF]) {
+               fprintf(fp, "iif %s ", (char*)RTA_DATA(tb[RTA_IIF]));
+       }
+
+       if (r->rtm_table)
+               fprintf(fp, "lookup %s ", rtnl_rttable_n2a(r->rtm_table, b1, sizeof(b1)));
+
+       if (tb[RTA_FLOW]) {
+               /* High 16 bits: source realm, low 16 bits: destination realm. */
+               __u32 to = *(__u32*)RTA_DATA(tb[RTA_FLOW]);
+               __u32 from = to>>16;
+               to &= 0xFFFF;
+               /*
+                * Always emit the keyword.  It used to be printed only
+                * together with a source realm, which left a bare realm
+                * name on the line for destination-only realms.
+                */
+               fprintf(fp, "realms ");
+               if (from) {
+                       fprintf(fp, "%s/",
+                               rtnl_rtrealm_n2a(from, b1, sizeof(b1)));
+               }
+               fprintf(fp, "%s ",
+                       rtnl_rtrealm_n2a(to, b1, sizeof(b1)));
+       }
+
+       if (r->rtm_type == RTN_NAT) {
+               if (tb[RTA_GATEWAY]) {
+                       fprintf(fp, "map-to %s ",
+                               format_host(r->rtm_family,
+                                           RTA_PAYLOAD(tb[RTA_GATEWAY]),
+                                           RTA_DATA(tb[RTA_GATEWAY]),
+                                           abuf, sizeof(abuf)));
+               } else
+                       fprintf(fp, "masquerade");
+       } else if (r->rtm_type != RTN_UNICAST)
+               fprintf(fp, "%s", rtnl_rtntype_n2a(r->rtm_type, b1, sizeof(b1)));
+
+       fprintf(fp, "\n");
+       fflush(fp);
+       return 0;
+}
+
+/*
+ * "ip rule list": dump all rules of the preferred address family
+ * (AF_INET when none was selected) to stdout via print_rule().
+ *
+ * Takes no arguments; any argument is rejected with -1.
+ * Returns 0 on success and 1 when the netlink socket cannot be opened,
+ * the dump request cannot be sent, or the dump is cut short.
+ */
+int iprule_list(int argc, char **argv)
+{
+       struct rtnl_handle rth;
+       int af = preferred_family;
+
+       if (af == AF_UNSPEC)
+               af = AF_INET;
+
+       if (argc > 0) {
+               fprintf(stderr, "\"ip rule show\" does not take any arguments.\n");
+               return -1;
+       }
+
+       if (rtnl_open(&rth, 0) < 0)
+               return 1;
+
+       if (rtnl_wilddump_request(&rth, af, RTM_GETRULE) < 0) {
+               perror("Cannot send dump request");
+               return 1;
+       }
+
+       if (rtnl_dump_filter(&rth, print_rule, stdout, NULL, NULL) < 0) {
+               fprintf(stderr, "Dump terminated\n");
+               return 1;
+       }
+
+       return 0;
+}
+
+
+/*
+ * Build a rule request from the command line and send it over netlink.
+ *
+ * cmd        - netlink message type; do_iprule() passes RTM_NEWRULE or
+ *              RTM_DELRULE
+ * argc, argv - selector and action keywords following the sub-command
+ *
+ * Returns 0 on success, 1 when the netlink socket cannot be opened and
+ * 2 when the request fails.  Invalid values are handed to invarg().
+ */
+int iprule_modify(int cmd, int argc, char **argv)
+{
+       int table_ok = 0;
+       struct rtnl_handle rth;
+       /* Request: netlink header, rtmsg, then room for the attributes. */
+       struct {
+               struct nlmsghdr         n;
+               struct rtmsg            r;
+               char                    buf[1024];
+       } req;
+
+       memset(&req, 0, sizeof(req));
+
+       req.n.nlmsg_type = cmd;
+       req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
+       req.n.nlmsg_flags = NLM_F_REQUEST;
+       req.r.rtm_family = preferred_family;
+       req.r.rtm_protocol = RTPROT_BOOT;
+       req.r.rtm_scope = RT_SCOPE_UNIVERSE;
+       req.r.rtm_table = 0;
+       req.r.rtm_type = RTN_UNSPEC;
+
+       /* A new rule must not exist yet and defaults to a unicast lookup. */
+       if (cmd == RTM_NEWRULE) {
+               req.n.nlmsg_flags |= NLM_F_CREATE|NLM_F_EXCL;
+               req.r.rtm_type = RTN_UNICAST;
+       }
+
+       /*
+        * One keyword per iteration.  NEXT_ARG() is defined elsewhere;
+        * presumably it advances to the keyword's value - TODO confirm.
+        */
+       while (argc > 0) {
+               if (strcmp(*argv, "from") == 0) {
+                       /* Source prefix (the local is named dst, but it feeds RTA_SRC). */
+                       inet_prefix dst;
+                       NEXT_ARG();
+                       get_prefix(&dst, *argv, req.r.rtm_family);
+                       req.r.rtm_src_len = dst.bitlen;
+                       addattr_l(&req.n, sizeof(req), RTA_SRC, &dst.data, dst.bytelen);
+               } else if (strcmp(*argv, "to") == 0) {
+                       inet_prefix dst;
+                       NEXT_ARG();
+                       get_prefix(&dst, *argv, req.r.rtm_family);
+                       req.r.rtm_dst_len = dst.bitlen;
+                       addattr_l(&req.n, sizeof(req), RTA_DST, &dst.data, dst.bytelen);
+               } else if (matches(*argv, "preference") == 0 ||
+                          matches(*argv, "order") == 0 ||
+                          matches(*argv, "priority") == 0) {
+                       __u32 pref;
+                       NEXT_ARG();
+                       if (get_u32(&pref, *argv, 0))
+                               invarg("preference value is invalid\n", *argv);
+                       addattr32(&req.n, sizeof(req), RTA_PRIORITY, pref);
+               } else if (strcmp(*argv, "tos") == 0) {
+                       __u32 tos;
+                       NEXT_ARG();
+                       if (rtnl_dsfield_a2n(&tos, *argv))
+                               invarg("TOS value is invalid\n", *argv);
+                       req.r.rtm_tos = tos;
+               } else if (strcmp(*argv, "fwmark") == 0) {
+                       /* Parsed with base 16 and sent as RTA_PROTOINFO. */
+                       __u32 fwmark;
+                       NEXT_ARG();
+                       if (get_u32(&fwmark, *argv, 16))
+                               invarg("fwmark value is invalid\n", *argv);
+                       addattr32(&req.n, sizeof(req), RTA_PROTOINFO, fwmark);
+               } else if (matches(*argv, "realms") == 0) {
+                       __u32 realm;
+                       NEXT_ARG();
+                       if (get_rt_realms(&realm, *argv))
+                               invarg("invalid realms\n", *argv);
+                       addattr32(&req.n, sizeof(req), RTA_FLOW, realm);
+               } else if (matches(*argv, "table") == 0 ||
+                          strcmp(*argv, "lookup") == 0) {
+                       int tid;
+                       NEXT_ARG();
+                       if (rtnl_rttable_a2n(&tid, *argv))
+                               invarg("invalid table ID\n", *argv);
+                       req.r.rtm_table = tid;
+                       /* Remember that the table was given explicitly. */
+                       table_ok = 1;
+               } else if (strcmp(*argv, "dev") == 0 ||
+                          strcmp(*argv, "iif") == 0) {
+                       NEXT_ARG();
+                       /* The name is sent with its terminating NUL. */
+                       addattr_l(&req.n, sizeof(req), RTA_IIF, *argv, strlen(*argv)+1);
+               } else if (strcmp(*argv, "nat") == 0 ||
+                          matches(*argv, "map-to") == 0) {
+                       NEXT_ARG();
+                       addattr32(&req.n, sizeof(req), RTA_GATEWAY, get_addr32(*argv));
+                       req.r.rtm_type = RTN_NAT;
+               } else {
+                       /* Anything else is a rule type, optionally introduced by "type". */
+                       int type;
+
+                       if (strcmp(*argv, "type") == 0) {
+                               NEXT_ARG();
+                       }
+                       if (matches(*argv, "help") == 0)
+                               usage();
+                       if (rtnl_rtntype_a2n(&type, *argv))
+                               invarg("Failed to parse rule type", *argv);
+                       req.r.rtm_type = type;
+               }
+               argc--;
+               argv++;
+       }
+
+       /* No family selected: fall back to IPv4. */
+       if (req.r.rtm_family == AF_UNSPEC)
+               req.r.rtm_family = AF_INET;
+
+       /* A new rule without an explicit table looks up the main table. */
+       if (!table_ok && cmd == RTM_NEWRULE)
+               req.r.rtm_table = RT_TABLE_MAIN;
+
+       if (rtnl_open(&rth, 0) < 0)
+               return 1;
+
+       if (rtnl_talk(&rth, &req.n, 0, 0, NULL, NULL, NULL) < 0)
+               return 2;
+
+       return 0;
+}
+
+/*
+ * Entry point for "ip rule": dispatch on the sub-command in argv[0].
+ * Without a sub-command the rules are listed.  An unrecognised
+ * sub-command is reported on stderr and the process exits with -1.
+ */
+int do_iprule(int argc, char **argv)
+{
+       char *cmd;
+
+       if (argc < 1)
+               return iprule_list(0, NULL);
+
+       cmd = argv[0];
+
+       if (!matches(cmd, "list") || !matches(cmd, "lst") ||
+           !matches(cmd, "show"))
+               return iprule_list(argc-1, argv+1);
+       if (!matches(cmd, "add"))
+               return iprule_modify(RTM_NEWRULE, argc-1, argv+1);
+       if (!matches(cmd, "delete"))
+               return iprule_modify(RTM_DELRULE, argc-1, argv+1);
+       if (!matches(cmd, "help"))
+               usage();
+
+       fprintf(stderr, "Command \"%s\" is unknown, try \"ip rule help\".\n", cmd);
+       exit(-1);
+}
+
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..41c262b51512624106144ae108c5a4f55735dd2f 100644 (file)
@@ -0,0 +1,581 @@
+/*
+ * iptunnel.c         "ip tunnel"
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ *
+ * Changes:
+ *
+ * Rani Assaf <rani@magic.metawire.com> 980929:        resolve addresses
+ * Rani Assaf <rani@magic.metawire.com> 980930:        do not allow key for ipip/sit
+ * Phil Karn <karn@ka9q.ampr.org>      990408: "pmtudisc" flag
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <linux/if.h>
+#include <linux/if_arp.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <arpa/inet.h>
+#include <linux/if_tunnel.h>
+
+#include "rt_names.h"
+#include "utils.h"
+
+static void usage(void) __attribute__((noreturn));
+
+/* Print the "ip tunnel" synopsis on stderr and terminate with exit(-1). */
+static void usage(void)
+{
+       fprintf(stderr,
+               "Usage: ip tunnel { add | change | del | show } [ NAME ]\n"
+               "          [ mode { ipip | gre | sit } ] [ remote ADDR ] [ local ADDR ]\n"
+               "          [ [i|o]seq ] [ [i|o]key KEY ] [ [i|o]csum ]\n"
+               "          [ ttl TTL ] [ tos TOS ] [ [no]pmtudisc ] [ dev PHYS_DEV ]\n"
+               "\n"
+               "Where: NAME := STRING\n"
+               "       ADDR := { IP_ADDRESS | any }\n"
+               "       TOS  := { NUMBER | inherit }\n"
+               "       TTL  := { 1..255 | inherit }\n"
+               "       KEY  := { DOTTED_QUAD | NUMBER }\n");
+       exit(-1);
+}
+
+/*
+ * Look up the interface index of device "dev" with SIOCGIFINDEX.
+ *
+ * Returns the index, or 0 after reporting the failure with perror().
+ * The name is copied with a bound so an over-long argument cannot
+ * overrun ifr_name, and the socket is closed on every path.
+ */
+static int do_ioctl_get_ifindex(char *dev)
+{
+       struct ifreq ifr;
+       int fd;
+       int err;
+
+       memset(&ifr, 0, sizeof(ifr));
+       strncpy(ifr.ifr_name, dev, IFNAMSIZ - 1);
+
+       fd = socket(AF_INET, SOCK_DGRAM, 0);
+       if (fd < 0) {
+               perror("socket");
+               return 0;
+       }
+
+       err = ioctl(fd, SIOCGIFINDEX, &ifr);
+       if (err)
+               perror("ioctl");        /* report before close() can change errno */
+       close(fd);
+
+       return err ? 0 : ifr.ifr_ifindex;
+}
+
+/*
+ * Query the hardware type of device "dev" with SIOCGIFHWADDR.
+ *
+ * Returns the sa_family field of the reported hardware address (the
+ * caller compares it against ARPHRD_* values), or -1 after reporting
+ * the failure with perror().  The name copy is bounded and the socket
+ * is closed on every path.
+ */
+static int do_ioctl_get_iftype(char *dev)
+{
+       struct ifreq ifr;
+       int fd;
+       int err;
+
+       memset(&ifr, 0, sizeof(ifr));
+       strncpy(ifr.ifr_name, dev, IFNAMSIZ - 1);
+
+       fd = socket(AF_INET, SOCK_DGRAM, 0);
+       if (fd < 0) {
+               perror("socket");
+               return -1;
+       }
+
+       err = ioctl(fd, SIOCGIFHWADDR, &ifr);
+       if (err)
+               perror("ioctl");        /* report before close() can change errno */
+       close(fd);
+
+       return err ? -1 : ifr.ifr_addr.sa_family;
+}
+
+
+/*
+ * Translate interface index "idx" into a device name with SIOCGIFNAME.
+ *
+ * Returns a pointer into a static buffer that is overwritten by the
+ * next call, or NULL after reporting the failure with perror().
+ * The socket is closed on every path.
+ */
+static char * do_ioctl_get_ifname(int idx)
+{
+       static struct ifreq ifr;
+       int fd;
+       int err;
+
+       ifr.ifr_ifindex = idx;
+
+       fd = socket(AF_INET, SOCK_DGRAM, 0);
+       if (fd < 0) {
+               perror("socket");
+               return NULL;
+       }
+
+       err = ioctl(fd, SIOCGIFNAME, &ifr);
+       if (err)
+               perror("ioctl");        /* report before close() can change errno */
+       close(fd);
+
+       return err ? NULL : ifr.ifr_name;
+}
+
+
+
+/*
+ * Read the parameters of a tunnel into *p with SIOCGETTUNNEL.
+ *
+ * basedev - device the ioctl is issued on
+ * p       - receives the tunnel parameters
+ *
+ * Returns the ioctl result (0 on success); failures are reported with
+ * perror().  Returns -1 when no socket can be created.
+ */
+static int do_get_ioctl(char *basedev, struct ip_tunnel_parm *p)
+{
+       struct ifreq ifr;
+       int fd;
+       int err;
+
+       memset(&ifr, 0, sizeof(ifr));
+       /* Bounded copy: an over-long name must not overrun ifr_name. */
+       strncpy(ifr.ifr_name, basedev, IFNAMSIZ - 1);
+       ifr.ifr_ifru.ifru_data = (void*)p;
+
+       fd = socket(AF_INET, SOCK_DGRAM, 0);
+       if (fd < 0) {
+               perror("socket");
+               return -1;
+       }
+
+       err = ioctl(fd, SIOCGETTUNNEL, &ifr);
+       if (err)
+               perror("ioctl");
+       close(fd);
+       return err;
+}
+
+/*
+ * Create or change a tunnel.
+ *
+ * cmd     - ioctl request; do_add() passes SIOCADDTUNNEL or SIOCCHGTUNNEL
+ * basedev - device the ioctl is issued on, except that a change of a
+ *           named tunnel is issued on that tunnel itself
+ * p       - tunnel parameters handed to the kernel
+ *
+ * Returns the ioctl result (0 on success); failures are reported with
+ * perror().  Returns -1 when no socket can be created.
+ */
+static int do_add_ioctl(int cmd, char *basedev, struct ip_tunnel_parm *p)
+{
+       struct ifreq ifr;
+       int fd;
+       int err;
+
+       memset(&ifr, 0, sizeof(ifr));
+       /* Bounded copy: neither source is guaranteed to fit ifr_name. */
+       if (cmd == SIOCCHGTUNNEL && p->name[0])
+               strncpy(ifr.ifr_name, p->name, IFNAMSIZ - 1);
+       else
+               strncpy(ifr.ifr_name, basedev, IFNAMSIZ - 1);
+       ifr.ifr_ifru.ifru_data = (void*)p;
+
+       fd = socket(AF_INET, SOCK_DGRAM, 0);
+       if (fd < 0) {
+               perror("socket");
+               return -1;
+       }
+
+       err = ioctl(fd, cmd, &ifr);
+       if (err)
+               perror("ioctl");
+       close(fd);
+       return err;
+}
+
+/*
+ * Delete a tunnel with SIOCDELTUNNEL.
+ *
+ * basedev - device the ioctl is issued on when the tunnel has no name
+ * p       - tunnel parameters; a non-empty p->name selects the device
+ *
+ * Returns the ioctl result (0 on success); failures are reported with
+ * perror().  Returns -1 when no socket can be created.
+ */
+static int do_del_ioctl(char *basedev, struct ip_tunnel_parm *p)
+{
+       struct ifreq ifr;
+       int fd;
+       int err;
+
+       memset(&ifr, 0, sizeof(ifr));
+       /* Bounded copy: neither source is guaranteed to fit ifr_name. */
+       if (p->name[0])
+               strncpy(ifr.ifr_name, p->name, IFNAMSIZ - 1);
+       else
+               strncpy(ifr.ifr_name, basedev, IFNAMSIZ - 1);
+       ifr.ifr_ifru.ifru_data = (void*)p;
+
+       fd = socket(AF_INET, SOCK_DGRAM, 0);
+       if (fd < 0) {
+               perror("socket");
+               return -1;
+       }
+
+       err = ioctl(fd, SIOCDELTUNNEL, &ifr);
+       if (err)
+               perror("ioctl");
+       close(fd);
+       return err;
+}
+
+/*
+ * Convert the value of a "key", "ikey" or "okey" option.
+ *
+ * what - option name, used in the error message
+ * arg  - option value; a value containing '.' goes through
+ *        get_addr32(), anything else is parsed as an unsigned number
+ *        and converted with htonl()
+ *
+ * Exits with -1 when the number cannot be parsed.
+ */
+static __u32 parse_key_arg(const char *what, char *arg)
+{
+       unsigned uval;
+
+       if (strchr(arg, '.'))
+               return get_addr32(arg);
+
+       if (get_unsigned(&uval, arg, 0)<0) {
+               fprintf(stderr, "invalid value of \"%s\"\n", what);
+               exit(-1);
+       }
+       return htonl(uval);
+}
+
+/*
+ * Parse the "ip tunnel" command line into *p.
+ *
+ * argc, argv - arguments following the sub-command
+ * cmd        - ioctl the caller is going to issue; for SIOCCHGTUNNEL
+ *              with the tunnel name as first argument, the current
+ *              parameters of that tunnel are loaded as the baseline
+ * p          - result; zeroed first, then filled in
+ *
+ * Returns 0 on success and -1 on an error that was already reported.
+ * Some invalid values terminate the process instead.
+ */
+static int parse_args(int argc, char **argv, int cmd, struct ip_tunnel_parm *p)
+{
+       int count = 0;
+       char medium[IFNAMSIZ];
+
+       memset(p, 0, sizeof(*p));
+       memset(&medium, 0, sizeof(medium));
+
+       p->iph.version = 4;
+       p->iph.ihl = 5;
+#ifndef IP_DF
+#define IP_DF          0x4000          /* Flag: "Don't Fragment"       */
+#endif
+       p->iph.frag_off = htons(IP_DF);
+
+       while (argc > 0) {
+               if (strcmp(*argv, "mode") == 0) {
+                       NEXT_ARG();
+                       if (strcmp(*argv, "ipip") == 0 ||
+                           strcmp(*argv, "ip/ip") == 0) {
+                               if (p->iph.protocol && p->iph.protocol != IPPROTO_IPIP) {
+                                       fprintf(stderr,"You managed to ask for more than one tunnel mode.\n");
+                                       exit(-1);
+                               }
+                               p->iph.protocol = IPPROTO_IPIP;
+                       } else if (strcmp(*argv, "gre") == 0 ||
+                                  strcmp(*argv, "gre/ip") == 0) {
+                               if (p->iph.protocol && p->iph.protocol != IPPROTO_GRE) {
+                                       fprintf(stderr,"You managed to ask for more than one tunnel mode.\n");
+                                       exit(-1);
+                               }
+                               p->iph.protocol = IPPROTO_GRE;
+                       } else if (strcmp(*argv, "sit") == 0 ||
+                                  strcmp(*argv, "ipv6/ip") == 0) {
+                               if (p->iph.protocol && p->iph.protocol != IPPROTO_IPV6) {
+                                       fprintf(stderr,"You managed to ask for more than one tunnel mode.\n");
+                                       exit(-1);
+                               }
+                               p->iph.protocol = IPPROTO_IPV6;
+                       } else {
+                               fprintf(stderr,"Cannot guess tunnel mode.\n");
+                               exit(-1);
+                       }
+               } else if (strcmp(*argv, "key") == 0) {
+                       NEXT_ARG();
+                       p->i_flags |= GRE_KEY;
+                       p->o_flags |= GRE_KEY;
+                       p->i_key = p->o_key = parse_key_arg("key", *argv);
+               } else if (strcmp(*argv, "ikey") == 0) {
+                       NEXT_ARG();
+                       p->i_flags |= GRE_KEY;
+                       /*
+                        * The dotted-quad form used to be stored in
+                        * o_key; the input key belongs in i_key.
+                        */
+                       p->i_key = parse_key_arg("ikey", *argv);
+               } else if (strcmp(*argv, "okey") == 0) {
+                       NEXT_ARG();
+                       p->o_flags |= GRE_KEY;
+                       p->o_key = parse_key_arg("okey", *argv);
+               } else if (strcmp(*argv, "seq") == 0) {
+                       p->i_flags |= GRE_SEQ;
+                       p->o_flags |= GRE_SEQ;
+               } else if (strcmp(*argv, "iseq") == 0) {
+                       p->i_flags |= GRE_SEQ;
+               } else if (strcmp(*argv, "oseq") == 0) {
+                       p->o_flags |= GRE_SEQ;
+               } else if (strcmp(*argv, "csum") == 0) {
+                       p->i_flags |= GRE_CSUM;
+                       p->o_flags |= GRE_CSUM;
+               } else if (strcmp(*argv, "icsum") == 0) {
+                       p->i_flags |= GRE_CSUM;
+               } else if (strcmp(*argv, "ocsum") == 0) {
+                       p->o_flags |= GRE_CSUM;
+               } else if (strcmp(*argv, "nopmtudisc") == 0) {
+                       p->iph.frag_off = 0;
+               } else if (strcmp(*argv, "pmtudisc") == 0) {
+                       p->iph.frag_off = htons(IP_DF);
+               } else if (strcmp(*argv, "remote") == 0) {
+                       NEXT_ARG();
+                       /* "any" leaves the address at zero. */
+                       if (strcmp(*argv, "any"))
+                               p->iph.daddr = get_addr32(*argv);
+               } else if (strcmp(*argv, "local") == 0) {
+                       NEXT_ARG();
+                       if (strcmp(*argv, "any"))
+                               p->iph.saddr = get_addr32(*argv);
+               } else if (strcmp(*argv, "dev") == 0) {
+                       NEXT_ARG();
+                       strncpy(medium, *argv, IFNAMSIZ-1);
+               } else if (strcmp(*argv, "ttl") == 0) {
+                       unsigned uval;
+                       NEXT_ARG();
+                       /* "inherit" leaves the TTL at zero. */
+                       if (strcmp(*argv, "inherit") != 0) {
+                               if (get_unsigned(&uval, *argv, 0))
+                                       invarg("invalid TTL\n", *argv);
+                               if (uval > 255)
+                                       invarg("TTL must be <=255\n", *argv);
+                               p->iph.ttl = uval;
+                       }
+               } else if (strcmp(*argv, "tos") == 0 ||
+                          matches(*argv, "dsfield") == 0) {
+                       __u32 uval;
+                       NEXT_ARG();
+                       if (strcmp(*argv, "inherit") != 0) {
+                               if (rtnl_dsfield_a2n(&uval, *argv))
+                                       invarg("bad TOS value", *argv);
+                               p->iph.tos = uval;
+                       } else
+                               p->iph.tos = 1; /* bit 0 is shown as "inherit" by print_tunnel() */
+               } else {
+                       /* Anything else is the tunnel name, optionally after "name". */
+                       if (strcmp(*argv, "name") == 0) {
+                               NEXT_ARG();
+                       }
+                       if (matches(*argv, "help") == 0)
+                               usage();
+                       if (p->name[0])
+                               duparg2("name", *argv);
+                       /*
+                        * *p was zeroed above, so copying at most
+                        * IFNAMSIZ-1 bytes keeps the name terminated
+                        * for the string functions used on it later.
+                        */
+                       strncpy(p->name, *argv, IFNAMSIZ - 1);
+                       if (cmd == SIOCCHGTUNNEL && count == 0) {
+                               struct ip_tunnel_parm old_p;
+                               memset(&old_p, 0, sizeof(old_p));
+                               if (do_get_ioctl(*argv, &old_p))
+                                       return -1;
+                               *p = old_p;
+                       }
+               }
+               count++;
+               argc--; argv++;
+       }
+
+
+       /* No explicit mode: guess it from the start of the tunnel name. */
+       if (p->iph.protocol == 0) {
+               if (memcmp(p->name, "gre", 3) == 0)
+                       p->iph.protocol = IPPROTO_GRE;
+               else if (memcmp(p->name, "ipip", 4) == 0)
+                       p->iph.protocol = IPPROTO_IPIP;
+               else if (memcmp(p->name, "sit", 3) == 0)
+                       p->iph.protocol = IPPROTO_IPV6;
+       }
+
+       if (p->iph.protocol == IPPROTO_IPIP || p->iph.protocol == IPPROTO_IPV6) {
+               if ((p->i_flags & GRE_KEY) || (p->o_flags & GRE_KEY)) {
+                       fprintf(stderr, "Keys are not allowed with ipip and sit.\n");
+                       return -1;
+               }
+       }
+
+       if (medium[0]) {
+               p->link = do_ioctl_get_ifindex(medium);
+               if (p->link == 0)
+                       return -1;
+       }
+
+       /* A multicast remote supplies any key that was not given explicitly. */
+       if (p->i_key == 0 && IN_MULTICAST(ntohl(p->iph.daddr))) {
+               p->i_key = p->iph.daddr;
+               p->i_flags |= GRE_KEY;
+       }
+       if (p->o_key == 0 && IN_MULTICAST(ntohl(p->iph.daddr))) {
+               p->o_key = p->iph.daddr;
+               p->o_flags |= GRE_KEY;
+       }
+       if (IN_MULTICAST(ntohl(p->iph.daddr)) && !p->iph.saddr) {
+               fprintf(stderr, "Broadcast tunnel requires a source address.\n");
+               return -1;
+       }
+       return 0;
+}
+
+
+/*
+ * "ip tunnel add" / "ip tunnel change": parse the arguments and issue
+ * the ioctl "cmd" on the base device that belongs to the tunnel mode.
+ *
+ * Returns the ioctl result, or -1 when the arguments are invalid, a
+ * fixed TTL is combined with nopmtudisc, or the mode is unknown.
+ */
+static int do_add(int cmd, int argc, char **argv)
+{
+       struct ip_tunnel_parm p;
+
+       if (parse_args(argc, argv, cmd, &p) < 0)
+               return -1;
+
+       if (p.iph.ttl && p.iph.frag_off == 0) {
+               /* The option is spelled "nopmtudisc"; the message had it misspelled. */
+               fprintf(stderr, "ttl != 0 and nopmtudisc are incompatible\n");
+               return -1;
+       }
+
+       switch (p.iph.protocol) {
+       case IPPROTO_IPIP:
+               return do_add_ioctl(cmd, "tunl0", &p);
+       case IPPROTO_GRE:
+               return do_add_ioctl(cmd, "gre0", &p);
+       case IPPROTO_IPV6:
+               return do_add_ioctl(cmd, "sit0", &p);
+       default:
+               fprintf(stderr, "cannot determine tunnel mode (ipip, gre or sit)\n");
+               return -1;
+       }
+}
+
+/*
+ * "ip tunnel del": parse the arguments and delete the tunnel.  The
+ * fallback device is the base device of the tunnel mode, or the tunnel
+ * name itself when the mode is unknown.
+ *
+ * Returns the ioctl result, or -1 when the arguments are invalid.
+ */
+int do_del(int argc, char **argv)
+{
+       struct ip_tunnel_parm p;
+       char *basedev;
+
+       if (parse_args(argc, argv, SIOCDELTUNNEL, &p) < 0)
+               return -1;
+
+       if (p.iph.protocol == IPPROTO_IPIP)
+               basedev = "tunl0";
+       else if (p.iph.protocol == IPPROTO_GRE)
+               basedev = "gre0";
+       else if (p.iph.protocol == IPPROTO_IPV6)
+               basedev = "sit0";
+       else
+               basedev = p.name;
+
+       return do_del_ioctl(basedev, &p);
+}
+
+/*
+ * Print a description of tunnel *p on stdout.  No final newline is
+ * written; the caller adds it.  Lines after the first one are
+ * introduced by _SL_.
+ */
+void print_tunnel(struct ip_tunnel_parm *p)
+{
+       char s1[1024];
+       char s2[1024];
+       char s3[64];
+       char s4[64];
+
+       /* Keys are shown in dotted-quad form. */
+       inet_ntop(AF_INET, &p->i_key, s3, sizeof(s3));
+       inet_ntop(AF_INET, &p->o_key, s4, sizeof(s4));
+
+       /* Do not use format_host() for local addr,
+        * symbolic name will not be useful.
+        */
+       printf("%s: %s/ip  remote %s  local %s ",
+              p->name,
+              p->iph.protocol == IPPROTO_IPIP ? "ip" :
+              (p->iph.protocol == IPPROTO_GRE ? "gre" :
+               (p->iph.protocol == IPPROTO_IPV6 ? "ipv6" : "unknown")),
+              p->iph.daddr ? format_host(AF_INET, 4, &p->iph.daddr, s1, sizeof(s1))  : "any",
+              p->iph.saddr ? rt_addr_n2a(AF_INET, 4, &p->iph.saddr, s2, sizeof(s2)) : "any");
+
+       if (p->link) {
+               char *n = do_ioctl_get_ifname(p->link);
+               if (n)
+                       printf(" dev %s ", n);
+       }
+
+       if (p->iph.ttl)
+               printf(" ttl %d ", p->iph.ttl);
+       else
+               printf(" ttl inherit ");
+
+       if (p->iph.tos) {
+               SPRINT_BUF(b1);
+               printf(" tos");
+               /* Bit 0 means "inherit"; the remaining bits are the TOS value. */
+               if (p->iph.tos&1)
+                       printf(" inherit");
+               if (p->iph.tos&~1)
+                       printf("%c%s ", p->iph.tos&1 ? '/' : ' ',
+                              rtnl_dsfield_n2a(p->iph.tos&~1, b1, sizeof(b1)));
+       }
+
+       if (!(p->iph.frag_off&htons(IP_DF)))
+               printf(" nopmtudisc");
+
+       if ((p->i_flags&GRE_KEY) && (p->o_flags&GRE_KEY) && p->o_key == p->i_key)
+               printf(" key %s", s3);
+       else if ((p->i_flags|p->o_flags)&GRE_KEY) {
+               if (p->i_flags&GRE_KEY)
+                       printf(" ikey %s ", s3);
+               if (p->o_flags&GRE_KEY)
+                       printf(" okey %s ", s4);
+       }
+
+       /*
+        * Each flag line starts with _SL_ and carries no newline of its
+        * own.  The first one used to end in "\n", unlike its siblings,
+        * which broke the layout whenever another line followed.
+        */
+       if (p->i_flags&GRE_SEQ)
+               printf("%s  Drop packets out of sequence.", _SL_);
+       if (p->i_flags&GRE_CSUM)
+               printf("%s  Checksum in received packet is required.", _SL_);
+       if (p->o_flags&GRE_SEQ)
+               printf("%s  Sequence packets on output.", _SL_);
+       if (p->o_flags&GRE_CSUM)
+               printf("%s  Checksum output packets.", _SL_);
+}
+
+/*
+ * List every tunnel device found in /proc/net/dev that passes the
+ * filter in *p (name, link, remote, local and input key; unset fields
+ * match anything).  With show_stats set, the per-device counters from
+ * /proc/net/dev are printed as well.
+ *
+ * Returns 0 on success and -1 when the file cannot be opened or does
+ * not have the expected layout.  The file is closed on every path.
+ */
+static int do_tunnels_list(struct ip_tunnel_parm *p)
+{
+       /* Characters that end the "%s"-style device name token. */
+       static const char blanks[] = " \t\r\n\v\f";
+       char name[IFNAMSIZ];
+       unsigned long  rx_bytes, rx_packets, rx_errs, rx_drops,
+       rx_fifo, rx_frame,
+       tx_bytes, tx_packets, tx_errs, tx_drops,
+       tx_fifo, tx_colls, tx_carrier, rx_multi;
+       int type;
+       int ret = -1;
+       struct ip_tunnel_parm p1;
+       char buf[512];
+       FILE *fp = fopen("/proc/net/dev", "r");
+
+       if (fp == NULL) {
+               perror("fopen");
+               return -1;
+       }
+
+       /* The first two lines are column headers. */
+       if (fgets(buf, sizeof(buf), fp) == NULL ||
+           fgets(buf, sizeof(buf), fp) == NULL) {
+               fprintf(stderr, "Wrong format of /proc/net/dev. Sorry.\n");
+               goto out;
+       }
+
+       while (fgets(buf, sizeof(buf), fp) != NULL) {
+               char *ptr;
+               char *dev;
+               size_t len;
+
+               /* "<name>: <counters>": cut the line at the colon. */
+               ptr = strchr(buf, ':');
+               if (ptr == NULL) {
+                       fprintf(stderr, "Wrong format of /proc/net/dev. Sorry.\n");
+                       goto out;
+               }
+               *ptr++ = 0;
+
+               /*
+                * Take the first blank-delimited token as the name, and
+                * refuse one that does not fit the buffer instead of
+                * scanning it with an unbounded "%s".
+                */
+               dev = buf + strspn(buf, blanks);
+               len = strcspn(dev, blanks);
+               if (len == 0 || len >= sizeof(name)) {
+                       fprintf(stderr, "Wrong format of /proc/net/dev. Sorry.\n");
+                       goto out;
+               }
+               memcpy(name, dev, len);
+               name[len] = 0;
+
+               /* The counters are unsigned long, so scan them with %lu. */
+               if (sscanf(ptr, "%lu%lu%lu%lu%lu%lu%lu%*u%lu%lu%lu%lu%lu%lu%lu",
+                          &rx_bytes, &rx_packets, &rx_errs, &rx_drops,
+                          &rx_fifo, &rx_frame, &rx_multi,
+                          &tx_bytes, &tx_packets, &tx_errs, &tx_drops,
+                          &tx_fifo, &tx_colls, &tx_carrier) != 14)
+                       continue;
+               if (p->name[0] && strcmp(p->name, name))
+                       continue;
+               type = do_ioctl_get_iftype(name);
+               if (type == -1) {
+                       fprintf(stderr, "Failed to get type of [%s]\n", name);
+                       continue;
+               }
+               if (type != ARPHRD_TUNNEL && type != ARPHRD_IPGRE && type != ARPHRD_SIT)
+                       continue;
+               memset(&p1, 0, sizeof(p1));
+               if (do_get_ioctl(name, &p1))
+                       continue;
+               if ((p->link && p1.link != p->link) ||
+                   (p->name[0] && strcmp(p1.name, p->name)) ||
+                   (p->iph.daddr && p1.iph.daddr != p->iph.daddr) ||
+                   (p->iph.saddr && p1.iph.saddr != p->iph.saddr) ||
+                   (p->i_key && p1.i_key != p->i_key))
+                       continue;
+               print_tunnel(&p1);
+               if (show_stats) {
+                       printf("%s", _SL_);
+                       printf("RX: Packets    Bytes        Errors CsumErrs OutOfSeq Mcasts%s", _SL_);
+                       printf("    %-10lu %-12lu %-6lu %-8lu %-8lu %-8lu%s",
+                              rx_packets, rx_bytes, rx_errs, rx_frame, rx_fifo, rx_multi, _SL_);
+                       printf("TX: Packets    Bytes        Errors DeadLoop NoRoute  NoBufs%s", _SL_);
+                       printf("    %-10lu %-12lu %-6lu %-8lu %-8lu %-6lu",
+                              tx_packets, tx_bytes, tx_errs, tx_colls, tx_carrier, tx_drops);
+               }
+               printf("\n");
+       }
+       ret = 0;
+out:
+       fclose(fp);
+       return ret;
+}
+
+/*
+ * "ip tunnel show": with a known tunnel mode, print the single tunnel
+ * that was named (or the base device of that mode); otherwise list all
+ * tunnels that pass the filter.
+ *
+ * Returns -1 when the arguments are invalid or the tunnel cannot be
+ * queried, 0 otherwise.
+ */
+static int do_show(int argc, char **argv)
+{
+       struct ip_tunnel_parm p;
+       char *fallback;
+
+       if (parse_args(argc, argv, SIOCGETTUNNEL, &p) < 0)
+               return -1;
+
+       if (p.iph.protocol == IPPROTO_IPIP) {
+               fallback = "tunl0";
+       } else if (p.iph.protocol == IPPROTO_GRE) {
+               fallback = "gre0";
+       } else if (p.iph.protocol == IPPROTO_IPV6) {
+               fallback = "sit0";
+       } else {
+               /* Mode unknown: walk all devices; the result is not propagated. */
+               do_tunnels_list(&p);
+               return 0;
+       }
+
+       if (do_get_ioctl(p.name[0] ? p.name : fallback, &p))
+               return -1;
+
+       print_tunnel(&p);
+       printf("\n");
+       return 0;
+}
+
+/*
+ * Entry point for "ip tunnel": dispatch on the sub-command in argv[0].
+ * Without a sub-command the tunnels are shown.  An unrecognised
+ * sub-command is reported on stderr and the process exits with -1.
+ */
+int do_iptunnel(int argc, char **argv)
+{
+       char *cmd;
+
+       if (argc <= 0)
+               return do_show(0, NULL);
+
+       cmd = *argv;
+
+       if (!matches(cmd, "add"))
+               return do_add(SIOCADDTUNNEL, argc-1, argv+1);
+       if (!matches(cmd, "change"))
+               return do_add(SIOCCHGTUNNEL, argc-1, argv+1);
+       if (!matches(cmd, "del"))
+               return do_del(argc-1, argv+1);
+       if (!matches(cmd, "show") || !matches(cmd, "lst") ||
+           !matches(cmd, "list"))
+               return do_show(argc-1, argv+1);
+       if (!matches(cmd, "help"))
+               usage();
+
+       fprintf(stderr, "Command \"%s\" is unknown, try \"ip tunnel help\".\n", cmd);
+       exit(-1);
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..db43b5dff63075d2dd6af548f983df81807c019b 100644 (file)
--- a/ip/routef
+++ b/ip/routef
@@ -0,0 +1,3 @@
+#! /bin/sh
+
+exec ip -4 ro flush  scope global  type unicast
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..8d1d352abdff179647b2e84f1642f7c37a5f413b 100644 (file)
--- a/ip/routel
+++ b/ip/routel
@@ -0,0 +1,60 @@
+#!/bin/sh
+#$Id$
+
+#
+# Script created by: Stephen R. van den Berg <srb@cuci.nl>, 1999/04/18
+# Donated to the public domain.
+#
+# This script transforms the output of "ip" into more readable text.
+# "ip" is the Linux-advanced-routing configuration tool part of the
+# iproute package.
+#
+
+# "-h" as the first argument prints the usage line and exits with status 64.
+test "X-h" = "X$1" && echo "Usage: $0 [tablenr [raw ip args...]]" && exit 64
+
+# Without any arguments, list table 0.
+test -z "$*" && set 0
+
+# Each line printed by "ip route list" is split into its first word and the
+# rest; the loop re-emits the fields of interest in a fixed order and awk
+# lays them out in columns under a header line.
+ip route list table "$@" |
+ while read network rest
+ do set xx $rest
+    shift
+    proto=""
+    via=""
+    dev=""
+    scope=""
+    src=""
+    table=""
+    # A leading broadcast/local/unreachable keyword is shown in the gateway
+    # column, and the word after it is taken as the network.
+    case $network in
+       broadcast|local|unreachable) via=$network
+          network=$1
+          shift
+          ;;
+    esac
+    # The remaining words are consumed two at a time as "key value"; eval
+    # stores each value in the shell variable named by its key.
+    while test $# != 0
+    do
+       key=$1
+       val=$2
+       eval "$key=$val"
+       shift 2
+    done
+    echo "$network     $via    $src    $proto  $scope  $dev    $table"
+ done | awk -F '       ' '
+BEGIN {
+   format="%15s%-3s %15s %15s %8s %8s%7s %s\n";
+   printf(format,"target","","gateway","source","proto","scope","dev","tbl");
+ }
+ { network=$1;
+   mask="";
+   if(match(network,"/"))
+    { mask=" "substr(network,RSTART+1);
+      network=substr(network,0,RSTART);
+    }
+   via=$2;
+   src=$3;
+   proto=$4;
+   scope=$5;
+   dev=$6;
+   table=$7;
+   printf(format,network,mask,via,src,proto,scope,dev,table);
+ }
+'
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..21e818b4adca795f8eca27f64d63274cc8493778 100644 (file)
@@ -0,0 +1,116 @@
+/*
+ * rtm_map.c
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include "rt_names.h"
+#include "utils.h"
+
+/*
+ * Translate a route type number into its keyword.  Known RTN_* values give
+ * a string constant; any other value is formatted in decimal into buf (at
+ * most len bytes) and buf is returned instead.
+ */
+char *rtnl_rtntype_n2a(int id, char *buf, int len)
+{
+       char *name = NULL;
+
+       switch (id) {
+       case RTN_UNSPEC:        name = "none";          break;
+       case RTN_UNICAST:       name = "unicast";       break;
+       case RTN_LOCAL:         name = "local";         break;
+       case RTN_BROADCAST:     name = "broadcast";     break;
+       case RTN_ANYCAST:       name = "anycast";       break;
+       case RTN_MULTICAST:     name = "multicast";     break;
+       case RTN_BLACKHOLE:     name = "blackhole";     break;
+       case RTN_UNREACHABLE:   name = "unreachable";   break;
+       case RTN_PROHIBIT:      name = "prohibit";      break;
+       case RTN_THROW:         name = "throw";         break;
+       case RTN_NAT:           name = "nat";           break;
+       case RTN_XRESOLVE:      name = "xresolve";      break;
+       default:
+               break;
+       }
+
+       /* No keyword for this value: fall back to the number itself. */
+       if (name == NULL) {
+               snprintf(buf, len, "%d", id);
+               name = buf;
+       }
+       return name;
+}
+
+
+/*
+ * Translate a route type keyword or number into its numeric value.
+ * The keyword table is scanned from top to bottom and the first hit wins,
+ * so the order of its rows is significant.  Text that matches no row is
+ * parsed with strtoul() (base auto-detected) and must be a complete number
+ * not greater than 255.  Returns 0 and stores the value in *id on success,
+ * -1 (leaving *id alone) otherwise.
+ */
+int rtnl_rtntype_a2n(int *id, char *arg)
+{
+       static const struct {
+               char *word;
+               int exact;      /* non-zero: strcmp(), zero: matches() */
+               int type;
+       } names[] = {
+               { "local",       1, RTN_LOCAL },
+               { "nat",         1, RTN_NAT },
+               { "broadcast",   0, RTN_BROADCAST },
+               { "brd",         1, RTN_BROADCAST },
+               { "anycast",     0, RTN_ANYCAST },
+               { "multicast",   0, RTN_MULTICAST },
+               { "prohibit",    0, RTN_PROHIBIT },
+               { "unreachable", 0, RTN_UNREACHABLE },
+               { "blackhole",   0, RTN_BLACKHOLE },
+               { "xresolve",    0, RTN_XRESOLVE },
+               { "unicast",     0, RTN_UNICAST },
+               { "throw",       1, RTN_THROW },
+       };
+       unsigned long value;
+       unsigned int i;
+       char *tail;
+
+       for (i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
+               int miss;
+
+               if (names[i].exact)
+                       miss = strcmp(arg, names[i].word);
+               else
+                       miss = matches(arg, names[i].word);
+               if (miss == 0) {
+                       *id = names[i].type;
+                       return 0;
+               }
+       }
+
+       /* Not a keyword: accept a plain number in the range 0..255. */
+       value = strtoul(arg, &tail, 0);
+       if (!tail || tail == arg || *tail || value > 255)
+               return -1;
+
+       *id = value;
+       return 0;
+}
+
+/*
+ * Parse "B" or "A/B" into *realms.  The part before the slash, when there
+ * is one, is resolved with rtnl_rtrealm_a2n() and placed in the upper 16
+ * bits; the part after it (or the whole string when there is no slash) is
+ * OR-ed into the result unless it is empty.  The slash is overwritten with
+ * a NUL only for the duration of the first lookup and is always put back.
+ * Returns 0 on success, -1 if either lookup fails.
+ */
+int get_rt_realms(__u32 *realms, char *arg)
+{
+       __u32 low = 0;
+       char *slash = strchr(arg, '/');
+
+       *realms = 0;
+       if (slash != NULL) {
+               int failed;
+
+               *slash = 0;
+               failed = rtnl_rtrealm_a2n(realms, arg);
+               *slash = '/';
+               if (failed)
+                       return -1;
+               *realms <<= 16;
+               arg = slash + 1;
+       }
+
+       if (*arg != 0 && rtnl_rtrealm_a2n(&low, arg))
+               return -1;
+
+       *realms |= low;
+       return 0;
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..d01bc63517572202b9bb3e64a942e194d4135e5b 100644 (file)
@@ -0,0 +1,177 @@
+/*
+ * rtmon.c             RTnetlink listener.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <string.h>
+
+#include "SNAPSHOT.h"
+
+#include "utils.h"
+#include "libnetlink.h"
+
+int resolve_hosts = 0;
+static int init_phase = 1;
+
+/*
+ * Write a timestamp record to fp.  The record is laid out like a netlink
+ * message: a header with nlmsg_type 15 and zeroed flags/seq/pid, followed
+ * by two 32-bit words holding the current tv_sec and tv_usec.
+ *
+ * The message is built inside a union so that the storage is suitably
+ * aligned for struct nlmsghdr (a bare char array gives no such guarantee),
+ * and the storage is zeroed before use so no indeterminate bytes can reach
+ * the file.
+ */
+static void write_stamp(FILE *fp)
+{
+       union {
+               struct nlmsghdr hdr;
+               char raw[128];
+       } msg;
+       struct nlmsghdr *n1 = &msg.hdr;
+       struct timeval tv;
+
+       memset(&msg, 0, sizeof(msg));
+
+       n1->nlmsg_type = 15;
+       n1->nlmsg_flags = 0;
+       n1->nlmsg_seq = 0;
+       n1->nlmsg_pid = 0;
+       n1->nlmsg_len = NLMSG_LENGTH(4*2);
+       gettimeofday(&tv, NULL);
+       ((__u32*)NLMSG_DATA(n1))[0] = tv.tv_sec;
+       ((__u32*)NLMSG_DATA(n1))[1] = tv.tv_usec;
+       fwrite((void*)n1, 1, NLMSG_ALIGN(n1->nlmsg_len), fp);
+}
+
+/*
+ * Message callback: append one netlink message, padded to NLMSG_ALIGN of
+ * its length, to the FILE passed in arg and flush it.  Once the initial
+ * dump is over (init_phase cleared) every message is preceded by a
+ * timestamp record.  Always returns 0.
+ */
+static int dump_msg(struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
+{
+       FILE *out = arg;
+
+       if (init_phase == 0)
+               write_stamp(out);
+
+       fwrite(n, 1, NLMSG_ALIGN(n->nlmsg_len), out);
+       fflush(out);
+       return 0;
+}
+
+/* Print the command synopsis on stderr and exit with status -1. */
+void usage(void)
+{
+       fputs("Usage: rtmon file FILE [ all | LISTofOBJECTS]\n"
+             "LISTofOBJECTS := [ link ] [ address ] [ route ]\n",
+             stderr);
+       exit(-1);
+}
+
+/*
+ * rtmon: record rtnetlink messages into a file.
+ *
+ * Command-line words are consumed one by one: "-family NAME", "-4", "-6"
+ * and "-0" choose the address family, "file NAME" names the output file
+ * (required), and "link", "address", "route" or "all" choose which
+ * multicast groups to subscribe to (all groups when none is named).
+ * After opening the file and the netlink socket, the program requests a
+ * link dump, writes it to the file behind one timestamp record, and then
+ * keeps appending every message it receives, each preceded by its own
+ * timestamp record.
+ */
+int
+main(int argc, char **argv)
+{
+       FILE *fp;
+       struct rtnl_handle rth;
+       int family = AF_UNSPEC;
+       unsigned groups = ~0U;
+       int llink = 0;
+       int laddr = 0;
+       int lroute = 0;
+       char *file = NULL;
+
+       while (argc > 1) {
+               if (matches(argv[1], "-family") == 0) {
+                       argc--;
+                       argv++;
+                       if (argc <= 1)
+                               usage();
+                       if (strcmp(argv[1], "inet") == 0)
+                               family = AF_INET;
+                       else if (strcmp(argv[1], "inet6") == 0)
+                               family = AF_INET6;
+                       else if (strcmp(argv[1], "link") == 0)
+                               /* Same family as the "-0" shortcut below. */
+                               family = AF_PACKET;
+                       else if (strcmp(argv[1], "help") == 0)
+                               usage();
+                       else {
+                               fprintf(stderr, "Protocol ID \"%s\" is unknown, try \"rtmon help\".\n", argv[1]);
+                               exit(-1);
+                       }
+               } else if (strcmp(argv[1], "-4") == 0) {
+                       family = AF_INET;
+               } else if (strcmp(argv[1], "-6") == 0) {
+                       family = AF_INET6;
+               } else if (strcmp(argv[1], "-0") == 0) {
+                       family = AF_PACKET;
+               } else if (matches(argv[1], "-Version") == 0) {
+                       printf("rtmon utility, iproute2-ss%s\n", SNAPSHOT);
+                       exit(0);
+               } else if (matches(argv[1], "file") == 0) {
+                       argc--;
+                       argv++;
+                       if (argc <= 1)
+                               usage();
+                       file = argv[1];
+               } else if (matches(argv[1], "link") == 0) {
+                       llink=1;
+                       groups = 0;
+               } else if (matches(argv[1], "address") == 0) {
+                       laddr=1;
+                       groups = 0;
+               } else if (matches(argv[1], "route") == 0) {
+                       lroute=1;
+                       groups = 0;
+               } else if (strcmp(argv[1], "all") == 0) {
+                       groups = ~0U;
+               } else if (matches(argv[1], "help") == 0) {
+                       usage();
+               } else {
+                       fprintf(stderr, "Argument \"%s\" is unknown, try \"rtmon help\".\n", argv[1]);
+                       exit(-1);
+               }
+               argc--; argv++;
+       }
+
+       if (file == NULL) {
+               fprintf(stderr, "Not enough information: argument \"file\" is required\n");
+               exit(-1);
+       }
+
+       /*
+        * Turn the selected object classes into multicast groups.  Address
+        * and route groups exist per IP family, so they are added only for
+        * the chosen family, or for both when no family was chosen.
+        */
+       if (llink)
+               groups |= RTMGRP_LINK;
+       if (laddr) {
+               if (!family || family == AF_INET)
+                       groups |= RTMGRP_IPV4_IFADDR;
+               if (!family || family == AF_INET6)
+                       groups |= RTMGRP_IPV6_IFADDR;
+       }
+       if (lroute) {
+               if (!family || family == AF_INET)
+                       groups |= RTMGRP_IPV4_ROUTE;
+               if (!family || family == AF_INET6)
+                       groups |= RTMGRP_IPV6_ROUTE;
+       }
+
+       fp = fopen(file, "w");
+       if (fp == NULL) {
+               perror("Cannot fopen");
+               exit(-1);
+       }
+
+       if (rtnl_open(&rth, groups) < 0)
+               exit(1);
+
+       if (rtnl_wilddump_request(&rth, AF_UNSPEC, RTM_GETLINK) < 0) {
+               perror("Cannot send dump request");
+               exit(1);
+       }
+
+       /* One timestamp for the whole initial dump. */
+       write_stamp(fp);
+
+       if (rtnl_dump_filter(&rth, dump_msg, fp, NULL, NULL) < 0) {
+               fprintf(stderr, "Dump terminated\n");
+               return 1;
+       }
+
+       /* From here on dump_msg() stamps every message individually. */
+       init_phase = 0;
+
+       if (rtnl_listen(&rth, dump_msg, (void*)fp) < 0)
+               exit(2);
+
+       exit(0);
+}
diff --git a/ip/rtpr b/ip/rtpr
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..c3629fd60a0e08458ae4110596e08f599f0b02b9 100644 (file)
--- a/ip/rtpr
+++ b/ip/rtpr
@@ -0,0 +1,4 @@
+#! /bin/bash
+
+exec tr "[\\\\]" "[
+]"
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..bc270bff30fde01ba0191e630bae4db1c7cd9a98 100644 (file)
@@ -0,0 +1,18 @@
+
+# Objects archived into libutil.a.
+UTILOBJ=utils.o rt_names.o ll_types.o ll_proto.o ll_addr.o inet_proto.o
+
+# Objects archived into libnetlink.a.
+NLOBJ=ll_map.o libnetlink.o
+
+all: libnetlink.a libutil.a
+
+libnetlink.a: $(NLOBJ)
+       $(AR) rcs $@ $(NLOBJ)
+
+# $(ADDLIB) is not assigned in this file; whatever it holds when make runs
+# is built and archived together with $(UTILOBJ).
+libutil.a: $(UTILOBJ) $(ADDLIB)
+       $(AR) rcs $@ $(UTILOBJ) $(ADDLIB)
+
+# Nothing from this directory is installed.
+install:
+
+clean:
+       rm -f $(NLOBJ) $(UTILOBJ) $(ADDLIB) libnetlink.a libutil.a
+
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..9500df86d691302b1e18aa0d4d134a6d005a2584 100644 (file)
@@ -0,0 +1,98 @@
+#include <errno.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+
+#include "utils.h"
+
+/*
+ * Reassemble a 16-bit value from its two bytes in memory, taking the first
+ * byte as the low-order half and the second as the high-order half.
+ */
+static __inline__ u_int16_t dn_ntohs(u_int16_t addr)
+{
+       const u_int8_t *raw = (const u_int8_t *)&addr;
+       u_int16_t low = raw[0];
+       u_int16_t high = raw[1];
+
+       return low | (high << 8);
+}
+
+/*
+ * Emit the decimal digit of *addr that belongs to the given scale (1000,
+ * 100, 10 or 1) at *str.  Leading zeros are skipped: a zero digit is only
+ * written once an earlier digit has been written (*started) or when this is
+ * the units position.  A written digit advances *pos, sets *started and is
+ * subtracted out of *addr.  Returns 1 without writing when the buffer is
+ * already full (*pos == len), 0 otherwise.
+ */
+static __inline__ int do_digit(char *str, u_int16_t *addr, u_int16_t scale, size_t *pos, size_t len, int *started)
+{
+       u_int16_t digit;
+
+       if (*pos == len)
+               return 1;
+
+       digit = *addr / scale;
+       if (digit == 0 && !*started && scale != 1)
+               return 0;
+
+       *str = digit + '0';
+       *started = 1;
+       (*pos)++;
+       *addr -= (digit * scale);
+       return 0;
+}
+
+
+/*
+ * Format a two-byte DECnet address as "area.node" into str.
+ *
+ * The 16-bit value is split into its upper 6 bits (area) and lower 10 bits
+ * (node), and each part is printed in decimal without leading zeros.
+ * Returns NULL when dna->a_len is not 2, str otherwise.
+ *
+ * NOTE: whenever the output reaches len bytes the function returns str
+ * immediately, without writing a terminating NUL; callers have to supply a
+ * buffer large enough for the full text plus the terminator.
+ */
+static const char *dnet_ntop1(const struct dn_naddr *dna, char *str, size_t len)
+{
+       u_int16_t addr = dn_ntohs(*(u_int16_t *)dna->a_addr);
+       u_int16_t area = addr >> 10;
+       size_t pos = 0;
+       int started = 0;
+
+       if (dna->a_len != 2)
+               return NULL;
+
+       /* Keep only the node bits; the area was extracted above. */
+       addr &= 0x03ff;
+
+       if (len == 0)
+               return str;
+
+       /* Area: at most two decimal digits. */
+       if (do_digit(str + pos, &area, 10, &pos, len, &started))
+               return str;
+
+       if (do_digit(str + pos, &area, 1, &pos, len, &started))
+               return str;
+
+       if (pos == len)
+               return str;
+
+       *(str + pos) = '.';
+       pos++;
+       /* Leading-zero suppression starts afresh for the node part. */
+       started = 0;
+
+       /* Node: at most four decimal digits. */
+       if (do_digit(str + pos, &addr, 1000, &pos, len, &started))
+               return str;
+
+       if (do_digit(str + pos, &addr, 100, &pos, len, &started))
+               return str;
+
+       if (do_digit(str + pos, &addr, 10, &pos, len, &started))
+               return str;
+
+       if (do_digit(str + pos, &addr, 1, &pos, len, &started))
+               return str;
+
+       if (pos == len)
+               return str;
+
+       *(str + pos) = 0;
+
+       return str;
+}
+
+
+/*
+ * Convert a binary DECnet address to text.  Only AF_DECnet is handled:
+ * errno is cleared and the result of dnet_ntop1() is returned.  Any other
+ * family sets errno to EAFNOSUPPORT and returns NULL.
+ */
+const char *dnet_ntop(int af, const void *addr, char *str, size_t len)
+{
+       if (af != AF_DECnet) {
+               errno = EAFNOSUPPORT;
+               return NULL;
+       }
+
+       errno = 0;
+       return dnet_ntop1((struct dn_naddr *)addr, str, len);
+}
+
+
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..bd7727aea0fb6d396476c082b3e0bcfd9315afb0 100644 (file)
@@ -0,0 +1,71 @@
+#include <errno.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+
+#include "utils.h"
+
+/*
+ * Reassemble a 16-bit value from its two bytes in memory, taking the first
+ * byte as the low-order half and the second as the high-order half.
+ */
+static __inline__ u_int16_t dn_htons(u_int16_t addr)
+{
+        const u_int8_t *raw = (const u_int8_t *)&addr;
+        u_int16_t low = raw[0];
+        u_int16_t high = raw[1];
+
+        return low | (high << 8);
+}
+
+
+/*
+ * Parse the run of decimal digits at the start of src.
+ *
+ * Stores the parsed value in *dst and returns the number of digits
+ * consumed (0 when src does not start with a digit, in which case *dst is
+ * set to 0).  Parsing stops at the first non-digit or at the end of the
+ * string.
+ *
+ * The value is accumulated in a wider type and saturates at 0xffff.  With a
+ * plain 16-bit accumulator an over-long number such as "65537" would wrap
+ * around to a small value and slip past the callers' range checks; a
+ * saturated result is always rejected by them.
+ */
+static int dnet_num(const char *src, u_int16_t * dst)
+{
+       unsigned int value = 0;
+       int ndigits = 0;
+
+       *dst = 0;
+
+       for (; *src >= '0' && *src <= '9'; src++) {
+               value = value * 10 + (unsigned int)(*src - '0');
+               if (value > 0xffff)
+                       value = 0xffff;
+               ndigits++;
+       }
+
+       *dst = value;
+       return ndigits;
+}
+
+/*
+ * Parse "area.node" into *dna.  The area must be 0..63 and be followed by a
+ * dot; the node must be 0..1023.  On success the two are packed as
+ * (area << 10) | node, stored through dn_htons() into a_addr, a_len is set
+ * to 2 and 1 is returned.  On any syntax or range error 0 is returned and
+ * *dna is left untouched.
+ */
+static int dnet_pton1(const char *src, struct dn_naddr *dna)
+{
+       u_int16_t area = 0;
+       u_int16_t node = 0;
+       int ndigits;
+
+       ndigits = dnet_num(src, &area);
+       if (ndigits == 0 || area > 63 || src[ndigits] != '.')
+               return 0;
+
+       /* Continue right after the dot. */
+       src += ndigits + 1;
+
+       ndigits = dnet_num(src, &node);
+       if (ndigits == 0 || node > 1023)
+               return 0;
+
+       dna->a_len = 2;
+       *(u_int16_t *)dna->a_addr = dn_htons((area << 10) | node);
+       return 1;
+}
+
+/*
+ * Convert a textual DECnet address to binary.  For AF_DECnet errno is
+ * cleared and the result of dnet_pton1() is returned (1 valid, 0 invalid).
+ * Any other family sets errno to EAFNOSUPPORT and returns -1.
+ */
+int dnet_pton(int af, const char *src, void *addr)
+{
+       if (af != AF_DECnet) {
+               errno = EAFNOSUPPORT;
+               return -1;
+       }
+
+       errno = 0;
+       return dnet_pton1(src, (struct dn_naddr *)addr);
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..a3722d67d0bbad1e1ff33744f2ad5a11291b5b59 100644 (file)
@@ -0,0 +1,199 @@
+/* Copyright (c) 1996 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
+ * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
+ * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char rcsid[] = "$Id: inet_ntop.c,v 1.4 1996/09/27 03:24:13 drepper Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+
+#include <linux/in6.h>
+#ifndef IN6ADDRSZ
+#define IN6ADDRSZ sizeof(struct in6_addr)
+#endif
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) ((size_t)sprintf x)
+#endif
+
+/*
+ * WARNING: Don't even consider trying to compile this on a system where
+ * sizeof(int) < 4.  sizeof(int) > 4 is fine; all the world's not a VAX.
+ */
+
+static const char *inet_ntop4 __P((const u_char *src, char *dst, size_t size));
+static const char *inet_ntop6 __P((const u_char *src, char *dst, size_t size));
+
+/* char *
+ * inet_ntop(af, src, dst, size)
+ *     convert a network format address to presentation format.
+ * return:
+ *     pointer to presentation format address (`dst'), or NULL (see errno).
+ * author:
+ *     Paul Vixie, 1996.
+ */
+const char *
+inet_ntop(af, src, dst, size)
+       int af;
+       const void *src;
+       char *dst;
+       size_t size;
+{
+       /* Dispatch on the address family; only IPv4 and IPv6 are known. */
+       if (af == AF_INET)
+               return (inet_ntop4(src, dst, size));
+       if (af == AF_INET6)
+               return (inet_ntop6(src, dst, size));
+
+       errno = (EAFNOSUPPORT);
+       return (NULL);
+}
+
+/* const char *
+ * inet_ntop4(src, dst, size)
+ *     format an IPv4 address, more or less like inet_ntoa()
+ * return:
+ *     `dst' (as a const)
+ * notes:
+ *     (1) uses no statics
+ *     (2) takes a u_char* not an in_addr as input
+ * author:
+ *     Paul Vixie, 1996.
+ */
+static const char *
+inet_ntop4(src, dst, size)
+       const u_char *src;
+       char *dst;
+       size_t size;
+{
+       static const char fmt[] = "%u.%u.%u.%u";
+       char tmp[sizeof "255.255.255.255"];
+
+       /*
+        * SPRINTF() yields the length of the text without its terminating
+        * NUL, while strcpy() below copies the text plus the NUL.  The
+        * result therefore fits only when the length is strictly smaller
+        * than size; a length equal to size must be refused as well, or the
+        * NUL would land one byte past the end of dst.
+        */
+       if (SPRINTF((tmp, fmt, src[0], src[1], src[2], src[3])) >= size) {
+               errno = (ENOSPC);
+               return (NULL);
+       }
+       strcpy(dst, tmp);
+       return (dst);
+}
+
+/* const char *
+ * inet_ntop6(src, dst, size)
+ *     convert IPv6 binary address into presentation (printable) format
+ * author:
+ *     Paul Vixie, 1996.
+ */
+static const char *
+inet_ntop6(src, dst, size)
+       const u_char *src;
+       char *dst;
+       size_t size;
+{
+       /*
+        * Note that int32_t and int16_t need only be "at least" large enough
+        * to contain a value of the specified size.  On some systems, like
+        * Crays, there is no such thing as an integer variable with 16 bits.
+        * Keep this in mind if you think this function should have been coded
+        * to use pointer overlays.  All the world's not a VAX.
+        */
+       /* tmp is sized for the longest form, embedded dotted quad included. */
+       char tmp[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"], *tp;
+       /* best: longest run of zero words found so far; cur: run in progress. */
+       struct { int base, len; } best, cur;
+       u_int words[sizeof(struct in6_addr) / INT16SZ];
+       int i;
+
+       /*
+        * Preprocess:
+        *      Copy the input (bytewise) array into a wordwise array.
+        *      Find the longest run of 0x00's in src[] for :: shorthanding.
+        */
+       memset(words, '\0', sizeof words);
+       /* Even bytes go into the high half of a word, odd bytes into the low. */
+       for (i = 0; i < IN6ADDRSZ; i++)
+               words[i / 2] |= (src[i] << ((1 - (i % 2)) << 3));
+       best.base = -1;
+       cur.base = -1;
+       for (i = 0; i < (IN6ADDRSZ / INT16SZ); i++) {
+               if (words[i] == 0) {
+                       if (cur.base == -1)
+                               cur.base = i, cur.len = 1;
+                       else
+                               cur.len++;
+               } else {
+                       if (cur.base != -1) {
+                               if (best.base == -1 || cur.len > best.len)
+                                       best = cur;
+                               cur.base = -1;
+                       }
+               }
+       }
+       /* A run that extends to the last word is still open here. */
+       if (cur.base != -1) {
+               if (best.base == -1 || cur.len > best.len)
+                       best = cur;
+       }
+       /* A run of one single zero word is not abbreviated. */
+       if (best.base != -1 && best.len < 2)
+               best.base = -1;
+
+       /*
+        * Format the result.
+        */
+       tp = tmp;
+       for (i = 0; i < (IN6ADDRSZ / INT16SZ); i++) {
+               /* Are we inside the best run of 0x00's? */
+               if (best.base != -1 && i >= best.base &&
+                   i < (best.base + best.len)) {
+                       if (i == best.base)
+                               *tp++ = ':';
+                       continue;
+               }
+               /* Are we following an initial run of 0x00s or any real hex? */
+               if (i != 0)
+                       *tp++ = ':';
+               /* Is this address an encapsulated IPv4? */
+               if (i == 6 && best.base == 0 &&
+                   (best.len == 6 || (best.len == 5 && words[5] == 0xffff))) {
+                       /* The last four bytes are printed as a dotted quad. */
+                       if (!inet_ntop4(src+12, tp, sizeof tmp - (tp - tmp)))
+                               return (NULL);
+                       tp += strlen(tp);
+                       break;
+               }
+               tp += SPRINTF((tp, "%x", words[i]));
+       }
+       /* Was it a trailing run of 0x00's? */
+       if (best.base != -1 && (best.base + best.len) == (IN6ADDRSZ / INT16SZ))
+               *tp++ = ':';
+       *tp++ = '\0';
+
+       /*
+        * Check for overflow, copy, and we're done.
+        */
+       /* tp - tmp already counts the terminating NUL written above. */
+       if ((size_t)(tp - tmp) > size) {
+               errno = (ENOSPC);
+               return (NULL);
+       }
+       strcpy(dst, tmp);
+       return (dst);
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..a55e0e7ba3239eee32e87878f47ce458710f3087 100644 (file)
@@ -0,0 +1,70 @@
+/*
+ * inet_proto.c
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <string.h>
+
+#include "utils.h"
+
+/*
+ * Translate an IP protocol number into its name.
+ *
+ * The most recent successful lookup is remembered; a repeated request for
+ * the same number returns the cached copy (static storage, not buf).
+ * Otherwise the name from getprotobynumber() is copied into buf, or, when
+ * the number is unknown, "ipproto-<number>" is formatted into buf.  At
+ * most len bytes are written to buf.
+ *
+ * Both copies are always NUL-terminated: the name is written to buf with
+ * snprintf(), and it is cached only when it fits the cache completely, so
+ * a later cache hit can never return a truncated or unterminated string.
+ */
+char *inet_proto_n2a(int proto, char *buf, int len)
+{
+       static char ncache[16];
+       static int icache = -1;
+       struct protoent *pe;
+
+       if (proto == icache)
+               return ncache;
+
+       pe = getprotobynumber(proto);
+       if (pe) {
+               if (strlen(pe->p_name) < sizeof(ncache)) {
+                       icache = proto;
+                       strcpy(ncache, pe->p_name);
+               }
+               snprintf(buf, len, "%s", pe->p_name);
+               return buf;
+       }
+       snprintf(buf, len, "ipproto-%d", proto);
+       return buf;
+}
+
+/*
+ * Translate an IP protocol name or number into its numeric value.
+ *
+ * Text that starts with a digit is parsed as a decimal 8-bit number with
+ * get_u8().  Anything else is looked up with getprotobyname().  The most
+ * recent successful name lookup is remembered and answered from the cache
+ * when the same name is asked for again.  Returns the protocol number, or
+ * -1 when the text is neither a valid number nor a known name.
+ *
+ * The name is cached only when it fits the cache together with its
+ * terminating NUL, so the strcmp() on the cache always reads a properly
+ * terminated string.
+ */
+int inet_proto_a2n(char *buf)
+{
+       static char ncache[16];
+       static int icache = -1;
+       struct protoent *pe;
+
+       if (icache>=0 && strcmp(ncache, buf) == 0)
+               return icache;
+
+       if (buf[0] >= '0' && buf[0] <= '9') {
+               __u8 ret;
+               if (get_u8(&ret, buf, 10))
+                       return -1;
+               return ret;
+       }
+
+       pe = getprotobyname(buf);
+       if (pe == NULL)
+               return -1;
+
+       if (strlen(pe->p_name) < sizeof(ncache)) {
+               icache = pe->p_proto;
+               strcpy(ncache, pe->p_name);
+       }
+       return pe->p_proto;
+}
+
+
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..995083447a5136d47220d69514e2fefbd5253e00 100644 (file)
@@ -0,0 +1,217 @@
+/* Copyright (c) 1996 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
+ * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
+ * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char rcsid[] = "$Id: inet_pton.c,v 1.5 1996/09/27 03:24:16 drepper Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+#include <string.h>
+#include <errno.h>
+
+#include <linux/in6.h>
+#define IN6ADDRSZ sizeof(struct in6_addr)
+
+/*
+ * WARNING: Don't even consider trying to compile this on a system where
+ * sizeof(int) < 4.  sizeof(int) > 4 is fine; all the world's not a VAX.
+ */
+
+static int     inet_pton4 __P((const char *src, u_char *dst));
+static int     inet_pton6 __P((const char *src, u_char *dst));
+
+/* int
+ * inet_pton(af, src, dst)
+ *     convert from presentation format (which usually means ASCII printable)
+ *     to network format (which is usually some kind of binary format).
+ * return:
+ *     1 if the address was valid for the specified address family
+ *     0 if the address wasn't valid (`dst' is untouched in this case)
+ *     -1 if some other error occurred (`dst' is untouched in this case, too)
+ * author:
+ *     Paul Vixie, 1996.
+ */
+int
+inet_pton(af, src, dst)
+       int af;
+       const char *src;
+       void *dst;
+{
+       /* Dispatch on the address family; only IPv4 and IPv6 are known. */
+       if (af == AF_INET)
+               return (inet_pton4(src, dst));
+       if (af == AF_INET6)
+               return (inet_pton6(src, dst));
+
+       errno = EAFNOSUPPORT;
+       return (-1);
+}
+
+/* int
+ * inet_pton4(src, dst)
+ *     like inet_aton() but without all the hexadecimal and shorthand.
+ * return:
+ *     1 if `src' is a valid dotted quad, else 0.
+ * notice:
+ *     does not touch `dst' unless it's returning 1.
+ * author:
+ *     Paul Vixie, 1996.
+ */
+static int
+inet_pton4(src, dst)
+       const char *src;
+       u_char *dst;
+{
+       u_char parsed[INADDRSZ], *cur;
+       int in_number, count, c;
+
+       in_number = 0;          /* a digit of the current octet was seen */
+       count = 0;              /* octets started so far */
+       cur = parsed;
+       *cur = 0;
+
+       for (c = *src++; c != '\0'; c = *src++) {
+               if (c >= '0' && c <= '9') {
+                       u_int value = *cur * 10 + (c - '0');
+
+                       if (value > 255)
+                               return (0);
+                       *cur = value;
+                       if (!in_number) {
+                               if (++count > 4)
+                                       return (0);
+                               in_number = 1;
+                       }
+                       continue;
+               }
+               /*
+                * The only other character allowed is a dot that follows a
+                * digit and does not come after the fourth octet.
+                */
+               if (c != '.' || !in_number || count == 4)
+                       return (0);
+               *++cur = 0;
+               in_number = 0;
+       }
+       if (count < 4)
+               return (0);
+
+       /* Only a fully validated address is copied out. */
+       memcpy(dst, parsed, INADDRSZ);
+       return (1);
+}
+
+/* int
+ * inet_pton6(src, dst)
+ *     convert presentation level address to network order binary form.
+ * return:
+ *     1 if `src' is a valid [RFC1884 2.2] address, else 0.
+ * notice:
+ *     (1) does not touch `dst' unless it's returning 1.
+ *     (2) :: in a full address is silently ignored.
+ * credit:
+ *     inspired by Mark Andrews.
+ * author:
+ *     Paul Vixie, 1996.
+ */
+static int
+inet_pton6(src, dst)
+       const char *src;
+       u_char *dst;
+{
+       static const char xdigits_l[] = "0123456789abcdef",
+                         xdigits_u[] = "0123456789ABCDEF";
+       /*
+        * tp is the write position in tmp, endp is one past its end, and
+        * colonp marks where in tmp a "::" was seen (NULL if none so far).
+        */
+       u_char tmp[IN6ADDRSZ], *tp, *endp, *colonp;
+       /* curtok points at the first character of the group being read. */
+       const char *xdigits, *curtok;
+       int ch, saw_xdigit;
+       u_int val;
+
+       memset((tp = tmp), '\0', IN6ADDRSZ);
+       endp = tp + IN6ADDRSZ;
+       colonp = NULL;
+       /* Leading :: requires some special handling. */
+       if (*src == ':')
+               if (*++src != ':')
+                       return (0);
+       curtok = src;
+       saw_xdigit = 0;
+       val = 0;
+       while ((ch = *src++) != '\0') {
+               const char *pch;
+
+               /* Accept the hex digit in either case. */
+               if ((pch = strchr((xdigits = xdigits_l), ch)) == NULL)
+                       pch = strchr((xdigits = xdigits_u), ch);
+               if (pch != NULL) {
+                       val <<= 4;
+                       val |= (pch - xdigits);
+                       if (val > 0xffff)
+                               return (0);
+                       saw_xdigit = 1;
+                       continue;
+               }
+               if (ch == ':') {
+                       curtok = src;
+                       /* A colon with no digits before it is a "::"; only one is allowed. */
+                       if (!saw_xdigit) {
+                               if (colonp)
+                                       return (0);
+                               colonp = tp;
+                               continue;
+                       }
+                       if (tp + INT16SZ > endp)
+                               return (0);
+                       /* Store the finished group, high byte first. */
+                       *tp++ = (u_char) (val >> 8) & 0xff;
+                       *tp++ = (u_char) val & 0xff;
+                       saw_xdigit = 0;
+                       val = 0;
+                       continue;
+               }
+               /* A dot means the current group is really a dotted quad: re-parse it from curtok. */
+               if (ch == '.' && ((tp + INADDRSZ) <= endp) &&
+                   inet_pton4(curtok, tp) > 0) {
+                       tp += INADDRSZ;
+                       saw_xdigit = 0;
+                       break;  /* '\0' was seen by inet_pton4(). */
+               }
+               return (0);
+       }
+       /* Store the last group if the string ended right after its digits. */
+       if (saw_xdigit) {
+               if (tp + INT16SZ > endp)
+                       return (0);
+               *tp++ = (u_char) (val >> 8) & 0xff;
+               *tp++ = (u_char) val & 0xff;
+       }
+       if (colonp != NULL) {
+               /*
+                * Since some memmove()'s erroneously fail to handle
+                * overlapping regions, we'll do the shift by hand.
+                */
+               /* n bytes were written after the "::"; move them to the end of tmp and zero where they were. */
+               const int n = tp - colonp;
+               int i;
+
+               for (i = 1; i <= n; i++) {
+                       endp[- i] = colonp[n - i];
+                       colonp[n - i] = 0;
+               }
+               tp = endp;
+       }
+       /* Without a "::" the groups must have filled tmp exactly. */
+       if (tp != endp)
+               return (0);
+       memcpy(dst, tmp, IN6ADDRSZ);
+       return (1);
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..b2d67902546fb4352a6f517a8afcb1a2925086cb 100644 (file)
@@ -0,0 +1,71 @@
+#include <errno.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+
+#include "utils.h"
+
+/*
+ * Write one uppercase hex digit to *str: the nibble of addr selected by
+ * scale (0 = lowest four bits).  *pos counts the characters emitted so
+ * far and len is the buffer capacity.
+ *
+ * Returns 1 without writing anything when the buffer is already full
+ * (*pos == len); otherwise stores the digit, bumps *pos and returns 0.
+ */
+static __inline__ int do_digit(char *str, u_int32_t addr, u_int32_t scale, size_t *pos, size_t len)
+{
+       static const char digits[] = "0123456789ABCDEF";
+       u_int32_t nibble;
+
+       if (*pos == len)
+               return 1;
+
+       nibble = (addr >> (scale * 4)) & 0x0f;
+       *str = digits[nibble];
+       ++*pos;
+
+       return 0;
+}
+
+/*
+ * Format an IPX address into str as eight hex digits of ipx_net (after
+ * ntohl), a '.', and two hex digits for each of the six ipx_node bytes.
+ *
+ * At most len bytes of str are written.  The complete text needs 22
+ * bytes including the terminator; when it does not fit, the output is
+ * truncated but still NUL-terminated.  With len == 0 nothing is written.
+ *
+ * Returns str.
+ */
+static const char *ipx_ntop1(const struct ipx_addr *addr, char *str, size_t len)
+{
+       int i;
+       size_t pos = 0;
+
+       if (len == 0)
+               return str;
+
+       for (i = 7; i >= 0; i--)
+               if (do_digit(str + pos, ntohl(addr->ipx_net), i, &pos, len))
+                       goto out;
+
+       if (pos == len)
+               goto out;
+       str[pos++] = '.';
+
+       for (i = 0; i < 6; i++) {
+               if (do_digit(str + pos, addr->ipx_node[i], 1, &pos, len))
+                       goto out;
+               if (do_digit(str + pos, addr->ipx_node[i], 0, &pos, len))
+                       goto out;
+       }
+
+out:
+       /* Buffer exhausted: sacrifice the last character for the NUL. */
+       if (pos == len)
+               pos = len - 1;
+       str[pos] = 0;
+
+       return str;
+}
+
+
+/*
+ * Address-family dispatching wrapper around ipx_ntop1().
+ *
+ * For AF_IPX, errno is cleared and the formatted string is returned.
+ * For any other family, errno is set to EAFNOSUPPORT and NULL is returned.
+ */
+const char *ipx_ntop(int af, const void *addr, char *str, size_t len)
+{
+       if (af != AF_IPX) {
+               errno = EAFNOSUPPORT;
+               return NULL;
+       }
+
+       errno = 0;
+       return ipx_ntop1((struct ipx_addr *)addr, str, len);
+}
+
+
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..1a52b7f1a58f9633b9c93eb2d5cda85139df7d8a 100644 (file)
@@ -0,0 +1,107 @@
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+
+#include "utils.h"
+
+/*
+ * Convert one hex digit (either case) to its value 0..15.
+ * Any other character yields 0xf0, so callers can detect failure by
+ * testing the upper nibble of the result.
+ */
+static u_int32_t hexget(char c)
+{
+       u_int32_t value = 0xf0;
+
+       if (c >= '0' && c <= '9')
+               value = c - '0';
+       else if (c >= 'a' && c <= 'f')
+               value = c - 'a' + 10;
+       else if (c >= 'A' && c <= 'F')
+               value = c - 'A' + 10;
+
+       return value;
+}
+
+/*
+ * Parse the network part of an IPX address: up to eight hex digits,
+ * terminated by the end of the string or by a '.'.
+ *
+ * Each digit is shifted into *net, which is NOT cleared here; the value
+ * accumulates as a plain integer with no byte-order conversion.
+ *
+ * Returns 0 on success, -1 on an invalid character or on more than
+ * eight digits.
+ */
+static int ipx_getnet(u_int32_t *net, const char *str)
+{
+       int i;
+       u_int32_t tmp;
+
+       for(i = 0; *str && (i < 8); i++) {
+
+               if ((tmp = hexget(*str)) & 0xf0) {
+                       if (*str == '.')
+                               return 0;
+                       else
+                               return -1;
+               }
+
+               str++;
+               (*net) <<= 4;
+               (*net) |= tmp;
+       }
+
+       /*
+        * After eight digits the loop stops without having looked at the
+        * next character, so a '.' here is as valid as end of string.
+        * Rejecting it made "NNNNNNNN.node" addresses unparsable.
+        */
+       if (*str == 0 || *str == '.')
+               return 0;
+
+       return -1;
+}
+
+/*
+ * Parse six node bytes, two hex digits each, into node[0..5].
+ * A ':' following a byte is skipped.
+ *
+ * Returns 0 on success, -1 as soon as a non-hex character (including
+ * the terminating NUL) is met where a digit is expected.
+ */
+static int ipx_getnode(u_int8_t *node, const char *str)
+{
+       u_int8_t *out;
+       u_int8_t *const end = node + 6;
+
+       for (out = node; out < end; out++) {
+               u_int32_t hi, lo;
+
+               hi = hexget(*str++);
+               if (hi & 0xf0)
+                       return -1;
+               *out = (u_int8_t)(hi << 4);
+
+               lo = hexget(*str++);
+               if (lo & 0xf0)
+                       return -1;
+               *out |= (u_int8_t)lo;
+
+               if (*str == ':')
+                       str++;
+       }
+
+       return 0;
+}
+
+/*
+ * Parse "net" or "net.node" text into *addr.
+ *
+ * *addr is zeroed first.  The network number is stored via htonl(); the
+ * node bytes are filled only when the text contains a '.'.
+ *
+ * Returns 1 on success and 0 when either part fails to parse.
+ */
+static int ipx_pton1(const char *src, struct ipx_addr *addr)
+{
+       const char *dot = strchr(src, '.');
+
+       memset(addr, 0, sizeof(struct ipx_addr));
+
+       if (ipx_getnet(&addr->ipx_net, src) != 0)
+               return 0;
+
+       addr->ipx_net = htonl(addr->ipx_net);
+
+       /* No separator: a bare network number is a complete address. */
+       if (dot == NULL)
+               return 1;
+
+       return ipx_getnode(addr->ipx_node, dot + 1) ? 0 : 1;
+}
+
+/*
+ * Address-family dispatching wrapper around ipx_pton1().
+ *
+ * For AF_IPX, errno is cleared and the result of ipx_pton1() is
+ * returned (1 = parsed, 0 = invalid text).  For any other family, errno
+ * is set to EAFNOSUPPORT and -1 is returned.
+ */
+int ipx_pton(int af, const char *src, void *addr)
+{
+       if (af == AF_IPX) {
+               errno = 0;
+               return ipx_pton1(src, (struct ipx_addr *)addr);
+       }
+
+       errno = EAFNOSUPPORT;
+       return -1;
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..a1f39d409521a7c3f38f08b4684e140b1e126b6a 100644 (file)
@@ -0,0 +1,521 @@
+/*
+ * libnetlink.c        RTnetlink service routines.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <net/if_arp.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <string.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/uio.h>
+
+#include "libnetlink.h"
+
+/*
+ * Close the socket held by rth.
+ *
+ * The descriptor is marked invalid afterwards so that a second call is
+ * a no-op instead of closing whatever file has since reused the number.
+ */
+void rtnl_close(struct rtnl_handle *rth)
+{
+       if (rth->fd >= 0) {
+               close(rth->fd);
+               rth->fd = -1;
+       }
+}
+
+/*
+ * Initialise rth: open a NETLINK_ROUTE raw socket, bind it with the
+ * given multicast group mask, read back the bound local address and
+ * seed the sequence counter from the current time.
+ *
+ * Returns 0 on success.  On failure a diagnostic is printed, the socket
+ * (if any) is closed, rth->fd is left at -1 and -1 is returned.
+ */
+int rtnl_open(struct rtnl_handle *rth, unsigned subscriptions)
+{
+       socklen_t addr_len;
+
+       /* Clear the whole handle, not just sizeof(pointer) bytes of it. */
+       memset(rth, 0, sizeof(*rth));
+
+       rth->fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+       if (rth->fd < 0) {
+               perror("Cannot open netlink socket");
+               rth->fd = -1;
+               return -1;
+       }
+
+       rth->local.nl_family = AF_NETLINK;
+       rth->local.nl_groups = subscriptions;
+
+       if (bind(rth->fd, (struct sockaddr*)&rth->local, sizeof(rth->local)) < 0) {
+               perror("Cannot bind netlink socket");
+               goto fail;
+       }
+       addr_len = sizeof(rth->local);
+       if (getsockname(rth->fd, (struct sockaddr*)&rth->local, &addr_len) < 0) {
+               perror("Cannot getsockname");
+               goto fail;
+       }
+       if (addr_len != sizeof(rth->local)) {
+               fprintf(stderr, "Wrong address length %d\n", (int)addr_len);
+               goto fail;
+       }
+       if (rth->local.nl_family != AF_NETLINK) {
+               fprintf(stderr, "Wrong address family %d\n", rth->local.nl_family);
+               goto fail;
+       }
+       rth->seq = time(NULL);
+       return 0;
+
+fail:
+       /* Do not leak the descriptor when setup fails half-way. */
+       close(rth->fd);
+       rth->fd = -1;
+       return -1;
+}
+
+/*
+ * Send a dump request of the given message type for the given address
+ * family.  The request carries a fresh sequence number, which is also
+ * recorded in rth->dump.
+ *
+ * Returns the result of sendto(): bytes sent, or -1 on error.
+ */
+int rtnl_wilddump_request(struct rtnl_handle *rth, int family, int type)
+{
+       struct {
+               struct nlmsghdr nlh;
+               struct rtgenmsg g;
+       } req;
+       struct sockaddr_nl nladdr;
+
+       memset(&nladdr, 0, sizeof(nladdr));
+       nladdr.nl_family = AF_NETLINK;
+
+       /*
+        * sizeof(req) bytes go on the wire, padding included; zero them
+        * so no uninitialised stack contents are sent.
+        */
+       memset(&req, 0, sizeof(req));
+       req.nlh.nlmsg_len = sizeof(req);
+       req.nlh.nlmsg_type = type;
+       req.nlh.nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST;
+       req.nlh.nlmsg_pid = 0;
+       req.nlh.nlmsg_seq = rth->dump = ++rth->seq;
+       req.g.rtgen_family = family;
+
+       return sendto(rth->fd, (void*)&req, sizeof(req), 0, (struct sockaddr*)&nladdr, sizeof(nladdr));
+}
+
+/*
+ * Send len bytes from buf on the handle's socket, addressed to an
+ * AF_NETLINK destination with pid and groups both zero.
+ * Returns the result of sendto().
+ */
+int rtnl_send(struct rtnl_handle *rth, char *buf, int len)
+{
+       struct sockaddr_nl dest;
+
+       memset(&dest, 0, sizeof(dest));
+       dest.nl_family = AF_NETLINK;
+
+       return sendto(rth->fd, buf, len, 0,
+                     (struct sockaddr*)&dest, sizeof(dest));
+}
+
+/*
+ * Send a dump request of the given type whose payload is the len bytes
+ * at req.  The netlink header is built here and sent together with the
+ * payload as a two-element scatter/gather list.  The sequence number
+ * used is stored in rth->dump.
+ *
+ * Returns the result of sendmsg().
+ */
+int rtnl_dump_request(struct rtnl_handle *rth, int type, void *req, int len)
+{
+       struct nlmsghdr nlh;
+       struct sockaddr_nl nladdr;
+       struct iovec iov[2];
+       struct msghdr msg;
+
+       memset(&nladdr, 0, sizeof(nladdr));
+       nladdr.nl_family = AF_NETLINK;
+
+       nlh.nlmsg_len = NLMSG_LENGTH(len);
+       nlh.nlmsg_type = type;
+       nlh.nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST;
+       nlh.nlmsg_pid = 0;
+       nlh.nlmsg_seq = rth->dump = ++rth->seq;
+
+       iov[0].iov_base = &nlh;
+       iov[0].iov_len = sizeof(nlh);
+       iov[1].iov_base = req;
+       iov[1].iov_len = len;
+
+       memset(&msg, 0, sizeof(msg));
+       msg.msg_name = (void*)&nladdr;
+       msg.msg_namelen = sizeof(nladdr);
+       msg.msg_iov = iov;
+       msg.msg_iovlen = 2;
+
+       return sendmsg(rth->fd, &msg, 0);
+}
+
+/*
+ * Receive the replies to a previously sent dump request and feed them
+ * to filter(), one netlink message at a time.
+ *
+ * A message counts as part of the dump when its nlmsg_pid equals
+ * rth->local.nl_pid and its nlmsg_seq equals rth->dump.  Every other
+ * message is passed to junk() (with arg2) when junk is non-NULL, and is
+ * otherwise skipped.
+ *
+ * Returns 0 when NLMSG_DONE is seen, -1 on EOF or on an NLMSG_ERROR
+ * message, or the first negative value returned by filter()/junk().
+ * Calls exit(1) on an unexpected sender address length or when bytes
+ * remain in the buffer after the last complete message.
+ */
+int rtnl_dump_filter(struct rtnl_handle *rth,
+                    int (*filter)(struct sockaddr_nl *, struct nlmsghdr *n, void *),
+                    void *arg1,
+                    int (*junk)(struct sockaddr_nl *,struct nlmsghdr *n, void *),
+                    void *arg2)
+{
+       char    buf[8192];
+       struct sockaddr_nl nladdr;
+       struct iovec iov = { buf, sizeof(buf) };
+
+       while (1) {
+               int status;
+               struct nlmsghdr *h;
+
+               /* Rebuilt on every pass so msg_namelen and msg_flags start fresh. */
+               struct msghdr msg = {
+                       (void*)&nladdr, sizeof(nladdr),
+                       &iov,   1,
+                       NULL,   0,
+                       0
+               };
+
+               status = recvmsg(rth->fd, &msg, 0);
+
+               if (status < 0) {
+                       if (errno == EINTR)
+                               continue;
+                       /* Any other receive error is reported and the read is retried. */
+                       perror("OVERRUN");
+                       continue;
+               }
+               if (status == 0) {
+                       fprintf(stderr, "EOF on netlink\n");
+                       return -1;
+               }
+               if (msg.msg_namelen != sizeof(nladdr)) {
+                       fprintf(stderr, "sender address length == %d\n", msg.msg_namelen);
+                       exit(1);
+               }
+
+               /* Walk every complete message in the datagram; NLMSG_NEXT shrinks status. */
+               h = (struct nlmsghdr*)buf;
+               while (NLMSG_OK(h, status)) {
+                       int err;
+
+                       if (h->nlmsg_pid != rth->local.nl_pid ||
+                           h->nlmsg_seq != rth->dump) {
+                               if (junk) {
+                                       err = junk(&nladdr, h, arg2);
+                                       if (err < 0)
+                                               return err;
+                               }
+                               goto skip_it;
+                       }
+
+                       if (h->nlmsg_type == NLMSG_DONE)
+                               return 0;
+                       if (h->nlmsg_type == NLMSG_ERROR) {
+                               struct nlmsgerr *err = (struct nlmsgerr*)NLMSG_DATA(h);
+                               if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr))) {
+                                       fprintf(stderr, "ERROR truncated\n");
+                               } else {
+                                       /* err->error is negated to obtain an errno value for perror(). */
+                                       errno = -err->error;
+                                       perror("RTNETLINK answers");
+                               }
+                               return -1;
+                       }
+                       err = filter(&nladdr, h, arg1);
+                       if (err < 0)
+                               return err;
+
+skip_it:
+                       h = NLMSG_NEXT(h, status);
+               }
+               if (msg.msg_flags & MSG_TRUNC) {
+                       fprintf(stderr, "Message truncated\n");
+                       continue;
+               }
+               if (status) {
+                       fprintf(stderr, "!!!Remnant of size %d\n", status);
+                       exit(1);
+               }
+       }
+}
+
+/*
+ * Send request n to the netlink address (peer, groups) and wait for the
+ * reply that carries the same sequence number.
+ *
+ * n is stamped with a fresh sequence number; when answer is NULL the
+ * NLM_F_ACK flag is added to it.
+ *
+ * Received messages whose nlmsg_pid or nlmsg_seq do not match are handed
+ * to junk() (with jarg) when junk is non-NULL, then skipped.
+ *
+ * Returns 0 on an NLMSG_ERROR reply with error code 0, or, when answer
+ * is non-NULL, on the first matching non-error reply.  In both cases the
+ * reply is copied to answer if it is non-NULL; nlmsg_len bytes are
+ * copied, so the caller's buffer must be large enough.
+ * Returns -1 on send failure, EOF, a truncated message or an error
+ * reply, or the first negative value returned by junk().
+ * Calls exit(1) on a malformed message, an unexpected sender address
+ * length or leftover bytes in the receive buffer.
+ */
+int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer,
+             unsigned groups, struct nlmsghdr *answer,
+             int (*junk)(struct sockaddr_nl *,struct nlmsghdr *n, void *),
+             void *jarg)
+{
+       int status;
+       unsigned seq;
+       struct nlmsghdr *h;
+       struct sockaddr_nl nladdr;
+       struct iovec iov = { (void*)n, n->nlmsg_len };
+       char   buf[8192];
+       struct msghdr msg = {
+               (void*)&nladdr, sizeof(nladdr),
+               &iov,   1,
+               NULL,   0,
+               0
+       };
+
+       memset(&nladdr, 0, sizeof(nladdr));
+       nladdr.nl_family = AF_NETLINK;
+       nladdr.nl_pid = peer;
+       nladdr.nl_groups = groups;
+
+       n->nlmsg_seq = seq = ++rtnl->seq;
+       if (answer == NULL)
+               n->nlmsg_flags |= NLM_F_ACK;
+
+       status = sendmsg(rtnl->fd, &msg, 0);
+
+       if (status < 0) {
+               perror("Cannot talk to rtnetlink");
+               return -1;
+       }
+
+       /* From here on the same msghdr/iovec pair is reused for receiving. */
+       iov.iov_base = buf;
+
+       while (1) {
+               iov.iov_len = sizeof(buf);
+               status = recvmsg(rtnl->fd, &msg, 0);
+
+               if (status < 0) {
+                       if (errno == EINTR)
+                               continue;
+                       perror("OVERRUN");
+                       continue;
+               }
+               if (status == 0) {
+                       fprintf(stderr, "EOF on netlink\n");
+                       return -1;
+               }
+               if (msg.msg_namelen != sizeof(nladdr)) {
+                       fprintf(stderr, "sender address length == %d\n", msg.msg_namelen);
+                       exit(1);
+               }
+               /*
+                * Compare as signed: the aligned step below may drive
+                * status negative, which must end the loop rather than
+                * wrap to a huge unsigned value.
+                */
+               for (h = (struct nlmsghdr*)buf; status >= (int)sizeof(*h); ) {
+                       int err;
+                       int len = h->nlmsg_len;
+                       int l = len - sizeof(*h);
+
+                       if (l<0 || len>status) {
+                               if (msg.msg_flags & MSG_TRUNC) {
+                                       fprintf(stderr, "Truncated message\n");
+                                       return -1;
+                               }
+                               fprintf(stderr, "!!!malformed message: len=%d\n", len);
+                               exit(1);
+                       }
+
+                       if (h->nlmsg_pid != rtnl->local.nl_pid ||
+                           h->nlmsg_seq != seq) {
+                               if (junk) {
+                                       err = junk(&nladdr, h, jarg);
+                                       if (err < 0)
+                                               return err;
+                               }
+                               /*
+                                * Not ours: step over it.  A bare
+                                * "continue" would re-examine the same
+                                * header forever, since the loop has no
+                                * increment clause.
+                                */
+                               goto skip_it;
+                       }
+
+                       if (h->nlmsg_type == NLMSG_ERROR) {
+                               struct nlmsgerr *err = (struct nlmsgerr*)NLMSG_DATA(h);
+                               if (l < sizeof(struct nlmsgerr)) {
+                                       fprintf(stderr, "ERROR truncated\n");
+                               } else {
+                                       errno = -err->error;
+                                       /* Error code 0 is a positive acknowledgement. */
+                                       if (errno == 0) {
+                                               if (answer)
+                                                       memcpy(answer, h, h->nlmsg_len);
+                                               return 0;
+                                       }
+                                       perror("RTNETLINK answers");
+                               }
+                               return -1;
+                       }
+                       if (answer) {
+                               memcpy(answer, h, h->nlmsg_len);
+                               return 0;
+                       }
+
+                       fprintf(stderr, "Unexpected reply!!!\n");
+
+skip_it:
+                       status -= NLMSG_ALIGN(len);
+                       h = (struct nlmsghdr*)((char*)h + NLMSG_ALIGN(len));
+               }
+               if (msg.msg_flags & MSG_TRUNC) {
+                       fprintf(stderr, "Message truncated\n");
+                       continue;
+               }
+               if (status) {
+                       fprintf(stderr, "!!!Remnant of size %d\n", status);
+                       exit(1);
+               }
+       }
+}
+
+/*
+ * Receive netlink messages on rtnl->fd indefinitely and pass each one
+ * to handler() together with the sender address and jarg.
+ *
+ * Returns -1 on EOF or when a truncated datagram ends in a partial
+ * message, or the first negative value returned by handler().
+ * Calls exit(1) on a malformed message, an unexpected sender address
+ * length or leftover bytes after the last message.
+ */
+int rtnl_listen(struct rtnl_handle *rtnl, 
+             int (*handler)(struct sockaddr_nl *,struct nlmsghdr *n, void *),
+             void *jarg)
+{
+       int status;
+       struct nlmsghdr *h;
+       struct sockaddr_nl nladdr;
+       struct iovec iov;
+       char   buf[8192];
+       struct msghdr msg = {
+               (void*)&nladdr, sizeof(nladdr),
+               &iov,   1,
+               NULL,   0,
+               0
+       };
+
+       memset(&nladdr, 0, sizeof(nladdr));
+       nladdr.nl_family = AF_NETLINK;
+       nladdr.nl_pid = 0;
+       nladdr.nl_groups = 0;
+
+
+       iov.iov_base = buf;
+
+       while (1) {
+               /* Reset the length before every receive. */
+               iov.iov_len = sizeof(buf);
+               status = recvmsg(rtnl->fd, &msg, 0);
+
+               if (status < 0) {
+                       if (errno == EINTR)
+                               continue;
+                       /* Any other receive error is reported and the read is retried. */
+                       perror("OVERRUN");
+                       continue;
+               }
+               if (status == 0) {
+                       fprintf(stderr, "EOF on netlink\n");
+                       return -1;
+               }
+               if (msg.msg_namelen != sizeof(nladdr)) {
+                       fprintf(stderr, "Sender address length == %d\n", msg.msg_namelen);
+                       exit(1);
+               }
+               /* status tracks the bytes of the datagram not yet consumed. */
+               for (h = (struct nlmsghdr*)buf; status >= sizeof(*h); ) {
+                       int err;
+                       int len = h->nlmsg_len;
+                       int l = len - sizeof(*h);
+
+                       if (l<0 || len>status) {
+                               if (msg.msg_flags & MSG_TRUNC) {
+                                       fprintf(stderr, "Truncated message\n");
+                                       return -1;
+                               }
+                               fprintf(stderr, "!!!malformed message: len=%d\n", len);
+                               exit(1);
+                       }
+
+                       err = handler(&nladdr, h, jarg);
+                       if (err < 0)
+                               return err;
+
+                       status -= NLMSG_ALIGN(len);
+                       h = (struct nlmsghdr*)((char*)h + NLMSG_ALIGN(len));
+               }
+               if (msg.msg_flags & MSG_TRUNC) {
+                       fprintf(stderr, "Message truncated\n");
+                       continue;
+               }
+               if (status) {
+                       fprintf(stderr, "!!!Remnant of size %d\n", status);
+                       exit(1);
+               }
+       }
+}
+
+/*
+ * Read netlink messages from a stdio stream and pass each one to
+ * handler() with an all-zero AF_NETLINK sender address and jarg.
+ *
+ * Each record is a struct nlmsghdr followed by its payload, padded to
+ * NLMSG_ALIGN.  A record must fit in the 8192-byte local buffer.
+ *
+ * Returns 0 on a clean end of file, -1 on a read error, a malformed
+ * length or a truncated record, or the first negative value returned
+ * by handler().
+ */
+int rtnl_from_file(FILE *rtnl, 
+             int (*handler)(struct sockaddr_nl *,struct nlmsghdr *n, void *),
+             void *jarg)
+{
+       size_t nread;
+       struct sockaddr_nl nladdr;
+       char   buf[8192];
+       struct nlmsghdr *h = (void*)buf;
+
+       memset(&nladdr, 0, sizeof(nladdr));
+       nladdr.nl_family = AF_NETLINK;
+       nladdr.nl_pid = 0;
+       nladdr.nl_groups = 0;
+
+       while (1) {
+               int err, len;
+               int l;
+
+               /*
+                * fread() returns a size_t count and never a negative
+                * value; errors are reported through ferror().
+                */
+               nread = fread(buf, 1, sizeof(*h), rtnl);
+
+               if (nread == 0) {
+                       if (!ferror(rtnl))
+                               return 0;
+                       if (errno == EINTR) {
+                               clearerr(rtnl);
+                               continue;
+                       }
+                       perror("rtnl_from_file: fread");
+                       return -1;
+               }
+               if (nread < sizeof(*h)) {
+                       /* A partial header must not be interpreted. */
+                       if (ferror(rtnl))
+                               perror("rtnl_from_file: fread");
+                       else
+                               fprintf(stderr, "rtnl-from_file: truncated message\n");
+                       return -1;
+               }
+
+               len = h->nlmsg_len;
+               l = len - sizeof(*h);
+
+               if (l<0 || (size_t)len>sizeof(buf)) {
+                       fprintf(stderr, "!!!malformed message: len=%d @%ld\n",
+                               len, ftell(rtnl));
+                       return -1;
+               }
+
+               nread = fread(NLMSG_DATA(h), 1, NLMSG_ALIGN(l), rtnl);
+
+               if (nread < (size_t)l) {
+                       if (ferror(rtnl))
+                               perror("rtnl_from_file: fread");
+                       else
+                               fprintf(stderr, "rtnl-from_file: truncated message\n");
+                       return -1;
+               }
+
+               err = handler(&nladdr, h, jarg);
+               if (err < 0)
+                       return err;
+       }
+}
+
+/*
+ * Append a 4-byte attribute of the given type to message n.
+ * maxlen is the capacity of the buffer holding n.
+ * Returns 0 on success, -1 if the attribute would not fit.
+ */
+int addattr32(struct nlmsghdr *n, int maxlen, int type, __u32 data)
+{
+       int attr_len = RTA_LENGTH(4);
+       struct rtattr *attr;
+
+       if (NLMSG_ALIGN(n->nlmsg_len) + attr_len > maxlen)
+               return -1;
+
+       /* The new attribute starts at the aligned end of the message. */
+       attr = (struct rtattr*)((char*)n + NLMSG_ALIGN(n->nlmsg_len));
+       attr->rta_len = attr_len;
+       attr->rta_type = type;
+       memcpy(RTA_DATA(attr), &data, 4);
+
+       n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + attr_len;
+       return 0;
+}
+
+/*
+ * Append an attribute of the given type carrying alen bytes copied
+ * from data to message n.  maxlen is the capacity of the buffer
+ * holding n.
+ * Returns 0 on success, -1 if the attribute would not fit.
+ */
+int addattr_l(struct nlmsghdr *n, int maxlen, int type, void *data, int alen)
+{
+       int attr_len = RTA_LENGTH(alen);
+       struct rtattr *attr;
+
+       if (NLMSG_ALIGN(n->nlmsg_len) + attr_len > maxlen)
+               return -1;
+
+       /* The new attribute starts at the aligned end of the message. */
+       attr = (struct rtattr*)((char*)n + NLMSG_ALIGN(n->nlmsg_len));
+       attr->rta_len = attr_len;
+       attr->rta_type = type;
+       memcpy(RTA_DATA(attr), data, alen);
+
+       n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + attr_len;
+       return 0;
+}
+
+/*
+ * Append a 4-byte sub-attribute of the given type inside the attribute
+ * rta.  maxlen is the capacity of the buffer holding rta.
+ * Returns 0 on success, -1 if the sub-attribute would not fit.
+ */
+int rta_addattr32(struct rtattr *rta, int maxlen, int type, __u32 data)
+{
+       int len = RTA_LENGTH(4);
+       struct rtattr *subrta;
+
+       if (RTA_ALIGN(rta->rta_len) + len > maxlen)
+               return -1;
+       subrta = (struct rtattr*)(((char*)rta) + RTA_ALIGN(rta->rta_len));
+       subrta->rta_type = type;
+       subrta->rta_len = len;
+       memcpy(RTA_DATA(subrta), &data, 4);
+       /*
+        * Use the attribute alignment macro here as well, so the new
+        * length matches the offset at which subrta was placed.
+        */
+       rta->rta_len = RTA_ALIGN(rta->rta_len) + len;
+       return 0;
+}
+
+/*
+ * Append a sub-attribute of the given type carrying alen bytes copied
+ * from data inside the attribute rta.  maxlen is the capacity of the
+ * buffer holding rta.
+ * Returns 0 on success, -1 if the sub-attribute would not fit.
+ */
+int rta_addattr_l(struct rtattr *rta, int maxlen, int type, void *data, int alen)
+{
+       struct rtattr *subrta;
+       int len = RTA_LENGTH(alen);
+
+       if (RTA_ALIGN(rta->rta_len) + len > maxlen)
+               return -1;
+       subrta = (struct rtattr*)(((char*)rta) + RTA_ALIGN(rta->rta_len));
+       subrta->rta_type = type;
+       subrta->rta_len = len;
+       memcpy(RTA_DATA(subrta), data, alen);
+       /*
+        * Use the attribute alignment macro here as well, so the new
+        * length matches the offset at which subrta was placed.
+        */
+       rta->rta_len = RTA_ALIGN(rta->rta_len) + len;
+       return 0;
+}
+
+
+/*
+ * Index a run of attributes by type: for every attribute in the len
+ * bytes starting at rta whose type is <= max, store its address in
+ * tb[type].  tb is not cleared here, and a later attribute of the same
+ * type overwrites an earlier one.
+ *
+ * Prints a diagnostic when bytes are left over after the last complete
+ * attribute.  Always returns 0.
+ */
+int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
+{
+       for (; RTA_OK(rta, len); rta = RTA_NEXT(rta,len)) {
+               if (rta->rta_type > max)
+                       continue;
+               tb[rta->rta_type] = rta;
+       }
+
+       if (len != 0)
+               fprintf(stderr, "!!!Deficit %d, rta_len=%d\n", len, rta->rta_len);
+
+       return 0;
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..082cb3c45d65e9425f119c7bc7b0dc0dd84236d8 100644 (file)
@@ -0,0 +1,91 @@
+/*
+ * ll_addr.c
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/sockios.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "utils.h"
+
+
+/*
+ * Format a link-layer address as text in buf (capacity blen bytes).
+ *
+ * A 4-byte address on an ARPHRD_TUNNEL, ARPHRD_SIT or ARPHRD_IPGRE
+ * device is printed with inet_ntop(AF_INET).  Anything else is printed
+ * as lowercase hex bytes separated by ':'.
+ *
+ * The hex form never writes more than blen bytes: output that does not
+ * fit is truncated and still NUL-terminated.  An empty address yields
+ * an empty string.  Returns buf (or inet_ntop()'s result).
+ */
+const char *ll_addr_n2a(unsigned char *addr, int alen, int type, char *buf, int blen)
+{
+       int i;
+       int l;
+
+       if (alen == 4 &&
+           (type == ARPHRD_TUNNEL || type == ARPHRD_SIT || type == ARPHRD_IPGRE)) {
+               return inet_ntop(AF_INET, addr, buf, blen);
+       }
+       if (blen <= 0)
+               return buf;
+
+       buf[0] = 0;
+       l = 0;
+       /*
+        * Stop as soon as only the terminator fits.  The remaining size
+        * passed to snprintf() is therefore always positive; a negative
+        * value would be converted to a huge size_t and overrun buf.
+        */
+       for (i = 0; i < alen && l < blen - 1; i++) {
+               int n = snprintf(buf + l, blen - l, "%s%02x",
+                                i ? ":" : "", addr[i]);
+               if (n < 0)
+                       break;
+               l += n;
+       }
+       return buf;
+}
+
+/*
+ * Parse a textual link-layer address into lladdr (capacity len bytes).
+ *
+ * Text containing a '.' is parsed as an IPv4 address via get_addr_1()
+ * and yields 4 bytes.  Otherwise the text is a list of hex bytes
+ * separated by ':'.  NOTE: in that form arg is modified in place, each
+ * ':' being overwritten with a NUL.
+ *
+ * Returns the number of bytes stored, or -1 (after printing a
+ * diagnostic for parse errors) when the text is invalid or holds more
+ * bytes than lladdr can take.
+ */
+int ll_addr_a2n(unsigned char *lladdr, int len, char *arg)
+{
+       if (strchr(arg, '.')) {
+               inet_prefix pfx;
+               if (get_addr_1(&pfx, arg, AF_INET)) {
+                       fprintf(stderr, "\"%s\" is invalid lladdr.\n", arg);
+                       return -1;
+               }
+               if (len < 4)
+                       return -1;
+               memcpy(lladdr, pfx.data, 4);
+               return 4;
+       } else {
+               int i;
+
+               for (i=0; i<len; i++) {
+                       /* %x stores through an unsigned int pointer. */
+                       unsigned int temp;
+                       char *cp = strchr(arg, ':');
+                       if (cp) {
+                               *cp = 0;
+                               cp++;
+                       }
+                       if (sscanf(arg, "%x", &temp) != 1) {
+                               fprintf(stderr, "\"%s\" is invalid lladdr.\n", arg);
+                               return -1;
+                       }
+                       if (temp > 255) {
+                               fprintf(stderr, "\"%s\" is invalid lladdr.\n", arg);
+                               return -1;
+                       }
+                       lladdr[i] = temp;
+                       if (!cp)
+                               break;
+                       arg = cp;
+               }
+               /*
+                * Leaving the loop without hitting the last component
+                * means the text has more bytes than lladdr can hold.
+                * Reporting len+1 would claim a byte never stored.
+                */
+               if (i == len) {
+                       fprintf(stderr, "\"%s\" is invalid lladdr.\n", arg);
+                       return -1;
+               }
+               return i+1;
+       }
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e5a95e6a4fa09a0289b358d4b118a3213297389c 100644 (file)
@@ -0,0 +1,169 @@
+/*
+ * ll_map.c
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <string.h>
+
+#include "libnetlink.h"
+#include "ll_map.h"
+
+/*
+ * One remembered network interface, filled in by ll_remember_index().
+ * Entries that fall into the same bucket are chained through next.
+ */
+struct idxmap
+{
+       struct idxmap * next;           /* next entry in the same bucket */
+       int             index;          /* ifi_index of the interface */
+       int             type;           /* ifi_type */
+       int             alen;           /* payload length of IFLA_ADDRESS as received */
+       unsigned        flags;          /* ifi_flags */
+       unsigned char   addr[8];        /* at most the first 8 bytes of IFLA_ADDRESS */
+       char            name[16];       /* IFLA_IFNAME string */
+};
+
+/* Bucket heads; an interface lives in bucket (index & 0xF). */
+static struct idxmap *idxmap[16];
+
+/*
+ * Dump-filter callback that records one interface in the idxmap table.
+ *
+ * Only RTM_NEWLINK messages carrying an IFLA_IFNAME attribute are
+ * recorded.  An existing entry with the same ifi_index is updated in
+ * place; otherwise a new entry is appended to its bucket.  who and arg
+ * are unused.
+ *
+ * Returns -1 when the message is too short to hold a struct ifinfomsg,
+ * 0 otherwise (including when the message is ignored or malloc fails).
+ */
+int ll_remember_index(struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
+{
+       int h;
+       size_t nlen;
+       struct ifinfomsg *ifi = NLMSG_DATA(n);
+       struct idxmap *im, **imp;
+       struct rtattr *tb[IFLA_MAX+1];
+
+       if (n->nlmsg_type != RTM_NEWLINK)
+               return 0;
+
+       /* Size of the structure, not of the pointer to it. */
+       if (n->nlmsg_len < NLMSG_LENGTH(sizeof(*ifi)))
+               return -1;
+
+
+       memset(tb, 0, sizeof(tb));
+       parse_rtattr(tb, IFLA_MAX, IFLA_RTA(ifi), IFLA_PAYLOAD(n));
+       if (tb[IFLA_IFNAME] == NULL)
+               return 0;
+
+       h = ifi->ifi_index&0xF;
+
+       /* On a miss imp ends up pointing at the bucket's tail link. */
+       for (imp=&idxmap[h]; (im=*imp)!=NULL; imp = &im->next)
+               if (im->index == ifi->ifi_index)
+                       break;
+
+       if (im == NULL) {
+               im = malloc(sizeof(*im));
+               if (im == NULL)
+                       return 0;
+               im->next = *imp;
+               im->index = ifi->ifi_index;
+               *imp = im;
+       }
+
+       im->type = ifi->ifi_type;
+       im->flags = ifi->ifi_flags;
+       if (tb[IFLA_ADDRESS]) {
+               int alen;
+               im->alen = alen = RTA_PAYLOAD(tb[IFLA_ADDRESS]);
+               if (alen > sizeof(im->addr))
+                       alen = sizeof(im->addr);
+               memcpy(im->addr, RTA_DATA(tb[IFLA_ADDRESS]), alen);
+       } else {
+               im->alen = 0;
+               memset(im->addr, 0, sizeof(im->addr));
+       }
+
+       /*
+        * Bounded copy: the attribute comes from outside this process
+        * and must neither overflow name[] nor be read past its payload.
+        */
+       nlen = RTA_PAYLOAD(tb[IFLA_IFNAME]);
+       if (nlen >= sizeof(im->name))
+               nlen = sizeof(im->name) - 1;
+       memcpy(im->name, RTA_DATA(tb[IFLA_IFNAME]), nlen);
+       im->name[nlen] = 0;
+       return 0;
+}
+
+/*
+ * Translate an interface index to a name.
+ *
+ * Index 0 yields "*".  A remembered index yields the stored name (a
+ * pointer into the table, not into buf).  An unknown index is formatted
+ * as "if<idx>" into buf, of which at most 16 bytes are written.
+ */
+const char *ll_idx_n2a(int idx, char *buf)
+{
+       struct idxmap *im;
+
+       if (idx == 0)
+               return "*";
+
+       im = idxmap[idx&0xF];
+       while (im != NULL) {
+               if (im->index == idx)
+                       return im->name;
+               im = im->next;
+       }
+
+       snprintf(buf, 16, "if%d", idx);
+       return buf;
+}
+
+
+/*
+ * Convenience form of ll_idx_n2a() that supplies its own static
+ * 16-byte buffer for the fallback name.  That buffer is shared by all
+ * calls, so a returned fallback name is overwritten by the next one.
+ */
+const char *ll_index_to_name(int idx)
+{
+       static char fallback[16];
+       const char *name = ll_idx_n2a(idx, fallback);
+
+       return name;
+}
+
+/*
+ * Return the remembered ifi_type of interface idx, or -1 when idx is 0
+ * or has not been recorded.
+ */
+int ll_index_to_type(int idx)
+{
+       struct idxmap *im = (idx != 0) ? idxmap[idx&0xF] : NULL;
+
+       while (im != NULL && im->index != idx)
+               im = im->next;
+
+       return im ? im->type : -1;
+}
+
+/*
+ * Return the remembered ifi_flags of interface idx, or 0 when idx is 0
+ * or has not been recorded.
+ */
+unsigned ll_index_to_flags(int idx)
+{
+       struct idxmap *im = (idx != 0) ? idxmap[idx&0xF] : NULL;
+
+       while (im != NULL && im->index != idx)
+               im = im->next;
+
+       return im ? im->flags : 0;
+}
+
+/*
+ * Translate an interface name to its index by scanning every bucket.
+ *
+ * The most recent successful lookup is cached in static storage and
+ * answered without a scan.  Returns 0 when name is NULL or unknown.
+ */
+int ll_name_to_index(char *name)
+{
+       static char ncache[16];
+       static int icache;
+       int bucket;
+
+       if (name == NULL)
+               return 0;
+
+       if (icache != 0 && strcmp(name, ncache) == 0)
+               return icache;
+
+       for (bucket = 0; bucket < 16; bucket++) {
+               struct idxmap *im;
+
+               for (im = idxmap[bucket]; im != NULL; im = im->next) {
+                       if (strcmp(im->name, name) != 0)
+                               continue;
+                       /* name equals im->name here, so it fits ncache. */
+                       icache = im->index;
+                       strcpy(ncache, name);
+                       return im->index;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Fill the interface table: send an RTM_GETLINK dump request for
+ * AF_UNSPEC and feed every reply to ll_remember_index().
+ *
+ * Returns 0 on success.  Prints a diagnostic and calls exit(1) when
+ * the request cannot be sent or the dump ends with an error.
+ */
+int ll_init_map(struct rtnl_handle *rth)
+{
+       int rc;
+
+       rc = rtnl_wilddump_request(rth, AF_UNSPEC, RTM_GETLINK);
+       if (rc < 0) {
+               perror("Cannot send dump request");
+               exit(1);
+       }
+
+       rc = rtnl_dump_filter(rth, ll_remember_index, &idxmap, NULL, NULL);
+       if (rc < 0) {
+               fprintf(stderr, "Dump terminated\n");
+               exit(1);
+       }
+
+       return 0;
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..71f149dc207afb6514c2d47694500659df7c0ce2 100644 (file)
@@ -0,0 +1,127 @@
+/*
+ * ll_proto.c
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/sockios.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "utils.h"
+
+
+/*
+ * Table mapping ETH_P_* protocol numbers to the names used on the
+ * command line and in output.  __PF(f,n) expands to the initializer
+ * { ETH_P_f, "n" }.  Entries wrapped in #ifdef are included only when
+ * the headers in use define that constant.
+ *
+ * Lookups scan from the top and stop at the first match.  ETH_P_IP
+ * appears twice ("ip", and "ipv4" at the end), so both names are
+ * accepted as input while "ip" is the one printed.
+ */
+#define __PF(f,n) { ETH_P_##f, #n },
+static struct {
+       int id;
+       char *name;
+} llproto_names[] = {
+__PF(LOOP,loop)
+__PF(PUP,pup)  
+#ifdef ETH_P_PUPAT
+__PF(PUPAT,pupat)
+#endif    
+__PF(IP,ip)
+__PF(X25,x25)
+__PF(ARP,arp)
+__PF(BPQ,bpq)
+#ifdef ETH_P_IEEEPUP
+__PF(IEEEPUP,ieeepup)
+#endif  
+#ifdef ETH_P_IEEEPUPAT
+__PF(IEEEPUPAT,ieeepupat)
+#endif  
+__PF(DEC,dec)       
+__PF(DNA_DL,dna_dl)    
+__PF(DNA_RC,dna_rc)    
+__PF(DNA_RT,dna_rt)    
+__PF(LAT,lat)       
+__PF(DIAG,diag)      
+__PF(CUST,cust)      
+__PF(SCA,sca)       
+__PF(RARP,rarp)      
+__PF(ATALK,atalk)     
+__PF(AARP,aarp)      
+__PF(IPX,ipx)       
+__PF(IPV6,ipv6)      
+#ifdef ETH_P_PPP_DISC
+__PF(PPP_DISC,ppp_disc)
+#endif      
+#ifdef ETH_P_PPP_SES
+__PF(PPP_SES,ppp_ses)
+#endif      
+#ifdef ETH_P_ATMMPOA
+__PF(ATMMPOA,atmmpoa)
+#endif
+#ifdef ETH_P_ATMFATE
+__PF(ATMFATE,atmfate)
+#endif      
+
+__PF(802_3,802_3)     
+__PF(AX25,ax25)      
+__PF(ALL,all)       
+__PF(802_2,802_2)     
+__PF(SNAP,snap)      
+__PF(DDCMP,ddcmp)     
+__PF(WAN_PPP,wan_ppp)   
+__PF(PPP_MP,ppp_mp)    
+__PF(LOCALTALK,localtalk) 
+__PF(PPPTALK,ppptalk)   
+__PF(TR_802_2,tr_802_2)  
+__PF(MOBITEX,mobitex)   
+__PF(CONTROL,control)   
+__PF(IRDA,irda)      
+#ifdef ETH_P_ECONET
+__PF(ECONET,econet)
+#endif      
+
+/* Entries written out by hand instead of through __PF. */
+{ 0x8100, "802.1Q" },
+{ ETH_P_IP, "ipv4" },
+};
+#undef __PF
+
+
+/*
+ * Translate a protocol number given in network byte order to a name.
+ *
+ * Returns the table's name for the first entry that matches (a pointer
+ * into the table, not into buf).  An unknown protocol is formatted as
+ * "[<decimal>]" into buf (capacity len) and buf is returned.
+ */
+char * ll_proto_n2a(unsigned short id, char *buf, int len)
+{
+       const int count = sizeof(llproto_names)/sizeof(llproto_names[0]);
+       int i = 0;
+
+       id = ntohs(id);
+
+       while (i < count) {
+               if (llproto_names[i].id == id)
+                       return llproto_names[i].name;
+               i++;
+       }
+
+       snprintf(buf, len, "[%d]", id);
+       return buf;
+}
+
+/*
+ * Translate a protocol name or number to a protocol id in network
+ * byte order, stored in *id.
+ *
+ * buf is first compared case-insensitively against the name table;
+ * failing that it is handed to get_u16() with base 0.
+ * Returns 0 on success, -1 when get_u16() rejects the text.
+ */
+int ll_proto_a2n(unsigned short *id, char *buf)
+{
+       const int count = sizeof(llproto_names)/sizeof(llproto_names[0]);
+       int i;
+
+       for (i = 0; i < count; i++) {
+               if (strcasecmp(llproto_names[i].name, buf) != 0)
+                       continue;
+               *id = htons(llproto_names[i].id);
+               return 0;
+       }
+
+       if (get_u16(id, buf, 0) != 0)
+               return -1;
+
+       *id = htons(*id);
+       return 0;
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..165ecfa60b5cf95d1967556fcf50da173b525c96 100644 (file)
@@ -0,0 +1,128 @@
+/*
+ * ll_types.c
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/sockios.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+/*
+ * Map an ARPHRD_* hardware type to a short printable name.
+ *
+ * Returns the name from the static table below when the type is
+ * listed; otherwise formats "[<type>]" into buf (at most len bytes)
+ * and returns buf.
+ */
+char * ll_type_n2a(int type, char *buf, int len)
+{
+/* __PF(f,n) expands to one table entry: { ARPHRD_f, "n" }, */
+#define __PF(f,n) { ARPHRD_##f, #n },
+static struct {
+       int type;
+       char *name;
+} arphrd_names[] = {
+{ 0, "generic" },
+__PF(ETHER,ether)
+__PF(EETHER,eether)
+__PF(AX25,ax25)
+__PF(PRONET,pronet)
+__PF(CHAOS,chaos)
+/* ARPHRD_IEEE802 is only called "tr" when the headers do not provide
+   a separate ARPHRD_IEEE802_TR (which gets the "tr" name further down). */
+#ifdef ARPHRD_IEEE802_TR
+__PF(IEEE802,ieee802)
+#else
+__PF(IEEE802,tr)
+#endif
+__PF(ARCNET,arcnet)
+__PF(APPLETLK,atalk)
+__PF(DLCI,dlci)
+#ifdef ARPHRD_ATM
+__PF(ATM,atm)
+#endif
+__PF(METRICOM,metricom)
+#ifdef ARPHRD_IEEE1394
+__PF(IEEE1394,ieee1394)
+#endif
+
+__PF(SLIP,slip)
+__PF(CSLIP,cslip)
+__PF(SLIP6,slip6)
+__PF(CSLIP6,cslip6)
+__PF(RSRVD,rsrvd)
+__PF(ADAPT,adapt)
+__PF(ROSE,rose)
+__PF(X25,x25)
+#ifdef ARPHRD_HWX25
+__PF(HWX25,hwx25)
+#endif
+__PF(PPP,ppp)
+__PF(HDLC,hdlc)
+__PF(LAPB,lapb)
+#ifdef ARPHRD_DDCMP
+__PF(DDCMP,ddcmp)
+#endif
+#ifdef ARPHRD_RAWHDLC
+__PF(RAWHDLC,rawhdlc)
+#endif
+
+__PF(TUNNEL,ipip)
+__PF(TUNNEL6,tunnel6)
+__PF(FRAD,frad)
+__PF(SKIP,skip)
+__PF(LOOPBACK,loopback)
+__PF(LOCALTLK,ltalk)
+__PF(FDDI,fddi)
+__PF(BIF,bif)
+__PF(SIT,sit)
+__PF(IPDDP,ip/ddp)
+__PF(IPGRE,gre)
+__PF(PIMREG,pimreg)
+__PF(HIPPI,hippi)
+__PF(ASH,ash)
+__PF(ECONET,econet)
+__PF(IRDA,irda)
+__PF(FCPP,fcpp)
+__PF(FCAL,fcal)
+__PF(FCPL,fcpl)
+/* Thirteen consecutive type values starting at ARPHRD_FCFABRIC. */
+__PF(FCFABRIC,fcfb0)
+__PF(FCFABRIC+1,fcfb1)
+__PF(FCFABRIC+2,fcfb2)
+__PF(FCFABRIC+3,fcfb3)
+__PF(FCFABRIC+4,fcfb4)
+__PF(FCFABRIC+5,fcfb5)
+__PF(FCFABRIC+6,fcfb6)
+__PF(FCFABRIC+7,fcfb7)
+__PF(FCFABRIC+8,fcfb8)
+__PF(FCFABRIC+9,fcfb9)
+__PF(FCFABRIC+10,fcfb10)
+__PF(FCFABRIC+11,fcfb11)
+__PF(FCFABRIC+12,fcfb12)
+#ifdef ARPHRD_IEEE802_TR
+__PF(IEEE802_TR,tr)
+#endif
+#ifdef ARPHRD_IEEE80211
+__PF(IEEE80211,ieee802.11)
+#endif
+#ifdef ARPHRD_VOID
+__PF(VOID,void)
+#endif
+};
+#undef __PF
+
+        int i;
+        /* Linear search; the first entry with a matching type wins. */
+        for (i=0; i<sizeof(arphrd_names)/sizeof(arphrd_names[0]); i++) {
+                 if (arphrd_names[i].type == type)
+                       return arphrd_names[i].name;
+       }
+        /* Unknown type: fall back to its decimal value in brackets. */
+        snprintf(buf, len, "[%d]", type);
+        return buf;
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..429f73e930c8d0d2f190cccd73e83dc9cb2425c3 100644 (file)
@@ -0,0 +1,388 @@
+/*
+ * rt_names.c          rtnetlink names DB.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/time.h>
+
+/*
+ * Load "<id> <name>" pairs from the text database "file" into tab[].
+ *
+ * file: path of the database; if it cannot be opened nothing is done.
+ * tab:  array of "size" name pointers; tab[id] is replaced by a
+ *       strdup()ed copy of the name for every id in 0 .. size-1.
+ *
+ * Blank lines and lines whose first non-blank character is '#' are
+ * skipped.  The id may be decimal or 0x-prefixed hex.  Ids outside the
+ * table are ignored.  The first line that does not parse is reported
+ * on stderr and ends the load.
+ */
+static void rtnl_tab_initialize(char *file, char **tab, int size)
+{
+       char buf[512];
+       FILE *fp;
+
+       fp = fopen(file, "r");
+       if (!fp)
+               return;
+       while (fgets(buf, sizeof(buf), fp)) {
+               char *p = buf;
+               int id;
+               char namebuf[512];
+
+               while (*p == ' ' || *p == '\t')
+                       p++;
+               if (*p == '#' || *p == '\n' || *p == 0)
+                       continue;
+               if (sscanf(p, "0x%x %s\n", &id, namebuf) != 2 &&
+                   sscanf(p, "0x%x %s #", &id, namebuf) != 2 &&
+                   sscanf(p, "%d %s\n", &id, namebuf) != 2 &&
+                   sscanf(p, "%d %s #", &id, namebuf) != 2) {
+                       fprintf(stderr, "Database %s is corrupted at %s\n",
+                               file, p);
+                       /* Stop loading, but still close the stream below. */
+                       break;
+               }
+
+               /* Valid slots are 0 .. size-1; id == size would overrun tab[]. */
+               if (id<0 || id>=size)
+                       continue;
+
+               tab[id] = strdup(namebuf);
+       }
+       fclose(fp);
+}
+
+
+/* Built-in routing protocol names, indexed by protocol id. */
+static char * rtnl_rtprot_tab[256] = {
+       [0]  = "none",
+       [1]  = "redirect",
+       [2]  = "kernel",
+       [3]  = "boot",
+       [4]  = "static",
+       [8]  = "gated",
+       [9]  = "ra",
+       [10] = "mrt",
+       [11] = "zebra",
+       [12] = "bird",
+};
+
+/* Set to 1 by rtnl_rtprot_initialize(). */
+static int rtnl_rtprot_init;
+
+/* Mark the table as initialized and merge /etc/iproute2/rt_protos into it. */
+static void rtnl_rtprot_initialize(void)
+{
+       rtnl_rtprot_init = 1;
+       rtnl_tab_initialize("/etc/iproute2/rt_protos", rtnl_rtprot_tab,
+                           sizeof(rtnl_rtprot_tab)/sizeof(rtnl_rtprot_tab[0]));
+}
+
+/*
+ * Return the name of routing protocol "id".  When no name is known
+ * (or id is outside 0..255) the decimal value is written to buf and
+ * buf is returned.
+ */
+char * rtnl_rtprot_n2a(int id, char *buf, int len)
+{
+       if (id >= 0 && id < 256) {
+               /* Load the database lazily, on the first lookup miss. */
+               if (!rtnl_rtprot_tab[id] && !rtnl_rtprot_init)
+                       rtnl_rtprot_initialize();
+               if (rtnl_rtprot_tab[id])
+                       return rtnl_rtprot_tab[id];
+       }
+       snprintf(buf, len, "%d", id);
+       return buf;
+}
+
+/*
+ * Resolve a routing protocol given by name or by number (0..255).
+ * Stores the value in *id and returns 0; returns -1 when arg is
+ * neither a known name nor a valid number.
+ *
+ * The last name that matched is remembered in cache/res so that a
+ * repeated lookup of the same name skips the table scan.
+ */
+int rtnl_rtprot_a2n(__u32 *id, char *arg)
+{
+       static char *cache = NULL;
+       static unsigned long res;
+       unsigned long val;
+       char *end;
+       int i;
+
+       if (cache && strcmp(cache, arg) == 0) {
+               *id = res;
+               return 0;
+       }
+
+       if (!rtnl_rtprot_init)
+               rtnl_rtprot_initialize();
+
+       for (i=0; i<256; i++) {
+               if (rtnl_rtprot_tab[i] &&
+                   strcmp(rtnl_rtprot_tab[i], arg) == 0) {
+                       cache = rtnl_rtprot_tab[i];
+                       res = i;
+                       *id = res;
+                       return 0;
+               }
+       }
+
+       /*
+        * Parse into a local: "res" belongs to "cache" and must only
+        * change together with it, otherwise a later cache hit would
+        * hand back the number parsed here instead of the cached id.
+        */
+       val = strtoul(arg, &end, 0);
+       if (!end || end == arg || *end || val > 255)
+               return -1;
+       *id = val;
+       return 0;
+}
+
+
+
+/* Route scope names, indexed by scope value. */
+static char * rtnl_rtscope_tab[256] = {
+       [0] = "global",
+};
+
+/* Set to 1 by rtnl_rtscope_initialize(). */
+static int rtnl_rtscope_init;
+
+/*
+ * Mark the table as initialized, install the remaining built-in scope
+ * names and then merge /etc/iproute2/rt_scopes into the table.
+ */
+static void rtnl_rtscope_initialize(void)
+{
+       rtnl_rtscope_init = 1;
+
+       rtnl_rtscope_tab[200] = "site";
+       rtnl_rtscope_tab[253] = "link";
+       rtnl_rtscope_tab[254] = "host";
+       rtnl_rtscope_tab[255] = "nowhere";
+
+       rtnl_tab_initialize("/etc/iproute2/rt_scopes", rtnl_rtscope_tab,
+                           sizeof(rtnl_rtscope_tab)/sizeof(rtnl_rtscope_tab[0]));
+}
+
+/*
+ * Return the name of route scope "id".  When no name is known (or id
+ * is outside 0..255) the decimal value is written to buf and buf is
+ * returned.
+ */
+char * rtnl_rtscope_n2a(int id, char *buf, int len)
+{
+       if (id >= 0 && id < 256) {
+               /* Load built-ins and the database on the first miss. */
+               if (!rtnl_rtscope_tab[id] && !rtnl_rtscope_init)
+                       rtnl_rtscope_initialize();
+               if (rtnl_rtscope_tab[id])
+                       return rtnl_rtscope_tab[id];
+       }
+       snprintf(buf, len, "%d", id);
+       return buf;
+}
+
+/*
+ * Resolve a route scope given by name or by number (0..255).
+ * Stores the value in *id and returns 0; returns -1 when arg is
+ * neither a known name nor a valid number.
+ *
+ * The last name that matched is remembered in cache/res so that a
+ * repeated lookup of the same name skips the table scan.
+ */
+int rtnl_rtscope_a2n(__u32 *id, char *arg)
+{
+       static char *cache = NULL;
+       static unsigned long res;
+       unsigned long val;
+       char *end;
+       int i;
+
+       if (cache && strcmp(cache, arg) == 0) {
+               *id = res;
+               return 0;
+       }
+
+       if (!rtnl_rtscope_init)
+               rtnl_rtscope_initialize();
+
+       for (i=0; i<256; i++) {
+               if (rtnl_rtscope_tab[i] &&
+                   strcmp(rtnl_rtscope_tab[i], arg) == 0) {
+                       cache = rtnl_rtscope_tab[i];
+                       res = i;
+                       *id = res;
+                       return 0;
+               }
+       }
+
+       /*
+        * Parse into a local: "res" belongs to "cache" and must only
+        * change together with it, otherwise a later cache hit would
+        * hand back the number parsed here instead of the cached id.
+        */
+       val = strtoul(arg, &end, 0);
+       if (!end || end == arg || *end || val > 255)
+               return -1;
+       *id = val;
+       return 0;
+}
+
+
+
+/* Realm names, indexed by realm id. */
+static char * rtnl_rtrealm_tab[256] = {
+       [0] = "unknown",
+};
+
+/* Set to 1 by rtnl_rtrealm_initialize(). */
+static int rtnl_rtrealm_init;
+
+/* Mark the table as initialized and merge /etc/iproute2/rt_realms into it. */
+static void rtnl_rtrealm_initialize(void)
+{
+       rtnl_rtrealm_init = 1;
+       rtnl_tab_initialize("/etc/iproute2/rt_realms", rtnl_rtrealm_tab,
+                           sizeof(rtnl_rtrealm_tab)/sizeof(rtnl_rtrealm_tab[0]));
+}
+
+/*
+ * Return the name of realm "id".  When no name is known (or id is
+ * outside 0..255) the decimal value is written to buf and buf is
+ * returned.
+ */
+char * rtnl_rtrealm_n2a(int id, char *buf, int len)
+{
+       if (id >= 0 && id < 256) {
+               /* Load the database lazily, on the first lookup miss. */
+               if (!rtnl_rtrealm_tab[id] && !rtnl_rtrealm_init)
+                       rtnl_rtrealm_initialize();
+               if (rtnl_rtrealm_tab[id])
+                       return rtnl_rtrealm_tab[id];
+       }
+       snprintf(buf, len, "%d", id);
+       return buf;
+}
+
+
+/*
+ * Resolve a realm given by name or by number (0..255).
+ * Stores the value in *id and returns 0; returns -1 when arg is
+ * neither a known name nor a valid number.
+ *
+ * The last name that matched is remembered in cache/res so that a
+ * repeated lookup of the same name skips the table scan.
+ */
+int rtnl_rtrealm_a2n(__u32 *id, char *arg)
+{
+       static char *cache = NULL;
+       static unsigned long res;
+       unsigned long val;
+       char *end;
+       int i;
+
+       if (cache && strcmp(cache, arg) == 0) {
+               *id = res;
+               return 0;
+       }
+
+       if (!rtnl_rtrealm_init)
+               rtnl_rtrealm_initialize();
+
+       for (i=0; i<256; i++) {
+               if (rtnl_rtrealm_tab[i] &&
+                   strcmp(rtnl_rtrealm_tab[i], arg) == 0) {
+                       cache = rtnl_rtrealm_tab[i];
+                       res = i;
+                       *id = res;
+                       return 0;
+               }
+       }
+
+       /*
+        * Parse into a local: "res" belongs to "cache" and must only
+        * change together with it, otherwise a later cache hit would
+        * hand back the number parsed here instead of the cached id.
+        */
+       val = strtoul(arg, &end, 0);
+       if (!end || end == arg || *end || val > 255)
+               return -1;
+       *id = val;
+       return 0;
+}
+
+
+
+/* Routing table names, indexed by table id. */
+static char * rtnl_rttable_tab[256] = {
+       [0] = "unspec",
+};
+
+/* Set to 1 by rtnl_rttable_initialize(). */
+static int rtnl_rttable_init;
+
+/*
+ * Mark the table as initialized, install the remaining built-in table
+ * names and then merge /etc/iproute2/rt_tables into the table.
+ */
+static void rtnl_rttable_initialize(void)
+{
+       rtnl_rttable_init = 1;
+
+       rtnl_rttable_tab[254] = "main";
+       rtnl_rttable_tab[255] = "local";
+
+       rtnl_tab_initialize("/etc/iproute2/rt_tables", rtnl_rttable_tab,
+                           sizeof(rtnl_rttable_tab)/sizeof(rtnl_rttable_tab[0]));
+}
+
+/*
+ * Return the name of routing table "id".  When no name is known (or
+ * id is outside 0..255) the decimal value is written to buf and buf
+ * is returned.
+ */
+char * rtnl_rttable_n2a(int id, char *buf, int len)
+{
+       if (id >= 0 && id < 256) {
+               /* Load built-ins and the database on the first miss. */
+               if (!rtnl_rttable_tab[id] && !rtnl_rttable_init)
+                       rtnl_rttable_initialize();
+               if (rtnl_rttable_tab[id])
+                       return rtnl_rttable_tab[id];
+       }
+       snprintf(buf, len, "%d", id);
+       return buf;
+}
+
+/*
+ * Resolve a routing table given by name or by number (0..255).
+ * Stores the value in *id and returns 0; returns -1 when arg is
+ * neither a known name nor a valid number.
+ *
+ * The last name that matched is remembered in cache/res so that a
+ * repeated lookup of the same name skips the table scan.
+ */
+int rtnl_rttable_a2n(__u32 *id, char *arg)
+{
+       static char *cache = NULL;
+       static unsigned long res;
+       unsigned long val;
+       char *end;
+       int i;
+
+       if (cache && strcmp(cache, arg) == 0) {
+               *id = res;
+               return 0;
+       }
+
+       if (!rtnl_rttable_init)
+               rtnl_rttable_initialize();
+
+       for (i=0; i<256; i++) {
+               if (rtnl_rttable_tab[i] &&
+                   strcmp(rtnl_rttable_tab[i], arg) == 0) {
+                       cache = rtnl_rttable_tab[i];
+                       res = i;
+                       *id = res;
+                       return 0;
+               }
+       }
+
+       /*
+        * Keep the strtoul() result unsigned and full width.  Squeezing
+        * it into an int turns inputs such as "-1" or "4294967295" into
+        * a negative number that slips past the "> 255" range check.
+        */
+       val = strtoul(arg, &end, 0);
+       if (!end || end == arg || *end || val > 255)
+               return -1;
+       *id = val;
+       return 0;
+}
+
+
+/* DS field names, indexed by DS field value. */
+static char * rtnl_rtdsfield_tab[256] = {
+       [0] = "0",
+};
+
+/* Set to 1 by rtnl_rtdsfield_initialize(). */
+static int rtnl_rtdsfield_init;
+
+/* Mark the table as initialized and merge /etc/iproute2/rt_dsfield into it. */
+static void rtnl_rtdsfield_initialize(void)
+{
+       rtnl_rtdsfield_init = 1;
+       rtnl_tab_initialize("/etc/iproute2/rt_dsfield", rtnl_rtdsfield_tab,
+                           sizeof(rtnl_rtdsfield_tab)/sizeof(rtnl_rtdsfield_tab[0]));
+}
+
+/*
+ * Return the name of DS field value "id".  A value in 0..255 without
+ * a name is written to buf as two-digit hex ("0x%02x"); a value
+ * outside that range is written in decimal.  In both cases buf is
+ * returned.
+ */
+char * rtnl_dsfield_n2a(int id, char *buf, int len)
+{
+       if (id >= 0 && id < 256) {
+               /* Load the database lazily, on the first lookup miss. */
+               if (!rtnl_rtdsfield_tab[id] && !rtnl_rtdsfield_init)
+                       rtnl_rtdsfield_initialize();
+               if (rtnl_rtdsfield_tab[id])
+                       return rtnl_rtdsfield_tab[id];
+               snprintf(buf, len, "0x%02x", id);
+       } else {
+               snprintf(buf, len, "%d", id);
+       }
+       return buf;
+}
+
+
+/*
+ * Resolve a DS field value given by name or as a hexadecimal number
+ * (0..ff).  Stores the value in *id and returns 0; returns -1 when
+ * arg is neither a known name nor a valid number.
+ *
+ * The last name that matched is remembered in cache/res so that a
+ * repeated lookup of the same name skips the table scan.
+ */
+int rtnl_dsfield_a2n(__u32 *id, char *arg)
+{
+       static char *cache = NULL;
+       static unsigned long res;
+       unsigned long val;
+       char *end;
+       int i;
+
+       if (cache && strcmp(cache, arg) == 0) {
+               *id = res;
+               return 0;
+       }
+
+       if (!rtnl_rtdsfield_init)
+               rtnl_rtdsfield_initialize();
+
+       for (i=0; i<256; i++) {
+               if (rtnl_rtdsfield_tab[i] &&
+                   strcmp(rtnl_rtdsfield_tab[i], arg) == 0) {
+                       cache = rtnl_rtdsfield_tab[i];
+                       res = i;
+                       *id = res;
+                       return 0;
+               }
+       }
+
+       /*
+        * Parse into a local: "res" belongs to "cache" and must only
+        * change together with it, otherwise a later cache hit would
+        * hand back the number parsed here instead of the cached id.
+        * Numbers are always read as base 16 here.
+        */
+       val = strtoul(arg, &end, 16);
+       if (!end || end == arg || *end || val > 255)
+               return -1;
+       *id = val;
+       return 0;
+}
+
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..6763be2a737315e347b04f9658092cb70c743667 100644 (file)
@@ -0,0 +1,528 @@
+/*
+ * utils.c
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ *
+ * Changes:
+ *
+ * Rani Assaf <rani@magic.metawire.com> 980929:        resolve addresses
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <string.h>
+#include <netdb.h>
+#include <arpa/inet.h>
+#include <resolv.h>
+#include <linux/pkt_sched.h>
+
+#include "utils.h"
+
+/*
+ * Parse arg as a signed integer in the given base (as for strtol).
+ * The whole string must be consumed and the value must fit in an int.
+ * Returns 0 and stores the value in *val, or -1 on any error.
+ */
+int get_integer(int *val, char *arg, int base)
+{
+       char *end;
+       long parsed;
+
+       if (arg == NULL || *arg == '\0')
+               return -1;
+
+       parsed = strtol(arg, &end, base);
+       if (end == NULL || end == arg || *end != '\0')
+               return -1;
+       if (parsed > INT_MAX || parsed < INT_MIN)
+               return -1;
+
+       *val = parsed;
+       return 0;
+}
+
+/*
+ * Parse arg as an unsigned integer in the given base (as for
+ * strtoul).  The whole string must be consumed and the value must not
+ * exceed UINT_MAX.  Returns 0 and stores the value in *val, or -1 on
+ * any error.
+ */
+int get_unsigned(unsigned *val, char *arg, int base)
+{
+       char *end;
+       unsigned long parsed;
+
+       if (arg == NULL || *arg == '\0')
+               return -1;
+
+       parsed = strtoul(arg, &end, base);
+       if (end == NULL || end == arg || *end != '\0')
+               return -1;
+       if (parsed > UINT_MAX)
+               return -1;
+
+       *val = parsed;
+       return 0;
+}
+
+/*
+ * Parse arg as an unsigned number in the given base that fits in 32
+ * bits.  Returns 0 and stores the value in *val, or -1 when arg is
+ * empty, has trailing characters or exceeds 0xFFFFFFFF.
+ */
+int get_u32(__u32 *val, char *arg, int base)
+{
+       char *end;
+       unsigned long parsed;
+
+       if (arg == NULL || *arg == '\0')
+               return -1;
+
+       parsed = strtoul(arg, &end, base);
+       if (end == NULL || end == arg || *end != '\0')
+               return -1;
+       if (parsed > 0xFFFFFFFFUL)
+               return -1;
+
+       *val = parsed;
+       return 0;
+}
+
+/*
+ * Parse arg as an unsigned number in the given base that fits in 16
+ * bits.  Returns 0 and stores the value in *val, or -1 when arg is
+ * empty, has trailing characters or exceeds 0xFFFF.
+ */
+int get_u16(__u16 *val, char *arg, int base)
+{
+       char *end;
+       unsigned long parsed;
+
+       if (arg == NULL || *arg == '\0')
+               return -1;
+
+       parsed = strtoul(arg, &end, base);
+       if (end == NULL || end == arg || *end != '\0')
+               return -1;
+       if (parsed > 0xFFFF)
+               return -1;
+
+       *val = parsed;
+       return 0;
+}
+
+/*
+ * Parse arg as an unsigned number in the given base that fits in 8
+ * bits.  Returns 0 and stores the value in *val, or -1 when arg is
+ * empty, has trailing characters or exceeds 0xFF.
+ */
+int get_u8(__u8 *val, char *arg, int base)
+{
+       char *end;
+       unsigned long parsed;
+
+       if (arg == NULL || *arg == '\0')
+               return -1;
+
+       parsed = strtoul(arg, &end, base);
+       if (end == NULL || end == arg || *end != '\0')
+               return -1;
+       if (parsed > 0xFF)
+               return -1;
+
+       *val = parsed;
+       return 0;
+}
+
+/*
+ * Parse arg as a signed number in the given base that fits in 16
+ * bits.  Returns 0 and stores the value in *val, or -1 when arg is
+ * empty, has trailing characters or lies outside -0x8000 .. 0x7FFF.
+ */
+int get_s16(__s16 *val, char *arg, int base)
+{
+       char *end;
+       long parsed;
+
+       if (arg == NULL || *arg == '\0')
+               return -1;
+
+       parsed = strtol(arg, &end, base);
+       if (end == NULL || end == arg || *end != '\0')
+               return -1;
+       if (parsed > 0x7FFF || parsed < -0x8000)
+               return -1;
+
+       *val = parsed;
+       return 0;
+}
+
+/*
+ * Parse arg as a signed number in the given base that fits in 8
+ * bits.  Returns 0 and stores the value in *val, or -1 when arg is
+ * empty, has trailing characters or lies outside -0x80 .. 0x7F.
+ */
+int get_s8(__s8 *val, char *arg, int base)
+{
+       char *end;
+       long parsed;
+
+       if (arg == NULL || *arg == '\0')
+               return -1;
+
+       parsed = strtol(arg, &end, base);
+       if (end == NULL || end == arg || *end != '\0')
+               return -1;
+       if (parsed > 0x7F || parsed < -0x80)
+               return -1;
+
+       *val = parsed;
+       return 0;
+}
+
+/*
+ * Parse the textual address "name" into *addr.
+ *
+ * - "default", "all" and "any" give an all-zero address of the
+ *   requested family (not allowed for AF_DECnet).
+ * - A name containing ':' is parsed as IPv6 with inet_pton().
+ * - With family AF_DECnet the name is parsed with dnet_pton().
+ * - Anything else is parsed as dotted IPv4 with one to four decimal
+ *   components; components that are left out stay zero.
+ *
+ * family restricts what is accepted; AF_UNSPEC accepts IPv4 and IPv6.
+ * bitlen is set to -1 in every successful case.
+ * Returns 0 on success and -1 on a parse error or family mismatch.
+ */
+int get_addr_1(inet_prefix *addr, char *name, int family)
+{
+       char *cp;
+       unsigned char *ap = (unsigned char*)addr->data;
+       int i;
+
+       memset(addr, 0, sizeof(*addr));
+
+       if (strcmp(name, "default") == 0 ||
+           strcmp(name, "all") == 0 ||
+           strcmp(name, "any") == 0) {
+               if (family == AF_DECnet)
+                       return -1;
+               addr->family = family;
+               addr->bytelen = (family == AF_INET6 ? 16 : 4);
+               addr->bitlen = -1;
+               return 0;
+       }
+
+       if (strchr(name, ':')) {
+               addr->family = AF_INET6;
+               if (family != AF_UNSPEC && family != AF_INET6)
+                       return -1;
+               if (inet_pton(AF_INET6, name, addr->data) <= 0)
+                       return -1;
+               addr->bytelen = 16;
+               addr->bitlen = -1;
+               return 0;
+       }
+
+       if (family == AF_DECnet) {
+               struct dn_naddr dna;
+               addr->family = AF_DECnet;
+               if (dnet_pton(AF_DECnet, name, &dna) <= 0)
+                       return -1;
+               memcpy(addr->data, dna.a_addr, 2);
+               addr->bytelen = 2;
+               addr->bitlen = -1;
+               return 0;
+       }
+
+       addr->family = AF_INET;
+       if (family != AF_UNSPEC && family != AF_INET)
+               return -1;
+       addr->bytelen = 4;
+       addr->bitlen = -1;
+       for (cp=name, i=0; *cp; cp++) {
+               if (*cp <= '9' && *cp >= '0') {
+                       /*
+                        * Accumulate in a wider type first: storing
+                        * straight into the byte would let components
+                        * above 255 wrap around unnoticed.
+                        */
+                       unsigned octet = 10*ap[i] + (*cp-'0');
+
+                       if (octet > 255)
+                               return -1;
+                       ap[i] = octet;
+                       continue;
+               }
+               if (*cp == '.' && ++i <= 3)
+                       continue;
+               return -1;
+       }
+       return 0;
+}
+
+/*
+ * Parse "address[/prefixlen]" into *dst.
+ *
+ * "default", "any" and "all" give a zero-length prefix of the
+ * requested family (not allowed for AF_DECnet).  Otherwise the address
+ * part is parsed by get_addr_1(); without an explicit length the
+ * prefix covers the whole address (128, 16 or 32 bits depending on
+ * the resulting family).  An explicit length must not exceed that.
+ *
+ * arg is modified temporarily while parsing (the '/' is replaced by a
+ * terminator) and restored before returning.
+ * Returns 0 on success, -1 on error.
+ */
+int get_prefix_1(inet_prefix *dst, char *arg, int family)
+{
+       int err;
+       unsigned plen;
+       char *slash;
+
+       memset(dst, 0, sizeof(*dst));
+
+       if (strcmp(arg, "default") == 0 ||
+           strcmp(arg, "any") == 0 ||
+           strcmp(arg, "all") == 0) {
+               if (family == AF_DECnet)
+                       return -1;
+               dst->family = family;
+               dst->bytelen = 0;
+               dst->bitlen = 0;
+               return 0;
+       }
+
+       slash = strchr(arg, '/');
+       if (slash)
+               *slash = 0;
+       err = get_addr_1(dst, arg, family);
+       if (err == 0) {
+               switch(dst->family) {
+                       case AF_INET6:
+                               dst->bitlen = 128;
+                               break;
+                       case AF_DECnet:
+                               dst->bitlen = 16;
+                               break;
+                       default:
+                       case AF_INET:
+                               dst->bitlen = 32;
+               }
+               if (slash) {
+                       /* plen is unsigned, so use the matching parser
+                          rather than handing its address to get_integer(). */
+                       if (get_unsigned(&plen, slash+1, 0) || plen > dst->bitlen) {
+                               err = -1;
+                               goto done;
+                       }
+                       dst->bitlen = plen;
+               }
+       }
+done:
+       if (slash)
+               *slash = '/';
+       return err;
+}
+
+/*
+ * Like get_addr_1(), but never returns an error: a bad address, or a
+ * request for AF_PACKET, prints a message to stderr and exits with
+ * status 1.  Returns 0 otherwise.
+ */
+int get_addr(inet_prefix *dst, char *arg, int family)
+{
+       if (family != AF_PACKET && get_addr_1(dst, arg, family) == 0)
+               return 0;
+
+       if (family == AF_PACKET)
+               fprintf(stderr, "Error: \"%s\" may be inet address, but it is not allowed in this context.\n", arg);
+       else
+               fprintf(stderr, "Error: an inet address is expected rather than \"%s\".\n", arg);
+       exit(1);
+}
+
+/*
+ * Like get_prefix_1(), but never returns an error: a bad prefix, or a
+ * request for AF_PACKET, prints a message to stderr and exits with
+ * status 1.  Returns 0 otherwise.
+ */
+int get_prefix(inet_prefix *dst, char *arg, int family)
+{
+       if (family != AF_PACKET && get_prefix_1(dst, arg, family) == 0)
+               return 0;
+
+       if (family == AF_PACKET)
+               fprintf(stderr, "Error: \"%s\" may be inet prefix, but it is not allowed in this context.\n", arg);
+       else
+               fprintf(stderr, "Error: an inet prefix is expected rather than \"%s\".\n", arg);
+       exit(1);
+}
+
+/*
+ * Parse "name" as an AF_INET address with get_addr_1() and return the
+ * first 32-bit word of the parsed address.  Prints a message to stderr
+ * and exits with status 1 if the name cannot be parsed.
+ */
+__u32 get_addr32(char *name)
+{
+       inet_prefix addr;
+
+       if (get_addr_1(&addr, name, AF_INET) == 0)
+               return addr.data[0];
+
+       fprintf(stderr, "Error: an IP address is expected rather than \"%s\"\n", name);
+       exit(1);
+}
+
+/* Report a truncated command line on stderr and exit with status -1. */
+void incomplete_command()
+{
+       fprintf(stderr, "Command line is not complete. Try option \"help\"\n");
+       exit(-1);
+}
+
+/*
+ * Report an invalid argument on stderr and exit with status -1.
+ * msg explains what is wrong; arg is the offending argument text.
+ */
+void invarg(char *msg, char *arg)
+{
+       fprintf(stderr, "Error: argument \"%s\" is wrong: %s\n", arg, msg);
+       exit(-1);
+}
+
+/*
+ * Report that option "key" was given twice (arg being the second
+ * value) on stderr and exit with status -1.
+ */
+void duparg(char *key, char *arg)
+{
+       fprintf(stderr, "Error: duplicate \"%s\": \"%s\" is the second value.\n", key, arg);
+       exit(-1);
+}
+
+/*
+ * Report on stderr that either "key" is duplicated or "arg" is not a
+ * valid argument, then exit with status -1.
+ */
+void duparg2(char *key, char *arg)
+{
+       fprintf(stderr, "Error: either \"%s\" is duplicate, or \"%s\" is a garbage.\n", key, arg);
+       exit(-1);
+}
+
+/*
+ * Prefix match for abbreviated keywords: returns 0 when cmd is a
+ * (possibly complete) prefix of pattern.  Returns -1 when cmd is
+ * longer than pattern, otherwise the non-zero memcmp() result.
+ */
+int matches(char *cmd, char *pattern)
+{
+       int cmdlen = strlen(cmd);
+
+       if (strlen(pattern) < cmdlen)
+               return -1;
+
+       return memcmp(pattern, cmd, cmdlen);
+}
+
+/*
+ * Compare the leading "bits" bits of two addresses.
+ * Returns 0 when they are equal, -1 when a difference is found in the
+ * whole 32-bit words and 1 when it is found in the trailing partial
+ * word.
+ */
+int inet_addr_match(inet_prefix *a, inet_prefix *b, int bits)
+{
+       __u32 *lhs = a->data;
+       __u32 *rhs = b->data;
+       int whole = bits >> 0x05;       /* number of complete words */
+       int rest = bits & 0x1f;         /* bits left in the next word */
+
+       if (whole && memcmp(lhs, rhs, whole << 2))
+               return -1;
+
+       if (rest) {
+               /* The mask is built in host order, the data is not. */
+               __u32 mask = htonl((0xffffffff) << (0x20 - rest));
+
+               if ((lhs[whole] ^ rhs[whole]) & mask)
+                       return 1;
+       }
+
+       return 0;
+}
+
+int __iproute2_hz_internal;
+
+/*
+ * Work out the HZ value to use.
+ *
+ * The HZ environment variable wins when set (a value that atoi()
+ * turns into 0 falls back to the compile-time HZ).  Otherwise the
+ * psched file is read: PROC_NET_PSCHED names it directly, PROC_ROOT
+ * names the directory above "net/psched", and the default is
+ * /proc/net/psched.  When the third hex field of that file is 1000000
+ * the fourth field is returned; in every other case the compile-time
+ * HZ is returned.
+ */
+int __get_hz(void)
+{
+       char name[1024];
+       const char *psched = getenv("PROC_NET_PSCHED");
+       const char *root = getenv("PROC_ROOT");
+       unsigned nom, denom;
+       int hz = 0;
+       FILE *fp;
+
+       if (getenv("HZ")) {
+               int env_hz = atoi(getenv("HZ"));
+
+               return env_hz ? env_hz : HZ;
+       }
+
+       if (psched)
+               snprintf(name, sizeof(name)-1, "%s", psched);
+       else if (root)
+               snprintf(name, sizeof(name)-1, "%s/net/psched", root);
+       else
+               strcpy(name, "/proc/net/psched");
+
+       fp = fopen(name, "r");
+       if (fp == NULL)
+               return HZ;
+
+       if (fscanf(fp, "%*08x%*08x%08x%08x", &nom, &denom) == 2 &&
+           nom == 1000000)
+               hz = denom;
+       fclose(fp);
+
+       return hz ? hz : HZ;
+}
+
+/*
+ * Format a binary address of family "af" as text in buf.
+ * IPv4/IPv6 go through inet_ntop(), IPX through ipx_ntop() and DECnet
+ * through dnet_ntop(); any other family yields the string "???".
+ * The len argument is not used here.
+ */
+const char *rt_addr_n2a(int af, int len, void *addr, char *buf, int buflen)
+{
+       if (af == AF_INET || af == AF_INET6)
+               return inet_ntop(af, addr, buf, buflen);
+
+       if (af == AF_IPX)
+               return ipx_ntop(af, addr, buf, buflen);
+
+       if (af == AF_DECnet) {
+               /* dnet_ntop() takes a dn_naddr; wrap the two raw bytes. */
+               struct dn_naddr dna = { 2, { 0, 0, }};
+
+               memcpy(dna.a_addr, addr, 2);
+               return dnet_ntop(af, &dna, buf, buflen);
+       }
+
+       return "???";
+}
+
+#ifdef RESOLVE_HOSTNAMES
+/* One cached reverse lookup; name stays NULL for a remembered failure. */
+struct namerec
+{
+       struct namerec *next;
+       inet_prefix addr;
+       char        *name;
+};
+
+/* Cache buckets, indexed by the 8-bit hash computed in resolve_address(). */
+static struct namerec *nht[256];
+
+/*
+ * Reverse-resolve the binary address addr (len bytes, family af) to a
+ * host name, caching every answer - including failures - in nht[].
+ *
+ * An IPv4-mapped IPv6 address is looked up as the embedded IPv4
+ * address.  Returns the cached or freshly resolved name, or NULL when
+ * the address does not resolve, len is not usable or memory runs out.
+ */
+char *resolve_address(char *addr, int len, int af)
+{
+       const unsigned char *bytes;
+       struct namerec *n;
+       struct hostent *h_ent;
+       unsigned hash;
+       int i;
+       static int notfirst;
+
+
+       if (af == AF_INET6 && ((__u32*)addr)[0] == 0 &&
+           ((__u32*)addr)[1] == 0 && ((__u32*)addr)[2] == htonl(0xffff)) {
+               af = AF_INET;
+               addr += 12;
+               len = 4;
+       }
+
+       /* Nothing to hash, or more than a cache record can hold. */
+       if (len <= 0 || len > (int)sizeof(n->addr.data))
+               return NULL;
+
+       /*
+        * XOR the last (up to four) address bytes.  They are read as
+        * unsigned char so the result always lies in 0..255: with a
+        * signed plain char, bytes >= 0x80 sign-extend and can produce
+        * an index far outside nht[].  Short addresses use only the
+        * bytes that exist instead of reading in front of addr.
+        */
+       bytes = (const unsigned char *)addr;
+       hash = 0;
+       for (i = (len < 4 ? len : 4); i > 0; i--)
+               hash ^= bytes[len - i];
+
+       for (n = nht[hash]; n; n = n->next) {
+               if (n->addr.family == af &&
+                   n->addr.bytelen == len &&
+                   memcmp(n->addr.data, addr, len) == 0)
+                       return n->name;
+       }
+       if ((n = malloc(sizeof(*n))) == NULL)
+               return NULL;
+       n->addr.family = af;
+       n->addr.bytelen = len;
+       n->name = NULL;
+       memcpy(n->addr.data, addr, len);
+       n->next = nht[hash];
+       nht[hash] = n;
+       if (++notfirst == 1)
+               sethostent(1);
+       fflush(stdout);
+
+       if ((h_ent = gethostbyaddr(addr, len, af)) != NULL)
+               n->name = strdup(h_ent->h_name);
+
+       /* Even if we fail, "negative" entry is remembered. */
+       return n->name;
+}
+#endif
+
+
+/*
+ * Format an address for display.
+ *
+ * When built with RESOLVE_HOSTNAMES and resolve_hosts is non-zero, the
+ * address is first looked up with resolve_address() and the host name
+ * is returned if there is one.  Otherwise - and whenever no name is
+ * found - the numeric form produced by rt_addr_n2a() is returned.
+ *
+ * len may be <= 0, in which case a length is chosen from the family
+ * for the lookup.
+ */
+const char *format_host(int af, int len, void *addr, char *buf, int buflen)
+{
+#ifdef RESOLVE_HOSTNAMES
+       if (resolve_hosts) {
+               char *n;
+               /* No length given: derive it from the address family. */
+               if (len <= 0) {
+                       switch (af) {
+                       case AF_INET:
+                               len = 4;
+                               break;
+                       case AF_INET6:
+                               len = 16;
+                               break;
+                       case AF_IPX:
+                               len = 10;
+                               break;
+#ifdef AF_DECnet
+                       /* I see no reasons why gethostbyname
+                          may not work for DECnet */
+                       case AF_DECnet:
+                               len = 2;
+                               break;
+#endif
+                       default: ;
+                       }
+               }
+               /* Unknown family with no length: skip the lookup. */
+               if (len > 0 &&
+                   (n = resolve_address(addr, len, af)) != NULL)
+                       return n;
+       }
+#endif
+       return rt_addr_n2a(af, len, addr, buf, buflen);
+}
+
+
+/*
+ * Format len bytes from str as colon-separated two-digit lower-case
+ * hex ("aa:bb:cc") into buf, which holds blen bytes.
+ *
+ * The result is always NUL-terminated when blen > 0.  If buf is too
+ * small the output is cut after the last byte that fits completely,
+ * without a trailing ':'.  Returns buf.
+ */
+__u8* hexstring_n2a(const __u8 *str, int len, __u8 *buf, int blen)
+{
+       __u8 *ptr = buf;
+       int i;
+
+       /* Cover len == 0 and tiny buffers, where the loop writes nothing. */
+       if (blen > 0)
+               *ptr = 0;
+
+       for (i=0; i<len; i++) {
+               /* Two digits plus the terminator written by sprintf(). */
+               if (blen < 3)
+                       break;
+               sprintf((char *)ptr, "%02x", str[i]);
+               ptr += 2;
+               blen -= 2;
+               /*
+                * Emit a separator only when the next byte fits as well
+                * (':' + two digits + NUL).  Writing ':' without that
+                * room would overwrite the terminator and leave buf
+                * unterminated.
+                */
+               if (i == len-1 || blen < 4)
+                       break;
+               *ptr++ = ':';
+               blen--;
+       }
+       return buf;
+}
+
+/*
+ * Parse colon-separated hex bytes ("aa:bb:cc") from str into buf.
+ *
+ * At most blen bytes are stored; further components are still
+ * validated but dropped.  If fewer than blen bytes are given the rest
+ * of buf is zero-filled.  An empty component counts as 0.
+ * Returns buf, or NULL when a character is not a hex digit or a
+ * component is larger than 0xff.
+ */
+__u8* hexstring_a2n(const __u8 *str, __u8 *buf, int blen)
+{
+       int cnt = 0;
+
+       for (;;) {
+               unsigned acc;
+               char ch;
+
+               acc = 0;
+
+               while ((ch = *str) != ':' && ch != 0) {
+                       if (ch >= '0' && ch <= '9')
+                               ch -= '0';
+                       else if (ch >= 'a' && ch <= 'f')
+                               ch -= 'a'-10;
+                       else if (ch >= 'A' && ch <= 'F')
+                               ch -= 'A'-10;
+                       else
+                               return NULL;
+                       acc = (acc<<4) + ch;
+                       /*
+                        * Range-check after every digit: checking only
+                        * once per component would let a long run of
+                        * digits wrap acc around and pass as a small
+                        * value.
+                        */
+                       if (acc > 255)
+                               return NULL;
+                       str++;
+               }
+
+               if (cnt < blen) {
+                       buf[cnt] = acc;
+                       cnt++;
+               }
+               /* ch holds the delimiter that ended the component. */
+               if (ch == 0)
+                       break;
+               ++str;
+       }
+       if (cnt < blen)
+               memset(buf+cnt, 0, blen-cnt);
+       return buf;
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..685b0044ea043a60ad507451ffbd6d8aa2aa89f0 100644 (file)
@@ -0,0 +1,37 @@
+# Object lists, one per tool built in this directory.
+SSOBJ=ss.o ssfilter.o
+NSTATOBJ=nstat.o
+IFSTATOBJ=ifstat.o
+RTACCTOBJ=rtacct.o
+ARPDOBJ=arpd.o
+RTSTATOBJ=rtstat.o
+
+ALLOBJ=$(SSOBJ) $(NSTATOBJ) $(IFSTATOBJ) $(RTACCTOBJ) $(ARPDOBJ) $(RTSTATOBJ)
+TARGETS=ss nstat ifstat rtacct arpd rtstat
+
+all: $(TARGETS)
+
+# ss has no explicit recipe here; NOTE(review): presumably linked by
+# make's built-in rule - confirm that $(LIBUTIL) ends up on the link line.
+ss: $(SSOBJ) $(LIBUTIL)
+
+nstat: $(NSTATOBJ)
+       $(CC) $(CFLAGS) $(LDFLAGS) -o nstat $(NSTATOBJ) -lm
+
+ifstat: $(IFSTATOBJ)
+       $(CC) $(CFLAGS) $(LDFLAGS) -o ifstat $(IFSTATOBJ) $(LIBNETLINK) -lm
+
+rtacct: $(RTACCTOBJ)
+       $(CC) $(CFLAGS) $(LDFLAGS) -o rtacct $(RTACCTOBJ) $(LIBNETLINK) -lm
+
+# arpd additionally links against -ldb.
+arpd: $(ARPDOBJ)
+       $(CC) $(CFLAGS) $(LDFLAGS) -o arpd $(ARPDOBJ) $(LIBNETLINK) -ldb
+
+rtstat: $(RTSTATOBJ)
+       $(CC) $(CFLAGS) $(LDFLAGS) -o rtstat $(RTSTATOBJ)
+
+# ssfilter.c is generated from the bison grammar and removed by "clean".
+ssfilter.c: ssfilter.y
+       bison ssfilter.y -o ssfilter.c
+
+# Installs all tools, stripped (-s), into $(DESTDIR)$(SBINDIR).
+install: all 
+       install -m 0755 -s $(TARGETS) $(DESTDIR)$(SBINDIR)
+
+clean:
+       rm -f $(ALLOBJ) $(TARGETS) ssfilter.c
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..4590dafccc67d46f028cc5941fd49c1e1f88c367 100644 (file)
@@ -0,0 +1,846 @@
+/*
+ * arpd.c      ARP helper daemon.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <stdio.h>
+#include <syslog.h>
+#include <malloc.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <netdb.h>
+#include <db.h>
+#include <sys/ioctl.h>
+#include <sys/poll.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/uio.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <time.h>
+#include <signal.h>
+#include <linux/if.h>
+#include <linux/if_arp.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <linux/if_packet.h>
+#include <linux/filter.h>
+
+#include "libnetlink.h"
+#include "utils.h"
+
+/* Not referenced in this file; presumably required by the shared
+ * utility library -- TODO confirm. */
+int resolve_hosts;
+
+/* Database handle (opened in main() with dbopen) and its path (-b). */
+DB     *dbase;
+char   *dbname = "/var/lib/arpd/arpd.db";
+
+/* Interfaces named on the command line: count, resolved interface
+ * indices and the names themselves.  ifnum == 0 means "no restriction". */
+int    ifnum;  
+int    *ifvec;
+char   **ifnames;
+
+/* Database key: interface index plus a 4-byte address. */
+struct dbkey
+{
+       __u32   iface;
+       __u32   addr;
+};
+
+/* Layout of a negative entry as written by prepare_neg_entry():
+ *   byte 0      0xFF marker
+ *   byte 1      counter (NEG_CNT)
+ *   bytes 2..5  creation time, most significant byte first
+ * NEG_AGE is the difference between time(NULL) and that stamp;
+ * NEG_VALID compares it against negative_timeout.
+ */
+#define IS_NEG(x)      (((__u8*)(x))[0] == 0xFF)
+#define NEG_TIME(x)    (((x)[2]<<24)|((x)[3]<<16)|((x)[4]<<8)|(x)[5]) 
+#define NEG_AGE(x)     ((__u32)time(NULL) - NEG_TIME((__u8*)x))
+#define NEG_VALID(x)   (NEG_AGE(x) < negative_timeout)
+#define NEG_CNT(x)     (((__u8*)(x))[1])
+
+/* rtnetlink handle used for the neighbour dump and for replies. */
+struct rtnl_handle rth;
+
+/* pset[0] is the packet socket receiving ARP, pset[1] the netlink
+ * socket (both set up in main()).  udp_sock is the helper socket used
+ * for interface ioctls and by send_probe(). */
+struct pollfd pset[2];
+int udp_sock = -1;
+
+/* Flags raised by the signal handlers and consumed by the main loop. */
+volatile int do_exit;
+volatile int do_sync;
+volatile int do_stats;
+
+/* Event counters reported through syslog by send_stats(). */
+struct {
+       unsigned long arp_new;
+       unsigned long arp_change;
+
+       unsigned long app_recv;
+       unsigned long app_success;
+       unsigned long app_bad;
+       unsigned long app_neg;
+       unsigned long app_suppressed;
+
+       unsigned long kern_neg;
+       unsigned long kern_new;
+       unsigned long kern_change;
+
+       unsigned long probes_sent;
+       unsigned long probes_suppressed;
+} stats;
+
+/* Tunables set from the command line in main():
+ *   active_probing        -a
+ *   negative_timeout      -n, compared against NEG_AGE()
+ *   no_kernel_broadcasts  -k
+ *   broadcast_rate        -R, stored as 1000/<argument>
+ *   broadcast_burst       -B, stored as 1000*<argument>
+ * The last two are the cost of one probe and the bucket capacity used
+ * by queue_active_probe(), both in milliseconds.
+ */
+int active_probing;
+int negative_timeout = 60;
+int no_kernel_broadcasts;
+int broadcast_rate = 1000;
+int broadcast_burst = 3000;
+
+/* Print the command-line synopsis on stderr and exit with status 1. */
+void usage(void)
+{
+       static const char synopsis[] =
+"Usage: arpd [ -lk ] [ -a N ] [ -b dbase ] [ -f file ] [ interfaces ]\n";
+
+       fputs(synopsis, stderr);
+       exit(1);
+}
+
+/*
+ * Return 1 when ifindex is one of the interfaces selected on the
+ * command line, or when no interface was selected at all; 0 otherwise.
+ */
+int handle_if(int ifindex)
+{
+       int pos = 0;
+
+       if (ifnum == 0)
+               return 1;
+
+       while (pos < ifnum) {
+               if (ifvec[pos] == ifindex)
+                       return 1;
+               pos++;
+       }
+       return 0;
+}
+
+int sysctl_adjusted;
+
+/*
+ * Write the neighbour sysctls of every interface named on the command
+ * line:
+ *   mcast_solicit (only with active probing): 0 when -k was given,
+ *                  otherwise 1 for active_probing >= 2, else
+ *                  3 - active_probing;
+ *   app_solicit:   1 for active_probing <= 1, else active_probing.
+ * Does nothing when no interface was named.  Files that cannot be
+ * opened are skipped (best effort), as are interface names too long to
+ * fit the path buffer.  Sets sysctl_adjusted so the change can be
+ * undone on exit.
+ */
+void do_sysctl_adjustments(void)
+{
+       int i;
+
+       if (!ifnum)
+               return;
+
+       for (i=0; i<ifnum; i++) {
+               char path[128];
+               FILE *fp;
+               int n;
+
+               if (active_probing) {
+                       /* Interface names come from argv: bound the copy. */
+                       n = snprintf(path, sizeof(path), "/proc/sys/net/ipv4/neigh/%s/mcast_solicit", ifnames[i]);
+                       if (n > 0 && n < (int)sizeof(path) &&
+                           (fp = fopen(path, "w")) != NULL) {
+                               if (no_kernel_broadcasts)
+                                       fputs("0\n", fp);
+                               else
+                                       fprintf(fp, "%d\n", active_probing>=2 ? 1 : 3-active_probing);
+                               fclose(fp);
+                       }
+               }
+
+               n = snprintf(path, sizeof(path), "/proc/sys/net/ipv4/neigh/%s/app_solicit", ifnames[i]);
+               if (n > 0 && n < (int)sizeof(path) &&
+                   (fp = fopen(path, "w")) != NULL) {
+                       fprintf(fp, "%d\n", active_probing<=1 ? 1 : active_probing);
+                       fclose(fp);
+               }
+       }
+       sysctl_adjusted = 1;
+}
+
+/*
+ * Revert what do_sysctl_adjustments() wrote: mcast_solicit goes back
+ * to 3 (only when active probing was enabled) and app_solicit to 0 for
+ * every named interface.  These are presumably the kernel defaults --
+ * TODO confirm.  No-op unless sysctl_adjusted is set; files that cannot
+ * be opened and over-long interface names are skipped.
+ */
+void undo_sysctl_adjustments(void)
+{
+       int i;
+
+       if (!sysctl_adjusted)
+               return;
+
+       for (i=0; i<ifnum; i++) {
+               char path[128];
+               FILE *fp;
+               int n;
+
+               if (active_probing) {
+                       /* Interface names come from argv: bound the copy. */
+                       n = snprintf(path, sizeof(path), "/proc/sys/net/ipv4/neigh/%s/mcast_solicit", ifnames[i]);
+                       if (n > 0 && n < (int)sizeof(path) &&
+                           (fp = fopen(path, "w")) != NULL) {
+                               fputs("3\n", fp);
+                               fclose(fp);
+                       }
+               }
+               n = snprintf(path, sizeof(path), "/proc/sys/net/ipv4/neigh/%s/app_solicit", ifnames[i]);
+               if (n > 0 && n < (int)sizeof(path) &&
+                   (fp = fopen(path, "w")) != NULL) {
+                       fputs("0\n", fp);
+                       fclose(fp);
+               }
+       }
+       sysctl_adjusted = 0;
+}
+
+
+/*
+ * Broadcast one ARP request for addr on interface ifindex.
+ *
+ * The interface name and hardware address are looked up through
+ * udp_sock; only ARPHRD_ETHER interfaces are served.  udp_sock is bound
+ * to the device and connected to addr so that getsockname() yields the
+ * local source address to put in the request.  The packet is sent
+ * through the packet socket pset[0].fd.
+ *
+ * Returns 0 on success (and counts the probe in stats.probes_sent),
+ * -1 on any failure.
+ */
+int send_probe(int ifindex, __u32 addr)
+{
+       struct ifreq ifr;
+       struct sockaddr_in dst;
+       socklen_t len;
+       unsigned char buf[256];
+       struct arphdr *ah = (struct arphdr*)buf;
+       unsigned char *p = (unsigned char *)(ah+1);
+       struct sockaddr_ll sll;
+
+       memset(&ifr, 0, sizeof(ifr));
+       ifr.ifr_ifindex = ifindex;
+       if (ioctl(udp_sock, SIOCGIFNAME, &ifr))
+               return -1;
+       if (ioctl(udp_sock, SIOCGIFHWADDR, &ifr))
+               return -1;
+       if (ifr.ifr_hwaddr.sa_family != ARPHRD_ETHER)
+               return -1;
+       if (setsockopt(udp_sock, SOL_SOCKET, SO_BINDTODEVICE, ifr.ifr_name, strlen(ifr.ifr_name)+1) < 0)
+               return -1;
+
+       /* Zero the whole address so no stack garbage reaches the kernel. */
+       memset(&dst, 0, sizeof(dst));
+       dst.sin_family = AF_INET;
+       dst.sin_port = htons(1025);
+       dst.sin_addr.s_addr = addr;
+       if (connect(udp_sock, (struct sockaddr*)&dst, sizeof(dst)) < 0)
+               return -1;
+       len = sizeof(dst);
+       if (getsockname(udp_sock, (struct sockaddr*)&dst, &len) < 0)
+               return -1;
+
+       ah->ar_hrd = htons(ifr.ifr_hwaddr.sa_family);
+       ah->ar_pro = htons(ETH_P_IP);
+       ah->ar_hln = 6;
+       ah->ar_pln = 4;
+       ah->ar_op  = htons(ARPOP_REQUEST);
+
+       /* Sender hardware address. */
+       memcpy(p, ifr.ifr_hwaddr.sa_data, ah->ar_hln);
+       p += ah->ar_hln;
+
+       /* Sender protocol address: the local address chosen by connect(). */
+       memcpy(p, &dst.sin_addr, 4);
+       p+=4;
+
+       /* Every field the caller does not set must be zero, and sll_halen
+        * must describe the destination address stored in sll_addr. */
+       memset(&sll, 0, sizeof(sll));
+       sll.sll_family = AF_PACKET;
+       sll.sll_ifindex = ifindex;
+       sll.sll_protocol = htons(ETH_P_ARP);
+       sll.sll_halen = ah->ar_hln;
+       memset(sll.sll_addr, 0xFF, sizeof(sll.sll_addr));
+
+       /* Target hardware address (all ones, same bytes as the destination). */
+       memcpy(p, &sll.sll_addr, ah->ar_hln);
+       p+=ah->ar_hln;
+
+       /* Target protocol address. */
+       memcpy(p, &addr, 4);
+       p+=4;
+
+       if (sendto(pset[0].fd, buf, p-buf, 0, (struct sockaddr*)&sll, sizeof(sll)) < 0)
+               return -1;
+       stats.probes_sent++;
+       return 0;
+}
+
+/*
+ * Rate-limited wrapper around send_probe().
+ *
+ * Token bucket measured in milliseconds: the bucket holds at most
+ * broadcast_burst, gains one unit per elapsed millisecond, and each
+ * probe costs broadcast_rate.  With the defaults (1000/3000) this is
+ * one probe per second with a burst of three.
+ *
+ * Returns 0 when a probe was sent, -1 when it was suppressed by the
+ * limiter or send_probe() failed (both counted in
+ * stats.probes_suppressed).
+ */
+int queue_active_probe(int ifindex, __u32 addr)
+{
+       static struct timeval prev;
+       static int buckets;
+       struct timeval now;
+
+       gettimeofday(&now, NULL);
+       if (prev.tv_sec) {
+               /* Wide arithmetic: a long idle period must not overflow. */
+               long long diff = (long long)(now.tv_sec-prev.tv_sec)*1000+(now.tv_usec-prev.tv_usec)/1000;
+
+               if (diff < 0)
+                       diff = 0;       /* clock stepped backwards */
+               if (buckets > broadcast_burst || diff >= broadcast_burst - buckets)
+                       buckets = broadcast_burst;
+               else
+                       buckets += (int)diff;
+       } else {
+               buckets = broadcast_burst;
+       }
+       /* Always advance the reference point.  Otherwise the time since
+        * the last successful send would be credited again on every
+        * suppressed call and the limiter would leak. */
+       prev = now;
+
+       if (buckets >= broadcast_rate && !send_probe(ifindex, addr)) {
+               buckets -= broadcast_rate;
+               return 0;
+       }
+       stats.probes_suppressed++;
+       return -1;
+}
+
+/*
+ * Send an RTM_NEWNEIGH request carrying address addr and link-layer
+ * address lla (llalen bytes) for interface ifindex, with neighbour
+ * state NUD_STALE.  Returns non-zero when rtnl_send() reports a result
+ * <= 0, zero otherwise.
+ */
+int respond_to_kernel(int ifindex, __u32 addr, char *lla, int llalen)
+{
+       struct {
+               struct nlmsghdr         n;
+               struct ndmsg            ndm;
+               char                    buf[256];
+       } req;
+       struct nlmsghdr *hdr = &req.n;
+       struct ndmsg *neigh = &req.ndm;
+
+       /* Only the two headers are cleared; attributes are appended below. */
+       memset(hdr, 0, sizeof(*hdr));
+       memset(neigh, 0, sizeof(*neigh));
+
+       hdr->nlmsg_type = RTM_NEWNEIGH;
+       hdr->nlmsg_flags = NLM_F_REQUEST;
+       hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
+
+       neigh->ndm_family = AF_INET;
+       neigh->ndm_ifindex = ifindex;
+       neigh->ndm_state = NUD_STALE;
+       neigh->ndm_type = RTN_UNICAST;
+
+       addattr_l(hdr, sizeof(req), NDA_DST, &addr, 4);
+       addattr_l(hdr, sizeof(req), NDA_LLADDR, lla, llalen);
+
+       return rtnl_send(&rth, (char*)&req, hdr->nlmsg_len) <= 0;
+}
+
+/*
+ * Fill the six bytes at ndata with a fresh negative entry: marker
+ * 0xFF, counter 0, then stamp with the most significant byte first.
+ */
+void prepare_neg_entry(__u8 *ndata, __u32 stamp)
+{
+       __u8 *out = ndata;
+       int shift;
+
+       *out++ = 0xFF;
+       *out++ = 0;
+       for (shift = 24; shift >= 0; shift -= 8)
+               *out++ = stamp>>shift;
+}
+
+
+/*
+ * Handle one netlink message.
+ *
+ *  NLMSG_DONE    end of the initial dump: sync the database and apply
+ *                the sysctl adjustments.
+ *  RTM_GETNEIGH  resolution request from the kernel: answer from the
+ *                database, stay silent on a recent negative entry, or
+ *                send an active probe.
+ *  RTM_NEWNEIGH  neighbour update: mirror it into the database
+ *                (negative entry on NUD_FAILED, link-layer address
+ *                otherwise).
+ *
+ * Messages for other families, unhandled interfaces, or without a
+ * usable NDA_DST are ignored.  Returns -1 only when the message is too
+ * short to hold a struct ndmsg, 0 in every other case.
+ */
+int do_one_request(struct nlmsghdr *n)
+{
+       struct ndmsg *ndm = NLMSG_DATA(n);
+       int len = n->nlmsg_len;
+       struct rtattr * tb[NDA_MAX+1];
+       struct dbkey key;
+       DBT dbkey, dbdat;
+       int do_acct = 0;
+
+       if (n->nlmsg_type == NLMSG_DONE) {
+               dbase->sync(dbase, 0);
+
+               /* Now we have at least mirror of kernel db, so that
+                * may start real resolution.
+                */
+               do_sysctl_adjustments();
+               return 0;
+       }
+
+       if (n->nlmsg_type != RTM_GETNEIGH && n->nlmsg_type != RTM_NEWNEIGH)
+               return 0;
+
+       len -= NLMSG_LENGTH(sizeof(*ndm));
+       if (len < 0)
+               return -1;
+
+       if (ndm->ndm_family != AF_INET ||
+           (ifnum && !handle_if(ndm->ndm_ifindex)) ||
+           ndm->ndm_flags ||
+           ndm->ndm_type != RTN_UNICAST ||
+           !(ndm->ndm_state&~NUD_NOARP))
+               return 0;
+
+       memset(tb, 0, sizeof(tb));
+       parse_rtattr(tb, NDA_MAX, NDA_RTA(ndm), len);
+
+       /* Four bytes are copied out of NDA_DST below: make sure they exist. */
+       if (!tb[NDA_DST] || RTA_PAYLOAD(tb[NDA_DST]) < 4)
+               return 0;
+
+       key.iface = ndm->ndm_ifindex;
+       memcpy(&key.addr, RTA_DATA(tb[NDA_DST]), 4);
+       dbkey.data = &key;
+       dbkey.size = sizeof(key);
+
+       /* dbdat.data == NULL means "no entry for this key". */
+       if (dbase->get(dbase, &dbkey, &dbdat, 0) != 0) {
+               dbdat.data = 0;
+               dbdat.size = 0;
+       }
+
+       if (n->nlmsg_type == RTM_GETNEIGH) {
+               if (!(n->nlmsg_flags&NLM_F_REQUEST))
+                       return 0;
+
+               if (!(ndm->ndm_state&(NUD_PROBE|NUD_INCOMPLETE))) {
+                       stats.app_bad++;
+                       return 0;
+               }
+
+               if (ndm->ndm_state&NUD_PROBE) {
+                       /* If we get this, kernel still has some valid
+                        * address, but unicast probing failed and host
+                        * is either dead or changed its mac address.
+                        * Kernel is going to initiate broadcast resolution.
+                        * OK, we invalidate our information as well.
+                        */
+                       if (dbdat.data && !IS_NEG(dbdat.data))
+                               stats.app_neg++;
+
+                       dbase->del(dbase, &dbkey, 0);
+               } else {
+                       /* If we get this kernel does not have any information.
+                        * If we have something tell this to kernel. */
+                       stats.app_recv++;
+                       if (dbdat.data && !IS_NEG(dbdat.data)) {
+                               stats.app_success++;
+                               respond_to_kernel(key.iface, key.addr, dbdat.data, dbdat.size);
+                               return 0;
+                       }
+
+                       /* We have nothing to tell. */
+                       /* If we have recent negative entry, be silent. */
+                       if (dbdat.data && NEG_VALID(dbdat.data)) {
+                               if (NEG_CNT(dbdat.data) >= active_probing) {
+                                       stats.app_suppressed++;
+                                       return 0;
+                               }
+                               /* Count this probe against the negative entry. */
+                               do_acct = 1;
+                       }
+               }
+
+               if (active_probing &&
+                   queue_active_probe(ndm->ndm_ifindex, key.addr) == 0 &&
+                   do_acct) {
+                       NEG_CNT(dbdat.data)++;
+                       dbase->put(dbase, &dbkey, &dbdat, 0);
+               }
+       } else if (n->nlmsg_type == RTM_NEWNEIGH) {
+               if (n->nlmsg_flags&NLM_F_REQUEST)
+                       return 0;
+
+               if (ndm->ndm_state&NUD_FAILED) {
+                       /* Kernel was not able to resolve. Host is dead.
+                        * Create negative entry if it is not present
+                        * or renew it if it is too old. */
+                       if (!dbdat.data ||
+                           !IS_NEG(dbdat.data) ||
+                           !NEG_VALID(dbdat.data)) {
+                               __u8 ndata[6];
+                               stats.kern_neg++;
+                               prepare_neg_entry(ndata, time(NULL));
+                               dbdat.data = ndata;
+                               dbdat.size = sizeof(ndata);
+                               dbase->put(dbase, &dbkey, &dbdat, 0);
+                       }
+               } else if (tb[NDA_LLADDR]) {
+                       if (dbdat.data && !IS_NEG(dbdat.data)) {
+                               /* Unchanged only if length AND bytes match;
+                                * comparing stored-length bytes alone could
+                                * read past a shorter attribute. */
+                               if ((int)dbdat.size == RTA_PAYLOAD(tb[NDA_LLADDR]) &&
+                                   memcmp(RTA_DATA(tb[NDA_LLADDR]), dbdat.data, dbdat.size) == 0)
+                                       return 0;
+                               stats.kern_change++;
+                       } else {
+                               stats.kern_new++;
+                       }
+                       dbdat.data = RTA_DATA(tb[NDA_LLADDR]);
+                       dbdat.size = RTA_PAYLOAD(tb[NDA_LLADDR]);
+                       dbase->put(dbase, &dbkey, &dbdat, 0);
+               }
+       }
+       return 0;
+}
+
+/*
+ * Ask the kernel for a dump of its AF_INET neighbour table on the
+ * global rtnetlink handle.  The result of the request is not checked.
+ */
+void load_initial_table(void)
+{
+       struct rtnl_handle *handle = &rth;
+
+       rtnl_wilddump_request(handle, AF_INET, RTM_GETNEIGH);
+}
+
+/*
+ * Read one datagram from the netlink socket without blocking and feed
+ * every message it contains to do_one_request().
+ *
+ * Datagrams whose sender address has an unexpected size or a non-zero
+ * nl_pid are dropped.  Processing stops at the first malformed header
+ * or when do_one_request() reports an error.
+ */
+void get_kern_msg(void)
+{
+       int status;
+       struct nlmsghdr *h;
+       struct sockaddr_nl nladdr;
+       struct iovec iov;
+       char   buf[8192];
+       struct msghdr msg;
+
+       memset(&nladdr, 0, sizeof(nladdr));
+
+       iov.iov_base = buf;
+       iov.iov_len = sizeof(buf);
+
+       /* Assign by member name: the field order of struct msghdr is not
+        * the same on every C library. */
+       memset(&msg, 0, sizeof(msg));
+       msg.msg_name = (void*)&nladdr;
+       msg.msg_namelen = sizeof(nladdr);
+       msg.msg_iov = &iov;
+       msg.msg_iovlen = 1;
+
+       status = recvmsg(rth.fd, &msg, MSG_DONTWAIT);
+
+       if (status <= 0)
+               return;
+
+       if (msg.msg_namelen != sizeof(nladdr))
+               return;
+
+       if (nladdr.nl_pid)
+               return;
+
+       /* Compare as signed: the aligned decrement below may push status
+        * below zero, and an unsigned comparison would then keep walking
+        * past the received data. */
+       for (h = (struct nlmsghdr*)buf; status >= (int)sizeof(*h); ) {
+               int len = h->nlmsg_len;
+
+               if (len < (int)sizeof(*h) || len > status)
+                       return;
+
+               if (do_one_request(h) < 0)
+                       return;
+
+               status -= NLMSG_ALIGN(len);
+               h = (struct nlmsghdr*)((char*)h + NLMSG_ALIGN(len));
+       }
+}
+
+/*
+ * Receive one ARP packet from the packet socket and record the
+ * sender's address mapping in the database.
+ *
+ * Only IPv4 ARP requests and replies whose hardware address length
+ * matches the link-layer address length reported by the socket are
+ * accepted.  Packets with a zero sender protocol address (DAD) are
+ * ignored.  The entry is rewritten only when it is new, negative, or
+ * differs from what is stored.
+ */
+void get_arp_pkt(void)
+{
+       unsigned char buf[1024];
+       struct sockaddr_ll sll;
+       socklen_t sll_len = sizeof(sll);
+       struct arphdr *a = (struct arphdr*)buf;
+       struct dbkey key;
+       DBT dbkey, dbdat;
+       int n;
+
+       n = recvfrom(pset[0].fd, buf, sizeof(buf), MSG_DONTWAIT, (struct sockaddr*)&sll, &sll_len);
+       if (n < 0) {
+               if (errno != EINTR && errno != EAGAIN)
+                       syslog(LOG_ERR, "recvfrom: %m");
+               return;
+       }
+
+       if (ifnum && !handle_if(sll.sll_ifindex))
+               return;
+
+       /* Sanity checks */
+
+       if ((size_t)n < sizeof(*a) ||
+           (a->ar_op != htons(ARPOP_REQUEST) &&
+            a->ar_op != htons(ARPOP_REPLY)) ||
+           a->ar_pln != 4 ||
+           a->ar_pro != htons(ETH_P_IP) ||
+           a->ar_hln != sll.sll_halen ||
+           sizeof(*a) + 2*4 + 2*a->ar_hln > (size_t)n)
+               return;
+
+       /* The sender protocol address follows the sender hardware address. */
+       key.iface = sll.sll_ifindex;
+       memcpy(&key.addr, (char*)(a+1) + a->ar_hln, 4);
+
+       /* DAD message, ignore. */
+       if (key.addr == 0)
+               return;
+
+       dbkey.data = &key;
+       dbkey.size = sizeof(key);
+
+       if (dbase->get(dbase, &dbkey, &dbdat, 0) == 0 && !IS_NEG(dbdat.data)) {
+               /* Unchanged only if the stored address has the same
+                * length and the same bytes as the one just received. */
+               if (dbdat.size == a->ar_hln &&
+                   memcmp(dbdat.data, a+1, dbdat.size) == 0)
+                       return;
+               stats.arp_change++;
+       } else {
+               stats.arp_new++;
+       }
+
+       dbdat.data = a+1;
+       dbdat.size = a->ar_hln;
+       dbase->put(dbase, &dbkey, &dbdat, 0);
+}
+
+/*
+ * Install handler for signal sig through sigaction().  SA_INTERRUPT is
+ * requested where the platform defines it; the result of sigaction()
+ * is not checked.
+ */
+void catch_signal(int sig, void (*handler)(int))
+{
+       struct sigaction act;
+
+       memset(&act, 0, sizeof(act));
+#ifdef SA_INTERRUPT
+       act.sa_flags = SA_INTERRUPT;
+#endif
+       act.sa_handler = handler;
+
+       sigaction(sig, &act, NULL);
+}
+
+#include <setjmp.h>
+sigjmp_buf env;
+volatile int in_poll;
+
+/*
+ * Signal handler: request termination.  When the main loop has marked
+ * itself interruptible (in_poll), jump straight back to its
+ * sigsetjmp() point.
+ */
+void sig_exit(int signo)
+{
+       do_exit = 1;
+       if (!in_poll)
+               return;
+       siglongjmp(env, 1);
+}
+
+/*
+ * Signal handler: request a database sync.  When the main loop has
+ * marked itself interruptible (in_poll), jump straight back to its
+ * sigsetjmp() point.
+ */
+void sig_sync(int signo)
+{
+       do_sync = 1;
+       if (!in_poll)
+               return;
+       siglongjmp(env, 1);
+}
+
+/*
+ * Signal handler: request both a database sync and a statistics
+ * report.  When the main loop has marked itself interruptible
+ * (in_poll), jump straight back to its sigsetjmp() point.
+ */
+void sig_stats(int signo)
+{
+       do_stats = 1;
+       do_sync = 1;
+       if (!in_poll)
+               return;
+       siglongjmp(env, 1);
+}
+
+/*
+ * Report the event counters through syslog at LOG_INFO, in two lines,
+ * then clear the do_stats request flag.
+ */
+void send_stats(void)
+{
+       /* Packets seen on the wire and requests from the kernel. */
+       syslog(LOG_INFO,
+              "arp_rcv: n%lu c%lu app_rcv: tot %lu hits %lu bad %lu neg %lu sup %lu",
+              stats.arp_new,
+              stats.arp_change,
+              stats.app_recv,
+              stats.app_success,
+              stats.app_bad,
+              stats.app_neg,
+              stats.app_suppressed);
+
+       /* Kernel neighbour updates and our own probes. */
+       syslog(LOG_INFO,
+              "kern: n%lu c%lu neg %lu arp_send: %lu rlim %lu",
+              stats.kern_new,
+              stats.kern_change,
+              stats.kern_neg,
+              stats.probes_sent,
+              stats.probes_suppressed);
+
+       do_stats = 0;
+}
+
+
+int main(int argc, char **argv)
+{
+       int opt;
+       int do_list = 0;
+       char *do_load = NULL;
+
+       while ((opt = getopt(argc, argv, "h?b:lf:a:n:kR:B:")) != EOF) {
+               switch (opt) {
+               case 'b':
+                       dbname = optarg;
+                       break;
+               case 'f':
+                       if (do_load) {
+                               fprintf(stderr, "Duplicate option -f\n");
+                               usage();
+                       }
+                       do_load = optarg;
+                       break;
+               case 'l':
+                       do_list = 1;
+                       break;
+               case 'a':
+                       active_probing = atoi(optarg);
+                       break;
+               case 'n':
+                       negative_timeout = atoi(optarg);
+                       break;
+               case 'k':
+                       no_kernel_broadcasts = 1;
+                       break;
+               case 'R':
+                       if ((broadcast_rate = atoi(optarg)) <= 0 ||
+                           (broadcast_rate = 1000/broadcast_rate) <= 0) {
+                               fprintf(stderr, "Invalid ARP rate\n");
+                               exit(-1);
+                       }
+                       break;
+               case 'B':
+                       if ((broadcast_burst = atoi(optarg)) <= 0 ||
+                           (broadcast_burst = 1000*broadcast_burst) <= 0) {
+                               fprintf(stderr, "Invalid ARP burst\n");
+                               exit(-1);
+                       }
+                       break;
+               case 'h':
+               case '?':
+               default:
+                       usage();
+               }
+       }
+       argc -= optind;
+       argv += optind;
+
+       if (argc > 0) {
+               ifnum = argc;
+               ifnames = argv;
+               ifvec = malloc(argc*sizeof(int));
+               if (!ifvec) {
+                       perror("malloc");
+                       exit(-1);
+               }
+       }
+
+       if ((udp_sock = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
+               perror("socket");
+               exit(-1);
+       }
+
+        if (ifnum) {
+               int i;
+               struct ifreq ifr;
+               memset(&ifr, 0, sizeof(ifr));
+               for (i=0; i<ifnum; i++) {
+                       strncpy(ifr.ifr_name, ifnames[i], IFNAMSIZ);
+                       if (ioctl(udp_sock, SIOCGIFINDEX, &ifr)) {
+                               perror("ioctl(SIOCGIFINDEX)");
+                               exit(-1);;
+                       }
+                       ifvec[i] = ifr.ifr_ifindex;
+               }
+       }
+
+       dbase = dbopen(dbname, O_CREAT|O_RDWR, 0644, DB_HASH, NULL);
+       if (dbase == NULL) {
+               perror("db_open");
+               exit(-1);
+       }
+
+       if (do_load) {
+               char buf[128];
+               FILE *fp;
+               struct dbkey k;
+               DBT dbkey, dbdat;
+
+               dbkey.data = &k;
+               dbkey.size = sizeof(k);
+
+               if (strcmp(do_load, "-") == 0 || strcmp(do_load, "--") == 0) {
+                       fp = stdin;
+               } else if ((fp = fopen(do_load, "r")) == NULL) {
+                       perror("fopen");
+                       goto do_abort;
+               }
+
+               buf[sizeof(buf)-1] = 0;
+               while (fgets(buf, sizeof(buf)-1, fp)) {
+                       __u8 b1[6];
+                       char ipbuf[128];
+                       char macbuf[128];
+
+                       if (buf[0] == '#')
+                               continue;
+
+                       if (sscanf(buf, "%u%s%s", &k.iface, ipbuf, macbuf) != 3) {
+                               fprintf(stderr, "Wrong format of input file \"%s\"\n", do_load);
+                               goto do_abort;
+                       }
+                       if (strncmp(macbuf, "FAILED:", 7) == 0)
+                               continue;
+                       if (!inet_aton(ipbuf, (struct in_addr*)&k.addr)) {
+                               fprintf(stderr, "Invalid IP address: \"%s\"\n", ipbuf);
+                               goto do_abort;
+                       }
+                       dbdat.data = hexstring_a2n(macbuf, b1, 6);
+                       if (dbdat.data == NULL)
+                               goto do_abort;
+                       dbdat.size = 6;
+
+                       if (dbase->put(dbase, &dbkey, &dbdat, 0)) {
+                               perror("hash->put");
+                               goto do_abort;
+                       }
+               }
+               dbase->sync(dbase, 0);
+               if (fp != stdin)
+                       fclose(fp);
+       }
+
+       if (do_list) {
+               DBT dbkey, dbdat;
+               printf("%-8s %-15s %s\n", "#Ifindex", "IP", "MAC");
+               while (dbase->seq(dbase, &dbkey, &dbdat, R_NEXT) == 0) {
+                       struct dbkey *key = dbkey.data; 
+                       if (handle_if(key->iface)) {
+                               if (!IS_NEG(dbdat.data)) {
+                                       __u8 b1[18];
+                                       printf("%-8d %-15s %s\n",
+                                              key->iface,
+                                              inet_ntoa(*(struct in_addr*)&key->addr),
+                                              hexstring_n2a(dbdat.data, 6, b1, 18));
+                               } else {
+                                       printf("%-8d %-15s FAILED: %dsec ago\n",
+                                              key->iface,
+                                              inet_ntoa(*(struct in_addr*)&key->addr),
+                                              NEG_AGE(dbdat.data));
+                               }
+                       }
+               }
+       }
+
+       if (do_load || do_list)
+               goto out;
+
+       pset[0].fd = socket(PF_PACKET, SOCK_DGRAM, 0);
+       if (pset[0].fd < 0) {
+               perror("socket");
+               exit(-1);
+       }
+
+       if (1) {
+               struct sockaddr_ll sll;
+               memset(&sll, 0, sizeof(sll));
+               sll.sll_family = AF_PACKET;
+               sll.sll_protocol = htons(ETH_P_ARP);
+               sll.sll_ifindex = (ifnum == 1 ? ifvec[0] : 0);
+               if (bind(pset[0].fd, (struct sockaddr*)&sll, sizeof(sll)) < 0) {
+                       perror("bind");
+                       goto do_abort;
+               }
+       }
+
+       if (rtnl_open(&rth, RTMGRP_NEIGH) < 0) {
+               perror("rtnl_open");
+               goto do_abort;
+       }
+       pset[1].fd = rth.fd;
+
+       load_initial_table();
+
+       if (1) {
+               int fd;
+               pid_t pid = fork();
+
+               if (pid > 0)
+                       _exit(0);
+               if (pid < 0) {
+                       perror("arpd: fork");
+                       goto do_abort;
+               }
+
+               chdir("/");
+               fd = open("/dev/null", O_RDWR);
+               if (fd >= 0) {
+                       dup2(fd, 0);
+                       dup2(fd, 1);
+                       dup2(fd, 2);
+                       if (fd > 2)
+                               close(fd);
+               }
+               setsid();
+       }
+
+       openlog("arpd", LOG_PID | LOG_CONS, LOG_DAEMON);
+       catch_signal(SIGINT, sig_exit);
+       catch_signal(SIGTERM, sig_exit);
+       catch_signal(SIGHUP, sig_sync);
+       catch_signal(SIGUSR1, sig_stats);
+
+#define EVENTS (POLLIN|POLLPRI|POLLERR|POLLHUP)
+       pset[0].events = EVENTS;
+       pset[0].revents = 0;
+       pset[1].events = EVENTS;
+       pset[1].revents = 0;
+
+       sigsetjmp(env, 1);
+
+       for (;;) {
+               in_poll = 1;
+
+               if (do_exit)
+                       break;
+               if (do_sync) {
+                       in_poll = 0;
+                       dbase->sync(dbase, 0);
+                       do_sync = 0;
+                       in_poll = 1;
+               }
+               if (do_stats)
+                       send_stats();
+               if (poll(pset, 2, 30000) > 0) {
+                       in_poll = 0;
+                       if (pset[0].revents&EVENTS)
+                               get_arp_pkt();
+                       if (pset[1].revents&EVENTS)
+                               get_kern_msg();
+               } else {
+                       do_sync = 1;
+               }
+       }
+
+       undo_sysctl_adjustments();
+out:
+       dbase->close(dbase);
+       exit(0);
+
+do_abort:
+       dbase->close(dbase);
+       exit(-1);
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..67489b9a753bd880a2c9aa3c4326852a53c733eb 100644 (file)
@@ -0,0 +1,729 @@
+/*
+ * ifstat.c    handy utility to read net interface statistics
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/time.h>
+#include <fnmatch.h>
+#include <sys/file.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/poll.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <signal.h>
+#include <math.h>
+
+#include <libnetlink.h>
+#include <linux/netdevice.h>
+
+#include <SNAPSHOT.h>
+
+/* Presumably command-line option flags; where they are set is not
+ * visible here -- TODO confirm. */
+int dump_zeros = 0;
+int reset_history = 0;
+int ignore_history = 0;
+int no_output = 0;
+int no_update = 0;
+int scan_interval = 0;
+int time_constant = 0;
+int show_errors = 0;
+double W;
+/* Shell-style interface name patterns consulted by match(); an empty
+ * list matches everything. */
+char **patterns;
+int npatterns;
+
+/* Text of the "#..." header line last read by load_raw_table(), and a
+ * flag raised when a later header differs from an earlier one. */
+char info_source[128];
+int source_mismatch;
+
+/* Number of unsigned long counters in struct net_device_stats. */
+#define MAXS (sizeof(struct net_device_stats)/sizeof(unsigned long))
+
+/* One interface: singly linked list node holding its name, index and
+ * per-counter values.  val[] is the wide copy of ival[]; rate[] is the
+ * per-counter rate. */
+struct ifstat_ent
+{
+       struct ifstat_ent       *next;
+       char                    *name;
+       int                     ifindex;
+       unsigned long long      val[MAXS];
+       double                  rate[MAXS];
+       unsigned long           ival[MAXS];
+};
+
+/* kern_db: entries built by get_nlmsg()/load_raw_table().
+ * hist_db: entries consulted by dump_raw_db() for interfaces that do
+ * not match the patterns. */
+struct ifstat_ent *kern_db;
+struct ifstat_ent *hist_db;
+
+/*
+ * Return 1 when id matches at least one of the configured fnmatch()
+ * patterns, or when no pattern is configured; 0 otherwise.
+ */
+int match(char *id)
+{
+       int idx = 0;
+
+       if (npatterns == 0)
+               return 1;
+
+       while (idx < npatterns) {
+               if (fnmatch(patterns[idx], id, 0) == 0)
+                       return 1;
+               idx++;
+       }
+       return 0;
+}
+
+/*
+ * Dump filter callback: turn one RTM_NEWLINK message into an
+ * ifstat_ent and push it on the front of kern_db.
+ *
+ * Interfaces that are down, or messages lacking IFLA_IFNAME or
+ * IFLA_STATS, are skipped.  Returns -1 when the message is too short
+ * to hold a struct ifinfomsg, 0 otherwise.  Aborts on allocation
+ * failure.
+ */
+int get_nlmsg(struct sockaddr_nl *who, struct nlmsghdr *m, void *arg)
+{
+       struct ifinfomsg *ifi = NLMSG_DATA(m);
+       struct rtattr * tb[IFLA_MAX+1];
+       int len = m->nlmsg_len;
+       struct ifstat_ent *n;
+       size_t avail;
+       int i;
+
+       if (m->nlmsg_type != RTM_NEWLINK)
+               return 0;
+
+       len -= NLMSG_LENGTH(sizeof(*ifi));
+       if (len < 0)
+               return -1;
+
+       if (!(ifi->ifi_flags&IFF_UP))
+               return 0;
+
+       memset(tb, 0, sizeof(tb));
+       parse_rtattr(tb, IFLA_MAX, IFLA_RTA(ifi), len);
+       if (tb[IFLA_IFNAME] == NULL || tb[IFLA_STATS] == NULL)
+               return 0;
+
+       n = malloc(sizeof(*n));
+       if (!n)
+               abort();
+       n->ifindex = ifi->ifi_index;
+       n->name = strdup(RTA_DATA(tb[IFLA_IFNAME]));
+       if (!n->name)
+               abort();
+
+       /* Never read past the attribute: copy what it carries and leave
+        * the remaining counters at zero.
+        * NOTE(review): this assumes the payload is laid out as an array
+        * of unsigned long -- confirm against the running kernel. */
+       avail = RTA_PAYLOAD(tb[IFLA_STATS]);
+       if (avail > sizeof(n->ival))
+               avail = sizeof(n->ival);
+       memset(&n->ival, 0, sizeof(n->ival));
+       memcpy(&n->ival, RTA_DATA(tb[IFLA_STATS]), avail);
+
+       memset(&n->rate, 0, sizeof(n->rate));
+       for (i=0; i<MAXS; i++)
+               n->val[i] = n->ival[i];
+       n->next = kern_db;
+       kern_db = n;
+       return 0;
+}
+
+/*
+ * Fill kern_db from the kernel: open rtnetlink, request a link dump,
+ * let get_nlmsg() collect the entries, then reverse the list (the
+ * callback prepends, so this restores dump order).  Exits with status
+ * 1 on any netlink failure.
+ */
+void load_info(void)
+{
+       struct rtnl_handle rth;
+       struct ifstat_ent *pending;
+       struct ifstat_ent *reversed = NULL;
+
+       if (rtnl_open(&rth, 0) < 0)
+               exit(1);
+
+       if (rtnl_wilddump_request(&rth, AF_INET, RTM_GETLINK) < 0) {
+               perror("Cannot send dump request");
+               exit(1);
+       }
+
+       if (rtnl_dump_filter(&rth, get_nlmsg, NULL, NULL, NULL) < 0) {
+               fprintf(stderr, "Dump terminated\n");
+               exit(1);
+       }
+
+       rtnl_close(&rth);
+
+       /* Pop from the collected list, push onto the reversed one. */
+       pending = kern_db;
+       while (pending != NULL) {
+               struct ifstat_ent *cur = pending;
+
+               pending = cur->next;
+               cur->next = reversed;
+               reversed = cur;
+       }
+       kern_db = reversed;
+}
+
+/*
+ * Parse a raw statistics table from fp and prepend its entries, in
+ * file order, to kern_db.
+ *
+ * Format: a "#<source>" header line (remembered in info_source;
+ * source_mismatch is raised when it differs from a previous header),
+ * then one line per interface:
+ *     <ifindex> <name> <val0> <rate0> <val1> <rate1> ... 
+ * with every field followed by a space.  Any malformed line or
+ * allocation failure aborts the program.
+ */
+void load_raw_table(FILE *fp)
+{
+       char buf[4096];
+       struct ifstat_ent *db = NULL;
+       struct ifstat_ent *n;
+
+       while (fgets(buf, sizeof(buf), fp) != NULL) {
+               char *p;
+               char *next;
+               int i;
+
+               if (buf[0] == '#') {
+                       /* Strip the newline only if there is one, so a
+                        * final unterminated line keeps its last char. */
+                       buf[strcspn(buf, "\n")] = 0;
+                       if (info_source[0] && strcmp(info_source, buf+1))
+                               source_mismatch = 1;
+                       strncpy(info_source, buf+1, sizeof(info_source)-1);
+                       continue;
+               }
+               if ((n = malloc(sizeof(*n))) == NULL)
+                       abort();
+
+               /* Field 1: interface index. */
+               if (!(p = strchr(buf, ' ')))
+                       abort();
+               *p++ = 0;
+
+               if (sscanf(buf, "%d", &n->ifindex) != 1)
+                       abort();
+
+               /* Field 2: interface name. */
+               if (!(next = strchr(p, ' ')))
+                       abort();
+               *next++ = 0;
+
+               n->name = strdup(p);
+               if (!n->name)
+                       abort();
+               p = next;
+
+               /* Then MAXS pairs of <value> <rate>. */
+               for (i=0; i<MAXS; i++) {
+                       unsigned rate;
+                       if (!(next = strchr(p, ' ')))
+                               abort();
+                       *next++ = 0;
+                       if (sscanf(p, "%llu", n->val+i) != 1)
+                               abort();
+                       n->ival[i] = (unsigned long)n->val[i];
+                       p = next;
+                       if (!(next = strchr(p, ' ')))
+                               abort();
+                       *next++ = 0;
+                       if (sscanf(p, "%u", &rate) != 1)
+                               abort();
+                       n->rate[i] = rate;
+                       p = next;
+               }
+               n->next = db;
+               db = n;
+       }
+
+       /* db is in reverse file order; pushing it onto kern_db restores
+        * file order. */
+       while (db) {
+               n = db;
+               db = db->next;
+               n->next = kern_db;
+               kern_db = n;
+       }
+}
+
+/*
+ * Write kern_db to fp in the raw format: a "#<info_source>" line followed by
+ * one line per interface.
+ *
+ * Interfaces rejected by match() are skipped unless to_hist is set; in that
+ * case the line is still written, taking values and rates from the hist_db
+ * entry with the same ifindex when one is found further down that list.
+ */
+void dump_raw_db(FILE *fp, int to_hist)
+{
+       struct ifstat_ent *cur;
+       struct ifstat_ent *hist = hist_db;
+
+       fprintf(fp, "#%s\n", info_source);
+
+       for (cur = kern_db; cur != NULL; cur = cur->next) {
+               unsigned long long *counters = cur->val;
+               double *speeds = cur->rate;
+               int idx;
+
+               if (!match(cur->name)) {
+                       struct ifstat_ent *old;
+
+                       if (!to_hist)
+                               continue;
+
+                       /* Look for the saved entry, resuming after the last hit. */
+                       old = hist;
+                       while (old != NULL && old->ifindex != cur->ifindex)
+                               old = old->next;
+                       if (old != NULL) {
+                               counters = old->val;
+                               speeds = old->rate;
+                               hist = old->next;
+                       }
+               }
+
+               fprintf(fp, "%d %s ", cur->ifindex, cur->name);
+               for (idx = 0; idx < MAXS; idx++)
+                       fprintf(fp, "%llu %u ", counters[idx], (unsigned)speeds[idx]);
+               fprintf(fp, "\n");
+       }
+}
+
+
+/*
+ * Print counter vals[i] followed by rate rates[i] to fp.
+ *
+ * The counter takes eight columns: plain below 1024*1024, divided by 1024 with
+ * a 'K' suffix up to 1024*1024*1024, divided by 1024*1024 with an 'M' suffix
+ * above that.  The rate takes six left-justified columns: plain up to 1024,
+ * with a 'K' suffix up to 1024*1024 and an 'M' suffix above.  Each field is
+ * followed by one space.
+ */
+void format_rate(FILE *fp, unsigned long long *vals, double *rates, int i)
+{
+       const unsigned long long count = vals[i];
+       const double speed = rates[i];
+       char text[64];
+
+       if (count > 1024*1024*1024) {
+               fprintf(fp, "%7lluM ", count/(1024*1024));
+       } else if (count > 1024*1024) {
+               fprintf(fp, "%7lluK ", count/1024);
+       } else {
+               fprintf(fp, "%8llu ", count);
+       }
+
+       if (speed > 1024) {
+               /* Scaled rates are formatted first so the suffix is padded too. */
+               if (speed > 1024*1024)
+                       sprintf(text, "%uM", (unsigned)(speed/(1024*1024)));
+               else
+                       sprintf(text, "%uK", (unsigned)(speed/1024));
+               fprintf(fp, "%-6s ", text);
+       } else {
+               fprintf(fp, "%-6u ", (unsigned)speed);
+       }
+}
+
+/*
+ * Print the two counters vals[i] and vals[k] to fp.
+ *
+ * vals[i] takes eight columns and vals[k] six left-justified columns, each
+ * followed by one space.  Both are scaled the same way: plain up to 1024*1024,
+ * divided by 1024 with a 'K' suffix up to 1024*1024*1024, divided by 1024*1024
+ * with an 'M' suffix above that.
+ */
+void format_pair(FILE *fp, unsigned long long *vals, int i, int k)
+{
+       const unsigned long long first = vals[i];
+       const unsigned long long second = vals[k];
+       char text[64];
+
+       if (first > 1024*1024*1024) {
+               fprintf(fp, "%7lluM ", first/(1024*1024));
+       } else if (first > 1024*1024) {
+               fprintf(fp, "%7lluK ", first/1024);
+       } else {
+               fprintf(fp, "%8llu ", first);
+       }
+
+       if (second > 1024*1024) {
+               /* Scaled values are formatted first so the suffix is padded too. */
+               if (second > 1024*1024*1024)
+                       sprintf(text, "%uM", (unsigned)(second/(1024*1024)));
+               else
+                       sprintf(text, "%uK", (unsigned)(second/1024));
+               fprintf(fp, "%-6s ", text);
+       } else {
+               fprintf(fp, "%-6u ", (unsigned)second);
+       }
+}
+
+/*
+ * Print the table header to fp: the "#<info_source>" line, the row of traffic
+ * column titles, and then either one compact row of error titles or, when
+ * show_errors is set, four rows of detailed error titles.
+ */
+void print_head(FILE *fp)
+{
+       /* One header row: four "title/subtitle" cells. */
+       struct head_row {
+               const char *cell[4][2];
+       };
+       static const struct head_row traffic = {
+               { {"RX Pkts", "Rate"}, {"TX Pkts", "Rate"},
+                 {"RX Data", "Rate"}, {"TX Data", "Rate"} }
+       };
+       static const struct head_row compact[1] = {
+               { { {"RX Errs", "Drop"}, {"TX Errs", "Drop"},
+                   {"RX Over", "Rate"}, {"TX Coll", "Rate"} } },
+       };
+       static const struct head_row detailed[4] = {
+               { { {"RX Errs", "Rate"}, {"RX Drop", "Rate"},
+                   {"RX Over", "Rate"}, {"RX Leng", "Rate"} } },
+               { { {"RX Crc", "Rate"}, {"RX Frm", "Rate"},
+                   {"RX Fifo", "Rate"}, {"RX Miss", "Rate"} } },
+               { { {"TX Errs", "Rate"}, {"TX Drop", "Rate"},
+                   {"TX Coll", "Rate"}, {"TX Carr", "Rate"} } },
+               { { {"TX Abrt", "Rate"}, {"TX Fifo", "Rate"},
+                   {"TX Hear", "Rate"}, {"TX Wind", "Rate"} } },
+       };
+       const struct head_row *row;
+       int left;
+       int col;
+
+       if (show_errors) {
+               row = detailed;
+               left = 4;
+       } else {
+               row = compact;
+               left = 1;
+       }
+
+       fprintf(fp, "#%s\n", info_source);
+
+       fprintf(fp, "%-15s ", "Interface");
+       for (col = 0; col < 4; col++)
+               fprintf(fp, col < 3 ? "%8s/%-6s " : "%8s/%-6s\n",
+                       traffic.cell[col][0], traffic.cell[col][1]);
+
+       /* Continuation rows leave the interface column blank. */
+       for (; left > 0; left--, row++) {
+               fprintf(fp, "%-15s ", "");
+               for (col = 0; col < 4; col++)
+                       fprintf(fp, col < 3 ? "%8s/%-6s " : "%8s/%-6s\n",
+                               row->cell[col][0], row->cell[col][1]);
+       }
+}
+
+/*
+ * Print the statistics of interface n to fp, taking counters from vals and
+ * rates from n->rate.  The first row holds counters 0..3; it is followed by one
+ * compact error row or, when show_errors is set, by four detailed error rows.
+ */
+void print_one_if(FILE *fp, struct ifstat_ent *n, unsigned long long *vals)
+{
+       /* Counter indices shown in each detailed error row, left to right. */
+       static const int detail[4][4] = {
+               {  4,  6, 11, 10 },
+               { 12, 13, 14, 15 },
+               {  5,  7,  9, 17 },
+               { 16, 18, 19, 20 },
+       };
+       int row;
+       int col;
+
+       fprintf(fp, "%-15s ", n->name);
+       for (col = 0; col < 4; col++)
+               format_rate(fp, vals, n->rate, col);
+       fprintf(fp, "\n");
+
+       if (show_errors) {
+               for (row = 0; row < 4; row++) {
+                       fprintf(fp, "%-15s ", "");
+                       for (col = 0; col < 4; col++)
+                               format_rate(fp, vals, n->rate, detail[row][col]);
+                       fprintf(fp, "\n");
+               }
+               return;
+       }
+
+       /* Compact form: two counter pairs, then two counters with rates. */
+       fprintf(fp, "%-15s ", "");
+       format_pair(fp, vals, 4, 6);
+       format_pair(fp, vals, 5, 7);
+       format_rate(fp, vals, n->rate, 11);
+       format_rate(fp, vals, n->rate, 9);
+       fprintf(fp, "\n");
+}
+
+
+/*
+ * Print the header and then the absolute counters of every kern_db entry
+ * whose name is accepted by match().
+ */
+void dump_kern_db(FILE *fp)
+{
+       struct ifstat_ent *n;
+
+       print_head(fp);
+
+       for (n=kern_db; n; n=n->next) {
+               if (!match(n->name))
+                       continue;
+               print_one_if(fp, n, n->val);
+       }
+}
+
+
+/*
+ * Print the header and then, for every kern_db entry accepted by match(), its
+ * counters minus those of the hist_db entry with the same ifindex.  Entries
+ * without a saved counterpart are printed with their absolute values.
+ */
+void dump_incr_db(FILE *fp)
+{
+       struct ifstat_ent *cur;
+       struct ifstat_ent *hist = hist_db;
+
+       print_head(fp);
+
+       for (cur = kern_db; cur != NULL; cur = cur->next) {
+               unsigned long long delta[MAXS];
+               struct ifstat_ent *old;
+               int idx;
+
+               /* Look for the saved entry, resuming after the last hit. */
+               old = hist;
+               while (old != NULL && old->ifindex != cur->ifindex)
+                       old = old->next;
+
+               for (idx = 0; idx < MAXS; idx++)
+                       delta[idx] = cur->val[idx] - (old ? old->val[idx] : 0);
+
+               /* The history cursor advances even for interfaces not shown. */
+               if (old != NULL)
+                       hist = old->next;
+
+               if (match(cur->name))
+                       print_one_if(fp, cur, delta);
+       }
+}
+
+
+static int children;
+
+/* Signal handler with an empty body; main() installs it for SIGCHLD. */
+void sigchild(int signo)
+{
+       (void)signo;
+}
+
+/*
+ * Take a fresh sample with load_info() and fold it into kern_db.
+ *
+ * interval is the time since the previous sample, in milliseconds.  For every
+ * kern_db entry that has a counterpart with the same ifindex in the fresh
+ * sample, the increments are added to the accumulated values and the rates are
+ * updated.  All entries of the fresh sample are freed before returning.
+ */
+void update_db(int interval)
+{
+       struct ifstat_ent *n, *h;
+
+       /* Set kern_db aside so that load_info() builds the new sample alone. */
+       n = kern_db;
+       kern_db = NULL;
+
+       load_info();
+
+       h = kern_db;
+       kern_db = n;
+
+       for (n = kern_db; n; n = n->next) {
+               struct ifstat_ent *h1;
+               for (h1 = h; h1; h1 = h1->next) {
+                       if (h1->ifindex == n->ifindex) {
+                               int i;
+                               /* A counter that went backwards restarts every baseline at zero. */
+                               for (i = 0; i < MAXS; i++) {
+                                       if ((long)(h1->ival[i] - n->ival[i]) < 0) {
+                                               memset(n->ival, 0, sizeof(n->ival));
+                                               break;
+                                       }
+                               }
+                               for (i = 0; i < MAXS; i++) {
+                                       double sample;
+                                       unsigned long incr = h1->ival[i] - n->ival[i];
+                                       n->val[i] += incr;
+                                       n->ival[i] = h1->ival[i];
+                                       /* Scale in floating point: incr*1000 may not fit an unsigned long. */
+                                       sample = (double)incr*1000/interval;
+                                       if (interval >= scan_interval) {
+                                               n->rate[i] += W*(sample-n->rate[i]);
+                                       } else if (interval >= 1000) {
+                                               if (interval >= time_constant) {
+                                                       n->rate[i] = sample;
+                                               } else {
+                                                       double w = W*(double)interval/scan_interval;
+                                                       n->rate[i] += w*(sample-n->rate[i]);
+                                               }
+                                       }
+                               }
+
+                               /* Drop the sample entries that were skipped over, then the match. */
+                               while (h != h1) {
+                                       struct ifstat_ent *tmp = h;
+                                       h = h->next;
+                                       free(tmp->name);
+                                       free(tmp);
+                               };
+                               h = h1->next;
+                               free(h1->name);
+                               free(h1);
+                               break;
+                       }
+               }
+       }
+
+       /* Whatever is left of the sample matched nothing; release it as well. */
+       while (h) {
+               struct ifstat_ent *tmp = h;
+               h = h->next;
+               free(tmp->name);
+               free(tmp);
+       }
+}
+
+/* Difference a - b between two struct timeval values, in milliseconds. */
+#define T_DIFF(a,b) (((a).tv_sec-(b).tv_sec)*1000 + ((a).tv_usec-(b).tv_usec)/1000)
+
+
+/*
+ * Sampler main loop; never returns.
+ *
+ * Refreshes the statistics every scan_interval milliseconds and, in between,
+ * accepts connections on the listening socket fd.  Each client is served by a
+ * forked child that writes the raw table to the connection; at most five
+ * children run at a time, further connections are closed unanswered.
+ */
+void server_loop(int fd)
+{
+       struct timeval snaptime;
+       struct pollfd p;
+       p.fd = fd;
+       p.events = p.revents = POLLIN;
+
+       sprintf(info_source, "%d.%lu sampling_interval=%d time_const=%d",
+               getpid(), (unsigned long)random(), scan_interval/1000, time_constant/1000);
+
+       load_info();
+
+       /* The initial load is the first snapshot; intervals are measured from here. */
+       gettimeofday(&snaptime, NULL);
+
+       for (;;) {
+               int status;
+               int tdiff;
+               struct timeval now;
+               gettimeofday(&now, NULL);
+               tdiff = T_DIFF(now, snaptime);
+               if (tdiff >= scan_interval) {
+                       update_db(tdiff);
+                       snaptime = now;
+                       tdiff = 0;
+               }
+               /* Sleep only for what remains of the current sampling period. */
+               if (poll(&p, 1, scan_interval - tdiff) > 0
+                   && (p.revents&POLLIN)) {
+                       int clnt = accept(fd, NULL, NULL);
+                       if (clnt >= 0) {
+                               pid_t pid;
+                               if (children >= 5) {
+                                       close(clnt);
+                               } else if ((pid = fork()) != 0) {
+                                       /* Parent, or fork failure: the socket is not ours to serve. */
+                                       if (pid>0)
+                                               children++;
+                                       close(clnt);
+                               } else {
+                                       /* Child: bring its private copy up to date, answer, exit. */
+                                       FILE *fp = fdopen(clnt, "w");
+                                       if (fp) {
+                                               if (tdiff > 0)
+                                                       update_db(tdiff);
+                                               dump_raw_db(fp, 0);
+                                       }
+                                       exit(0);
+                               }
+                       }
+               }
+               /* Reap finished children without blocking. */
+               while (children && waitpid(-1, &status, WNOHANG) > 0)
+                       children--;
+       }
+}
+
+/*
+ * Check the credentials of the peer connected to socket fd.
+ *
+ * Returns 0 when the peer runs with our own uid or with uid 0, and -1
+ * otherwise or when the credentials cannot be obtained.
+ */
+int verify_forging(int fd)
+{
+       struct ucred cred;
+       /* getsockopt() takes a socklen_t length, which need not be an int. */
+       socklen_t olen = sizeof(cred);
+
+       if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, (void*)&cred, &olen) ||
+           olen < sizeof(cred))
+               return -1;
+       if (cred.uid == getuid() || cred.uid == 0)
+               return 0;
+       return -1;
+}
+
+static void usage(void) __attribute__((noreturn));
+
+/*
+ * Print the command line synopsis to stderr and exit with status -1.
+ * The option list mirrors the getopt() string in main(), including -e.
+ */
+static void usage(void)
+{
+       fprintf(stderr,
+"Usage: ifstat [ -h?vVzrnased:t: ] [ PATTERN [ PATTERN ] ]\n"
+               );
+       exit(-1);
+}
+
+
+/*
+ * Entry point.
+ *
+ * With -d the program becomes a background sampler: it binds a UNIX socket
+ * whose name starts with a NUL byte followed by "ifstat<uid>", forks, detaches
+ * and runs server_loop().  Otherwise it loads the history file (unless told to
+ * ignore it), obtains the current counters from a running sampler or, failing
+ * that, through load_info(), prints them and rewrites the history file.
+ */
+int main(int argc, char *argv[])
+{
+       char hist_name[128];
+       struct sockaddr_un sun;
+       FILE *hist_fp = NULL;
+       int ch;
+       int fd;
+
+       while ((ch = getopt(argc, argv, "h?vVzrnasd:t:e")) != EOF) {
+               switch(ch) {
+               case 'z':
+                       dump_zeros = 1;
+                       break;
+               case 'r':
+                       reset_history = 1;
+                       break;
+               case 'a':
+                       ignore_history = 1;
+                       break;
+               case 's':
+                       no_update = 1;
+                       break;
+               case 'n':
+                       no_output = 1;
+                       break;
+               case 'e':
+                       show_errors = 1;
+                       break;
+               case 'd':
+                       /* Given in seconds, kept in milliseconds. */
+                       scan_interval = 1000*atoi(optarg);
+                       break;
+               case 't':
+                       if (sscanf(optarg, "%d", &time_constant) != 1 ||
+                           time_constant <= 0) {
+                               fprintf(stderr, "ifstat: invalid time constant divisor\n");
+                               exit(-1);
+                       }
+                       break;
+               case 'v':
+               case 'V':
+                       printf("ifstat utility, iproute2-ss%s\n", SNAPSHOT);
+                       exit(0);
+               case 'h':
+               case '?':
+               default:
+                       usage();
+               }
+       }
+
+       argc -= optind;
+       argv += optind;
+
+       /* Socket name: a leading NUL byte followed by "ifstat<uid>". */
+       sun.sun_family = AF_UNIX;
+       sun.sun_path[0] = 0;
+       sprintf(sun.sun_path+1, "ifstat%d", getuid());
+
+       if (scan_interval > 0) {
+               /* Sampler mode. */
+               if (time_constant == 0)
+                       time_constant = 60;
+               time_constant *= 1000;
+               W = 1 - 1/exp(log(10)*(double)scan_interval/time_constant);
+               if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
+                       perror("ifstat: socket");
+                       exit(-1);
+               }
+               if (bind(fd, (struct sockaddr*)&sun, 2+1+strlen(sun.sun_path+1)) < 0) {
+                       perror("ifstat: bind");
+                       exit(-1);
+               }
+               if (listen(fd, 5) < 0) {
+                       perror("ifstat: listen");
+                       exit(-1);
+               }
+               if (fork())
+                       exit(0);
+               chdir("/");
+               close(0); close(1); close(2); setsid();
+               signal(SIGPIPE, SIG_IGN);
+               signal(SIGCHLD, sigchild);
+               server_loop(fd);
+               exit(0);
+       }
+
+       patterns = argv;
+       npatterns = argc;
+
+       /* The environment value is data and must never act as a format string. */
+       if (getenv("IFSTAT_HISTORY"))
+               snprintf(hist_name, sizeof(hist_name), "%s", getenv("IFSTAT_HISTORY"));
+       else
+               sprintf(hist_name, "/tmp/.ifstat.u%d", getuid());
+
+       if (reset_history)
+               unlink(hist_name);
+
+       if (!ignore_history || !no_update) {
+               struct stat stb;
+
+               fd = open(hist_name, O_RDWR|O_CREAT|O_NOFOLLOW, 0600);
+               if (fd < 0) {
+                       perror("ifstat: open history file");
+                       exit(-1);
+               }
+               if ((hist_fp = fdopen(fd, "r+")) == NULL) {
+                       perror("ifstat: fdopen history file");
+                       exit(-1);
+               }
+               if (flock(fileno(hist_fp), LOCK_EX)) {
+                       perror("ifstat: flock history file");
+                       exit(-1);
+               }
+               if (fstat(fileno(hist_fp), &stb) != 0) {
+                       perror("ifstat: fstat history file");
+                       exit(-1);
+               }
+               /* Refuse files with extra hard links or owned by somebody else. */
+               if (stb.st_nlink != 1 || stb.st_uid != getuid()) {
+                       fprintf(stderr, "ifstat: something is so wrong with history file, that I prefer not to proceed.\n");
+                       exit(-1);
+               }
+               if (!ignore_history) {
+                       FILE *tfp;
+                       /* Stays negative, i.e. "unknown", when /proc/uptime is unavailable. */
+                       long uptime = -1;
+                       if ((tfp = fopen("/proc/uptime", "r")) != NULL) {
+                               if (fscanf(tfp, "%ld", &uptime) != 1)
+                                       uptime = -1;
+                               fclose(tfp);
+                       }
+                       /* A history last written at least one uptime ago is discarded. */
+                       if (uptime >= 0 && time(NULL) >= stb.st_mtime+uptime) {
+                               fprintf(stderr, "ifstat: history is aged out, resetting\n");
+                               ftruncate(fileno(hist_fp), 0);
+                       }
+               }
+
+               load_raw_table(hist_fp);
+
+               hist_db = kern_db;
+               kern_db = NULL;
+       }
+
+       /* Try our own sampler first, then the one whose socket is named "ifstat0". */
+       if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0 &&
+           (connect(fd, (struct sockaddr*)&sun, 2+1+strlen(sun.sun_path+1)) == 0
+            || (strcpy(sun.sun_path+1, "ifstat0"),
+                connect(fd, (struct sockaddr*)&sun, 2+1+strlen(sun.sun_path+1)) == 0))
+           && verify_forging(fd) == 0) {
+               FILE *sfp = fdopen(fd, "r");
+               load_raw_table(sfp);
+               if (hist_db && source_mismatch) {
+                       fprintf(stderr, "ifstat: history is stale, ignoring it.\n");
+                       hist_db = NULL;
+               }
+               fclose(sfp);
+       } else {
+               if (fd >= 0)
+                       close(fd);
+               if (hist_db && info_source[0] && strcmp(info_source, "kernel")) {
+                       fprintf(stderr, "ifstat: history is stale, ignoring it.\n");
+                       hist_db = NULL;
+                       info_source[0] = 0;
+               }
+               load_info();
+               if (info_source[0] == 0)
+                       strcpy(info_source, "kernel");
+       }
+
+       if (!no_output) {
+               if (ignore_history || hist_db == NULL)
+                       dump_kern_db(stdout);
+               else
+                       dump_incr_db(stdout);
+       }
+       if (!no_update) {
+               ftruncate(fileno(hist_fp), 0);
+               rewind(hist_fp);
+               dump_raw_db(hist_fp, 1);
+               fflush(hist_fp);
+       }
+       exit(0);
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..6d13c8eea5407f31ead8a930f99f988e5dbe0fdd 100644 (file)
@@ -0,0 +1,53 @@
+#! /bin/bash
+
+echo -n "Send network configuration summary to [ENTER means kuznet@ms2.inr.ac.ru] "
+IFS="" read mail || exit 1
+[ -z "$mail" ] && mail=kuznet@ms2.inr.ac.ru
+
+
+netbug=""
+while [ "$netbug" = "" ]; do
+       netbug=`echo netbug.$$.$RANDOM`
+       if [ -e /tmp/$netbug ]; then
+               netbug=""
+       fi
+done
+
+tmppath=/tmp/$netbug
+
+trap "rm -rf $tmppath $tmppath.tar.gz" 0 SIGINT
+
+mkdir $tmppath
+mkdir $tmppath/net
+
+cat /proc/slabinfo > $tmppath/slabinfo
+cat /proc/net/netstat > $tmppath/net/netstat
+cat /proc/net/unix > $tmppath/net/unix
+cat /proc/net/packet > $tmppath/net/packet
+cat /proc/net/netlink > $tmppath/net/netlink
+cat /proc/net/psched > $tmppath/net/psched
+cat /proc/net/softnet_stat > $tmppath/net/softnet_stat
+cat /proc/net/sockstat > $tmppath/net/sockstat
+cat /proc/net/tcp > $tmppath/net/tcp
+cat /proc/net/udp > $tmppath/net/udp
+cat /proc/net/raw > $tmppath/net/raw
+cat /proc/net/snmp > $tmppath/net/snmp
+
+ss -aioem -D $tmppath/tcpdiag
+
+if [ -e /proc/net/tcp6 ]; then
+       cat /proc/net/sockstat6 > $tmppath/net/sockstat6
+       cat /proc/net/tcp6 > $tmppath/net/tcp6
+       cat /proc/net/udp6 > $tmppath/net/udp6
+       cat /proc/net/raw6 > $tmppath/net/raw6
+       cat /proc/net/snmp6 > $tmppath/net/snmp6
+fi
+
+cd /tmp
+tar c $netbug | gzip -9c > $netbug.tar.gz
+
+uuencode $netbug.tar.gz $netbug.tar.gz | mail -s $netbug "$mail"
+
+echo "Sending to <$mail>; subject is $netbug"
+
+exit 0
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..9580ccf348036bd4e846d4ac4a0a8cad5ad649b5 100644 (file)
@@ -0,0 +1,614 @@
+/*
+ * nstat.c     handy utility to read counters from /proc/net/netstat and snmp
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/time.h>
+#include <fnmatch.h>
+#include <sys/file.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/poll.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <signal.h>
+#include <math.h>
+
+#include <SNAPSHOT.h>
+
+int dump_zeros = 0; /* when set, the dump routines also print entries whose value and rate are both zero */
+int reset_history = 0; /* NOTE(review): not referenced in the visible part of this file; presumably a command line flag - confirm */
+int ignore_history = 0; /* NOTE(review): not referenced in the visible part of this file; presumably a command line flag - confirm */
+int no_output = 0; /* NOTE(review): not referenced in the visible part of this file; presumably a command line flag - confirm */
+int no_update = 0; /* NOTE(review): not referenced in the visible part of this file; presumably a command line flag - confirm */
+int scan_interval = 0; /* sampling period; compared with T_DIFF() results, so kept in milliseconds */
+int time_constant = 0; /* averaging time constant, compared with the same millisecond intervals */
+double W; /* weight given to a new sample when update_db() refreshes a rate */
+char **patterns; /* fnmatch() patterns consulted by match() */
+int npatterns; /* number of entries in patterns; zero makes match() accept everything */
+
+char info_source[128]; /* text of the last '#' line read, written back as the first line of every dump */
+int source_mismatch; /* set by load_good_table() when a '#' line differs from the recorded info_source */
+
+/*
+ * Open a statistics file for reading and return the descriptor from open().
+ *
+ * When the environment variable named by env is set, its value is the path to
+ * open.  Otherwise the path is "<root>/<name>", where <root> comes from the
+ * PROC_ROOT environment variable and defaults to "/proc".
+ */
+int generic_proc_open(char *env, char *name)
+{
+       char store[128];
+       char *p = getenv(env);
+
+       if (!p) {
+               const char *root = getenv("PROC_ROOT");
+
+               snprintf(store, sizeof(store), "%s/%s", root ? root : "/proc", name);
+               p = store;
+       }
+       /* Open p, not store: store is filled in only when env is unset. */
+       return open(p, O_RDONLY);
+}
+
+/* Open "net/netstat" below the proc root, or the file named by PROC_NET_NETSTAT. */
+int net_netstat_open(void)
+{
+       int fd = generic_proc_open("PROC_NET_NETSTAT", "net/netstat");
+
+       return fd;
+}
+
+/* Open "net/snmp" below the proc root, or the file named by PROC_NET_SNMP. */
+int net_snmp_open(void)
+{
+       int fd = generic_proc_open("PROC_NET_SNMP", "net/snmp");
+
+       return fd;
+}
+
+/* Open "net/snmp6" below the proc root, or the file named by PROC_NET_SNMP6. */
+int net_snmp6_open(void)
+{
+       int fd = generic_proc_open("PROC_NET_SNMP6", "net/snmp6");
+
+       return fd;
+}
+
+struct nstat_ent /* one named counter, kept in a singly linked list */
+{
+       struct nstat_ent *next; /* next entry, NULL at the end of the list */
+       char             *id; /* counter name; allocated with strdup() and freed together with the entry */
+       unsigned long long val; /* accumulated value: update_db() adds each increment of ival to it */
+       unsigned long      ival; /* value as last read from the source; update_db() subtracts old from new */
+       double             rate; /* rate maintained by update_db() from the increments */
+};
+
+struct nstat_ent *kern_db; /* list that the load_*_table() routines put their entries on */
+struct nstat_ent *hist_db; /* list the dump routines consult for previously saved values */
+
+/* Identifiers that the table loaders leave out of the database. */
+char *useless_numbers[] = {
+       "IpForwarding", "IpDefaultTTL",
+       "TcpRtoAlgorithm", "TcpRtoMin", "TcpRtoMax",
+       "TcpMaxConn", "TcpCurrEstab"
+};
+
+/* Return 1 when id is listed in useless_numbers, 0 otherwise. */
+int useless_number(char *id)
+{
+       const size_t count = sizeof(useless_numbers)/sizeof(useless_numbers[0]);
+       size_t pos = 0;
+
+       while (pos < count) {
+               if (!strcmp(useless_numbers[pos], id))
+                       return 1;
+               pos++;
+       }
+       return 0;
+}
+
+/*
+ * Return 1 when id matches at least one of the fnmatch() patterns in
+ * patterns[], or when there are no patterns at all; return 0 otherwise.
+ */
+int match(char *id)
+{
+       int idx = 0;
+
+       if (!npatterns)
+               return 1;
+
+       while (idx < npatterns) {
+               if (fnmatch(patterns[idx], id, 0) == 0)
+                       return 1;
+               idx++;
+       }
+       return 0;
+}
+
+/*
+ * Parse a table with one "<id> <value> [<rate>]" entry per line from fp and
+ * put the entries in front of kern_db, preserving their order in the file.
+ *
+ * A line starting with '#' names the data source: it is stored in info_source,
+ * and source_mismatch is raised when it differs from a source recorded earlier.
+ * Identifiers listed in useless_numbers are skipped; a missing rate reads as 0.
+ * Malformed lines and allocation failures abort() the program.
+ */
+void load_good_table(FILE *fp)
+{
+       char buf[4096];
+       struct nstat_ent *db = NULL;
+       struct nstat_ent *n;
+
+       while (fgets(buf, sizeof(buf), fp) != NULL) {
+               int nr;
+               unsigned long long val;
+               double rate;
+               char idbuf[256];
+               if (buf[0] == '#') {
+                       /* Cut at the newline, if any; a last line without one stays intact. */
+                       buf[strcspn(buf, "\n")] = 0;
+                       if (info_source[0] && strcmp(info_source, buf+1))
+                               source_mismatch = 1;
+                       strncpy(info_source, buf+1, sizeof(info_source)-1);
+                       info_source[sizeof(info_source)-1] = 0;
+                       continue;
+               }
+               /* The field width keeps an overlong identifier inside idbuf. */
+               nr = sscanf(buf, "%255s%llu%lg", idbuf, &val, &rate);
+               if (nr < 2)
+                       abort();
+               if (nr < 3)
+                       rate = 0;
+               if (useless_number(idbuf))
+                       continue;
+               if ((n = malloc(sizeof(*n))) == NULL)
+                       abort();
+               /* Treat a failed copy like any other allocation failure. */
+               if ((n->id = strdup(idbuf)) == NULL)
+                       abort();
+               n->ival = (unsigned long)val;
+               n->val = val;
+               n->rate = rate;
+               n->next = db;
+               db = n;
+       }
+
+       /* db is in reverse file order; pushing it onto kern_db restores the order. */
+       while (db) {
+               n = db;
+               db = db->next;
+               n->next = kern_db;
+               kern_db = n;
+       }
+}
+
+
+/*
+ * Parse a table made of line pairs from fp:
+ *     "<Prefix>: <name> <name> ...\n"
+ *     "<Prefix>: <value> <value> ...\n"
+ * Every name becomes an entry whose id is "<Prefix><name>".  The entries are
+ * put in front of kern_db, except those listed in useless_numbers, which are
+ * freed.  Malformed input and allocation failures abort() the program.
+ */
+void load_ugly_table(FILE *fp)
+{
+       char buf[4096];
+       struct nstat_ent *db = NULL;
+       struct nstat_ent *n;
+
+       while (fgets(buf, sizeof(buf), fp) != NULL) {
+               char idbuf[256];
+               int  off;
+               char *p;
+
+               p = strchr(buf, ':');
+               if (!p)
+                       abort();
+               *p = 0;
+               /* The prefix has to fit idbuf together with its terminator. */
+               if (strlen(buf) >= sizeof(idbuf))
+                       abort();
+               strcpy(idbuf, buf);
+               off = strlen(idbuf);
+               /* Step over ": ", but never past the end of a line that stops at ':'. */
+               p += p[1] ? 2 : 1;
+
+               while (*p) {
+                       char *next;
+                       if ((next = strchr(p, ' ')) != NULL)
+                               *next++ = 0;
+                       else if ((next = strchr(p, '\n')) != NULL)
+                               *next++ = 0;
+                       else
+                               /* Last name without a terminator: stop after it. */
+                               next = p + strlen(p);
+                       if (off + strlen(p) >= sizeof(idbuf))
+                               abort();
+                       strcpy(idbuf+off, p);
+                       n = malloc(sizeof(*n));
+                       if (!n)
+                               abort();
+                       n->id = strdup(idbuf);
+                       if (!n->id)
+                               abort();
+                       n->rate = 0;
+                       n->next = db;
+                       db = n;
+                       p = next;
+               }
+               /* db now starts with the last name; values are consumed from the end too. */
+               n = db;
+               if (fgets(buf, sizeof(buf), fp) == NULL)
+                       abort();
+               do {
+                       /* More values than entries means malformed input. */
+                       if (!n)
+                               abort();
+                       p = strrchr(buf, ' ');
+                       if (!p)
+                               abort();
+                       *p = 0;
+                       if (sscanf(p+1, "%lu", &n->ival) != 1)
+                               abort();
+                       n->val = n->ival;
+                       /* Trick to skip "dummy" trailing ICMP MIB in 2.4 */
+                       if (strcmp(idbuf, "IcmpOutAddrMaskReps") == 0)
+                               idbuf[5] = 0;
+                       else
+                               n = n->next;
+               } while (p > buf + off + 2);
+       }
+
+       /* db is in reverse order; pushing it onto kern_db restores the order. */
+       while (db) {
+               n = db;
+               db = db->next;
+               if (useless_number(n->id)) {
+                       free(n->id);
+                       free(n);
+               } else {
+                       n->next = kern_db;
+                       kern_db = n;
+               }
+       }
+}
+
+/*
+ * Load the counters of the snmp file into kern_db.  Nothing happens when the
+ * file cannot be opened.
+ */
+void load_snmp(void)
+{
+       int fd = net_snmp_open();
+       FILE *fp;
+
+       if (fd < 0)
+               return;
+       fp = fdopen(fd, "r");
+       if (!fp) {
+               /* Without a stream nobody else would close the descriptor. */
+               close(fd);
+               return;
+       }
+       load_ugly_table(fp);
+       fclose(fp);
+}
+
+/*
+ * Load the counters of the snmp6 file into kern_db.  Nothing happens when the
+ * file cannot be opened.
+ */
+void load_snmp6(void)
+{
+       int fd = net_snmp6_open();
+       FILE *fp;
+
+       if (fd < 0)
+               return;
+       fp = fdopen(fd, "r");
+       if (!fp) {
+               /* Without a stream nobody else would close the descriptor. */
+               close(fd);
+               return;
+       }
+       load_good_table(fp);
+       fclose(fp);
+}
+
+/*
+ * Load the counters of the netstat file into kern_db.  Nothing happens when
+ * the file cannot be opened.
+ */
+void load_netstat(void)
+{
+       int fd = net_netstat_open();
+       FILE *fp;
+
+       if (fd < 0)
+               return;
+       fp = fdopen(fd, "r");
+       if (!fp) {
+               /* Without a stream nobody else would close the descriptor. */
+               close(fd);
+               return;
+       }
+       load_ugly_table(fp);
+       fclose(fp);
+}
+
+/*
+ * Write kern_db to fp: a "#<info_source>" line followed by one
+ * "<id> <value> <rate>" line per counter.
+ *
+ * Counters whose value and rate are both zero are left out unless dump_zeros
+ * is set.  Counters rejected by match() are left out as well unless to_hist is
+ * set; in that case they are written with the value of the hist_db entry of
+ * the same name when one is found further down that list.
+ */
+void dump_kern_db(FILE *fp, int to_hist)
+{
+       struct nstat_ent *cur;
+       struct nstat_ent *hist = hist_db;
+
+       fprintf(fp, "#%s\n", info_source);
+
+       for (cur = kern_db; cur != NULL; cur = cur->next) {
+               unsigned long long shown = cur->val;
+
+               if (dump_zeros == 0 && shown == 0 && cur->rate == 0)
+                       continue;
+
+               if (!match(cur->id)) {
+                       struct nstat_ent *old;
+
+                       if (!to_hist)
+                               continue;
+
+                       /* Look for the saved entry, resuming after the last hit. */
+                       old = hist;
+                       while (old != NULL && strcmp(old->id, cur->id) != 0)
+                               old = old->next;
+                       if (old != NULL) {
+                               shown = old->val;
+                               hist = old->next;
+                       }
+               }
+
+               fprintf(fp, "%-32s%-16llu%6.1f\n", cur->id, shown, cur->rate);
+       }
+}
+
+/*
+ * Write to fp, for every kern_db counter, its value minus the value of the
+ * hist_db entry of the same name.  A counter that is now smaller than its
+ * saved value is reported as 0 and marked " (overflow)".
+ *
+ * Counters whose difference and rate are both zero are left out unless
+ * dump_zeros is set; counters rejected by match() are always left out.
+ */
+void dump_incr_db(FILE *fp)
+{
+       struct nstat_ent *cur;
+       struct nstat_ent *hist = hist_db;
+
+       fprintf(fp, "#%s\n", info_source);
+
+       for (cur = kern_db; cur != NULL; cur = cur->next) {
+               unsigned long long delta = cur->val;
+               const char *note = "";
+               struct nstat_ent *old;
+
+               /* Look for the saved entry, resuming after the last hit. */
+               old = hist;
+               while (old != NULL && strcmp(old->id, cur->id) != 0)
+                       old = old->next;
+
+               if (old != NULL) {
+                       if (delta < old->val) {
+                               note = " (overflow)";
+                               delta = 0;
+                       } else {
+                               delta -= old->val;
+                       }
+                       hist = old->next;
+               }
+
+               if (dump_zeros == 0 && delta == 0 && cur->rate == 0)
+                       continue;
+               if (!match(cur->id))
+                       continue;
+
+               fprintf(fp, "%-32s%-16llu%6.1f%s\n", cur->id, delta,
+                       cur->rate, note);
+       }
+}
+
+static int children;
+
+/* Signal handler with an empty body. */
+void sigchild(int signo)
+{
+       (void)signo;
+}
+
+/*
+ * Take a fresh sample of all counter files and fold it into kern_db.
+ *
+ * interval is the time since the previous sample, in milliseconds.  For every
+ * kern_db entry that has a counterpart of the same name in the fresh sample,
+ * the increment is added to the accumulated value and the rate is updated.
+ * All entries of the fresh sample are freed before returning.
+ */
+void update_db(int interval)
+{
+       struct nstat_ent *n, *h;
+
+       /* Set kern_db aside so that the loaders build the new sample alone. */
+       n = kern_db;
+       kern_db = NULL;
+
+       load_netstat();
+       load_snmp6();
+       load_snmp();
+
+       h = kern_db;
+       kern_db = n;
+
+       for (n = kern_db; n; n = n->next) {
+               struct nstat_ent *h1;
+               for (h1 = h; h1; h1 = h1->next) {
+                       if (strcmp(h1->id, n->id) == 0) {
+                               double sample;
+                               unsigned long incr = h1->ival - n->ival;
+                               n->val += incr;
+                               n->ival = h1->ival;
+                               /* Scale in floating point: incr*1000 may not fit an unsigned long. */
+                               sample = (double)incr*1000/interval;
+                               if (interval >= scan_interval) {
+                                       n->rate += W*(sample-n->rate);
+                               } else if (interval >= 1000) {
+                                       if (interval >= time_constant) {
+                                               n->rate = sample;
+                                       } else {
+                                               double w = W*(double)interval/scan_interval;
+                                               n->rate += w*(sample-n->rate);
+                                       }
+                               }
+
+                               /* Drop the sample entries that were skipped over, then the match. */
+                               while (h != h1) {
+                                       struct nstat_ent *tmp = h;
+                                       h = h->next;
+                                       free(tmp->id);
+                                       free(tmp);
+                               };
+                               h = h1->next;
+                               free(h1->id);
+                               free(h1);
+                               break;
+                       }
+               }
+       }
+
+       /* Whatever is left of the sample matched nothing; release it as well. */
+       while (h) {
+               struct nstat_ent *tmp = h;
+               h = h->next;
+               free(tmp->id);
+               free(tmp);
+       }
+}
+
+/* Difference a - b between two struct timeval values, in milliseconds. */
+#define T_DIFF(a,b) (((a).tv_sec-(b).tv_sec)*1000 + ((a).tv_usec-(b).tv_usec)/1000)
+
+
+/*
+ * Sampler main loop; never returns.
+ *
+ * Refreshes the counters every scan_interval milliseconds and, in between,
+ * accepts connections on the listening socket fd.  Each client is served by a
+ * forked child that writes the table to the connection; at most five children
+ * run at a time, further connections are closed unanswered.
+ */
+void server_loop(int fd)
+{
+       struct timeval snaptime;
+       struct pollfd p;
+       p.fd = fd;
+       p.events = p.revents = POLLIN;
+
+       sprintf(info_source, "%d.%lu sampling_interval=%d time_const=%d",
+               getpid(), (unsigned long)random(), scan_interval/1000, time_constant/1000);
+
+       load_netstat();
+       load_snmp6();
+       load_snmp();
+
+       /* The initial load is the first snapshot; intervals are measured from here. */
+       gettimeofday(&snaptime, NULL);
+
+       for (;;) {
+               int status;
+               int tdiff;
+               struct timeval now;
+               gettimeofday(&now, NULL);
+               tdiff = T_DIFF(now, snaptime);
+               if (tdiff >= scan_interval) {
+                       update_db(tdiff);
+                       snaptime = now;
+                       tdiff = 0;
+               }
+               /* Sleep only for what remains of the current sampling period. */
+               if (poll(&p, 1, scan_interval - tdiff) > 0
+                   && (p.revents&POLLIN)) {
+                       int clnt = accept(fd, NULL, NULL);
+                       if (clnt >= 0) {
+                               pid_t pid;
+                               if (children >= 5) {
+                                       close(clnt);
+                               } else if ((pid = fork()) != 0) {
+                                       /* Parent, or fork failure: the socket is not ours to serve. */
+                                       if (pid>0)
+                                               children++;
+                                       close(clnt);
+                               } else {
+                                       /* Child: bring its private copy up to date, answer, exit. */
+                                       FILE *fp = fdopen(clnt, "w");
+                                       if (fp) {
+                                               if (tdiff > 0)
+                                                       update_db(tdiff);
+                                               dump_kern_db(fp, 0);
+                                       }
+                                       exit(0);
+                               }
+                       }
+               }
+               /* Reap finished children without blocking. */
+               while (children && waitpid(-1, &status, WNOHANG) > 0)
+                       children--;
+       }
+}
+
+/*
+ * Check the credentials of the peer of the connected UNIX socket fd.
+ * Returns 0 when the peer runs under our own uid or as root, and -1
+ * otherwise, including when the credentials cannot be obtained.
+ */
+int verify_forging(int fd)
+{
+       struct ucred cred;
+       socklen_t olen = sizeof(cred);  /* getsockopt() takes a socklen_t * */
+
+       if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &cred, &olen) != 0 ||
+           olen < sizeof(cred))
+               return -1;
+       if (cred.uid == getuid() || cred.uid == 0)
+               return 0;
+       return -1;
+}
+
+static void usage(void) __attribute__((noreturn));
+
+/* Print the one-line synopsis on stderr and terminate with exit status -1. */
+static void usage(void)
+{
+       fputs("Usage: nstat [ -h?vVzrnasd:t: ] [ PATTERN [ PATTERN ] ]\n", stderr);
+       exit(-1);
+}
+
+
+/*
+ * nstat entry point.
+ *
+ * With -d SECONDS the program turns into a daemon: it binds an abstract
+ * UNIX socket named "nstat<uid>", forks into the background and runs
+ * server_loop().
+ *
+ * Otherwise it acts as a client: it loads the history file (unless both
+ * -a and -s are given), obtains the current counters either from a
+ * running daemon or directly from the kernel, prints them (absolute, or
+ * relative to the history) unless -n is given, and rewrites the history
+ * file unless -s is given.
+ */
+int main(int argc, char *argv[])
+{
+       char hist_name[128];
+       struct sockaddr_un sun;
+       FILE *hist_fp = NULL;
+       const char *hist_env;
+       int ch;
+       int fd;
+
+       while ((ch = getopt(argc, argv, "h?vVzrnasd:t:")) != EOF) {
+               switch(ch) {
+               case 'z':
+                       dump_zeros = 1;
+                       break;
+               case 'r':
+                       reset_history = 1;
+                       break;
+               case 'a':
+                       ignore_history = 1;
+                       break;
+               case 's':
+                       no_update = 1;
+                       break;
+               case 'n':
+                       no_output = 1;
+                       break;
+               case 'd':
+                       /* Given in seconds, kept in milliseconds. */
+                       scan_interval = 1000*atoi(optarg);
+                       break;
+               case 't':
+                       if (sscanf(optarg, "%d", &time_constant) != 1 ||
+                           time_constant <= 0) {
+                               fprintf(stderr, "nstat: invalid time constant divisor\n");
+                               exit(-1);
+                       }
+                       break;
+               case 'v':
+               case 'V':
+                       printf("nstat utility, iproute2-ss%s\n", SNAPSHOT);
+                       exit(0);
+               case 'h':
+               case '?':
+               default:
+                       usage();
+               }
+       }
+
+       argc -= optind;
+       argv += optind;
+
+       /* Abstract-namespace address: leading NUL byte, then "nstat<uid>". */
+       sun.sun_family = AF_UNIX;
+       sun.sun_path[0] = 0;
+       sprintf(sun.sun_path+1, "nstat%d", getuid());
+
+       if (scan_interval > 0) {
+               /* Daemon mode. */
+               if (time_constant == 0)
+                       time_constant = 60;
+               time_constant *= 1000;
+               W = 1 - 1/exp(log(10)*(double)scan_interval/time_constant);
+               if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
+                       perror("nstat: socket");
+                       exit(-1);
+               }
+               if (bind(fd, (struct sockaddr*)&sun, 2+1+strlen(sun.sun_path+1)) < 0) {
+                       perror("nstat: bind");
+                       exit(-1);
+               }
+               if (listen(fd, 5) < 0) {
+                       perror("nstat: listen");
+                       exit(-1);
+               }
+               if (fork())
+                       exit(0);
+               chdir("/");
+               close(0); close(1); close(2); setsid();
+               signal(SIGPIPE, SIG_IGN);
+               signal(SIGCHLD, sigchild);
+               server_loop(fd);
+               exit(0);
+       }
+
+       patterns = argv;
+       npatterns = argc;
+
+       /* The environment value is data, never a format string. */
+       hist_env = getenv("NSTAT_HISTORY");
+       if (hist_env)
+               snprintf(hist_name, sizeof(hist_name), "%s", hist_env);
+       else
+               sprintf(hist_name, "/tmp/.nstat.u%d", getuid());
+
+       if (reset_history)
+               unlink(hist_name);
+
+       if (!ignore_history || !no_update) {
+               struct stat stb;
+
+               fd = open(hist_name, O_RDWR|O_CREAT|O_NOFOLLOW, 0600);
+               if (fd < 0) {
+                       perror("nstat: open history file");
+                       exit(-1);
+               }
+               if ((hist_fp = fdopen(fd, "r+")) == NULL) {
+                       perror("nstat: fdopen history file");
+                       exit(-1);
+               }
+               if (flock(fileno(hist_fp), LOCK_EX)) {
+                       perror("nstat: flock history file");
+                       exit(-1);
+               }
+               if (fstat(fileno(hist_fp), &stb) != 0) {
+                       perror("nstat: fstat history file");
+                       exit(-1);
+               }
+               /* Refuse hard-linked files and files owned by somebody else. */
+               if (stb.st_nlink != 1 || stb.st_uid != getuid()) {
+                       fprintf(stderr, "nstat: something is so wrong with history file, that I prefer not to proceed.\n");
+                       exit(-1);
+               }
+               if (!ignore_history) {
+                       FILE *tfp;
+                       /* Stays -1 ("unknown") when /proc/uptime is unreadable. */
+                       long uptime = -1;
+                       if ((tfp = fopen("/proc/uptime", "r")) != NULL) {
+                               if (fscanf(tfp, "%ld", &uptime) != 1)
+                                       uptime = -1;
+                               fclose(tfp);
+                       }
+                       /* History written before the current boot is useless. */
+                       if (uptime >= 0 && time(NULL) >= stb.st_mtime+uptime) {
+                               fprintf(stderr, "nstat: history is aged out, resetting\n");
+                               ftruncate(fileno(hist_fp), 0);
+                       }
+               }
+
+               load_good_table(hist_fp);
+
+               hist_db = kern_db;
+               kern_db = NULL;
+       }
+
+       /* Try our own daemon first, then the one started by root. */
+       if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0 &&
+           (connect(fd, (struct sockaddr*)&sun, 2+1+strlen(sun.sun_path+1)) == 0
+            || (strcpy(sun.sun_path+1, "nstat0"),
+                connect(fd, (struct sockaddr*)&sun, 2+1+strlen(sun.sun_path+1)) == 0))
+           && verify_forging(fd) == 0) {
+               FILE *sfp = fdopen(fd, "r");
+               if (sfp == NULL) {
+                       perror("nstat: fdopen socket");
+                       exit(-1);
+               }
+               load_good_table(sfp);
+               if (hist_db && source_mismatch) {
+                       fprintf(stderr, "nstat: history is stale, ignoring it.\n");
+                       hist_db = NULL;
+               }
+               fclose(sfp);
+       } else {
+               /* No usable daemon: read the counters straight from the kernel. */
+               if (fd >= 0)
+                       close(fd);
+               if (hist_db && info_source[0] && strcmp(info_source, "kernel")) {
+                       fprintf(stderr, "nstat: history is stale, ignoring it.\n");
+                       hist_db = NULL;
+                       info_source[0] = 0;
+               }
+               load_netstat();
+               load_snmp6();
+               load_snmp();
+               if (info_source[0] == 0)
+                       strcpy(info_source, "kernel");
+       }
+
+       if (!no_output) {
+               if (ignore_history || hist_db == NULL)
+                       dump_kern_db(stdout, 0);
+               else
+                       dump_incr_db(stdout);
+       }
+       if (!no_update) {
+               /* hist_fp is open here: the history block above ran because !no_update. */
+               ftruncate(fileno(hist_fp), 0);
+               rewind(hist_fp);
+               dump_kern_db(hist_fp, 1);
+               fflush(hist_fp);
+       }
+       exit(0);
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..5c6748b9cdfddf72b3bfad7908899d275bcdea64 100644 (file)
@@ -0,0 +1,625 @@
+/*
+ * rtacct.c            Applet to display contents of /proc/net/rt_acct.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/time.h>
+#include <fnmatch.h>
+#include <sys/file.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/poll.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <signal.h>
+#include <math.h>
+
+#include "rt_names.h"
+
+#include <SNAPSHOT.h>
+
+/* Command-line state; all of these are set while main() parses options. */
+
+/* -r: the history file is unlinked before it is opened. */
+int reset_history = 0;
+/* -a: absolute counters are shown instead of differences to the history. */
+int ignore_history = 0;
+/* -n: nothing is printed. */
+int no_output = 0;
+/* -s: the history file is mapped MAP_PRIVATE instead of MAP_SHARED. */
+int no_update = 0;
+/* -d: sampling period, stored in milliseconds; > 0 selects daemon mode. */
+int scan_interval = 0;
+/* -t: averaging time constant; main() converts it to milliseconds in daemon mode. */
+int time_constant = 0;
+/* -z: realms whose counters and rates are all zero are printed too. */
+int dump_zeros = 0;
+/* -M: hexadecimal offset handed to mmap() on /dev/mem by read_kern_table(). */
+unsigned long magic_number = 0;
+/* Averaging weight derived from scan_interval and time_constant; used by update_db(). */
+double W;
+
+/*
+ * Open a proc file read-only.
+ *
+ * If the environment variable named by env is set, its value is the path
+ * to open.  Otherwise the path is "<root>/<name>", where <root> is the
+ * value of $PROC_ROOT or "/proc" when that is unset.
+ *
+ * Returns the descriptor from open(), i.e. -1 on failure.
+ */
+int generic_proc_open(char *env, char *name)
+{
+       char store[1024];
+       char *p = getenv(env);
+
+       if (!p) {
+               const char *root = getenv("PROC_ROOT");
+
+               if (!root)
+                       root = "/proc";
+               snprintf(store, sizeof(store), "%s/%s", root, name);
+               p = store;
+       }
+       /* Open the chosen path: store is only filled in on the fallback branch. */
+       return open(p, O_RDONLY);
+}
+
+/*
+ * Open the realm accounting table: the file named by $PROC_NET_RTACCT if
+ * that is set, "net/rt_acct" below the proc root otherwise.
+ */
+int net_rtacct_open(void)
+{
+       int fd = generic_proc_open("PROC_NET_RTACCT", "net/rt_acct");
+
+       return fd;
+}
+
+/* Bitmap of realms to display: realm r is bit (r & 0x1f) of rmap[r >> 5]. */
+__u32 rmap[256/4];
+
+/*
+ * One database snapshot.  Each of the 256 realms owns four consecutive
+ * slots, printed under the headings BytesTo, PktsTo, BytesFrom, PktsFrom.
+ * The daemon sends this structure to its clients as raw bytes, and the
+ * history file is a memory-mapped image of it.
+ */
+struct rtacct_data
+{
+       /* Raw 32-bit counters as last read from the kernel table. */
+       __u32                   ival[256*4];
+
+       /* Totals accumulated from the increments of ival. */
+       unsigned long long      val[256*4];
+       /* Averaged rates maintained by update_db(). */
+       double                  rate[256*4];
+       /* NUL-terminated string naming the data source ("kernel" or a daemon id). */
+       __u8                    signature[128];
+};
+
+struct rtacct_data kern_db_static;
+
+/* Current counters; points at the static storage above. */
+struct rtacct_data *kern_db = &kern_db_static;
+/* Memory-mapped history file, or NULL when history is not used. */
+struct rtacct_data *hist_db;
+
+/*
+ * Read exactly tot bytes from fd into buf, retrying short reads and
+ * reads interrupted by a signal.  Any other error, or end of file before
+ * tot bytes have arrived, terminates the process with exit status -1.
+ */
+void nread(int fd, char *buf, int tot)
+{
+       int done;
+
+       for (done = 0; done < tot; ) {
+               int got = read(fd, buf + done, tot - done);
+
+               if (got > 0) {
+                       done += got;
+                       continue;
+               }
+               /* EOF, or a failure other than an interrupted call, is fatal. */
+               if (got == 0 || errno != EINTR)
+                       exit(-1);
+       }
+}
+
+
+/*
+ * Return the current kernel accounting table (256 realms x 4 counters).
+ *
+ * Normally the table is read into tbl from the proc file and tbl is
+ * returned; tbl is zero-filled when the file cannot be opened.  When
+ * magic_number is set, one 4096-byte page of /dev/mem at that offset is
+ * mapped instead; the mapping is created on the first call and returned
+ * on every call, and tbl is left untouched.  Failure to set up the
+ * mapping terminates the process.
+ */
+__u32 *read_kern_table(__u32 *tbl)
+{
+       static __u32 *tbl_ptr;
+       int fd;
+
+       if (!magic_number) {
+               fd = net_rtacct_open();
+               if (fd < 0) {
+                       memset(tbl, 0, 256*16);
+                       return tbl;
+               }
+               nread(fd, (char*)tbl, 256*16);
+               close(fd);
+               return tbl;
+       }
+
+       /* The mapping is made only once. */
+       if (tbl_ptr != NULL)
+               return tbl_ptr;
+
+       fd = open("/dev/mem", O_RDONLY);
+       if (fd < 0) {
+               perror("magic open");
+               exit(-1);
+       }
+       tbl_ptr = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, magic_number);
+       if (tbl_ptr == MAP_FAILED) {
+               perror("magic mmap");
+               exit(-1);
+       }
+       close(fd);
+       return tbl_ptr;
+}
+
+/*
+ * Print one rate as a space followed by a left-justified 10-column field.
+ * Rates above 1024*1024 are shown in units of 1024*1024 with an "M"
+ * suffix, rates above 1024 in units of 1024 with a "K" suffix (both
+ * rounded by rint()); anything else is truncated to an unsigned integer.
+ */
+void format_rate(FILE *fp, double rate)
+{
+       char temp[64];
+
+       if (!(rate > 1024)) {
+               fprintf(fp, " %-10u", (unsigned)rate);
+               return;
+       }
+
+       if (rate > 1024*1024)
+               sprintf(temp, "%uM", (unsigned)rint(rate/(1024*1024)));
+       else
+               sprintf(temp, "%uK", (unsigned)rint(rate/1024));
+       fprintf(fp, " %-10s", temp);
+}
+
+/*
+ * Print one counter as a space followed by a right-justified 10-digit
+ * field.  Values above 1024^3 are divided by 1024^2 and tagged "M";
+ * values above 1024^2 are divided by 1024 and tagged "K".
+ */
+void format_count(FILE *fp, unsigned long long val)
+{
+       enum { KILO = 1024, MEGA = 1024*1024, GIGA = 1024*1024*1024 };
+
+       if (val > GIGA) {
+               fprintf(fp, " %10lluM", val/MEGA);
+               return;
+       }
+       if (val > MEGA) {
+               fprintf(fp, " %10lluK", val/KILO);
+               return;
+       }
+       fprintf(fp, " %10llu", val);
+}
+
+/*
+ * Print the absolute counters and rates of every selected realm to fp.
+ *
+ * A realm is selected when its bit is set in rmap.  Realms whose four
+ * counters and four rates are all zero are skipped unless dump_zeros is
+ * set.  When a history database is mapped, its counters for each reported
+ * realm are overwritten with the current ones.  With no_output set
+ * nothing is printed, but the history is still updated.
+ */
+void dump_abs_db(FILE *fp)
+{
+       int realm;
+       char b1[16];
+
+       if (!no_output) {
+               fprintf(fp, "#%s\n", kern_db->signature);
+               fprintf(fp, "%-10s %-10s %-10s %-10s %-10s \n",
+                       "Realm", "BytesTo", "PktsTo", "BytesFrom", "PktsFrom");
+               fprintf(fp, "%-10s %-10s %-10s %-10s %-10s \n",
+                       "", "BPSTo", "PPSTo", "BPSFrom", "PPSFrom");
+       }
+
+       for (realm=0; realm<256; realm++) {
+               int i;
+               unsigned long long *val;
+               double             *rate;
+
+               /* Unsigned shift: bit 31 must not overflow a signed int. */
+               if (!(rmap[realm>>5] & (1U<<(realm&0x1f))))
+                       continue;
+
+               val = &kern_db->val[realm*4];
+               rate = &kern_db->rate[realm*4];
+
+               if (!dump_zeros &&
+                   !val[0] && !rate[0] &&
+                   !val[1] && !rate[1] &&
+                   !val[2] && !rate[2] &&
+                   !val[3] && !rate[3])
+                       continue;
+
+               if (hist_db) {
+                       memcpy(&hist_db->val[realm*4], val, sizeof(*val)*4);
+               }
+
+               if (no_output)
+                       continue;
+
+               /* First line: realm name and counters; second line: rates. */
+               fprintf(fp, "%-10s", rtnl_rtrealm_n2a(realm, b1, sizeof(b1)));
+               for (i = 0; i < 4; i++)
+                       format_count(fp, val[i]);
+               fprintf(fp, "\n%-10s", "");
+               for (i = 0; i < 4; i++)
+                       format_rate(fp, rate[i]);
+               fprintf(fp, "\n");
+       }
+}
+
+
+/*
+ * Print, for every selected realm, the counters relative to the history
+ * database together with the current rates.  hist_db must not be NULL.
+ *
+ * If any of a realm's four counters is smaller than its history value,
+ * the history is considered unusable for that realm and the absolute
+ * values are shown instead.  The history counters of each selected realm
+ * are then replaced by the current ones.  Realms whose displayed values
+ * and rates are all zero are skipped unless dump_zeros is set.  With
+ * no_output set nothing is printed, but the history is still updated.
+ */
+void dump_incr_db(FILE *fp)
+{
+       int k, realm;
+       char b1[16];
+
+       if (!no_output) {
+               fprintf(fp, "#%s\n", kern_db->signature);
+               fprintf(fp, "%-10s %-10s %-10s %-10s %-10s \n",
+                       "Realm", "BytesTo", "PktsTo", "BytesFrom", "PktsFrom");
+               fprintf(fp, "%-10s %-10s %-10s %-10s %-10s \n",
+                       "", "BPSTo", "PPSTo", "BPSFrom", "PPSFrom");
+       }
+
+       for (realm=0; realm<256; realm++) {
+               int ovfl = 0;
+               int i;
+               unsigned long long *val;
+               double             *rate;
+               unsigned long long rval[4];
+
+               /* Unsigned shift: bit 31 must not overflow a signed int. */
+               if (!(rmap[realm>>5] & (1U<<(realm&0x1f))))
+                       continue;
+
+               val = &kern_db->val[realm*4];
+               rate = &kern_db->rate[realm*4];
+
+               for (k=0; k<4; k++) {
+                       rval[k] = val[k];
+                       if (rval[k] < hist_db->val[realm*4+k])
+                               ovfl = 1;
+                       else
+                               rval[k] -= hist_db->val[realm*4+k];
+               }
+               if (ovfl) {
+                       /* A counter went backwards: show absolute values. */
+                       for (k=0; k<4; k++)
+                               rval[k] = val[k];
+               }
+               /* hist_db was already dereferenced above, so no NULL test here. */
+               memcpy(&hist_db->val[realm*4], val, sizeof(*val)*4);
+
+               if (no_output)
+                       continue;
+
+               if (!dump_zeros &&
+                   !rval[0] && !rate[0] &&
+                   !rval[1] && !rate[1] &&
+                   !rval[2] && !rate[2] &&
+                   !rval[3] && !rate[3])
+                       continue;
+
+
+               /* First line: realm name and counters; second line: rates. */
+               fprintf(fp, "%-10s", rtnl_rtrealm_n2a(realm, b1, sizeof(b1)));
+               for (i = 0; i < 4; i++)
+                       format_count(fp, rval[i]);
+               fprintf(fp, "\n%-10s", "");
+               for (i = 0; i < 4; i++)
+                       format_rate(fp, rate[i]);
+               fprintf(fp, "\n");
+       }
+}
+
+
+/* Number of forked client handlers that server_loop() has not reaped yet. */
+static int children;
+
+/*
+ * SIGCHLD handler installed by main() in daemon mode.  It does nothing
+ * itself; children are reaped by the waitpid() loop in server_loop().
+ * NOTE(review): presumably a handler is installed instead of SIG_IGN so
+ * that children stay waitable and poll() is interrupted -- confirm.
+ */
+void sigchild(int signo)
+{
+}
+
+/*
+ * Server side only: read kernel data, update tables, calculate rates.
+ *
+ * interval is the time in milliseconds since the previous update and
+ * must be positive.  For every slot the 32-bit increment since the last
+ * reading is added to the accumulated total, and the rate is updated:
+ *   - interval >= scan_interval: rate moves towards the new sample by
+ *     the weight W;
+ *   - 1000 <= interval < scan_interval: the sample replaces the rate if
+ *     interval >= time_constant, otherwise the weight is scaled by
+ *     interval/scan_interval;
+ *   - interval < 1000: the rate is left alone.
+ */
+
+void update_db(int interval)
+{
+       int i;
+       __u32 *ival;
+       __u32 _ival[256*4];
+
+       ival = read_kern_table(_ival);
+
+       for (i=0; i<256*4; i++) {
+               double sample;
+               /* Unsigned subtraction copes with wrap-around of the kernel counter. */
+               __u32 incr = ival[i] - kern_db->ival[i];
+
+               /* Skip slots that have never seen any traffic. */
+               if (ival[i] == 0 && incr == 0 &&
+                   kern_db->val[i] == 0 && kern_db->rate[i] == 0)
+                       continue;
+
+               kern_db->val[i] += incr;
+               kern_db->ival[i] = ival[i];
+               /* Scale in floating point: incr*1000 would wrap in 32 bits. */
+               sample = (double)incr*1000/interval;
+               if (interval >= scan_interval) {
+                       kern_db->rate[i] += W*(sample-kern_db->rate[i]);
+               } else if (interval >= 1000) {
+                       if (interval >= time_constant) {
+                               kern_db->rate[i] = sample;
+                       } else {
+                               double w = W*(double)interval/scan_interval;
+                               kern_db->rate[i] += w*(sample-kern_db->rate[i]);
+                       }
+               }
+       }
+}
+
+/*
+ * Write the whole current database to fd as raw bytes.  Partial writes
+ * are continued and interrupted writes are retried; any other write
+ * error silently abandons the transfer.
+ */
+void send_db(int fd)
+{
+       const char *image = (const char *)kern_db;
+       int sent;
+
+       for (sent = 0; sent < sizeof(*kern_db); ) {
+               int rc = write(fd, image + sent, sizeof(*kern_db) - sent);
+
+               if (rc >= 0) {
+                       sent += rc;
+                       continue;
+               }
+               if (errno != EINTR)
+                       return;
+       }
+}
+
+
+
+/* Difference a - b between two struct timeval values, in milliseconds. */
+#define T_DIFF(a,b) (((a).tv_sec-(b).tv_sec)*1000 + ((a).tv_usec-(b).tv_usec)/1000)
+
+
+/*
+ * Initialise dat from a raw kernel table: the raw counters are stored in
+ * dat->ival (no copy is made when ival already is dat->ival), every total
+ * starts out equal to its raw counter, and all rates are cleared.
+ */
+void pad_kern_table(struct rtacct_data *dat, __u32 *ival)
+{
+       int slot = 0;
+
+       if (ival != dat->ival)
+               memcpy(dat->ival, ival, sizeof(dat->ival));
+       memset(dat->rate, 0, sizeof(dat->rate));
+
+       while (slot < 256*4) {
+               dat->val[slot] = ival[slot];
+               slot++;
+       }
+}
+
+/*
+ * Daemon main loop.  Stamps the database with a signature, takes an
+ * initial snapshot of the kernel table, then forever: refreshes the
+ * database through update_db() once at least scan_interval milliseconds
+ * have elapsed since the previous refresh, and serves clients that
+ * connect to the listening socket fd.  Each client is handled by a forked
+ * child (at most 5 at a time) which sends the database and exits.
+ * Never returns.
+ */
+void server_loop(int fd)
+{
+       struct timeval snaptime;
+       struct pollfd p;
+
+       p.fd = fd;
+       p.events = POLLIN;
+       p.revents = 0;
+
+       snprintf((char *)kern_db->signature, sizeof(kern_db->signature),
+                "%d.%lu sampling_interval=%d time_const=%d",
+                getpid(), (unsigned long)random(), scan_interval/1000, time_constant/1000);
+
+       pad_kern_table(kern_db, read_kern_table(kern_db->ival));
+
+       /* The read above is the baseline sample; time the first interval
+        * from here instead of reading an uninitialized timestamp.
+        */
+       gettimeofday(&snaptime, NULL);
+
+       for (;;) {
+               int status;
+               int tdiff;
+               struct timeval now;
+
+               gettimeofday(&now, NULL);
+               tdiff = T_DIFF(now, snaptime);
+               if (tdiff >= scan_interval) {
+                       update_db(tdiff);
+                       snaptime = now;
+                       tdiff = 0;
+               }
+               /* Sleep only for what is left of the current interval. */
+               if (poll(&p, 1, scan_interval - tdiff) > 0
+                   && (p.revents&POLLIN)) {
+                       int clnt = accept(fd, NULL, NULL);
+                       if (clnt >= 0) {
+                               pid_t pid;
+                               if (children >= 5) {
+                                       /* Too many clients in flight: drop this one. */
+                                       close(clnt);
+                               } else if ((pid = fork()) != 0) {
+                                       /* Parent (or failed fork): only a real child is counted. */
+                                       if (pid>0)
+                                               children++;
+                                       close(clnt);
+                               } else {
+                                       /* Child: bring its private copy up to date, send, exit. */
+                                       if (tdiff > 0)
+                                               update_db(tdiff);
+                                       send_db(clnt);
+                                       exit(0);
+                               }
+                       }
+               }
+               /* Reap finished children without blocking. */
+               while (children && waitpid(-1, &status, WNOHANG) > 0)
+                       children--;
+       }
+}
+
+/*
+ * Check the credentials of the peer of the connected UNIX socket fd.
+ * Returns 0 when the peer runs under our own uid or as root, and -1
+ * otherwise, including when the credentials cannot be obtained.
+ */
+int verify_forging(int fd)
+{
+       struct ucred cred;
+       socklen_t olen = sizeof(cred);  /* getsockopt() takes a socklen_t * */
+
+       if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &cred, &olen) != 0 ||
+           olen < sizeof(cred))
+               return -1;
+       if (cred.uid == getuid() || cred.uid == 0)
+               return 0;
+       return -1;
+}
+
+static void usage(void) __attribute__((noreturn));
+
+/* Print the one-line synopsis on stderr and terminate with exit status -1. */
+static void usage(void)
+{
+       fputs("Usage: rtacct [ -h?vVzrnasd:t: ] [ ListOfRealms ]\n", stderr);
+       exit(-1);
+}
+
+/*
+ * rtacct entry point.
+ *
+ * The non-option arguments name the realms to show; with none, all
+ * realms are selected and all-zero realms are always suppressed.
+ *
+ * With -d SECONDS the program turns into a daemon: it binds an abstract
+ * UNIX socket named "rtacct<uid>", forks into the background and runs
+ * server_loop().
+ *
+ * Otherwise it acts as a client: it maps the history file (unless both
+ * -a and -s are given), obtains the current table either from a running
+ * daemon or directly from the kernel, and prints absolute values or
+ * values relative to the history.
+ */
+int main(int argc, char *argv[])
+{
+       char hist_name[128];
+       struct sockaddr_un sun;
+       const char *hist_env;
+       int ch;
+       int fd;
+
+       while ((ch = getopt(argc, argv, "h?vVzrM:nasd:t:")) != EOF) {
+               switch(ch) {
+               case 'z':
+                       dump_zeros = 1;
+                       break;
+               case 'r':
+                       reset_history = 1;
+                       break;
+               case 'a':
+                       ignore_history = 1;
+                       break;
+               case 's':
+                       no_update = 1;
+                       break;
+               case 'n':
+                       no_output = 1;
+                       break;
+               case 'd':
+                       /* Given in seconds, kept in milliseconds. */
+                       scan_interval = 1000*atoi(optarg);
+                       break;
+               case 't':
+                       if (sscanf(optarg, "%d", &time_constant) != 1 ||
+                           time_constant <= 0) {
+                               fprintf(stderr, "rtacct: invalid time constant divisor\n");
+                               exit(-1);
+                       }
+                       break;
+               case 'v':
+               case 'V':
+                       printf("rtacct utility, iproute2-ss%s\n", SNAPSHOT);
+                       exit(0);
+               case 'M':
+                       /* Some secret undocumented option, nobody
+                        * is expected to ask about its sense. See?
+                        */
+                       sscanf(optarg, "%lx", &magic_number);
+                       break;
+               case 'h':
+               case '?':
+               default:
+                       usage();
+               }
+       }
+
+       argc -= optind;
+       argv += optind;
+
+       if (argc) {
+               while (argc > 0) {
+                       __u32 realm;
+                       if (rtnl_rtrealm_a2n(&realm, argv[0])) {
+                               fprintf(stderr, "Warning: realm \"%s\" does not exist.\n", argv[0]);
+                               exit(-1);
+                       }
+                       /* Unsigned shift: bit 31 must not overflow a signed int. */
+                       rmap[realm>>5] |= (1U<<(realm&0x1f));
+                       argc--; argv++;
+               }
+       } else {
+               memset(rmap, ~0, sizeof(rmap));
+               /* Always suppress zeros. */
+               dump_zeros = 0;
+       }
+
+       /* Abstract-namespace address: leading NUL byte, then "rtacct<uid>". */
+       sun.sun_family = AF_UNIX;
+       sun.sun_path[0] = 0;
+       sprintf(sun.sun_path+1, "rtacct%d", getuid());
+
+       if (scan_interval > 0) {
+               /* Daemon mode. */
+               if (time_constant == 0)
+                       time_constant = 60;
+               time_constant *= 1000;
+               W = 1 - 1/exp(log(10)*(double)scan_interval/time_constant);
+               if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
+                       perror("rtacct: socket");
+                       exit(-1);
+               }
+               if (bind(fd, (struct sockaddr*)&sun, 2+1+strlen(sun.sun_path+1)) < 0) {
+                       perror("rtacct: bind");
+                       exit(-1);
+               }
+               if (listen(fd, 5) < 0) {
+                       perror("rtacct: listen");
+                       exit(-1);
+               }
+               if (fork())
+                       exit(0);
+               chdir("/");
+               close(0); close(1); close(2); setsid();
+               signal(SIGPIPE, SIG_IGN);
+               signal(SIGCHLD, sigchild);
+               server_loop(fd);
+               exit(0);
+       }
+
+       /* The environment value is data, never a format string. */
+       hist_env = getenv("RTACCT_HISTORY");
+       if (hist_env)
+               snprintf(hist_name, sizeof(hist_name), "%s", hist_env);
+       else
+               sprintf(hist_name, "/tmp/.rtacct.u%d", getuid());
+
+       if (reset_history)
+               unlink(hist_name);
+
+       if (!ignore_history || !no_update) {
+               struct stat stb;
+
+               fd = open(hist_name, O_RDWR|O_CREAT|O_NOFOLLOW, 0600);
+               if (fd < 0) {
+                       perror("rtacct: open history file");
+                       exit(-1);
+               }
+               if (flock(fd, LOCK_EX)) {
+                       perror("rtacct: flock history file");
+                       exit(-1);
+               }
+               if (fstat(fd, &stb) != 0) {
+                       perror("rtacct: fstat history file");
+                       exit(-1);
+               }
+               /* Refuse hard-linked files and files owned by somebody else. */
+               if (stb.st_nlink != 1 || stb.st_uid != getuid()) {
+                       fprintf(stderr, "rtacct: something is so wrong with history file, that I prefer not to proceed.\n");
+                       exit(-1);
+               }
+               /* The file must cover the whole mapping made below, so a
+                * file of the wrong size is (re)written with the still
+                * zeroed database, and a failed write is fatal.
+                */
+               if (stb.st_size != sizeof(*hist_db) &&
+                   write(fd, kern_db, sizeof(*hist_db)) != (ssize_t)sizeof(*hist_db)) {
+                       perror("rtacct: write history file");
+                       exit(-1);
+               }
+
+               /* With -s the mapping is private, so updates never reach the file. */
+               hist_db = mmap(NULL, sizeof(*hist_db),
+                              PROT_READ|PROT_WRITE,
+                              no_update ? MAP_PRIVATE : MAP_SHARED,
+                              fd, 0);
+
+               if (hist_db == MAP_FAILED) {
+                       perror("mmap");
+                       exit(-1);
+               }
+
+               if (!ignore_history) {
+                       FILE *tfp;
+                       /* Stays -1 ("unknown") when /proc/uptime is unreadable. */
+                       long uptime = -1;
+                       if ((tfp = fopen("/proc/uptime", "r")) != NULL) {
+                               if (fscanf(tfp, "%ld", &uptime) != 1)
+                                       uptime = -1;
+                               fclose(tfp);
+                       }
+
+                       /* History written before the current boot is useless. */
+                       if (uptime >= 0 && time(NULL) >= stb.st_mtime+uptime) {
+                               fprintf(stderr, "rtacct: history is aged out, resetting\n");
+                               memset(hist_db, 0, sizeof(*hist_db));
+                       }
+               }
+
+               close(fd);
+       }
+
+       /* Try our own daemon first, then the one started by root. */
+       if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0 &&
+           (connect(fd, (struct sockaddr*)&sun, 2+1+strlen(sun.sun_path+1)) == 0
+            || (strcpy(sun.sun_path+1, "rtacct0"),
+                connect(fd, (struct sockaddr*)&sun, 2+1+strlen(sun.sun_path+1)) == 0))
+           && verify_forging(fd) == 0) {
+               nread(fd, (char*)kern_db, sizeof(*kern_db));
+               if (hist_db && hist_db->signature[0] &&
+                   strcmp((char *)kern_db->signature, (char *)hist_db->signature)) {
+                       fprintf(stderr, "rtacct: history is stale, ignoring it.\n");
+                       hist_db = NULL;
+               }
+               close(fd);
+       } else {
+               /* No usable daemon: read the table straight from the kernel. */
+               if (fd >= 0)
+                       close(fd);
+
+               if (hist_db && hist_db->signature[0] &&
+                   strcmp((char *)hist_db->signature, "kernel")) {
+                       fprintf(stderr, "rtacct: history is stale, ignoring it.\n");
+                       hist_db = NULL;
+               }
+
+               pad_kern_table(kern_db, read_kern_table(kern_db->ival));
+               strcpy((char *)kern_db->signature, "kernel");
+       }
+
+       if (ignore_history || hist_db == NULL)
+               dump_abs_db(stdout);
+       else
+               dump_incr_db(stdout);
+
+       exit(0);
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..feed6cf2fe312f7042728334e6639868a9753443 100644 (file)
@@ -0,0 +1,172 @@
+/* rtstat.c:  A program for route cache monitoring
+ *
+ * Copyright 2001 by Robert Olsson <robert.olsson@its.uu.se>
+ *                                 Uppsala University, Sweden
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Additional credits:
+ * Martin Josefsson <gandalf@wlug.westbo.se> 010828 bug fix
+ * 
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <getopt.h>
+
+#define VERSION "0.33 010829"
+
+extern char *optarg;
+extern int optind, opterr, optopt;
+
+/* Statistics stream; opened once in main() and re-read by scan_line(). */
+FILE *fp;
+/*
+ * Route cache counters.  rt_size is overwritten by every line scan_line()
+ * parses.  Each two-element array holds one summed sample per slot; the slot
+ * is the argument given to scan_line() (main() uses 0 for the earlier sample
+ * and 1 for the later one).
+ */
+unsigned rt_size, in_hit[2], in_slow_tot[2], in_slow_mc[2], 
+  in_no_rt[2], in_brd[2], in_martian_dst[2], in_martian_src[2],
+  out_hit[2], out_slow_tot[2], out_slow_mc[2];
+
+
+/* Read (and summarize for SMP) the different stats vars. */
+
+/*
+ * scan_line - take one snapshot of the route cache counters.
+ * @i: slot of the global counter arrays to fill (main() passes 0 or 1).
+ *
+ * Parses every remaining line of the global stream fp as eleven hex fields.
+ * The first field is stored in rt_size (the last parsed line wins); the other
+ * ten are summed over all lines into slot i of the matching counter array.
+ */
+void scan_line(int i)
+{
+       unsigned temp[10];
+
+       in_hit[i] = 0;
+       in_slow_tot[i] = 0;
+       in_slow_mc[i] = 0;
+       in_no_rt[i] = 0;
+       in_brd[i] = 0;
+       in_martian_dst[i] = 0;
+       in_martian_src[i] = 0;
+       out_hit[i] = 0;
+       out_slow_tot[i] = 0;
+       out_slow_mc[i] = 0;
+
+       /*
+        * Stop at the first line that does not yield all eleven fields.
+        * Looping on !feof() instead would add stale or uninitialized temp[]
+        * values after a failed conversion, and would never terminate on
+        * input that fscanf() cannot consume.
+        */
+       while (fscanf(fp, "%x %x %x %x %x %x %x %x %x %x %x\n",
+                     &rt_size,
+                     &temp[0],     /* in_hit */
+                     &temp[1],     /* in_slow_tot */
+                     &temp[2],     /* in_slow_mc */
+                     &temp[3],     /* in_no_rt */
+                     &temp[4],     /* in_brd */
+                     &temp[5],     /* in_martian_dst */
+                     &temp[6],     /* in_martian_src */
+                     &temp[7],     /* out_hit */
+                     &temp[8],     /* out_slow_tot */
+                     &temp[9]      /* out_slow_mc */
+                     ) == 11) {
+               in_hit[i] += temp[0];
+               in_slow_tot[i] += temp[1];
+               in_slow_mc[i] += temp[2];
+               in_no_rt[i] += temp[3];
+               in_brd[i] += temp[4];
+               in_martian_dst[i] += temp[5];
+               in_martian_src[i] += temp[6];
+               out_hit[i] += temp[7];
+               out_slow_tot[i] += temp[8];
+               out_slow_mc[i] += temp[9];
+       }
+}
+
+/* Write the column-title row of the statistics table to stdout. */
+void print_hdr_line(void)
+{
+       static const char titles[] =
+               " size   IN: hit     tot    mc no_rt bcast madst masrc  OUT: hit     tot     mc\n";
+
+       fputs(titles, stdout);
+}
+
+/*
+ * usage - print the option summary and column legend, then terminate.
+ * @exit_code: process exit status.
+ *
+ * The help text goes to stderr; the sample header row is produced by
+ * print_hdr_line() and therefore goes to stdout.  Never returns: the int
+ * return type is nominal only.
+ */
+int usage(int exit_code)
+{
+       fprintf(stderr, "rtstat        Version %s\n", VERSION);
+       fprintf(stderr, "              -help\n");
+       fprintf(stderr, "              -i interval\n");
+       fprintf(stderr, "              -s subject [0-2]\n");
+       fprintf(stderr, "\n");
+       print_hdr_line();
+       fprintf(stderr, "\n");
+       fprintf(stderr, "size   == route cache size\n");
+       fprintf(stderr, "hit    == IN: total number of cache hits per sec\n");
+       fprintf(stderr, "tot    == IN: total number of cache misses per sec\n");
+       fprintf(stderr, "mc     == IN: multicast cache misses per sec\n");
+       fprintf(stderr, "no_rt  == IN: route table misses per sec\n");
+       fprintf(stderr, "bcast  == IN: broadcast cache misses per sec\n");
+       fprintf(stderr, "madst  == IN: dst martians per sec\n");
+       fprintf(stderr, "masrc  == IN: src martians per sec\n");
+
+       fprintf(stderr, "hit    == OUT: total number of cache hits per sec\n");
+       fprintf(stderr, "tot    == OUT: total number of cache misses per sec\n");
+       fprintf(stderr, "mc     == OUT: multicast cache misses per sec\n");
+
+       exit(exit_code);
+}
+
+/*
+ * rtstat entry point.
+ *
+ * Options:
+ *   -i <n>   seconds between the two samples of each row (default 2, min 1)
+ *   -s <n>   header mode: 0 none, 1 once at start, >1 also before every
+ *            20th row (default 2)
+ *   -h, -?   print usage and exit
+ *
+ * After option parsing the program loops forever, printing one row of
+ * per-second rates for each pair of samples.
+ */
+int main(int argc, char **argv)
+{
+       int c, i, interval=2, hdr=2;
+
+       while ((c=getopt(argc, argv,"h?s:i:")) != EOF)
+               switch (c)
+               {
+               /* getopt() reports unrecognized options as '?' as well. */
+               case '?':
+               case 'h':
+                       usage(0);       /* exits */
+                       break;
+
+               case 'i':
+                       /* Target is an int, so convert with %d (was %u). */
+                       sscanf(optarg, "%d", &interval);
+                       break;
+
+               case 's':
+                       sscanf(optarg, "%d", &hdr);
+                       break;
+
+               default:
+                       usage(1);
+                       break;
+               }
+
+       /* The interval is used as a divisor below; never let it reach 0. */
+       if (interval < 1)
+               interval = 1;
+
+       fp = fopen("/proc/net/rt_cache_stat", "r");
+       if (fp == NULL) {
+               perror("fopen");
+               exit(-1);
+       }
+
+       if (hdr > 0)
+               print_hdr_line();
+
+       for (i = 1; ; i++) {
+               if (hdr > 1 && (i % 20) == 0)
+                       print_hdr_line();
+
+               /* Two samples, interval seconds apart; rewind to re-read. */
+               scan_line(0);
+               sleep(interval);
+               rewind(fp);
+               scan_line(1);
+               rewind(fp);
+
+               printf("%5u %9u %7u %5u %5u %5u %5u %5u %9u %7u %6u\n",
+                      rt_size,
+                      (in_hit[1] - in_hit[0])/interval,
+                      (in_slow_tot[1] - in_slow_tot[0])/interval,
+                      (in_slow_mc[1] - in_slow_mc[0])/interval,
+                      (in_no_rt[1] - in_no_rt[0])/interval,
+                      (in_brd[1] - in_brd[0])/interval,
+                      (in_martian_dst[1] - in_martian_dst[0])/interval,
+                      (in_martian_src[1] - in_martian_src[0])/interval,
+
+                      (out_hit[1] - out_hit[0])/interval,
+                      (out_slow_tot[1] - out_slow_tot[0])/interval,
+                      (out_slow_mc[1] - out_slow_mc[0])/interval
+                       );
+               /* Push each row out now so piped consumers see it live. */
+               fflush(stdout);
+       }
+       return 1;
+}
+
+/*
+ * Compile: 
+  gcc -g -O2 -Wall -o rtstat  rtstat.c
+*/
+
+
+
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..3918bdef9bb38ad77debac9ff4f7e360fb3e648c 100644 (file)
--- a/misc/ss.c
+++ b/misc/ss.c
+/*
+ * ss.c                "sockstat", socket statistics
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/uio.h>
+#include <netinet/in.h>
+#include <string.h>
+#include <errno.h>
+#include <netdb.h>
+#include <arpa/inet.h>
+#include <resolv.h>
+#include <dirent.h>
+#include <fnmatch.h>
+
+#include "utils.h"
+#include "rt_names.h"
+#include "ll_map.h"
+#include "libnetlink.h"
+#include "tcp_diag.h"
+#include "SNAPSHOT.h"
+
+#include <linux/tcp.h>
+
+/*
+ * Global option flags.  Within this part of the file only resolve_services
+ * (gates name lookup in resolve_service()) and preferred_family (read by
+ * parse_hostcond()) are used; the others are presumably set and consumed by
+ * option parsing and the printers elsewhere -- confirm there.
+ */
+int resolve_hosts = 0;
+int resolve_services = 1;
+int preferred_family = AF_UNSPEC;
+int show_options = 0;
+int show_details = 0;
+int show_users = 0;
+int show_mem = 0;
+int show_tcpinfo = 0;
+
+/*
+ * Output column widths.  formatted_print() reads addr_width and serv_width;
+ * where the values are computed is not visible from here.
+ */
+int netid_width;
+int state_width;
+int addrp_width;
+int addr_width;
+int serv_width;
+int screen_width;
+
+/*
+ * Protocol name strings.  dg_proto designates the protocol currently being
+ * processed and is compared against these by pointer identity (see
+ * resolve_service() and __resolve_service()), not by string contents.
+ */
+static const char *TCP_PROTO = "tcp";
+static const char *UDP_PROTO = "udp";
+static const char *RAW_PROTO = "raw";
+static const char *dg_proto = NULL;
+
+/*
+ * Socket table identifiers.  Each value is used as a bit position
+ * (1 << X_DB) in database masks such as struct filter's dbs field; MAX_DB is
+ * the number of tables.
+ */
+enum
+{
+       TCP_DB,
+       UDP_DB,
+       RAW_DB,
+       UNIX_DG_DB,
+       UNIX_ST_DB,
+       PACKET_DG_DB,
+       PACKET_R_DB,
+       NETLINK_DB,
+       MAX_DB
+};
+
+/* Masks covering both packet tables, both unix tables, and every table. */
+#define PACKET_DBM ((1<<PACKET_DG_DB)|(1<<PACKET_R_DB))
+#define UNIX_DBM ((1<<UNIX_DG_DB)|(1<<UNIX_ST_DB))
+#define ALL_DB ((1<<MAX_DB)-1)
+
+/*
+ * Socket state identifiers, used as bit positions (1 << SS_x) in state
+ * masks.  The order lines up entry-for-entry with the sstate_name[] and
+ * sstate_namel[] tables; SS_MAX is the number of states.
+ */
+enum {
+  SS_UNKNOWN,
+  SS_ESTABLISHED,
+  SS_SYN_SENT,
+  SS_SYN_RECV,
+  SS_FIN_WAIT1,
+  SS_FIN_WAIT2,
+  SS_TIME_WAIT,
+  SS_CLOSE,
+  SS_CLOSE_WAIT,
+  SS_LAST_ACK,
+  SS_LISTEN,
+  SS_CLOSING,
+  SS_MAX
+};
+
+/* Mask with the bit of every state (including SS_UNKNOWN) set. */
+#define SS_ALL ((1<<SS_MAX)-1)
+
+#include "ssfilter.h"
+
+/*
+ * Selection criteria for the sockets to show.  The three masks are built
+ * from (1 << X_DB), (1 << SS_x) and (1 << AF_x) bits respectively (see
+ * default_filter); f is an optional expression tree, presumably the kind
+ * evaluated by run_ssfilter().
+ */
+struct filter
+{
+       int dbs;
+       int states;
+       int families;
+       struct ssfilter *f;
+};
+
+/*
+ * Built-in default selection: the TCP table only, every state except LISTEN,
+ * CLOSE, TIME-WAIT and SYN-RECV, both IPv4 and IPv6, no expression (f is
+ * implicitly NULL).  Written with standard C99 designators instead of the
+ * obsolete GNU "field:" form.
+ */
+struct filter default_filter = {
+       .dbs = (1<<TCP_DB),
+       .states = SS_ALL & ~((1<<SS_LISTEN)|(1<<SS_CLOSE)|(1<<SS_TIME_WAIT)|(1<<SS_SYN_RECV)),
+       .families = (1<<AF_INET)|(1<<AF_INET6),
+};
+
+/* Filter currently in effect; zero-initialized at program start. */
+struct filter current_filter;
+
+/*
+ * generic_proc_open - open a procfs-style file read-only.
+ * @env:  name of an environment variable that, when set, gives the complete
+ *        path to open.
+ * @name: path relative to the proc root, used when @env is unset.
+ *
+ * When @env is unset the path is "$PROC_ROOT/<name>", with PROC_ROOT
+ * defaulting to "/proc".  Returns the descriptor from open(), or -1 on
+ * failure (errno is ENAMETOOLONG when the composed path does not fit).
+ */
+int generic_proc_open(char *env, char *name)
+{
+       char store[128];
+       char *p = getenv(env);
+
+       if (!p) {
+               const char *root = getenv("PROC_ROOT");
+               int n;
+
+               if (!root)
+                       root = "/proc";
+               n = snprintf(store, sizeof(store), "%s/%s", root, name);
+               if (n < 0 || (size_t)n >= sizeof(store)) {
+                       /* Refuse to open a silently truncated path. */
+                       errno = ENAMETOOLONG;
+                       return -1;
+               }
+               p = store;
+       }
+       /* Open p, not store: store is unset when the override is used. */
+       return open(p, O_RDONLY);
+}
+
+/*
+ * Per-file openers.  Each one forwards to generic_proc_open() with the name
+ * of an overriding environment variable and the path of the file relative to
+ * the proc root, and returns whatever generic_proc_open() returns.
+ */
+int net_tcp_open(void)
+{
+       return generic_proc_open("PROC_NET_TCP", "net/tcp");
+}
+
+int net_tcp6_open(void)
+{
+       return generic_proc_open("PROC_NET_TCP6", "net/tcp6");
+}
+
+int net_udp_open(void)
+{
+       return generic_proc_open("PROC_NET_UDP", "net/udp");
+}
+
+int net_udp6_open(void)
+{
+       return generic_proc_open("PROC_NET_UDP6", "net/udp6");
+}
+
+int net_raw_open(void)
+{
+       return generic_proc_open("PROC_NET_RAW", "net/raw");
+}
+
+int net_raw6_open(void)
+{
+       return generic_proc_open("PROC_NET_RAW6", "net/raw6");
+}
+
+int net_unix_open(void)
+{
+       return generic_proc_open("PROC_NET_UNIX", "net/unix");
+}
+
+int net_packet_open(void)
+{
+       return generic_proc_open("PROC_NET_PACKET", "net/packet");
+}
+
+int net_netlink_open(void)
+{
+       return generic_proc_open("PROC_NET_NETLINK", "net/netlink");
+}
+
+/* Used by get_slabstat(). */
+int slabinfo_open(void)
+{
+       return generic_proc_open("PROC_SLABINFO", "slabinfo");
+}
+
+int net_sockstat_open(void)
+{
+       return generic_proc_open("PROC_NET_SOCKSTAT", "net/sockstat");
+}
+
+int net_sockstat6_open(void)
+{
+       return generic_proc_open("PROC_NET_SOCKSTAT6", "net/sockstat6");
+}
+
+int net_snmp_open(void)
+{
+       return generic_proc_open("PROC_NET_SNMP", "net/snmp");
+}
+
+int net_netstat_open(void)
+{
+       return generic_proc_open("PROC_NET_NETSTAT", "net/netstat");
+}
+
+/* Used by run_ssfilter() to learn the local port range. */
+int ephemeral_ports_open(void)
+{
+       return generic_proc_open("PROC_IP_LOCAL_PORT_RANGE", "sys/net/ipv4/ip_local_port_range");
+}
+
+/*
+ * find_users - list the processes holding a socket with a given inode.
+ * @ino:    socket inode number; 0 means "unknown" and yields no result.
+ * @buf:    output buffer, filled with comma-separated ("name",pid,fd)
+ *          tuples.  It is left untouched when nothing is found.
+ * @buflen: size of @buf in bytes.
+ *
+ * Walks <proc root>/<pid>/fd/ for every numeric directory under the proc
+ * root ($PROC_ROOT or "/proc/") and compares each link target with
+ * "socket:[<ino>]".  Returns the number of tuples written.
+ */
+int find_users(int ino, char *buf, int buflen)
+{
+       char pattern[64];
+       int  pattern_len;
+       char *ptr = buf;
+       char name[1024];
+       DIR *dir;
+       struct dirent *d;
+       int cnt = 0;
+       int nameoff;
+
+       if (!ino)
+               return 0;
+
+       /*
+        * Format the inode as unsigned: a value above INT_MAX stored in the
+        * int would otherwise print with a minus sign and never match.
+        */
+       snprintf(pattern, sizeof(pattern), "socket:[%u]", (unsigned)ino);
+       pattern_len = strlen(pattern);
+
+       /* Half of name[] is reserved for the root, the rest for pid/fd. */
+       strncpy(name, getenv("PROC_ROOT") ? : "/proc/", sizeof(name)/2);
+       name[sizeof(name)/2] = 0;
+       if (strlen(name) == 0 ||
+           name[strlen(name)-1] != '/')
+               strcat(name, "/");
+       nameoff = strlen(name);
+       if ((dir = opendir(name)) == NULL)
+               return 0;
+
+       while ((d = readdir(dir)) != NULL) {
+               DIR *dir1;
+               struct dirent *d1;
+               int pid;
+               int pos;
+               char crap;
+               char process[16];
+
+               /* Accept purely numeric entries only (trailing char => != 1). */
+               if (sscanf(d->d_name, "%d%c", &pid, &crap) != 1)
+                       continue;
+
+               sprintf(name+nameoff, "%d/fd/", pid);
+               pos = strlen(name);
+               if ((dir1 = opendir(name)) == NULL)
+                       continue;
+
+               process[0] = 0;
+
+               while ((d1 = readdir(dir1)) != NULL) {
+                       int fd, n;
+                       char lnk[64];
+
+                       if (sscanf(d1->d_name, "%d%c", &fd, &crap) != 1)
+                               continue;
+
+                       sprintf(name+pos, "%d", fd);
+                       n = readlink(name, lnk, sizeof(lnk)-1);
+                       if (n != pattern_len ||
+                           memcmp(lnk, pattern, n))
+                               continue;
+
+                       if (ptr-buf >= buflen-1)
+                               break;
+
+                       /* Look the process name up once per pid, lazily. */
+                       if (process[0] == 0) {
+                               char tmp[1024];
+                               FILE *fp;
+                               snprintf(tmp, sizeof(tmp), "%s/%d/stat",
+                                        getenv("PROC_ROOT") ? : "/proc", pid);
+                               if ((fp = fopen(tmp, "r")) != NULL) {
+                                       /* Width keeps the name inside process[16]. */
+                                       if (fscanf(fp, "%*d (%15[^)])", process) != 1)
+                                               process[0] = 0;
+                                       fclose(fp);
+                               }
+                       }
+
+                       snprintf(ptr, buflen-(ptr-buf), "(\"%s\",%d,%d),", process, pid, fd);
+                       ptr += strlen(ptr);
+                       cnt++;
+               }
+               closedir(dir1);
+       }
+       closedir(dir);
+       /* Drop the trailing separator of the last tuple. */
+       if (ptr != buf)
+               ptr[-1] = 0;
+       return cnt;
+}
+
+
+/* Get stats from slab */
+
+/*
+ * Counters filled by get_slabstat().  get_slabstat() addresses the fields as
+ * an int array, so the field order must match slabstat_ids[] entry for entry
+ * and every field must stay an int.
+ */
+struct slabstat
+{
+       int socks;
+       int tcp_ports;
+       int tcp_tws;
+       int tcp_syns;
+       int skbs;
+};
+
+/* Global instance; not referenced in this part of the file. */
+struct slabstat slabstat;
+
+/* Line prefixes looked for by get_slabstat(), in struct slabstat field order. */
+const char *slabstat_ids[] = 
+{
+       "sock",
+       "tcp_bind_bucket",
+       "tcp_tw_bucket",
+       "tcp_open_request",
+       "skbuff_head_cache",
+};
+
+/*
+ * get_slabstat - read selected counters from the slabinfo file.
+ * @s: result; zeroed first, then filled for every line that was found.
+ *
+ * The first line of the file is skipped.  Every later line whose start
+ * matches an entry of slabstat_ids[] has its second whitespace-separated
+ * field parsed as an int into the field of @s at the same index.
+ * Returns 0 on success, -1 when the file cannot be opened.
+ */
+int get_slabstat(struct slabstat *s)
+{
+       char buf[256];
+       FILE *fp;
+       int fd;
+       int cnt;
+
+       memset(s, 0, sizeof(*s));
+
+       fd = slabinfo_open();
+       if (fd < 0)
+               return -1;
+       if ((fp = fdopen(fd, "r")) == NULL) {
+               /* fdopen() does not take ownership on failure. */
+               close(fd);
+               return -1;
+       }
+
+       /* Number of int fields still to be filled. */
+       cnt = sizeof(*s)/sizeof(int);
+
+       fgets(buf, sizeof(buf), fp);
+       while(fgets(buf, sizeof(buf), fp) != NULL) {
+               size_t i;
+               for (i=0; i<sizeof(slabstat_ids)/sizeof(slabstat_ids[0]); i++) {
+                       /* Prefix match, as before: the id need not be a whole word. */
+                       if (memcmp(buf, slabstat_ids[i], strlen(slabstat_ids[i])) == 0) {
+                               sscanf(buf, "%*s%d", ((int *)s) + i);
+                               cnt--;
+                               break;
+                       }
+               }
+               if (cnt <= 0)
+                       break;
+       }
+
+       fclose(fp);
+       return 0;
+}
+
+
+
+
+/*
+ * Short state labels, one per SS_* value in enum order (SS_UNKNOWN first,
+ * SS_CLOSING last); note that the SS_CLOSE slot reads "UNCONN".
+ */
+char *sstate_name[] = {
+  "UNKNOWN",
+  "ESTAB",
+  "SYN-SENT",
+  "SYN-RECV",
+  "FIN-WAIT-1",
+  "FIN-WAIT-2",
+  "TIME-WAIT",
+  "UNCONN",
+  "CLOSE-WAIT",
+  "LAST-ACK",
+  "LISTEN",
+  "CLOSING",
+};
+
+/*
+ * Long lower-case state names in the same order; presumably the spelling
+ * accepted on the command line -- confirm in the option parser.
+ */
+char *sstate_namel[] = {
+  "UNKNOWN",
+  "established",
+  "syn-sent",
+  "syn-recv",
+  "fin-wait-1",
+  "fin-wait-2",
+  "time-wait",
+  "unconnected",
+  "close-wait",
+  "last-ack",
+  "listening",
+  "closing",
+};
+
+/*
+ * One socket record as seen by the filter code.  run_ssfilter() reads
+ * local/remote (address; for AF_UNIX the data area carries a char pointer)
+ * and lport/rport.  The remaining fields are filled and printed elsewhere;
+ * their exact meaning is not established in this part of the file.
+ */
+struct tcpstat
+{
+       inet_prefix     local;
+       inet_prefix     remote;
+       int             lport;
+       int             rport;
+       int             state;
+       int             rq, wq;
+       int             timer;
+       int             timeout;
+       int             retrs;
+       int             ino;
+       int             probes;
+       int             uid;
+       int             refcnt;
+       unsigned long long sk;
+       int             rto, ato, qack, cwnd, ssthresh;
+};
+
+/*
+ * Display names for socket timers; presumably indexed by the timer code of
+ * struct tcpstat, with the last entry as catch-all -- confirm at the user.
+ */
+char *tmr_name[] = {
+       "off",
+       "on",
+       "keepalive",
+       "timewait",
+       "persist",
+       "unknown"
+};
+
+/*
+ * print_ms_timer - render a millisecond count in compact human form.
+ * @timeout: duration in milliseconds; negative values are treated as 0.
+ *
+ * Only the most significant parts are shown: once minutes are present the
+ * milliseconds are dropped, above 9 minutes the seconds are dropped too, and
+ * above 9 seconds the milliseconds are dropped.  A zero duration gives an
+ * empty string.  The result lives in a static buffer that the next call
+ * overwrites.
+ */
+char *print_ms_timer(int timeout)
+{
+       static char buf[64];
+       int whole_secs, mins, secs, ms;
+       size_t used = 0;
+
+       if (timeout < 0)
+               timeout = 0;
+
+       whole_secs = timeout / 1000;
+       ms = timeout % 1000;
+       mins = whole_secs / 60;
+       secs = whole_secs % 60;
+
+       buf[0] = '\0';
+
+       if (mins != 0) {
+               ms = 0;
+               if (mins > 9)
+                       secs = 0;
+               snprintf(buf, sizeof(buf)-16, "%dmin", mins);
+               used = strlen(buf);
+       }
+
+       if (secs != 0) {
+               const char *suffix;
+
+               if (secs > 9)
+                       ms = 0;
+               /* "." joins seconds to the milliseconds that follow. */
+               suffix = (ms != 0) ? "." : "sec";
+               sprintf(buf + used, "%d%s", secs, suffix);
+               used = strlen(buf);
+       }
+
+       if (ms != 0)
+               sprintf(buf + used, "%03dms", ms);
+
+       return buf;
+}
+
+/*
+ * print_hz_timer - render a tick count in compact human form.
+ * @timeout: duration in ticks of get_hz() per second.
+ *
+ * Converts to milliseconds, rounding up, and formats the result with
+ * print_ms_timer() (same static-buffer lifetime).  The conversion is done in
+ * long long because timeout*1000 overflows int for large tick counts; the
+ * result is clamped to what print_ms_timer()'s int parameter can carry.
+ */
+char *print_hz_timer(int timeout)
+{
+       int hz = get_hz();
+       long long ms = (((long long)timeout * 1000) + hz - 1) / hz;
+
+       if (ms < 0)
+               ms = 0;         /* print_ms_timer() treats negatives as 0 */
+       else if (ms > 0x7fffffffLL)
+               ms = 0x7fffffffLL;
+       return print_ms_timer((int)ms);
+}
+
+/*
+ * Service-name cache node: maps (port, proto) to a name.  proto holds one of
+ * the TCP_PROTO/UDP_PROTO/... pointers (or NULL) and is compared by pointer
+ * identity.  name may be NULL; resolve_service() uses that to remember
+ * failed lookups.
+ */
+struct scache
+{
+       struct scache *next;
+       int port;
+       char *name;
+       const char *proto;
+};
+
+/* Entries collected by init_service_resolver(); searched first by __resolve_service(). */
+struct scache *rlist;
+
+/*
+ * init_service_resolver - preload rlist from "rpcinfo -p".
+ *
+ * Runs /usr/sbin/rpcinfo, skips its first output line, and for every
+ * following line of the form "<prog#> <vers> <proto> <port> <name>" pushes an
+ * entry named "rpc.<name>" onto rlist.  Lines that do not parse, and entries
+ * that cannot be allocated, are skipped silently; a missing rpcinfo simply
+ * leaves rlist unchanged.
+ */
+void init_service_resolver(void)
+{
+       char buf[128];
+       FILE *fp = popen("/usr/sbin/rpcinfo -p 2>/dev/null", "r");
+
+       if (!fp)
+               return;
+
+       fgets(buf, sizeof(buf), fp);
+       while (fgets(buf, sizeof(buf), fp) != NULL) {
+               unsigned int progn, port;
+               char proto[128], prog[128];
+               struct scache *c;
+
+               /* Widths bound the tokens to proto[] and to prog[] past "rpc.". */
+               if (sscanf(buf, "%u %*d %127s %u %123s", &progn, proto,
+                          &port, prog+4) != 4)
+                       continue;
+
+               if ((c = malloc(sizeof(*c))) == NULL)
+                       continue;
+
+               memcpy(prog, "rpc.", 4);
+               if ((c->name = strdup(prog)) == NULL) {
+                       /* A nameless entry would be useless; drop it. */
+                       free(c);
+                       continue;
+               }
+               c->port = port;
+               if (strcmp(proto, TCP_PROTO) == 0)
+                       c->proto = TCP_PROTO;
+               else if (strcmp(proto, UDP_PROTO) == 0)
+                       c->proto = UDP_PROTO;
+               else
+                       c->proto = NULL;
+               c->next = rlist;
+               rlist = c;
+       }
+
+       /* Streams from popen() must be closed with pclose() to reap the child. */
+       pclose(fp);
+}
+
+/*
+ * __resolve_service - uncached port-to-name lookup for the current dg_proto.
+ * @port: port number in host byte order.
+ *
+ * Checks the rlist entries first, then the services database.  Returns the
+ * name, or NULL when there is none.  A name coming from getservbyport()
+ * points into storage owned by the C library.
+ */
+const char *__resolve_service(int port)
+{
+       static int services_kept_open;
+       struct scache *entry = rlist;
+       struct servent *sv;
+
+       while (entry != NULL) {
+               if (entry->port == port && entry->proto == dg_proto)
+                       return entry->name;
+               entry = entry->next;
+       }
+
+       /*
+        * Ports 1024..4999 and 32768 and up are deliberately not looked up:
+        * these are the default Linux ephemeral ranges, and the names that
+        * /etc/services assigns inside them are misleading in a socket dump.
+        */
+       if (port >= 32768 || (port >= 1024 && port <= 4999))
+               return NULL;
+
+       if (!services_kept_open) {
+               setservent(1);
+               services_kept_open = 1;
+       }
+
+       sv = getservbyport(htons(port), dg_proto);
+       return sv ? sv->s_name : NULL;
+}
+
+
+/*
+ * resolve_service - text to print for a port of the current dg_proto.
+ * @port: port number (for RAW_PROTO it is handed to inet_proto_n2a()).
+ *
+ * Returns "*" for port 0, a service name when resolve_services is set and a
+ * name is known, and the decimal number otherwise.  "*" and numeric results
+ * live in a static buffer overwritten by the next call; cached names stay
+ * valid.
+ */
+const char *resolve_service(int port)
+{
+       static char buf[128];
+       /* 256 chains; the array element itself is the first node of each. */
+       static struct scache cache[256];
+
+       if (port == 0) {
+               buf[0] = '*';
+               buf[1] = 0;
+               return buf;
+       }
+
+       if (resolve_services) {
+               if (dg_proto == RAW_PROTO) {
+                       return inet_proto_n2a(port, buf, sizeof(buf));
+               } else {
+                       struct scache *c;
+                       const char *res;
+                       /* Mix the port with the proto pointer's address bits. */
+                       int hash = (port^(((unsigned long)dg_proto)>>2))&255;
+
+                       /* Cache hit: a NULL name records an earlier failed lookup. */
+                       for (c = &cache[hash]; c; c = c->next) { 
+                               if (c->port == port &&
+                                   c->proto == dg_proto) {
+                                       if (c->name)
+                                               return c->name;
+                                       goto do_numeric;
+                               }
+                       }
+
+                       /*
+                        * Miss.  A successful lookup gets a freshly allocated
+                        * node; a failed one reuses the chain's head slot as
+                        * the (single) negative entry of this bucket.
+                        */
+                       if ((res = __resolve_service(port)) != NULL) {
+                               if ((c = malloc(sizeof(*c))) == NULL)
+                                       goto do_numeric;
+                       } else {
+                               c = &cache[hash];
+                               if (c->name)
+                                       free(c->name);
+                       }
+                       c->port = port;
+                       c->name = NULL;
+                       c->proto = dg_proto;
+                       if (res) {
+                               /* Copy the name and link the node right after the head. */
+                               c->name = strdup(res);
+                               c->next = cache[hash].next;
+                               cache[hash].next = c;
+                       }
+                       if (c->name)
+                               return c->name;
+               }
+       }
+
+       /* Fallback: plain decimal port number. */
+       do_numeric:
+       sprintf(buf, "%u", port);
+       return buf;
+}
+
+/*
+ * formatted_print - print one "address:service " column pair on stdout.
+ * @a:    address to print.
+ * @port: port passed to resolve_service() for the service part.
+ *
+ * An AF_INET address whose first data word is 0 is shown as "*".  The
+ * address is right-aligned in addr_width columns; for families other than
+ * AF_INET a longer address widens the field in steps of four columns.
+ */
+void formatted_print(inet_prefix *a, int port)
+{
+       char buf[1024];
+       const char *ap = buf;
+       int est_len = addr_width;
+
+       if (a->family != AF_INET) {
+               int text_len;
+
+               ap = format_host(a->family, 16, a->data, buf, sizeof(buf));
+               text_len = strlen(ap);
+               if (text_len > addr_width)
+                       est_len = addr_width + ((text_len-addr_width+3)/4)*4;
+       } else if (a->data[0] != 0) {
+               ap = format_host(AF_INET, 4, a->data, buf, sizeof(buf));
+       } else {
+               /* Wildcard address. */
+               buf[0] = '*';
+               buf[1] = 0;
+       }
+
+       printf("%*s:%-*s ", est_len, ap, serv_width, resolve_service(port));
+}
+
+/*
+ * Address/port condition of a filter expression.  port is -1 when any port
+ * matches (see run_ssfilter()); next chains alternative addresses, as built
+ * by remember_he() when a host name resolves to several of them.
+ */
+struct aafilter
+{
+       inet_prefix     addr;
+       int             port;
+       struct aafilter *next;
+};
+
+/*
+ * inet2_addr_match - inet_addr_match() with IPv4-mapped IPv6 handling.
+ * @a:    socket address.
+ * @p:    rule prefix.
+ * @plen: number of prefix bits to compare.
+ *
+ * Returns 0 when inet_addr_match() returns 0 for the pair.  Otherwise, when
+ * the rule is AF_INET and the address is an IPv6 ::ffff:a.b.c.d one, the
+ * embedded IPv4 word is compared against the rule and that result is
+ * returned.  In all remaining cases the result is 1.  A v4-mapped rule is
+ * thus never widened to plain IPv4 sockets.
+ */
+int inet2_addr_match(inet_prefix *a, inet_prefix *p, int plen)
+{
+       inet_prefix embedded;
+
+       if (inet_addr_match(a, p, plen) == 0)
+               return 0;
+
+       if (p->family != AF_INET || a->family != AF_INET6)
+               return 1;
+
+       if (a->data[0] != 0 || a->data[1] != 0 ||
+           a->data[2] != htonl(0xffff))
+               return 1;
+
+       /* Move the IPv4 word to where an AF_INET address keeps it. */
+       embedded = *a;
+       embedded.data[0] = a->data[3];
+       return inet_addr_match(&embedded, p, plen);
+}
+
+/*
+ * unix_match - glob-match a unix socket name against a rule.
+ * @a: socket address; its data area holds a char pointer to the name
+ *     (NULL is treated as the empty name).
+ * @p: rule; its data area holds a char pointer to the fnmatch() pattern
+ *     (NULL matches everything).
+ *
+ * Returns 1 on match, 0 otherwise.
+ */
+int unix_match(inet_prefix *a, inet_prefix *p)
+{
+       char *pattern;
+       char *sockname;
+
+       memcpy(&pattern, p->data, sizeof(pattern));
+       if (pattern == NULL)
+               return 1;
+
+       memcpy(&sockname, a->data, sizeof(sockname));
+       return fnmatch(pattern, sockname != NULL ? sockname : "", 0) == 0;
+}
+
+/*
+ * Evaluate one host condition (shared by SSF_DCOND and SSF_SCOND).
+ * Unix rules go to unix_match().  Otherwise the port must agree unless the
+ * rule's port is -1; a rule without prefix bits then matches, and a rule
+ * with prefix bits matches when inet2_addr_match() returns 0 for any entry
+ * of the chain.
+ */
+static int ssfilter_hostcond(struct aafilter *a, inet_prefix *addr, int port)
+{
+       if (a->addr.family == AF_UNIX)
+               return unix_match(addr, &a->addr);
+       if (a->port != -1 && a->port != port)
+               return 0;
+       if (!a->addr.bitlen)
+               return 1;
+       do {
+               if (!inet2_addr_match(addr, &a->addr, a->addr.bitlen))
+                       return 1;
+       } while ((a = a->next) != NULL);
+       return 0;
+}
+
+/*
+ * run_ssfilter - evaluate a filter expression tree against one socket.
+ * @f: expression node; leaves carry a struct aafilter in f->pred.
+ * @s: socket record.
+ *
+ * Returns non-zero when the socket satisfies the expression.  Aborts on an
+ * unknown node type.
+ */
+int run_ssfilter(struct ssfilter *f, struct tcpstat *s)
+{
+       switch (f->type) {
+       case SSF_S_AUTO:
+       {
+               /* Local port range, read once from ip_local_port_range. */
+               static int low, high=65535;
+
+               if (s->local.family == AF_UNIX) {
+                       char *p;
+                       memcpy(&p, s->local.data, sizeof(p));
+                       /* Unnamed, or an "@" followed by five hex digits. */
+                       return p == NULL || (p[0] == '@' && strlen(p) == 6 &&
+                                            strspn(p+1, "0123456789abcdef") == 5);
+               }
+               if (s->local.family == AF_PACKET)
+                       /*
+                        * Test the first data word: comparing the data array
+                        * itself with 0 is always false.
+                        */
+                       return s->lport == 0 && s->local.data[0] == 0;
+               if (s->local.family == AF_NETLINK)
+                       return s->lport < 0;
+
+               if (!low) {
+                       FILE *fp = fdopen(ephemeral_ports_open(), "r");
+                       if (fp) {
+                               fscanf(fp, "%d%d", &low, &high);
+                               fclose(fp);
+                       }
+               }
+               return s->lport >= low && s->lport <= high;
+       }
+       case SSF_DCOND:
+               return ssfilter_hostcond((void*)f->pred, &s->remote, s->rport);
+       case SSF_SCOND:
+               return ssfilter_hostcond((void*)f->pred, &s->local, s->lport);
+       case SSF_D_GE:
+       {
+               struct aafilter *a = (void*)f->pred;
+               return s->rport >= a->port;
+       }
+       case SSF_D_LE:
+       {
+               struct aafilter *a = (void*)f->pred;
+               return s->rport <= a->port;
+       }
+       case SSF_S_GE:
+       {
+               struct aafilter *a = (void*)f->pred;
+               return s->lport >= a->port;
+       }
+       case SSF_S_LE:
+       {
+               struct aafilter *a = (void*)f->pred;
+               return s->lport <= a->port;
+       }
+
+       /* Boolean nodes recurse into their operands. */
+       case SSF_AND:
+               return run_ssfilter(f->pred, s) && run_ssfilter(f->post, s);
+       case SSF_OR:
+               return run_ssfilter(f->pred, s) || run_ssfilter(f->post, s);
+       case SSF_NOT:
+               return !run_ssfilter(f->pred, s);
+       default:
+               abort();
+       }
+}
+
+/* Relocate external jumps by reloc. */ 
+/*
+ * ssfilter_patch - retarget the jumps that leave a bytecode fragment.
+ * @a:     start of the fragment.
+ * @len:   fragment length in bytes.
+ * @reloc: amount added to every qualifying jump.
+ *
+ * Walks the ops (each advances by its own 'yes' length).  An op whose 'no'
+ * offset equals the remaining length plus 4 gets @reloc added to it.
+ * Aborts when the op lengths do not add up to exactly @len.
+ */
+void ssfilter_patch(char *a, int len, int reloc)
+{
+       struct tcpdiag_bc_op *op;
+
+       for (; len > 0; len -= op->yes, a += op->yes) {
+               op = (struct tcpdiag_bc_op*)a;
+               if (op->no == len+4)
+                       op->no += reloc;
+       }
+
+       if (len < 0)
+               abort();
+}
+
+/*
+ * Emit a port comparison: one op with the given opcode (yes=8, no=12)
+ * followed by a second 4-byte slot whose last field carries the port.
+ * Returns the emitted length (8).  Aborts when out of memory.
+ */
+static int ssfilter_emit_port_op(int code, struct aafilter *x, char **bytecode)
+{
+       if (!(*bytecode=malloc(8))) abort();
+       ((struct tcpdiag_bc_op*)*bytecode)[0] = (struct tcpdiag_bc_op){ code, 8, 12 };
+       ((struct tcpdiag_bc_op*)*bytecode)[1] = (struct tcpdiag_bc_op){ 0, 0, x->port };
+       return 8;
+}
+
+/*
+ * ssfilter_bytecompile - translate a filter expression tree into bytecode.
+ * @f:        expression node.
+ * @bytecode: receives a malloc()ed buffer that the caller must free().
+ *
+ * Returns the number of bytes stored in *bytecode.  Aborts on allocation
+ * failure or on an unknown node type.
+ */
+int ssfilter_bytecompile(struct ssfilter *f, char **bytecode)
+{
+       switch (f->type) {
+       case SSF_S_AUTO:
+       {
+               if (!(*bytecode=malloc(4))) abort();
+               ((struct tcpdiag_bc_op*)*bytecode)[0] = (struct tcpdiag_bc_op){ TCPDIAG_BC_AUTO, 4, 8 };
+               /*
+                * Exactly one 4-byte op was allocated and written.  Reporting
+                * 8 made parent nodes copy past the end of this buffer.
+                */
+               return 4;
+       }
+       case SSF_DCOND:
+       case SSF_SCOND:
+       {
+               struct aafilter *a = (void*)f->pred;
+               struct aafilter *b;
+               char *ptr;
+               int  code = (f->type == SSF_DCOND ? TCPDIAG_BC_D_COND : TCPDIAG_BC_S_COND);
+               int len = 0;
+
+               /*
+                * Pass 1: total size.  Each chain entry needs an op header, a
+                * host condition and its own address (sized by the entry's
+                * family, not the first entry's), plus a jump op between
+                * consecutive entries.
+                */
+               for (b=a; b; b=b->next) {
+                       len += 4 + sizeof(struct tcpdiag_hostcond);
+                       if (b->addr.family == AF_INET6)
+                               len += 16;
+                       else
+                               len += 4;
+                       if (b->next)
+                               len += 4;
+               }
+               if (!(ptr = malloc(len))) abort();
+               *bytecode = ptr;
+
+               /* Pass 2: emit one condition per entry, each from its own data. */
+               for (b=a; b; b=b->next) {
+                       struct tcpdiag_bc_op *op = (struct tcpdiag_bc_op *)ptr;
+                       int alen = (b->addr.family == AF_INET6 ? 16 : 4);
+                       int oplen = alen + 4 + sizeof(struct tcpdiag_hostcond);
+                       struct tcpdiag_hostcond *cond = (struct tcpdiag_hostcond*)(ptr+4);
+
+                       *op = (struct tcpdiag_bc_op){ code, oplen, oplen+4 };
+                       cond->family = b->addr.family;
+                       cond->port = b->port;
+                       cond->prefix_len = b->addr.bitlen;
+                       memcpy(cond->addr, b->addr.data, alen);
+                       ptr += oplen;
+                       if (b->next) {
+                               /* Jump whose 'no' offset is the distance to the end. */
+                               op = (struct tcpdiag_bc_op *)ptr;
+                               *op = (struct tcpdiag_bc_op){ TCPDIAG_BC_JMP, 4, len - (ptr-*bytecode)};
+                               ptr += 4;
+                       }
+               }
+               return ptr - *bytecode;
+       }
+       case SSF_D_GE:
+               return ssfilter_emit_port_op(TCPDIAG_BC_D_GE, (void*)f->pred, bytecode);
+       case SSF_D_LE:
+               return ssfilter_emit_port_op(TCPDIAG_BC_D_LE, (void*)f->pred, bytecode);
+       case SSF_S_GE:
+               return ssfilter_emit_port_op(TCPDIAG_BC_S_GE, (void*)f->pred, bytecode);
+       case SSF_S_LE:
+               return ssfilter_emit_port_op(TCPDIAG_BC_S_LE, (void*)f->pred, bytecode);
+
+       case SSF_AND:
+       {
+               char *a1, *a2, *a;
+               /* Lengths are ints: as chars they wrapped beyond 127 bytes. */
+               int l1, l2;
+               l1 = ssfilter_bytecompile(f->pred, &a1);
+               l2 = ssfilter_bytecompile(f->post, &a2);
+               if (!(a = malloc(l1+l2))) abort();
+               memcpy(a, a1, l1);
+               memcpy(a+l1, a2, l2);
+               free(a1); free(a2);
+               /* Jumps leaving the first operand must also skip the second. */
+               ssfilter_patch(a, l1, l2);
+               *bytecode = a;
+               return l1+l2;
+       }
+       case SSF_OR:
+       {
+               char *a1, *a2, *a;
+               int l1, l2;
+               l1 = ssfilter_bytecompile(f->pred, &a1);
+               l2 = ssfilter_bytecompile(f->post, &a2);
+               if (!(a = malloc(l1+l2+4))) abort();
+               memcpy(a, a1, l1);
+               memcpy(a+l1+4, a2, l2);
+               free(a1); free(a2);
+               /* Jump op placed between the operands; its 'no' offset spans the second. */
+               *(struct tcpdiag_bc_op*)(a+l1) = (struct tcpdiag_bc_op){ TCPDIAG_BC_JMP, 4, l2+4 };
+               *bytecode = a;
+               return l1+l2+4;
+       }
+       case SSF_NOT:
+       {
+               char *a1, *a;
+               int l1;
+               l1 = ssfilter_bytecompile(f->pred, &a1);
+               if (!(a = malloc(l1+4))) abort();
+               memcpy(a, a1, l1);
+               free(a1);
+               /* Trailing jump op appended after the operand's code. */
+               *(struct tcpdiag_bc_op*)(a+l1) = (struct tcpdiag_bc_op){ TCPDIAG_BC_JMP, 4, 8 };
+               *bytecode = a;
+               return l1+4;
+       }
+       default:
+               abort();
+       }
+}
+
+/*
+ * remember_he - store the addresses of a host entry in a condition chain.
+ * @a:  head condition.  While its bitlen is 0 it is filled in place; once it
+ *      holds an address, each further address goes into a copy of the head
+ *      that is linked in right behind it.
+ * @he: resolver result; only AF_INET and AF_INET6 entries are used.
+ *
+ * Returns the number of addresses stored (fewer than offered when memory
+ * runs out, 0 for other address families).
+ */
+int remember_he(struct aafilter *a, struct hostent *he)
+{
+       char **cur;
+       int stored = 0;
+       int alen;
+
+       switch (he->h_addrtype) {
+       case AF_INET:
+               alen = 4;
+               break;
+       case AF_INET6:
+               alen = 16;
+               break;
+       default:
+               return 0;
+       }
+
+       for (cur = he->h_addr_list; *cur != NULL; cur++) {
+               struct aafilter *slot = a;
+
+               if (a->addr.bitlen) {
+                       slot = malloc(sizeof(*slot));
+                       if (slot == NULL)
+                               return stored;
+                       *slot = *a;
+                       slot->next = a->next;
+                       a->next = slot;
+               }
+               memcpy(slot->addr.data, *cur, alen);
+               slot->addr.bytelen = alen;
+               slot->addr.bitlen = alen*8;
+               slot->addr.family = he->h_addrtype;
+               stored++;
+       }
+       return stored;
+}
+
+/*
+ * Resolve addr through gethostbyname2() and store the results in a via
+ * remember_he().
+ *
+ * With fam == AF_UNSPEC both AF_INET and AF_INET6 are looked up, in that
+ * order; otherwise only fam is queried.  a->addr.bitlen is reset to zero
+ * first so that the head node is reused for the first address found.
+ *
+ * Returns 0 when at least one address was stored, 1 otherwise.
+ */
+int get_dns_host(struct aafilter *a, char *addr, int fam)
+{
+       static int resolver_ready;
+       struct hostent *ent;
+       int first_family;
+       int found = 0;
+
+       a->addr.bitlen = 0;
+
+       /* sethostent(1) is issued once, before the very first lookup. */
+       if (resolver_ready == 0) {
+               sethostent(1);
+               resolver_ready = 1;
+       }
+
+       first_family = fam;
+       if (fam == AF_UNSPEC)
+               first_family = AF_INET;
+
+       ent = gethostbyname2(addr, first_family);
+       if (ent != NULL)
+               found = remember_he(a, ent);
+
+       if (fam == AF_UNSPEC) {
+               ent = gethostbyname2(addr, AF_INET6);
+               if (ent != NULL)
+                       found += remember_he(a, ent);
+       }
+
+       return found == 0;
+}
+
+/* Non-zero once xll_init() has loaded the link index/name map. */
+int xll_initted = 0;
+
+/*
+ * Load the link map through a temporary rtnetlink handle and mark it as
+ * initialised.
+ *
+ * The result of rtnl_open() is checked: if the handle cannot be opened it
+ * must not be handed to ll_init_map(), because rth would still be
+ * uninitialised stack memory.  There is no way to report the failure to
+ * the callers (they only return a name or an index), so the program exits.
+ */
+void xll_init(void)
+{
+       struct rtnl_handle rth;
+
+       if (rtnl_open(&rth, 0) < 0)
+               exit(1);
+       ll_init_map(&rth);
+       rtnl_close(&rth);
+       xll_initted = 1;
+}
+
+/* Translate an interface index to its name, loading the link map on
+ * first use. */
+const char *xll_index_to_name(int index)
+{
+       if (xll_initted == 0) {
+               xll_init();
+       }
+       return ll_index_to_name(index);
+}
+
+/* Translate an interface name to its index, loading the link map on
+ * first use. */
+int xll_name_to_index(char *dev)
+{
+       if (xll_initted == 0) {
+               xll_init();
+       }
+       return ll_name_to_index(dev);
+}
+
+/*
+ * Parse a host condition string into a freshly malloc'ed struct aafilter.
+ *
+ * Recognised forms (the family defaults to preferred_family):
+ *   unix:PATH               - PATH is strdup'ed and the pointer itself is
+ *                             stored in addr.data
+ *   link:PROTO:IFACE        - IFACE is an integer or an interface name
+ *   netlink:CHANNEL:PID     - CHANNEL may be rtnl/fw/tcpdiag, PID "kernel"
+ *   [inet:|inet6:]ADDR:PORT - ADDR may be "[literal]", "*", a prefix or a
+ *                             host name; PORT a number or service name
+ * "*" or an empty component leaves that part unset (port stays -1).
+ *
+ * The input string is modified in place (separators are overwritten with
+ * NUL bytes).
+ *
+ * Returns the new filter, or NULL on a parse error or when memory cannot
+ * be allocated.
+ */
+void *parse_hostcond(char *addr)
+{
+       char *port = NULL;
+       struct aafilter a;
+       struct aafilter *res;
+       int fam = preferred_family;
+
+       memset(&a, 0, sizeof(a));
+       a.port = -1;
+
+       if (fam == AF_UNIX || strncmp(addr, "unix:", 5) == 0) {
+               char *p;
+               a.addr.family = AF_UNIX;
+               if (strncmp(addr, "unix:", 5) == 0)
+                       addr+=5;
+               p = strdup(addr);
+               /* strdup() may fail; do not hand NULL to strlen(). */
+               if (p == NULL)
+                       return NULL;
+               a.addr.bitlen = 8*strlen(p);
+               /* The pointer value, not the string, is kept in addr.data. */
+               memcpy(a.addr.data, &p, sizeof(p));
+               goto out;
+       }
+
+       if (fam == AF_PACKET || strncmp(addr, "link:", 5) == 0) {
+               a.addr.family = AF_PACKET;
+               a.addr.bitlen = 0;
+               if (strncmp(addr, "link:", 5) == 0)
+                       addr+=5;
+               port = strchr(addr, ':');
+               if (port) {
+                       *port = 0;
+                       if (port[1] && strcmp(port+1, "*")) {
+                               /* Not a number: try it as an interface name. */
+                               if (get_integer(&a.port, port+1, 0)) {
+                                       if ((a.port = xll_name_to_index(port+1)) <= 0)
+                                               return NULL;
+                               }
+                       }
+               }
+               if (addr[0] && strcmp(addr, "*")) {
+                       unsigned short tmp;
+                       a.addr.bitlen = 32;
+                       if (ll_proto_a2n(&tmp, addr))
+                               return NULL;
+                       a.addr.data[0] = ntohs(tmp);
+               }
+               goto out;
+       }
+
+       if (fam == AF_NETLINK || strncmp(addr, "netlink:", 8) == 0) {
+               a.addr.family = AF_NETLINK;
+               a.addr.bitlen = 0;
+               if (strncmp(addr, "netlink:", 8) == 0)
+                       addr+=8;
+               port = strchr(addr, ':');
+               if (port) {
+                       *port = 0;
+                       if (port[1] && strcmp(port+1, "*")) {
+                               if (get_integer(&a.port, port+1, 0)) {
+                                       if (strcmp(port+1, "kernel") == 0)
+                                               a.port = 0;
+                                       else
+                                               return NULL;
+                               }
+                       }
+               }
+               if (addr[0] && strcmp(addr, "*")) {
+                       a.addr.bitlen = 32;
+                       /* Not a number: accept the known channel names. */
+                       if (get_u32(a.addr.data, addr, 0)) {
+                               if (strcmp(addr, "rtnl") == 0)
+                                       a.addr.data[0] = 0;
+                               else if (strcmp(addr, "fw") == 0)
+                                       a.addr.data[0] = 3;
+                               else if (strcmp(addr, "tcpdiag") == 0)
+                                       a.addr.data[0] = 4;
+                               else
+                                       return NULL;
+                       }
+               }
+               goto out;
+       }
+
+       if (strncmp(addr, "inet:", 5) == 0) {
+               addr += 5;
+               fam = AF_INET;
+       } else if (strncmp(addr, "inet6:", 6) == 0) {
+               addr += 6;
+               fam = AF_INET6;
+       }
+
+       /* URL-like literal [] */
+       if (addr[0] == '[') {
+               addr++;
+               if ((port = strchr(addr, ']')) == NULL)
+                       return NULL;
+               *port++ = 0;
+       } else if (addr[0] == '*') {
+               port = addr+1;
+       } else {
+               /* Search for the last ':' after a '/' if there is one,
+                * otherwise in the whole string. */
+               port = strrchr(strchr(addr, '/') ? : addr, ':');
+       }
+       if (port && *port) {
+               if (*port != ':')
+                       return NULL;
+               *port++ = 0;
+               if (*port && *port != '*') {
+                       if (get_integer(&a.port, port, 0)) {
+                               /* Not numeric: look the name up as a service
+                                * in every protocol database being shown. */
+                               struct servent *se1 = NULL;
+                               struct servent *se2 = NULL;
+                               if (current_filter.dbs&(1<<UDP_DB))
+                                       se1 = getservbyname(port, UDP_PROTO);
+                               if (current_filter.dbs&(1<<TCP_DB))
+                                       se2 = getservbyname(port, TCP_PROTO);
+                               if (se1 && se2 && se1->s_port != se2->s_port) {
+                                       fprintf(stderr, "Error: ambiguous port \"%s\".\n", port);
+                                       return NULL;
+                               }
+                               if (!se1)
+                                       se1 = se2;
+                               if (se1) {
+                                       a.port = ntohs(se1->s_port);
+                               } else {
+                                       /* Fall back to the names cached in rlist. */
+                                       struct scache *s;
+                                       for (s = rlist; s; s = s->next) {
+                                               if ((s->proto == UDP_PROTO &&
+                                                    (current_filter.dbs&(1<<UDP_DB))) ||
+                                                   (s->proto == TCP_PROTO &&
+                                                    (current_filter.dbs&(1<<TCP_DB)))) {
+                                                       if (s->name && strcmp(s->name, port) == 0) {
+                                                               if (a.port > 0 && a.port != s->port) {
+                                                                       fprintf(stderr, "Error: ambiguous port \"%s\".\n", port);
+                                                                       return NULL;
+                                                               }
+                                                               a.port = s->port;
+                                                       }
+                                               }
+                                       }
+                                       if (a.port <= 0) {
+                                               fprintf(stderr, "Error: \"%s\" does not look like a port.\n", port);
+                                               return NULL;
+                                       }
+                               }
+                       }
+               }
+       }
+       if (addr && *addr && *addr != '*') {
+               /* Try a numeric prefix first, then a DNS lookup. */
+               if (get_prefix_1(&a.addr, addr, fam)) {
+                       if (get_dns_host(&a, addr, fam)) {
+                               fprintf(stderr, "Error: an inet prefix is expected rather than \"%s\".\n", addr);
+                               return NULL;
+                       }
+               }
+       }
+
+       out:
+       res = malloc(sizeof(*res));
+       if (res)
+               memcpy(res, &a, sizeof(a));
+       return res;
+}
+
+/*
+ * Parse and print one record of a /proc/net/tcp-style table.
+ *
+ * line is a NUL-terminated record; it is modified in place while being
+ * split into local address, remote address and the remaining data.
+ * Records whose state is not selected in f->states, or that are rejected
+ * by the filter expression f->f, are skipped silently.
+ *
+ * Returns 0 on success or skip, -1 when the record does not contain the
+ * expected ':' separators.
+ */
+int tcp_show_line(char *line, struct filter *f, int family)
+{
+       struct tcpstat s;
+       char *loc, *rem, *data;
+       char opt[256];
+       int n;
+       char *p;
+
+       if ((p = strchr(line, ':')) == NULL)
+               return -1;
+       loc = p+2;
+
+       if ((p = strchr(loc, ':')) == NULL)
+               return -1;
+       p[5] = 0;
+       rem = p+6;
+
+       if ((p = strchr(rem, ':')) == NULL)
+               return -1;
+       p[5] = 0;
+       data = p+6;
+
+       /* The second character of the state field is decoded as one hex
+        * digit and tested against the state mask. */
+       do {
+               int state = (data[1] >= 'A') ? (data[1] - 'A' + 10) : (data[1] - '0');
+
+               if (!(f->states & (1<<state)))
+                       return 0;
+       } while (0);
+
+       s.local.family = s.remote.family = family;
+       if (family == AF_INET) {
+               sscanf(loc, "%x:%x", s.local.data, (unsigned*)&s.lport);
+               sscanf(rem, "%x:%x", s.remote.data, (unsigned*)&s.rport);
+               s.local.bytelen = s.remote.bytelen = 4;
+       } else {
+               sscanf(loc, "%08x%08x%08x%08x:%x",
+                      s.local.data,
+                      s.local.data+1,
+                      s.local.data+2,
+                      s.local.data+3,
+                      &s.lport);
+               sscanf(rem, "%08x%08x%08x%08x:%x",
+                      s.remote.data,
+                      s.remote.data+1,
+                      s.remote.data+2,
+                      s.remote.data+3,
+                      &s.rport);
+               s.local.bytelen = s.remote.bytelen = 16;
+       }
+
+       if (f->f && run_ssfilter(f->f, &s) == 0)
+               return 0;
+
+       opt[0] = 0;
+       /* The trailing free-form text is limited to 255 characters so that
+        * it can never overrun opt[]. */
+       n = sscanf(data, "%x %x:%x %x:%x %x %d %d %d %d %llx %d %d %d %d %d %255[^\n]\n",
+                  &s.state, &s.wq, &s.rq,
+                  &s.timer, &s.timeout, &s.retrs, &s.uid, &s.probes, &s.ino,
+                  &s.refcnt, &s.sk, &s.rto, &s.ato, &s.qack,
+                  &s.cwnd, &s.ssthresh, opt);
+
+       /* All 17 conversions must succeed for opt to be valid. */
+       if (n < 17)
+               opt[0] = 0;
+
+       /* Fewer than 12 fields: rto and later values were not parsed, so
+        * fall back to defaults that the printing code below suppresses. */
+       if (n < 12) {
+               s.rto = 0;
+               s.cwnd = 2;
+               s.ssthresh = -1;
+               s.ato = s.qack = 0;
+       }
+
+       if (netid_width)
+               printf("%-*s ", netid_width, "tcp");
+       if (state_width)
+               printf("%-*s ", state_width, sstate_name[s.state]);
+
+       printf("%-6d %-6d ", s.rq, s.wq);
+
+       formatted_print(&s.local, s.lport);
+       formatted_print(&s.remote, s.rport);
+
+       if (show_options) {
+               if (s.timer) {
+                       /* Timer codes above 4 are all mapped to slot 5. */
+                       if (s.timer > 4)
+                               s.timer = 5;
+                       printf(" timer:(%s,%s,%d)",
+                              tmr_name[s.timer],
+                              print_hz_timer(s.timeout),
+                              s.timer != 1 ? s.probes : s.retrs);
+               }
+       }
+       if (show_tcpinfo) {
+               if (s.rto && s.rto != 3*get_hz())
+                       printf(" rto:%g", (double)s.rto/get_hz());
+               if (s.ato)
+                       printf(" ato:%g", (double)s.ato/get_hz());
+               if (s.cwnd != 2)
+                       printf(" cwnd:%d", s.cwnd);
+               if (s.ssthresh != -1)
+                       printf(" ssthresh:%d", s.ssthresh);
+               if (s.qack/2)
+                       printf(" qack:%d", s.qack/2);
+               if (s.qack&1)
+                       printf(" bidir");
+       }
+       if (show_users) {
+               char ubuf[4096];
+               if (find_users(s.ino, ubuf, sizeof(ubuf)) > 0)
+                       printf(" users:(%s)", ubuf);
+       }
+       if (show_details) {
+               if (s.uid)
+                       printf(" uid:%u", (unsigned)s.uid);
+               printf(" ino:%u", (unsigned)s.ino);
+               printf(" sk:%llx", s.sk);
+               if (opt[0])
+                       printf(" opt:\"%s\"", opt);
+       }
+       printf("\n");
+
+       return 0;
+}
+
+/*
+ * Read a table of fixed-length, newline-terminated records from fd and
+ * hand each one to worker().
+ *
+ * The record length is taken from the first line of the first chunk; that
+ * first record itself is not passed to worker.  Every later record must
+ * end in '\n' at exactly that length; the newline is replaced by NUL
+ * before worker(record, f, fam) is called.  A negative return from worker
+ * stops the scan, which is still reported as success.
+ *
+ * buf/bufsize is caller-provided scratch space.  A read that returns less
+ * than requested is treated as end of file (see the original remark
+ * below).
+ *
+ * Returns 0 on success, -1 on error with errno set (EINVAL when the data
+ * does not have the expected record layout).
+ */
+int generic_record_read(int fd, char *buf, int bufsize,
+                       int (*worker)(char*, struct filter *, int),
+                       struct filter *f, int fam)
+{
+       int n;
+       int recsize;
+       int eof = 0;
+       char *p;
+
+       /* Load the first chunk and calculate record length from it. */
+       n = read(fd, buf, bufsize);
+       if (n < 0)
+               goto outerr;
+       /* I _know_ that this is wrong, do not remind. :-)
+        * But this works nowadays. */
+       if (n < bufsize)
+               eof = 1;
+       p = memchr(buf, '\n', n);
+       if (p == NULL || (p-buf) >= n)
+               goto outwrongformat;
+       recsize = (p-buf)+1;
+       p = buf+recsize;
+
+       for (;;) {
+               /* Consume every complete record currently in the buffer. */
+               while ((p+recsize) - buf <= n) {
+                       if (p[recsize-1] != '\n')
+                               goto outwrongformat;
+                       p[recsize-1] = 0;
+                       if (worker(p, f, fam) < 0)
+                               goto done;
+                       p += recsize;
+               }
+               if (!eof) {
+                       /* Slide the partial record to the front and refill
+                        * the rest.  Source and destination lie in the same
+                        * buffer and may overlap, hence memmove(). */
+                       int remains = (buf+bufsize) - p;
+                       memmove(buf, p, remains);
+                       p = buf+remains;
+                       n = read(fd, p, (buf+bufsize) - p);
+                       if (n < 0)
+                               goto outerr;
+                       if (n < (buf+bufsize) - p) {
+                               eof = 1;
+                               if (n == 0) {
+                                       /* EOF in the middle of a record. */
+                                       if (remains)
+                                               goto outwrongformat;
+                                       goto done;
+                               }
+                       }
+                       n += remains;
+                       p = buf;
+               } else {
+                       /* At EOF nothing may be left over. */
+                       if (p != buf+n)
+                               goto outwrongformat;
+                       goto done;
+               }
+       }
+done:
+       return 0;
+
+outwrongformat:
+       errno = EINVAL;
+outerr:
+       return -1;
+}
+
+
+int tcp_show_sock(struct nlmsghdr *nlh, struct filter *f)
+{
+       struct tcpdiagmsg *r = NLMSG_DATA(nlh);
+       struct tcpstat s;
+
+       s.state = r->tcpdiag_state;
+       s.local.family = s.remote.family = r->tcpdiag_family;
+       s.lport = ntohs(r->id.tcpdiag_sport);
+       s.rport = ntohs(r->id.tcpdiag_dport);
+       if (s.local.family == AF_INET) {
+               s.local.bytelen = s.remote.bytelen = 4;
+       } else {
+               s.local.bytelen = s.remote.bytelen = 16;
+       }
+       memcpy(s.local.data, r->id.tcpdiag_src, s.local.bytelen);
+       memcpy(s.remote.data, r->id.tcpdiag_dst, s.local.bytelen);
+
+       if (f && f->f && run_ssfilter(f->f, &s) == 0)
+               return 0;
+
+       if (netid_width)
+               printf("%-*s ", netid_width, "tcp");
+       if (state_width)
+               printf("%-*s ", state_width, sstate_name[s.state]);
+
+       printf("%-6d %-6d ", r->tcpdiag_rqueue, r->tcpdiag_wqueue);
+
+       formatted_print(&s.local, s.lport);
+       formatted_print(&s.remote, s.rport);
+
+       if (show_options) {
+               if (r->tcpdiag_timer) {
+                       if (r->tcpdiag_timer > 4)
+                               r->tcpdiag_timer = 5;
+                       printf(" timer:(%s,%s,%d)",
+                              tmr_name[r->tcpdiag_timer],
+                              print_ms_timer(r->tcpdiag_expires),
+                              r->tcpdiag_retrans);
+               }
+       }
+       if (show_users) {
+               char ubuf[4096];
+               if (find_users(r->tcpdiag_inode, ubuf, sizeof(ubuf)) > 0)
+                       printf(" users:(%s)", ubuf);
+       }
+       if (show_details) {
+               if (r->tcpdiag_uid)
+                       printf(" uid:%u", (unsigned)r->tcpdiag_uid);
+               printf(" ino:%u", (unsigned)r->tcpdiag_inode);
+               printf(" sk:%08x", r->id.tcpdiag_cookie[0]);
+               if (r->id.tcpdiag_cookie[1] != 0)
+                       printf("%08x", r->id.tcpdiag_cookie[1]);
+       }
+       if (show_mem || show_tcpinfo) {
+               struct rtattr * tb[TCPDIAG_MAX+1];
+               struct tcpdiag_meminfo *minfo = NULL;
+               struct tcp_info *info = NULL;
+
+               memset(tb, 0, sizeof(tb));
+               parse_rtattr(tb, TCPDIAG_MAX, (struct rtattr*)(r+1),
+                            nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)));
+               if (tb[TCPDIAG_MEMINFO])
+                       minfo = RTA_DATA(tb[TCPDIAG_MEMINFO]);
+               if (tb[TCPDIAG_INFO])
+                       info = RTA_DATA(tb[TCPDIAG_INFO]);
+               if (minfo) {
+                       printf(" mem:(r%u,w%u,f%u,t%u)",
+                              minfo->tcpdiag_rmem,
+                              minfo->tcpdiag_wmem,
+                              minfo->tcpdiag_fmem,
+                              minfo->tcpdiag_tmem);
+               }
+               if (info) {
+#ifdef TCP_INFO
+                       if (info->tcpi_rto && info->tcpi_rto != 3000000)
+                               printf(" rto:%g", (double)info->tcpi_rto/1000);
+                       if (info->tcpi_rtt)
+                               printf(" rtt:%g/%g", (double)info->tcpi_rtt/1000,
+                                      (double)info->tcpi_rttvar/1000);
+                       if (info->tcpi_ato)
+                               printf(" ato:%g", (double)info->tcpi_ato/1000);
+                       if (info->tcpi_snd_cwnd != 2)
+                               printf(" cwnd:%d", info->tcpi_snd_cwnd);
+                       if (info->tcpi_snd_ssthresh < 0xFFFF)
+                               printf(" ssthresh:%d", info->tcpi_snd_ssthresh);
+#else
+#warning No TCP_INFO. Please, do not repeat this experiment, use right kernel.
+                       printf(" MORE_INFO_PROVIDED_YOU_COMPILED_SS_RIGHT");
+#endif
+               }
+       }
+       printf("\n");
+
+       return 0;
+
+}
+
+/*
+ * Dump TCP sockets through a NETLINK_TCPDIAG socket.
+ *
+ * A TCPDIAG_GETSOCK request is sent for the states in f->states; if a
+ * filter expression is present it is compiled to bytecode and attached as
+ * a TCPDIAG_REQ_BYTECODE attribute.  When dump_fp is non-NULL the raw
+ * replies are written to it instead of being printed.
+ *
+ * The netlink socket and the compiled bytecode are released on every
+ * return path; errno is preserved across that cleanup.
+ *
+ * Returns 0 when the dump finished (including an error reply from the
+ * kernel, which is reported on stderr), -1 when the socket cannot be
+ * created or the request cannot be sent, or a negative value propagated
+ * from tcp_show_sock().
+ */
+int tcp_show_netlink(struct filter *f, FILE *dump_fp)
+{
+       int fd;
+       int ret = 0;
+       int saved_errno;
+       struct sockaddr_nl nladdr;
+       struct {
+               struct nlmsghdr nlh;
+               struct tcpdiagreq r;
+       } req;
+       char    *bc = NULL;
+       int     bclen;
+       struct msghdr msg;
+       struct rtattr rta;
+       char    buf[8192];
+       struct iovec iov[3];
+
+       if ((fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_TCPDIAG)) < 0)
+               return -1;
+
+       memset(&nladdr, 0, sizeof(nladdr));
+       nladdr.nl_family = AF_NETLINK;
+
+       req.nlh.nlmsg_len = sizeof(req);
+       req.nlh.nlmsg_type = TCPDIAG_GETSOCK;
+       req.nlh.nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST;
+       req.nlh.nlmsg_pid = 0;
+       req.nlh.nlmsg_seq = 123456;
+       memset(&req.r, 0, sizeof(req.r));
+       req.r.tcpdiag_family = AF_INET;
+       req.r.tcpdiag_states = f->states;
+       if (show_mem)
+               req.r.tcpdiag_ext |= (1<<(TCPDIAG_MEMINFO-1));
+       if (show_tcpinfo)
+               req.r.tcpdiag_ext |= (1<<(TCPDIAG_INFO-1));
+
+       iov[0] = (struct iovec){ &req, sizeof(req) };
+       if (f->f) {
+               /* Request, attribute header and bytecode go out as three
+                * iovec segments of one message. */
+               bclen = ssfilter_bytecompile(f->f, &bc);
+               rta.rta_type = TCPDIAG_REQ_BYTECODE;
+               rta.rta_len = RTA_LENGTH(bclen);
+               iov[1] = (struct iovec){ &rta, sizeof(rta) };
+               iov[2] = (struct iovec){ bc, bclen };
+               req.nlh.nlmsg_len += RTA_LENGTH(bclen);
+       }
+
+       msg = (struct msghdr) {
+               (void*)&nladdr, sizeof(nladdr),
+               iov,    f->f ? 3 : 1,
+               NULL,   0,
+               0
+       };
+
+       if (sendmsg(fd, &msg, 0) < 0) {
+               ret = -1;
+               goto out;
+       }
+
+       iov[0] = (struct iovec){ buf, sizeof(buf) };
+
+       while (1) {
+               int status;
+               struct nlmsghdr *h;
+
+               msg = (struct msghdr) {
+                       (void*)&nladdr, sizeof(nladdr),
+                       iov,    1,
+                       NULL,   0,
+                       0
+               };
+
+               status = recvmsg(fd, &msg, 0);
+
+               if (status < 0) {
+                       if (errno == EINTR)
+                               continue;
+                       perror("OVERRUN");
+                       continue;
+               }
+               if (status == 0) {
+                       fprintf(stderr, "EOF on netlink\n");
+                       goto out;
+               }
+
+               if (dump_fp)
+                       fwrite(buf, 1, NLMSG_ALIGN(status), dump_fp);
+
+               h = (struct nlmsghdr*)buf;
+               while (NLMSG_OK(h, status)) {
+                       int err;
+
+                       /* Only replies to our own request are looked at. */
+                       if (/*h->nlmsg_pid != rth->local.nl_pid ||*/
+                           h->nlmsg_seq != 123456)
+                               goto skip_it;
+
+                       if (h->nlmsg_type == NLMSG_DONE)
+                               goto out;
+                       if (h->nlmsg_type == NLMSG_ERROR) {
+                               struct nlmsgerr *nlerr = (struct nlmsgerr*)NLMSG_DATA(h);
+                               if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr))) {
+                                       fprintf(stderr, "ERROR truncated\n");
+                               } else {
+                                       errno = -nlerr->error;
+                                       perror("TCPDIAG answers");
+                               }
+                               goto out;
+                       }
+                       if (!dump_fp) {
+                               /* No filter is passed to tcp_show_sock()
+                                * here; the expression was already sent
+                                * along with the request as bytecode. */
+                               err = tcp_show_sock(h, NULL);
+                               if (err < 0) {
+                                       ret = err;
+                                       goto out;
+                               }
+                       }
+
+skip_it:
+                       h = NLMSG_NEXT(h, status);
+               }
+               if (msg.msg_flags & MSG_TRUNC) {
+                       fprintf(stderr, "Message truncated\n");
+                       continue;
+               }
+               if (status) {
+                       fprintf(stderr, "!!!Remnant of size %d\n", status);
+                       exit(1);
+               }
+       }
+
+out:
+       saved_errno = errno;
+       free(bc);
+       close(fd);
+       errno = saved_errno;
+       return ret;
+}
+
+/*
+ * Replay a tcpdiag dump from the file named by $TCPDIAG_FILE, printing
+ * each socket message through tcp_show_sock() with filter f.
+ *
+ * The caller is expected to have verified that TCPDIAG_FILE is set.
+ * Every message length read from the file is validated against the
+ * header size and the local buffer before the payload is read, so a
+ * damaged file cannot overrun buf.  The file is closed on every return
+ * path.
+ *
+ * Returns 0 when an NLMSG_DONE message is reached, -1 on I/O or format
+ * errors or on an NLMSG_ERROR message, or the negative value returned by
+ * tcp_show_sock().
+ */
+int tcp_show_netlink_file(struct filter *f)
+{
+       FILE    *fp;
+       char    buf[8192];
+       int     ret = -1;
+
+       if ((fp = fopen(getenv("TCPDIAG_FILE"), "r")) == NULL) {
+               perror("fopen($TCPDIAG_FILE)");
+               return -1;
+       }
+
+       while (1) {
+               size_t got;
+               int err;
+               struct nlmsghdr *h = (struct nlmsghdr*)buf;
+
+               /* fread() never returns a negative value; a short count
+                * means either a read error or end of file. */
+               got = fread(buf, 1, sizeof(*h), fp);
+               if (got != sizeof(*h)) {
+                       if (ferror(fp))
+                               perror("Reading header from $TCPDIAG_FILE");
+                       else
+                               fprintf(stderr, "Unexpected EOF reading $TCPDIAG_FILE\n");
+                       break;
+               }
+
+               /* The length comes from the file: it must cover at least
+                * the header and must fit into buf. */
+               if (h->nlmsg_len < sizeof(*h) || h->nlmsg_len > sizeof(buf)) {
+                       fprintf(stderr, "Bad message length %u in $TCPDIAG_FILE\n",
+                               (unsigned)h->nlmsg_len);
+                       break;
+               }
+
+               got = fread(h+1, 1, NLMSG_ALIGN(h->nlmsg_len-sizeof(*h)), fp);
+               if (got + sizeof(*h) < h->nlmsg_len) {
+                       if (ferror(fp))
+                               perror("Reading $TCPDIAG_FILE");
+                       else
+                               fprintf(stderr, "Unexpected EOF reading $TCPDIAG_FILE\n");
+                       break;
+               }
+
+               /* The only legal exit point */
+               if (h->nlmsg_type == NLMSG_DONE) {
+                       ret = 0;
+                       break;
+               }
+
+               if (h->nlmsg_type == NLMSG_ERROR) {
+                       struct nlmsgerr *nlerr = (struct nlmsgerr*)NLMSG_DATA(h);
+                       if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr))) {
+                               fprintf(stderr, "ERROR truncated\n");
+                       } else {
+                               errno = -nlerr->error;
+                               perror("TCPDIAG answered");
+                       }
+                       break;
+               }
+
+               err = tcp_show_sock(h, f);
+               if (err < 0) {
+                       ret = err;
+                       break;
+               }
+       }
+
+       fclose(fp);
+       return ret;
+}
+
+/*
+ * Show TCP sockets.
+ *
+ * Sources are tried in this order: a dump file named by $TCPDIAG_FILE;
+ * the tcpdiag netlink interface (skipped when $PROC_NET_TCP or $PROC_ROOT
+ * is set); and finally the /proc/net/tcp tables, which are also used when
+ * the netlink dump does not return 0.
+ *
+ * Returns 0 on success, -1 with errno set on failure.
+ */
+int tcp_show(struct filter *f)
+{
+       int fd = -1;
+       char *buf = NULL;
+       int bufsize = 64*1024;
+       int estimate;
+       int saved_errno;
+
+       dg_proto = TCP_PROTO;
+
+       if (getenv("TCPDIAG_FILE") != NULL)
+               return tcp_show_netlink_file(f);
+
+       if (getenv("PROC_NET_TCP") == NULL && getenv("PROC_ROOT") == NULL) {
+               if (tcp_show_netlink(f, NULL) == 0)
+                       return 0;
+       }
+
+       /* Sigh... We have to parse /proc/net/tcp... */
+
+       /* Estimate amount of sockets and try to allocate
+        * huge buffer to read all the table at one read.
+        * Limit it by 16MB though. The assumption is: as soon as
+        * kernel was able to hold information about N connections,
+        * it is able to give us some memory for snapshot.
+        */
+       estimate = slabstat.socks+slabstat.tcp_syns;
+       if (f->states&(1<<SS_TIME_WAIT))
+               estimate += slabstat.tcp_tws;
+       if (estimate > (16*1024*1024)/128)
+               estimate = (16*1024*1024)/128;
+       estimate *= 128;
+       if (bufsize < estimate)
+               bufsize = estimate;
+
+       /* Halve the request until malloc() succeeds, but never go below
+        * 64KB. */
+       for (; bufsize >= 64*1024; bufsize /= 2) {
+               buf = malloc(bufsize);
+               if (buf != NULL)
+                       break;
+       }
+       if (buf == NULL) {
+               errno = ENOMEM;
+               return -1;
+       }
+
+       if (f->families & (1<<AF_INET)) {
+               fd = net_tcp_open();
+               if (fd < 0)
+                       goto outerr;
+               if (generic_record_read(fd, buf, bufsize, tcp_show_line, f, AF_INET))
+                       goto outerr;
+               close(fd);
+       }
+
+       /* A missing IPv6 table is not an error: it is simply skipped. */
+       if (f->families & (1<<AF_INET6)) {
+               fd = net_tcp6_open();
+               if (fd >= 0) {
+                       if (generic_record_read(fd, buf, bufsize, tcp_show_line, f, AF_INET6))
+                               goto outerr;
+                       close(fd);
+               }
+       }
+
+       free(buf);
+       return 0;
+
+outerr:
+       /* Keep the errno of the failing call across the cleanup. */
+       saved_errno = errno;
+       free(buf);
+       if (fd >= 0)
+               close(fd);
+       errno = saved_errno;
+       return -1;
+}
+
+
+/*
+ * Parse and print one record of a /proc/net/udp- or raw-style table.
+ *
+ * line is a NUL-terminated record; it is modified in place while being
+ * split into local address, remote address and the remaining data.
+ * Records whose state is not selected in f->states, or that are rejected
+ * by the filter expression f->f, are skipped silently.  The protocol
+ * label printed is the global dg_proto.
+ *
+ * Returns 0 on success or skip, -1 when the record does not contain the
+ * expected ':' separators.
+ */
+int dgram_show_line(char *line, struct filter *f, int family)
+{
+       struct tcpstat s;
+       char *loc, *rem, *data;
+       char opt[256];
+       int n;
+       char *p;
+
+       if ((p = strchr(line, ':')) == NULL)
+               return -1;
+       loc = p+2;
+
+       if ((p = strchr(loc, ':')) == NULL)
+               return -1;
+       p[5] = 0;
+       rem = p+6;
+
+       if ((p = strchr(rem, ':')) == NULL)
+               return -1;
+       p[5] = 0;
+       data = p+6;
+
+       /* The second character of the state field is decoded as one hex
+        * digit and tested against the state mask. */
+       do {
+               int state = (data[1] >= 'A') ? (data[1] - 'A' + 10) : (data[1] - '0');
+
+               if (!(f->states & (1<<state)))
+                       return 0;
+       } while (0);
+
+       s.local.family = s.remote.family = family;
+       if (family == AF_INET) {
+               sscanf(loc, "%x:%x", s.local.data, (unsigned*)&s.lport);
+               sscanf(rem, "%x:%x", s.remote.data, (unsigned*)&s.rport);
+               s.local.bytelen = s.remote.bytelen = 4;
+       } else {
+               sscanf(loc, "%08x%08x%08x%08x:%x",
+                      s.local.data,
+                      s.local.data+1,
+                      s.local.data+2,
+                      s.local.data+3,
+                      &s.lport);
+               sscanf(rem, "%08x%08x%08x%08x:%x",
+                      s.remote.data,
+                      s.remote.data+1,
+                      s.remote.data+2,
+                      s.remote.data+3,
+                      &s.rport);
+               s.local.bytelen = s.remote.bytelen = 16;
+       }
+
+       if (f->f && run_ssfilter(f->f, &s) == 0)
+               return 0;
+
+       opt[0] = 0;
+       /* The trailing free-form text is limited to 255 characters so that
+        * it can never overrun opt[]. */
+       n = sscanf(data, "%x %x:%x %*x:%*x %*x %d %*d %d %d %llx %255[^\n]\n",
+              &s.state, &s.wq, &s.rq,
+              &s.uid, &s.ino,
+              &s.refcnt, &s.sk, opt);
+
+       /* The format has eight assigning conversions (the %* ones are not
+        * counted), so opt is valid only when all eight succeeded. */
+       if (n < 8)
+               opt[0] = 0;
+
+       if (netid_width)
+               printf("%-*s ", netid_width, dg_proto);
+       if (state_width)
+               printf("%-*s ", state_width, sstate_name[s.state]);
+
+       printf("%-6d %-6d ", s.rq, s.wq);
+
+       formatted_print(&s.local, s.lport);
+       formatted_print(&s.remote, s.rport);
+
+       if (show_users) {
+               char ubuf[4096];
+               if (find_users(s.ino, ubuf, sizeof(ubuf)) > 0)
+                       printf(" users:(%s)", ubuf);
+       }
+
+       if (show_details) {
+               if (s.uid)
+                       printf(" uid=%u", (unsigned)s.uid);
+               printf(" ino=%u", (unsigned)s.ino);
+               printf(" sk=%llx", s.sk);
+               if (opt[0])
+                       printf(" opt:\"%s\"", opt);
+       }
+       printf("\n");
+
+       return 0;
+}
+
+
+/*
+ * Show UDP sockets by reading the UDP tables opened through
+ * net_udp_open() and net_udp6_open(), for the families selected in
+ * f->families.  Failure to open the IPv4 table is an error; a missing
+ * IPv6 table is skipped.
+ *
+ * Returns 0 on success, -1 with errno set on failure.
+ */
+int udp_show(struct filter *f)
+{
+       char buf[8192];
+       int fd = -1;
+       int saved_errno;
+
+       dg_proto = UDP_PROTO;
+
+       if (f->families&(1<<AF_INET)) {
+               fd = net_udp_open();
+               if (fd < 0)
+                       goto outerr;
+               if (generic_record_read(fd, buf, sizeof(buf), dgram_show_line, f, AF_INET))
+                       goto outerr;
+               close(fd);
+       }
+
+       if (f->families&(1<<AF_INET6)) {
+               fd = net_udp6_open();
+               if (fd >= 0) {
+                       if (generic_record_read(fd, buf, sizeof(buf), dgram_show_line, f, AF_INET6))
+                               goto outerr;
+                       close(fd);
+               }
+       }
+       return 0;
+
+outerr:
+       /* Keep the errno of the failing call across close(). */
+       saved_errno = errno;
+       if (fd >= 0)
+               close(fd);
+       errno = saved_errno;
+       return -1;
+}
+
+/*
+ * Show raw sockets by reading the raw tables opened through
+ * net_raw_open() and net_raw6_open(), for the families selected in
+ * f->families.  Failure to open the IPv4 table is an error; a missing
+ * IPv6 table is skipped.
+ *
+ * Returns 0 on success, -1 with errno set on failure.
+ */
+int raw_show(struct filter *f)
+{
+       char buf[8192];
+       int fd = -1;
+       int saved_errno;
+
+       dg_proto = RAW_PROTO;
+
+       if (f->families&(1<<AF_INET)) {
+               fd = net_raw_open();
+               if (fd < 0)
+                       goto outerr;
+               if (generic_record_read(fd, buf, sizeof(buf), dgram_show_line, f, AF_INET))
+                       goto outerr;
+               close(fd);
+       }
+
+       if (f->families&(1<<AF_INET6)) {
+               fd = net_raw6_open();
+               if (fd >= 0) {
+                       if (generic_record_read(fd, buf, sizeof(buf), dgram_show_line, f, AF_INET6))
+                               goto outerr;
+                       close(fd);
+               }
+       }
+       return 0;
+
+outerr:
+       /* Keep the errno of the failing call across close(). */
+       saved_errno = errno;
+       if (fd >= 0)
+               close(fd);
+       errno = saved_errno;
+       return -1;
+}
+
+
+struct unixstat                 /* one unix socket, kept in a singly linked list */
+{
+       struct unixstat *next;  /* next list element, NULL at the end */
+       int ino;                /* looked up when resolving another entry's peer */
+       int peer;               /* ino of the peer entry; 0 means no peer */
+       int rq;                 /* printed as the first queue column */
+       int wq;                 /* printed as the second queue column */
+       int state;              /* index into sstate_name[], bit in f->states */
+       int type;               /* compared with SOCK_STREAM / SOCK_DGRAM */
+       char *name;             /* may be NULL; released by unix_list_free() */
+};
+
+
+
+int unix_state_map[] = { SS_CLOSE, SS_SYN_SENT, /* NOTE(review): presumably indexed by the raw unix socket state to get an SS_* state - confirm at the point of use */
+                        SS_ESTABLISHED, SS_CLOSING };
+
+
+#define MAX_UNIX_REMEMBER (1024*1024/sizeof(struct unixstat)) /* number of struct unixstat entries that fit in 1MB; presumably a cap on the remembered list - confirm at the point of use */
+
+/* Release every element of a unixstat list together with its name. */
+void unix_list_free(struct unixstat *list)
+{
+       struct unixstat *next;
+
+       for (; list != NULL; list = next) {
+               /* Fetch the link before the node is freed. */
+               next = list->next;
+               free(list->name);
+               free(list);
+       }
+}
+
+/*
+ * Print every entry of 'list' that passes the state, table and
+ * expression filters held in 'f'.
+ *
+ * The peer column shows the name of the list entry whose inode equals
+ * the peer inode, "*" when there is no peer or the peer has no name,
+ * and "?" when the peer inode is not present in the list.
+ */
+void unix_list_print(struct unixstat *list, struct filter *f)
+{
+       struct unixstat *cur;
+
+       for (cur = list; cur != NULL; cur = cur->next) {
+               char *peer = "*";
+
+               if ((f->states & (1<<cur->state)) == 0)
+                       continue;
+               if (cur->type == SOCK_STREAM && (f->dbs & (1<<UNIX_ST_DB)) == 0)
+                       continue;
+               if (cur->type == SOCK_DGRAM && (f->dbs & (1<<UNIX_DG_DB)) == 0)
+                       continue;
+
+               if (cur->peer) {
+                       struct unixstat *cand = list;
+
+                       /* Linear search for the entry owning the peer inode. */
+                       while (cand != NULL && cand->ino != cur->peer)
+                               cand = cand->next;
+                       if (cand == NULL)
+                               peer = "?";
+                       else if (cand->name)
+                               peer = cand->name;
+               }
+
+               if (f->f) {
+                       struct tcpstat tst;
+
+                       tst.local.family = AF_UNIX;
+                       tst.remote.family = AF_UNIX;
+                       /* The name pointers themselves are stored in the address data. */
+                       memcpy(tst.local.data, &cur->name, sizeof(cur->name));
+                       if (strcmp(peer, "*") != 0)
+                               memcpy(tst.remote.data, &peer, sizeof(peer));
+                       else
+                               memset(tst.remote.data, 0, sizeof(peer));
+                       if (run_ssfilter(f->f, &tst) == 0)
+                               continue;
+               }
+
+               if (netid_width) {
+                       char *netid = "u_dgr";
+
+                       if (cur->type == SOCK_STREAM)
+                               netid = "u_str";
+                       printf("%-*s ", netid_width, netid);
+               }
+               if (state_width)
+                       printf("%-*s ", state_width, sstate_name[cur->state]);
+               printf("%-6d %-6d ", cur->rq, cur->wq);
+               printf("%*s %-*d %*s %-*d",
+                      addr_width, cur->name ? cur->name : "*",
+                      serv_width, cur->ino,
+                      addr_width, peer,
+                      serv_width, cur->peer);
+               if (show_users) {
+                       char ubuf[4096];
+
+                       if (find_users(cur->ino, ubuf, sizeof(ubuf)) > 0)
+                               printf(" users:(%s)", ubuf);
+               }
+               printf("\n");
+       }
+}
+
+/*
+ * Read the unix socket table, keep the entries sorted by (type, inode)
+ * and print them through unix_list_print(). The list is flushed each
+ * time it grows past MAX_UNIX_REMEMBER entries so memory use stays
+ * bounded.
+ *
+ * Lines that do not carry the seven mandatory fields are skipped, and a
+ * state value outside unix_state_map[] is reported as SS_CLOSE instead
+ * of indexing out of bounds.
+ *
+ * Returns 0 on success, -1 when the table cannot be opened.
+ */
+int unix_show(struct filter *f)
+{
+       FILE *fp;
+       char buf[256];
+       char name[128];
+       int  newformat = 0;
+       int  cnt;
+       const int nstates = sizeof(unix_state_map)/sizeof(unix_state_map[0]);
+       struct unixstat *list = NULL;
+
+       if ((fp = fdopen(net_unix_open(), "r")) == NULL)
+               return -1;
+
+       /* An empty table has no header line either: nothing to show. */
+       if (fgets(buf, sizeof(buf)-1, fp) == NULL) {
+               fclose(fp);
+               return 0;
+       }
+
+       if (memcmp(buf, "Peer", 4) == 0)
+               newformat = 1;
+       cnt = 0;
+
+       while (fgets(buf, sizeof(buf)-1, fp)) {
+               struct unixstat *u, **insp;
+               int flags = 0;
+               int nconv;
+
+               if (!(u = malloc(sizeof(*u))))
+                       break;
+               u->name = NULL;
+
+               /* The name field is optional; its width is capped to fit name[]. */
+               nconv = sscanf(buf, "%x: %x %x %x %x %x %d %127s",
+                              &u->peer, &u->rq, &u->wq, &flags, &u->type,
+                              &u->state, &u->ino, name);
+               if (nconv < 7) {
+                       free(u);
+                       continue;
+               }
+               if (nconv < 8)
+                       name[0] = 0;
+
+               if (flags&(1<<16)) {
+                       u->state = SS_LISTEN;
+               } else {
+                       if (u->state >= 1 && u->state <= nstates)
+                               u->state = unix_state_map[u->state-1];
+                       else
+                               u->state = SS_CLOSE;
+                       if (u->type == SOCK_DGRAM &&
+                           u->state == SS_CLOSE &&
+                           u->peer)
+                               u->state = SS_ESTABLISHED;
+               }
+
+               /* Without the "Peer" header the first three columns mean something else. */
+               if (!newformat) {
+                       u->peer = 0;
+                       u->rq = 0;
+                       u->wq = 0;
+               }
+
+               /* Insertion sort by (type, inode). */
+               insp = &list;
+               while (*insp) {
+                       if (u->type < (*insp)->type ||
+                           (u->type == (*insp)->type &&
+                            u->ino < (*insp)->ino))
+                               break;
+                       insp = &(*insp)->next;
+               }
+               u->next = *insp;
+               *insp = u;
+
+               if (name[0]) {
+                       /* u is already linked, so it is still printed and freed below. */
+                       if ((u->name = malloc(strlen(name)+1)) == NULL)
+                               break;
+                       strcpy(u->name, name);
+               }
+               if (++cnt > MAX_UNIX_REMEMBER) {
+                       unix_list_print(list, f);
+                       unix_list_free(list);
+                       list = NULL;
+                       cnt = 0;
+               }
+       }
+
+       if (list) {
+               unix_list_print(list, f);
+               unix_list_free(list);
+               list = NULL;
+               cnt = 0;
+       }
+
+       fclose(fp);
+       return 0;
+}
+
+
+/*
+ * Print the packet socket table. Packet sockets are always reported as
+ * UNCONN, so nothing is shown unless the filter selects SS_CLOSE.
+ *
+ * Lines that do not parse completely are skipped rather than printed
+ * with values left over from a previous line.
+ *
+ * Returns 0 on success, -1 when the table cannot be opened.
+ */
+int packet_show(struct filter *f)
+{
+       FILE *fp;
+       char buf[256];
+       int type;
+       int prot;
+       int iface;
+       int state;
+       int rq;
+       int uid;
+       int ino;
+       unsigned long long sk;
+
+       if (!(f->states & (1<<SS_CLOSE)))
+               return 0;
+
+       if ((fp = fdopen(net_packet_open(), "r")) == NULL)
+               return -1;
+
+       /* Discard the header line; an empty table has nothing to show. */
+       if (fgets(buf, sizeof(buf)-1, fp) == NULL) {
+               fclose(fp);
+               return 0;
+       }
+
+       while (fgets(buf, sizeof(buf)-1, fp)) {
+               /* Eight values are stored; the second column is skipped. */
+               if (sscanf(buf, "%llx %*d %d %x %d %d %u %u %u",
+                          &sk,
+                          &type, &prot, &iface, &state,
+                          &rq, &uid, &ino) < 8)
+                       continue;
+
+               if (type == SOCK_RAW && !(f->dbs&(1<<PACKET_R_DB)))
+                       continue;
+               if (type == SOCK_DGRAM && !(f->dbs&(1<<PACKET_DG_DB)))
+                       continue;
+               if (f->f) {
+                       struct tcpstat tst;
+                       tst.local.family = AF_PACKET;
+                       tst.remote.family = AF_PACKET;
+                       tst.rport = 0;
+                       tst.lport = iface;
+                       tst.local.data[0] = prot;
+                       tst.remote.data[0] = 0;
+                       if (run_ssfilter(f->f, &tst) == 0)
+                               continue;
+               }
+
+               if (netid_width)
+                       printf("%-*s ", netid_width,
+                              type == SOCK_RAW ? "p_raw" : "p_dgr");
+               if (state_width)
+                       printf("%-*s ", state_width, "UNCONN");
+               printf("%-6d %-6d ", rq, 0);
+               if (prot == 3) {
+                       printf("%*s:", addr_width, "*");
+               } else {
+                       char tb[16];
+                       printf("%*s:", addr_width,
+                              ll_proto_n2a(htons(prot), tb, sizeof(tb)));
+               }
+               if (iface == 0) {
+                       printf("%-*s ", serv_width, "*");
+               } else {
+                       printf("%-*s ", serv_width, xll_index_to_name(iface));
+               }
+               printf("%*s*%-*s",
+                      addr_width, "", serv_width, "");
+
+               if (show_users) {
+                       char ubuf[4096];
+                       if (find_users(ino, ubuf, sizeof(ubuf)) > 0)
+                               printf(" users:(%s)", ubuf);
+               }
+               if (show_details) {
+                       printf(" ino=%u uid=%u sk=%llx", ino, uid, sk);
+               }
+               printf("\n");
+       }
+
+       fclose(fp);
+       return 0;
+}
+
+/*
+ * Print the netlink socket table. Netlink sockets are always reported
+ * as UNCONN, so nothing is shown unless the filter selects SS_CLOSE.
+ *
+ * When service resolution is enabled, a positive pid is shown as
+ * "<command>/<pid>" using the command name read from the process's
+ * stat file under $PROC_ROOT (default "/proc"); pid 0 is shown as
+ * "kernel" and pid -1 as "*".
+ *
+ * Lines that do not parse completely are skipped.
+ *
+ * Returns 0 on success, -1 when the table cannot be opened.
+ */
+int netlink_show(struct filter *f)
+{
+       FILE *fp;
+       char buf[256];
+       int prot, pid;
+       unsigned groups;
+       int rq, wq, rc;
+       unsigned long long sk, cb;
+
+       if (!(f->states & (1<<SS_CLOSE)))
+               return 0;
+
+       if ((fp = fdopen(net_netlink_open(), "r")) == NULL)
+               return -1;
+
+       /* Discard the header line; an empty table has nothing to show. */
+       if (fgets(buf, sizeof(buf)-1, fp) == NULL) {
+               fclose(fp);
+               return 0;
+       }
+
+       while (fgets(buf, sizeof(buf)-1, fp)) {
+               if (sscanf(buf, "%llx %d %d %x %d %d %llx %d",
+                          &sk,
+                          &prot, &pid, &groups, &rq, &wq, &cb, &rc) < 8)
+                       continue;
+
+               if (f->f) {
+                       struct tcpstat tst;
+                       tst.local.family = AF_NETLINK;
+                       tst.remote.family = AF_NETLINK;
+                       tst.rport = -1;
+                       tst.lport = pid;
+                       tst.local.data[0] = prot;
+                       tst.remote.data[0] = 0;
+                       if (run_ssfilter(f->f, &tst) == 0)
+                               continue;
+               }
+
+               if (netid_width)
+                       printf("%-*s ", netid_width, "nl");
+               if (state_width)
+                       printf("%-*s ", state_width, "UNCONN");
+               printf("%-6d %-6d ", rq, wq);
+               if (resolve_services && prot == 0)
+                       printf("%*s:", addr_width, "rtnl");
+               else if (resolve_services && prot == 3)
+                       printf("%*s:", addr_width, "fw");
+               else if (resolve_services && prot == 4)
+                       printf("%*s:", addr_width, "tcpdiag");
+               else
+                       printf("%*d:", addr_width, prot);
+               if (pid == -1) {
+                       printf("%-*s ", serv_width, "*");
+               } else if (resolve_services) {
+                       int done = 0;
+                       if (!pid) {
+                               done = 1;
+                               printf("%-*s ", serv_width, "kernel");
+                       } else if (pid > 0) {
+                               char path[256];
+                               char comm[64];
+                               FILE *statfp;
+                               int n;
+
+                               /* PROC_ROOT comes from the environment: bound the copy. */
+                               n = snprintf(path, sizeof(path), "%s/%d/stat",
+                                            getenv("PROC_ROOT") ? : "/proc", pid);
+                               if (n > 0 && n < (int)sizeof(path) &&
+                                   (statfp = fopen(path, "r")) != NULL) {
+                                       /* Field width keeps the command name inside comm[]. */
+                                       if (fscanf(statfp, "%*d (%63[^)])", comm) == 1) {
+                                               char label[sizeof(comm) + 16];
+
+                                               snprintf(label, sizeof(label), "%s/%d", comm, pid);
+                                               printf("%-*s ", serv_width, label);
+                                               done = 1;
+                                       }
+                                       fclose(statfp);
+                               }
+                       }
+                       if (!done)
+                               printf("%-*d ", serv_width, pid);
+               } else {
+                       printf("%-*d ", serv_width, pid);
+               }
+               printf("%*s*%-*s",
+                      addr_width, "", serv_width, "");
+
+               if (show_details) {
+                       printf(" sk=%llx cb=%llx groups=0x%08x", sk, cb, groups);
+               }
+               printf("\n");
+       }
+
+       fclose(fp);
+       return 0;
+}
+
+struct snmpstat        /* SNMP counters used by print_summary() */
+{
+       int tcp_estab;  /* filled from get_snmp_int("Tcp:", "CurrEstab", ...) */
+};
+
+/*
+ * Look up one integer counter in the SNMP statistics table.
+ *
+ * The table is read as pairs of lines that both begin with 'proto':
+ * a header line of space-separated counter names followed by a line
+ * holding the values in the same positions. 'key' is located in the
+ * header line and the value at the same position is stored in *result.
+ *
+ * Returns 0 on success. Returns -1 with *result set to 0 when the table
+ * cannot be opened, or (with errno = ESRCH) when the protocol or key is
+ * not present or the value cannot be parsed.
+ */
+int get_snmp_int(char *proto, char *key, int *result)
+{
+       char buf[1024];
+       FILE *fp;
+       int protolen = strlen(proto);
+       int keylen = strlen(key);
+
+       *result = 0;
+
+       if ((fp = fdopen(net_snmp_open(), "r")) == NULL)
+               return -1;
+
+       while (fgets(buf, sizeof(buf), fp) != NULL) {
+               char *p = buf;
+               int  pos = 0;
+
+               /* strncmp() stops at the terminating NUL of a short line. */
+               if (strncmp(buf, proto, protolen))
+                       continue;
+
+               /* Count fields until the key is found in the header line. */
+               while ((p = strchr(p, ' ')) != NULL) {
+                       pos++;
+                       p++;
+                       if (strncmp(p, key, keylen) == 0 &&
+                           (p[keylen] == ' ' || p[keylen] == '\n' ||
+                            p[keylen] == '\0'))
+                               break;
+               }
+               /* Key absent: do not fall through and report the last column. */
+               if (p == NULL)
+                       break;
+
+               if (fgets(buf, sizeof(buf), fp) == NULL)
+                       break;
+               if (strncmp(buf, proto, protolen))
+                       break;
+
+               /* Walk to the same field position in the values line. */
+               p = buf;
+               while ((p = strchr(p, ' ')) != NULL) {
+                       p++;
+                       if (--pos == 0) {
+                               if (sscanf(p, "%d", result) != 1) {
+                                       *result = 0;
+                                       break;
+                               }
+                               fclose(fp);
+                               return 0;
+                       }
+               }
+               break;
+       }
+
+       fclose(fp);
+       errno = ESRCH;
+       return -1;
+}
+
+
+/* Get stats from sockstat */
+
+struct sockstat        /* counters filled by get_sockstat_line(), one group per labelled line */
+{
+       int socks;              /* first value on the "sockets:" line */
+       int tcp_mem;            /* fifth value on the "TCP:" line */
+       int tcp_total;          /* fourth value on the "TCP:" line */
+       int tcp_orphans;        /* second value on the "TCP:" line */
+       int tcp_tws;            /* third value on the "TCP:" line */
+       int tcp4_hashed;        /* first value on the "TCP:" line */
+       int udp4;               /* first value on the "UDP:" line */
+       int raw4;               /* first value on the "RAW:" line */
+       int frag4;              /* first value on the "FRAG:" line */
+       int frag4_mem;          /* second value on the "FRAG:" line */
+       int tcp6_hashed;        /* first value on the "TCP6:" line */
+       int udp6;               /* first value on the "UDP6:" line */
+       int raw6;               /* first value on the "RAW6:" line */
+       int frag6;              /* first value on the "FRAG6:" line */
+       int frag6_mem;          /* second value on the "FRAG6:" line */
+};
+
+/*
+ * Parse one "<label> <name> <value> ..." line of the sockstat table and
+ * store the values belonging to a known label in 's'. Lines with an
+ * unknown label, or without anything after the label, are ignored.
+ */
+static void get_sockstat_line(char *line, struct sockstat *s)
+{
+       char id[256], rem[256];
+       /* Labels whose line contributes a single counter. */
+       struct {
+               char *label;
+               int  *value;
+       } single[] = {
+               { "sockets:", &s->socks },
+               { "UDP:",     &s->udp4 },
+               { "UDP6:",    &s->udp6 },
+               { "RAW:",     &s->raw4 },
+               { "RAW6:",    &s->raw6 },
+               { "TCP6:",    &s->tcp6_hashed },
+       };
+       unsigned int k;
+
+       if (sscanf(line, "%[^ ] %[^\n]\n", id, rem) != 2)
+               return;
+
+       for (k = 0; k < sizeof(single)/sizeof(single[0]); k++) {
+               if (strcmp(id, single[k].label) == 0) {
+                       sscanf(rem, "%*s%d", single[k].value);
+                       return;
+               }
+       }
+
+       if (strcmp(id, "FRAG:") == 0) {
+               sscanf(rem, "%*s%d%*s%d", &s->frag4, &s->frag4_mem);
+               return;
+       }
+       if (strcmp(id, "FRAG6:") == 0) {
+               sscanf(rem, "%*s%d%*s%d", &s->frag6, &s->frag6_mem);
+               return;
+       }
+       if (strcmp(id, "TCP:") == 0) {
+               sscanf(rem, "%*s%d%*s%d%*s%d%*s%d%*s%d",
+                      &s->tcp4_hashed,
+                      &s->tcp_orphans, &s->tcp_tws, &s->tcp_total, &s->tcp_mem);
+       }
+}
+
+/*
+ * Zero '*s' and fill it from the sockstat table and then from the
+ * sockstat6 table. Returns -1 when the first table cannot be opened;
+ * a missing second table is not an error and still returns 0.
+ */
+int get_sockstat(struct sockstat *s)
+{
+       char line[256];
+       FILE *in;
+
+       memset(s, 0, sizeof(*s));
+
+       in = fdopen(net_sockstat_open(), "r");
+       if (in == NULL)
+               return -1;
+       for (;;) {
+               if (fgets(line, sizeof(line), in) == NULL)
+                       break;
+               get_sockstat_line(line, s);
+       }
+       fclose(in);
+
+       in = fdopen(net_sockstat6_open(), "r");
+       if (in != NULL) {
+               for (;;) {
+                       if (fgets(line, sizeof(line), in) == NULL)
+                               break;
+                       get_sockstat_line(line, s);
+               }
+               fclose(in);
+       }
+
+       return 0;
+}
+
+/*
+ * Print the socket summary: overall totals, a TCP breakdown and a
+ * per-transport table split by IP version. Failures to collect the
+ * statistics are reported with perror() and the summary is printed
+ * regardless. Always returns 0.
+ */
+int print_summary(void)
+{
+       struct sockstat s;
+       struct snmpstat sn;
+       int tcp_all, tcp_closed, tcp_hashed;
+       int inet4, inet6;
+
+       if (get_sockstat(&s) < 0)
+               perror("ss: get_sockstat");
+       if (get_snmp_int("Tcp:", "CurrEstab", &sn.tcp_estab) < 0)
+               perror("ss: get_snmpstat");
+
+       tcp_hashed = s.tcp4_hashed+s.tcp6_hashed;
+       tcp_all = s.tcp_total + slabstat.tcp_syns + s.tcp_tws;
+       tcp_closed = s.tcp_total - (tcp_hashed-s.tcp_tws);
+       inet4 = s.raw4+s.udp4+s.tcp4_hashed;
+       inet6 = s.raw6+s.udp6+s.tcp6_hashed;
+
+       printf("Total: %d (kernel %d)\n", s.socks, slabstat.socks);
+
+       printf("TCP:   %d (estab %d, closed %d, orphaned %d, synrecv %d, timewait %d/%d), ports %d\n",
+              tcp_all,
+              sn.tcp_estab,
+              tcp_closed,
+              s.tcp_orphans,
+              slabstat.tcp_syns,
+              s.tcp_tws, slabstat.tcp_tws,
+              slabstat.tcp_ports
+              );
+
+       printf("\n");
+       printf("Transport Total     IP        IPv6\n");
+       printf("*         %-9d %-9s %-9s\n", slabstat.socks, "-", "-");
+       printf("RAW       %-9d %-9d %-9d\n", s.raw4+s.raw6, s.raw4, s.raw6);
+       printf("UDP       %-9d %-9d %-9d\n", s.udp4+s.udp6, s.udp4, s.udp6);
+       printf("TCP       %-9d %-9d %-9d\n", tcp_hashed, s.tcp4_hashed, s.tcp6_hashed);
+       printf("INET      %-9d %-9d %-9d\n", inet4+inet6, inet4, inet6);
+       printf("FRAG      %-9d %-9d %-9d\n", s.frag4+s.frag6, s.frag4, s.frag6);
+
+       printf("\n");
+
+       return 0;
+}
+
+
+static void usage(void) __attribute__((noreturn));
+
+/* Write the command synopsis to stderr and terminate with exit status -1. */
+static void usage(void)
+{
+       static const char synopsis[] =
+"Usage: ss [ OPTIONS ]\n"
+"       ss [ OPTIONS ] [ FILTER ]\n"
+"where  OPTIONS := { -h[elp] | -V[ersion] | -n[umeric] | -r[esolve] |\n"
+"                    -a[ll] -l[istening] -o[ptions] -e[xtended] -p[rocesses]\n"
+"                    -A QUERY } -s[ummary]\n"
+"                    -f[amily] { inet | inet6 | link | unix } }\n"
+"       QUERY := {all|inet|tcp|udp|raw|unix|packet|netlink}[,QUERY]\n"
+"       FILTER := [ state TCP-STATE ] [ EXPRESSION ]\n"
+;
+
+       /* The text holds no conversion specifiers, so fputs() emits it verbatim. */
+       fputs(synopsis, stderr);
+       exit(-1);
+}
+
+
+/*
+ * Translate a state keyword from the command line into a bit mask of
+ * SS_* states. Group aliases are tried first, in a fixed order, then
+ * the individual state names from sstate_namel[]. Returns 0 when the
+ * keyword is not recognised.
+ */
+int scan_state(char *state)
+{
+       /*
+        * Ordered alias table: 'exact' entries are compared with
+        * strcasecmp(), the others with matches(). The order is
+        * significant and must not be changed.
+        */
+       struct {
+               char *word;
+               int   exact;
+               int   mask;
+       } alias[] = {
+               { "close",        1, (1<<SS_CLOSE) },
+               { "closed",       1, (1<<SS_CLOSE) },
+               { "syn-rcv",      1, (1<<SS_SYN_RECV) },
+               { "established",  0, (1<<SS_ESTABLISHED) },
+               { "all",          1, SS_ALL },
+               { "connected",    1, SS_ALL & ~((1<<SS_CLOSE)|(1<<SS_LISTEN)) },
+               { "synchronized", 0, SS_ALL & ~((1<<SS_CLOSE)|(1<<SS_LISTEN)|(1<<SS_SYN_SENT)) },
+               { "bucket",       1, (1<<SS_SYN_RECV)|(1<<SS_TIME_WAIT) },
+               { "big",          1, SS_ALL & ~((1<<SS_SYN_RECV)|(1<<SS_TIME_WAIT)) },
+       };
+       unsigned int k;
+       int idx;
+
+       for (k = 0; k < sizeof(alias)/sizeof(alias[0]); k++) {
+               int differs;
+
+               if (alias[k].exact)
+                       differs = strcasecmp(state, alias[k].word);
+               else
+                       differs = matches(state, alias[k].word);
+               if (differs == 0)
+                       return alias[k].mask;
+       }
+
+       for (idx = 0; idx < SS_MAX; idx++) {
+               if (matches(state, sstate_namel[idx]) == 0)
+                       return (1<<idx);
+       }
+       return 0;
+}
+
+
+/*
+ * Entry point of the ss utility.
+ *
+ * Parses the command-line options into current_filter (socket tables,
+ * states, address families), optionally prints the summary, parses the
+ * trailing state/expression filter, computes the column widths from the
+ * terminal width and finally dumps every selected socket table.
+ *
+ * With -D the matching TCP sockets are dumped raw through
+ * tcp_show_netlink() to the given file ("-" for stdout) instead.
+ *
+ * Returns 0; usage and fatal errors terminate through exit().
+ */
+int main(int argc, char *argv[])
+{
+       int do_default = 1;
+       int saw_states = 0;
+       int saw_query = 0;
+       int do_summary = 0;
+       char *dump_tcpdiag = NULL;
+       FILE *filter_fp = NULL;
+       int ch;
+
+       memset(&current_filter, 0, sizeof(current_filter));
+
+       current_filter.states = default_filter.states;
+
+       /* 'f' takes an argument (the family name), hence "f:". */
+       while ((ch = getopt(argc, argv, "h?aletuwxnro460spf:miA:D:F:vV")) != EOF) {
+               switch(ch) {
+               case 'n':
+                       resolve_services = 0;
+                       break;
+               case 'r':
+                       resolve_hosts = 1;
+                       break;
+               case 'o':
+                       show_options = 1;
+                       break;
+               case 'e':
+                       show_options = 1;
+                       show_details++;
+                       break;
+               case 'm':
+                       show_mem = 1;
+                       break;
+               case 'i':
+                       show_tcpinfo = 1;
+                       break;
+               case 'p':
+                       show_users++;
+                       break;
+               case 't':
+                       current_filter.dbs |= (1<<TCP_DB);
+                       do_default = 0;
+                       break;
+               case 'u':
+                       current_filter.dbs |= (1<<UDP_DB);
+                       do_default = 0;
+                       break;
+               case 'w':
+                       current_filter.dbs |= (1<<RAW_DB);
+                       do_default = 0;
+                       break;
+               case 'x':
+                       current_filter.dbs |= UNIX_DBM;
+                       do_default = 0;
+                       break;
+               case 'a':
+                       current_filter.states = SS_ALL;
+                       break;
+               case 'l':
+                       current_filter.states = (1<<SS_LISTEN);
+                       break;
+               case '4':
+                       preferred_family = AF_INET;
+                       break;
+               case '6':
+                       preferred_family = AF_INET6;
+                       break;
+               case '0':
+                       preferred_family = AF_PACKET;
+                       break;
+               case 'f':
+                       if (strcmp(optarg, "inet") == 0)
+                               preferred_family = AF_INET;
+                       else if (strcmp(optarg, "inet6") == 0)
+                               preferred_family = AF_INET6;
+                       else if (strcmp(optarg, "link") == 0)
+                               preferred_family = AF_PACKET;
+                       else if (strcmp(optarg, "unix") == 0)
+                               preferred_family = AF_UNIX;
+                       else if (strcmp(optarg, "netlink") == 0)
+                               preferred_family = AF_NETLINK;
+                       else if (strcmp(optarg, "help") == 0)
+                               usage();
+                       else {
+                               fprintf(stderr, "ss: \"%s\" is invalid family\n", optarg);
+                               usage();
+                       }
+                       break;
+               case 'A':
+               {
+                       char *p, *p1;
+                       /* The first -A replaces any tables chosen so far. */
+                       if (!saw_query) {
+                               current_filter.dbs = 0;
+                               saw_query = 1;
+                               do_default = 0;
+                       }
+                       /* Split the comma-separated list in place. */
+                       p = optarg;
+                       do {
+                               if ((p1 = strchr(p, ',')) != NULL)
+                                       *p1 = 0;
+                               if (strcmp(p, "all") == 0) {
+                                       current_filter.dbs = ALL_DB;
+                               } else if (strcmp(p, "inet") == 0) {
+                                       current_filter.dbs |= (1<<TCP_DB)|(1<<UDP_DB)|(1<<RAW_DB);
+                               } else if (strcmp(p, "udp") == 0) {
+                                       current_filter.dbs |= (1<<UDP_DB);
+                               } else if (strcmp(p, "tcp") == 0) {
+                                       current_filter.dbs |= (1<<TCP_DB);
+                               } else if (strcmp(p, "raw") == 0) {
+                                       current_filter.dbs |= (1<<RAW_DB);
+                               } else if (strcmp(p, "unix") == 0) {
+                                       current_filter.dbs |= UNIX_DBM;
+                               } else if (matches(p, "unix_stream") == 0 ||
+                                          strcmp(p, "u_str") == 0) {
+                                       current_filter.dbs |= (1<<UNIX_ST_DB);
+                               } else if (matches(p, "unix_dgram") == 0 ||
+                                          strcmp(p, "u_dgr") == 0) {
+                                       current_filter.dbs |= (1<<UNIX_DG_DB);
+                               } else if (strcmp(p, "packet") == 0) {
+                                       current_filter.dbs |= PACKET_DBM;
+                               } else if (strcmp(p, "packet_raw") == 0 ||
+                                          strcmp(p, "p_raw") == 0) {
+                                       current_filter.dbs |= (1<<PACKET_R_DB);
+                               } else if (strcmp(p, "packet_dgram") == 0 ||
+                                          strcmp(p, "p_dgr") == 0) {
+                                       current_filter.dbs |= (1<<PACKET_DG_DB);
+                               } else if (strcmp(p, "netlink") == 0) {
+                                       current_filter.dbs |= (1<<NETLINK_DB);
+                               } else {
+                                       fprintf(stderr, "ss: \"%s\" is illegal socket table id\n", p);
+                                       usage();
+                               }
+                               /* Advance only when another element follows. */
+                               p = p1 ? p1 + 1 : NULL;
+                       } while (p);
+                       break;
+               }
+               case 's':
+                       do_summary = 1;
+                       break;
+               case 'D':
+                       dump_tcpdiag = optarg;
+                       break;
+               case 'F':
+                       if (filter_fp) {
+                               fprintf(stderr, "More than one filter file\n");
+                               exit(-1);
+                       }
+                       if (optarg[0] == '-')
+                               filter_fp = stdin;
+                       else
+                               filter_fp = fopen(optarg, "r");
+                       if (!filter_fp) {
+                               perror("fopen filter file");
+                               exit(-1);
+                       }
+                       break;
+               case 'v':
+               case 'V':
+                       printf("ss utility, iproute2-ss%s\n", SNAPSHOT);
+                       exit(0);
+               case 'h':
+               case '?':
+               default:
+                       usage();
+               }
+       }
+
+       argc -= optind;
+       argv += optind;
+
+       get_slabstat(&slabstat);
+
+       if (do_summary) {
+               print_summary();
+               if (do_default && argc == 0)
+                       exit(0);
+       }
+
+       if (do_default)
+               current_filter.dbs = default_filter.dbs;
+
+       /* Infer the family when only one non-inet group of tables is selected. */
+       if (preferred_family == AF_UNSPEC) {
+               if (!(current_filter.dbs&~UNIX_DBM))
+                       preferred_family = AF_UNIX;
+               else if (!(current_filter.dbs&~PACKET_DBM))
+                       preferred_family = AF_PACKET;
+               else if (!(current_filter.dbs&~(1<<NETLINK_DB)))
+                       preferred_family = AF_NETLINK;
+       }
+
+       if (preferred_family != AF_UNSPEC) {
+               int mask2;
+               if (preferred_family == AF_INET ||
+                   preferred_family == AF_INET6) {
+                       mask2= (1<<TCP_DB);
+                       /*
+                        * With explicitly chosen tables UDP and RAW are
+                        * allowed in addition to TCP, not instead of it.
+                        */
+                       if (!do_default)
+                               mask2 |= (1<<UDP_DB)|(1<<RAW_DB);
+               } else if (preferred_family == AF_PACKET) {
+                       mask2 = PACKET_DBM;
+               } else if (preferred_family == AF_UNIX) {
+                       mask2 = UNIX_DBM;
+               } else if (preferred_family == AF_NETLINK) {
+                       mask2 = (1<<NETLINK_DB);
+               } else {
+                       mask2 = 0;
+               }
+
+               if (do_default)
+                       current_filter.dbs = mask2;
+               else
+                       current_filter.dbs &= mask2;
+               current_filter.families = (1<<preferred_family);
+       } else {
+               if (!do_default)
+                       current_filter.families = ~0;
+               else
+                       current_filter.families = default_filter.families;
+       }
+       if (current_filter.dbs == 0) {
+               fprintf(stderr, "ss: no socket tables to show with such filter.\n");
+               exit(0);
+       }
+       if (current_filter.families == 0) {
+               fprintf(stderr, "ss: no families to show with such filter.\n");
+               exit(0);
+       }
+
+       if (resolve_services && resolve_hosts &&
+           (current_filter.dbs&(UNIX_DBM|(1<<TCP_DB)|(1<<UDP_DB))))
+               init_service_resolver();
+
+       /* Now parse filter... */
+       if (argc == 0 && filter_fp) {
+               if (ssfilter_parse(&current_filter.f, 0, NULL, filter_fp))
+                       usage();
+       }
+
+       while (argc > 0) {
+               if (strcmp(*argv, "state") == 0) {
+                       NEXT_ARG();
+                       if (!saw_states)
+                               current_filter.states = 0;
+                       current_filter.states |= scan_state(*argv);
+                       saw_states = 1;
+               } else if (strcmp(*argv, "exclude") == 0 ||
+                          strcmp(*argv, "excl") == 0) {
+                       NEXT_ARG();
+                       if (!saw_states)
+                               current_filter.states = SS_ALL;
+                       current_filter.states &= ~scan_state(*argv);
+                       saw_states = 1;
+               } else {
+                       /* Everything that is left is the filter expression. */
+                       if (ssfilter_parse(&current_filter.f, argc, argv, filter_fp))
+                               usage();
+                       break;
+               }
+               argc--; argv++;
+       }
+
+       if (current_filter.states == 0) {
+               fprintf(stderr, "ss: no socket states to show with such filter.\n");
+               exit(0);
+       }
+
+       if (dump_tcpdiag) {
+               FILE *dump_fp = stdout;
+               if (!(current_filter.dbs & (1<<TCP_DB))) {
+                       fprintf(stderr, "ss: tcpdiag dump requested and no tcp in filter.\n");
+                       exit(0);
+               }
+               if (dump_tcpdiag[0] != '-') {
+                       dump_fp = fopen(dump_tcpdiag, "w");
+                       /* Test the stream just opened, not the file name. */
+                       if (!dump_fp) {
+                               perror("fopen dump file");
+                               exit(-1);
+                       }
+               }
+               tcp_show_netlink(&current_filter, dump_fp);
+               fflush(dump_fp);
+               exit(0);
+       }
+
+       /* The Netid/State columns are shown only when more than one bit is set. */
+       netid_width = 0;
+       if (current_filter.dbs&(current_filter.dbs-1))
+               netid_width = 5;
+
+       state_width = 0;
+       if (current_filter.states&(current_filter.states-1))
+               state_width = 10;
+
+       screen_width = 80;
+       if (isatty(STDOUT_FILENO)) {
+               struct winsize w;
+
+               if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) != -1) {
+                       if (w.ws_col > 0)
+                               screen_width = w.ws_col;
+               }
+       }
+
+       /* Space left for the two address:port columns. */
+       addrp_width = screen_width;
+       addrp_width -= netid_width+1;
+       addrp_width -= state_width+1;
+       addrp_width -= 14;
+
+       /* Give an odd leftover column to the first optional column shown. */
+       if (addrp_width&1) {
+               if (netid_width)
+                       netid_width++;
+               else if (state_width)
+                       state_width++;
+       }
+
+       addrp_width /= 2;
+       addrp_width--;
+
+       serv_width = resolve_services ? 7 : 5;
+
+       if (addrp_width < 15+serv_width+1)
+               addrp_width = 15+serv_width+1;
+
+       addr_width = addrp_width - serv_width - 1;
+
+       if (netid_width)
+               printf("%-*s ", netid_width, "Netid");
+       if (state_width)
+               printf("%-*s ", state_width, "State");
+       printf("%-6s %-6s ", "Recv-Q", "Send-Q");
+
+       printf("%*s:%-*s %*s:%-*s\n",
+              addr_width, "Local Address", serv_width, "Port",
+              addr_width, "Peer Address", serv_width, "Port");
+
+       fflush(stdout);
+
+       if (current_filter.dbs & (1<<NETLINK_DB))
+               netlink_show(&current_filter);
+       if (current_filter.dbs & PACKET_DBM)
+               packet_show(&current_filter);
+       if (current_filter.dbs & UNIX_DBM)
+               unix_show(&current_filter);
+       if (current_filter.dbs & (1<<RAW_DB))
+               raw_show(&current_filter);
+       if (current_filter.dbs & (1<<UDP_DB))
+               udp_show(&current_filter);
+       if (current_filter.dbs & (1<<TCP_DB))
+               tcp_show(&current_filter);
+       return 0;
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..00b92e3dcc6a7ffd31dfa46fd7575a8879359ed0 100644 (file)
@@ -0,0 +1,21 @@
+#define SSF_DCOND 0    /* node type built by the grammar for "DCOND HOSTCOND" and "DPORT '=' HOSTCOND" */
+#define SSF_SCOND 1    /* node type built for "SCOND HOSTCOND" */
+#define SSF_OR   2     /* NOTE(review): presumably the '|' operator node - confirm in ssfilter.y */
+#define SSF_AND          3     /* NOTE(review): presumably the '&' operator node - confirm in ssfilter.y */
+#define SSF_NOT          4     /* wraps another node; the grammar uses it for '>', '<' and NEQ */
+#define SSF_D_GE  5    /* node type built for "DPORT GEQ HOSTCOND" */
+#define SSF_D_LE  6    /* node type built for "DPORT LEQ HOSTCOND" */
+#define SSF_S_GE  7    /* node type built for "SPORT GEQ HOSTCOND" */
+#define SSF_S_LE  8    /* node type built for "SPORT LEQ HOSTCOND" */
+#define SSF_S_AUTO  9  /* NOTE(review): presumably tied to the AUTOBOUND token - confirm in ssfilter.y */
+
+struct ssfilter        /* one node of a parsed filter expression */
+{
+       int type;               /* one of the SSF_* codes */
+       struct ssfilter *post;  /* alloc_node() in ssfilter.y initialises this to NULL */
+       struct ssfilter *pred;  /* alloc_node() stores its pointer argument here: a nested node or a HOSTCOND value */
+};
+
+int ssfilter_parse(struct ssfilter **f, int argc, char **argv, FILE *fp);      /* ss.c treats a non-zero return as a usage error */
+void *parse_hostcond(char*);
+
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..f47ab2fdcacd01cfd84d146887c41498832fb842 100644 (file)
@@ -0,0 +1,274 @@
+%{
+
+#include <stdio.h>
+#include <malloc.h>
+#include <string.h>
+#include "ssfilter.h"
+
+typedef struct ssfilter * ssfilter_t;
+
+#define YYSTYPE ssfilter_t
+
+/*
+ * Allocate a filter tree node of the given type with "pred" as its
+ * first operand and no second operand.  Aborts the process when
+ * memory cannot be obtained, so the result is never NULL.
+ */
+static struct ssfilter *alloc_node(int type, void *pred)
+{
+       struct ssfilter *node;
+
+       node = malloc(sizeof(struct ssfilter));
+       if (!node)
+               abort();
+
+       node->post = NULL;
+       node->pred = pred;
+       node->type = type;
+       return node;
+}
+
+static char            **yy_argv;
+static int             yy_argc;
+static FILE            *yy_fp;
+static ssfilter_t      *yy_ret;
+
+static int yylex(void);
+
+static void yyerror(char *s) /* parser error hook: reports message s on stderr */
+{
+       fprintf(stderr, "ss: bison bellows (while parsing filter): \"%s!\"", s); /* no newline here; ssfilter_parse() appends " Sorry.\n" */
+}
+
+%}
+
+%token HOSTCOND DCOND SCOND DPORT SPORT LEQ GEQ NEQ AUTOBOUND
+%left '|'
+%left '&'
+%nonassoc '!'
+
+%%
+applet: null expr /* whole filter: hand the tree root back through yy_ret */
+        {
+                *yy_ret = $2;
+                $$ = $2;
+        }
+        | null /* empty filter: *yy_ret is left untouched */
+        ;
+null:   /* NOTHING */ { $$ = NULL; }
+        ;
+expr:  DCOND HOSTCOND /* dst X */
+        {
+               $$ = alloc_node(SSF_DCOND, $2);
+        }
+        | SCOND HOSTCOND /* src X */
+        { 
+               $$ = alloc_node(SSF_SCOND, $2);
+        }
+        | DPORT GEQ HOSTCOND
+        {
+                $$ = alloc_node(SSF_D_GE, $3);
+        }
+        | DPORT LEQ HOSTCOND
+        {
+                $$ = alloc_node(SSF_D_LE, $3);
+        }
+        | DPORT '>' HOSTCOND /* strict comparison is encoded as NOT of the opposite non-strict one */
+        {
+                $$ = alloc_node(SSF_NOT, alloc_node(SSF_D_LE, $3));
+        }
+        | DPORT '<' HOSTCOND
+        {
+                $$ = alloc_node(SSF_NOT, alloc_node(SSF_D_GE, $3));
+        }
+        | DPORT '=' HOSTCOND /* equality reuses the plain destination condition node */
+        {
+               $$ = alloc_node(SSF_DCOND, $3);
+        }
+        | DPORT NEQ HOSTCOND
+        {
+               $$ = alloc_node(SSF_NOT, alloc_node(SSF_DCOND, $3));
+        }
+
+        | SPORT GEQ HOSTCOND /* source port rules mirror the DPORT ones */
+        {
+                $$ = alloc_node(SSF_S_GE, $3);
+        }
+        | SPORT LEQ HOSTCOND
+        {
+                $$ = alloc_node(SSF_S_LE, $3);
+        }
+        | SPORT '>' HOSTCOND
+        {
+                $$ = alloc_node(SSF_NOT, alloc_node(SSF_S_LE, $3));
+        }
+        | SPORT '<' HOSTCOND
+        {
+                $$ = alloc_node(SSF_NOT, alloc_node(SSF_S_GE, $3));
+        }
+        | SPORT '=' HOSTCOND
+        {
+               $$ = alloc_node(SSF_SCOND, $3);
+        }
+        | SPORT NEQ HOSTCOND
+        {
+               $$ = alloc_node(SSF_NOT, alloc_node(SSF_SCOND, $3));
+        }
+
+        | AUTOBOUND /* leaf without operand */
+        {
+                $$ = alloc_node(SSF_S_AUTO, NULL);
+        }
+        | expr '|' expr /* left operand in pred, right operand in post */
+        {
+                $$ = alloc_node(SSF_OR, $1);
+               $$->post = $3;
+        }
+        | expr expr /* juxtaposition is an implicit AND */
+        {
+                $$ = alloc_node(SSF_AND, $1);
+               $$->post = $2;
+        }
+        | expr '&' expr
+
+        {
+                $$ = alloc_node(SSF_AND, $1);
+               $$->post = $3;
+        }
+        | '!' expr
+        {
+                $$ = alloc_node(SSF_NOT, $2);
+        }
+        | '(' expr ')' /* parentheses only group; no node is created */
+        {
+                $$ = $2;
+        }
+;
+%%
+
+static char *get_token_from_line(char **ptr) /* split the next blank-delimited token off *ptr, in place */
+{
+       char *tok, *cp = *ptr;
+
+       while (*cp == ' ' || *cp == '\t') cp++; /* skip leading blanks */
+
+       if (*cp == 0) { /* nothing but blanks left */
+               *ptr = cp;
+               return NULL;
+       }
+
+       tok = cp;
+
+       while (*cp != 0 && *cp != ' ' && *cp != '\t') {
+               /* Backslash escapes everything. */
+               if (*cp == '\\') {
+                       char *tp;
+                       for (tp = cp; tp != tok; tp--) /* shift the token prefix right by one, overwriting the backslash */
+                               *tp = *(tp-1);
+                       cp++; /* now at the escaped character */
+                       tok++; /* the token start moved along with the shifted prefix */
+                       if (*cp == 0) /* trailing backslash: nothing left to escape */
+                               break;
+               }
+               cp++;
+       }
+       if (*cp) /* terminate the token and step past the delimiter */
+               *cp++ = 0;
+       *ptr = cp; /* the caller resumes scanning from here */
+       return tok;
+}
+
+/*
+ * Lexer for the filter grammar.
+ *
+ * Input is taken first from the yy_argv[] words and, once those are
+ * used up, from lines read from yy_fp (when it is non-NULL).  Every
+ * word or line is split on blanks by get_token_from_line().
+ *
+ * Returns 0 at end of input, an operator/keyword token, or HOSTCOND
+ * with yylval set to the result of parse_hostcond().  Exits the
+ * process on an over-long input line or an operand that cannot be
+ * parsed.
+ */
+int yylex(void)
+{
+       static char argbuf[1024];
+       static char *tokptr = argbuf;
+       static int argc;
+       char *curtok;
+
+       do {
+               while (*tokptr == 0) {
+                       tokptr = NULL;
+                       if (argc < yy_argc) {
+                               tokptr = yy_argv[argc];
+                               argc++;
+                       } else if (yy_fp) {
+                               while (tokptr == NULL) {
+                                       size_t len;
+
+                                       /* fgets() itself reserves room for the NUL. */
+                                       if (fgets(argbuf, sizeof(argbuf), yy_fp) == NULL)
+                                               return 0;
+                                       len = strlen(argbuf);
+                                       if (len > 0 && argbuf[len-1] == '\n') {
+                                               argbuf[--len] = 0;
+                                       } else if (len == sizeof(argbuf) - 1) {
+                                               /* Buffer is full but the line has not ended. */
+                                               fprintf(stderr, "Too long line in filter");
+                                               exit(-1);
+                                       }
+                                       /* Skip comment lines and empty lines. */
+                                       if (argbuf[0] == '#' || argbuf[0] == '\0')
+                                               continue;
+                                       tokptr = argbuf;
+                               }
+                       } else {
+                               return 0;
+                       }
+               }
+       } while ((curtok = get_token_from_line(&tokptr)) == NULL);
+
+       if (strcmp(curtok, "!") == 0 ||
+           strcmp(curtok, "not") == 0)
+               return '!';
+       if (strcmp(curtok, "&") == 0 ||
+           strcmp(curtok, "&&") == 0 ||
+           strcmp(curtok, "and") == 0)
+               return '&';
+       if (strcmp(curtok, "|") == 0 ||
+           strcmp(curtok, "||") == 0 ||
+           strcmp(curtok, "or") == 0)
+               return '|';
+       if (strcmp(curtok, "(") == 0)
+               return '(';
+       if (strcmp(curtok, ")") == 0)
+               return ')';
+       if (strcmp(curtok, "dst") == 0)
+               return DCOND;
+       if (strcmp(curtok, "src") == 0)
+               return SCOND;
+       if (strcmp(curtok, "dport") == 0)
+               return DPORT;
+       if (strcmp(curtok, "sport") == 0)
+               return SPORT;
+       if (strcmp(curtok, ">=") == 0 ||
+           strcmp(curtok, "ge") == 0 ||
+           strcmp(curtok, "geq") == 0)
+               return GEQ;
+       if (strcmp(curtok, "<=") == 0 ||
+           strcmp(curtok, "le") == 0 ||
+           strcmp(curtok, "leq") == 0)
+               return LEQ;
+       if (strcmp(curtok, "!=") == 0 ||
+           strcmp(curtok, "ne") == 0 ||
+           strcmp(curtok, "neq") == 0)
+               return NEQ;
+       if (strcmp(curtok, "=") == 0 ||
+           strcmp(curtok, "==") == 0 ||
+           strcmp(curtok, "eq") == 0)
+               return '=';
+       if (strcmp(curtok, ">") == 0 ||
+           strcmp(curtok, "gt") == 0)
+               return '>';
+       if (strcmp(curtok, "<") == 0 ||
+           strcmp(curtok, "lt") == 0)
+               return '<';
+       if (strcmp(curtok, "autobound") == 0)
+               return AUTOBOUND;
+
+       /* Anything that is not a keyword or operator must be an operand. */
+       yylval = (void*)parse_hostcond(curtok);
+       if (yylval == NULL) {
+               fprintf(stderr, "Cannot parse dst/src address.\n");
+               exit(1);
+       }
+       return HOSTCOND;
+}
+
+/*
+ * Parse a filter expression into a tree.
+ *
+ * The words argv[0..argc-1] are consumed first, then lines from fp
+ * if it is non-NULL.  On success the tree root is stored through f
+ * by the grammar and 0 is returned; on a parse error " Sorry.\n" is
+ * written to stderr and -1 is returned.
+ */
+int ssfilter_parse(struct ssfilter **f, int argc, char **argv, FILE *fp)
+{
+       int failed;
+
+       /* Publish the input sources and result slot for yylex()/yyparse(). */
+       yy_ret  = f;
+       yy_fp   = fp;
+       yy_argv = argv;
+       yy_argc = argc;
+
+       failed = yyparse();
+       if (!failed)
+               return 0;
+
+       fprintf(stderr, " Sorry.\n");
+       return -1;
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..ec1d3399193ddf17e47ec385edd5d074cb892d83 100644 (file)
@@ -0,0 +1,54 @@
+# Objects linked directly into the tc binary.
+TCOBJ=tc.o tc_qdisc.o tc_class.o tc_filter.o tc_util.o m_police.o m_estimator.o
+
+include ../Config
+
+# Queueing discipline (q_*) and filter (f_*) modules that are always built.
+TCMODULES :=
+TCMODULES += q_fifo.o
+TCMODULES += q_sfq.o
+TCMODULES += q_red.o
+TCMODULES += q_prio.o
+TCMODULES += q_tbf.o
+TCMODULES += q_cbq.o
+TCMODULES += f_rsvp.o
+TCMODULES += f_u32.o
+TCMODULES += f_route.o
+TCMODULES += f_fw.o
+# Optional modules, switched on from ../Config.
+ifeq ($(TC_CONFIG_DIFFSERV),y)
+  TCMODULES += q_dsmark.o
+  TCMODULES += q_gred.o
+  TCMODULES += f_tcindex.o
+  TCMODULES += q_ingress.o
+endif
+ifeq ($(TC_CONFIG_ATM),y)
+  TCMODULES += q_atm.o
+  LDLIBS += -latm
+endif
+
+#TCMODULES += q_csz.o
+#TCMODULES += q_hpfq.o
+#TCMODULES += q_hfsc.o
+
+TCOBJ += $(TCMODULES)
+
+# Objects archived into libtc.a.
+TCLIB := tc_core.o
+TCLIB += tc_red.o
+TCLIB += tc_cbq.o
+TCLIB += tc_estimator.o
+
+LDLIBS += -L. -ltc -lm -ldl
+LDFLAGS += -Wl,-export-dynamic
+
+# These targets do not name files; keep make from being confused by
+# stray files called "all", "install" or "clean".
+.PHONY: all install clean
+
+all: libtc.a tc
+
+tc: $(TCOBJ) $(LIBNETLINK) $(LIBUTIL) $(TCLIB)
+
+libtc.a: $(TCLIB)
+       $(AR) rcs $@ $(TCLIB)
+
+install: all
+       install -m 0755 -s tc $(DESTDIR)$(SBINDIR)
+
+
+clean:
+       rm -f $(TCOBJ) $(TCLIB) libtc.a tc
+
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..9400438a1d86884ad4ae350771221ef67a8c1b4a 100644 (file)
@@ -0,0 +1,47 @@
+Kernel code and interface.
+--------------------------
+
+* Compile time switches
+
+There is only one, but very important, compile time switch.
+It is not settable by "make config", but should be selected
+manually and after a bit of thinking in <include/net/pkt_sched.h>
+
+PSCHED_CLOCK_SOURCE can take three values:
+
+       PSCHED_GETTIMEOFDAY
+       PSCHED_JIFFIES
+       PSCHED_CPU
+
+
+ PSCHED_GETTIMEOFDAY
+
+Default setting is the most conservative PSCHED_GETTIMEOFDAY.
+It is very slow both because of weird slowness of do_gettimeofday()
+and because it forces code to use unnatural "timeval" format,
+where microseconds and seconds fields are separate.
+Besides that, it will misbehave when delays exceed 2 seconds
+(e.g. very slow links or classes bound to a small slice of bandwidth).
+To sum up: as soon as you get it working, select the correct clock
+source and forget about PSCHED_GETTIMEOFDAY forever.
+
+
+ PSCHED_JIFFIES
+
+Clock is derived from jiffies. On architectures with HZ=100
+granularity of this clock is not enough to make reasonable
+bindings to real time. However, taking into account Linux
+architecture problems, which force us to use artificial
+integrated clock in any case, this switch is not so bad
+for scheduling even on high speed networks, though policing
+is not reliable.
+
+
+ PSCHED_CPU
+
+It is available only for alpha and pentiums with correct
+CPU timestamp. It is the fastest way; use it when it is available,
+but remember: not all pentiums have this facility, and
+many of them have a clock that is broken by APM etc.
+
+
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..3c5e3e2fafaf66683598f08e59a2c5e06b7460b4 100644 (file)
--- a/tc/f_fw.c
+++ b/tc/f_fw.c
@@ -0,0 +1,116 @@
+/*
+ * f_fw.c              FW filter.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "utils.h"
+#include "tc_util.h"
+
+/* Print the usage summary of the "fw" filter to stderr. */
+static void explain(void)
+{
+       static const char text[] =
+               "Usage: ... fw [ classid CLASSID ] [ police POLICE_SPEC ]\n"
+               "       POLICE_SPEC := ... look at TBF\n"
+               "       CLASSID := X:Y\n";
+
+       fputs(text, stderr);
+}
+
+#define usage() return(-1)
+
+/*
+ * Parse the command line options of the "fw" filter into message n.
+ *
+ * handle, when non-NULL, is parsed as a number into tcm_handle.  The
+ * remaining words (classid/flowid CLASSID, police ...) are appended as
+ * attributes nested in TCA_OPTIONS.  Returns 0 on success and -1 after
+ * printing a diagnostic on any error ("help" also returns -1).
+ */
+static int fw_parse_opt(struct filter_util *qu, char *handle, int argc, char **argv, struct nlmsghdr *n)
+{
+       struct tcmsg *t = NLMSG_DATA(n);
+       struct rtattr *tail;
+
+       if (handle) {
+               if (get_u32(&t->tcm_handle, handle, 0)) {
+                       fprintf(stderr, "Illegal \"handle\"\n");
+                       return -1;
+               }
+       }
+
+       if (argc == 0)
+               return 0;
+
+       /* Remember where TCA_OPTIONS starts so its length can be fixed up below. */
+       tail = (struct rtattr*)(((void*)n)+NLMSG_ALIGN(n->nlmsg_len));
+       addattr_l(n, 4096, TCA_OPTIONS, NULL, 0);
+
+       while (argc > 0) {
+               if (matches(*argv, "classid") == 0 ||
+                   matches(*argv, "flowid") == 0) {
+                       unsigned classid;
+                       NEXT_ARG();
+                       if (get_tc_classid(&classid, *argv)) {
+                               fprintf(stderr, "Illegal \"classid\"\n");
+                               return -1;
+                       }
+                       addattr_l(n, 4096, TCA_FW_CLASSID, &classid, 4);
+               } else if (matches(*argv, "police") == 0) {
+                       NEXT_ARG();
+                       if (parse_police(&argc, &argv, TCA_FW_POLICE, n)) {
+                               fprintf(stderr, "Illegal \"police\"\n");
+                               return -1;
+                       }
+                       /* parse_police() has already advanced argc/argv. */
+                       continue;
+               } else if (strcmp(*argv, "help") == 0) {
+                       explain();
+                       return -1;
+               } else {
+                       fprintf(stderr, "What is \"%s\"?\n", *argv);
+                       explain();
+                       return -1;
+               }
+               argc--; argv++;
+       }
+       /* TCA_OPTIONS covers everything appended since "tail" was taken. */
+       tail->rta_len = (((void*)n)+n->nlmsg_len) - (void*)tail;
+       return 0;
+}
+
+/*
+ * Print the options of an "fw" filter to f: the handle (when non-zero),
+ * the class id and the policer, each only if present.  Always returns 0;
+ * nothing is printed when opt is NULL.
+ */
+static int fw_print_opt(struct filter_util *qu, FILE *f, struct rtattr *opt, __u32 handle)
+{
+       struct rtattr *attrs[TCA_FW_MAX+1];
+       struct rtattr *classid, *police;
+
+       if (!opt)
+               return 0;
+
+       memset(attrs, 0, sizeof(attrs));
+       parse_rtattr(attrs, TCA_FW_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt));
+       classid = attrs[TCA_FW_CLASSID];
+       police = attrs[TCA_FW_POLICE];
+
+       if (handle != 0)
+               fprintf(f, "handle 0x%x ", handle);
+
+       if (classid != NULL) {
+               SPRINT_BUF(buf);
+               __u32 id = *(__u32*)RTA_DATA(classid);
+
+               fprintf(f, "classid %s ", sprint_tc_classid(id, buf));
+       }
+
+       if (police != NULL)
+               tc_print_police(f, police);
+       return 0;
+}
+
+struct filter_util fw_util = { /* descriptor of the "fw" filter; positional initializer */
+       NULL, /* first member left NULL; presumably a list link - confirm against tc_util.h */
+       "fw", /* filter name */
+       fw_parse_opt, /* command line option parser */
+       fw_print_opt, /* option printer */
+};
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..f13c28b5d3dcbb2c922218eefe96d0da7608dd99 100644 (file)
@@ -0,0 +1,175 @@
+/*
+ * f_route.c           ROUTE filter.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "utils.h"
+#include "rt_names.h"
+#include "tc_util.h"
+
+/* Print the usage summary of the "route" filter to stderr. */
+static void explain(void)
+{
+       static const char text[] =
+               "Usage: ... route [ from REALM | fromif TAG ] [ to REALM ]\n"
+               "                [ flowid CLASSID ] [ police POLICE_SPEC ]\n"
+               "       POLICE_SPEC := ... look at TBF\n"
+               "       CLASSID := X:Y\n";
+
+       fputs(text, stderr);
+}
+
+#define usage() return(-1)
+
+/*
+ * Parse the command line options of the "route" filter into message n.
+ *
+ * handle, when non-NULL, is parsed as a number into tcm_handle.  The
+ * remaining words (to, from, fromif, classid/flowid, police, order) are
+ * appended as attributes nested in TCA_OPTIONS.  When no explicit
+ * handle was given, one is composed from the to/from/fromif/order
+ * values.  Returns 0 on success and -1 after printing a diagnostic on
+ * any error ("help" also returns -1).
+ */
+static int route_parse_opt(struct filter_util *qu, char *handle, int argc, char **argv, struct nlmsghdr *n)
+{
+       struct tcmsg *t = NLMSG_DATA(n);
+       struct rtattr *tail;
+       __u32 fh = 0xFFFF8000;
+       __u32 order = 0;
+
+       if (handle) {
+               if (get_u32(&t->tcm_handle, handle, 0)) {
+                       fprintf(stderr, "Illegal \"handle\"\n");
+                       return -1;
+               }
+       }
+
+       if (argc == 0)
+               return 0;
+
+       /* Remember where TCA_OPTIONS starts so its length can be fixed up below. */
+       tail = (struct rtattr*)(((void*)n)+NLMSG_ALIGN(n->nlmsg_len));
+       addattr_l(n, 4096, TCA_OPTIONS, NULL, 0);
+
+       while (argc > 0) {
+               if (matches(*argv, "to") == 0) {
+                       __u32 id;
+                       NEXT_ARG();
+                       if (rtnl_rtrealm_a2n(&id, *argv)) {
+                               fprintf(stderr, "Illegal \"to\"\n");
+                               return -1;
+                       }
+                       addattr_l(n, 4096, TCA_ROUTE4_TO, &id, 4);
+                       /* Destination realm goes into the low byte; bit 15 is cleared. */
+                       fh &= ~0x80FF;
+                       fh |= id&0xFF;
+               } else if (matches(*argv, "from") == 0) {
+                       __u32 id;
+                       NEXT_ARG();
+                       if (rtnl_rtrealm_a2n(&id, *argv)) {
+                               fprintf(stderr, "Illegal \"from\"\n");
+                               return -1;
+                       }
+                       addattr_l(n, 4096, TCA_ROUTE4_FROM, &id, 4);
+                       /* Source realm goes into the upper 16 bits. */
+                       fh &= 0xFFFF;
+                       fh |= id<<16;
+               } else if (matches(*argv, "fromif") == 0) {
+                       struct rtnl_handle rth;
+                       int ifindex;
+                       __u32 id;
+                       NEXT_ARG();
+                       /* Best effort: load the interface map before the lookup. */
+                       if (rtnl_open(&rth, 0) == 0) {
+                               ll_init_map(&rth);
+                               rtnl_close(&rth);
+                       }
+                       /* Test the result in a signed variable so "<= 0" is meaningful. */
+                       ifindex = ll_name_to_index(*argv);
+                       if (ifindex <= 0) {
+                               fprintf(stderr, "Illegal \"fromif\"\n");
+                               return -1;
+                       }
+                       id = ifindex;
+                       addattr_l(n, 4096, TCA_ROUTE4_IIF, &id, 4);
+                       /* Interface index goes into the upper 16 bits, flagged by 0x8000. */
+                       fh &= 0xFFFF;
+                       fh |= (0x8000|id)<<16;
+               } else if (matches(*argv, "classid") == 0 ||
+                          strcmp(*argv, "flowid") == 0) {
+                       unsigned classid;
+                       NEXT_ARG();
+                       if (get_tc_classid(&classid, *argv)) {
+                               fprintf(stderr, "Illegal \"classid\"\n");
+                               return -1;
+                       }
+                       addattr_l(n, 4096, TCA_ROUTE4_CLASSID, &classid, 4);
+               } else if (matches(*argv, "police") == 0) {
+                       NEXT_ARG();
+                       if (parse_police(&argc, &argv, TCA_ROUTE4_POLICE, n)) {
+                               fprintf(stderr, "Illegal \"police\"\n");
+                               return -1;
+                       }
+                       /* parse_police() has already advanced argc/argv. */
+                       continue;
+               } else if (matches(*argv, "order") == 0) {
+                       NEXT_ARG();
+                       if (get_u32(&order, *argv, 0)) {
+                               fprintf(stderr, "Illegal \"order\"\n");
+                               return -1;
+                       }
+               } else if (strcmp(*argv, "help") == 0) {
+                       explain();
+                       return -1;
+               } else {
+                       fprintf(stderr, "What is \"%s\"?\n", *argv);
+                       explain();
+                       return -1;
+               }
+               argc--; argv++;
+       }
+       /* TCA_OPTIONS covers everything appended since "tail" was taken. */
+       tail->rta_len = (((void*)n)+n->nlmsg_len) - (void*)tail;
+       if (order) {
+               /* The order occupies bits 8..14 of the generated handle. */
+               fh &= ~0x7F00;
+               fh |= (order<<8)&0x7F00;
+       }
+       /* An explicit handle from the command line wins over the generated one. */
+       if (!t->tcm_handle)
+               t->tcm_handle = fh;
+       return 0;
+}
+
+/*
+ * Print the options of a "route" filter to f: the handle and the order
+ * encoded in it (when non-zero), then flowid, to, from, fromif and the
+ * policer, each only if present.  Every field is followed by a space so
+ * that consecutive fields stay separated.  Always returns 0; nothing is
+ * printed when opt is NULL.
+ */
+static int route_print_opt(struct filter_util *qu, FILE *f, struct rtattr *opt, __u32 handle)
+{
+       struct rtattr *tb[TCA_ROUTE4_MAX+1];
+       SPRINT_BUF(b1);
+
+       if (opt == NULL)
+               return 0;
+
+       memset(tb, 0, sizeof(tb));
+       parse_rtattr(tb, TCA_ROUTE4_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt));
+
+       if (handle)
+               fprintf(f, "fh 0x%08x ", handle);
+       if (handle&0x7F00)
+               fprintf(f, "order %u ", (handle>>8)&0x7F);
+
+       /* b1 is reused: each fprintf() consumes it before the next fill. */
+       if (tb[TCA_ROUTE4_CLASSID])
+               fprintf(f, "flowid %s ", sprint_tc_classid(*(__u32*)RTA_DATA(tb[TCA_ROUTE4_CLASSID]), b1));
+       if (tb[TCA_ROUTE4_TO])
+               fprintf(f, "to %s ", rtnl_rtrealm_n2a(*(__u32*)RTA_DATA(tb[TCA_ROUTE4_TO]), b1, sizeof(b1)));
+       if (tb[TCA_ROUTE4_FROM])
+               fprintf(f, "from %s ", rtnl_rtrealm_n2a(*(__u32*)RTA_DATA(tb[TCA_ROUTE4_FROM]), b1, sizeof(b1)));
+       if (tb[TCA_ROUTE4_IIF])
+               fprintf(f, "fromif %s ", ll_index_to_name(*(int*)RTA_DATA(tb[TCA_ROUTE4_IIF])));
+       if (tb[TCA_ROUTE4_POLICE])
+               tc_print_police(f, tb[TCA_ROUTE4_POLICE]);
+       return 0;
+}
+
+struct filter_util route_util = { /* descriptor of the "route" filter; positional initializer */
+       NULL, /* first member left NULL; presumably a list link - confirm against tc_util.h */
+       "route", /* filter name */
+       route_parse_opt, /* command line option parser */
+       route_print_opt, /* option printer */
+};
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..3d9b5283be8c41df2c75d1e1cb72031050cd662e 100644 (file)
@@ -0,0 +1,408 @@
+/*
+ * f_rsvp.c            RSVP filter.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "rt_names.h"
+#include "utils.h"
+#include "tc_util.h"
+
+/*
+ * Print the usage summary of the "rsvp" filter to stderr.
+ * The optional "sender" clause is closed with its own bracket and the
+ * class identifier syntax is described under the name used in the
+ * option list (CLASSID).
+ */
+static void explain(void)
+{
+       fprintf(stderr, "Usage: ... rsvp ipproto PROTOCOL session DST[/PORT | GPI ]\n");
+       fprintf(stderr, "                [ sender SRC[/PORT | GPI ] ]\n");
+       fprintf(stderr, "                [ classid CLASSID ] [ police POLICE_SPEC ]\n");
+       fprintf(stderr, "                [ tunnelid ID ] [ tunnel ID skip NUMBER ]\n");
+       fprintf(stderr, "Where: GPI := { flowlabel NUMBER | spi/ah SPI | spi/esp SPI |\n");
+       fprintf(stderr, "                u{8|16|32} NUMBER mask MASK at OFFSET}\n");
+       fprintf(stderr, "       POLICE_SPEC := ... look at TBF\n");
+       fprintf(stderr, "       CLASSID := X:Y\n");
+}
+
+#define usage() return(-1)
+
+/*
+ * Parse "ADDR[/PORT]" optionally followed by a generalized port
+ * identifier (spi/ah, spi/esp, flowlabel, u8/u16/u32 ... mask ... at ...).
+ *
+ * argc_p/argv_p: argument cursor; advanced past everything consumed.
+ * addr:          receives the parsed address.
+ * pinfo:         the dpi member is filled when dir is non-zero, spi
+ *                otherwise; protocol is set for spi/ah and spi/esp when
+ *                it is still 0.
+ * family:        address family handed to get_addr_1(); "flowlabel" is
+ *                accepted only for AF_INET6.
+ *
+ * Key and mask are stored in network byte order.  Returns 0 on success
+ * and -1 on a malformed argument.
+ */
+int get_addr_and_pi(int *argc_p, char ***argv_p, inet_prefix * addr,
+                   struct tc_rsvp_pinfo *pinfo, int dir, int family)
+{
+       int argc = *argc_p;
+       char **argv = *argv_p;
+       char *p = strchr(*argv, '/');
+       struct tc_rsvp_gpi *pi = dir ? &pinfo->dpi : &pinfo->spi;
+       int bad_addr;
+
+       if (p) {
+               __u16 tmp;
+
+               if (get_u16(&tmp, p+1, 0))
+                       return -1;
+
+               if (dir == 0) {
+                       /* Source port: u16 at offset 0 */
+                       pi->key = htonl(((__u32)tmp)<<16);
+                       pi->mask = htonl(0xFFFF0000);
+               } else {
+                       /* Destination port: u16 at offset 2 */
+                       pi->key = htonl(((__u32)tmp));
+                       pi->mask = htonl(0x0000FFFF);
+               }
+               pi->offset = 0;
+               /* Cut the port off temporarily so only the address is parsed. */
+               *p = 0;
+       }
+       bad_addr = get_addr_1(addr, *argv, family);
+       /* Put the separator back on every path, including failure. */
+       if (p)
+               *p = '/';
+       if (bad_addr)
+               return -1;
+
+       argc--; argv++;
+
+       /* A port was given, or nothing follows: no GPI to parse. */
+       if (pi->mask || argc <= 0)
+               goto done;
+
+       if (strcmp(*argv, "spi/ah") == 0 ||
+           strcmp(*argv, "gpi/ah") == 0) {
+               __u32 gpi;
+               NEXT_ARG();
+               if (get_u32(&gpi, *argv, 0))
+                       return -1;
+               pi->mask = htonl(0xFFFFFFFF);
+               pi->key = htonl(gpi);
+               pi->offset = 4;
+               if (pinfo->protocol == 0)
+                       pinfo->protocol = IPPROTO_AH;
+               argc--; argv++;
+       } else if (strcmp(*argv, "spi/esp") == 0 ||
+                  strcmp(*argv, "gpi/esp") == 0) {
+               __u32 gpi;
+               NEXT_ARG();
+               if (get_u32(&gpi, *argv, 0))
+                       return -1;
+               pi->mask = htonl(0xFFFFFFFF);
+               pi->key = htonl(gpi);
+               pi->offset = 0;
+               if (pinfo->protocol == 0)
+                       pinfo->protocol = IPPROTO_ESP;
+               argc--; argv++;
+       } else if (strcmp(*argv, "flowlabel") == 0) {
+               __u32 flabel;
+               NEXT_ARG();
+               if (get_u32(&flabel, *argv, 0))
+                       return -1;
+               if (family != AF_INET6)
+                       return -1;
+               pi->mask = htonl(0x000FFFFF);
+               pi->key = htonl(flabel) & pi->mask;
+               pi->offset = -40;
+               argc--; argv++;
+       } else if (strcmp(*argv, "u32") == 0 ||
+                  strcmp(*argv, "u16") == 0 ||
+                  strcmp(*argv, "u8") == 0) {
+               int sz = 1;
+               __u32 tmp;
+               __u32 mask = 0xff;
+               /* The default mask covers exactly the selected width. */
+               if (strcmp(*argv, "u32") == 0) {
+                       sz = 4;
+                       mask = 0xffffffff;
+               } else if (strcmp(*argv, "u16") == 0) {
+                       sz = 2;
+                       mask = 0xffff;
+               }
+               NEXT_ARG();
+               if (get_u32(&tmp, *argv, 0))
+                       return -1;
+               argc--; argv++;
+               /* "mask" and "at" are optional; do not look past the last word. */
+               if (argc > 0 && strcmp(*argv, "mask") == 0) {
+                       NEXT_ARG();
+                       if (get_u32(&mask, *argv, 16))
+                               return -1;
+                       argc--; argv++;
+               }
+               if (argc > 0 && strcmp(*argv, "at") == 0) {
+                       NEXT_ARG();
+                       if (get_integer(&pi->offset, *argv, 0))
+                               return -1;
+                       argc--; argv++;
+               }
+               /*
+                * Move the value to its place inside the aligned 32-bit
+                * word: byte 0 is the most significant one, byte 3 needs
+                * no shift at all.
+                */
+               if (sz == 1) {
+                       if ((pi->offset & 3) == 0) {
+                               mask <<= 24;
+                               tmp <<= 24;
+                       } else if ((pi->offset & 3) == 1) {
+                               mask <<= 16;
+                               tmp <<= 16;
+                       } else if ((pi->offset & 3) == 2) {
+                               mask <<= 8;
+                               tmp <<= 8;
+                       }
+               } else if (sz == 2) {
+                       if ((pi->offset & 3) == 0) {
+                               mask <<= 16;
+                               tmp <<= 16;
+                       }
+               }
+               /* Only the word-aligned offset is stored. */
+               pi->offset &= ~3;
+               pi->mask = htonl(mask);
+               pi->key = htonl(tmp) & pi->mask;
+       }
+
+done:
+       *argc_p = argc;
+       *argv_p = argv;
+       return 0;
+}
+
+
+/*
+ * Parse the command line options of the "rsvp" filters into message n.
+ *
+ * The address family is AF_INET when qu->id is "rsvp" and AF_INET6
+ * otherwise.  handle, when non-NULL, is parsed as a number into
+ * tcm_handle.  The remaining words are appended as attributes nested
+ * in TCA_OPTIONS; TCA_RSVP_PINFO is added only when at least one of
+ * its fields was set.  Returns 0 on success and -1 after printing a
+ * diagnostic on any error ("help" also returns -1).
+ */
+static int rsvp_parse_opt(struct filter_util *qu, char *handle, int argc, char **argv, struct nlmsghdr *n)
+{
+       int family = strcmp(qu->id, "rsvp") == 0 ? AF_INET : AF_INET6;
+       struct tc_rsvp_pinfo pinfo;
+       struct tcmsg *t = NLMSG_DATA(n);
+       int pinfo_ok = 0;
+       struct rtattr *tail;
+
+       memset(&pinfo, 0, sizeof(pinfo));
+
+       if (handle) {
+               if (get_u32(&t->tcm_handle, handle, 0)) {
+                       fprintf(stderr, "Illegal \"handle\"\n");
+                       return -1;
+               }
+       }
+
+       if (argc == 0)
+               return 0;
+
+       /* Remember where TCA_OPTIONS starts so its length can be fixed up below. */
+       tail = (struct rtattr*)(((void*)n)+NLMSG_ALIGN(n->nlmsg_len));
+       addattr_l(n, 4096, TCA_OPTIONS, NULL, 0);
+
+       while (argc > 0) {
+               if (matches(*argv, "session") == 0) {
+                       inet_prefix addr;
+                       NEXT_ARG();
+                       if (get_addr_and_pi(&argc, &argv, &addr, &pinfo, 1, family)) {
+                               fprintf(stderr, "Illegal \"session\"\n");
+                               return -1;
+                       }
+                       addattr_l(n, 4096, TCA_RSVP_DST, &addr.data, addr.bytelen);
+                       if (pinfo.dpi.mask || pinfo.protocol)
+                               pinfo_ok++;
+                       /* get_addr_and_pi() has already advanced argc/argv. */
+                       continue;
+               } else if (matches(*argv, "sender") == 0 ||
+                          matches(*argv, "flowspec") == 0) {
+                       inet_prefix addr;
+                       NEXT_ARG();
+                       if (get_addr_and_pi(&argc, &argv, &addr, &pinfo, 0, family)) {
+                               fprintf(stderr, "Illegal \"sender\"\n");
+                               return -1;
+                       }
+                       addattr_l(n, 4096, TCA_RSVP_SRC, &addr.data, addr.bytelen);
+                       if (pinfo.spi.mask || pinfo.protocol)
+                               pinfo_ok++;
+                       continue;
+               } else if (matches(*argv, "ipproto") == 0) {
+                       /* Same argument order as every other matches() call here. */
+                       int num;
+                       NEXT_ARG();
+                       num = inet_proto_a2n(*argv);
+                       if (num < 0) {
+                               fprintf(stderr, "Illegal \"ipproto\"\n");
+                               return -1;
+                       }
+                       pinfo.protocol = num;
+                       pinfo_ok++;
+               } else if (matches(*argv, "classid") == 0 ||
+                          strcmp(*argv, "flowid") == 0) {
+                       unsigned classid;
+                       NEXT_ARG();
+                       if (get_tc_classid(&classid, *argv)) {
+                               fprintf(stderr, "Illegal \"classid\"\n");
+                               return -1;
+                       }
+                       addattr_l(n, 4096, TCA_RSVP_CLASSID, &classid, 4);
+               } else if (strcmp(*argv, "tunnelid") == 0) {
+                       unsigned tid;
+                       NEXT_ARG();
+                       if (get_unsigned(&tid, *argv, 0)) {
+                               fprintf(stderr, "Illegal \"tunnelid\"\n");
+                               return -1;
+                       }
+                       pinfo.tunnelid = tid;
+                       pinfo_ok++;
+               } else if (strcmp(*argv, "tunnel") == 0) {
+                       unsigned tid;
+                       NEXT_ARG();
+                       if (get_unsigned(&tid, *argv, 0)) {
+                               fprintf(stderr, "Illegal \"tunnel\"\n");
+                               return -1;
+                       }
+                       /* The tunnel id is sent in the class id attribute. */
+                       addattr_l(n, 4096, TCA_RSVP_CLASSID, &tid, 4);
+                       NEXT_ARG();
+                       /* The "skip" keyword itself is optional. */
+                       if (strcmp(*argv, "skip") == 0) {
+                               NEXT_ARG();
+                       }
+                       if (get_unsigned(&tid, *argv, 0)) {
+                               fprintf(stderr, "Illegal \"skip\"\n");
+                               return -1;
+                       }
+                       pinfo.tunnelhdr = tid;
+                       pinfo_ok++;
+               } else if (matches(*argv, "police") == 0) {
+                       NEXT_ARG();
+                       if (parse_police(&argc, &argv, TCA_RSVP_POLICE, n)) {
+                               fprintf(stderr, "Illegal \"police\"\n");
+                               return -1;
+                       }
+                       /* parse_police() has already advanced argc/argv. */
+                       continue;
+               } else if (strcmp(*argv, "help") == 0) {
+                       explain();
+                       return -1;
+               } else {
+                       fprintf(stderr, "What is \"%s\"?\n", *argv);
+                       explain();
+                       return -1;
+               }
+               argc--; argv++;
+       }
+
+       if (pinfo_ok)
+               addattr_l(n, 4096, TCA_RSVP_PINFO, &pinfo, sizeof(pinfo));
+       /* TCA_OPTIONS covers everything appended since "tail" was taken. */
+       tail->rta_len = (((void*)n)+n->nlmsg_len) - (void*)tail;
+       return 0;
+}
+
+/*
+ * Render the port/SPI selector *pi as text into buf (at most
+ * SPRINT_BSIZE-1 bytes) and return buf.
+ *
+ * With offset 0, a non-zero dir picks the short "/N" form for a 0xFFFF
+ * mask and a zero dir picks it for a 0xFFFF0000 mask; a full 32-bit mask
+ * is shown as "spi/esp".  Offset 4 with a full mask is shown as "spi/ah"
+ * and offset -40 with a 20-bit mask as "flowlabel".  Every other
+ * combination is dumped generically as "u32 KEY mask MASK at OFFSET".
+ */
+static char * sprint_spi(struct tc_rsvp_gpi *pi, int dir, char *buf)
+{
+       const int room = SPRINT_BSIZE-1;
+
+       switch (pi->offset) {
+       case 0:
+               if (dir && pi->mask == htonl(0xFFFF))
+                       snprintf(buf, room, "/%d", htonl(pi->key));
+               else if (!dir && pi->mask == htonl(0xFFFF0000))
+                       snprintf(buf, room, "/%d", htonl(pi->key)>>16);
+               else if (pi->mask == htonl(0xFFFFFFFF))
+                       snprintf(buf, room, " spi/esp 0x%08x", htonl(pi->key));
+               else
+                       break;          /* no short form: use the generic dump */
+               return buf;
+       case 4:
+               if (pi->mask != htonl(0xFFFFFFFF))
+                       break;
+               snprintf(buf, room, " spi/ah 0x%08x", htonl(pi->key));
+               return buf;
+       case -40:
+               if (pi->mask != htonl(0x000FFFFF))
+                       break;
+               snprintf(buf, room, " flowlabel 0x%05x", htonl(pi->key));
+               return buf;
+       }
+
+       snprintf(buf, room, " u32 0x%08x mask %08x at %d",
+                htonl(pi->key), htonl(pi->mask), pi->offset);
+       return buf;
+}
+
+/*
+ * Print the RSVP filter options carried in opt to f.
+ *
+ * qu->id selects the address family used to format the session and
+ * sender addresses: "rsvp" means AF_INET, anything else AF_INET6.
+ * A non-zero handle is printed first as "fh 0x...".
+ *
+ * Returns 0 on success (including when opt is NULL, in which case
+ * nothing is printed) and -1 when the TCA_RSVP_PINFO payload is shorter
+ * than struct tc_rsvp_pinfo.
+ */
+static int rsvp_print_opt(struct filter_util *qu, FILE *f, struct rtattr *opt, __u32 handle)
+{
+       int family = strcmp(qu->id, "rsvp") == 0 ? AF_INET : AF_INET6;
+       struct rtattr *tb[TCA_RSVP_MAX+1];
+       struct tc_rsvp_pinfo *pinfo = NULL;
+
+       if (opt == NULL)
+               return 0;
+
+       memset(tb, 0, sizeof(tb));
+       /* opt was already checked above, so this test is always true. */
+       if (opt)
+               parse_rtattr(tb, TCA_RSVP_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt));
+
+       if (handle)
+               fprintf(f, "fh 0x%08x ", handle);
+
+       /* pinfo stays NULL when the attribute is absent; every use below
+        * is guarded accordingly. */
+       if (tb[TCA_RSVP_PINFO]) {
+               if (RTA_PAYLOAD(tb[TCA_RSVP_PINFO])  < sizeof(*pinfo))
+                       return -1;
+
+               pinfo = RTA_DATA(tb[TCA_RSVP_PINFO]);
+       }
+
+       /* TCA_RSVP_CLASSID is shown as a class id unless tunnelhdr is
+        * set, in which case the same value is shown as a tunnel number. */
+       if (tb[TCA_RSVP_CLASSID]) {
+               SPRINT_BUF(b1);
+               if (!pinfo || pinfo->tunnelhdr == 0)
+                       fprintf(f, "flowid %s ", sprint_tc_classid(*(__u32*)RTA_DATA(tb[TCA_RSVP_CLASSID]), b1));
+               else
+                       fprintf(f, "tunnel %d skip %d ", *(__u32*)RTA_DATA(tb[TCA_RSVP_CLASSID]), pinfo->tunnelhdr);
+       } else if (pinfo && pinfo->tunnelhdr)
+               fprintf(f, "tunnel [BAD] skip %d ", pinfo->tunnelhdr);
+
+       /* Session: destination address plus optional dpi selector. */
+       if (tb[TCA_RSVP_DST]) {
+               char buf[128];
+               fprintf(f, "session ");
+               if (inet_ntop(family, RTA_DATA(tb[TCA_RSVP_DST]), buf, sizeof(buf)) == 0)
+                       fprintf(f, " [INVALID DADDR] ");
+               else
+                       fprintf(f, "%s", buf);
+               if (pinfo && pinfo->dpi.mask) {
+                       SPRINT_BUF(b2);
+                       fprintf(f, "%s ", sprint_spi(&pinfo->dpi, 1, b2));
+               } else
+                       fprintf(f, " ");
+       } else {
+               if (pinfo && pinfo->dpi.mask) {
+                       SPRINT_BUF(b2);
+                       fprintf(f, "session [NONE]%s ", sprint_spi(&pinfo->dpi, 1, b2));
+               } else
+                       fprintf(f, "session NONE ");
+       }
+
+       if (pinfo && pinfo->protocol) {
+               SPRINT_BUF(b1);
+               fprintf(f, "ipproto %s ", inet_proto_n2a(pinfo->protocol, b1, sizeof(b1)));
+       }
+       if (pinfo && pinfo->tunnelid)
+               fprintf(f, "tunnelid %d ", pinfo->tunnelid);
+       /* Sender: source address plus optional spi selector. */
+       if (tb[TCA_RSVP_SRC]) {
+               char buf[128];
+               fprintf(f, "sender ");
+               if (inet_ntop(family, RTA_DATA(tb[TCA_RSVP_SRC]), buf, sizeof(buf)) == 0) {
+                       fprintf(f, "[BAD]");
+               } else {
+                       fprintf(f, " %s", buf);
+               }
+               if (pinfo && pinfo->spi.mask) {
+                       SPRINT_BUF(b2);
+                       fprintf(f, "%s ", sprint_spi(&pinfo->spi, 0, b2));
+               } else
+                       fprintf(f, " ");
+       } else if (pinfo && pinfo->spi.mask) {
+               SPRINT_BUF(b2);
+               fprintf(f, "sender [NONE]%s ", sprint_spi(&pinfo->spi, 0, b2));
+       }
+       if (tb[TCA_RSVP_POLICE])
+               tc_print_police(f, tb[TCA_RSVP_POLICE]);
+       return 0;
+}
+
+/*
+ * Filter descriptor for the id "rsvp".  rsvp_print_opt() compares this
+ * id against "rsvp" and formats addresses as AF_INET when it matches.
+ */
+struct filter_util rsvp_util = {
+       NULL,
+       "rsvp",
+       rsvp_parse_opt,
+       rsvp_print_opt,
+};
+
+/*
+ * Filter descriptor for the id "rsvp6".  It shares the parse and print
+ * handlers with rsvp_util; rsvp_print_opt() formats addresses as
+ * AF_INET6 for any id other than "rsvp".
+ */
+struct filter_util rsvp6_util = {
+       NULL,
+       "rsvp6",
+       rsvp_parse_opt,
+       rsvp_print_opt,
+};
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..59397487df4674cd0599a71f5a7d386f91534663 100644 (file)
@@ -0,0 +1,186 @@
+/*
+ * f_tcindex.c         Traffic control index filter
+ *
+ * Written 1998,1999 by Werner Almesberger
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <string.h>
+#include <netinet/in.h>
+
+#include "utils.h"
+#include "tc_util.h"
+
+/* Write the tcindex filter usage summary to stderr. */
+static void explain(void)
+{
+       fputs(" Usage: ... tcindex [ hash SIZE ] [ mask MASK ]"
+             " [ shift SHIFT ]\n"
+             "                    [ pass_on | fall_through ]\n"
+             "                    [ classid CLASSID ] "
+             "[ police POLICE_SPEC ]\n", stderr);
+}
+
+
+#define usage() return(-1)
+
+
+/*
+ * Parse the command-line options of a tcindex filter into the netlink
+ * message n.
+ *
+ * handle, when given, is converted with strtoul() (base auto-detected)
+ * and stored in tcm_handle.  The remaining arguments are appended as
+ * attributes after an empty TCA_OPTIONS attribute whose length is fixed
+ * up at the end so that it encloses them.
+ *
+ * Returns 0 on success and -1 on a malformed or unknown argument, after
+ * printing either a diagnostic or the usage text.
+ */
+static int tcindex_parse_opt(struct filter_util *qu, char *handle, int argc,
+    char **argv, struct nlmsghdr *n)
+{
+       struct tcmsg *t = NLMSG_DATA(n);
+       struct rtattr *tail;
+       char *end;
+
+       if (handle) {
+               t->tcm_handle = strtoul(handle,&end,0);
+               if (*end) {
+                       fprintf(stderr, "Illegal filter ID\n");
+                       return -1;
+               }
+       }
+       if (!argc) return 0;
+       /* Remember where TCA_OPTIONS will start; its rta_len is patched
+        * once all nested attributes have been added. */
+       tail = (struct rtattr*)(((void*)n)+NLMSG_ALIGN(n->nlmsg_len));
+       addattr_l(n,4096,TCA_OPTIONS,NULL,0);
+       while (argc) {
+               if (!strcmp(*argv,"hash")) {
+                       int hash;
+
+                       NEXT_ARG();
+                       hash = strtoul(*argv,&end,0);
+                       /* Accept 1..0x10000 only. */
+                       if (*end || !hash || hash > 0x10000) {
+                               explain();
+                               return -1;
+                       }
+                       addattr_l(n,4096,TCA_TCINDEX_HASH,&hash,sizeof(hash));
+               }
+               else if (!strcmp(*argv,"mask")) {
+                       __u16 mask;
+
+                       NEXT_ARG();
+                       /* NOTE(review): the strtoul() result is stored in
+                        * a __u16, so larger values are silently
+                        * truncated rather than rejected. */
+                       mask = strtoul(*argv,&end,0);
+                       if (*end) {
+                               explain();
+                               return -1;
+                       }
+                       addattr_l(n,4096,TCA_TCINDEX_MASK,&mask,sizeof(mask));
+               }
+               else if (!strcmp(*argv,"shift")) {
+                       int shift;
+
+                       NEXT_ARG();
+                       shift = strtoul(*argv,&end,0);
+                       if (*end) {
+                               explain();
+                               return -1;
+                       }
+                       addattr_l(n,4096,TCA_TCINDEX_SHIFT,&shift,
+                           sizeof(shift));
+               }
+               /* "fall_through" and "pass_on" set the same attribute to
+                * 1 and 0 respectively. */
+               else if (!strcmp(*argv,"fall_through")) {
+                       int value = 1;
+
+                       addattr_l(n,4096,TCA_TCINDEX_FALL_THROUGH,&value,
+                           sizeof(value));
+               }
+               else if (!strcmp(*argv,"pass_on")) {
+                       int value = 0;
+
+                       addattr_l(n,4096,TCA_TCINDEX_FALL_THROUGH,&value,
+                           sizeof(value));
+               }
+               else if (!strcmp(*argv,"classid")) {
+                       __u32 handle;
+
+                       NEXT_ARG();
+                       if (get_tc_classid(&handle,*argv)) {
+                               fprintf(stderr, "Illegal \"classid\"\n");
+                               return -1;
+                       }
+                       addattr_l(n, 4096, TCA_TCINDEX_CLASSID, &handle, 4);
+               }
+               else if (!strcmp(*argv,"police")) {
+                       NEXT_ARG();
+                       /* parse_police() receives argc/argv by reference,
+                        * so the loop's own advance below is skipped. */
+                       if (parse_police(&argc, &argv, TCA_TCINDEX_POLICE, n)) {
+                               fprintf(stderr, "Illegal \"police\"\n");
+                               return -1;
+                       }
+                       continue;
+               }
+               else {
+                       explain();
+                       return -1;
+               }
+               argc--;
+               argv++;
+       }
+       /* Make TCA_OPTIONS span everything appended since tail. */
+       tail->rta_len = (((void*)n)+n->nlmsg_len) - (void*)tail;
+       return 0;
+}
+
+
+/*
+ * Print the tcindex filter options carried in opt to f.
+ *
+ * The handle is printed unless it equals ~0.  Each known attribute is
+ * printed in a fixed order (hash, mask, shift, fall_through/pass_on,
+ * classid, police).  Returns 0 on success or when opt is NULL, and -1
+ * as soon as a hash, mask, shift or fall_through attribute is found to
+ * be shorter than the type it is read as.
+ */
+static int tcindex_print_opt(struct filter_util *qu, FILE *f,
+     struct rtattr *opt, __u32 handle)
+{
+       struct rtattr *tb[TCA_TCINDEX_MAX+1];
+       struct rtattr *attr;
+
+       if (opt == NULL)
+               return 0;
+
+       memset(tb, 0, sizeof(tb));
+       parse_rtattr(tb, TCA_TCINDEX_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt));
+
+       if (handle != ~0)
+               fprintf(f,"handle 0x%04x ",handle);
+
+       attr = tb[TCA_TCINDEX_HASH];
+       if (attr) {
+               if (RTA_PAYLOAD(attr) < sizeof(__u16))
+                       return -1;
+               fprintf(f,"hash %d ",*(__u16 *) RTA_DATA(attr));
+       }
+
+       attr = tb[TCA_TCINDEX_MASK];
+       if (attr) {
+               if (RTA_PAYLOAD(attr) < sizeof(__u16))
+                       return -1;
+               fprintf(f,"mask 0x%04x ",*(__u16 *) RTA_DATA(attr));
+       }
+
+       attr = tb[TCA_TCINDEX_SHIFT];
+       if (attr) {
+               if (RTA_PAYLOAD(attr) < sizeof(int))
+                       return -1;
+               fprintf(f,"shift %d ",*(int *) RTA_DATA(attr));
+       }
+
+       attr = tb[TCA_TCINDEX_FALL_THROUGH];
+       if (attr) {
+               if (RTA_PAYLOAD(attr) < sizeof(int))
+                       return -1;
+               if (*(int *) RTA_DATA(attr))
+                       fprintf(f,"fall_through ");
+               else
+                       fprintf(f,"pass_on ");
+       }
+
+       attr = tb[TCA_TCINDEX_CLASSID];
+       if (attr) {
+               SPRINT_BUF(b1);
+               fprintf(f, "classid %s ",
+                   sprint_tc_classid(*(__u32 *) RTA_DATA(attr), b1));
+       }
+
+       attr = tb[TCA_TCINDEX_POLICE];
+       if (attr) {
+               fprintf(f, "\n");
+               tc_print_police(f, attr);
+       }
+       return 0;
+}
+
+/* Filter descriptor binding the id "tcindex" to its parse/print handlers. */
+struct filter_util tcindex_util = {
+       NULL,
+       "tcindex",
+       tcindex_parse_opt,
+       tcindex_print_opt,
+};
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..3e76e9cfb76f38d6322d002ff6ab1cea80ea711a 100644 (file)
@@ -0,0 +1,977 @@
+/*
+ * q_u32.c             U32 filter.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "utils.h"
+#include "tc_util.h"
+
+/* Write the u32 filter usage summary to stderr. */
+static void explain(void)
+{
+       static const char *const help_text[] = {
+               "Usage: ... u32 [ match SELECTOR ... ] [ link HTID ] [ classid CLASSID ]\n",
+               "               [ police POLICE_SPEC ] [ offset OFFSET_SPEC ]\n",
+               "               [ ht HTID ] [ hashkey HASHKEY_SPEC ]\n",
+               "               [ sample SAMPLE ]\n",
+               "or         u32 divisor DIVISOR\n",
+               "\n",
+               "Where: SELECTOR := SAMPLE SAMPLE ...\n",
+               "       SAMPLE := { ip | ip6 | udp | tcp | icmp | u{32|16|8} } SAMPLE_ARGS\n",
+               "       FILTERID := X:Y:Z\n",
+       };
+       unsigned int line;
+
+       for (line = 0; line < sizeof(help_text)/sizeof(help_text[0]); line++)
+               fputs(help_text[line], stderr);
+}
+
+#define usage() return(-1)
+
+/*
+ * Parse a u32 filter handle from str into *handle.
+ *
+ * Two spellings are accepted:
+ *   - a raw hexadecimal value introduced by "0x" (no ':' in str);
+ *   - the colon-separated form HTID:HASH:NODE, all hexadecimal, where
+ *     fields may be left empty and trailing fields may be omitted.
+ * HTID and NODE must be below 0x1000 and HASH below 0x100; they are
+ * packed as (HTID<<20)|(HASH<<12)|NODE.
+ *
+ * Returns 0 on success.  On error returns -1, or whatever get_u32()
+ * returns for the raw form; *handle is not written by the colon form
+ * on error.
+ */
+int get_u32_handle(__u32 *handle, char *str)
+{
+       __u32 htid=0, hash=0, nodeid=0;
+       char *tmp = strchr(str, ':');
+
+       if (tmp == NULL) {
+               /* strncmp() stops at the terminator, so an empty str is
+                * never read past its end (memcmp() of 2 bytes was). */
+               if (strncmp(str, "0x", 2) == 0)
+                       return get_u32(handle, str, 16);
+               return -1;
+       }
+       htid = strtoul(str, &tmp, 16);
+       /* Nothing consumed is fine only for an empty field. */
+       if (tmp == str && *str != ':' && *str != 0)
+               return -1;
+       if (htid>=0x1000)
+               return -1;
+       if (*tmp) {
+               str = tmp+1;
+               hash = strtoul(str, &tmp, 16);
+               if (tmp == str && *str != ':' && *str != 0)
+                       return -1;
+               if (hash>=0x100)
+                       return -1;
+               if (*tmp) {
+                       str = tmp+1;
+                       nodeid = strtoul(str, &tmp, 16);
+                       if (tmp == str && *str != 0)
+                               return -1;
+                       if (nodeid>=0x1000)
+                               return -1;
+               }
+       }
+       *handle = (htid<<20)|(hash<<12)|nodeid;
+       return 0;
+}
+
+/*
+ * Format a u32 filter handle into buf (at most SPRINT_BSIZE-1 bytes)
+ * and return buf.
+ *
+ * A zero handle is shown as "none".  Otherwise the non-zero parts are
+ * emitted in hexadecimal as "HTID:", "HASH" and ":NODE", followed by the
+ * raw value in brackets when show_raw is set.
+ */
+char * sprint_u32_handle(__u32 handle, char *buf)
+{
+       int left = SPRINT_BSIZE-1;
+       char *p = buf;
+       __u32 htid = TC_U32_HTID(handle);
+       __u32 hash = TC_U32_HASH(handle);
+       __u32 nodeid = TC_U32_NODE(handle);
+       int n;
+
+       if (handle == 0) {
+               snprintf(buf, left, "none");
+               return buf;
+       }
+
+       if (htid) {
+               n = snprintf(p, left, "%x:", htid>>20);
+               left -= n;
+               p += n;
+       }
+       if (hash) {
+               n = snprintf(p, left, "%x", hash);
+               left -= n;
+               p += n;
+       }
+       if (nodeid) {
+               n = snprintf(p, left, ":%x", nodeid);
+               left -= n;
+               p += n;
+       }
+       if (show_raw)
+               snprintf(p, left, "[%08x] ", handle);
+       return buf;
+}
+
+/*
+ * Add the match (key, mask) at (off, offmask) to the selector.
+ *
+ * key is first reduced to the bits covered by mask.  If a key with the
+ * same off/offmask already exists the two are merged, unless they
+ * disagree on a bit both masks cover, which is an error.  Otherwise a
+ * new key slot is appended, provided fewer than 128 are in use and off
+ * is a multiple of 4.  Returns 0 on success and -1 on error.
+ */
+static int pack_key(struct tc_u32_sel *sel, __u32 key, __u32 mask, int off, int offmask)
+{
+       struct tc_u32_key *k;
+       int used = sel->nkeys;
+       int idx;
+
+       key &= mask;
+
+       for (idx = 0; idx < used; idx++) {
+               k = &sel->keys[idx];
+               if (k->off != off || k->offmask != offmask)
+                       continue;
+               /* Same word: reject contradictory bits, else merge. */
+               if ((key ^ k->val) & (mask & k->mask))
+                       return -1;
+               k->val |= key;
+               k->mask |= mask;
+               return 0;
+       }
+
+       if (used >= 128)
+               return -1;
+       if (off % 4)
+               return -1;
+
+       k = &sel->keys[used];
+       k->val = key;
+       k->mask = mask;
+       k->off = off;
+       k->offmask = offmask;
+       sel->nkeys++;
+       return 0;
+}
+
+/*
+ * Add a 32-bit match: key and mask are given in host byte order and are
+ * converted with htonl() before being handed to pack_key().
+ */
+static int pack_key32(struct tc_u32_sel *sel, __u32 key, __u32 mask, int off, int offmask)
+{
+       return pack_key(sel, htonl(key), htonl(mask), off, offmask);
+}
+
+/*
+ * Add a 16-bit match at byte offset off.
+ *
+ * key and mask must fit in 16 bits (else -1).  They are placed in the
+ * upper half of the enclosing 32-bit word when off is word-aligned and
+ * in the lower half otherwise; off is then rounded down to a multiple
+ * of 4 and the word is converted with htonl() for pack_key().
+ */
+static int pack_key16(struct tc_u32_sel *sel, __u32 key, __u32 mask, int off, int offmask)
+{
+       int shift = (off & 3) == 0 ? 16 : 0;
+
+       if (key > 0xFFFF || mask > 0xFFFF)
+               return -1;
+
+       return pack_key(sel, htonl(key << shift), htonl(mask << shift),
+                       off & ~3, offmask);
+}
+
+/*
+ * Add an 8-bit match at byte offset off.
+ *
+ * key and mask must fit in 8 bits (else -1).  They are shifted to the
+ * byte position selected by the low two bits of off (24, 16, 8 or 0
+ * bits), off is rounded down to a multiple of 4 and the word is
+ * converted with htonl() for pack_key().
+ */
+static int pack_key8(struct tc_u32_sel *sel, __u32 key, __u32 mask, int off, int offmask)
+{
+       int shift;
+
+       if (key > 0xFF || mask > 0xFF)
+               return -1;
+
+       switch (off & 3) {
+       case 0:
+               shift = 24;
+               break;
+       case 1:
+               shift = 16;
+               break;
+       case 2:
+               shift = 8;
+               break;
+       default:
+               shift = 0;
+               break;
+       }
+
+       return pack_key(sel, htonl(key << shift), htonl(mask << shift),
+                       off & ~3, offmask);
+}
+
+
+/*
+ * Parse the operand of an "at" clause: either OFFSET, "nexthdr+OFFSET"
+ * as one word, or "nexthdr+" followed by OFFSET as the next word.
+ *
+ * The "nexthdr+" forms set *offmask to -1; the plain form leaves it
+ * untouched.  OFFSET is read with get_integer() into *off.  On success
+ * the consumed arguments are skipped in *argc_p / *argv_p and 0 is
+ * returned; on error -1 is returned and they are left unchanged.
+ */
+int parse_at(int *argc_p, char ***argv_p, int *off, int *offmask)
+{
+       static const char prefix[] = "nexthdr+";
+       const size_t plen = sizeof(prefix) - 1;
+       int argc = *argc_p;
+       char **argv = *argv_p;
+       char *num;
+
+       if (argc <= 0)
+               return -1;
+
+       num = *argv;
+       if (strlen(num) > plen && memcmp(num, prefix, plen) == 0) {
+               /* "nexthdr+N" glued into a single argument. */
+               *offmask = -1;
+               num += plen;
+       } else if (matches(*argv, "nexthdr+") == 0) {
+               /* "nexthdr+" alone: the number is the next argument. */
+               NEXT_ARG();
+               *offmask = -1;
+               num = *argv;
+       }
+
+       if (get_integer(off, num, 0))
+               return -1;
+
+       *argc_p = argc - 1;
+       *argv_p = argv + 1;
+       return 0;
+}
+
+
+/*
+ * Parse "VALUE MASK [ at OFFSET_SPEC ]" as a 32-bit match and add it to
+ * sel.  VALUE is read with base auto-detection, MASK as hexadecimal.
+ * off/offmask are the defaults used when no "at" clause follows.
+ * Returns pack_key32()'s result, or -1 on a parse error (in which case
+ * *argc_p / *argv_p are left unchanged).
+ */
+static int parse_u32(int *argc_p, char ***argv_p, struct tc_u32_sel *sel, int off, int offmask)
+{
+       int argc = *argc_p;
+       char **argv = *argv_p;
+       __u32 key, mask;
+       int res;
+
+       if (argc < 2)
+               return -1;
+
+       if (get_u32(&key, argv[0], 0) || get_u32(&mask, argv[1], 16))
+               return -1;
+       argc -= 2;
+       argv += 2;
+
+       if (argc > 0 && strcmp(*argv, "at") == 0) {
+               NEXT_ARG();
+               if (parse_at(&argc, &argv, &off, &offmask))
+                       return -1;
+       }
+
+       res = pack_key32(sel, key, mask, off, offmask);
+       *argc_p = argc;
+       *argv_p = argv;
+       return res;
+}
+
+/*
+ * Parse "VALUE MASK [ at OFFSET_SPEC ]" as a 16-bit match and add it to
+ * sel.  VALUE is read with base auto-detection, MASK as hexadecimal.
+ * off/offmask are the defaults used when no "at" clause follows.
+ * Returns pack_key16()'s result, or -1 on a parse error (in which case
+ * *argc_p / *argv_p are left unchanged).
+ */
+static int parse_u16(int *argc_p, char ***argv_p, struct tc_u32_sel *sel, int off, int offmask)
+{
+       int argc = *argc_p;
+       char **argv = *argv_p;
+       __u32 key, mask;
+       int res;
+
+       if (argc < 2)
+               return -1;
+
+       if (get_u32(&key, argv[0], 0) || get_u32(&mask, argv[1], 16))
+               return -1;
+       argc -= 2;
+       argv += 2;
+
+       if (argc > 0 && strcmp(*argv, "at") == 0) {
+               NEXT_ARG();
+               if (parse_at(&argc, &argv, &off, &offmask))
+                       return -1;
+       }
+
+       res = pack_key16(sel, key, mask, off, offmask);
+       *argc_p = argc;
+       *argv_p = argv;
+       return res;
+}
+
+/*
+ * Parse "VALUE MASK [ at OFFSET_SPEC ]" as an 8-bit match and add it to
+ * sel.  VALUE is read with base auto-detection, MASK as hexadecimal;
+ * both must fit in 8 bits.  off/offmask are the defaults used when no
+ * "at" clause follows.  Returns pack_key8()'s result, or -1 on a parse
+ * error (in which case *argc_p / *argv_p are left unchanged).
+ */
+static int parse_u8(int *argc_p, char ***argv_p, struct tc_u32_sel *sel, int off, int offmask)
+{
+       int argc = *argc_p;
+       char **argv = *argv_p;
+       __u32 key, mask;
+       int res;
+
+       if (argc < 2)
+               return -1;
+
+       if (get_u32(&key, argv[0], 0) || get_u32(&mask, argv[1], 16))
+               return -1;
+       argc -= 2;
+       argv += 2;
+
+       /* Range-check before looking at any "at" clause. */
+       if (key > 0xFF || mask > 0xFF)
+               return -1;
+
+       if (argc > 0 && strcmp(*argv, "at") == 0) {
+               NEXT_ARG();
+               if (parse_at(&argc, &argv, &off, &offmask))
+                       return -1;
+       }
+
+       res = pack_key8(sel, key, mask, off, offmask);
+       *argc_p = argc;
+       *argv_p = argv;
+       return res;
+}
+
+/*
+ * Parse "ADDR[/PREFIXLEN] [ at OFFSET_SPEC ]" as an IPv4 address match
+ * and add it to sel at byte offset off (unless overridden by "at").
+ * The mask covers the leading addr.bitlen bits, or is 0 when bitlen is
+ * 0.  Returns 0 on success and -1 on error; *argc_p / *argv_p are only
+ * advanced on success.
+ */
+static int parse_ip_addr(int *argc_p, char ***argv_p, struct tc_u32_sel *sel, int off)
+{
+       int argc = *argc_p;
+       char **argv = *argv_p;
+       inet_prefix addr;
+       int offmask = 0;
+       __u32 mask;
+
+       if (argc < 1)
+               return -1;
+
+       if (get_prefix_1(&addr, *argv, AF_INET))
+               return -1;
+       argc--; argv++;
+
+       if (argc > 0 && strcmp(*argv, "at") == 0) {
+               NEXT_ARG();
+               if (parse_at(&argc, &argv, &off, &offmask))
+                       return -1;
+       }
+
+       mask = addr.bitlen ? htonl(0xFFFFFFFF<<(32-addr.bitlen)) : 0;
+       if (pack_key(sel, addr.data[0], mask, off, offmask) < 0)
+               return -1;
+
+       *argc_p = argc;
+       *argv_p = argv;
+       return 0;
+}
+
+/*
+ * Parse "ADDR[/PREFIXLEN] [ at OFFSET_SPEC ]" as an IPv6 address match
+ * and add it to sel, one key per 32-bit word of the prefix, starting at
+ * byte offset off (unless overridden by "at").
+ *
+ * Words fully inside the prefix are matched with an all-ones mask; the
+ * last word of a prefix that is not a multiple of 32 bits is matched
+ * with a mask covering only its leading (plen - i) bits.  A prefix
+ * length of 0 adds no key.
+ *
+ * Returns 0 on success and -1 on error; *argc_p / *argv_p are only
+ * advanced on success.
+ */
+static int parse_ip6_addr(int *argc_p, char ***argv_p, struct tc_u32_sel *sel, int off)
+{
+       int argc = *argc_p;
+       char **argv = *argv_p;
+       int plen;
+       int i;
+       inet_prefix addr;
+       int offmask = 0;
+
+       if (argc < 1)
+               return -1;
+
+       if (get_prefix_1(&addr, *argv, AF_INET6))
+               return -1;
+       argc--; argv++;
+
+       if (argc > 0 && strcmp(argv[0], "at") == 0) {
+               NEXT_ARG();
+               if (parse_at(&argc, &argv, &off, &offmask))
+                       return -1;
+       }
+
+       plen = addr.bitlen;
+       for (i=0; i<plen; i+=32) {
+               __u32 mask = 0xFFFFFFFF;
+
+               /*
+                * Fewer than 32 prefix bits remain: mask only those.  The
+                * old test "((i+31)&~0x1F)<=plen" reduced to "i<=plen",
+                * which always held here, so e.g. a /48 was matched as
+                * a /64.
+                */
+               if (plen - i < 32)
+                       mask = htonl(0xFFFFFFFF<<(32-(plen-i)));
+               if (pack_key(sel, addr.data[i/32], mask, off+4*(i/32), offmask) < 0)
+                       return -1;
+       }
+
+       *argc_p = argc;
+       *argv_p = argv;
+       return 0;
+}
+
+/*
+ * Parse one "ip FIELD ..." sample and add the matching key(s) to sel.
+ *
+ * FIELD is one of src, dst, tos/dsfield, ihl, protocol, precedence,
+ * nofrag, firstfrag, df, mf, dport, sport, icmp_type or icmp_code.
+ * Header fields use their byte offset within the IP header; the
+ * transport fields (sport, dport, icmp_*) use fixed offsets 20..22,
+ * i.e. they presume an option-less 20-byte IP header.
+ *
+ * Returns the result of the field parser, or -1 for fewer than two
+ * arguments or an unknown FIELD (in which case *argc_p / *argv_p are
+ * left unchanged).
+ */
+static int parse_ip(int *argc_p, char ***argv_p, struct tc_u32_sel *sel)
+{
+       int res = -1;
+       int argc = *argc_p;
+       char **argv = *argv_p;
+
+       if (argc < 2)
+               return -1;
+
+       if (strcmp(*argv, "src") == 0) {
+               NEXT_ARG();
+               res = parse_ip_addr(&argc, &argv, sel, 12);
+       } else if (strcmp(*argv, "dst") == 0) {
+               NEXT_ARG();
+               res = parse_ip_addr(&argc, &argv, sel, 16);
+       } else if (strcmp(*argv, "tos") == 0 ||
+                  matches(*argv, "dsfield") == 0) {
+               NEXT_ARG();
+               res = parse_u8(&argc, &argv, sel, 1, 0);
+       } else if (strcmp(*argv, "ihl") == 0) {
+               NEXT_ARG();
+               res = parse_u8(&argc, &argv, sel, 0, 0);
+       } else if (strcmp(*argv, "protocol") == 0) {
+               NEXT_ARG();
+               res = parse_u8(&argc, &argv, sel, 9, 0);
+       } else if (matches(*argv, "precedence") == 0) {
+               NEXT_ARG();
+               res = parse_u8(&argc, &argv, sel, 1, 0);
+       } else if (strcmp(*argv, "nofrag") == 0) {
+               argc--; argv++;
+               res = pack_key16(sel, 0, 0x3FFF, 6, 0);
+       } else if (strcmp(*argv, "firstfrag") == 0) {
+               argc--; argv++;
+               res = pack_key16(sel, 0, 0x1FFF, 6, 0);
+       } else if (strcmp(*argv, "df") == 0) {
+               argc--; argv++;
+               res = pack_key16(sel, 0x4000, 0x4000, 6, 0);
+       } else if (strcmp(*argv, "mf") == 0) {
+               argc--; argv++;
+               res = pack_key16(sel, 0x2000, 0x2000, 6, 0);
+       } else if (strcmp(*argv, "dport") == 0) {
+               NEXT_ARG();
+               res = parse_u16(&argc, &argv, sel, 22, 0);
+       } else if (strcmp(*argv, "sport") == 0) {
+               NEXT_ARG();
+               res = parse_u16(&argc, &argv, sel, 20, 0);
+       } else if (strcmp(*argv, "icmp_type") == 0) {
+               NEXT_ARG();
+               res = parse_u8(&argc, &argv, sel, 20, 0);
+       } else if (strcmp(*argv, "icmp_code") == 0) {
+               NEXT_ARG();
+               /*
+                * The code is the byte after the type, so it sits at
+                * offset 21 with no offmask.  The previous (20, 1)
+                * matched the type byte and passed a stray offmask.
+                */
+               res = parse_u8(&argc, &argv, sel, 21, 0);
+       } else {
+               return -1;
+       }
+
+       *argc_p = argc;
+       *argv_p = argv;
+       return res;
+}
+
+/*
+ * Parse one "ip6 FIELD ..." sample and add the matching key(s) to sel.
+ *
+ * FIELD is one of src, dst, priority, protocol, flowlabel, dport,
+ * sport, icmp_type or icmp_code.  The transport fields use fixed
+ * offsets 40..42, i.e. they presume that the transport header directly
+ * follows a 40-byte IPv6 header.
+ *
+ * Returns the result of the field parser, or -1 for fewer than two
+ * arguments or an unknown FIELD (in which case *argc_p / *argv_p are
+ * left unchanged).
+ */
+static int parse_ip6(int *argc_p, char ***argv_p, struct tc_u32_sel *sel)
+{
+       int res = -1;
+       int argc = *argc_p;
+       char **argv = *argv_p;
+
+       if (argc < 2)
+               return -1;
+
+       if (strcmp(*argv, "src") == 0) {
+               NEXT_ARG();
+               res = parse_ip6_addr(&argc, &argv, sel, 8);
+       } else if (strcmp(*argv, "dst") == 0) {
+               NEXT_ARG();
+               res = parse_ip6_addr(&argc, &argv, sel, 24);
+       } else if (strcmp(*argv, "priority") == 0) {
+               NEXT_ARG();
+               res = parse_u8(&argc, &argv, sel, 0, 0);
+       } else if (strcmp(*argv, "protocol") == 0) {
+               NEXT_ARG();
+               res = parse_u8(&argc, &argv, sel, 6, 0);
+       } else if (strcmp(*argv, "flowlabel") == 0) {
+               NEXT_ARG();
+               res = parse_u32(&argc, &argv, sel, 0, 0);
+       } else if (strcmp(*argv, "dport") == 0) {
+               NEXT_ARG();
+               res = parse_u16(&argc, &argv, sel, 42, 0);
+       } else if (strcmp(*argv, "sport") == 0) {
+               NEXT_ARG();
+               res = parse_u16(&argc, &argv, sel, 40, 0);
+       } else if (strcmp(*argv, "icmp_type") == 0) {
+               NEXT_ARG();
+               res = parse_u8(&argc, &argv, sel, 40, 0);
+       } else if (strcmp(*argv, "icmp_code") == 0) {
+               NEXT_ARG();
+               /*
+                * Absolute offset like every other field here, so the
+                * offmask is 0 (it used to be a stray 1, which also kept
+                * this key from merging with an icmp_type key on the
+                * same word).
+                */
+               res = parse_u8(&argc, &argv, sel, 41, 0);
+       } else {
+               return -1;
+       }
+
+       *argc_p = argc;
+       *argv_p = argv;
+       return res;
+}
+
+/* TCP samples are parsed by the UDP parser. */
+#define parse_tcp parse_udp
+/*
+ * Parse one "udp FIELD ..." (or "tcp FIELD ...") sample: "src" is a
+ * 16-bit match at offset 0 and "dst" one at offset 2, both with offmask
+ * -1 (the same offmask parse_at() sets for "nexthdr+").  Returns the
+ * result of parse_u16(), or -1 for fewer than two arguments or an
+ * unknown FIELD.
+ */
+static int parse_udp(int *argc_p, char ***argv_p, struct tc_u32_sel *sel)
+{
+       int argc = *argc_p;
+       char **argv = *argv_p;
+       int res;
+
+       if (argc < 2)
+               return -1;
+
+       if (strcmp(*argv, "src") == 0) {
+               NEXT_ARG();
+               res = parse_u16(&argc, &argv, sel, 0, -1);
+       } else if (strcmp(*argv, "dst") == 0) {
+               NEXT_ARG();
+               res = parse_u16(&argc, &argv, sel, 2, -1);
+       } else {
+               return -1;
+       }
+
+       *argc_p = argc;
+       *argv_p = argv;
+       return res;
+}
+
+/*
+ * Parse one "icmp FIELD ..." sample: "type" is an 8-bit match at offset
+ * 0 and "code" one at offset 1, both with offmask -1 (the same offmask
+ * parse_at() sets for "nexthdr+").  Returns the result of parse_u8(),
+ * or -1 for fewer than two arguments or an unknown FIELD.
+ */
+static int parse_icmp(int *argc_p, char ***argv_p, struct tc_u32_sel *sel)
+{
+       int argc = *argc_p;
+       char **argv = *argv_p;
+       int res;
+
+       if (argc < 2)
+               return -1;
+
+       if (strcmp(*argv, "type") == 0) {
+               NEXT_ARG();
+               res = parse_u8(&argc, &argv, sel, 0, -1);
+       } else if (strcmp(*argv, "code") == 0) {
+               NEXT_ARG();
+               res = parse_u8(&argc, &argv, sel, 1, -1);
+       } else {
+               return -1;
+       }
+
+       *argc_p = argc;
+       *argv_p = argv;
+       return res;
+}
+
+
+
+/*
+ * Parse one selector sample and add its key(s) to sel.
+ *
+ * The first word picks the sample type, tested with matches() in this
+ * order: u32, u16, u8, ip, ip6, udp, tcp, icmp.  The rest is handed to
+ * the corresponding parser.  Returns that parser's result, or -1 when
+ * no argument is left or the type is unknown (in which case *argc_p /
+ * *argv_p are left unchanged).
+ */
+static int parse_selector(int *argc_p, char ***argv_p, struct tc_u32_sel *sel)
+{
+       int argc = *argc_p;
+       char **argv = *argv_p;
+       int res;
+
+       if (argc <= 0)
+               return -1;
+
+       if (matches(*argv, "u32") == 0) {
+               NEXT_ARG();
+               res = parse_u32(&argc, &argv, sel, 0, 0);
+       } else if (matches(*argv, "u16") == 0) {
+               NEXT_ARG();
+               res = parse_u16(&argc, &argv, sel, 0, 0);
+       } else if (matches(*argv, "u8") == 0) {
+               NEXT_ARG();
+               res = parse_u8(&argc, &argv, sel, 0, 0);
+       } else if (matches(*argv, "ip") == 0) {
+               NEXT_ARG();
+               res = parse_ip(&argc, &argv, sel);
+       } else if (matches(*argv, "ip6") == 0) {
+               NEXT_ARG();
+               res = parse_ip6(&argc, &argv, sel);
+       } else if (matches(*argv, "udp") == 0) {
+               NEXT_ARG();
+               res = parse_udp(&argc, &argv, sel);
+       } else if (matches(*argv, "tcp") == 0) {
+               NEXT_ARG();
+               res = parse_tcp(&argc, &argv, sel);
+       } else if (matches(*argv, "icmp") == 0) {
+               NEXT_ARG();
+               res = parse_icmp(&argc, &argv, sel);
+       } else {
+               return -1;
+       }
+
+       *argc_p = argc;
+       *argv_p = argv;
+       return res;
+}
+
+/*
+ * Parse the keywords of an "offset" specification into sel.
+ *
+ * Recognised keywords are "plus N", "at N" (N must be even), "mask HEX",
+ * "shift N" and "eat"; each sets the corresponding selector field
+ * and/or flag.  Parsing stops at the first word that is none of these.
+ * Returns 0 with *argc_p / *argv_p advanced past what was consumed, or
+ * -1 on a bad value (leaving them unchanged).
+ */
+static int parse_offset(int *argc_p, char ***argv_p, struct tc_u32_sel *sel)
+{
+       int argc = *argc_p;
+       char **argv = *argv_p;
+
+       for (; argc > 0; argc--, argv++) {
+               if (matches(*argv, "plus") == 0) {
+                       int val;
+                       NEXT_ARG();
+                       if (get_integer(&val, *argv, 0))
+                               return -1;
+                       sel->off = val;
+                       sel->flags |= TC_U32_OFFSET;
+                       continue;
+               }
+               if (matches(*argv, "at") == 0) {
+                       int val;
+                       NEXT_ARG();
+                       if (get_integer(&val, *argv, 0))
+                               return -1;
+                       sel->offoff = val;
+                       if (val%2) {
+                               fprintf(stderr, "offset \"at\" must be even\n");
+                               return -1;
+                       }
+                       sel->flags |= TC_U32_VAROFFSET;
+                       continue;
+               }
+               if (matches(*argv, "mask") == 0) {
+                       __u16 val;
+                       NEXT_ARG();
+                       if (get_u16(&val, *argv, 16))
+                               return -1;
+                       sel->offmask = htons(val);
+                       sel->flags |= TC_U32_VAROFFSET;
+                       continue;
+               }
+               if (matches(*argv, "shift") == 0) {
+                       int val;
+                       NEXT_ARG();
+                       if (get_integer(&val, *argv, 0))
+                               return -1;
+                       sel->offshift = val;
+                       sel->flags |= TC_U32_VAROFFSET;
+                       continue;
+               }
+               if (matches(*argv, "eat") == 0) {
+                       sel->flags |= TC_U32_EAT;
+                       continue;
+               }
+               /* Not an offset keyword: leave it for the caller. */
+               break;
+       }
+
+       *argc_p = argc;
+       *argv_p = argv;
+       return 0;
+}
+
+/*
+ * Parse the keywords of a "hashkey" specification into sel.
+ *
+ * "mask HEX" sets sel->hmask (stored via htonl()) and "at N" sets
+ * sel->hoff, where N must be a multiple of 4.  Parsing stops at the
+ * first word that is neither.  Returns 0 with *argc_p / *argv_p
+ * advanced past what was consumed, or -1 on a bad value (leaving them
+ * unchanged).
+ */
+static int parse_hashkey(int *argc_p, char ***argv_p, struct tc_u32_sel *sel)
+{
+       int argc = *argc_p;
+       char **argv = *argv_p;
+
+       for (; argc > 0; argc--, argv++) {
+               if (matches(*argv, "mask") == 0) {
+                       __u32 val;
+                       NEXT_ARG();
+                       if (get_u32(&val, *argv, 16))
+                               return -1;
+                       sel->hmask = htonl(val);
+                       continue;
+               }
+               if (matches(*argv, "at") == 0) {
+                       int val;
+                       NEXT_ARG();
+                       if (get_integer(&val, *argv, 0))
+                               return -1;
+                       if (val%4)
+                               return -1;
+                       sel->hoff = val;
+                       continue;
+               }
+               /* Not a hashkey keyword: leave it for the caller. */
+               break;
+       }
+
+       *argc_p = argc;
+       *argv_p = argv;
+       return 0;
+}
+
+/*
+ * Parse the command-line options of a u32 filter into netlink message n.
+ *
+ * handle, when non-NULL, is parsed with get_u32_handle() into tcm_handle.
+ * The remaining words are consumed keyword by keyword ("match", "offset",
+ * "hashkey", "classid"/"flowid", "divisor", "order", "link", "ht", "sample",
+ * "police", "help").  All attributes are nested inside one TCA_OPTIONS
+ * attribute whose length is patched in at the end.
+ *
+ * Returns 0 on success (including the no-argument case) and -1 after
+ * printing a diagnostic on any parse error.
+ */
+static int u32_parse_opt(struct filter_util *qu, char *handle, int argc, char **argv, struct nlmsghdr *n)
+{
+       /* Selector header followed by room for up to 128 keys. */
+       struct {
+               struct tc_u32_sel sel;
+               struct tc_u32_key keys[128];
+       } sel;
+       struct tcmsg *t = NLMSG_DATA(n);
+       struct rtattr *tail;
+       int sel_ok = 0;
+       int sample_ok = 0;
+       __u32 htid = 0;
+       __u32 order = 0;
+
+       memset(&sel, 0, sizeof(sel));
+
+       if (handle && get_u32_handle(&t->tcm_handle, handle)) {
+               fprintf(stderr, "Illegal filter ID\n");
+               return -1;
+       }
+
+       if (argc == 0)
+               return 0;
+
+       /* Remember where the TCA_OPTIONS nest starts so its length can be fixed up. */
+       tail = (struct rtattr*)(((void*)n)+NLMSG_ALIGN(n->nlmsg_len));
+       addattr_l(n, 4096, TCA_OPTIONS, NULL, 0);
+
+       while (argc > 0) {
+               if (matches(*argv, "match") == 0) {
+                       NEXT_ARG();
+                       if (parse_selector(&argc, &argv, &sel.sel)) {
+                               fprintf(stderr, "Illegal \"match\"\n");
+                               return -1;
+                       }
+                       sel_ok++;
+                       /* The sub-parser already advanced argc/argv. */
+                       continue;
+               } else if (matches(*argv, "offset") == 0) {
+                       NEXT_ARG();
+                       if (parse_offset(&argc, &argv, &sel.sel)) {
+                               fprintf(stderr, "Illegal \"offset\"\n");
+                               return -1;
+                       }
+                       continue;
+               } else if (matches(*argv, "hashkey") == 0) {
+                       NEXT_ARG();
+                       if (parse_hashkey(&argc, &argv, &sel.sel)) {
+                               fprintf(stderr, "Illegal \"hashkey\"\n");
+                               return -1;
+                       }
+                       continue;
+               } else if (matches(*argv, "classid") == 0 ||
+                          strcmp(*argv, "flowid") == 0) {
+                       unsigned handle;
+                       NEXT_ARG();
+                       if (get_tc_classid(&handle, *argv)) {
+                               fprintf(stderr, "Illegal \"classid\"\n");
+                               return -1;
+                       }
+                       addattr_l(n, 4096, TCA_U32_CLASSID, &handle, 4);
+                       sel.sel.flags |= TC_U32_TERMINAL;
+               } else if (matches(*argv, "divisor") == 0) {
+                       unsigned divisor;
+                       NEXT_ARG();
+                       if (get_unsigned(&divisor, *argv, 0) || divisor == 0 ||
+                           divisor > 0x100) {
+                               fprintf(stderr, "Illegal \"divisor\"\n");
+                               return -1;
+                       }
+                       addattr_l(n, 4096, TCA_U32_DIVISOR, &divisor, 4);
+               } else if (matches(*argv, "order") == 0) {
+                       NEXT_ARG();
+                       if (get_u32(&order, *argv, 0)) {
+                               fprintf(stderr, "Illegal \"order\"\n");
+                               return -1;
+                       }
+               } else if (strcmp(*argv, "link") == 0) {
+                       unsigned handle;
+                       NEXT_ARG();
+                       if (get_u32_handle(&handle, *argv)) {
+                               fprintf(stderr, "Illegal \"link\"\n");
+                               return -1;
+                       }
+                       if (handle && TC_U32_NODE(handle)) {
+                               fprintf(stderr, "\"link\" must be a hash table.\n");
+                               return -1;
+                       }
+                       addattr_l(n, 4096, TCA_U32_LINK, &handle, 4);
+               } else if (strcmp(*argv, "ht") == 0) {
+                       unsigned handle;
+                       NEXT_ARG();
+                       if (get_u32_handle(&handle, *argv)) {
+                               fprintf(stderr, "Illegal \"ht\"\n");
+                               return -1;
+                       }
+                       if (handle && TC_U32_NODE(handle)) {
+                               fprintf(stderr, "\"ht\" must be a hash table.\n");
+                               return -1;
+                       }
+                       /* Keep the bucket bits already derived from "sample", if any. */
+                       if (sample_ok)
+                               htid = (htid&0xFF000)|(handle&0xFFF00000);
+                       else
+                               htid = (handle&0xFFFFF000);
+               } else if (strcmp(*argv, "sample") == 0) {
+                       __u32 hash;
+                       struct {
+                               struct tc_u32_sel sel;
+                               struct tc_u32_key keys[4];
+                       } sel2;
+                       /*
+                        * The scratch selector must start empty, exactly like
+                        * the main one: nkeys is tested right after parsing,
+                        * so it must not contain stack garbage.
+                        */
+                       memset(&sel2, 0, sizeof(sel2));
+                       NEXT_ARG();
+                       if (parse_selector(&argc, &argv, &sel2.sel)) {
+                               fprintf(stderr, "Illegal \"sample\"\n");
+                               return -1;
+                       }
+                       if (sel2.sel.nkeys != 1) {
+                               fprintf(stderr, "\"sample\" must contain exactly ONE key.\n");
+                               return -1;
+                       }
+                       /* Fold the masked key value down to the 8 bucket bits of htid. */
+                       hash = sel2.sel.keys[0].val&sel2.sel.keys[0].mask;
+                       hash ^= hash>>16;
+                       hash ^= hash>>8;
+                       htid = ((hash<<12)&0xFF000)|(htid&0xFFF00000);
+                       sample_ok = 1;
+                       continue;
+               } else if (matches(*argv, "police") == 0) {
+                       NEXT_ARG();
+                       if (parse_police(&argc, &argv, TCA_U32_POLICE, n)) {
+                               fprintf(stderr, "Illegal \"police\"\n");
+                               return -1;
+                       }
+                       continue;
+               } else if (strcmp(*argv, "help") == 0) {
+                       explain();
+                       return -1;
+               } else {
+                       fprintf(stderr, "What is \"%s\"?\n", *argv);
+                       explain();
+                       return -1;
+               }
+               argc--; argv++;
+       }
+
+       if (order) {
+               /* "order" lands in the node bits of the handle; reject a conflict. */
+               if (TC_U32_NODE(t->tcm_handle) && order != TC_U32_NODE(t->tcm_handle)) {
+                       fprintf(stderr, "\"order\" contradicts \"handle\"\n");
+                       return -1;
+               }
+               t->tcm_handle |= order;
+       }
+
+       if (htid)
+               addattr_l(n, 4096, TCA_U32_HASH, &htid, 4);
+       if (sel_ok)
+               addattr_l(n, 4096, TCA_U32_SEL, &sel, sizeof(sel.sel)+sel.sel.nkeys*sizeof(struct tc_u32_key));
+       /* Close the TCA_OPTIONS nest: it spans everything appended since tail. */
+       tail->rta_len = (((void*)n)+n->nlmsg_len) - (void*)tail;
+       return 0;
+}
+
+/*
+ * Print the options of one u32 filter entry to f.
+ *
+ * opt is the nested attribute block of the filter and handle its filter
+ * handle.  Returns 0 normally (also when opt is NULL) and -1 when the
+ * TCA_U32_SEL payload is smaller than a struct tc_u32_sel.
+ */
+static int u32_print_opt(struct filter_util *qu, FILE *f, struct rtattr *opt, __u32 handle)
+{
+       struct rtattr *tb[TCA_U32_MAX+1];
+       struct tc_u32_sel *sel = NULL;
+       struct tc_u32_key *key;
+       int i;
+       SPRINT_BUF(b1);
+
+       if (!opt)
+               return 0;
+
+       memset(tb, 0, sizeof(tb));
+       parse_rtattr(tb, TCA_U32_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt));
+
+       if (handle)
+               fprintf(f, "fh %s ", sprint_u32_handle(handle, b1));
+       if (TC_U32_NODE(handle))
+               fprintf(f, "order %d ", TC_U32_NODE(handle));
+
+       if (tb[TCA_U32_SEL]) {
+               if (RTA_PAYLOAD(tb[TCA_U32_SEL])  < sizeof(*sel))
+                       return -1;
+               sel = RTA_DATA(tb[TCA_U32_SEL]);
+       }
+
+       /* An entry is printed either as a hash table or as a key in a bucket. */
+       if (tb[TCA_U32_DIVISOR]) {
+               __u32 divisor = *(__u32*)RTA_DATA(tb[TCA_U32_DIVISOR]);
+
+               fprintf(f, "ht divisor %d ", divisor);
+       } else if (tb[TCA_U32_HASH]) {
+               __u32 htid = *(__u32*)RTA_DATA(tb[TCA_U32_HASH]);
+
+               fprintf(f, "key ht %x bkt %x ", TC_U32_USERHTID(htid), TC_U32_HASH(htid));
+       } else {
+               fprintf(f, "??? ");
+       }
+
+       if (tb[TCA_U32_CLASSID]) {
+               /* A class id without the TERMINAL flag is marked with '*'. */
+               const char *mark = (sel && (sel->flags&TC_U32_TERMINAL)) ? "" : "*";
+               __u32 classid = *(__u32*)RTA_DATA(tb[TCA_U32_CLASSID]);
+
+               fprintf(f, "%sflowid %s ", mark, sprint_tc_classid(classid, b1));
+       } else if (sel && sel->flags&TC_U32_TERMINAL) {
+               fprintf(f, "terminal flowid ??? ");
+       }
+
+       if (tb[TCA_U32_LINK]) {
+               __u32 link = *(__u32*)RTA_DATA(tb[TCA_U32_LINK]);
+
+               fprintf(f, "link %s ", sprint_u32_handle(link, b1));
+       }
+
+       if (tb[TCA_U32_POLICE]) {
+               fprintf(f, "\n");
+               tc_print_police(f, tb[TCA_U32_POLICE]);
+       }
+
+       if (!sel)
+               return 0;
+
+       /* One "match" line per key stored in the selector. */
+       key = sel->keys;
+       for (i = 0; i < sel->nkeys; i++, key++) {
+               fprintf(f, "\n  match %08x/%08x at %s%d",
+                       (unsigned int)ntohl(key->val),
+                       (unsigned int)ntohl(key->mask),
+                       key->offmask ? "nexthdr+" : "",
+                       key->off);
+       }
+
+       if (sel->flags&(TC_U32_VAROFFSET|TC_U32_OFFSET)) {
+               fprintf(f, "\n    offset ");
+               if (sel->flags&TC_U32_VAROFFSET)
+                       fprintf(f, "%04x>>%d at %d ", ntohs(sel->offmask), sel->offshift,  sel->offoff);
+               if (sel->off)
+                       fprintf(f, "plus %d ", sel->off);
+       }
+       if (sel->flags&TC_U32_EAT)
+               fprintf(f, " eat ");
+
+       if (sel->hmask) {
+               fprintf(f, "\n    hash mask %08x at %d ",
+                       (unsigned int)ntohl(sel->hmask), sel->hoff);
+       }
+
+       return 0;
+}
+
+struct filter_util u32_util = { /* positional initializer; order must match struct filter_util (declared elsewhere) */
+       NULL,
+       "u32", /* presumably the name this filter kind is looked up by -- confirm against struct filter_util */
+       u32_parse_opt, /* option parser defined above */
+       u32_print_opt, /* option printer defined above */
+};
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0f9808e5be6d76a4c3d50cd60c6e34eadef21ddb 100644 (file)
@@ -0,0 +1,64 @@
+/*
+ * m_estimator.c       Parse/print estimator module options.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "utils.h"
+#include "tc_util.h"
+
+static void est_help(void) __attribute__((noreturn));
+
+/* Write the estimator usage text to stderr, then terminate with exit(-1). */
+static void est_help(void)
+{
+       fputs("Usage: ... estimator INTERVAL TIME-CONST\n", stderr);
+       fputs("  INTERVAL is interval between measurements\n", stderr);
+       fputs("  TIME-CONST is averaging time constant\n", stderr);
+       fputs("Example: ... est 1sec 8sec\n", stderr);
+       exit(-1);
+}
+
+/*
+ * Parse "estimator INTERVAL TIME-CONST" from the argument vector.
+ *
+ * On entry *p_argv points at the keyword itself; the two following words are
+ * parsed with get_usecs() and passed to tc_setup_estimator(), which fills
+ * *est.  A "help" word in either position prints the usage text and exits.
+ * On return *p_argc / *p_argv describe the position of the TIME-CONST word.
+ *
+ * Returns 0.  Out-of-range parameters terminate the program with exit(-1);
+ * bad or duplicate arguments are reported through invarg()/duparg().
+ */
+int parse_estimator(int *p_argc, char ***p_argv, struct tc_estimator *est)
+{
+       /* NEXT_ARG() takes no arguments; presumably it steps these two locals. */
+       int argc = *p_argc;
+       char **argv = *p_argv;
+       unsigned interval;
+       unsigned time_const;
+
+       /* First operand: the measurement interval. */
+       NEXT_ARG();
+       if (est->ewma_log != 0)
+               duparg("estimator", *argv);
+       if (matches(*argv, "help") == 0)
+               est_help();
+       if (get_usecs(&interval, *argv) != 0)
+               invarg("estimator", "invalid estimator interval");
+
+       /* Second operand: the averaging time constant. */
+       NEXT_ARG();
+       if (matches(*argv, "help") == 0)
+               est_help();
+       if (get_usecs(&time_const, *argv) != 0)
+               invarg("estimator", "invalid estimator time constant");
+
+       if (tc_setup_estimator(interval, time_const, est) < 0) {
+               fprintf(stderr, "Error: estimator parameters are out of range.\n");
+               exit(-1);
+       }
+       if (show_raw)
+               fprintf(stderr, "[estimator i=%u e=%u]\n", est->interval, est->ewma_log);
+
+       *p_argc = argc;
+       *p_argv = argv;
+       return 0;
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0e76efc50502ba60efd5df850fd8833df1b8d73e 100644 (file)
@@ -0,0 +1,328 @@
+/*
+ * m_police.c          Parse/print policing module options.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ * FIXES:       19990619 - J Hadi Salim (hadi@cyberus.ca) 
+ *             simple addattr packaging fix.            
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "utils.h"
+#include "tc_util.h"
+
+/* Write the "police" usage summary to stderr. */
+static void explain(void)
+{
+       fputs("Usage: ... police rate BPS burst BYTES[/BYTES] [ mtu BYTES[/BYTES] ]\n", stderr);
+       fputs("                [ peakrate BPS ] [ avrate BPS ]\n", stderr);
+       fputs("                [ ACTION ]\n", stderr);
+       fputs("Where: ACTION := reclassify | drop | continue \n", stderr);
+}
+
+static void explain1(char *arg) /* report on stderr that the value given for option `arg` is illegal */
+{
+       fprintf(stderr, "Illegal \"%s\"\n", arg);
+}
+
+#define usage() return(-1)
+
+
+/*
+ * Translate a police action code into its textual name.
+ *
+ * Known codes map to constant strings; any other value is formatted as a
+ * decimal number into buf (at most len bytes) and buf is returned.
+ */
+char *police_action_n2a(int action, char *buf, int len)
+{
+       if (action == -1)
+               return "continue";
+       if (action == TC_POLICE_OK)
+               return "pass";
+       if (action == TC_POLICE_SHOT)
+               return "drop";
+       if (action == TC_POLICE_RECLASSIFY)
+               return "reclassify";
+
+       /* Unknown code: fall back to its numeric form. */
+       snprintf(buf, len, "%d", action);
+       return buf;
+}
+
+/*
+ * Translate a police action name (or a plain decimal number) into its code.
+ *
+ * Stores the code in *result and returns 0 on success.  Returns -1, leaving
+ * *result untouched, when arg is neither a known name nor a number with no
+ * trailing characters.
+ */
+int police_action_a2n(char *arg, int *result)
+{
+       int value;
+       char trailing;
+
+       if (matches(arg, "continue") == 0) {
+               value = -1;
+       } else if (matches(arg, "drop") == 0 || matches(arg, "shot") == 0) {
+               value = TC_POLICE_SHOT;
+       } else if (matches(arg, "pass") == 0 || strcmp(arg, "ok") == 0) {
+               value = TC_POLICE_OK;
+       } else if (matches(arg, "reclassify") == 0) {
+               value = TC_POLICE_RECLASSIFY;
+       } else if (sscanf(arg, "%d%c", &value, &trailing) != 1) {
+               /* Exactly one conversion means "number, nothing after it". */
+               return -1;
+       }
+
+       *result = value;
+       return 0;
+}
+
+
+/*
+ * Parse "ACTION" or "ACTION/RESULT".
+ *
+ * The part before the optional '/' is converted into *action and the part
+ * after it into *result (left untouched when there is no '/').  arg is
+ * split in place while the first part is parsed, and the '/' is put back
+ * before returning.  Returns 0 on success, -1 if either part is invalid.
+ */
+int get_police_result(int *action, int *result, char *arg)
+{
+       char *slash = strchr(arg, '/');
+       int rc;
+
+       /* Temporarily terminate the string at the separator. */
+       if (slash)
+               *slash = 0;
+       rc = police_action_a2n(arg, action);
+       if (slash)
+               *slash = '/';
+
+       if (rc)
+               return -1;
+       if (slash && police_action_a2n(slash+1, result))
+               return -1;
+       return 0;
+}
+
+/*
+ * Parse the arguments of a "police" clause and append them to message n as
+ * a nested attribute of type tca_id.
+ *
+ * Recognized words: index, burst/buffer/maxburst, mtu/minburst, mpu, rate,
+ * avrate, peakrate, the bare actions reclassify/drop/shot/continue/pass,
+ * "action ACTION[/RESULT]" and help.  Parsing stops at the first word that
+ * is none of these; *argc_p / *argv_p are updated to point at it.
+ *
+ * Returns 0 on success.  Returns -1 (after a diagnostic where one exists)
+ * when no word was consumed, a value is invalid or given twice, or the
+ * rate/burst/peakrate/mtu combination is incomplete.
+ */
+int parse_police(int *argc_p, char ***argv_p, int tca_id, struct nlmsghdr *n)
+{
+       int argc = *argc_p;
+       char **argv = *argv_p;
+       int ok=0;
+       struct tc_police p;
+       __u32 rtab[256];
+       __u32 ptab[256];
+       __u32 avrate = 0;
+       int presult = 0;
+       unsigned buffer=0, mtu=0, mpu=0;
+       int Rcell_log=-1, Pcell_log = -1;
+       struct rtattr *tail;
+
+       memset(&p, 0, sizeof(p));
+       p.action = TC_POLICE_RECLASSIFY;
+
+       if (argc <= 0)
+               return -1;
+
+       while (argc > 0) {
+               if (matches(*argv, "index") == 0) {
+                       NEXT_ARG();
+                       if (get_u32(&p.index, *argv, 16)) {
+                               fprintf(stderr, "Illegal \"index\"\n");
+                               return -1;
+                       }
+               } else if (matches(*argv, "burst") == 0 ||
+                       strcmp(*argv, "buffer") == 0 ||
+                       strcmp(*argv, "maxburst") == 0) {
+                       NEXT_ARG();
+                       if (buffer) {
+                               fprintf(stderr, "Double \"buffer/burst\" spec\n");
+                               return -1;
+                       }
+                       if (get_size_and_cell(&buffer, &Rcell_log, *argv) < 0) {
+                               explain1("buffer");
+                               return -1;
+                       }
+               } else if (strcmp(*argv, "mtu") == 0 ||
+                          strcmp(*argv, "minburst") == 0) {
+                       NEXT_ARG();
+                       if (mtu) {
+                               fprintf(stderr, "Double \"mtu/minburst\" spec\n");
+                               return -1;
+                       }
+                       if (get_size_and_cell(&mtu, &Pcell_log, *argv) < 0) {
+                               explain1("mtu");
+                               return -1;
+                       }
+               } else if (strcmp(*argv, "mpu") == 0) {
+                       NEXT_ARG();
+                       if (mpu) {
+                               fprintf(stderr, "Double \"mpu\" spec\n");
+                               return -1;
+                       }
+                       if (get_size(&mpu, *argv)) {
+                               explain1("mpu");
+                               return -1;
+                       }
+               } else if (strcmp(*argv, "rate") == 0) {
+                       NEXT_ARG();
+                       if (p.rate.rate) {
+                               fprintf(stderr, "Double \"rate\" spec\n");
+                               return -1;
+                       }
+                       if (get_rate(&p.rate.rate, *argv)) {
+                               explain1("rate");
+                               return -1;
+                       }
+               } else if (strcmp(*argv, "avrate") == 0) {
+                       NEXT_ARG();
+                       if (avrate) {
+                               fprintf(stderr, "Double \"avrate\" spec\n");
+                               return -1;
+                       }
+                       if (get_rate(&avrate, *argv)) {
+                               explain1("avrate");
+                               return -1;
+                       }
+               } else if (matches(*argv, "peakrate") == 0) {
+                       NEXT_ARG();
+                       if (p.peakrate.rate) {
+                               fprintf(stderr, "Double \"peakrate\" spec\n");
+                               return -1;
+                       }
+                       if (get_rate(&p.peakrate.rate, *argv)) {
+                               explain1("peakrate");
+                               return -1;
+                       }
+               } else if (matches(*argv, "reclassify") == 0) {
+                       p.action = TC_POLICE_RECLASSIFY;
+               } else if (matches(*argv, "drop") == 0 ||
+                          matches(*argv, "shot") == 0) {
+                       p.action = TC_POLICE_SHOT;
+               } else if (matches(*argv, "continue") == 0) {
+                       p.action = TC_POLICE_UNSPEC;
+               } else if (matches(*argv, "pass") == 0) {
+                       p.action = TC_POLICE_OK;
+               } else if (strcmp(*argv, "action") == 0) {
+                       NEXT_ARG();
+                       if (get_police_result(&p.action, &presult, *argv)) {
+                               fprintf(stderr, "Illegal \"action\"\n");
+                               return -1;
+                       }
+               } else if (strcmp(*argv, "help") == 0) {
+                       explain();
+                       return -1;
+               } else {
+                       /* Not a police word: hand it back to the caller. */
+                       break;
+               }
+               ok++;
+               argc--; argv++;
+       }
+
+       if (!ok)
+               return -1;
+
+       /* Messages read "<given> requires <missing>", as for peakrate below. */
+       if (p.rate.rate && !buffer) {
+               fprintf(stderr, "\"rate\" requires \"burst\".\n");
+               return -1;
+       }
+       if (p.peakrate.rate) {
+               if (!p.rate.rate) {
+                       fprintf(stderr, "\"peakrate\" requires \"rate\".\n");
+                       return -1;
+               }
+               if (!mtu) {
+                       fprintf(stderr, "\"mtu\" is required, if \"peakrate\" is requested.\n");
+                       return -1;
+               }
+       }
+
+       if (p.rate.rate) {
+               if ((Rcell_log = tc_calc_rtable(p.rate.rate, rtab, Rcell_log, mtu, mpu)) < 0) {
+                       fprintf(stderr, "POLICE: failed to calculate rate table.\n");
+                       return -1;
+               }
+               p.burst = tc_calc_xmittime(p.rate.rate, buffer);
+               p.rate.cell_log = Rcell_log;
+               p.rate.mpu = mpu;
+       }
+       p.mtu = mtu;
+       if (p.peakrate.rate) {
+               if ((Pcell_log = tc_calc_rtable(p.peakrate.rate, ptab, Pcell_log, mtu, mpu)) < 0) {
+                       fprintf(stderr, "POLICE: failed to calculate peak rate table.\n");
+                       return -1;
+               }
+               p.peakrate.cell_log = Pcell_log;
+               p.peakrate.mpu = mpu;
+       }
+
+       /*
+        * Open the nest and remember its start so the length can be patched.
+        * Every attribute uses the same 4096 limit: they all go into the same
+        * message, so a smaller limit on the early ones could only reject the
+        * nest header while later attributes were still accepted.
+        */
+       tail = (struct rtattr*)(((void*)n)+NLMSG_ALIGN(n->nlmsg_len));
+       addattr_l(n, 4096, tca_id, NULL, 0);
+       addattr_l(n, 4096, TCA_POLICE_TBF, &p, sizeof(p));
+       if (p.rate.rate)
+               addattr_l(n, 4096, TCA_POLICE_RATE, rtab, 1024);
+       if (p.peakrate.rate)
+               addattr_l(n, 4096, TCA_POLICE_PEAKRATE, ptab, 1024);
+       if (avrate)
+               addattr32(n, 4096, TCA_POLICE_AVRATE, avrate);
+       if (presult)
+               addattr32(n, 4096, TCA_POLICE_RESULT, presult);
+
+       tail->rta_len = (((void*)n)+NLMSG_ALIGN(n->nlmsg_len)) - (void*)tail;
+
+       *argc_p = argc;
+       *argv_p = argv;
+       return 0;
+}
+
+
+/*
+ * Print a nested police attribute block to f.
+ *
+ * Returns 0 when arg is NULL, when the TCA_POLICE_TBF attribute is missing
+ * (after printing a marker) and after a normal print; returns -1 when the
+ * TCA_POLICE_TBF payload is smaller than a struct tc_police.
+ */
+int tc_print_police(FILE *f, struct rtattr *arg)
+{
+       SPRINT_BUF(b1);
+       struct rtattr *tb[TCA_POLICE_MAX+1];
+       struct rtattr *tbf;
+       struct tc_police *p;
+       unsigned buffer;
+
+       if (!arg)
+               return 0;
+
+       memset(tb, 0, sizeof(tb));
+       parse_rtattr(tb, TCA_POLICE_MAX, RTA_DATA(arg), RTA_PAYLOAD(arg));
+
+       tbf = tb[TCA_POLICE_TBF];
+       if (!tbf) {
+               fprintf(f, "[NULL police tbf]");
+               return 0;
+       }
+       if (RTA_PAYLOAD(tbf)  < sizeof(*p)) {
+               fprintf(f, "[truncated police tbf]");
+               return -1;
+       }
+       p = RTA_DATA(tbf);
+
+       fprintf(f, "police %x ", p->index);
+
+       /* "action A" is followed by "/R " when a result code is attached. */
+       fprintf(f, "action %s", police_action_n2a(p->action, b1, sizeof(b1)));
+       if (tb[TCA_POLICE_RESULT]) {
+               int result = *(int*)RTA_DATA(tb[TCA_POLICE_RESULT]);
+
+               fprintf(f, "/%s ", police_action_n2a(result, b1, sizeof(b1)));
+       } else {
+               fprintf(f, " ");
+       }
+
+       fprintf(f, "rate %s ", sprint_rate(p->rate.rate, b1));
+       /* Turn the stored burst time back into a size at the configured rate. */
+       buffer = ((double)p->rate.rate*tc_core_tick2usec(p->burst))/1000000;
+       fprintf(f, "burst %s ", sprint_size(buffer, b1));
+       fprintf(f, "mtu %s ", sprint_size(p->mtu, b1));
+       if (show_raw)
+               fprintf(f, "[%08x] ", p->burst);
+       if (p->peakrate.rate)
+               fprintf(f, "peakrate %s ", sprint_rate(p->peakrate.rate, b1));
+       if (tb[TCA_POLICE_AVRATE]) {
+               __u32 avrate = *(__u32*)RTA_DATA(tb[TCA_POLICE_AVRATE]);
+
+               fprintf(f, "avrate %s ", sprint_rate(avrate, b1));
+       }
+
+       return 0;
+}
+
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..d17453870cf5a120581a5d43c19fba8a17c94a8e 100644 (file)
@@ -0,0 +1,268 @@
+/*
+ * q_atm.c             ATM.
+ *
+ * Hacked 1998-2000 by Werner Almesberger, EPFL ICA
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+#include <atm.h>
+#include <linux/atmdev.h>
+#include <linux/atmarp.h>
+
+#include "utils.h"
+#include "tc_util.h"
+
+
+#define MAX_HDR_LEN 64
+
+#define usage() return(-1)
+
+
+/*
+ * Parse qdisc-level "atm" options.  No options are accepted: any argument
+ * prints the usage line and yields -1; an empty list yields 0.
+ */
+static int atm_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n)
+{
+       if (argc == 0)
+               return 0;
+
+       fprintf(stderr,"Usage: atm\n");
+       return -1;
+}
+
+
+/* Write the usage summary for "atm" class options to stderr. */
+static void explain(void)
+{
+       fputs("Usage: ... atm ( pvc ADDR | svc ADDR [ sap SAP ] ) "
+           "[ qos QOS ] [ sndbuf BYTES ]\n", stderr);
+       fputs("  [ hdr HEX... ] [ excess ( CLASSID | clp ) ] "
+         "[ clip ]\n", stderr);
+}
+
+
+/*
+ * Parse the options of an ATM class and append them to message n.
+ *
+ * Recognized words: pvc ADDR, svc ADDR, qos QOS, sndbuf BYTES, sap SAP (only
+ * after "svc"), hdr HEX..., excess ( CLASSID | clp ) and clip.  After
+ * parsing, a socket of the selected address family is created, configured
+ * and connected; its descriptor is sent as TCA_ATM_FD inside a TCA_OPTIONS
+ * nest, together with the optional excess class and header bytes.
+ *
+ * Returns 0 on success.  On success the socket is intentionally left open,
+ * since its descriptor number is part of the message.  Returns -1 on a bad
+ * value or a failed socket operation (the socket is closed again in that
+ * case) and 1 on an unknown keyword.
+ */
+static int atm_parse_class_opt(struct qdisc_util *qu, int argc, char **argv,
+   struct nlmsghdr *n)
+{
+       struct sockaddr_atmsvc addr;
+       struct atm_qos qos;
+       struct atm_sap sap;
+       unsigned char hdr[MAX_HDR_LEN];
+       __u32 excess = 0;
+       struct rtattr *tail;
+       int sndbuf = 0;
+       int hdr_len = -1;
+       int set_clip = 0;
+       int s;
+
+       memset(&addr,0,sizeof(addr));
+       /* Defaults used when no "qos" / "sap" word is given. */
+       (void) text2qos("aal5,ubr:sdu=9180,rx:none",&qos,0);
+       (void) text2sap("blli:l2=iso8802",&sap,0);
+       while (argc > 0) {
+               if (!strcmp(*argv,"pvc")) {
+                       NEXT_ARG();
+                       if (text2atm(*argv,(struct sockaddr *) &addr,
+                           sizeof(addr),T2A_PVC | T2A_NAME) < 0) {
+                               explain();
+                               return -1;
+                       }
+               }
+               else if (!strcmp(*argv,"svc")) {
+                       NEXT_ARG();
+                       if (text2atm(*argv,(struct sockaddr *) &addr,
+                           sizeof(addr),T2A_SVC | T2A_NAME) < 0) {
+                               explain();
+                               return -1;
+                       }
+               }
+               else if (!strcmp(*argv,"qos")) {
+                       NEXT_ARG();
+                       if (text2qos(*argv,&qos,0) < 0) {
+                               explain();
+                               return -1;
+                       }
+               }
+               else if (!strcmp(*argv,"sndbuf")) {
+                       char *end;
+
+                       NEXT_ARG();
+                       sndbuf = strtol(*argv,&end,0);
+                       if (*end) {
+                               explain();
+                               return -1;
+                       }
+               }
+               else if (!strcmp(*argv,"sap")) {
+                       NEXT_ARG();
+                       /* A SAP only makes sense once an SVC address was given. */
+                       if (addr.sas_family != AF_ATMSVC ||
+                           text2sap(*argv,&sap,T2A_NAME) < 0) {
+                               explain();
+                               return -1;
+                       }
+               }
+               else if (!strcmp(*argv,"hdr")) {
+                       unsigned char *ptr;
+                       char *walk;
+
+                       NEXT_ARG();
+                       ptr = hdr;
+                       /* Pairs of hex digits, optionally separated by dots. */
+                       for (walk = *argv; *walk; walk++) {
+                               unsigned tmp;
+
+                               if (ptr == hdr+MAX_HDR_LEN) {
+                                       fprintf(stderr,"header is too long\n");
+                                       return -1;
+                               }
+                               if (*walk == '.') continue;
+                               /* <ctype.h> functions need unsigned char values. */
+                               if (!isxdigit((unsigned char) walk[0]) ||
+                                   !walk[1] ||
+                                   !isxdigit((unsigned char) walk[1])) {
+                                       explain();
+                                       return -1;
+                               }
+                               /* %x stores through an unsigned int pointer. */
+                               sscanf(walk,"%2x",&tmp);
+                               *ptr++ = tmp;
+                               walk++;
+                       }
+                       hdr_len = ptr-hdr;
+               }
+               else if (!strcmp(*argv,"excess")) {
+                       NEXT_ARG();
+                       if (!strcmp(*argv,"clp")) excess = 0;
+                       else if (get_tc_classid(&excess,*argv)) {
+                                       explain();
+                                       return -1;
+                               }
+               }
+               else if (!strcmp(*argv,"clip")) {
+                       set_clip = 1;
+               }
+               else {
+                       explain();
+                       return 1;
+               }
+               argc--;
+               argv++;
+       }
+       s = socket(addr.sas_family,SOCK_DGRAM,0);
+       if (s < 0) {
+               perror("socket");
+               return -1;
+       }
+       /* From here on every failure must release the socket again. */
+       if (setsockopt(s,SOL_ATM,SO_ATMQOS,&qos,sizeof(qos)) < 0) {
+               perror("SO_ATMQOS");
+               goto fail;
+       }
+       if (sndbuf) {
+               if (setsockopt(s,SOL_SOCKET,SO_SNDBUF,&sndbuf,
+                   sizeof(sndbuf)) < 0) {
+                       perror("SO_SNDBUF");
+                       goto fail;
+               }
+       }
+       if (addr.sas_family == AF_ATMSVC && setsockopt(s,SOL_ATM,SO_ATMSAP,
+           &sap,sizeof(sap)) < 0) {
+               perror("SO_ATMSAP");
+               goto fail;
+       }
+       if (connect(s,(struct sockaddr *) &addr,addr.sas_family == AF_ATMPVC ?
+           sizeof(struct sockaddr_atmpvc) : sizeof(addr)) < 0) {
+               perror("connect");
+               goto fail;
+       }
+       if (set_clip) {
+               if (ioctl(s,ATMARP_MKIP,0) < 0) {
+                       perror("ioctl ATMARP_MKIP");
+                       goto fail;
+               }
+       }
+       /* Open the TCA_OPTIONS nest and patch its length once it is filled. */
+       tail = (struct rtattr *) (((void *) n)+NLMSG_ALIGN(n->nlmsg_len));
+       addattr_l(n,1024,TCA_OPTIONS,NULL,0);
+       addattr_l(n,1024,TCA_ATM_FD,&s,sizeof(s));
+       if (excess) addattr_l(n,1024,TCA_ATM_EXCESS,&excess,sizeof(excess));
+       if (hdr_len != -1) addattr_l(n,1024,TCA_ATM_HDR,hdr,hdr_len);
+       tail->rta_len = (((void *) n)+NLMSG_ALIGN(n->nlmsg_len))-(void *) tail;
+       return 0;
+
+fail:
+       close(s);
+       return -1;
+}
+
+
+
+/*
+ * Print the options of an ATM qdisc/class to f.
+ *
+ * Handles the TCA_ATM_ADDR, TCA_ATM_HDR, TCA_ATM_EXCESS and TCA_ATM_STATE
+ * attributes nested in opt.  Attributes that are too short, cannot be
+ * converted, or carry an unknown state are reported on stderr and skipped.
+ * Always returns 0.
+ */
+static int atm_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
+{
+       struct rtattr *tb[TCA_ATM_MAX+1];
+       char buffer[MAX_ATM_ADDR_LEN+1];
+
+       if (!opt) return 0;
+       memset(tb, 0, sizeof(tb));
+       parse_rtattr(tb, TCA_ATM_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt));
+       if (tb[TCA_ATM_ADDR]) {
+               if (RTA_PAYLOAD(tb[TCA_ATM_ADDR]) <
+                   sizeof(struct sockaddr_atmpvc))
+                       fprintf(stderr,"ATM: address too short\n");
+               else if (atm2text(buffer,MAX_ATM_ADDR_LEN,
+                   RTA_DATA(tb[TCA_ATM_ADDR]),A2T_PRETTY | A2T_NAME) < 0)
+                       /* buffer was not filled in: do not print it. */
+                       fprintf(stderr,"atm2text error\n");
+               else
+                       fprintf(f,"pvc %s ",buffer);
+       }
+       if (tb[TCA_ATM_HDR]) {
+               int i;
+
+               /* Header bytes as dot-separated hex; " ." marks an empty header. */
+               fprintf(f,"hdr");
+               for (i = 0; i < RTA_PAYLOAD(tb[TCA_ATM_HDR]); i++)
+                       fprintf(f,"%c%02x",i ? '.' : ' ',
+                           ((unsigned char *) RTA_DATA(tb[TCA_ATM_HDR]))[i]);
+               if (!i) fprintf(f," .");
+               fprintf(f," ");
+       }
+       if (tb[TCA_ATM_EXCESS]) {
+               __u32 excess;
+
+               if (RTA_PAYLOAD(tb[TCA_ATM_EXCESS]) < sizeof(excess))
+                       fprintf(stderr,"ATM: excess class ID too short\n");
+               else {
+                       excess = *(__u32 *) RTA_DATA(tb[TCA_ATM_EXCESS]);
+                       if (!excess) fprintf(f,"excess clp ");
+                       else {
+                               char buf[64];
+
+                               print_tc_classid(buf,sizeof(buf),excess);
+                               fprintf(f,"excess %s ",buf);
+                       }
+               }
+       }
+       if (tb[TCA_ATM_STATE]) {
+               static const char *map[] = { ATM_VS2TXT_MAP };
+               int state;
+
+               if (RTA_PAYLOAD(tb[TCA_ATM_STATE]) < sizeof(state))
+                       fprintf(stderr,"ATM: state field too short\n");
+               else {
+                       state = *(int *) RTA_DATA(tb[TCA_ATM_STATE]);
+                       /* The value comes from the message: never index blindly. */
+                       if (state < 0 ||
+                           (size_t) state >= sizeof(map)/sizeof(map[0]))
+                               fprintf(stderr,"ATM: unknown state %d\n",state);
+                       else
+                               fprintf(f,"%s ",map[state]);
+               }
+       }
+       return 0;
+}
+
+
+static int atm_print_xstats(struct qdisc_util *qu, FILE *f, struct rtattr *xstats) /* prints nothing; all arguments are ignored */
+{
+       return 0;
+}
+
+
+struct qdisc_util atm_util = { /* positional initializer; order must match struct qdisc_util (declared elsewhere) */
+       NULL,
+       "atm", /* presumably the name this qdisc kind is looked up by -- confirm against struct qdisc_util */
+       atm_parse_opt, /* qdisc-level parser: accepts no options */
+       atm_print_opt,
+       atm_print_xstats,
+
+       atm_parse_class_opt, /* class-level option parser */
+       atm_print_opt /* same print routine as the earlier entry */
+};
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..51ed87a2993181db09615786934a97e6c875ad2a 100644 (file)
@@ -0,0 +1,555 @@
+/*
+ * q_cbq.c             CBQ.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "utils.h"
+#include "tc_util.h"
+#include "tc_cbq.h"
+
+/* Print the usage summary for CBQ class options on stderr. */
+static void explain_class(void)
+{
+       fputs("Usage: ... cbq bandwidth BPS rate BPS maxburst PKTS [ avpkt BYTES ]\n"
+             "               [ minburst PKTS ] [ bounded ] [ isolated ]\n"
+             "               [ allot BYTES ] [ mpu BYTES ] [ weight RATE ]\n"
+             "               [ prio NUMBER ] [ cell BYTES ] [ ewma LOG ]\n"
+             "               [ estimator INTERVAL TIME_CONSTANT ]\n"
+             "               [ split CLASSID ] [ defmap MASK/CHANGE ]\n",
+             stderr);
+}
+
+/* Print the usage summary for CBQ qdisc options on stderr. */
+static void explain(void)
+{
+       fputs("Usage: ... cbq bandwidth BPS avpkt BYTES [ mpu BYTES ]\n"
+             "               [ cell BYTES ] [ ewma LOG ]\n",
+             stderr);
+}
+
+/*
+ * Report on stderr that the value given for option "arg" is not
+ * acceptable.  "arg" is the option name, printed between double quotes.
+ */
+static void explain1(char *arg)
+{
+       fprintf(stderr,
+               "Illegal \"%s\"\n",
+               arg);
+}
+
+#define usage() return(-1)  /* Expands to a bare return statement: usage() leaves the calling function with -1. */
+
+/*
+ * Parse the command line options of a CBQ qdisc and append them to the
+ * netlink request "n" as attributes that follow an empty TCA_OPTIONS
+ * header, whose length is patched at the end to cover all of them.
+ *
+ * Recognized words: "bandwidth" (alias "rate"), "avpkt", "mpu", "cell",
+ * "ewma", "allot" and "help".  A non-zero bandwidth and a non-zero avpkt
+ * are required.
+ *
+ * Returns 0 on success.  Returns -1 after printing a diagnostic on stderr
+ * when a word is unknown, a value cannot be parsed, a required option is
+ * missing or the rate table cannot be computed.  "qu" is not used.
+ */
+static int cbq_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n)
+{
+       struct tc_ratespec r;
+       struct tc_cbq_lssopt lss;
+       __u32 rtab[256];
+       unsigned mpu=0, avpkt=0, allot=0;
+       int cell_log=-1;        /* -1: no "cell" given */
+       int ewma_log=-1;        /* -1: no "ewma" given, use TC_CBQ_DEF_EWMA */
+       struct rtattr *tail;
+
+       memset(&lss, 0, sizeof(lss));
+       memset(&r, 0, sizeof(r));
+
+       while (argc > 0) {
+               if (strcmp(*argv, "bandwidth") == 0 ||
+                   strcmp(*argv, "rate") == 0) {
+                       NEXT_ARG();
+                       if (get_rate(&r.rate, *argv)) {
+                               explain1("bandwidth");
+                               return -1;
+                       }
+               } else if (strcmp(*argv, "ewma") == 0) {
+                       /* Parse into an unsigned temporary: ewma_log is a
+                        * signed int whose negative values mean "not given",
+                        * so it must neither be written through an unsigned
+                        * pointer nor receive a wrapped-around huge value. */
+                       unsigned val;
+                       NEXT_ARG();
+                       if (get_unsigned(&val, *argv, 0)) {
+                               explain1("ewma");
+                               return -1;
+                       }
+                       if (val > 31) {
+                               fprintf(stderr, "ewma_log must be < 32\n");
+                               return -1;
+                       }
+                       ewma_log = val;
+               } else if (strcmp(*argv, "cell") == 0) {
+                       unsigned cell;
+                       int i;
+                       NEXT_ARG();
+                       if (get_size(&cell, *argv)) {
+                               explain1("cell");
+                               return -1;
+                       }
+                       /* Find log2(cell); the shift is done on an unsigned
+                        * 1 so that i == 31 is well defined. */
+                       for (i=0; i<32; i++)
+                               if ((1U<<i) == cell)
+                                       break;
+                       if (i>=32) {
+                               fprintf(stderr, "cell must be 2^n\n");
+                               return -1;
+                       }
+                       cell_log = i;
+               } else if (strcmp(*argv, "avpkt") == 0) {
+                       NEXT_ARG();
+                       if (get_size(&avpkt, *argv)) {
+                               explain1("avpkt");
+                               return -1;
+                       }
+               } else if (strcmp(*argv, "mpu") == 0) {
+                       NEXT_ARG();
+                       if (get_size(&mpu, *argv)) {
+                               explain1("mpu");
+                               return -1;
+                       }
+               } else if (strcmp(*argv, "allot") == 0) {
+                       NEXT_ARG();
+                       /* "allot" is not sent to the kernel for a qdisc; it
+                        * only feeds the rate table computation below. */
+                       if (get_size(&allot, *argv)) {
+                               explain1("allot");
+                               return -1;
+                       }
+               } else if (strcmp(*argv, "help") == 0) {
+                       explain();
+                       return -1;
+               } else {
+                       fprintf(stderr, "What is \"%s\"?\n", *argv);
+                       explain();
+                       return -1;
+               }
+               argc--; argv++;
+       }
+
+       /* OK. All options are parsed. */
+
+       if (r.rate == 0) {
+               fprintf(stderr, "CBQ: bandwidth is required parameter.\n");
+               return -1;
+       }
+       if (avpkt == 0) {
+               fprintf(stderr, "CBQ: \"avpkt\" is required.\n");
+               return -1;
+       }
+       /* allot is never smaller than 1.5 * avpkt */
+       if (allot < (avpkt*3)/2)
+               allot = (avpkt*3)/2;
+
+       if ((cell_log = tc_calc_rtable(r.rate, rtab, cell_log, allot, mpu)) < 0) {
+               fprintf(stderr, "CBQ: failed to calculate rate table.\n");
+               return -1;
+       }
+       r.cell_log = cell_log;
+       r.mpu = mpu;
+
+       if (ewma_log < 0)
+               ewma_log = TC_CBQ_DEF_EWMA;
+       lss.ewma_log = ewma_log;
+       lss.maxidle = tc_cbq_calc_maxidle(r.rate, r.rate, avpkt, lss.ewma_log, 0);
+       lss.change = TCF_CBQ_LSS_MAXIDLE|TCF_CBQ_LSS_EWMA|TCF_CBQ_LSS_AVPKT;
+       lss.avpkt = avpkt;
+
+       /* Remember where the TCA_OPTIONS header goes so that its length can
+        * be fixed up once all the attributes have been appended. */
+       tail = (struct rtattr*)(((void*)n)+NLMSG_ALIGN(n->nlmsg_len));
+       addattr_l(n, 1024, TCA_OPTIONS, NULL, 0);
+       addattr_l(n, 1024, TCA_CBQ_RATE, &r, sizeof(r));
+       addattr_l(n, 1024, TCA_CBQ_LSSOPT, &lss, sizeof(lss));
+       addattr_l(n, 3024, TCA_CBQ_RTAB, rtab, sizeof(rtab));
+       if (show_raw) {
+               int i;
+               for (i=0; i<256; i++)
+                       printf("%u ", rtab[i]);
+               printf("\n");
+       }
+       tail->rta_len = (((void*)n)+NLMSG_ALIGN(n->nlmsg_len)) - (void*)tail;
+       return 0;
+}
+
+/*
+ * Parse the command line options of a CBQ class and append them to the
+ * netlink request "n" as attributes that follow an empty TCA_OPTIONS
+ * header, whose length is patched at the end to cover all of them.
+ *
+ * Recognized words: "rate", "bandwidth", "minidle", "minburst",
+ * "maxburst", "bounded", "borrow", "isolated", "sharing", "ewma", "cell",
+ * "prio", "allot", "avpkt", "mpu", "weight", "split", "defmap" and "help".
+ * Only the option groups that were actually touched are sent: LSSOPT when
+ * any lss.change bit is set, WRROPT when prio/weight were given (or a
+ * default priority was filled in for an NLM_F_EXCL request), FOPT for
+ * split/defmap, and RATE plus RTAB when a rate was given.
+ *
+ * Returns 0 on success and -1 after printing a diagnostic on stderr when
+ * a word is unknown, a value cannot be parsed or a required companion
+ * option is missing.  "qu" is not used.
+ */
+static int cbq_parse_class_opt(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n)
+{
+       int wrr_ok=0, fopt_ok=0;
+       struct tc_ratespec r;
+       struct tc_cbq_lssopt lss;
+       struct tc_cbq_wrropt wrr;
+       struct tc_cbq_fopt fopt;
+       __u32 rtab[256];
+       unsigned mpu=0;
+       int cell_log=-1;        /* -1: no "cell" given */
+       int ewma_log=-1;        /* -1: no "ewma" given, use TC_CBQ_DEF_EWMA */
+       unsigned bndw = 0;
+       unsigned minburst=0, maxburst=0;
+       struct rtattr *tail;
+
+       memset(&r, 0, sizeof(r));
+       memset(&lss, 0, sizeof(lss));
+       memset(&wrr, 0, sizeof(wrr));
+       memset(&fopt, 0, sizeof(fopt));
+
+       while (argc > 0) {
+               if (strcmp(*argv, "rate") == 0) {
+                       NEXT_ARG();
+                       if (get_rate(&r.rate, *argv)) {
+                               explain1("rate");
+                               return -1;
+                       }
+               } else if (strcmp(*argv, "bandwidth") == 0) {
+                       NEXT_ARG();
+                       if (get_rate(&bndw, *argv)) {
+                               explain1("bandwidth");
+                               return -1;
+                       }
+               } else if (strcmp(*argv, "minidle") == 0) {
+                       NEXT_ARG();
+                       if (get_u32(&lss.minidle, *argv, 0)) {
+                               explain1("minidle");
+                               return -1;
+                       }
+                       lss.change |= TCF_CBQ_LSS_MINIDLE;
+               } else if (strcmp(*argv, "minburst") == 0) {
+                       NEXT_ARG();
+                       if (get_u32(&minburst, *argv, 0)) {
+                               explain1("minburst");
+                               return -1;
+                       }
+                       lss.change |= TCF_CBQ_LSS_OFFTIME;
+               } else if (strcmp(*argv, "maxburst") == 0) {
+                       NEXT_ARG();
+                       if (get_u32(&maxburst, *argv, 0)) {
+                               explain1("maxburst");
+                               return -1;
+                       }
+                       lss.change |= TCF_CBQ_LSS_MAXIDLE;
+               } else if (strcmp(*argv, "bounded") == 0) {
+                       lss.flags |= TCF_CBQ_LSS_BOUNDED;
+                       lss.change |= TCF_CBQ_LSS_FLAGS;
+               } else if (strcmp(*argv, "borrow") == 0) {
+                       lss.flags &= ~TCF_CBQ_LSS_BOUNDED;
+                       lss.change |= TCF_CBQ_LSS_FLAGS;
+               } else if (strcmp(*argv, "isolated") == 0) {
+                       lss.flags |= TCF_CBQ_LSS_ISOLATED;
+                       lss.change |= TCF_CBQ_LSS_FLAGS;
+               } else if (strcmp(*argv, "sharing") == 0) {
+                       lss.flags &= ~TCF_CBQ_LSS_ISOLATED;
+                       lss.change |= TCF_CBQ_LSS_FLAGS;
+               } else if (strcmp(*argv, "ewma") == 0) {
+                       /* Parse into an unsigned temporary: ewma_log is a
+                        * signed int whose negative values mean "not given",
+                        * so it must neither be written through an unsigned
+                        * pointer nor receive a wrapped-around huge value. */
+                       __u32 val;
+                       NEXT_ARG();
+                       if (get_u32(&val, *argv, 0)) {
+                               explain1("ewma");
+                               return -1;
+                       }
+                       if (val > 31) {
+                               fprintf(stderr, "ewma_log must be < 32\n");
+                               return -1;
+                       }
+                       ewma_log = val;
+                       lss.change |= TCF_CBQ_LSS_EWMA;
+               } else if (strcmp(*argv, "cell") == 0) {
+                       unsigned cell;
+                       int i;
+                       NEXT_ARG();
+                       if (get_size(&cell, *argv)) {
+                               explain1("cell");
+                               return -1;
+                       }
+                       /* Find log2(cell); the shift is done on an unsigned
+                        * 1 so that i == 31 is well defined. */
+                       for (i=0; i<32; i++)
+                               if ((1U<<i) == cell)
+                                       break;
+                       if (i>=32) {
+                               fprintf(stderr, "cell must be 2^n\n");
+                               return -1;
+                       }
+                       cell_log = i;
+               } else if (strcmp(*argv, "prio") == 0) {
+                       unsigned prio;
+                       NEXT_ARG();
+                       if (get_u32(&prio, *argv, 0)) {
+                               explain1("prio");
+                               return -1;
+                       }
+                       if (prio > TC_CBQ_MAXPRIO) {
+                               fprintf(stderr, "\"prio\" must be number in the range 1...%d\n", TC_CBQ_MAXPRIO);
+                               return -1;
+                       }
+                       wrr.priority = prio;
+                       wrr_ok++;
+               } else if (strcmp(*argv, "allot") == 0) {
+                       NEXT_ARG();
+                       if (get_size(&wrr.allot, *argv)) {
+                               explain1("allot");
+                               return -1;
+                       }
+               } else if (strcmp(*argv, "avpkt") == 0) {
+                       NEXT_ARG();
+                       if (get_size(&lss.avpkt, *argv)) {
+                               explain1("avpkt");
+                               return -1;
+                       }
+                       lss.change |= TCF_CBQ_LSS_AVPKT;
+               } else if (strcmp(*argv, "mpu") == 0) {
+                       NEXT_ARG();
+                       if (get_size(&mpu, *argv)) {
+                               explain1("mpu");
+                               return -1;
+                       }
+               } else if (strcmp(*argv, "weight") == 0) {
+                       NEXT_ARG();
+                       if (get_size(&wrr.weight, *argv)) {
+                               explain1("weight");
+                               return -1;
+                       }
+                       wrr_ok++;
+               } else if (strcmp(*argv, "split") == 0) {
+                       NEXT_ARG();
+                       if (get_tc_classid(&fopt.split, *argv)) {
+                               fprintf(stderr, "Invalid split node ID.\n");
+                               usage();
+                       }
+                       fopt_ok++;
+               } else if (strcmp(*argv, "defmap") == 0) {
+                       int err;
+                       NEXT_ARG();
+                       err = sscanf(*argv, "%08x/%08x", &fopt.defmap, &fopt.defchange);
+                       if (err < 1) {
+                               fprintf(stderr, "Invalid defmap, should be MASK32[/MASK]\n");
+                               return -1;
+                       }
+                       /* No "/CHANGE" part given: change every bit */
+                       if (err == 1)
+                               fopt.defchange = ~0;
+                       fopt_ok++;
+               } else if (strcmp(*argv, "help") == 0) {
+                       explain_class();
+                       return -1;
+               } else {
+                       fprintf(stderr, "What is \"%s\"?\n", *argv);
+                       explain_class();
+                       return -1;
+               }
+               argc--; argv++;
+       }
+
+       /* OK. All options are parsed. */
+
+       /* 1. Prepare link sharing scheduler parameters */
+       if (r.rate) {
+               /* The rate table is computed from the allot value as given
+                * on the command line, i.e. before it is raised to
+                * 1.5 * avpkt just below. */
+               unsigned pktsize = wrr.allot;
+               if (wrr.allot < (lss.avpkt*3)/2)
+                       wrr.allot = (lss.avpkt*3)/2;
+               if ((cell_log = tc_calc_rtable(r.rate, rtab, cell_log, pktsize, mpu)) < 0) {
+                       fprintf(stderr, "CBQ: failed to calculate rate table.\n");
+                       return -1;
+               }
+               r.cell_log = cell_log;
+               r.mpu = mpu;
+       }
+       if (ewma_log < 0)
+               ewma_log = TC_CBQ_DEF_EWMA;
+       lss.ewma_log = ewma_log;
+       if (lss.change&(TCF_CBQ_LSS_OFFTIME|TCF_CBQ_LSS_MAXIDLE)) {
+               if (lss.avpkt == 0) {
+                       fprintf(stderr, "CBQ: avpkt is required for max/minburst.\n");
+                       return -1;
+               }
+               if (bndw==0 || r.rate == 0) {
+                       fprintf(stderr, "CBQ: bandwidth&rate are required for max/minburst.\n");
+                       return -1;
+               }
+       }
+       /* An NLM_F_EXCL request without a priority gets TC_CBQ_MAXPRIO and,
+        * if needed, an allot derived from avpkt. */
+       if (wrr.priority == 0 && (n->nlmsg_flags&NLM_F_EXCL)) {
+               wrr_ok = 1;
+               wrr.priority = TC_CBQ_MAXPRIO;
+               if (wrr.allot == 0)
+                       wrr.allot = (lss.avpkt*3)/2;
+       }
+       if (wrr_ok) {
+               if (wrr.weight == 0)
+                       wrr.weight = (wrr.priority == TC_CBQ_MAXPRIO) ? 1 : r.rate;
+               if (wrr.allot == 0) {
+                       fprintf(stderr, "CBQ: \"allot\" is required to set WRR parameters.\n");
+                       return -1;
+               }
+       }
+       if (lss.change&TCF_CBQ_LSS_MAXIDLE) {
+               lss.maxidle = tc_cbq_calc_maxidle(bndw, r.rate, lss.avpkt, ewma_log, maxburst);
+               lss.change |= TCF_CBQ_LSS_MAXIDLE;
+               lss.change |= TCF_CBQ_LSS_EWMA|TCF_CBQ_LSS_AVPKT;
+       }
+       if (lss.change&TCF_CBQ_LSS_OFFTIME) {
+               lss.offtime = tc_cbq_calc_offtime(bndw, r.rate, lss.avpkt, ewma_log, minburst);
+               lss.change |= TCF_CBQ_LSS_OFFTIME;
+               lss.change |= TCF_CBQ_LSS_EWMA|TCF_CBQ_LSS_AVPKT;
+       }
+       if (lss.change&TCF_CBQ_LSS_MINIDLE) {
+               lss.minidle <<= lss.ewma_log;
+               lss.change |= TCF_CBQ_LSS_EWMA;
+       }
+
+       /* Remember where the TCA_OPTIONS header goes so that its length can
+        * be fixed up once all the attributes have been appended. */
+       tail = (struct rtattr*)(((void*)n)+NLMSG_ALIGN(n->nlmsg_len));
+       addattr_l(n, 1024, TCA_OPTIONS, NULL, 0);
+       if (lss.change) {
+               lss.change |= TCF_CBQ_LSS_FLAGS;
+               addattr_l(n, 1024, TCA_CBQ_LSSOPT, &lss, sizeof(lss));
+       }
+       if (wrr_ok)
+               addattr_l(n, 1024, TCA_CBQ_WRROPT, &wrr, sizeof(wrr));
+       if (fopt_ok)
+               addattr_l(n, 1024, TCA_CBQ_FOPT, &fopt, sizeof(fopt));
+       if (r.rate) {
+               addattr_l(n, 1024, TCA_CBQ_RATE, &r, sizeof(r));
+               addattr_l(n, 3024, TCA_CBQ_RTAB, rtab, sizeof(rtab));
+               if (show_raw) {
+                       int i;
+                       for (i=0; i<256; i++)
+                               printf("%u ", rtab[i]);
+                       printf("\n");
+               }
+       }
+       tail->rta_len = (((void*)n)+NLMSG_ALIGN(n->nlmsg_len)) - (void*)tail;
+       return 0;
+}
+
+
+/*
+ * Print the CBQ options carried in the nested attribute "opt" to "f".
+ *
+ * Each known sub-attribute (RATE, LSSOPT, WRROPT, FOPT, OVL_STRATEGY) is
+ * only used when its payload is at least as large as the matching
+ * structure; a shorter one is reported on stderr and skipped.  The
+ * overlimit strategy is validated but not printed.  Several fields are
+ * shown only when show_details is set, and raw hex values only when
+ * show_raw is set as well.
+ *
+ * Always returns 0; a NULL "opt" prints nothing.  "qu" is not used.
+ */
+static int cbq_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
+{
+       struct rtattr *tb[TCA_CBQ_MAX+1];
+       struct tc_ratespec *r = NULL;
+       struct tc_cbq_lssopt *lss = NULL;
+       struct tc_cbq_wrropt *wrr = NULL;
+       struct tc_cbq_fopt *fopt = NULL;
+       struct tc_cbq_ovl *ovl = NULL;
+
+       if (opt == NULL)
+               return 0;
+
+       memset(tb, 0, sizeof(tb));
+       parse_rtattr(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt));
+
+       if (tb[TCA_CBQ_RATE]) {
+               if (RTA_PAYLOAD(tb[TCA_CBQ_RATE]) < sizeof(*r))
+                       fprintf(stderr, "CBQ: too short rate opt\n");
+               else
+                       r = RTA_DATA(tb[TCA_CBQ_RATE]);
+       }
+       if (tb[TCA_CBQ_LSSOPT]) {
+               if (RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT]) < sizeof(*lss))
+                       fprintf(stderr, "CBQ: too short lss opt\n");
+               else
+                       lss = RTA_DATA(tb[TCA_CBQ_LSSOPT]);
+       }
+       if (tb[TCA_CBQ_WRROPT]) {
+               if (RTA_PAYLOAD(tb[TCA_CBQ_WRROPT]) < sizeof(*wrr))
+                       fprintf(stderr, "CBQ: too short wrr opt\n");
+               else
+                       wrr = RTA_DATA(tb[TCA_CBQ_WRROPT]);
+       }
+       if (tb[TCA_CBQ_FOPT]) {
+               if (RTA_PAYLOAD(tb[TCA_CBQ_FOPT]) < sizeof(*fopt))
+                       fprintf(stderr, "CBQ: too short fopt\n");
+               else
+                       fopt = RTA_DATA(tb[TCA_CBQ_FOPT]);
+       }
+       if (tb[TCA_CBQ_OVL_STRATEGY]) {
+               /* Both values are cast so that they really match "%u";
+                * sizeof yields a size_t, which is wider than unsigned on
+                * LP64 targets. */
+               if (RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY]) < sizeof(*ovl))
+                       fprintf(stderr, "CBQ: too short overlimit strategy %u/%u\n",
+                               (unsigned) RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY]),
+                               (unsigned) sizeof(*ovl));
+               else
+                       ovl = RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY]);
+       }
+
+       if (r) {
+               char buf[64];
+               print_rate(buf, sizeof(buf), r->rate);
+               fprintf(f, "rate %s ", buf);
+               if (show_details) {
+                       /* unsigned shift: the result is printed with "%u" */
+                       fprintf(f, "cell %ub ", 1U<<r->cell_log);
+                       if (r->mpu)
+                               fprintf(f, "mpu %ub ", r->mpu);
+               }
+       }
+       if (lss && lss->flags) {
+               int comma=0;
+               fprintf(f, "(");
+               if (lss->flags&TCF_CBQ_LSS_BOUNDED) {
+                       fprintf(f, "bounded");
+                       comma=1;
+               }
+               if (lss->flags&TCF_CBQ_LSS_ISOLATED) {
+                       if (comma)
+                               fprintf(f, ",");
+                       fprintf(f, "isolated");
+               }
+               fprintf(f, ") ");
+       }
+       if (wrr) {
+               if (wrr->priority != TC_CBQ_MAXPRIO)
+                       fprintf(f, "prio %u", wrr->priority);
+               else
+                       fprintf(f, "prio no-transmit");
+               if (show_details) {
+                       char buf[64];
+                       fprintf(f, "/%u ", wrr->cpriority);
+                       if (wrr->weight != 1) {
+                               print_rate(buf, sizeof(buf), wrr->weight);
+                               fprintf(f, "weight %s ", buf);
+                       }
+                       if (wrr->allot)
+                               fprintf(f, "allot %ub ", wrr->allot);
+               }
+       }
+       if (lss && show_details) {
+               fprintf(f, "\nlevel %u ewma %u avpkt %ub ", lss->level, lss->ewma_log, lss->avpkt);
+               if (lss->maxidle) {
+                       fprintf(f, "maxidle %luus ", tc_core_tick2usec(lss->maxidle>>lss->ewma_log));
+                       if (show_raw)
+                               fprintf(f, "[%08x] ", lss->maxidle);
+               }
+               /* a minidle of 0x7fffffff is not printed */
+               if (lss->minidle!=0x7fffffff) {
+                       fprintf(f, "minidle %luus ", tc_core_tick2usec(lss->minidle>>lss->ewma_log));
+                       if (show_raw)
+                               fprintf(f, "[%08x] ", lss->minidle);
+               }
+               if (lss->offtime) {
+                       fprintf(f, "offtime %luus ", tc_core_tick2usec(lss->offtime));
+                       if (show_raw)
+                               fprintf(f, "[%08x] ", lss->offtime);
+               }
+       }
+       if (fopt && show_details) {
+               char buf[64];
+               print_tc_classid(buf, sizeof(buf), fopt->split);
+               fprintf(f, "\nsplit %s ", buf);
+               if (fopt->defmap) {
+                       fprintf(f, "defmap %08x", fopt->defmap);
+               }
+       }
+       return 0;
+}
+
+/*
+ * Print the CBQ extended statistics found in "xstats" to "f".
+ *
+ * Returns 0 when there is nothing to print (NULL attribute) or after
+ * printing, and -1 when the payload is smaller than struct tc_cbq_xstats.
+ * "qu" is not used.
+ */
+static int cbq_print_xstats(struct qdisc_util *qu, FILE *f, struct rtattr *xstats)
+{
+       struct tc_cbq_xstats *st;
+
+       if (!xstats)
+               return 0;
+
+       if (RTA_PAYLOAD(xstats) >= sizeof(*st)) {
+               st = RTA_DATA(xstats);
+               fprintf(f, "  borrowed %u overactions %u avgidle %g undertime %g",
+                       st->borrows, st->overactions,
+                       (double)st->avgidle, (double)st->undertime);
+               return 0;
+       }
+
+       /* payload too short to hold the statistics structure */
+       return -1;
+}
+
+struct qdisc_util cbq_util = {  /* Handler table for the "cbq" qdisc.  The
+                                 * initializer is positional, so the entry
+                                 * order must match the member order of
+                                 * struct qdisc_util (declared outside this
+                                 * file). */
+       NULL,
+       "cbq",
+       cbq_parse_opt,
+       cbq_print_opt,
+       cbq_print_xstats,
+       /* cbq_print_opt is listed a second time below, after the class option parser. */
+       cbq_parse_class_opt,
+       cbq_print_opt,
+};
+
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e2734cda73a2d7a6e191123ea9c91d5a1ad95629 100644 (file)
@@ -0,0 +1,61 @@
+/*
+ * q_csz.c             CSZ.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "utils.h"
+#include "tc_util.h"
+
+/*
+ * Print the (empty) usage line of the CSZ qdisc on stderr.  Declared with
+ * a (void) parameter list so that the definition is a real prototype.
+ */
+static void explain(void)
+{
+       fprintf(stderr, "Usage: ... csz \n");
+}
+
+/*
+ * Report on stderr that the value given for option "arg" is not
+ * acceptable.  "arg" is the option name, printed between double quotes.
+ */
+static void explain1(char *arg)
+{
+       fprintf(stderr,
+               "Illegal \"%s\"\n",
+               arg);
+}
+
+
+#define usage() return(-1)  /* Expands to a bare return statement: usage() leaves the calling function with -1.  Not invoked anywhere in this file. */
+
+/*
+ * Option parser for the CSZ qdisc.  Nothing is parsed: every call fails
+ * with -1 and none of the arguments is looked at.
+ */
+static int csz_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n)
+{
+       (void) qu;
+       (void) argc;
+       (void) argv;
+       (void) n;
+
+       return -1;
+}
+
+/*
+ * Option printer for the CSZ qdisc.  Nothing is printed: every call fails
+ * with -1 and none of the arguments is looked at.
+ */
+static int csz_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
+{
+       (void) qu;
+       (void) f;
+       (void) opt;
+
+       return -1;
+}
+
+/*
+ * Extended statistics printer for the CSZ qdisc.  Nothing is printed:
+ * every call fails with -1 and none of the arguments is looked at.
+ */
+static int csz_print_xstats(struct qdisc_util *qu, FILE *f, struct rtattr *xstats)
+{
+       (void) qu;
+       (void) f;
+       (void) xstats;
+
+       return -1;
+}
+
+struct qdisc_util csz_util = {  /* Handler table for the "csz" qdisc.  The
+                                 * initializer is positional and stops
+                                 * after the xstats printer; any further
+                                 * members of struct qdisc_util are left
+                                 * zero-initialized. */
+       NULL,
+       "csz",
+       csz_parse_opt,
+       csz_print_opt,
+       csz_print_xstats,
+};
+
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..8a1cd4d836ea55487e0cae2a630a562a0061b0fd 100644 (file)
@@ -0,0 +1,186 @@
+/*
+ * q_dsmark.c          Differentiated Services field marking.
+ *
+ * Hacked 1998,1999 by Werner Almesberger, EPFL ICA
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "utils.h"
+#include "tc_util.h"
+
+
+#define usage() return(-1)  /* Expands to a bare return statement: usage() leaves the calling function with -1.  Not invoked anywhere in this file. */
+
+
+/* Print the usage line for dsmark qdisc options on stderr. */
+static void explain(void)
+{
+       fputs("Usage: dsmark indices INDICES [ default_index "
+             "DEFAULT_INDEX ] [ set_tc_index ]\n",
+             stderr);
+}
+
+
+/*
+ * Parse the command line options of a dsmark qdisc and append them to
+ * the netlink request "n" as attributes that follow an empty TCA_OPTIONS
+ * header, whose length is patched at the end to cover all of them.
+ *
+ * "indices N" is required and must be non-zero; "default_index N" (alias
+ * "default") and "set_tc_index" are optional.  Both numbers are sent as
+ * 16-bit values, so anything that is not a complete number in the range
+ * 0..0xffff is rejected instead of being silently truncated.
+ *
+ * Returns 0 on success; on any error the usage text is printed on stderr
+ * and -1 is returned.  "qu" is not used.
+ */
+static int dsmark_parse_opt(struct qdisc_util *qu, int argc, char **argv,
+    struct nlmsghdr *n)
+{
+       struct rtattr *tail;
+       __u16 ind;
+       char *end;
+       int dflt,set_tc_index;
+
+       ind = set_tc_index = 0;
+       dflt = -1;      /* -1: no default index given */
+       while (argc > 0) {
+               if (!strcmp(*argv,"indices")) {
+                       unsigned long val;
+
+                       NEXT_ARG();
+                       val = strtoul(*argv,&end,0);
+                       /* reject empty input, trailing junk and values
+                        * that do not fit the 16-bit attribute */
+                       if (end == *argv || *end || val > 0xffff) {
+                               explain();
+                               return -1;
+                       }
+                       ind = val;
+               }
+               else if (!strcmp(*argv,"default_index") || !strcmp(*argv,
+                   "default")) {
+                       unsigned long val;
+
+                       NEXT_ARG();
+                       val = strtoul(*argv,&end,0);
+                       /* the range check also keeps a huge value from
+                        * colliding with the -1 "not given" marker */
+                       if (end == *argv || *end || val > 0xffff) {
+                               explain();
+                               return -1;
+                       }
+                       dflt = val;
+               }
+               else if (!strcmp(*argv,"set_tc_index")) {
+                       set_tc_index = 1;
+               }
+               else {
+                       explain();
+                       return -1;
+               }
+               argc--;
+               argv++;
+       }
+       if (!ind) {
+               explain();
+               return -1;
+       }
+       tail = (struct rtattr *) (((void *) n)+NLMSG_ALIGN(n->nlmsg_len));
+       addattr_l(n,1024,TCA_OPTIONS,NULL,0);
+       addattr_l(n,1024,TCA_DSMARK_INDICES,&ind,sizeof(ind));
+       if (dflt != -1) {
+           __u16 tmp = dflt;
+
+           addattr_l(n,1024,TCA_DSMARK_DEFAULT_INDEX,&tmp,sizeof(tmp));
+       }
+       if (set_tc_index) addattr_l(n,1024,TCA_DSMARK_SET_TC_INDEX,NULL,0);
+       /* The end of the option block is taken at nlmsg_len as it stands,
+        * without NLMSG_ALIGN; left exactly as in the original. */
+       tail->rta_len = (((void *) n)+n->nlmsg_len)-(void *) tail;
+       return 0;
+}
+
+
+/* Print the usage line for dsmark class options on stderr. */
+static void explain_class(void)
+{
+       fputs("Usage: ... dsmark [ mask MASK ] [ value VALUE ]\n", stderr);
+}
+
+
+/*
+ * Parse the command line options of a dsmark class ("mask N" and
+ * "value N", each optional and repeatable) and append them to the netlink
+ * request "n" as one-byte attributes that follow an empty TCA_OPTIONS
+ * header, whose length is patched at the end to cover all of them.
+ *
+ * Both numbers are sent as a single byte, so anything that is not a
+ * complete number in the range 0..0xff is rejected instead of being
+ * silently truncated.
+ *
+ * Returns 0 on success; on any error the usage text is printed on stderr
+ * and -1 is returned (attributes already appended to "n" stay there).
+ * "qu" is not used.
+ */
+static int dsmark_parse_class_opt(struct qdisc_util *qu, int argc, char **argv,
+   struct nlmsghdr *n)
+{
+       struct rtattr *tail;
+       __u8 tmp;
+       char *end;
+
+       tail = (struct rtattr *) (((void *) n)+NLMSG_ALIGN(n->nlmsg_len));
+       addattr_l(n,1024,TCA_OPTIONS,NULL,0);
+       while (argc > 0) {
+               if (!strcmp(*argv,"mask")) {
+                       unsigned long val;
+
+                       NEXT_ARG();
+                       val = strtoul(*argv,&end,0);
+                       /* reject empty input, trailing junk and values
+                        * that do not fit in one byte */
+                       if (end == *argv || *end || val > 0xff) {
+                               explain_class();
+                               return -1;
+                       }
+                       tmp = val;
+                       addattr_l(n,1024,TCA_DSMARK_MASK,&tmp,1);
+               }
+               else if (!strcmp(*argv,"value")) {
+                       unsigned long val;
+
+                       NEXT_ARG();
+                       val = strtoul(*argv,&end,0);
+                       if (end == *argv || *end || val > 0xff) {
+                               explain_class();
+                               return -1;
+                       }
+                       tmp = val;
+                       addattr_l(n,1024,TCA_DSMARK_VALUE,&tmp,1);
+               }
+               else {
+                       explain_class();
+                       return -1;
+               }
+               argc--;
+               argv++;
+       }
+       /* The end of the option block is taken at nlmsg_len as it stands,
+        * without NLMSG_ALIGN; left exactly as in the original. */
+       tail->rta_len = (((void *) n)+n->nlmsg_len)-(void *) tail;
+       return 0;
+}
+
+
+
+/*
+ * Print the dsmark options carried in the nested attribute "opt" to "f".
+ *
+ * mask and value are printed as one byte, indices and default_index as
+ * 16-bit numbers, set_tc_index as a bare keyword.  An attribute whose
+ * payload is too small is reported on stderr and skipped.  Always returns
+ * 0; a NULL "opt" prints nothing.  "qu" is not used.
+ */
+static int dsmark_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
+{
+       struct rtattr *tb[TCA_DSMARK_MAX+1];
+       struct rtattr *attr;
+
+       if (opt == NULL)
+               return 0;
+
+       memset(tb, 0, sizeof(tb));
+       parse_rtattr(tb, TCA_DSMARK_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt));
+
+       attr = tb[TCA_DSMARK_MASK];
+       if (attr) {
+               if (RTA_PAYLOAD(attr) == 0)
+                       fprintf(stderr,"dsmark: empty mask\n");
+               else
+                       fprintf(f,"mask 0x%02x ",*(__u8 *) RTA_DATA(attr));
+       }
+
+       attr = tb[TCA_DSMARK_VALUE];
+       if (attr) {
+               if (RTA_PAYLOAD(attr) == 0)
+                       fprintf(stderr,"dsmark: empty value\n");
+               else
+                       fprintf(f,"value 0x%02x ",*(__u8 *) RTA_DATA(attr));
+       }
+
+       attr = tb[TCA_DSMARK_INDICES];
+       if (attr) {
+               if (RTA_PAYLOAD(attr) < sizeof(__u16))
+                       fprintf(stderr,"dsmark: indices too short\n");
+               else
+                       fprintf(f,"indices 0x%04x ",*(__u16 *) RTA_DATA(attr));
+       }
+
+       attr = tb[TCA_DSMARK_DEFAULT_INDEX];
+       if (attr) {
+               if (RTA_PAYLOAD(attr) < sizeof(__u16))
+                       fprintf(stderr,"dsmark: default_index too short\n");
+               else
+                       fprintf(f,"default_index 0x%04x ",
+                           *(__u16 *) RTA_DATA(attr));
+       }
+
+       /* presence alone matters for this one; it carries no payload here */
+       if (tb[TCA_DSMARK_SET_TC_INDEX])
+               fprintf(f,"set_tc_index ");
+
+       return 0;
+}
+
+
+/*
+ * Extended statistics hook for the dsmark qdisc.  Nothing is printed and
+ * none of the arguments is looked at; the function always reports
+ * success.
+ */
+static int dsmark_print_xstats(struct qdisc_util *qu, FILE *f, struct rtattr *xstats)
+{
+       (void) qu;
+       (void) f;
+       (void) xstats;
+
+       return 0;
+}
+
+
+struct qdisc_util dsmark_util = {  /* Handler table for the "dsmark" qdisc.
+                                    * The initializer is positional, so the
+                                    * entry order must match the member
+                                    * order of struct qdisc_util (declared
+                                    * outside this file). */
+       NULL,
+       "dsmark",
+       dsmark_parse_opt,
+       dsmark_print_opt,
+       dsmark_print_xstats,
+       /* dsmark_print_opt is listed a second time below, after the class option parser. */
+       dsmark_parse_class_opt,
+       dsmark_print_opt
+};
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..4cb9fdedb544fccde13e1137a8ecf7bc6f33681e 100644 (file)
@@ -0,0 +1,101 @@
+/*
+ * q_fifo.c            FIFO.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "utils.h"
+#include "tc_util.h"
+
+/* Print the usage line shared by the pfifo and bfifo qdiscs on stderr. */
+static void explain(void)
+{
+       fputs("Usage: ... [p|b]fifo [ limit NUMBER ]\n", stderr);
+}
+
+#define usage() return(-1)  /* Expands to a bare return statement: usage() leaves the calling function with -1. */
+
+/*
+ * Parse the options of a pfifo/bfifo qdisc.  The only option is
+ * "limit NUMBER"; when it was given, a struct tc_fifo_qopt is appended to
+ * the netlink request "n" as TCA_OPTIONS, otherwise nothing is appended.
+ *
+ * Returns 0 on success and -1 after printing a diagnostic and/or the
+ * usage text on stderr ("help", unknown word, bad limit).  "qu" is not
+ * used.
+ */
+static int fifo_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n)
+{
+       struct tc_fifo_qopt opt;
+       int have_limit = 0;
+
+       memset(&opt, 0, sizeof(opt));
+
+       for (; argc > 0; argc--, argv++) {
+               if (!strcmp(*argv, "limit")) {
+                       NEXT_ARG();
+                       if (get_size(&opt.limit, *argv) != 0) {
+                               fprintf(stderr, "Illegal \"limit\"\n");
+                               return -1;
+                       }
+                       have_limit = 1;
+                       continue;
+               }
+
+               /* Any other word ends parsing with the usage text; only a
+                * word other than "help" is first reported as unknown. */
+               if (strcmp(*argv, "help") != 0)
+                       fprintf(stderr, "What is \"%s\"?\n", *argv);
+               explain();
+               return -1;
+       }
+
+       if (have_limit)
+               addattr_l(n, 1024, TCA_OPTIONS, &opt, sizeof(opt));
+       return 0;
+}
+
+/*
+ * Print the limit of a pfifo/bfifo qdisc to "f".
+ *
+ * For the handler whose id is "bfifo" the limit is formatted with
+ * sprint_size(); for any other id it is printed as a plain number with a
+ * "p" suffix.  Returns 0 on success or when "opt" is NULL, and -1 when
+ * the payload is smaller than struct tc_fifo_qopt.
+ */
+static int fifo_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
+{
+       struct tc_fifo_qopt *qopt;
+
+       if (!opt)
+               return 0;
+       if (RTA_PAYLOAD(opt)  < sizeof(*qopt))
+               return -1;
+
+       qopt = RTA_DATA(opt);
+       if (strcmp(qu->id, "bfifo") != 0) {
+               fprintf(f, "limit %up", qopt->limit);
+       } else {
+               SPRINT_BUF(b1);
+               fprintf(f, "limit %s", sprint_size(qopt->limit, b1));
+       }
+       return 0;
+}
+
+/*
+ * Extended statistics hook for the fifo qdiscs.  Nothing is printed and
+ * none of the arguments is looked at; the function always reports
+ * success.
+ */
+static int fifo_print_xstats(struct qdisc_util *qu, FILE *f, struct rtattr *xstats)
+{
+       (void) qu;
+       (void) f;
+       (void) xstats;
+
+       return 0;
+}
+
+
+struct qdisc_util bfifo_util = {  /* Handler table for "bfifo".  It shares
+                                   * all handlers with pfifo_util;
+                                   * fifo_print_opt compares qu->id with
+                                   * "bfifo" to choose the output format.
+                                   * Positional initializer; members after
+                                   * the xstats printer stay zero. */
+       NULL,
+       "bfifo",
+       fifo_parse_opt,
+       fifo_print_opt,
+       fifo_print_xstats,
+};
+
+struct qdisc_util pfifo_util = {  /* Handler table for "pfifo".  It shares
+                                   * all handlers with bfifo_util.
+                                   * Positional initializer; members after
+                                   * the xstats printer stay zero. */
+       NULL,
+       "pfifo",
+       fifo_parse_opt,
+       fifo_print_opt,
+       fifo_print_xstats,
+};
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..b63f8ae78198c22948bee363be0204c7facd3166 100644 (file)
@@ -0,0 +1,345 @@
+/*
+ * q_gred.c            GRED.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    J Hadi Salim(hadi@nortelnetworks.com)   
+ *             code ruthlessly ripped from 
+ *            Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "utils.h"
+#include "tc_util.h"
+
+#include "tc_red.h"
+
+
+#if 0
+#define DPRINTF(format,args...) fprintf(stderr,format,##args)
+#else
+#define DPRINTF(format,args...)
+#endif
+
+/* Print the gred usage summary (per-DP form and "setup" form) to stderr. */
+static void explain(void)
+{
+       fprintf(stderr,
+           "Usage: ... gred DP drop-probability limit BYTES min BYTES max BYTES\n"
+           "    avpkt BYTES burst PACKETS probability PROBABILITY bandwidth KBPS\n"
+           "    [prio value]\n"
+           " OR ...\n"
+           " gred setup DPs <num of DPs> default <default DP> [grio]\n");
+}
+
+#define usage() return(-1)
+
+/*
+ * Parse "gred setup DPs <n> default <dp> [grio]" and append the table-wide
+ * setup (struct tc_gred_sopt) to the request as TCA_GRED_DPS nested inside
+ * TCA_OPTIONS.
+ *
+ * argc/argv: the words following the "setup" keyword.
+ * n:         netlink request being built; NLM_F_CREATE is set on it.
+ *
+ * Both "DPs" and "default" must be given, "DPs" first.  Returns 0 on
+ * success and -1 after printing a diagnostic on bad input.
+ */
+static int init_gred(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n)
+{
+       struct rtattr *tail;
+       struct tc_gred_sopt opt;
+       int def_seen = 0;
+
+       memset(&opt, 0, sizeof(struct tc_gred_sopt));
+
+       while (argc > 0) {
+               DPRINTF("init_gred: invoked with %s\n", *argv);
+               if (strcmp(*argv, "DPs") == 0) {
+                       NEXT_ARG();
+                       DPRINTF("init_gred: next_arg with %s\n", *argv);
+                       opt.DPs = strtol(*argv, (char **)NULL, 10);
+                       if (opt.DPs > MAX_DPs) { /* need a better error check */
+                               fprintf(stderr, "DPs =%u \n", opt.DPs);
+                               fprintf(stderr, "Illegal \"DPs\"\n");
+                               fprintf(stderr, "GRED: only %d DPs are "
+                                   "currently supported\n", MAX_DPs);
+                               return -1;
+                       }
+               } else if (strcmp(*argv, "default") == 0) {
+                       NEXT_ARG();
+                       opt.def_DP = strtol(*argv, (char **)NULL, 10);
+                       if (!opt.DPs) {
+                               fprintf(stderr, "\"default DP\" must be "
+                                   "defined after DPs\n");
+                               return -1;
+                       }
+                       /* DPs run from 0 to DPs-1, so def_DP == DPs is out of range */
+                       if (opt.def_DP >= opt.DPs) {
+                               fprintf(stderr, "\"default DP\" must be less than %u\n", opt.DPs);
+                               return -1;
+                       }
+                       def_seen = 1;
+               } else if (strcmp(*argv, "grio") == 0) {
+                       opt.grio = 1;
+               } else if (strcmp(*argv, "help") == 0) {
+                       explain();
+                       return -1;
+               } else {
+                       fprintf(stderr, "What is \"%s\"?\n", *argv);
+                       explain();
+                       return -1;
+               }
+               argc--; argv++;
+       }
+
+       /*
+        * "default" is tracked with a flag instead of testing def_DP for
+        * zero, because 0 is a valid default DP.
+        */
+       if (!opt.DPs || !def_seen) {
+               fprintf(stderr, "Illegal gred setup parameters \n");
+               return -1;
+       }
+
+       DPRINTF("TC_GRED: sending DPs=%d default=%d\n", opt.DPs, opt.def_DP);
+       n->nlmsg_flags |= NLM_F_CREATE;
+
+       /* remember where TCA_OPTIONS starts so its length can be patched below */
+       tail = (struct rtattr*)(((void*)n)+NLMSG_ALIGN(n->nlmsg_len));
+
+       addattr_l(n, 1024, TCA_OPTIONS, NULL, 0);
+       addattr_l(n, 1024, TCA_GRED_DPS, &opt, sizeof(struct tc_gred_sopt));
+       tail->rta_len = (((void*)n)+NLMSG_ALIGN(n->nlmsg_len)) - (void*)tail;
+       return 0;
+}
+/*
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+*/
+/*
+ * Parse the options of one GRED drop priority ("gred DP <n> limit ...") and
+ * append them to the request as TCA_GRED_PARMS plus the table computed by
+ * tc_red_eval_idle_damping() as TCA_GRED_STAB, both nested inside
+ * TCA_OPTIONS.  The form "gred setup ..." is handed over to init_gred().
+ *
+ * Returns 0 on success (also when no option was given, in which case
+ * nothing is added to the request) and -1 after printing a diagnostic.
+ */
+static int gred_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n)
+{
+       int ok=0;
+       int dp_seen = 0;
+       struct tc_gred_qopt opt;
+       unsigned burst = 0;
+       unsigned avpkt = 0;
+       double probability = 0.02;
+       unsigned rate = 0;
+       int wlog;
+       __u8 sbuf[256];
+       struct rtattr *tail;
+
+       memset(&opt, 0, sizeof(opt));
+
+       while (argc > 0) {
+               if (strcmp(*argv, "limit") == 0) {
+                       NEXT_ARG();
+                       if (get_size(&opt.limit, *argv)) {
+                               fprintf(stderr, "Illegal \"limit\"\n");
+                               return -1;
+                       }
+                       ok++;
+               } else if (strcmp(*argv, "setup") == 0) {
+                       /* "setup" cannot be mixed with per-DP options */
+                       if (ok) {
+                               fprintf(stderr, "Illegal \"setup\"\n");
+                               return -1;
+                       }
+                       return init_gred(qu,argc-1, argv+1,n);
+               } else if (strcmp(*argv, "min") == 0) {
+                       NEXT_ARG();
+                       if (get_size(&opt.qth_min, *argv)) {
+                               fprintf(stderr, "Illegal \"min\"\n");
+                               return -1;
+                       }
+                       ok++;
+               } else if (strcmp(*argv, "max") == 0) {
+                       NEXT_ARG();
+                       if (get_size(&opt.qth_max, *argv)) {
+                               fprintf(stderr, "Illegal \"max\"\n");
+                               return -1;
+                       }
+                       ok++;
+               } else if (strcmp(*argv, "DP") == 0) {
+                       NEXT_ARG();
+                       opt.DP=strtol(*argv, (char **)NULL, 10);
+                       DPRINTF ("\n ******* DP =%u\n",opt.DP);
+                       /* valid DPs are 0 .. MAX_DPs-1 */
+                       if (opt.DP >= MAX_DPs) { /* need a better error check */
+                               fprintf(stderr, "DP =%u \n",opt.DP);
+                               fprintf(stderr, "Illegal \"DP\"\n");
+                               fprintf(stderr, "GRED: only %d DPs are currently supported\n",MAX_DPs);
+                               return -1;
+                       }
+                       dp_seen = 1;
+                       ok++;
+               } else if (strcmp(*argv, "burst") == 0) {
+                       NEXT_ARG();
+                       if (get_unsigned(&burst, *argv, 0)) {
+                               fprintf(stderr, "Illegal \"burst\"\n");
+                               return -1;
+                       }
+                       ok++;
+               } else if (strcmp(*argv, "avpkt") == 0) {
+                       NEXT_ARG();
+                       if (get_size(&avpkt, *argv)) {
+                               fprintf(stderr, "Illegal \"avpkt\"\n");
+                               return -1;
+                       }
+                       ok++;
+               } else if (strcmp(*argv, "probability") == 0) {
+                       NEXT_ARG();
+                       if (sscanf(*argv, "%lg", &probability) != 1) {
+                               fprintf(stderr, "Illegal \"probability\"\n");
+                               return -1;
+                       }
+                       ok++;
+               } else if (strcmp(*argv, "prio") == 0) {
+                       NEXT_ARG();
+                       opt.prio=strtol(*argv, (char **)NULL, 10);
+                       /* some error check here */
+                       ok++;
+               } else if (strcmp(*argv, "bandwidth") == 0) {
+                       NEXT_ARG();
+                       if (get_rate(&rate, *argv)) {
+                               fprintf(stderr, "Illegal \"bandwidth\"\n");
+                               return -1;
+                       }
+                       ok++;
+               } else if (strcmp(*argv, "help") == 0) {
+                       explain();
+                       return -1;
+               } else {
+                       fprintf(stderr, "What is \"%s\"?\n", *argv);
+                       explain();
+                       return -1;
+               }
+               argc--; argv++;
+       }
+
+       if (!ok)
+               return 0;
+
+       /* bandwidth is optional and falls back to 10Mbit */
+       if (rate == 0)
+               get_rate(&rate, "10Mbit");
+
+       /*
+        * DP is an unsigned field, so "was it given" is tracked with a flag;
+        * a test for a negative value could never fire.
+        */
+       if (!opt.qth_min || !opt.qth_max || !burst || !opt.limit || !avpkt ||
+           !dp_seen) {
+               fprintf(stderr, "Required parameter (min, max, burst, limit, "
+                   "avpkt, DP) is missing\n");
+               return -1;
+       }
+
+       if ((wlog = tc_red_eval_ewma(opt.qth_min, burst, avpkt)) < 0) {
+               fprintf(stderr, "GRED: failed to calculate EWMA constant.\n");
+               return -1;
+       }
+       if (wlog >= 10)
+               fprintf(stderr, "GRED: WARNING. Burst %u seems to be too "
+                   "large.\n", burst);
+       opt.Wlog = wlog;
+       if ((wlog = tc_red_eval_P(opt.qth_min, opt.qth_max, probability)) < 0) {
+               fprintf(stderr, "GRED: failed to calculate probability.\n");
+               return -1;
+       }
+       opt.Plog = wlog;
+       if ((wlog = tc_red_eval_idle_damping(opt.Wlog, avpkt, rate, sbuf)) < 0) {
+               fprintf(stderr, "GRED: failed to calculate idle damping "
+                   "table.\n");
+               return -1;
+       }
+       opt.Scell_log = wlog;
+
+       /* remember where TCA_OPTIONS starts so its length can be patched below */
+       tail = (struct rtattr*)(((void*)n)+NLMSG_ALIGN(n->nlmsg_len));
+
+       addattr_l(n, 1024, TCA_OPTIONS, NULL, 0);
+       addattr_l(n, 1024, TCA_GRED_PARMS, &opt, sizeof(opt));
+       addattr_l(n, 1024, TCA_GRED_STAB, sbuf, 256);
+       tail->rta_len = (((void*)n)+NLMSG_ALIGN(n->nlmsg_len)) - (void*)tail;
+       return 0;
+}
+
+/*
+ * Print the per-DP GRED parameters and counters found in TCA_GRED_PARMS.
+ * The attribute must hold at least MAX_DPs consecutive tc_gred_qopt
+ * entries; entries whose DP field is >= MAX_DPs are skipped.
+ * Returns 0 (also when opt is NULL) and -1 if the attribute is missing
+ * or shorter than expected.
+ */
+static int gred_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
+{
+       struct rtattr *tb[TCA_GRED_STAB+1];
+       struct tc_gred_qopt *qopt;
+       int i;
+       SPRINT_BUF(b1);
+       SPRINT_BUF(b2);
+       SPRINT_BUF(b3);
+       SPRINT_BUF(b4);
+       SPRINT_BUF(b5);
+
+       if (opt == NULL)
+               return 0;
+
+       memset(tb, 0, sizeof(tb));
+       parse_rtattr(tb, TCA_GRED_STAB, RTA_DATA(opt), RTA_PAYLOAD(opt));
+
+       if (tb[TCA_GRED_PARMS] == NULL)
+               return -1;
+
+       qopt = RTA_DATA(tb[TCA_GRED_PARMS]);
+       if (RTA_PAYLOAD(tb[TCA_GRED_PARMS])  < sizeof(*qopt)*MAX_DPs) {
+               fprintf(f,"\n GRED received message smaller than expected\n");
+               return -1;
+       }
+
+       /*
+        * Bad hack: the configured number of DPs (TCA_GRED_DPS) is not read
+        * here, so every one of the MAX_DPs slots is visited and those with
+        * an out-of-range DP value are skipped.
+        */
+       for (i=0;i<MAX_DPs;i++, qopt++) {
+               if (qopt->DP >= MAX_DPs) continue;
+               fprintf(f, "\n DP:%u (prio %d) Average Queue %s Measured "
+                   "Queue %s  ",
+                       qopt->DP,
+                       qopt->prio,
+                       sprint_size(qopt->qave, b4),
+                       sprint_size(qopt->backlog, b5));
+               fprintf(f, "\n\t Packet drops: %u (forced %u early %u)  ",
+                       qopt->forced+qopt->early,
+                       qopt->forced,
+                       qopt->early);
+               fprintf(f, "\n\t Packet totals: %u (bytes %u)  ",
+                       qopt->packets,
+                       qopt->bytesin);
+               if (show_details) {
+                       fprintf(f, "\n limit %s min %s max %s ",
+                               sprint_size(qopt->limit, b1),
+                               sprint_size(qopt->qth_min, b2),
+                               sprint_size(qopt->qth_max, b3));
+               }
+               /*
+                * NOTE(review): this line is printed for every DP whether or
+                * not show_details is set.  It used to be indented as if it
+                * belonged to the check above; confirm the intent before
+                * changing the output.
+                */
+               fprintf(f, "ewma %u Plog %u Scell_log %u",
+                       qopt->Wlog, qopt->Plog, qopt->Scell_log);
+       }
+       return 0;
+}
+
+/* Extended-statistics hook of the gred qdisc: prints nothing, returns 0. */
+static int gred_print_xstats(struct qdisc_util *qu, FILE *f,
+    struct rtattr *xstats)
+{
+       return 0;
+}
+
+
+/* Handler table for the "gred" qdisc. */
+struct qdisc_util gred_util = {
+       NULL,
+       "gred",
+       gred_parse_opt,
+       gred_print_opt,
+       gred_print_xstats,
+};
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..b9b7b7517cf6014041d16ee3d30b1a0563f3d1b5 100644 (file)
@@ -0,0 +1,61 @@
+/*
+ * q_hfsc.c            HFSC.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "utils.h"
+#include "tc_util.h"
+
+/* Print the hfsc usage line to stderr. */
+static void explain(void)
+{
+       fprintf(stderr, "Usage: ... hfsc \n");
+}
+
+/* Print the message: Illegal "<arg>" to stderr. */
+static void explain1(char *arg)
+{
+       fprintf(stderr, "Illegal \"%s\"\n", arg);
+}
+
+
+#define usage() return(-1)
+
+/* Stub: ignores all arguments and always returns -1. */
+static int hfsc_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n)
+{
+       return -1;
+}
+
+/* Stub: prints nothing and always returns -1. */
+static int hfsc_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
+{
+       return -1;
+}
+
+/* Stub: prints nothing and always returns -1. */
+static int hfsc_print_xstats(struct qdisc_util *qu, FILE *f, struct rtattr *xstats)
+{
+       return -1;
+}
+
+/* Handler table for the "hfsc" qdisc; all three callbacks are stubs returning -1. */
+struct qdisc_util hfsc_util = {
+       NULL,
+       "hfsc",
+       hfsc_parse_opt,
+       hfsc_print_opt,
+       hfsc_print_xstats,
+};
+
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..c2963669c7dbe599c3ce9b6f6d6e66ae318a1ff2 100644 (file)
@@ -0,0 +1,61 @@
+/*
+ * q_hpfq.c            HPFQ.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "utils.h"
+#include "tc_util.h"
+
+/* Print the hpfq usage line to stderr. */
+static void explain(void)
+{
+       fprintf(stderr, "Usage: ... hpfq \n");
+}
+
+/* Print the message: Illegal "<arg>" to stderr. */
+static void explain1(char *arg)
+{
+       fprintf(stderr, "Illegal \"%s\"\n", arg);
+}
+
+
+#define usage() return(-1)
+
+/* Stub: ignores all arguments and always returns -1. */
+static int hpfq_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n)
+{
+       return -1;
+}
+
+/* Stub: prints nothing and always returns -1. */
+static int hpfq_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
+{
+       return -1;
+}
+
+/* Stub: prints nothing and always returns -1. */
+static int hpfq_print_xstats(struct qdisc_util *qu, FILE *f, struct rtattr *xstats)
+{
+       return -1;
+}
+
+/* Handler table for the "hpfq" qdisc; all three callbacks are stubs returning -1. */
+struct qdisc_util hpfq_util = {
+       NULL,
+       "hpfq",
+       hpfq_parse_opt,
+       hpfq_print_opt,
+       hpfq_print_xstats,
+};
+
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0a08906269b4dff0c9cad47a9ecf242cf83ff30b 100644 (file)
@@ -0,0 +1,76 @@
+/*
+ *
+ * q_ingress.c             INGRESS.
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Authors:    J Hadi Salim
+ *
+ * This is here just in case it is needed
+ * useless right now; might be useful in the future
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "utils.h"
+#include "tc_util.h"
+
+/* Print the ingress usage line to stderr. */
+static void explain(void)
+{
+       fprintf(stderr, "Usage: ... ingress \n");
+}
+
+#define usage() return(-1)
+
+/*
+ * Parse the ingress qdisc options.  The only accepted word is "handle",
+ * whose value is skipped without being looked at; anything else is an
+ * error.  An empty TCA_OPTIONS attribute is always added to the request.
+ * Returns 0 on success and -1 on an unknown word.
+ */
+static int ingress_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n)
+{
+       while (argc > 0) {
+               if (strcmp(*argv, "handle") != 0) {
+                       fprintf(stderr, "What is \"%s\"?\n", *argv);
+                       explain();
+                       return -1;
+               }
+               /* step over the keyword, then over its value */
+               NEXT_ARG();
+               argc--;
+               argv++;
+       }
+
+       addattr_l(n, 1024, TCA_OPTIONS, NULL, 0);
+       return 0;
+}
+
+/* Print a fixed row of dashes to f; the attribute is not examined. Returns 0. */
+static int ingress_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
+{
+
+               fprintf(f, "---------------- ");
+       return 0;
+}
+
+/* Extended-statistics hook of the ingress qdisc: prints nothing, returns 0. */
+static int ingress_print_xstats(struct qdisc_util *qu, FILE *f, struct rtattr *xstats)
+{
+       return 0;
+}
+
+/* Handler table for the "ingress" qdisc. */
+struct qdisc_util ingress_util = {
+        NULL,
+        "ingress",
+        ingress_parse_opt,
+        ingress_print_opt,
+        ingress_print_xstats,
+};
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..ddda601b9f07348ddc695253229e66238053b650 100644 (file)
@@ -0,0 +1,127 @@
+/*
+ * q_prio.c            PRIO.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Changes:
+ *
+ * Ole Husgaard <sparre@login.dknet.dk>: 990513: prio2band map was always reset.
+ * J Hadi Salim <hadi@cyberus.ca>: 990609: priomap fix.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "utils.h"
+#include "tc_util.h"
+
+/* Print the prio usage line to stderr. */
+static void explain(void)
+{
+       fprintf(stderr, "Usage: ... prio bands NUMBER priomap P1 P2...\n");
+}
+
+#define usage() return(-1)
+
+/*
+ * Parse "prio [bands NUMBER] [priomap P1 P2 ...]" and append the resulting
+ * tc_prio_qopt to the request as TCA_OPTIONS.  The attribute is always
+ * added; without arguments it carries the built-in defaults (3 bands and
+ * the default priomap below).  After "priomap" every further word is read
+ * as the band for the next priomap slot; slots not listed keep their
+ * default value.
+ *
+ * Returns 0 on success and -1 after printing a diagnostic.
+ */
+static int prio_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n)
+{
+       int pmap_mode = 0;
+       int idx = 0;
+       struct tc_prio_qopt opt={3,{ 1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 }};
+
+       while (argc > 0) {
+               if (strcmp(*argv, "bands") == 0) {
+                       /*
+                        * NOTE(review): "bands" after "priomap" only prints
+                        * the usage text and is then still accepted; confirm
+                        * whether it should be rejected instead.
+                        */
+                       if (pmap_mode)
+                               explain();
+                       NEXT_ARG();
+                       if (get_integer(&opt.bands, *argv, 10)) {
+                               fprintf(stderr, "Illegal \"bands\"\n");
+                               return -1;
+                       }
+               } else if (strcmp(*argv, "priomap") == 0) {
+                       if (pmap_mode) {
+                               fprintf(stderr, "Error: duplicate priomap\n");
+                               return -1;
+                       }
+                       pmap_mode = 1;
+               } else if (strcmp(*argv, "help") == 0) {
+                       explain();
+                       return -1;
+               } else {
+                       unsigned band;
+                       if (!pmap_mode) {
+                               fprintf(stderr, "What is \"%s\"?\n", *argv);
+                               explain();
+                               return -1;
+                       }
+                       if (get_unsigned(&band, *argv, 10)) {
+                               fprintf(stderr, "Illegal \"priomap\" element\n");
+                               return -1;
+                       }
+                       /* bands are numbered from 0, so band == bands is out of range */
+                       if (band >= opt.bands) {
+                               fprintf(stderr, "\"priomap\" element is out of bands\n");
+                               return -1;
+                       }
+                       if (idx > TC_PRIO_MAX) {
+                               fprintf(stderr, "\"priomap\" index > TC_PRIO_MAX=%d\n", TC_PRIO_MAX);
+                               return -1;
+                       }
+                       opt.priomap[idx++] = band;
+               }
+               argc--; argv++;
+       }
+
+       addattr_l(n, 1024, TCA_OPTIONS, &opt, sizeof(opt));
+       return 0;
+}
+
+/*
+ * Print the number of bands and all TC_PRIO_MAX+1 priomap entries from the
+ * tc_prio_qopt carried in "opt".  Returns 0 (also when opt is NULL) or -1
+ * if the payload is too short.
+ */
+static int prio_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
+{
+       struct tc_prio_qopt *prio;
+       int slot;
+
+       if (!opt)
+               return 0;
+       if (RTA_PAYLOAD(opt) < sizeof(*prio))
+               return -1;
+
+       prio = RTA_DATA(opt);
+       fprintf(f, "bands %u priomap ", prio->bands);
+       slot = 0;
+       while (slot <= TC_PRIO_MAX) {
+               fprintf(f, " %d", prio->priomap[slot]);
+               slot++;
+       }
+       return 0;
+}
+
+/* Extended-statistics hook of the prio qdisc: prints nothing, returns 0. */
+static int prio_print_xstats(struct qdisc_util *qu, FILE *f, struct rtattr *xstats)
+{
+       return 0;
+}
+
+
+/* Handler table for the "prio" qdisc. */
+struct qdisc_util prio_util = {
+       NULL,
+       "prio",
+       prio_parse_opt,
+       prio_print_opt,
+       prio_print_xstats,
+};
+
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..c156d47cd6afaed788b91b0091d47c7b9aedbce7 100644 (file)
@@ -0,0 +1,222 @@
+/*
+ * q_red.c             RED.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "utils.h"
+#include "tc_util.h"
+
+#include "tc_red.h"
+
+/* Print the two-line red usage summary to stderr. */
+static void explain(void)
+{
+       fprintf(stderr,
+               "Usage: ... red limit BYTES min BYTES max BYTES avpkt BYTES burst PACKETS\n"
+               "               probability PROBABILITY bandwidth KBPS [ ecn ]\n");
+}
+
+#define usage() return(-1)
+
+/*
+ * Parse the red qdisc options and append them to the request as
+ * TCA_RED_PARMS plus the table computed by tc_red_eval_idle_damping() as
+ * TCA_RED_STAB, both nested inside TCA_OPTIONS.
+ *
+ * min, max, burst, limit and avpkt are mandatory once any option is given;
+ * probability defaults to 0.02 and bandwidth to 10Mbit.
+ *
+ * Returns 0 on success (also when no option was given, in which case
+ * nothing is added to the request) and -1 after printing a diagnostic.
+ */
+static int red_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n)
+{
+       int ok=0;
+       struct tc_red_qopt opt;
+       unsigned burst = 0;
+       unsigned avpkt = 0;
+       double probability = 0.02;
+       unsigned rate = 0;
+       int ecn_ok = 0;
+       int wlog;
+       __u8 sbuf[256];
+       struct rtattr *tail;
+
+       memset(&opt, 0, sizeof(opt));
+
+       while (argc > 0) {
+               if (strcmp(*argv, "limit") == 0) {
+                       NEXT_ARG();
+                       if (get_size(&opt.limit, *argv)) {
+                               fprintf(stderr, "Illegal \"limit\"\n");
+                               return -1;
+                       }
+                       ok++;
+               } else if (strcmp(*argv, "min") == 0) {
+                       NEXT_ARG();
+                       if (get_size(&opt.qth_min, *argv)) {
+                               fprintf(stderr, "Illegal \"min\"\n");
+                               return -1;
+                       }
+                       ok++;
+               } else if (strcmp(*argv, "max") == 0) {
+                       NEXT_ARG();
+                       if (get_size(&opt.qth_max, *argv)) {
+                               fprintf(stderr, "Illegal \"max\"\n");
+                               return -1;
+                       }
+                       ok++;
+               } else if (strcmp(*argv, "burst") == 0) {
+                       NEXT_ARG();
+                       if (get_unsigned(&burst, *argv, 0)) {
+                               fprintf(stderr, "Illegal \"burst\"\n");
+                               return -1;
+                       }
+                       ok++;
+               } else if (strcmp(*argv, "avpkt") == 0) {
+                       NEXT_ARG();
+                       if (get_size(&avpkt, *argv)) {
+                               fprintf(stderr, "Illegal \"avpkt\"\n");
+                               return -1;
+                       }
+                       ok++;
+               } else if (strcmp(*argv, "probability") == 0) {
+                       NEXT_ARG();
+                       if (sscanf(*argv, "%lg", &probability) != 1) {
+                               fprintf(stderr, "Illegal \"probability\"\n");
+                               return -1;
+                       }
+                       ok++;
+               } else if (strcmp(*argv, "bandwidth") == 0) {
+                       NEXT_ARG();
+                       if (get_rate(&rate, *argv)) {
+                               fprintf(stderr, "Illegal \"bandwidth\"\n");
+                               return -1;
+                       }
+                       ok++;
+               } else if (strcmp(*argv, "ecn") == 0) {
+                       ecn_ok = 1;
+                       ok++;
+               } else if (strcmp(*argv, "help") == 0) {
+                       explain();
+                       return -1;
+               } else {
+                       fprintf(stderr, "What is \"%s\"?\n", *argv);
+                       explain();
+                       return -1;
+               }
+               argc--; argv++;
+       }
+
+       if (!ok)
+               return 0;
+
+       /* bandwidth is optional and falls back to 10Mbit */
+       if (rate == 0)
+               get_rate(&rate, "10Mbit");
+
+       if (!opt.qth_min || !opt.qth_max || !burst || !opt.limit || !avpkt) {
+               fprintf(stderr, "Required parameter (min, max, burst, limit, avpkt) is missing\n");
+               return -1;
+       }
+
+       if ((wlog = tc_red_eval_ewma(opt.qth_min, burst, avpkt)) < 0) {
+               fprintf(stderr, "RED: failed to calculate EWMA constant.\n");
+               return -1;
+       }
+       if (wlog >= 10)
+               fprintf(stderr, "RED: WARNING. Burst %u seems to be too large.\n", burst);
+       opt.Wlog = wlog;
+       if ((wlog = tc_red_eval_P(opt.qth_min, opt.qth_max, probability)) < 0) {
+               fprintf(stderr, "RED: failed to calculate probability.\n");
+               return -1;
+       }
+       opt.Plog = wlog;
+       if ((wlog = tc_red_eval_idle_damping(opt.Wlog, avpkt, rate, sbuf)) < 0) {
+               fprintf(stderr, "RED: failed to calculate idle damping table.\n");
+               return -1;
+       }
+       opt.Scell_log = wlog;
+       if (ecn_ok) {
+#ifdef TC_RED_ECN
+               opt.flags |= TC_RED_ECN;
+#else
+               fprintf(stderr, "RED: ECN support is missing in this binary.\n");
+               return -1;
+#endif
+       }
+
+       /* remember where TCA_OPTIONS starts so its length can be patched below */
+       tail = (struct rtattr*)(((void*)n)+NLMSG_ALIGN(n->nlmsg_len));
+
+       addattr_l(n, 1024, TCA_OPTIONS, NULL, 0);
+       addattr_l(n, 1024, TCA_RED_PARMS, &opt, sizeof(opt));
+       addattr_l(n, 1024, TCA_RED_STAB, sbuf, 256);
+       tail->rta_len = (((void*)n)+NLMSG_ALIGN(n->nlmsg_len)) - (void*)tail;
+       return 0;
+}
+
+/*
+ * Print limit/min/max (and "ecn" when the flag is set and TC_RED_ECN is
+ * compiled in) from the TCA_RED_PARMS attribute nested in "opt"; with
+ * show_details also the ewma/Plog/Scell_log values.
+ * Returns 0 (also when opt is NULL) and -1 if TCA_RED_PARMS is missing or
+ * too short.
+ */
+static int red_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
+{
+       struct rtattr *attrs[TCA_RED_STAB+1];
+       struct tc_red_qopt *red;
+       SPRINT_BUF(limit_buf);
+       SPRINT_BUF(min_buf);
+       SPRINT_BUF(max_buf);
+
+       if (!opt)
+               return 0;
+
+       memset(attrs, 0, sizeof(attrs));
+       parse_rtattr(attrs, TCA_RED_STAB, RTA_DATA(opt), RTA_PAYLOAD(opt));
+
+       if (!attrs[TCA_RED_PARMS] ||
+           RTA_PAYLOAD(attrs[TCA_RED_PARMS]) < sizeof(*red))
+               return -1;
+       red = RTA_DATA(attrs[TCA_RED_PARMS]);
+
+       fprintf(f, "limit %s min %s max %s ",
+               sprint_size(red->limit, limit_buf),
+               sprint_size(red->qth_min, min_buf),
+               sprint_size(red->qth_max, max_buf));
+#ifdef TC_RED_ECN
+       if (red->flags & TC_RED_ECN)
+               fprintf(f, "ecn ");
+#endif
+       if (!show_details)
+               return 0;
+
+       fprintf(f, "ewma %u Plog %u Scell_log %u",
+               red->Wlog, red->Plog, red->Scell_log);
+       return 0;
+}
+
+/*
+ * Print the marked/early/pdrop/other counters from the tc_red_xstats
+ * payload of "xstats".  Built without TC_RED_ECN this prints nothing.
+ * Returns 0 (also when xstats is NULL) or -1 if the payload is too short.
+ */
+static int red_print_xstats(struct qdisc_util *qu, FILE *f, struct rtattr *xstats)
+{
+#ifdef TC_RED_ECN
+       if (xstats != NULL) {
+               struct tc_red_xstats *counters;
+
+               if (RTA_PAYLOAD(xstats) < sizeof(*counters))
+                       return -1;
+
+               counters = RTA_DATA(xstats);
+               fprintf(f, "  marked %u early %u pdrop %u other %u",
+                       counters->marked, counters->early,
+                       counters->pdrop, counters->other);
+       }
+#endif
+       return 0;
+}
+
+
+/* Handler table for the "red" qdisc. */
+struct qdisc_util red_util = {
+       NULL,
+       "red",
+       red_parse_opt,
+       red_print_opt,
+       red_print_xstats,
+};
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..d7a3c0fafaf656a0319f7637ae1a1f64ea675087 100644 (file)
@@ -0,0 +1,115 @@
+/*
+ * q_sfq.c             SFQ.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "utils.h"
+#include "tc_util.h"
+
+/* Print the sfq usage line to stderr. */
+static void explain(void)
+{
+       fprintf(stderr, "Usage: ... sfq [ limit NUMBER ] [ perturb SECS ] [ quantum BYTES ]\n");
+}
+
+#define usage() return(-1)
+
+/*
+ * Parse "sfq [ limit NUMBER ] [ perturb SECS ] [ quantum BYTES ]" and, if
+ * at least one option was given, append the tc_sfq_qopt to the request as
+ * TCA_OPTIONS.  "limit" must be at least 2.
+ *
+ * Returns 0 on success and -1 after printing a diagnostic.
+ */
+static int sfq_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n)
+{
+       int ok=0;
+       struct tc_sfq_qopt opt;
+
+       memset(&opt, 0, sizeof(opt));
+
+       while (argc > 0) {
+               if (strcmp(*argv, "quantum") == 0) {
+                       NEXT_ARG();
+                       if (get_size(&opt.quantum, *argv)) {
+                               /* name the option that actually failed */
+                               fprintf(stderr, "Illegal \"quantum\"\n");
+                               return -1;
+                       }
+                       ok++;
+               } else if (strcmp(*argv, "perturb") == 0) {
+                       NEXT_ARG();
+                       if (get_integer(&opt.perturb_period, *argv, 0)) {
+                               fprintf(stderr, "Illegal \"perturb\"\n");
+                               return -1;
+                       }
+                       ok++;
+               } else if (strcmp(*argv, "limit") == 0) {
+                       NEXT_ARG();
+                       if (get_u32(&opt.limit, *argv, 0)) {
+                               fprintf(stderr, "Illegal \"limit\"\n");
+                               return -1;
+                       }
+                       if (opt.limit < 2) {
+                               fprintf(stderr, "Illegal \"limit\", must be > 1\n");
+                               return -1;
+                       }
+                       ok++;
+               } else if (strcmp(*argv, "help") == 0) {
+                       explain();
+                       return -1;
+               } else {
+                       fprintf(stderr, "What is \"%s\"?\n", *argv);
+                       explain();
+                       return -1;
+               }
+               argc--; argv++;
+       }
+
+       /* with no option given nothing is attached to the request */
+       if (ok)
+               addattr_l(n, 1024, TCA_OPTIONS, &opt, sizeof(opt));
+       return 0;
+}
+
+/*
+ * Print limit and quantum from the tc_sfq_qopt carried in "opt"; with
+ * show_details also flows/divisor, and the perturb period when non-zero.
+ * Returns 0 (also when opt is NULL) or -1 if the payload is too short.
+ */
+static int sfq_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
+{
+       struct tc_sfq_qopt *sfq;
+       SPRINT_BUF(quantum_buf);
+
+       if (!opt)
+               return 0;
+       if (RTA_PAYLOAD(opt) < sizeof(*sfq))
+               return -1;
+
+       sfq = RTA_DATA(opt);
+       fprintf(f, "limit %up ", sfq->limit);
+       fprintf(f, "quantum %s ", sprint_size(sfq->quantum, quantum_buf));
+       if (show_details)
+               fprintf(f, "flows %u/%u ", sfq->flows, sfq->divisor);
+       if (sfq->perturb_period != 0)
+               fprintf(f, "perturb %dsec ", sfq->perturb_period);
+       return 0;
+}
+
+/* Extended-statistics hook of the sfq qdisc: prints nothing, returns 0. */
+static int sfq_print_xstats(struct qdisc_util *qu, FILE *f, struct rtattr *xstats)
+{
+       return 0;
+}
+
+
+/* Handler table for the "sfq" qdisc. */
+struct qdisc_util sfq_util = {
+       NULL,
+       "sfq",
+       sfq_parse_opt,
+       sfq_print_opt,
+       sfq_print_xstats,
+};
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..01d514fbc605c3cc5cd5e9197890f238e1016dbc 100644 (file)
@@ -0,0 +1,272 @@
+/*
+ * q_tbf.c             TBF.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "utils.h"
+#include "tc_util.h"
+
+/* Write the two-line "tbf" option synopsis to stderr. */
+static void explain(void)
+{
+       static const char synopsis[] =
+               "Usage: ... tbf limit BYTES burst BYTES[/BYTES] rate KBPS [ mtu BYTES[/BYTES] ]\n"
+               "               [ peakrate KBPS ] [ latency TIME ]\n";
+
+       fputs(synopsis, stderr);
+}
+
+static void explain1(char *arg)
+{      /* Report on stderr that the value given for option `arg` is illegal. */
+       fprintf(stderr, "Illegal \"%s\"\n", arg);
+}
+
+
+#define usage() return(-1) /* expands to a "return -1" statement; NOTE(review): no use is visible in tbf_parse_opt()/tbf_print_opt() - possibly dead */
+
+static int tbf_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n)
+{      /*
+        * Parse the "tbf" command-line options in argv and, if at least one
+        * was given, append them to netlink message n as a TCA_OPTIONS
+        * attribute wrapping TCA_TBF_PARMS, TCA_TBF_RTAB and (only when a
+        * peak rate is set) TCA_TBF_PTAB.
+        *
+        * Returns 0 on success, including the case where no option was given
+        * and nothing is appended; returns -1 after printing a diagnostic on
+        * any parse or validation error.
+        */
+       int ok=0; /* counts accepted options; zero means "nothing to send" */
+       struct tc_tbf_qopt opt;
+       __u32 rtab[256]; /* rate table filled by tc_calc_rtable() for "rate" */
+       __u32 ptab[256]; /* rate table filled by tc_calc_rtable() for "peakrate" */
+       unsigned buffer=0, mtu=0, mpu=0, latency=0; /* zero doubles as "not specified yet" */
+       int Rcell_log=-1, Pcell_log = -1; 
+       struct rtattr *tail;
+
+       memset(&opt, 0, sizeof(opt));
+
+       while (argc > 0) {
+               if (matches(*argv, "limit") == 0) {
+                       NEXT_ARG();
+                       if (opt.limit || latency) { /* "limit" and "latency" are alternatives */
+                               fprintf(stderr, "Double \"limit/latency\" spec\n");
+                               return -1;
+                       }
+                       if (get_size(&opt.limit, *argv)) {
+                               explain1("limit");
+                               return -1;
+                       }
+                       ok++;
+               } else if (matches(*argv, "latency") == 0) {
+                       NEXT_ARG();
+                       if (opt.limit || latency) {
+                               fprintf(stderr, "Double \"limit/latency\" spec\n");
+                               return -1;
+                       }
+                       if (get_usecs(&latency, *argv)) {
+                               explain1("latency");
+                               return -1;
+                       }
+                       ok++;
+               } else if (matches(*argv, "burst") == 0 ||
+                       strcmp(*argv, "buffer") == 0 ||
+                       strcmp(*argv, "maxburst") == 0) { /* three spellings of the same option */
+                       NEXT_ARG();
+                       if (buffer) {
+                               fprintf(stderr, "Double \"buffer/burst\" spec\n");
+                               return -1;
+                       }
+                       if (get_size_and_cell(&buffer, &Rcell_log, *argv) < 0) {
+                               explain1("buffer");
+                               return -1;
+                       }
+                       ok++;
+               } else if (strcmp(*argv, "mtu") == 0 ||
+                          strcmp(*argv, "minburst") == 0) {
+                       NEXT_ARG();
+                       if (mtu) {
+                               fprintf(stderr, "Double \"mtu/minburst\" spec\n");
+                               return -1;
+                       }
+                       if (get_size_and_cell(&mtu, &Pcell_log, *argv) < 0) {
+                               explain1("mtu");
+                               return -1;
+                       }
+                       ok++;
+               } else if (strcmp(*argv, "mpu") == 0) {
+                       NEXT_ARG();
+                       if (mpu) {
+                               fprintf(stderr, "Double \"mpu\" spec\n");
+                               return -1;
+                       }
+                       if (get_size(&mpu, *argv)) {
+                               explain1("mpu");
+                               return -1;
+                       }
+                       ok++;
+               } else if (strcmp(*argv, "rate") == 0) {
+                       NEXT_ARG();
+                       if (opt.rate.rate) {
+                               fprintf(stderr, "Double \"rate\" spec\n");
+                               return -1;
+                       }
+                       if (get_rate(&opt.rate.rate, *argv)) {
+                               explain1("rate");
+                               return -1;
+                       }
+                       ok++;
+               } else if (matches(*argv, "peakrate") == 0) {
+                       NEXT_ARG();
+                       if (opt.peakrate.rate) {
+                               fprintf(stderr, "Double \"peakrate\" spec\n");
+                               return -1;
+                       }
+                       if (get_rate(&opt.peakrate.rate, *argv)) {
+                               explain1("peakrate");
+                               return -1;
+                       }
+                       ok++;
+               } else if (strcmp(*argv, "help") == 0) {
+                       explain();
+                       return -1;
+               } else {
+                       fprintf(stderr, "What is \"%s\"?\n", *argv);
+                       explain();
+                       return -1;
+               }
+               argc--; argv++;
+       }
+
+       if (!ok) /* no option at all: leave the message untouched */
+               return 0;
+
+       if (opt.rate.rate == 0 || !buffer) {
+               fprintf(stderr, "Both \"rate\" and \"burst\" are required.\n");
+               return -1;
+       }
+       if (opt.peakrate.rate) {
+               if (!mtu) {
+                       fprintf(stderr, "\"mtu\" is required, if \"peakrate\" is requested.\n");
+                       return -1;
+               }
+       }
+
+       if (opt.limit == 0 && latency == 0) {
+               fprintf(stderr, "Either \"limit\" or \"latency\" are required.\n");
+               return -1;
+       }
+
+       if (opt.limit == 0) { /* only "latency" was given: derive the limit from it */
+               double lim = opt.rate.rate*(double)latency/1000000 + buffer; /* latency is divided by 1000000 before scaling by the rate */
+               if (opt.peakrate.rate) {
+                       double lim2 = opt.peakrate.rate*(double)latency/1000000 + mtu;
+                       if (lim2 < lim) /* keep the smaller of the two candidate limits */
+                               lim = lim2;
+               }
+               opt.limit = lim;
+       }
+
+       if ((Rcell_log = tc_calc_rtable(opt.rate.rate, rtab, Rcell_log, mtu, mpu)) < 0) {
+               fprintf(stderr, "TBF: failed to calculate rate table.\n");
+               return -1;
+       }
+       opt.buffer = tc_calc_xmittime(opt.rate.rate, buffer);
+       opt.rate.cell_log = Rcell_log;
+       opt.rate.mpu = mpu;
+       if (opt.peakrate.rate) {
+               if ((Pcell_log = tc_calc_rtable(opt.peakrate.rate, ptab, Pcell_log, mtu, mpu)) < 0) {
+                       fprintf(stderr, "TBF: failed to calculate peak rate table.\n");
+                       return -1;
+               }
+               opt.mtu = tc_calc_xmittime(opt.peakrate.rate, mtu);
+               opt.peakrate.cell_log = Pcell_log;
+               opt.peakrate.mpu = mpu;
+       }
+
+       tail = (struct rtattr*)(((void*)n)+NLMSG_ALIGN(n->nlmsg_len)); /* aligned end of the message, i.e. where the TCA_OPTIONS header is about to be written */
+       addattr_l(n, 1024, TCA_OPTIONS, NULL, 0); /* empty wrapper attribute; its length is patched below */
+       addattr_l(n, 2024, TCA_TBF_PARMS, &opt, sizeof(opt));
+       addattr_l(n, 3024, TCA_TBF_RTAB, rtab, 1024); /* 1024 == sizeof(rtab): 256 __u32 entries */
+       if (opt.peakrate.rate)
+               addattr_l(n, 4096, TCA_TBF_PTAB, ptab, 1024);
+       tail->rta_len = (((void*)n)+NLMSG_ALIGN(n->nlmsg_len)) - (void*)tail; /* make TCA_OPTIONS span everything appended after it */
+       return 0;
+}
+
+/*
+ * Print the TBF parameters carried in the qdisc options attribute.
+ *
+ * opt is expected to wrap nested TBF attributes; only TCA_TBF_PARMS is
+ * read.  Rate, burst, optional peak rate / mtu and the derived latency are
+ * written to f; show_details and show_raw add cell size, mpu and raw
+ * tick values.
+ *
+ * Returns 0 when opt is NULL or printing succeeded, -1 when TCA_TBF_PARMS
+ * is missing or shorter than struct tc_tbf_qopt.
+ */
+static int tbf_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
+{
+       struct rtattr *tb[TCA_TBF_PTAB+1];
+       struct tc_tbf_qopt *qopt;
+       double buffer, mtu;
+       double latency;
+       SPRINT_BUF(b1);
+       SPRINT_BUF(b2);
+
+       if (opt == NULL)
+               return 0;
+
+       memset(tb, 0, sizeof(tb));
+       parse_rtattr(tb, TCA_TBF_PTAB, RTA_DATA(opt), RTA_PAYLOAD(opt));
+
+       if (tb[TCA_TBF_PARMS] == NULL)
+               return -1;
+
+       qopt = RTA_DATA(tb[TCA_TBF_PARMS]);
+       if (RTA_PAYLOAD(tb[TCA_TBF_PARMS])  < sizeof(*qopt))
+               return -1;
+       fprintf(f, "rate %s ", sprint_rate(qopt->rate.rate, b1));
+       /* qopt->buffer is a transmit time in ticks; turn it back into a size. */
+       buffer = ((double)qopt->rate.rate*tc_core_tick2usec(qopt->buffer))/1000000;
+       if (show_details) {
+               fprintf(f, "burst %s/%u mpu %s ", sprint_size(buffer, b1),
+                       1<<qopt->rate.cell_log, sprint_size(qopt->rate.mpu, b2));
+       } else {
+               fprintf(f, "burst %s ", sprint_size(buffer, b1));
+       }
+       if (show_raw)
+               fprintf(f, "[%08x] ", qopt->buffer);
+       if (qopt->peakrate.rate) {
+               fprintf(f, "peakrate %s ", sprint_rate(qopt->peakrate.rate, b1));
+               if (qopt->mtu || qopt->peakrate.mpu) {
+                       mtu = ((double)qopt->peakrate.rate*tc_core_tick2usec(qopt->mtu))/1000000;
+                       if (show_details) {
+                               fprintf(f, "mtu %s/%u mpu %s ", sprint_size(mtu, b1),
+                                       1<<qopt->peakrate.cell_log, sprint_size(qopt->peakrate.mpu, b2));
+                       } else {
+                               fprintf(f, "minburst %s ", sprint_size(mtu, b1));
+                       }
+                       if (show_raw)
+                               fprintf(f, "[%08x] ", qopt->mtu);
+               }
+       }
+
+       if (show_raw)
+               fprintf(f, "limit %s ", sprint_size(qopt->limit, b1));
+
+       /*
+        * Time to drain `limit` bytes at the configured rate, minus the burst
+        * time.  Both terms are in microseconds: 1000000 * bytes / (bytes/s)
+        * and the result of tc_core_tick2usec().
+        */
+       latency = 1000000*(qopt->limit/(double)qopt->rate.rate) - tc_core_tick2usec(qopt->buffer);
+       if (qopt->peakrate.rate) {
+               double lat2 = 1000000*(qopt->limit/(double)qopt->peakrate.rate) - tc_core_tick2usec(qopt->mtu);
+               if (lat2 > latency)
+                       latency = lat2;
+       }
+       /*
+        * latency is already in microseconds, so it must not be passed through
+        * tc_core_tick2usec() a second time before formatting.
+        */
+       fprintf(f, "lat %s ", sprint_usecs((long)latency, b1));
+
+       return 0;
+}
+
+static int tbf_print_xstats(struct qdisc_util *qu, FILE *f, struct rtattr *xstats)
+{      /* Extended-statistics printer for "tbf": all arguments are ignored. */
+       return 0; /* nothing is written to f; always reports success */
+}
+
+struct qdisc_util tbf_util = { /* handler table for the "tbf" qdisc kind */
+       NULL, /* presumably the list link (q->next) - TODO confirm field order against tc_util.h */
+       "tbf", /* kind name */
+       tbf_parse_opt, /* command-line option parser */
+       tbf_print_opt, /* option printer */
+       tbf_print_xstats, /* extended-statistics printer (prints nothing) */
+};
+
diff --git a/tc/tc.c b/tc/tc.c
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..35b3a95cfc87dc4e830620630eeaca856d74d885 100644 (file)
--- a/tc/tc.c
+++ b/tc/tc.c
@@ -0,0 +1,306 @@
+/*
+ * tc.c                "tc" utility frontend.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Fixes:
+ *
+ * Petri Mattila <petri@prihateam.fi> 990308: wrong memset's resulted in faults
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <dlfcn.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+#include <errno.h>
+
+#include "SNAPSHOT.h"
+#include "utils.h"
+#include "tc_util.h"
+#include "tc_common.h"
+
+int show_stats = 0; /* incremented by main() for each -stats / -statistics option */
+int show_details = 0; /* incremented by main() for each -details option */
+int show_raw = 0; /* incremented by main() for each -raw option */
+int resolve_hosts = 0; /* NOTE(review): not referenced in the visible part of this file - purpose to be confirmed */
+
+void *BODY; /* cached dlopen(NULL) handle, used when no q_<kind>.so / f_<kind>.so can be loaded */
+static struct qdisc_util * qdisc_list; /* head of the singly linked list of qdisc handlers registered by get_qdisc_kind() */
+static struct filter_util * filter_list; /* head of the singly linked list of filter handlers registered by get_filter_kind() */
+
+/*
+ * Fallback option printer for qdisc kinds without a dedicated handler.
+ * Prints only the payload length of a non-empty options attribute and
+ * always returns 0.
+ */
+static int print_noqopt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
+{
+       /*
+        * RTA_PAYLOAD() is built from sizeof and may be wider than unsigned
+        * int; cast so the argument matches the %u conversion.
+        */
+       if (opt && RTA_PAYLOAD(opt))
+               fprintf(f, "[Unknown qdisc, optlen=%u] ", (unsigned)RTA_PAYLOAD(opt));
+       return 0;
+}
+
+/*
+ * Fallback option parser for qdisc kinds without a dedicated handler.
+ * Accepts an empty option list (returns 0); any option is rejected with a
+ * diagnostic on stderr and -1.
+ */
+static int parse_noqopt(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n)
+{
+       if (argc == 0)
+               return 0;
+
+       fprintf(stderr, "Unknown qdisc \"%s\", hence option \"%s\" is unparsable\n", qu->id, *argv);
+       return -1;
+}
+
+/*
+ * Fallback option printer for filter kinds without a dedicated handler.
+ * Prints the filter handle (when non-zero or when options are present) and
+ * the payload length of a non-empty options attribute; always returns 0.
+ */
+static int print_nofopt(struct filter_util *qu, FILE *f, struct rtattr *opt, __u32 fhandle)
+{
+       /*
+        * RTA_PAYLOAD() is built from sizeof and may be wider than unsigned
+        * int; cast so the argument matches the %u conversion.
+        */
+       if (opt && RTA_PAYLOAD(opt))
+               fprintf(f, "fh %08x [Unknown filter, optlen=%u] ", fhandle, (unsigned)RTA_PAYLOAD(opt));
+       else if (fhandle)
+               fprintf(f, "fh %08x ", fhandle);
+       return 0;
+}
+
+/*
+ * Fallback option parser for filter kinds without a dedicated handler.
+ *
+ * Any option is rejected.  When a filter handle string is supplied it is
+ * parsed as a base-16 number and stored in the tcmsg of message n.
+ * Returns 0 on success, -1 (after a diagnostic on stderr) otherwise.
+ */
+static int parse_nofopt(struct filter_util *qu, char *fhandle, int argc, char **argv, struct nlmsghdr *n)
+{
+       struct tcmsg *tcm;
+       __u32 id;
+
+       if (argc != 0) {
+               fprintf(stderr, "Unknown filter \"%s\", hence option \"%s\" is unparsable\n", qu->id, *argv);
+               return -1;
+       }
+
+       /* No handle given: nothing to record. */
+       if (fhandle == NULL)
+               return 0;
+
+       if (get_u32(&id, fhandle, 16) != 0) {
+               fprintf(stderr, "Unparsable filter ID \"%s\"\n", fhandle);
+               return -1;
+       }
+
+       tcm = NLMSG_DATA(n);
+       tcm->tcm_handle = id;
+       return 0;
+}
+
+#if 0 /* compiled out: nothing up to the matching #endif is built */
+/* Builtin filter types */
+
+static int f_parse_noopt(struct filter_util *qu, char *fhandle, int argc, char **argv, struct nlmsghdr *n)
+{      /* Parser for a filter kind that takes neither options nor a handle. */
+       if (argc || fhandle) { /* any option or filter handle is rejected */
+               fprintf(stderr, "Filter \"%s\" has no options.\n", qu->id);
+               return -1;
+       }
+       return 0;
+}
+#endif
+
+/*
+ * Return the handler table for qdisc kind `str`, registering it on first use.
+ *
+ * Lookup order: the list of already registered handlers, then the symbol
+ * "<str>_util" in the shared object "q_<str>.so", or, if that object
+ * cannot be opened, in the handle obtained from dlopen(NULL), which is
+ * cached in BODY.  When no symbol is found a zeroed placeholder using
+ * parse_noqopt()/print_noqopt() is allocated instead.  The result is
+ * pushed onto qdisc_list.  Returns NULL only if that allocation fails.
+ */
+struct qdisc_util *get_qdisc_kind(char *str)
+{
+       char name[256];
+       struct qdisc_util *qu;
+       void *handle;
+
+       /* Already registered? */
+       for (qu = qdisc_list; qu != NULL; qu = qu->next) {
+               if (strcmp(qu->id, str) == 0)
+                       return qu;
+       }
+
+       snprintf(name, sizeof(name), "q_%s.so", str);
+       handle = dlopen(name, RTLD_LAZY);
+       if (handle == NULL) {
+               /* Fall back to the dlopen(NULL) handle, opened at most once. */
+               if (BODY == NULL)
+                       BODY = dlopen(NULL, RTLD_LAZY);
+               handle = BODY;
+       }
+
+       qu = NULL;
+       if (handle != NULL) {
+               snprintf(name, sizeof(name), "%s_util", str);
+               qu = dlsym(handle, name);
+       }
+
+       if (qu == NULL) {
+               /* Unknown kind: build a placeholder with the generic callbacks. */
+               qu = malloc(sizeof(*qu));
+               if (qu == NULL)
+                       return NULL;
+               memset(qu, 0, sizeof(*qu));
+               strncpy(qu->id, str, 15);
+               qu->parse_qopt = parse_noqopt;
+               qu->print_qopt = print_noqopt;
+       }
+
+       qu->next = qdisc_list;
+       qdisc_list = qu;
+       return qu;
+}
+
+
+/*
+ * Return the handler table for filter kind `str`, registering it on first
+ * use.
+ *
+ * Lookup order: the list of already registered handlers, then the symbol
+ * "<str>_util" in the shared object "f_<str>.so", or, if that object
+ * cannot be opened, in the handle obtained from dlopen(NULL), which is
+ * cached in BODY.  When no symbol is found a zeroed placeholder using
+ * parse_nofopt()/print_nofopt() is allocated instead.  The result is
+ * pushed onto filter_list.  Returns NULL only if that allocation fails.
+ */
+struct filter_util *get_filter_kind(char *str)
+{
+       char name[256];
+       struct filter_util *fu;
+       void *handle;
+
+       /* Already registered? */
+       for (fu = filter_list; fu != NULL; fu = fu->next) {
+               if (strcmp(fu->id, str) == 0)
+                       return fu;
+       }
+
+       snprintf(name, sizeof(name), "f_%s.so", str);
+       handle = dlopen(name, RTLD_LAZY);
+       if (handle == NULL) {
+               /* Fall back to the dlopen(NULL) handle, opened at most once. */
+               if (BODY == NULL)
+                       BODY = dlopen(NULL, RTLD_LAZY);
+               handle = BODY;
+       }
+
+       fu = NULL;
+       if (handle != NULL) {
+               snprintf(name, sizeof(name), "%s_util", str);
+               fu = dlsym(handle, name);
+       }
+
+       if (fu == NULL) {
+               /* Unknown kind: build a placeholder with the generic callbacks. */
+               fu = malloc(sizeof(*fu));
+               if (fu == NULL)
+                       return NULL;
+               memset(fu, 0, sizeof(*fu));
+               strncpy(fu->id, str, 15);
+               fu->parse_fopt = parse_nofopt;
+               fu->print_fopt = print_nofopt;
+       }
+
+       fu->next = filter_list;
+       filter_list = fu;
+       return fu;
+}
+
+static void usage(void) __attribute__((noreturn));
+
+/* Print the top-level tc synopsis on stderr and terminate with exit(-1). */
+static void usage(void)
+{
+       static const char synopsis[] =
+               "Usage: tc [ OPTIONS ] OBJECT { COMMAND | help }\n"
+               "where  OBJECT := { qdisc | class | filter }\n"
+               "       OPTIONS := { -s[tatistics] | -d[etails] | -r[aw] | -b[atch] file }\n";
+
+       fputs(synopsis, stderr);
+       exit(-1);
+}
+
+
+
+int main(int argc, char **argv)
+{
+       char *basename;
+
+       basename = strrchr(argv[0], '/');
+       if (basename == NULL)
+               basename = argv[0];
+       else
+               basename++;
+       
+
+       /* batch mode */
+       if (argc > 1 && matches(argv[1], "-batch") == 0) {
+               FILE *batch;
+               char line[400];
+               char *largv[100];
+               int largc, ret=0;
+#define        BMAXARG (sizeof(largv)/sizeof(char *)-2)
+
+               if (argc != 3) {
+                       fprintf(stderr, "Wrong number of arguments in batch mode\n");
+                       exit(-1);
+               }
+               if (matches(argv[2], "-") != 0) {
+                       if ((batch = fopen(argv[2], "r")) == NULL) {
+                               fprintf(stderr, "Cannot open file \"%s\" for reading: %s=n", argv[2], strerror(errno));
+                               exit(-1);
+                       }
+               } else {
+                       if ((batch = fdopen(0, "r")) == NULL) {
+                               fprintf(stderr, "Cannot open stdin for reading: %s=n", strerror(errno));
+                               exit(-1);
+                       }
+               }
+
+               tc_core_init();
+
+               while (fgets(line, sizeof(line)-1, batch)) {
+                       if (line[strlen(line)-1]=='\n') {
+                               line[strlen(line)-1] = '\0';
+                       } else {
+                               fprintf(stderr, "No newline at the end of line, looks like to long (%d chars or more)\n", strlen(line));
+                               exit(-1);
+                       }
+                       largc = 0;
+                       largv[largc]=strtok(line, " ");
+                       while ((largv[++largc]=strtok(NULL, " ")) != NULL) {
+                               if (largc > BMAXARG) {
+                                       fprintf(stderr, "Over %d arguments in batch mode, enough!\n", BMAXARG);
+                                       exit(-1);
+                               }
+                       }
+
+                       if (matches(largv[0], "qdisc") == 0) {
+                               ret += do_qdisc(largc-1, largv+1);
+                       } else if (matches(largv[0], "class") == 0) {
+                               ret += do_class(largc-1, largv+1);
+                       } else if (matches(largv[0], "filter") == 0) {
+                               ret += do_filter(largc-1, largv+1);
+                       } else if (matches(largv[0], "help") == 0) {
+                               usage();        /* note that usage() doesn't return */
+                       } else {
+                               fprintf(stderr, "Object \"%s\" is unknown, try \"tc help\".\n", largv[1]);
+                               exit(-1);
+                       }
+               }
+               fclose(batch);
+               exit(0); /* end of batch, that's all */
+       }
+
+       while (argc > 1) {
+               if (argv[1][0] != '-')
+                       break;
+               if (matches(argv[1], "-stats") == 0 ||
+                   matches(argv[1], "-statistics") == 0) {
+                       ++show_stats;
+               } else if (matches(argv[1], "-details") == 0) {
+                       ++show_details;
+               } else if (matches(argv[1], "-raw") == 0) {
+                       ++show_raw;
+               } else if (matches(argv[1], "-Version") == 0) {
+                       printf("tc utility, iproute2-ss%s\n", SNAPSHOT);
+                       exit(0);
+               } else if (matches(argv[1], "-help") == 0) {
+                       usage();
+               } else {
+                       fprintf(stderr, "Option \"%s\" is unknown, try \"tc -help\".\n", argv[1]);
+                       exit(-1);
+               }
+               argc--; argv++;
+       }
+
+       tc_core_init();
+
+       if (argc > 1) {
+               if (matches(argv[1], "qdisc") == 0)
+                       return do_qdisc(argc-2, argv+2);
+               if (matches(argv[1], "class") == 0)
+                       return do_class(argc-2, argv+2);
+               if (matches(argv[1], "filter") == 0)
+                       return do_filter(argc-2, argv+2);
+               if (matches(argv[1], "help") == 0)
+                       usage();
+               fprintf(stderr, "Object \"%s\" is unknown, try \"tc help\".\n", argv[1]);
+               exit(-1);
+       }
+
+       usage();
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0abcc9da2d49e319de2611877765e6c75f657f2e 100644 (file)
@@ -0,0 +1,57 @@
+/*
+ * tc_cbq.c            CBQ maintenance routines.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <math.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "tc_core.h"
+#include "tc_cbq.h"
+
+/*
+ * Compute the CBQ "maxidle" value from the link bandwidth, the class rate,
+ * the average packet size, the EWMA exponent and the allowed burst.
+ *
+ * The base value is xmt*(1-g) with g = 1 - 2^-ewma_log and
+ * xmt = avpkt/bndw.  When the class rate differs from the bandwidth and
+ * maxburst is non-zero, the burst-derived value is used if it is larger.
+ * The result is scaled by 2^ewma_log and by 1000000 and handed to
+ * tc_core_usec2tick().
+ */
+unsigned tc_cbq_calc_maxidle(unsigned bndw, unsigned rate, unsigned avpkt,
+                            int ewma_log, unsigned maxburst)
+{
+       const int scale = 1<<ewma_log;
+       const double g = 1.0 - 1.0/scale;
+       const double xmt = (double)avpkt/bndw;
+       double idle = xmt*(1-g);
+
+       if (bndw != rate && maxburst) {
+               double burst_idle = (double)avpkt/rate - xmt;
+
+               burst_idle = burst_idle * (pow(g, -(double)maxburst) - 1);
+               if (burst_idle > idle)
+                       idle = burst_idle;
+       }
+
+       return tc_core_usec2tick(idle*scale*1000000);
+}
+
+/*
+ * Compute the CBQ "offtime" value from the link bandwidth, the class rate,
+ * the average packet size, the EWMA exponent and the minimum burst.
+ *
+ * Returns 0 when minburst is 0.  Otherwise the difference between the
+ * packet time at the class rate and at the link bandwidth is scaled by a
+ * minburst-dependent factor, multiplied by 1000000 and handed to
+ * tc_core_usec2tick().
+ */
+unsigned tc_cbq_calc_offtime(unsigned bndw, unsigned rate, unsigned avpkt,
+                            int ewma_log, unsigned minburst)
+{
+       double g;
+       double base;
+       double factor;
+
+       if (minburst == 0)
+               return 0;
+
+       g = 1.0 - 1.0/(1<<ewma_log);
+       base = (double)avpkt/rate - (double)avpkt/bndw;
+
+       if (minburst == 1)
+               factor = pow(g, -(double)minburst) - 1;
+       else
+               factor = 1 + (pow(g, -(double)(minburst-1)) - 1)/(1-g);
+
+       base = base * factor;
+       return tc_core_usec2tick(base*1000000);
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..8f956490dd3055f0145719c16a37ef4e69f4a1f7 100644 (file)
@@ -0,0 +1,9 @@
+#ifndef _TC_CBQ_H_
+#define _TC_CBQ_H_ 1 /* include guard */
+
+unsigned tc_cbq_calc_maxidle(unsigned bndw, unsigned rate, unsigned avpkt,
+                            int ewma_log, unsigned maxburst); /* CBQ maxidle; the value is produced by tc_core_usec2tick() in tc_cbq.c */
+unsigned tc_cbq_calc_offtime(unsigned bndw, unsigned rate, unsigned avpkt,
+                            int ewma_log, unsigned minburst); /* CBQ offtime; 0 when minburst is 0, otherwise produced by tc_core_usec2tick() */
+
+#endif
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..542f8d5fb94b997b902fa10f8e9b5ee7d6cf9017 100644 (file)
@@ -0,0 +1,361 @@
+/*
+ * tc_class.c          "tc class".
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+#include <math.h>
+
+#include "utils.h"
+#include "tc_util.h"
+#include "tc_common.h"
+
+static void usage(void) __attribute__((noreturn));
+
+/* Print the "tc class" synopsis on stderr and terminate with exit(-1). */
+static void usage(void)
+{
+       static const char synopsis[] =
+               "Usage: tc class [ add | del | change | get ] dev STRING\n"
+               "       [ classid CLASSID ] [ root | parent CLASSID ]\n"
+               "       [ [ QDISC_KIND ] [ help | OPTIONS ] ]\n"
+               "\n"
+               "       tc class show [ dev STRING ] [ root | parent CLASSID ]\n"
+               "Where:\n"
+               "QDISC_KIND := { prio | cbq | etc. }\n"
+               "OPTIONS := ... try tc class add <desired QDISC_KIND> help\n";
+
+       fputs(synopsis, stderr);
+       exit(-1);
+}
+
+/*
+ * Build and send one traffic-class request.
+ *
+ * cmd is stored as the netlink message type and flags are OR-ed into
+ * NLM_F_REQUEST.  The generic arguments (dev, classid, root, parent,
+ * estimator) are parsed here; the first unrecognised word is taken as the
+ * qdisc kind and the remaining arguments are handed to that kind's
+ * parse_copt() callback.
+ *
+ * Returns 0 on success and -1 when the estimator cannot be parsed; most
+ * other failures print a diagnostic and terminate the process.
+ */
+int tc_class_modify(int cmd, unsigned flags, int argc, char **argv)
+{
+       struct rtnl_handle rth;
+       struct {
+               struct nlmsghdr         n;
+               struct tcmsg            t;
+               char                    buf[4096];
+       } req;
+       struct qdisc_util *q = NULL;
+       struct tc_estimator est;
+       char  d[16];
+       char  k[16];
+
+       memset(&req, 0, sizeof(req));
+       memset(&est, 0, sizeof(est));
+       memset(d, 0, sizeof(d));
+       memset(k, 0, sizeof(k));
+
+       req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
+       req.n.nlmsg_flags = NLM_F_REQUEST|flags;
+       req.n.nlmsg_type = cmd;
+       req.t.tcm_family = AF_UNSPEC;
+
+       while (argc > 0) {
+               if (strcmp(*argv, "dev") == 0) {
+                       NEXT_ARG();
+                       if (d[0])
+                               duparg("dev", *argv);
+                       /* d was zeroed above, so the copy stays NUL-terminated. */
+                       strncpy(d, *argv, sizeof(d)-1);
+               } else if (strcmp(*argv, "classid") == 0) {
+                       __u32 handle;
+                       NEXT_ARG();
+                       if (req.t.tcm_handle)
+                               duparg("classid", *argv);
+                       if (get_tc_classid(&handle, *argv))
+                               invarg(*argv, "invalid class ID");
+                       req.t.tcm_handle = handle;
+               } else if (strcmp(*argv, "root") == 0) {
+                       if (req.t.tcm_parent) {
+                               fprintf(stderr, "Error: \"root\" is duplicate parent ID.\n");
+                               exit(-1);
+                       }
+                       req.t.tcm_parent = TC_H_ROOT;
+               } else if (strcmp(*argv, "parent") == 0) {
+                       __u32 handle;
+                       NEXT_ARG();
+                       if (req.t.tcm_parent)
+                               duparg("parent", *argv);
+                       if (get_tc_classid(&handle, *argv))
+                               invarg(*argv, "invalid parent ID");
+                       req.t.tcm_parent = handle;
+               } else if (matches(*argv, "estimator") == 0) {
+                       if (parse_estimator(&argc, &argv, &est))
+                               return -1;
+               } else if (matches(*argv, "help") == 0) {
+                       usage();
+               } else {
+                       /* First unknown word: the qdisc kind; the rest belongs to it. */
+                       strncpy(k, *argv, sizeof(k)-1);
+
+                       q = get_qdisc_kind(k);
+                       argc--; argv++;
+                       break;
+               }
+               argc--; argv++;
+       }
+
+       if (k[0])
+               addattr_l(&req.n, sizeof(req), TCA_KIND, k, strlen(k)+1);
+       if (est.ewma_log)
+               addattr_l(&req.n, sizeof(req), TCA_RATE, &est, sizeof(est));
+
+       if (q) {
+               if (q->parse_copt == NULL) {
+                       fprintf(stderr, "Error: Qdisc \"%s\" is classless.\n", k);
+                       exit(1);
+               }
+               if (q->parse_copt(q, argc, argv, &req.n))
+                       exit(1);
+       } else {
+               if (argc) {
+                       if (matches(*argv, "help") == 0)
+                               usage();
+                       /* Terminate the diagnostic with a newline like every other message here. */
+                       fprintf(stderr, "Garbage instead of arguments \"%s ...\". Try \"tc class help\".\n", *argv);
+                       exit(-1);
+               }
+       }
+
+       if (rtnl_open(&rth, 0) < 0) {
+               fprintf(stderr, "Cannot open rtnetlink\n");
+               exit(1);
+       }
+
+       if (d[0])  {
+               ll_init_map(&rth);
+
+               if ((req.t.tcm_ifindex = ll_name_to_index(d)) == 0) {
+                       fprintf(stderr, "Cannot find device \"%s\"\n", d);
+                       exit(1);
+               }
+       }
+
+       if (rtnl_talk(&rth, &req.n, 0, 0, NULL, NULL, NULL) < 0)
+               exit(2);
+
+       rtnl_close(&rth);
+       return 0;
+}
+
+/*
+ * Print the generic traffic-control counters of a class.
+ *
+ * The first line always shows bytes, packets, drops and overlimits.  A
+ * second line is started only when a rate (bps/pps) or a backlog
+ * (backlog/qlen) is non-zero, and only the non-zero values are shown.
+ */
+void print_class_tcstats(FILE *fp, struct tc_stats *st)
+{
+       const int have_rate = st->bps || st->pps;
+       const int have_queue = st->qlen || st->backlog;
+       SPRINT_BUF(txt);
+
+       fprintf(fp, " Sent %llu bytes %u pkts (dropped %u, overlimits %u) ",
+               (unsigned long long)st->bytes, st->packets, st->drops, st->overlimits);
+
+       if (!have_rate && !have_queue)
+               return;
+
+       fprintf(fp, "\n ");
+
+       if (have_rate) {
+               fprintf(fp, "rate ");
+               if (st->bps)
+                       fprintf(fp, "%s ", sprint_rate(st->bps, txt));
+               if (st->pps)
+                       fprintf(fp, "%upps ", st->pps);
+       }
+
+       if (have_queue) {
+               fprintf(fp, "backlog ");
+               if (st->backlog)
+                       fprintf(fp, "%s ", sprint_size(st->backlog, txt));
+               if (st->qlen)
+                       fprintf(fp, "%up ", st->qlen);
+       }
+}
+
+int filter_ifindex; /* when 0, print_class() prints the "dev NAME" column */
+__u32 filter_qdisc; /* when non-zero, print_class() skips classes whose handle major differs and prints only the minor part of class IDs */
+
+/*
+ * Print one traffic-class netlink message.
+ *
+ * who is unused; n is the received message and arg is the FILE* to print
+ * to.  Messages that are not RTM_NEWTCLASS/RTM_DELTCLASS are reported and
+ * ignored (return 0); classes filtered out by filter_qdisc are skipped
+ * silently.  Returns -1 when the message is too short or carries no
+ * TCA_KIND attribute, 0 otherwise.
+ */
+int print_class(struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
+{
+       FILE *fp = (FILE*)arg;
+       struct tcmsg *t = NLMSG_DATA(n);
+       int len = n->nlmsg_len;
+       struct rtattr * tb[TCA_MAX+1];
+       struct qdisc_util *q;
+       char abuf[256];
+
+       if (n->nlmsg_type != RTM_NEWTCLASS && n->nlmsg_type != RTM_DELTCLASS) {
+               fprintf(stderr, "Not a class\n");
+               return 0;
+       }
+       len -= NLMSG_LENGTH(sizeof(*t));
+       if (len < 0) {
+               fprintf(stderr, "Wrong len %d\n", len);
+               return -1;
+       }
+       /* Skip classes whose handle major differs from the requested qdisc. */
+       if (filter_qdisc && TC_H_MAJ(t->tcm_handle^filter_qdisc))
+               return 0;
+
+       memset(tb, 0, sizeof(tb));
+       parse_rtattr(tb, TCA_MAX, TCA_RTA(t), len);
+
+       if (tb[TCA_KIND] == NULL) {
+               fprintf(stderr, "NULL kind\n");
+               return -1;
+       }
+
+       if (n->nlmsg_type == RTM_DELTCLASS)
+               fprintf(fp, "deleted ");
+
+       abuf[0] = 0;
+       if (t->tcm_handle) {
+               /* With a qdisc filter only the minor part of the ID is shown. */
+               if (filter_qdisc)
+                       print_tc_classid(abuf, sizeof(abuf), TC_H_MIN(t->tcm_handle));
+               else
+                       print_tc_classid(abuf, sizeof(abuf), t->tcm_handle);
+       }
+       fprintf(fp, "class %s %s ", (char*)RTA_DATA(tb[TCA_KIND]), abuf);
+
+       if (filter_ifindex == 0)
+               fprintf(fp, "dev %s ", ll_index_to_name(t->tcm_ifindex));
+
+       if (t->tcm_parent == TC_H_ROOT)
+               fprintf(fp, "root ");
+       else {
+               if (filter_qdisc)
+                       print_tc_classid(abuf, sizeof(abuf), TC_H_MIN(t->tcm_parent));
+               else
+                       print_tc_classid(abuf, sizeof(abuf), t->tcm_parent);
+               fprintf(fp, "parent %s ", abuf);
+       }
+       if (t->tcm_info)
+               fprintf(fp, "leaf %x: ", t->tcm_info>>16);
+       q = get_qdisc_kind(RTA_DATA(tb[TCA_KIND]));
+       if (tb[TCA_OPTIONS]) {
+               if (q && q->print_copt)
+                       q->print_copt(q, fp, tb[TCA_OPTIONS]);
+               else
+                       fprintf(fp, "[cannot parse class parameters]");
+       }
+       fprintf(fp, "\n");
+       if (show_stats) {
+               if (tb[TCA_STATS]) {
+                       if (RTA_PAYLOAD(tb[TCA_STATS]) < sizeof(struct tc_stats))
+                               fprintf(fp, "statistics truncated");
+                       else {
+                               /* Copy out: the attribute payload may not be suitably aligned. */
+                               struct tc_stats st;
+                               memcpy(&st, RTA_DATA(tb[TCA_STATS]), sizeof(st));
+                               print_class_tcstats(fp, &st);
+                               fprintf(fp, "\n");
+                       }
+               }
+               /*
+                * The placeholder handler that get_qdisc_kind() builds for
+                * unknown kinds leaves print_xstats NULL, so check the
+                * callback before calling it.
+                */
+               if (q && q->print_xstats && tb[TCA_XSTATS]) {
+                       q->print_xstats(q, fp, tb[TCA_XSTATS]);
+                       fprintf(fp, "\n");
+               }
+       }
+       fflush(fp);
+       return 0;
+}
+
+
+/*
+ * "tc class show": parse the optional selectors, ask the kernel for a
+ * dump of traffic classes and print every answer with print_class().
+ *
+ * Recognised words: "dev NAME", "qdisc HANDLE", "root",
+ * "parent CLASSID" and "help".  Returns 0 once the dump has been
+ * printed; the error paths written out below terminate with exit().
+ */
+int tc_class_list(int argc, char **argv)
+{
+       struct rtnl_handle rth;
+       struct tcmsg req;
+       char devname[16];
+
+       memset(devname, 0, sizeof(devname));
+       memset(&req, 0, sizeof(req));
+       req.tcm_family = AF_UNSPEC;
+
+       for (; argc > 0; argc--, argv++) {
+               if (!strcmp(*argv, "dev")) {
+                       NEXT_ARG();
+                       if (devname[0] != 0)
+                               duparg("dev", *argv);
+                       /* devname was zeroed above, so the copy stays terminated */
+                       strncpy(devname, *argv, sizeof(devname)-1);
+               } else if (!strcmp(*argv, "qdisc")) {
+                       NEXT_ARG();
+                       if (filter_qdisc != 0)
+                               duparg("qdisc", *argv);
+                       if (get_qdisc_handle(&filter_qdisc, *argv) != 0)
+                               invarg(*argv, "invalid qdisc ID");
+               } else if (!strcmp(*argv, "root")) {
+                       if (req.tcm_parent != 0) {
+                               fprintf(stderr, "Error: \"root\" is duplicate parent ID\n");
+                               exit(-1);
+                       }
+                       req.tcm_parent = TC_H_ROOT;
+               } else if (!strcmp(*argv, "parent")) {
+                       __u32 parent_id;
+
+                       if (req.tcm_parent != 0)
+                               duparg("parent", *argv);
+                       NEXT_ARG();
+                       if (get_tc_classid(&parent_id, *argv) != 0)
+                               invarg(*argv, "invalid parent ID");
+                       req.tcm_parent = parent_id;
+               } else if (!matches(*argv, "help")) {
+                       usage();
+               } else {
+                       fprintf(stderr, "What is \"%s\"? Try \"tc class help\".\n", *argv);
+                       exit(-1);
+               }
+       }
+
+       if (rtnl_open(&rth, 0) < 0) {
+               fprintf(stderr, "Cannot open rtnetlink\n");
+               exit(1);
+       }
+
+       ll_init_map(&rth);
+
+       if (devname[0] != 0) {
+               req.tcm_ifindex = ll_name_to_index(devname);
+               if (req.tcm_ifindex == 0) {
+                       fprintf(stderr, "Cannot find device \"%s\"\n", devname);
+                       exit(1);
+               }
+               /* print_class() consults filter_ifindex when formatting */
+               filter_ifindex = req.tcm_ifindex;
+       }
+
+       if (rtnl_dump_request(&rth, RTM_GETTCLASS, &req, sizeof(req)) < 0) {
+               perror("Cannot send dump request");
+               exit(1);
+       }
+
+       if (rtnl_dump_filter(&rth, print_class, stdout, NULL, NULL) < 0) {
+               fprintf(stderr, "Dump terminated\n");
+               exit(1);
+       }
+
+       rtnl_close(&rth);
+       return 0;
+}
+
+int do_class(int argc, char **argv)
+{
+       if (argc < 1)
+               return tc_class_list(0, NULL);
+       if (matches(*argv, "add") == 0)
+               return tc_class_modify(RTM_NEWTCLASS, NLM_F_EXCL|NLM_F_CREATE, argc-1, argv+1);
+       if (matches(*argv, "change") == 0)
+               return tc_class_modify(RTM_NEWTCLASS, 0, argc-1, argv+1);
+       if (matches(*argv, "replace") == 0)
+               return tc_class_modify(RTM_NEWTCLASS, NLM_F_CREATE, argc-1, argv+1);
+       if (matches(*argv, "delete") == 0)
+               return tc_class_modify(RTM_DELTCLASS, 0,  argc-1, argv+1);
+#if 0
+       if (matches(*argv, "get") == 0)
+               return tc_class_get(RTM_GETTCLASS, 0,  argc-1, argv+1);
+#endif
+       if (matches(*argv, "list") == 0 || matches(*argv, "show") == 0
+           || matches(*argv, "lst") == 0)
+               return tc_class_list(argc-1, argv+1);
+       if (matches(*argv, "help") == 0)
+               usage();
+       fprintf(stderr, "Command \"%s\" is unknown, try \"tc class help\".\n", *argv);
+       return -1;
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..d695ca2d85d9da8b855d1d65e44cc3727cbaf1ed 100644 (file)
@@ -0,0 +1,5 @@
+/* Top-level handlers for the "tc qdisc", "tc class" and "tc filter"
+ * command families; each receives the words following the family name. */
+extern int do_qdisc(int argc, char **argv);
+extern int do_class(int argc, char **argv);
+extern int do_filter(int argc, char **argv);
+
+/* Parses an "estimator ..." clause into *est.  argc/argv are passed by
+ * pointer so the callee can move the caller's argument cursor
+ * (presumably past the words it consumed -- the implementation is not
+ * part of this header). */
+extern int parse_estimator(int *p_argc, char ***p_argv, struct tc_estimator *est);
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..55586741e70e53af1c84f28514fa1c9d2a3cb3d0 100644 (file)
@@ -0,0 +1,85 @@
+/*
+ * tc_core.c           TC core library.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <math.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "tc_core.h"
+
+/*
+ * Scheduler clock parameters.  t2us and us2t are filled in by
+ * tc_core_init() from the first two fields of /proc/net/psched;
+ * tick_in_usec is their ratio (t2us/us2t) and is the factor the
+ * conversion helpers below multiply or divide by.  All three stay at 1
+ * until tc_core_init() succeeds.
+ */
+static __u32 t2us=1;
+static __u32 us2t=1;
+static double tick_in_usec = 1;
+
+/* Convert a duration in microseconds to scheduler ticks by scaling it
+ * with tick_in_usec; the fractional part is dropped. */
+long tc_core_usec2tick(long usec)
+{
+       return (long)(tick_in_usec * usec);
+}
+
+/* Convert scheduler ticks back to microseconds (inverse scaling of
+ * tc_core_usec2tick); the fractional part is dropped. */
+long tc_core_tick2usec(long tick)
+{
+       return (long)((double)tick / tick_in_usec);
+}
+
+/* Time, in scheduler ticks, needed to send `size` units at `rate`
+ * units per second: size/rate seconds scaled to microseconds and then
+ * converted with tc_core_usec2tick(). */
+unsigned tc_calc_xmittime(unsigned rate, unsigned size)
+{
+       long ticks = tc_core_usec2tick(1000000*((double)size/rate));
+
+       return ticks;
+}
+
+/*
+ * Fill the 256-entry rate table so that
+ *
+ *     rtab[pkt_len>>cell_log] = pkt_xmit_time   (in scheduler ticks)
+ *
+ * for a link running at `bps`.  An mtu of 0 is replaced by 2047.  A
+ * negative cell_log asks for the smallest shift that makes
+ * mtu>>cell_log fit in 0..255.  Sizes below `mpu` are charged as `mpu`.
+ * Returns the cell_log that was used.
+ */
+
+int tc_calc_rtable(unsigned bps, __u32 *rtab, int cell_log, unsigned mtu,
+                  unsigned mpu)
+{
+       int slot;
+
+       if (!mtu)
+               mtu = 2047;
+
+       if (cell_log < 0) {
+               for (cell_log = 0; (mtu>>cell_log) > 255; cell_log++)
+                       ;
+       }
+
+       for (slot = 0; slot < 256; slot++) {
+               unsigned len = (slot<<cell_log);
+
+               if (len < mpu)
+                       len = mpu;
+               rtab[slot] = tc_core_usec2tick(1000000*((double)len/bps));
+       }
+       return cell_log;
+}
+
+/*
+ * Read the scheduler clock parameters from /proc/net/psched and derive
+ * tick_in_usec from them.
+ *
+ * Returns 0 on success.  Returns -1, leaving the current conversion
+ * factors untouched, when the file cannot be opened, when its first two
+ * hexadecimal fields cannot be parsed, or when the second field is zero
+ * (the ratio t2us/us2t would then be a division by zero).
+ */
+int tc_core_init(void)
+{
+       unsigned int new_t2us = 0;
+       unsigned int new_us2t = 0;
+       int fields;
+       FILE *fp = fopen("/proc/net/psched", "r");
+
+       if (fp == NULL)
+               return -1;
+
+       /* Parse into locals so a short read cannot leave the globals
+        * half-updated. */
+       fields = fscanf(fp, "%08x%08x", &new_t2us, &new_us2t);
+       fclose(fp);
+
+       if (fields != 2 || new_us2t == 0)
+               return -1;
+
+       t2us = new_t2us;
+       us2t = new_us2t;
+       tick_in_usec = (double)t2us/us2t;
+       return 0;
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..1d2257ee12aa8a22d045fce6af8ad7352f3694f5 100644 (file)
@@ -0,0 +1,16 @@
+#ifndef _TC_CORE_H_
+#define _TC_CORE_H_ 1
+
+#include <asm/types.h>
+#include <linux/pkt_sched.h>
+
+/* Conversions between microseconds and scheduler ticks. */
+long tc_core_usec2tick(long usec);
+long tc_core_tick2usec(long tick);
+/* Ticks needed to transmit `size` at `rate` (size/rate seconds). */
+unsigned tc_calc_xmittime(unsigned rate, unsigned size);
+/* Fills a 256-entry table of transmit times; returns the cell_log used. */
+int tc_calc_rtable(unsigned bps, __u32 *rtab, int cell_log, unsigned mtu, unsigned mpu);
+
+/* Derives est->interval and est->ewma_log from A and time_const;
+ * returns 0 on success, -1 if the values are out of range. */
+int tc_setup_estimator(unsigned A, unsigned time_const, struct tc_estimator *est);
+
+/* Loads the clock parameters from /proc/net/psched; 0 on success, -1 on failure. */
+int tc_core_init(void);
+
+#endif
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..434db0fe7bbc430c300cdcd7c0bd0612d405defd 100644 (file)
@@ -0,0 +1,44 @@
+/*
+ * tc_estimator.c      Estimator parameter setup (tc_setup_estimator).
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <math.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "tc_core.h"
+
+/*
+ * Derive the two estimator parameters stored in *est from A and
+ * time_const.
+ *
+ * A is compared against (1<<n)*(1000000/4) for n = 0..5, so it is
+ * presumably an interval in microseconds between 250000 and 8000000
+ * (TODO confirm the unit with the caller).  time_const is compared
+ * with A/(-log(w)) and is presumably in the same unit.
+ *
+ * Returns 0 on success, -1 when A is larger than the biggest supported
+ * interval or when no usable ewma_log exists.  *est is modified even on
+ * failure.
+ */
+int tc_setup_estimator(unsigned A, unsigned time_const, struct tc_estimator *est)
+{
+       /* Smallest n in 0..5 with A <= 250000 * 2^n. */
+       for (est->interval=0; est->interval<=5; est->interval++) {
+               if (A <= (1<<est->interval)*(1000000/4))
+                       break;
+       }
+       if (est->interval > 5)
+               return -1;
+       /* Stored with a bias of 2, i.e. the result is in -2..3. */
+       est->interval -= 2;
+       /* First ewma_log whose weight w = 1 - 2^-ewma_log makes
+        * A/(-log(w)) exceed time_const ... */
+       for (est->ewma_log=1; est->ewma_log<32; est->ewma_log++) {
+               double w = 1.0 - 1.0/(1<<est->ewma_log);
+               if (A/(-log(w)) > time_const)
+                       break;
+       }
+       /* ... and step back to the last one that did not exceed it. */
+       est->ewma_log--;
+       /* 0 means even ewma_log 1 was too much; >= 31 means the loop ran out. */
+       if (est->ewma_log==0 || est->ewma_log >= 31)
+               return -1;
+       return 0;
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..300c3e70abf3347e80e8926b1e912533e5127077 100644 (file)
@@ -0,0 +1,388 @@
+/*
+ * tc_filter.c         "tc filter".
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <net/if.h>
+#include <net/if_arp.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+#include <linux/if_ether.h>
+
+#include "rt_names.h"
+#include "utils.h"
+#include "tc_util.h"
+#include "tc_common.h"
+
+static void usage(void) __attribute__((noreturn));
+
+/*
+ * Print the "tc filter" synopsis on stderr and terminate with
+ * exit(-1); never returns.
+ *
+ * The keywords listed are the ones the parsers in this file accept:
+ * the parent is selected with "root" or "parent CLASSID", and the
+ * sub-commands are those dispatched by do_filter() ("get" is compiled
+ * out there, "replace" is available).
+ */
+static void usage(void)
+{
+       fprintf(stderr, "Usage: tc filter [ add | del | change | replace ] dev STRING\n");
+       fprintf(stderr, "       [ pref PRIO ] [ protocol PROTO ]\n");
+       fprintf(stderr, "       [ estimator INTERVAL TIME_CONSTANT ]\n");
+       fprintf(stderr, "       [ root | parent CLASSID ] [ handle FILTERID ]\n");
+       fprintf(stderr, "       [ [ FILTER_TYPE ] [ help | OPTIONS ] ]\n");
+       fprintf(stderr, "\n");
+       fprintf(stderr, "       tc filter show [ dev STRING ] [ root | parent CLASSID ]\n");
+       fprintf(stderr, "Where:\n");
+       fprintf(stderr, "FILTER_TYPE := { rsvp | u32 | fw | route | etc. }\n");
+       fprintf(stderr, "FILTERID := ... format depends on classifier, see there\n");
+       fprintf(stderr, "OPTIONS := ... try tc filter add <desired FILTER_KIND> help\n");
+       exit(-1);
+}
+
+
+/*
+ * Build and send one filter request (used for "tc filter add",
+ * "change", "replace" and "delete").
+ *
+ * cmd        - rtnetlink message type stored in nlmsg_type.
+ * flags      - extra NLM_F_* bits OR-ed with NLM_F_REQUEST.
+ * argc/argv  - remaining command line.  The generic keywords (dev,
+ *              root, parent, handle, preference/priority, protocol,
+ *              estimator, help) are consumed here; the first other
+ *              word is taken as the filter kind and everything after
+ *              it is handed to that kind's parse_fopt() callback.
+ *
+ * Returns 0 after the request has been sent, -1 if the estimator
+ * clause cannot be parsed.  The other error paths written out below
+ * terminate the process with exit().
+ */
+int tc_filter_modify(int cmd, unsigned flags, int argc, char **argv)
+{
+       struct rtnl_handle rth;
+       struct {
+               struct nlmsghdr         n;
+               struct tcmsg            t;
+               char                    buf[4096];
+       } req;
+       struct filter_util *q = NULL;
+       __u32 prio = 0;
+       __u32 protocol = 0;
+       char *fhandle = NULL;
+       char  d[16];
+       char  k[16];
+       struct tc_estimator est;
+
+       memset(&req, 0, sizeof(req));
+       memset(&est, 0, sizeof(est));
+       memset(d, 0, sizeof(d));
+       memset(k, 0, sizeof(k));
+
+       req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
+       req.n.nlmsg_flags = NLM_F_REQUEST|flags;
+       req.n.nlmsg_type = cmd;
+       req.t.tcm_family = AF_UNSPEC;
+
+       while (argc > 0) {
+               if (strcmp(*argv, "dev") == 0) {
+                       NEXT_ARG();
+                       if (d[0])
+                               duparg("dev", *argv);
+                       /* d was zeroed above, so it stays NUL-terminated */
+                       strncpy(d, *argv, sizeof(d)-1);
+               } else if (strcmp(*argv, "root") == 0) {
+                       if (req.t.tcm_parent) {
+                               fprintf(stderr, "Error: \"root\" is duplicate parent ID\n");
+                               exit(-1);
+                       }
+                       req.t.tcm_parent = TC_H_ROOT;
+               } else if (strcmp(*argv, "parent") == 0) {
+                       __u32 handle;
+                       NEXT_ARG();
+                       if (req.t.tcm_parent)
+                               duparg("parent", *argv);
+                       if (get_tc_classid(&handle, *argv))
+                               invarg(*argv, "Invalid parent ID");
+                       req.t.tcm_parent = handle;
+               } else if (strcmp(*argv, "handle") == 0) {
+                       NEXT_ARG();
+                       if (fhandle)
+                               duparg("handle", *argv);
+                       /* kept as text: its format depends on the filter kind */
+                       fhandle = *argv;
+               } else if (matches(*argv, "preference") == 0 ||
+                          matches(*argv, "priority") == 0) {
+                       NEXT_ARG();
+                       if (prio)
+                               duparg("priority", *argv);
+                       if (get_u32(&prio, *argv, 0))
+                               invarg(*argv, "invalid priority value");
+               } else if (matches(*argv, "protocol") == 0) {
+                       __u16 id;
+                       NEXT_ARG();
+                       if (protocol)
+                               duparg("protocol", *argv);
+                       if (ll_proto_a2n(&id, *argv))
+                               invarg(*argv, "invalid protocol");
+                       protocol = id;
+               } else if (matches(*argv, "estimator") == 0) {
+                       if (parse_estimator(&argc, &argv, &est) < 0)
+                               return -1;
+               } else if (matches(*argv, "help") == 0) {
+                       usage();
+               } else {
+                       /* First unknown word names the filter kind; the
+                        * rest of the line belongs to that kind. */
+                       strncpy(k, *argv, sizeof(k)-1);
+
+                       q = get_filter_kind(k);
+                       argc--; argv++;
+                       break;
+               }
+
+               argc--; argv++;
+       }
+
+       /* Priority goes in the upper 16 bits, protocol in the lower 16;
+        * print_filter() decodes tcm_info the same way. */
+       req.t.tcm_info = TC_H_MAKE(prio<<16, protocol);
+
+       if (k[0])
+               addattr_l(&req.n, sizeof(req), TCA_KIND, k, strlen(k)+1);
+
+       if (q) {
+               if (q->parse_fopt(q, fhandle, argc, argv, &req.n))
+                       exit(1);
+       } else {
+               if (fhandle) {
+                       fprintf(stderr, "Must specify filter type when using "
+                               "\"handle\"\n");
+                       exit(-1);
+               }
+               if (argc) {
+                       if (matches(*argv, "help") == 0)
+                               usage();
+                       fprintf(stderr, "Garbage instead of arguments \"%s ...\". Try \"tc filter help\".\n", *argv);
+                       exit(-1);
+               }
+       }
+       /* est was zeroed; a non-zero ewma_log means an estimator was given */
+       if (est.ewma_log)
+               addattr_l(&req.n, sizeof(req), TCA_RATE, &est, sizeof(est));
+
+
+       if (rtnl_open(&rth, 0) < 0) {
+               fprintf(stderr, "Cannot open rtnetlink\n");
+               exit(1);
+       }
+
+       if (d[0])  {
+               ll_init_map(&rth);
+
+               if ((req.t.tcm_ifindex = ll_name_to_index(d)) == 0) {
+                       fprintf(stderr, "Cannot find device \"%s\"\n", d);
+                       exit(1);
+               }
+       }
+
+       if (rtnl_talk(&rth, &req.n, 0, 0, NULL, NULL, NULL) < 0)
+               exit(2);
+
+       rtnl_close(&rth);
+       return 0;
+}
+
+/*
+ * Selection given to "tc filter show", recorded by tc_filter_list().
+ * print_filter() leaves a field out of its output when the field
+ * equals the corresponding non-zero value here.
+ */
+static __u32 filter_parent;
+static int filter_ifindex;
+static __u32 filter_prio;
+static __u32 filter_protocol;
+
+/*
+ * Dump callback: print one filter message on the FILE passed in `arg`.
+ *
+ * who - not used here.
+ * n   - netlink message; only RTM_NEWTFILTER and RTM_DELTFILTER are
+ *       printed, anything else is reported on stderr and ignored.
+ * arg - FILE * to print to.
+ *
+ * Returns 0 normally, -1 when the message is too short or carries no
+ * TCA_KIND attribute.
+ */
+int print_filter(struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
+{
+       FILE *fp = (FILE*)arg;
+       struct tcmsg *t = NLMSG_DATA(n);
+       int len = n->nlmsg_len;
+       struct rtattr * tb[TCA_MAX+1];
+       struct filter_util *q;
+       char abuf[256];
+
+       if (n->nlmsg_type != RTM_NEWTFILTER && n->nlmsg_type != RTM_DELTFILTER) {
+               fprintf(stderr, "Not a filter\n");
+               return 0;
+       }
+       /* What remains after the tcmsg header is the attribute area. */
+       len -= NLMSG_LENGTH(sizeof(*t));
+       if (len < 0) {
+               fprintf(stderr, "Wrong len %d\n", len);
+               return -1;
+       }
+
+       memset(tb, 0, sizeof(tb));
+       parse_rtattr(tb, TCA_MAX, TCA_RTA(t), len);
+
+       if (tb[TCA_KIND] == NULL) {
+               fprintf(stderr, "NULL kind\n");
+               return -1;
+       }
+
+       if (n->nlmsg_type == RTM_DELTFILTER)
+               fprintf(fp, "deleted ");
+
+       fprintf(fp, "filter ");
+       /* Device, parent, protocol and preference are printed only when
+        * they were not requested explicitly or differ from the request. */
+       if (!filter_ifindex || filter_ifindex != t->tcm_ifindex)
+               fprintf(fp, "dev %s ", ll_index_to_name(t->tcm_ifindex));
+
+       if (!filter_parent || filter_parent != t->tcm_parent) {
+               if (t->tcm_parent == TC_H_ROOT)
+                       fprintf(fp, "root ");
+               else {
+                       print_tc_classid(abuf, sizeof(abuf), t->tcm_parent);
+                       fprintf(fp, "parent %s ", abuf);
+               }
+       }
+       if (t->tcm_info) {
+               /* tcm_info: protocol in the low 16 bits, preference in the high 16 */
+               __u32 protocol = TC_H_MIN(t->tcm_info);
+               __u32 prio = TC_H_MAJ(t->tcm_info)>>16;
+               if (!filter_protocol || filter_protocol != protocol) {
+                       if (protocol) {
+                               SPRINT_BUF(b1);
+                               fprintf(fp, "protocol %s ",
+                                       ll_proto_n2a(protocol, b1, sizeof(b1)));
+                       }
+               }
+               if (!filter_prio || filter_prio != prio) {
+                       if (prio)
+                               fprintf(fp, "pref %u ", prio);
+               }
+       }
+       fprintf(fp, "%s ", (char*)RTA_DATA(tb[TCA_KIND]));
+       /* Kind-specific printer, if one is known for this filter kind. */
+       q = get_filter_kind(RTA_DATA(tb[TCA_KIND]));
+       if (tb[TCA_OPTIONS]) {
+               if (q)
+                       q->print_fopt(q, fp, tb[TCA_OPTIONS], t->tcm_handle);
+               else
+                       fprintf(fp, "[cannot parse parameters]");
+       }
+       fprintf(fp, "\n");
+
+       if (show_stats) {
+               if (tb[TCA_STATS]) {
+                       if (RTA_PAYLOAD(tb[TCA_STATS]) < sizeof(struct tc_stats))
+                               fprintf(fp, "statistics truncated");
+                       else {
+                               /* copy out of the message before reading the fields */
+                               struct tc_stats st;
+                               memcpy(&st, RTA_DATA(tb[TCA_STATS]), sizeof(st));
+                               print_tcstats(fp, &st);
+                               fprintf(fp, "\n");
+                       }
+               }
+       }
+       fflush(fp);
+       return 0;
+}
+
+
+/*
+ * "tc filter show": parse the optional selectors, ask the kernel for a
+ * dump of filters and print every answer with print_filter().
+ *
+ * Recognised words: "dev NAME", "root", "parent CLASSID",
+ * "handle ID", "preference|priority N", "protocol P" and "help".  The
+ * selection is also recorded in the filter_* variables so that
+ * print_filter() can leave out what the user already specified.
+ * Returns 0 once the dump has been printed; the error paths written
+ * out below terminate with exit().
+ */
+int tc_filter_list(int argc, char **argv)
+{
+       struct rtnl_handle rth;
+       struct tcmsg req;
+       char devname[16];
+       char *fhandle = NULL;
+       __u32 protocol = 0;
+       __u32 prio = 0;
+
+       memset(devname, 0, sizeof(devname));
+       memset(&req, 0, sizeof(req));
+       req.tcm_family = AF_UNSPEC;
+
+       for (; argc > 0; argc--, argv++) {
+               if (!strcmp(*argv, "dev")) {
+                       NEXT_ARG();
+                       if (devname[0] != 0)
+                               duparg("dev", *argv);
+                       strncpy(devname, *argv, sizeof(devname)-1);
+               } else if (!strcmp(*argv, "root")) {
+                       if (req.tcm_parent != 0) {
+                               fprintf(stderr, "Error: \"root\" is duplicate parent ID\n");
+                               exit(-1);
+                       }
+                       req.tcm_parent = TC_H_ROOT;
+                       filter_parent = req.tcm_parent;
+               } else if (!strcmp(*argv, "parent")) {
+                       __u32 parent_id;
+
+                       NEXT_ARG();
+                       if (req.tcm_parent != 0)
+                               duparg("parent", *argv);
+                       if (get_tc_classid(&parent_id, *argv) != 0)
+                               invarg(*argv, "invalid parent ID");
+                       req.tcm_parent = parent_id;
+                       filter_parent = req.tcm_parent;
+               } else if (!strcmp(*argv, "handle")) {
+                       NEXT_ARG();
+                       if (fhandle != NULL)
+                               duparg("handle", *argv);
+                       fhandle = *argv;
+               } else if (!matches(*argv, "preference") ||
+                          !matches(*argv, "priority")) {
+                       NEXT_ARG();
+                       if (prio != 0)
+                               duparg("priority", *argv);
+                       if (get_u32(&prio, *argv, 0) != 0)
+                               invarg(*argv, "invalid preference");
+                       filter_prio = prio;
+               } else if (!matches(*argv, "protocol")) {
+                       __u16 proto_id;
+
+                       NEXT_ARG();
+                       if (protocol != 0)
+                               duparg("protocol", *argv);
+                       if (ll_proto_a2n(&proto_id, *argv) != 0)
+                               invarg(*argv, "invalid protocol");
+                       protocol = proto_id;
+                       filter_protocol = protocol;
+               } else if (!matches(*argv, "help")) {
+                       usage();
+               } else {
+                       fprintf(stderr, " What is \"%s\"? Try \"tc filter help\"\n", *argv);
+                       exit(-1);
+               }
+       }
+
+       /* preference in the upper 16 bits, protocol in the lower 16 */
+       req.tcm_info = TC_H_MAKE(prio<<16, protocol);
+
+       if (rtnl_open(&rth, 0) < 0) {
+               fprintf(stderr, "Cannot open rtnetlink\n");
+               exit(1);
+       }
+
+       ll_init_map(&rth);
+
+       if (devname[0] != 0) {
+               req.tcm_ifindex = ll_name_to_index(devname);
+               if (req.tcm_ifindex == 0) {
+                       fprintf(stderr, "Cannot find device \"%s\"\n", devname);
+                       exit(1);
+               }
+               filter_ifindex = req.tcm_ifindex;
+       }
+
+       if (rtnl_dump_request(&rth, RTM_GETTFILTER, &req, sizeof(req)) < 0) {
+               perror("Cannot send dump request");
+               exit(1);
+       }
+
+       if (rtnl_dump_filter(&rth, print_filter, stdout, NULL, NULL) < 0) {
+               fprintf(stderr, "Dump terminated\n");
+               exit(1);
+       }
+
+       rtnl_close(&rth);
+       return 0;
+}
+
+int do_filter(int argc, char **argv)
+{
+       if (argc < 1)
+               return tc_filter_list(0, NULL);
+       if (matches(*argv, "add") == 0)
+               return tc_filter_modify(RTM_NEWTFILTER, NLM_F_EXCL|NLM_F_CREATE, argc-1, argv+1);
+       if (matches(*argv, "change") == 0)
+               return tc_filter_modify(RTM_NEWTFILTER, 0, argc-1, argv+1);
+       if (matches(*argv, "replace") == 0)
+               return tc_filter_modify(RTM_NEWTFILTER, NLM_F_CREATE, argc-1, argv+1);
+       if (matches(*argv, "delete") == 0)
+               return tc_filter_modify(RTM_DELTFILTER, 0,  argc-1, argv+1);
+#if 0
+       if (matches(*argv, "get") == 0)
+               return tc_filter_get(RTM_GETTFILTER, 0,  argc-1, argv+1);
+#endif
+       if (matches(*argv, "list") == 0 || matches(*argv, "show") == 0
+           || matches(*argv, "lst") == 0)
+               return tc_filter_list(argc-1, argv+1);
+       if (matches(*argv, "help") == 0)
+               usage();
+       fprintf(stderr, "Command \"%s\" is unknown, try \"tc filter help\".\n", *argv);
+       exit(-1);
+}
+
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..361ca8aaddc631b53ec6be4e529bfcf07829b842 100644 (file)
@@ -0,0 +1,353 @@
+/*
+ * tc_qdisc.c          "tc qdisc".
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *             J Hadi Salim: Extension to ingress
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+#include <math.h>
+
+#include "utils.h"
+#include "tc_util.h"
+#include "tc_common.h"
+
+static void usage(void) __attribute__((noreturn));
+
+/*
+ * Print the "tc qdisc" synopsis on stderr and terminate with
+ * exit(-1); never returns.
+ *
+ * The sub-commands listed are those dispatched by do_qdisc(): "get"
+ * is compiled out there, "link" is available.
+ */
+static void usage(void)
+{
+       fprintf(stderr, "Usage: tc qdisc [ add | del | replace | change | link ] dev STRING\n");
+       fprintf(stderr, "       [ handle QHANDLE ] [ root | ingress | parent CLASSID ]\n");
+       fprintf(stderr, "       [ estimator INTERVAL TIME_CONSTANT ]\n");
+       fprintf(stderr, "       [ [ QDISC_KIND ] [ help | OPTIONS ] ]\n");
+       fprintf(stderr, "\n");
+       fprintf(stderr, "       tc qdisc show [ dev STRING ] [ingress]\n");
+       fprintf(stderr, "Where:\n");
+       fprintf(stderr, "QDISC_KIND := { [p|b]fifo | tbf | prio | cbq | red | etc. }\n");
+       fprintf(stderr, "OPTIONS := ... try tc qdisc add <desired QDISC_KIND> help\n");
+       exit(-1);
+}
+
+/*
+ * Build and send one qdisc request (used for "tc qdisc add", "change",
+ * "replace", "link" and "delete").
+ *
+ * cmd        - rtnetlink message type stored in nlmsg_type.
+ * flags      - extra NLM_F_* bits OR-ed with NLM_F_REQUEST.
+ * argc/argv  - remaining command line.  The generic keywords (dev,
+ *              handle, root, ingress, parent, estimator, help) are
+ *              consumed here; the first other word is taken as the
+ *              qdisc kind and everything after it is handed to that
+ *              kind's parse_qopt() callback.
+ *
+ * Returns 0 after the request has been sent, -1 if the estimator
+ * clause cannot be parsed.  The other error paths written out below
+ * terminate the process with exit().
+ */
+int tc_qdisc_modify(int cmd, unsigned flags, int argc, char **argv)
+{
+       struct rtnl_handle rth;
+       struct {
+               struct nlmsghdr         n;
+               struct tcmsg            t;
+               char                    buf[4096];
+       } req;
+       struct qdisc_util *q = NULL;
+       struct tc_estimator est;
+       char  d[16];
+       char  k[16];
+
+       memset(&req, 0, sizeof(req));
+       memset(&est, 0, sizeof(est));
+       memset(&d, 0, sizeof(d));
+       memset(&k, 0, sizeof(k));
+
+       req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
+       req.n.nlmsg_flags = NLM_F_REQUEST|flags;
+       req.n.nlmsg_type = cmd;
+       req.t.tcm_family = AF_UNSPEC;
+
+       while (argc > 0) {
+               if (strcmp(*argv, "dev") == 0) {
+                       NEXT_ARG();
+                       if (d[0])
+                               duparg("dev", *argv);
+                       /* d was zeroed above, so it stays NUL-terminated */
+                       strncpy(d, *argv, sizeof(d)-1);
+               } else if (strcmp(*argv, "handle") == 0) {
+                       __u32 handle;
+                       if (req.t.tcm_handle)
+                               duparg("handle", *argv);
+                       NEXT_ARG();
+                       if (get_qdisc_handle(&handle, *argv))
+                               invarg(*argv, "invalid qdisc ID");
+                       req.t.tcm_handle = handle;
+               } else if (strcmp(*argv, "root") == 0) {
+                       if (req.t.tcm_parent) {
+                               fprintf(stderr, "Error: \"root\" is duplicate parent ID\n");
+                               exit(-1);
+                       }
+                       req.t.tcm_parent = TC_H_ROOT;
+#ifdef TC_H_INGRESS
+               } else if (strcmp(*argv, "ingress") == 0) {
+                       if (req.t.tcm_parent) {
+                               fprintf(stderr, "Error: \"ingress\" is a duplicate parent ID\n");
+                               exit(-1);
+                       }
+                       /* "ingress" is parent, kind and handle in one word:
+                        * it fixes all three and ends generic parsing. */
+                       req.t.tcm_parent = TC_H_INGRESS;
+                       strncpy(k, "ingress", sizeof(k)-1);
+                       q = get_qdisc_kind(k);
+                       req.t.tcm_handle = 0xffff0000;
+
+                       argc--; argv++;
+                       break;
+#endif
+               } else if (strcmp(*argv, "parent") == 0) {
+                       __u32 handle;
+                       NEXT_ARG();
+                       if (req.t.tcm_parent)
+                               duparg("parent", *argv);
+                       if (get_tc_classid(&handle, *argv))
+                               invarg(*argv, "invalid parent ID");
+                       req.t.tcm_parent = handle;
+               } else if (matches(*argv, "estimator") == 0) {
+                       if (parse_estimator(&argc, &argv, &est))
+                               return -1;
+               } else if (matches(*argv, "help") == 0) {
+                       usage();
+               } else {
+                       /* First unknown word names the qdisc kind; the
+                        * rest of the line belongs to that kind. */
+                       strncpy(k, *argv, sizeof(k)-1);
+
+                       q = get_qdisc_kind(k);
+                       argc--; argv++;
+                       break;
+               }
+               argc--; argv++;
+       }
+
+       if (k[0])
+               addattr_l(&req.n, sizeof(req), TCA_KIND, k, strlen(k)+1);
+       /* est was zeroed; a non-zero ewma_log means an estimator was given */
+       if (est.ewma_log)
+               addattr_l(&req.n, sizeof(req), TCA_RATE, &est, sizeof(est));
+
+       if (q) {
+               if (q->parse_qopt(q, argc, argv, &req.n))
+                       exit(1);
+       } else {
+               if (argc) {
+                       if (matches(*argv, "help") == 0)
+                               usage();
+
+                       fprintf(stderr, "Garbage instead of arguments \"%s ...\". Try \"tc qdisc help\".\n", *argv);
+                       exit(-1);
+               }
+       }
+
+       if (rtnl_open(&rth, 0) < 0) {
+               fprintf(stderr, "Cannot open rtnetlink\n");
+               exit(1);
+       }
+
+       if (d[0])  {
+               int idx;
+
+               ll_init_map(&rth);
+
+               if ((idx = ll_name_to_index(d)) == 0) {
+                       fprintf(stderr, "Cannot find device \"%s\"\n", d);
+                       exit(1);
+               }
+               req.t.tcm_ifindex = idx;
+       }
+
+       if (rtnl_talk(&rth, &req.n, 0, 0, NULL, NULL, NULL) < 0)
+               exit(2);
+
+       rtnl_close(&rth);
+       return 0;
+}
+
+/*
+ * Print the generic statistics block *st on fp.
+ *
+ * The first line always carries the byte/packet/drop/overlimit
+ * counters.  A second line is started only when at least one of the
+ * rate (bps, pps) or queue (backlog, qlen) values is non-zero, and
+ * then shows just the non-zero ones.  No trailing newline is written.
+ */
+void print_tcstats(FILE *fp, struct tc_stats *st)
+{
+       int show_rate = st->bps || st->pps;
+       int show_queue = st->qlen || st->backlog;
+       SPRINT_BUF(buf);
+
+       fprintf(fp, " Sent %llu bytes %u pkts (dropped %u, overlimits %u) ",
+               (unsigned long long)st->bytes, st->packets, st->drops, st->overlimits);
+
+       if (!show_rate && !show_queue)
+               return;
+
+       fprintf(fp, "\n ");
+
+       if (show_rate) {
+               fprintf(fp, "rate ");
+               if (st->bps != 0)
+                       fprintf(fp, "%s ", sprint_rate(st->bps, buf));
+               if (st->pps != 0)
+                       fprintf(fp, "%upps ", st->pps);
+       }
+
+       if (show_queue) {
+               fprintf(fp, "backlog ");
+               if (st->backlog != 0)
+                       fprintf(fp, "%s ", sprint_size(st->backlog, buf));
+               if (st->qlen != 0)
+                       fprintf(fp, "%up ", st->qlen);
+       }
+}
+
+/* Interface index selected with "tc qdisc show dev ..." (set by
+ * tc_qdisc_list()); 0 means no device was selected.  print_qdisc()
+ * skips messages for other interfaces when it is non-zero. */
+static int filter_ifindex;
+
+/*
+ * Dump callback: print one qdisc message on the FILE passed in `arg`.
+ *
+ * who - not used here.
+ * n   - netlink message; only RTM_NEWQDISC and RTM_DELQDISC are
+ *       printed, anything else is reported on stderr and ignored.
+ * arg - FILE * to print to.
+ *
+ * Messages for an interface other than the one selected with
+ * filter_ifindex are silently skipped.  Returns 0 normally, -1 when
+ * the message is too short or carries no TCA_KIND attribute.
+ */
+int print_qdisc(struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
+{
+       FILE *fp = (FILE*)arg;
+       struct tcmsg *t = NLMSG_DATA(n);
+       int len = n->nlmsg_len;
+       struct rtattr * tb[TCA_MAX+1];
+       struct qdisc_util *q;
+       char abuf[256];
+
+       if (n->nlmsg_type != RTM_NEWQDISC && n->nlmsg_type != RTM_DELQDISC) {
+               fprintf(stderr, "Not a qdisc\n");
+               return 0;
+       }
+       /* What remains after the tcmsg header is the attribute area. */
+       len -= NLMSG_LENGTH(sizeof(*t));
+       if (len < 0) {
+               fprintf(stderr, "Wrong len %d\n", len);
+               return -1;
+       }
+
+       if (filter_ifindex && filter_ifindex != t->tcm_ifindex)
+               return 0;
+
+       memset(tb, 0, sizeof(tb));
+       parse_rtattr(tb, TCA_MAX, TCA_RTA(t), len);
+
+       if (tb[TCA_KIND] == NULL) {
+               fprintf(stderr, "NULL kind\n");
+               return -1;
+       }
+
+       if (n->nlmsg_type == RTM_DELQDISC)
+               fprintf(fp, "deleted ");
+
+       /* Only the major part (upper 16 bits) of the handle is shown. */
+       fprintf(fp, "qdisc %s %x: ", (char*)RTA_DATA(tb[TCA_KIND]), t->tcm_handle>>16);
+       if (filter_ifindex == 0)
+               fprintf(fp, "dev %s ", ll_index_to_name(t->tcm_ifindex));
+       if (t->tcm_parent == TC_H_ROOT)
+               fprintf(fp, "root ");
+       else if (t->tcm_parent) {
+               print_tc_classid(abuf, sizeof(abuf), t->tcm_parent);
+               fprintf(fp, "parent %s ", abuf);
+       }
+       /* tcm_info is printed as a reference count unless it is 1 */
+       if (t->tcm_info != 1) {
+               fprintf(fp, "refcnt %d ", t->tcm_info);
+       }
+       /* Kind-specific printer, if one is known for this qdisc kind. */
+       q = get_qdisc_kind(RTA_DATA(tb[TCA_KIND]));
+       if (tb[TCA_OPTIONS]) {
+               if (q)
+                       q->print_qopt(q, fp, tb[TCA_OPTIONS]);
+               else
+                       fprintf(fp, "[cannot parse qdisc parameters]");
+       }
+       fprintf(fp, "\n");
+       if (show_stats) {
+               if (tb[TCA_STATS]) {
+                       if (RTA_PAYLOAD(tb[TCA_STATS]) < sizeof(struct tc_stats))
+                               fprintf(fp, "statistics truncated");
+                       else {
+                               /* copy out of the message before reading the fields */
+                               struct tc_stats st;
+                               memcpy(&st, RTA_DATA(tb[TCA_STATS]), sizeof(st));
+                               print_tcstats(fp, &st);
+                               fprintf(fp, "\n");
+                       }
+               }
+               /* A kind need not provide every callback (the class
+                * printer checks print_copt for the same reason), so do
+                * not call through a NULL print_xstats. */
+               if (q && q->print_xstats && tb[TCA_XSTATS]) {
+                       q->print_xstats(q, fp, tb[TCA_XSTATS]);
+                       fprintf(fp, "\n");
+               }
+       }
+       fflush(fp);
+       return 0;
+}
+
+
+/*
+ * "tc qdisc show": parse the optional selectors, ask the kernel for a
+ * dump of qdiscs and print every answer with print_qdisc().
+ *
+ * Recognised words: "dev NAME", "ingress" (only when TC_H_INGRESS is
+ * defined) and "help".  Returns 0 once the dump has been printed and
+ * -1 for an unknown word; the other error paths written out below
+ * terminate with exit().
+ */
+int tc_qdisc_list(int argc, char **argv)
+{
+       struct tcmsg t;
+       struct rtnl_handle rth;
+       char d[16];
+
+       memset(&t, 0, sizeof(t));
+       t.tcm_family = AF_UNSPEC;
+       memset(&d, 0, sizeof(d));
+       
+       while (argc > 0) {
+               if (strcmp(*argv, "dev") == 0) {
+                       NEXT_ARG();
+                       /* no duplicate check here: a later "dev" overrides an earlier one */
+                       strncpy(d, *argv, sizeof(d)-1);
+#ifdef TC_H_INGRESS
+                } else if (strcmp(*argv, "ingress") == 0) {
+                             if (t.tcm_parent) {
+                                     fprintf(stderr, "Duplicate parent ID\n");
+                                     usage();
+                             }
+                             t.tcm_parent = TC_H_INGRESS;
+#endif
+               } else if (matches(*argv, "help") == 0) {
+                       usage();
+               } else {
+                       fprintf(stderr, "What is \"%s\"? Try \"tc qdisc help\".\n", *argv);
+                       return -1;
+               }
+
+               argc--; argv++;
+       }
+
+       if (rtnl_open(&rth, 0) < 0) {
+               fprintf(stderr, "Cannot open rtnetlink\n");
+               exit(1);
+       }
+
+       ll_init_map(&rth);
+
+       if (d[0]) {
+               if ((t.tcm_ifindex = ll_name_to_index(d)) == 0) {
+                       fprintf(stderr, "Cannot find device \"%s\"\n", d);
+                       exit(1);
+               }
+               /* print_qdisc() drops messages for other interfaces */
+               filter_ifindex = t.tcm_ifindex;
+       }
+
+       if (rtnl_dump_request(&rth, RTM_GETQDISC, &t, sizeof(t)) < 0) {
+               perror("Cannot send dump request");
+               exit(1);
+       }
+
+       if (rtnl_dump_filter(&rth, print_qdisc, stdout, NULL, NULL) < 0) {
+               fprintf(stderr, "Dump terminated\n");
+               exit(1);
+       }
+
+       rtnl_close(&rth);
+       return 0;
+}
+
+int do_qdisc(int argc, char **argv)
+{
+       if (argc < 1)
+               return tc_qdisc_list(0, NULL);
+       if (matches(*argv, "add") == 0)
+               return tc_qdisc_modify(RTM_NEWQDISC, NLM_F_EXCL|NLM_F_CREATE, argc-1, argv+1);
+       if (matches(*argv, "change") == 0)
+               return tc_qdisc_modify(RTM_NEWQDISC, 0, argc-1, argv+1);
+       if (matches(*argv, "replace") == 0)
+               return tc_qdisc_modify(RTM_NEWQDISC, NLM_F_CREATE|NLM_F_REPLACE, argc-1, argv+1);
+       if (matches(*argv, "link") == 0)
+               return tc_qdisc_modify(RTM_NEWQDISC, NLM_F_REPLACE, argc-1, argv+1);
+       if (matches(*argv, "delete") == 0)
+               return tc_qdisc_modify(RTM_DELQDISC, 0,  argc-1, argv+1);
+#if 0
+       if (matches(*argv, "get") == 0)
+               return tc_qdisc_get(RTM_GETQDISC, 0,  argc-1, argv+1);
+#endif
+       if (matches(*argv, "list") == 0 || matches(*argv, "show") == 0
+           || matches(*argv, "lst") == 0)
+               return tc_qdisc_list(argc-1, argv+1);
+       if (matches(*argv, "help") == 0)
+               usage();
+       fprintf(stderr, "Command \"%s\" is unknown, try \"tc qdisc help\".\n", *argv);
+       return -1;
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..385e7af116b399980085cd20b1951b4374551ab2 100644 (file)
@@ -0,0 +1,97 @@
+/*
+ * tc_red.c            RED maintenance routines.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <math.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "tc_core.h"
+#include "tc_red.h"
+
+/*
+ * Plog for RED: scale prob by 1/(qmax - qmin), then count how many times
+ * the result has to be doubled before it exceeds 1.0.
+ *
+ * Returns that count (0..31), or -1 when qmax - qmin is not positive or
+ * when 32 doublings are not enough (e.g. prob is zero or negative).
+ */
+int tc_red_eval_P(unsigned qmin, unsigned qmax, double prob)
+{
+       int span = qmax - qmin;
+       int plog = 0;
+
+       if (span <= 0)
+               return -1;
+
+       prob /= span;
+
+       /* "!(prob > 1.0)" keeps looping on NaN, as a plain "> 1.0" test would. */
+       while (plog < 32 && !(prob > 1.0)) {
+               prob *= 2;
+               plog++;
+       }
+       return plog < 32 ? plog : -1;
+}
+
+/*
+ * Find the EWMA weight exponent for RED.
+ *
+ * With W = 2^-wlog, searches wlog = 1..31 in increasing order for the
+ * first value satisfying
+ *
+ *     burst + 1 - qmin/avpkt <= (1 - (1-W)^burst)/W
+ *
+ * Returns that wlog, or -1 when the left-hand side is below 1.0 or no
+ * wlog in range satisfies the inequality.
+ */
+
+int tc_red_eval_ewma(unsigned qmin, unsigned burst, unsigned avpkt)
+{
+       double target = (double)burst + 1 - (double)qmin/avpkt;
+       double weight = 0.5;
+       int wlog = 1;
+
+       if (target < 1.0)
+               return -1;
+
+       while (wlog < 32) {
+               if (target <= (1 - pow(1-weight, burst))/weight)
+                       return wlog;
+               wlog++;
+               weight /= 2;
+       }
+       return -1;
+}
+
+/*
+ * Build the RED idle-damping table:
+ *
+ *     Stab[t>>Scell_log] = -log(1-W) * t/xmit_time,   W = 2^-Wlog
+ *
+ * sbuf must have room for 256 entries.  Entry 0 is 0, entry 255 is 31 and
+ * entries 1..254 hold (i << clog) * lW truncated to an integer and capped
+ * at 31.
+ *
+ * Returns the cell log that was chosen (the smallest shift for which
+ * maxtime / 2^clog < 512), or -1 if no shift below 32 qualifies.
+ */
+
+int tc_red_eval_idle_damping(int Wlog, unsigned avpkt, unsigned bps, __u8 *sbuf)
+{
+       double xmit_time = tc_core_usec2tick(1000000*(double)avpkt/bps);
+       /* ldexp() scales by powers of two without shifting an int out of range. */
+       double lW = -log(1.0 - ldexp(1.0, -Wlog))/xmit_time;
+       double maxtime = 31/lW;
+       int clog;
+       int i;
+
+       for (clog=0; clog<32; clog++) {
+               if (ldexp(maxtime, -clog) < 512)
+                       break;
+       }
+       if (clog >= 32)
+               return -1;
+
+       sbuf[0] = 0;
+       for (i=1; i<255; i++) {
+               double level = ldexp((double)i, clog)*lW;
+
+               /*
+                * Clamp before narrowing to a byte: storing first and
+                * comparing afterwards lets out-of-range values wrap and
+                * escape the cap.  The negated test also catches NaN.
+                */
+               if (!(level < 31))
+                       level = 31;
+               sbuf[i] = level;
+       }
+       sbuf[255] = 31;
+       return clog;
+}
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..6f6b09e3e05e34b7f15bcfa7113df2d93de7c415 100644 (file)
@@ -0,0 +1,8 @@
+#ifndef _TC_RED_H_
+#define _TC_RED_H_ 1
+
+extern int tc_red_eval_P(unsigned qmin, unsigned qmax, double prob);
+extern int tc_red_eval_ewma(unsigned qmin, unsigned burst, unsigned avpkt);
+extern int tc_red_eval_idle_damping(int wlog, unsigned avpkt, unsigned bandwidth, __u8 *sbuf);
+
+#endif
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..d1355391c90f9b3436538ab681d4e76e6b409950 100644 (file)
@@ -0,0 +1,313 @@
+/*
+ * tc_util.c           Misc TC utility functions.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+#include <math.h>
+
+#include "utils.h"
+#include "tc_util.h"
+
+/*
+ * Parse a qdisc handle of the form "MAJ" or "MAJ:..." (hexadecimal major
+ * number) or the keyword "none".
+ *
+ * On success stores the handle in *h, with the major number in the upper
+ * 16 bits, and returns 0.  Returns -1 if no number could be parsed, if
+ * the major number does not fit in 16 bits, or if the number is followed
+ * by anything other than ':' or the end of the string.  Whatever follows
+ * the ':' is not examined.
+ */
+int get_qdisc_handle(__u32 *h, char *str)
+{
+       unsigned long maj;
+       char *p;
+
+       if (strcmp(str, "none") == 0) {
+               *h = TC_H_UNSPEC;
+               return 0;
+       }
+       maj = strtoul(str, &p, 16);
+       if (p == str)
+               return -1;
+       /* Larger values would be silently truncated by the shift below. */
+       if (maj > 0xFFFF)
+               return -1;
+       if (*p != ':' && *p!=0)
+               return -1;
+       *h = (__u32)maj << 16;
+       return 0;
+}
+
+/*
+ * Parse a class id.  Accepted forms (numbers are hexadecimal):
+ *
+ *     "root"      -> TC_H_ROOT
+ *     "none"      -> TC_H_UNSPEC
+ *     "MAJ:MIN"   -> (MAJ << 16) | MIN; either part may be empty
+ *     "NUM"       -> NUM taken as the complete 32-bit id
+ *
+ * On success stores the id in *h and returns 0.  Returns -1 on malformed
+ * input or when MAJ or MIN does not fit in 16 bits.
+ */
+int get_tc_classid(__u32 *h, char *str)
+{
+       unsigned long maj, min;
+       char *p;
+
+       if (strcmp(str, "root") == 0) {
+               *h = TC_H_ROOT;
+               return 0;
+       }
+       if (strcmp(str, "none") == 0) {
+               *h = TC_H_UNSPEC;
+               return 0;
+       }
+       maj = strtoul(str, &p, 16);
+       if (p == str) {
+               /* No leading number: only the ":MIN" form is acceptable. */
+               maj = 0;
+               if (*p != ':')
+                       return -1;
+       }
+       if (*p == ':') {
+               /* Reject values the 16-bit fields cannot hold. */
+               if (maj > 0xFFFF)
+                       return -1;
+               maj <<= 16;
+               str = p+1;
+               min = strtoul(str, &p, 16);
+               if (*p != 0)
+                       return -1;
+               if (min > 0xFFFF)
+                       return -1;
+               maj |= min;
+       } else if (*p != 0)
+               return -1;
+
+       *h = maj;
+       return 0;
+}
+
+/*
+ * Format class id h into buf (at most len bytes, including the
+ * terminating NUL):
+ *
+ *     TC_H_ROOT    -> "root"
+ *     TC_H_UNSPEC  -> "none"
+ *     major == 0   -> ":MIN"
+ *     minor == 0   -> "MAJ:"
+ *     otherwise    -> "MAJ:MIN"
+ *
+ * Numbers are printed in hexadecimal.  Always returns 0.
+ */
+int print_tc_classid(char *buf, int len, __u32 h)
+{
+       if (h == TC_H_ROOT)
+               /* Bounded like every other branch; was an unchecked sprintf. */
+               snprintf(buf, len, "root");
+       else if (h == TC_H_UNSPEC)
+               snprintf(buf, len, "none");
+       else if (TC_H_MAJ(h) == 0)
+               snprintf(buf, len, ":%x", TC_H_MIN(h));
+       else if (TC_H_MIN(h) == 0)
+               snprintf(buf, len, "%x:", TC_H_MAJ(h)>>16);
+       else
+               snprintf(buf, len, "%x:%x", TC_H_MAJ(h)>>16, TC_H_MIN(h));
+       return 0;
+}
+
+/*
+ * Format class id h into buf via print_tc_classid(), using
+ * SPRINT_BSIZE-1 as the length limit, and return buf.  If formatting
+ * reports failure, buf is set to "???" instead.
+ */
+char * sprint_tc_classid(__u32 h, char *buf)
+{
+       int failed = print_tc_classid(buf, SPRINT_BSIZE-1, h);
+
+       if (failed)
+               strcpy(buf, "???");
+       return buf;
+}
+
+
+/*
+ * Parse a rate specification into bytes per second.
+ *
+ * The string is a number optionally followed by a case-insensitive unit:
+ *
+ *     (none)  bits per second           (value / 8)
+ *     bps     bytes per second
+ *     kbps    kilobytes per second      (value * 1024)
+ *     mbps    megabytes per second      (value * 1024 * 1024)
+ *     kbit    kilobits per second       (value * 1024 / 8)
+ *     mbit    megabits per second       (value * 1024 * 1024 / 8)
+ *
+ * On success stores the result in *rate and returns 0.  Returns -1 for an
+ * unparsable number, an unknown unit, or a result that is negative, NaN
+ * or too large for an unsigned.
+ */
+int get_rate(unsigned *rate, char *str)
+{
+       char *p;
+       double bps = strtod(str, &p);
+
+       if (p == str)
+               return -1;
+
+       if (*p) {
+               if (strcasecmp(p, "kbps") == 0)
+                       bps *= 1024;
+               else if (strcasecmp(p, "mbps") == 0)
+                       bps *= 1024*1024;
+               else if (strcasecmp(p, "mbit") == 0)
+                       bps *= 1024*1024/8;
+               else if (strcasecmp(p, "kbit") == 0)
+                       bps *= 1024/8;
+               else if (strcasecmp(p, "bps") != 0)
+                       return -1;
+       } else
+               bps /= 8;
+
+       /*
+        * Converting an out-of-range double to unsigned is undefined, so
+        * refuse such values.  The negated form also rejects NaN.
+        */
+       if (!(bps >= 0 && bps <= (double)~0U))
+               return -1;
+
+       *rate = bps;
+       return 0;
+}
+
+/*
+ * Parse "RATE" or "RATE/CELL".
+ *
+ * RATE is parsed by get_rate() into *rate.  When a "/CELL" suffix is
+ * present, CELL must be a power of two and its base-2 logarithm is stored
+ * in *cell_log; without the suffix *cell_log is left untouched.
+ *
+ * str is modified temporarily while parsing but is restored before
+ * returning, on error paths as well.  Returns 0 on success, -1 on error.
+ */
+int get_rate_and_cell(unsigned *rate, int *cell_log, char *str)
+{
+       char * slash = strchr(str, '/');
+       int cell;
+       int err;
+       int i;
+
+       if (!slash)
+               return get_rate(rate, str) ? -1 : 0;
+
+       /* Split at the slash, parse both halves, then undo the split. */
+       *slash = 0;
+       err = get_rate(rate, str);
+       if (!err)
+               err = get_integer(&cell, slash+1, 0);
+       *slash = '/';
+       if (err)
+               return -1;
+
+       /* Stop at bit 30: 1<<31 does not fit in an int. */
+       for (i=0; i<31; i++) {
+               if ((1<<i) == cell) {
+                       *cell_log = i;
+                       return 0;
+               }
+       }
+       return -1;
+}
+
+
+/*
+ * Format a rate given in bytes per second into buf (at most len bytes).
+ *
+ * The rate is converted to bits per second; if that is close to a whole
+ * number of Mbit (within 1024) or Kbit (within 16) it is printed as
+ * "<n>Mbit" or "<n>Kbit", otherwise the byte rate is printed as
+ * "<rate>bps".  Always returns 0.
+ */
+int print_rate(char *buf, int len, __u32 rate)
+{
+       double bits = (double)rate*8;
+       double whole_mbit = rint(bits/(1024*1024));
+       double whole_kbit = rint(bits/1024);
+
+       if (bits >= 1024*1023 && fabs(1024*1024*whole_mbit - bits) < 1024) {
+               snprintf(buf, len, "%gMbit", whole_mbit);
+               return 0;
+       }
+       if (bits >= 1024-16 && fabs(1024*whole_kbit - bits) < 16) {
+               snprintf(buf, len, "%gKbit", whole_kbit);
+               return 0;
+       }
+       snprintf(buf, len, "%ubps", rate);
+       return 0;
+}
+
+/*
+ * Format rate into buf via print_rate(), using SPRINT_BSIZE-1 as the
+ * length limit, and return buf.  If formatting reports failure, buf is
+ * set to "???" instead.
+ */
+char * sprint_rate(__u32 rate, char *buf)
+{
+       int failed = print_rate(buf, SPRINT_BSIZE-1, rate);
+
+       if (failed)
+               strcpy(buf, "???");
+       return buf;
+}
+
+/*
+ * Parse a time specification into microseconds.
+ *
+ * The string is a number optionally followed by a case-insensitive unit:
+ * "s"/"sec"/"secs", "ms"/"msec"/"msecs" or "us"/"usec"/"usecs".  A bare
+ * number is taken as microseconds.
+ *
+ * On success stores the result in *usecs and returns 0.  Returns -1 for
+ * an unparsable number, an unknown unit, or a result that is negative,
+ * NaN or too large for an unsigned.
+ */
+int get_usecs(unsigned *usecs, char *str)
+{
+       double t;
+       char *p;
+
+       t = strtod(str, &p);
+       if (p == str)
+               return -1;
+
+       if (*p) {
+               if (strcasecmp(p, "s") == 0 || strcasecmp(p, "sec")==0 ||
+                   strcasecmp(p, "secs")==0)
+                       t *= 1000000;
+               else if (strcasecmp(p, "ms") == 0 || strcasecmp(p, "msec")==0 ||
+                        strcasecmp(p, "msecs") == 0)
+                       t *= 1000;
+               else if (strcasecmp(p, "us") == 0 || strcasecmp(p, "usec")==0 ||
+                        strcasecmp(p, "usecs") == 0)
+                       t *= 1;
+               else
+                       return -1;
+       }
+
+       /*
+        * Converting an out-of-range double to unsigned is undefined, so
+        * refuse such values.  The negated form also rejects NaN.
+        */
+       if (!(t >= 0 && t <= (double)~0U))
+               return -1;
+
+       *usecs = t;
+       return 0;
+}
+
+
+/*
+ * Format a duration given in microseconds into buf (at most len bytes):
+ * below 1000 as "<n>us", below 1000000 as "<x.y>ms", otherwise as
+ * "<x.y>s".  Always returns 0.
+ */
+int print_usecs(char *buf, int len, __u32 usec)
+{
+       double t = usec;
+
+       if (t < 1000)
+               snprintf(buf, len, "%uus", usec);
+       else if (t < 1000000)
+               snprintf(buf, len, "%.1fms", t/1000);
+       else
+               snprintf(buf, len, "%.1fs", t/1000000);
+       return 0;
+}
+
+/*
+ * Format usecs into buf via print_usecs(), using SPRINT_BSIZE-1 as the
+ * length limit, and return buf.  If formatting reports failure, buf is
+ * set to "???" instead.
+ */
+char * sprint_usecs(__u32 usecs, char *buf)
+{
+       int failed = print_usecs(buf, SPRINT_BSIZE-1, usecs);
+
+       if (failed)
+               strcpy(buf, "???");
+       return buf;
+}
+
+/*
+ * Parse a size specification into bytes.
+ *
+ * The string is a number optionally followed by a case-insensitive unit:
+ *
+ *     (none), b   bytes
+ *     k, kb       value * 1024
+ *     m, mb       value * 1024 * 1024
+ *     kbit        value * 1024 / 8
+ *     mbit        value * 1024 * 1024 / 8
+ *
+ * On success stores the result in *size and returns 0.  Returns -1 for an
+ * unparsable number, an unknown unit, or a result that is negative, NaN
+ * or too large for an unsigned.
+ */
+int get_size(unsigned *size, char *str)
+{
+       double sz;
+       char *p;
+
+       sz = strtod(str, &p);
+       if (p == str)
+               return -1;
+
+       if (*p) {
+               if (strcasecmp(p, "kb") == 0 || strcasecmp(p, "k")==0)
+                       sz *= 1024;
+               else if (strcasecmp(p, "mb") == 0 || strcasecmp(p, "m")==0)
+                       sz *= 1024*1024;
+               else if (strcasecmp(p, "mbit") == 0)
+                       sz *= 1024*1024/8;
+               else if (strcasecmp(p, "kbit") == 0)
+                       sz *= 1024/8;
+               else if (strcasecmp(p, "b") != 0)
+                       return -1;
+       }
+
+       /*
+        * Converting an out-of-range double to unsigned is undefined, so
+        * refuse such values.  The negated form also rejects NaN.
+        */
+       if (!(sz >= 0 && sz <= (double)~0U))
+               return -1;
+
+       *size = sz;
+       return 0;
+}
+
+/*
+ * Parse "SIZE" or "SIZE/CELL".
+ *
+ * SIZE is parsed by get_size() into *size.  When a "/CELL" suffix is
+ * present, CELL must be a power of two and its base-2 logarithm is stored
+ * in *cell_log; without the suffix *cell_log is left untouched.
+ *
+ * str is modified temporarily while parsing but is restored before
+ * returning, on error paths as well.  Returns 0 on success, -1 on error.
+ */
+int get_size_and_cell(unsigned *size, int *cell_log, char *str)
+{
+       char * slash = strchr(str, '/');
+       int cell;
+       int err;
+       int i;
+
+       if (!slash)
+               return get_size(size, str) ? -1 : 0;
+
+       /* Split at the slash, parse both halves, then undo the split. */
+       *slash = 0;
+       err = get_size(size, str);
+       if (!err)
+               err = get_integer(&cell, slash+1, 0);
+       *slash = '/';
+       if (err)
+               return -1;
+
+       /* Stop at bit 30: 1<<31 does not fit in an int. */
+       for (i=0; i<31; i++) {
+               if ((1<<i) == cell) {
+                       *cell_log = i;
+                       return 0;
+               }
+       }
+       return -1;
+}
+
+/*
+ * Format a size given in bytes into buf (at most len bytes).
+ *
+ * Sizes of at least 1 MiB that lie within 1024 of a whole number of MiB
+ * print as "<n>Mb"; sizes of at least 1 KiB within 16 of a whole number
+ * of KiB print as "<n>Kb"; everything else prints as "<sz>b".
+ * Always returns 0.
+ */
+int print_size(char *buf, int len, __u32 sz)
+{
+       double bytes = sz;
+       double whole_mb = rint(bytes/(1024*1024));
+       double whole_kb = rint(bytes/1024);
+
+       if (sz >= 1024*1024 && fabs(1024*1024*whole_mb - bytes) < 1024) {
+               snprintf(buf, len, "%gMb", whole_mb);
+       } else if (sz >= 1024 && fabs(1024*whole_kb - bytes) < 16) {
+               snprintf(buf, len, "%gKb", whole_kb);
+       } else {
+               snprintf(buf, len, "%ub", sz);
+       }
+       return 0;
+}
+
+/*
+ * Format size into buf via print_size(), using SPRINT_BSIZE-1 as the
+ * length limit, and return buf.  If formatting reports failure, buf is
+ * set to "???" instead.
+ */
+char * sprint_size(__u32 size, char *buf)
+{
+       int failed = print_size(buf, SPRINT_BSIZE-1, size);
+
+       if (failed)
+               strcpy(buf, "???");
+       return buf;
+}
+
+/*
+ * Format the major part of handle h into buf (at most len bytes) as
+ * "MAJ:" in hexadecimal.  Always returns 0.
+ */
+int print_qdisc_handle(char *buf, int len, __u32 h)
+{
+       int rc = 0;
+
+       snprintf(buf, len, "%x:", TC_H_MAJ(h)>>16);
+       return rc;
+}
+
+/*
+ * Format handle h into buf via print_qdisc_handle(), using
+ * SPRINT_BSIZE-1 as the length limit, and return buf.  If formatting
+ * reports failure, buf is set to "???" instead.
+ */
+char * sprint_qdisc_handle(__u32 h, char *buf)
+{
+       int failed = print_qdisc_handle(buf, SPRINT_BSIZE-1, h);
+
+       if (failed)
+               strcpy(buf, "???");
+       return buf;
+}
+
+
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..bdc88d1f8dfbeb4599dc00fd6a45ae4eeee944f3 100644 (file)
@@ -0,0 +1,57 @@
+#ifndef _TC_UTIL_H_
+#define _TC_UTIL_H_ 1
+
+#include <linux/pkt_sched.h>
+#include <linux/pkt_cls.h>
+#include "tc_core.h"
+
+/*
+ * Table of callbacks describing one qdisc kind.
+ *
+ * NOTE(review): the "q" callbacks presumably handle qdisc options and the
+ * "c" callbacks class options; confirm against the implementations.
+ */
+struct qdisc_util
+{
+       /* Pointer to another qdisc_util; presumably chains registered kinds. */
+       struct qdisc_util *next;
+       /* Identifier, up to 16 bytes; presumably the kind name matched by get_qdisc_kind(). */
+       char    id[16];
+       /* Parse command-line arguments, with the netlink message n available to fill in. */
+       int     (*parse_qopt)(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n);
+       /* Print the attribute opt to f. */
+       int     (*print_qopt)(struct qdisc_util *qu, FILE *f, struct rtattr *opt);
+       /* Print the attribute xstats to f. */
+       int     (*print_xstats)(struct qdisc_util *qu, FILE *f, struct rtattr *xstats);
+
+       /* Same signatures as parse_qopt / print_qopt. */
+       int     (*parse_copt)(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n);
+       int     (*print_copt)(struct qdisc_util *qu, FILE *f, struct rtattr *opt);
+};
+
+/*
+ * Table of callbacks describing one filter kind.
+ */
+struct filter_util
+{
+       /* Pointer to another filter_util; presumably chains registered kinds. */
+       struct filter_util *next;
+       /* Identifier, up to 16 bytes; presumably the kind name matched by get_filter_kind(). */
+       char    id[16];
+       /* Parse the handle string and command-line arguments, with the netlink message n available to fill in. */
+       int     (*parse_fopt)(struct filter_util *qu, char *fhandle, int argc, char **argv, struct nlmsghdr *n);
+       /* Print the attribute opt, together with the numeric handle, to f. */
+       int     (*print_fopt)(struct filter_util *qu, FILE *f, struct rtattr *opt, __u32 fhandle);
+};
+
+
+extern struct qdisc_util *get_qdisc_kind(char *str);
+extern struct filter_util *get_filter_kind(char *str);
+
+extern int get_qdisc_handle(__u32 *h, char *str);
+extern int get_rate(unsigned *rate, char *str);
+extern int get_size(unsigned *size, char *str);
+extern int get_size_and_cell(unsigned *size, int *cell_log, char *str);
+extern int get_usecs(unsigned *usecs, char *str);
+extern int print_rate(char *buf, int len, __u32 rate);
+extern int print_size(char *buf, int len, __u32 size);
+extern int print_qdisc_handle(char *buf, int len, __u32 h);
+extern int print_usecs(char *buf, int len, __u32 usecs);
+extern char * sprint_rate(__u32 rate, char *buf);
+extern char * sprint_size(__u32 size, char *buf);
+extern char * sprint_qdisc_handle(__u32 h, char *buf);
+extern char * sprint_tc_classid(__u32 h, char *buf);
+extern char * sprint_usecs(__u32 usecs, char *buf);
+
+extern void print_tcstats(FILE *fp, struct tc_stats *st);
+
+extern int get_tc_classid(__u32 *h, char *str);
+extern int print_tc_classid(char *buf, int len, __u32 h);
+extern char * sprint_tc_classid(__u32 h, char *buf);
+
+extern int tc_print_police(FILE *f, struct rtattr *tb);
+extern int parse_police(int *, char ***, int, struct nlmsghdr *);
+
+
+#endif