]> git.proxmox.com Git - mirror_iproute2.git/blob - doc/ip-cref.tex
Merge branch 'master' into net-2.6.25
[mirror_iproute2.git] / doc / ip-cref.tex
1 \documentstyle[12pt,twoside]{article}
2 \def\TITLE{IP Command Reference}
3 \input preamble
4 \begin{center}
5 \Large\bf IP Command Reference.
6 \end{center}
7
8
9 \begin{center}
10 { \large Alexey~N.~Kuznetsov } \\
11 \em Institute for Nuclear Research, Moscow \\
12 \verb|kuznet@ms2.inr.ac.ru| \\
13 \rm April 14, 1999
14 \end{center}
15
16 \vspace{5mm}
17
18 \tableofcontents
19
20 \newpage
21
22 \section{About this document}
23
24 This document presents a comprehensive description of the \verb|ip| utility
25 from the \verb|iproute2| package. It is not a tutorial or user's guide.
26 It is a {\em dictionary\/}, not explaining terms,
27 but translating them into other terms, which may also be unknown to the reader.
28 However, the document is self-contained and the reader, provided they have a
29 basic networking background, will find enough information
30 and examples to understand and configure Linux-2.2 IP and IPv6
31 networking.
32
33 This document is split into sections explaining \verb|ip| commands
34 and options, deciphering \verb|ip| output and containing a few examples.
35 More voluminous examples and some topics, which require more elaborate
36 discussion, are in the appendix.
37
38 The paragraphs beginning with NB contain side notes, warnings about
39 bugs and design drawbacks. They may be skipped at the first reading.
40
41 \section{{\tt ip} --- command syntax}
42
43 The generic form of an \verb|ip| command is:
44 \begin{verbatim}
45 ip [ OPTIONS ] OBJECT [ COMMAND [ ARGUMENTS ]]
46 \end{verbatim}
47 where \verb|OPTIONS| is a set of optional modifiers affecting the
48 general behaviour of the \verb|ip| utility or changing its output. All options
49 begin with the character \verb|'-'| and may be used in either long or abbreviated
50 forms. Currently, the following options are available:
51
52 \begin{itemize}
53 \item \verb|-V|, \verb|-Version|
54
55 --- print the version of the \verb|ip| utility and exit.
56
57
58 \item \verb|-s|, \verb|-stats|, \verb|-statistics|
59
60 --- output more information. If the option
61 appears twice or more, the amount of information increases.
62 As a rule, the information is statistics or some time values.
63
64
65 \item \verb|-f|, \verb|-family| followed by a protocol family
66 identifier: \verb|inet|, \verb|inet6| or \verb|link|.
67
68 --- enforce the protocol family to use. If the option is not present,
69 the protocol family is guessed from other arguments. If the rest of the command
70 line does not give enough information to guess the family, \verb|ip| falls back to the default
71 one, usually \verb|inet| or \verb|any|. \verb|link| is a special family
72 identifier meaning that no networking protocol is involved.
73
74 \item \verb|-4|
75
76 --- shortcut for \verb|-family inet|.
77
78 \item \verb|-6|
79
80 --- shortcut for \verb|-family inet6|.
81
82 \item \verb|-0|
83
84 --- shortcut for \verb|-family link|.
85
86
87 \item \verb|-o|, \verb|-oneline|
88
89 --- output each record on a single line, replacing line feeds
90 with the \verb|'\'| character. This is convenient when you want to
91 count records with \verb|wc| or to \verb|grep| the output. The trivial
92 script \verb|rtpr| converts the output back into readable form.
93
94 \item \verb|-r|, \verb|-resolve|
95
96 --- use the system's name resolver to print DNS names instead of
97 host addresses.
98
99 \begin{NB}
100 Do not use this option when reporting bugs or asking for advice.
101 \end{NB}
102 \begin{NB}
103 \verb|ip| never uses DNS to resolve names to addresses.
104 \end{NB}
105
106 \end{itemize}
107
108 \verb|OBJECT| is the object to manage or to get information about.
109 The object types currently understood by \verb|ip| are:
110
111 \begin{itemize}
112 \item \verb|link| --- network device
113 \item \verb|address| --- protocol (IP or IPv6) address on a device
114 \item \verb|neighbour| --- ARP or NDISC cache entry
115 \item \verb|route| --- routing table entry
116 \item \verb|rule| --- rule in routing policy database
117 \item \verb|maddress| --- multicast address
118 \item \verb|mroute| --- multicast routing cache entry
119 \item \verb|tunnel| --- tunnel over IP
120 \end{itemize}
121
122 Again, the names of all objects may be written in full or
123 abbreviated form, f.e.\ \verb|address| is abbreviated as \verb|addr|
124 or just \verb|a|.
125
126 \verb|COMMAND| specifies the action to perform on the object.
127 The set of possible actions depends on the object type.
128 As a rule, it is possible to \verb|add|, \verb|delete| and
129 \verb|show| (or \verb|list|) objects, but some objects
130 do not allow all of these operations or have some additional commands.
131 The \verb|help| command is available for all objects. It prints
132 out a list of available commands and argument syntax conventions.
133
134 If no command is given, some default command is assumed.
135 Usually it is \verb|list| or, if the objects of this class
136 cannot be listed, \verb|help|.
137
138 \verb|ARGUMENTS| is a list of arguments to the command.
139 The arguments depend on the command and object. There are two types of arguments:
140 {\em flags\/}, consisting of a single keyword, and {\em parameters\/},
141 consisting of a keyword followed by a value. For convenience,
142 each command has some {\em default parameter\/}
143 which may be omitted. F.e.\ parameter \verb|dev| is the default
144 for the {\tt ip link} command, so {\tt ip link ls eth0} is equivalent
145 to {\tt ip link ls dev eth0}.
146 In the command descriptions below such parameters
147 are distinguished with the marker: ``(default)''.
148
149 Almost all keywords may be abbreviated with several first (or even single)
150 letters. The shortcuts are convenient when \verb|ip| is used interactively,
151 but they are not recommended in scripts or when reporting bugs
152 or asking for advice. ``Officially'' allowed abbreviations are listed
153 in the document body.
154
155
156
157 \section{{\tt ip} --- error messages}
158
159 \verb|ip| may fail for one of the following reasons:
160
161 \begin{itemize}
162 \item
163 A syntax error on the command line: an unknown keyword, incorrectly formatted
164 IP address {\em et al\/}. In this case \verb|ip| prints an error message
165 and exits. As a rule, the error message will contain information
166 about the reason for the failure. Sometimes it also prints a help page.
167
168 \item
169 The arguments did not pass verification for self-consistency.
170
171 \item
172 \verb|ip| failed to compile a kernel request from the arguments
173 because the user didn't give enough information.
174
175 \item
176 The kernel returned an error to some syscall. In this case \verb|ip|
177 prints the error message, as it is output with \verb|perror(3)|,
178 prefixed with a comment and a syscall identifier.
179
180 \item
181 The kernel returned an error to some RTNETLINK request.
182 In this case \verb|ip| prints the error message, as it is output
183 with \verb|perror(3)| prefixed with ``RTNETLINK answers:''.
184
185 \end{itemize}
186
187 All the operations are atomic, i.e.\
188 if the \verb|ip| utility fails, it does not change anything
189 in the system. One harmful exception is the \verb|ip link| command
190 (Sec.\ref{IP-LINK}, p.\pageref{IP-LINK}),
191 which may change only some of the device parameters given
192 on the command line.
193
194 It is difficult to list all the error messages (especially
195 syntax errors). However, as a rule, their meaning is clear
196 from the context of the command.
197
198 The most common mistakes are:
199
200 \begin{enumerate}
201 \item Netlink is not configured in the kernel. The message is:
202 \begin{verbatim}
203 Cannot open netlink socket: Invalid value
204 \end{verbatim}
205
206 \item RTNETLINK is not configured in the kernel. In this case
207 one of the following messages may be printed, depending on the command:
208 \begin{verbatim}
209 Cannot talk to rtnetlink: Connection refused
210 Cannot send dump request: Connection refused
211 \end{verbatim}
212
213 \item The \verb|CONFIG_IP_MULTIPLE_TABLES| option was not selected
214 when configuring the kernel. In this case any attempt to use the
215 \verb|ip| \verb|rule| command will fail, f.e.
216 \begin{verbatim}
217 kuznet@kaiser $ ip rule list
218 RTNETLINK error: Invalid argument
219 dump terminated
220 \end{verbatim}
221
222 \end{enumerate}
223
224
225 \section{{\tt ip link} --- network device configuration}
226 \label{IP-LINK}
227
228 \paragraph{Object:} A \verb|link| is a network device and the corresponding
229 commands display and change the state of devices.
230
231 \paragraph{Commands:} \verb|set| and \verb|show| (or \verb|list|).
232
233 \subsection{{\tt ip link set} --- change device attributes}
234
235 \paragraph{Abbreviations:} \verb|set|, \verb|s|.
236
237 \paragraph{Arguments:}
238
239 \begin{itemize}
240 \item \verb|dev NAME| (default)
241
242 --- \verb|NAME| specifies the network device on which to operate.
243
244 \item \verb|up| and \verb|down|
245
246 --- change the state of the device to \verb|UP| or \verb|DOWN|.
247
248 \item \verb|arp on| or \verb|arp off|
249
250 --- change the \verb|NOARP| flag on the device.
251
252 \begin{NB}
253 This operation is {\em not allowed\/} if the device is in state \verb|UP|,
254 though neither the \verb|ip| utility nor the kernel checks for this condition.
255 You can get unpredictable results changing this flag while the
256 device is running.
257 \end{NB}
258
259 \item \verb|multicast on| or \verb|multicast off|
260
261 --- change the \verb|MULTICAST| flag on the device.
262
263 \item \verb|dynamic on| or \verb|dynamic off|
264
265 --- change the \verb|DYNAMIC| flag on the device.
266
267 \item \verb|name NAME|
268
269 --- change the name of the device. This operation is not
270 recommended if the device is running or has some addresses
271 already configured.
272
273 \item \verb|txqueuelen NUMBER| or \verb|txqlen NUMBER|
274
275 --- change the transmit queue length of the device.
276
277 \item \verb|mtu NUMBER|
278
279 --- change the MTU of the device.
280
281 \item \verb|address LLADDRESS|
282
283 --- change the station address of the interface.
284
285 \item \verb|broadcast LLADDRESS|, \verb|brd LLADDRESS| or \verb|peer LLADDRESS|
286
287 --- change the link layer broadcast address or the peer address when
288 the interface is \verb|POINTOPOINT|.
289
290 \vskip 1mm
291 \begin{NB}
292 For most devices (f.e.\ for Ethernet) changing the link layer
293 broadcast address will break networking.
294 Do not use it, if you do not understand what this operation really does.
295 \end{NB}
296
297 \end{itemize}
298
299 \vskip 1mm
300 \begin{NB}
301 The \verb|PROMISC| and \verb|ALLMULTI| flags are considered
302 obsolete and should not be changed administratively, though
303 the {\tt ip} utility will allow that.
304 \end{NB}
305
306 \paragraph{Warning:} If multiple parameter changes are requested,
307 \verb|ip| aborts immediately after any of the changes has failed.
308 This is the only case when \verb|ip| can move the system to
309 an unpredictable state. The solution is to avoid changing
310 several parameters with one {\tt ip link set} call.
311
312 \paragraph{Examples:}
313 \begin{itemize}
314 \item \verb|ip link set dummy address 00:00:00:00:00:01|
315
316 --- change the station address of the interface \verb|dummy|.
317
318 \item \verb|ip link set dummy up|
319
320 --- start the interface \verb|dummy|.
321
322 \end{itemize}
323
324
325 \subsection{{\tt ip link show} --- display device attributes}
326 \label{IP-LINK-SHOW}
327
328 \paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|lst|, \verb|sh|, \verb|ls|,
329 \verb|l|.
330
331 \paragraph{Arguments:}
332 \begin{itemize}
333 \item \verb|dev NAME| (default)
334
335 --- \verb|NAME| specifies the network device to show.
336 If this argument is omitted all devices are listed.
337
338 \item \verb|up|
339
340 --- only display running interfaces.
341
342 \end{itemize}
343
344
345 \paragraph{Output format:}
346
347 \begin{verbatim}
348 kuznet@alisa:~ $ ip link ls eth0
349 3: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc cbq qlen 100
350 link/ether 00:a0:cc:66:18:78 brd ff:ff:ff:ff:ff:ff
351 kuznet@alisa:~ $ ip link ls sit0
352 5: sit0@NONE: <NOARP,UP> mtu 1480 qdisc noqueue
353 link/sit 0.0.0.0 brd 0.0.0.0
354 kuznet@alisa:~ $ ip link ls dummy
355 2: dummy: <BROADCAST,NOARP> mtu 1500 qdisc noop
356 link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff
357 kuznet@alisa:~ $
358 \end{verbatim}
359
360
361 The number before each colon is an {\em interface index\/} or {\em ifindex\/}.
362 This number uniquely identifies the interface. This is followed by the {\em interface name\/}
363 (\verb|eth0|, \verb|sit0| etc.). The interface name is also
364 unique at every given moment. However, the interface may disappear from the
365 list (f.e.\ when the corresponding driver module is unloaded) and another
366 one with the same name may be created later. Besides that,
367 the administrator may change the name of any device with
368 \verb|ip| \verb|link| \verb|set| \verb|name|
369 to make it more intelligible.
370
371 The interface name may have another name or \verb|NONE| appended
372 after the \verb|@| sign. This means that this device is bound to some other
373 device,
374 i.e.\ packets sent through it are encapsulated and sent via the ``master''
375 device. If the name is \verb|NONE|, the master is unknown.
376
377 Then we see the interface {\em mtu\/} (``maximum transmission unit''). This determines
378 the maximal size of data which can be sent as a single packet over this interface.
379
380 {\em qdisc\/} (``queuing discipline'') shows the queuing algorithm used
381 on the interface. Particularly, \verb|noqueue| means that this interface
382 does not queue anything and \verb|noop| means that the interface is in blackhole
383 mode i.e.\ all packets sent to it are immediately discarded.
384 {\em qlen\/} is the default transmit queue length of the device measured
385 in packets.
386
387 The interface flags are summarized in the angle brackets.
388
389 \begin{itemize}
390 \item \verb|UP| --- the device is turned on. It is ready to accept
391 packets for transmission and it may inject into the kernel packets received
392 from other nodes on the network.
393
394 \item \verb|LOOPBACK| --- the interface does not communicate with other
395 hosts. All packets sent through it will be returned
396 and nothing but bounced packets can be received.
397
398 \item \verb|BROADCAST| --- the device has the facility to send packets
399 to all hosts sharing the same link. A typical example is an Ethernet link.
400
401 \item \verb|POINTOPOINT| --- the link has only two ends with one node
402 attached to each end. All packets sent to this link will reach the peer
403 and all packets received by us came from this single peer.
404
405 If neither \verb|LOOPBACK| nor \verb|BROADCAST| nor \verb|POINTOPOINT|
406 is set, the interface is assumed to be NBMA (Non-Broadcast Multi-Access).
407 This is the most generic type of device and the most complicated one, because
408 the host attached to an NBMA link has no means to send to anyone
409 without additionally configured information.
410
411 \item \verb|MULTICAST| --- is an advisory flag indicating that the interface
412 is aware of multicasting i.e.\ sending packets to some subset of neighbouring
413 nodes. Broadcasting is a particular case of multicasting, where the multicast
414 group consists of all nodes on the link. It is important to emphasize
415 that software {\em must not\/} interpret the absence of this flag as the inability
416 to use multicasting on this interface. Any \verb|POINTOPOINT| and
417 \verb|BROADCAST| link is multicasting by definition, because we have
418 direct access to all the neighbours and, hence, to any part of them.
419 Certainly, the use of high bandwidth multicast transfers is not recommended
420 on broadcast-only links because of high expense, but it is not strictly
421 prohibited.
422
423 \item \verb|PROMISC| --- the device listens to and feeds to the kernel all
424 traffic on the link even if it is not destined for us, not broadcasted
425 and not destined for a multicast group of which we are a member. Usually
426 this mode exists only on broadcast links and is used by bridges and for network
427 monitoring.
428
429 \item \verb|ALLMULTI| --- the device receives all multicast packets
430 wandering on the link. This mode is used by multicast routers.
431
432 \item \verb|NOARP| --- this flag is different from the other ones. It has
433 no invariant value and its interpretation depends on the network protocols
434 involved. As a rule, it indicates that the device needs no address
435 resolution and that the software or hardware knows how to deliver packets
436 without any help from the protocol stacks.
437
438 \item \verb|DYNAMIC| --- is an advisory flag indicating that the interface is
439 dynamically created and destroyed.
440
441 \item \verb|SLAVE| --- this interface is bonded to some other interfaces
442 to share link capacities.
443
444 \end{itemize}
445
446 \vskip 1mm
447 \begin{NB}
448 There are other flags but they are either obsolete (\verb|NOTRAILERS|)
449 or not implemented (\verb|DEBUG|) or specific to some devices
450 (\verb|MASTER|, \verb|AUTOMEDIA| and \verb|PORTSEL|). We do not discuss
451 them here.
452 \end{NB}
453
454
455 The second line contains information on the link layer addresses
456 associated with the device. The first word (\verb|ether|, \verb|sit|)
457 defines the interface hardware type. This type determines the format and semantics
458 of the addresses and is logically part of the address.
459 The default format of the station address and the broadcast address
460 (or the peer address for pointopoint links) is a
461 sequence of hexadecimal bytes separated by colons, but some link
462 types may have their natural address format, f.e.\ addresses
463 of tunnels over IP are printed as dotted-quad IP addresses.
464
465 \vskip 1mm
466 \begin{NB}
467 NBMA links have no well-defined broadcast or peer address;
468 however, this field may contain useful information, f.e.\
469 about the address of broadcast relay or about the address of the ARP server.
470 \end{NB}
471 \begin{NB}
472 Multicast addresses are not shown by this command, see
473 \verb|ip maddr ls| in~Sec.\ref{IP-MADDR} (p.\pageref{IP-MADDR} of this
474 document).
475 \end{NB}
476
477
478 \paragraph{Statistics:} With the \verb|-statistics| option, \verb|ip| also
479 prints interface statistics:
480
481 \begin{verbatim}
482 kuznet@alisa:~ $ ip -s link ls eth0
483 3: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc cbq qlen 100
484 link/ether 00:a0:cc:66:18:78 brd ff:ff:ff:ff:ff:ff
485 RX: bytes packets errors dropped overrun mcast
486 2449949362 2786187 0 0 0 0
487 TX: bytes packets errors dropped carrier collsns
488 178558497 1783945 332 0 332 35172
489 kuznet@alisa:~ $
490 \end{verbatim}
491 \verb|RX:| and \verb|TX:| lines summarize receiver and transmitter
492 statistics. They contain:
493 \begin{itemize}
494 \item \verb|bytes| --- the total number of bytes received or transmitted
495 on the interface. This number wraps when the maximal length of the data type
496 natural for the architecture is exceeded, so continuous monitoring requires
497 a user level daemon snapping it periodically.
498 \item \verb|packets| --- the total number of packets received or transmitted
499 on the interface.
500 \item \verb|errors| --- the total number of receiver or transmitter errors.
501 \item \verb|dropped| --- the total number of packets dropped due to lack
502 of resources.
503 \item \verb|overrun| --- the total number of receiver overruns resulting
504 in dropped packets. As a rule, if the interface is overrun, it means
505 serious problems in the kernel or that your machine is too slow
506 for this interface.
507 \item \verb|mcast| --- the total number of received multicast packets. This option
508 is only supported by a few devices.
509 \item \verb|carrier| --- the total number of link media failures, f.e.\ because
510 of lost carrier.
511 \item \verb|collsns| --- the total number of collision events
512 on Ethernet-like media. This number may have a different sense on other
513 link types.
514 \item \verb|compressed| --- the total number of compressed packets. This is
515 available only for links using VJ header compression.
516 \end{itemize}
517
518
519 If the \verb|-s| option is entered twice or more,
520 \verb|ip| prints more detailed statistics on receiver
521 and transmitter errors.
522
523 \begin{verbatim}
524 kuznet@alisa:~ $ ip -s -s link ls eth0
525 3: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc cbq qlen 100
526 link/ether 00:a0:cc:66:18:78 brd ff:ff:ff:ff:ff:ff
527 RX: bytes packets errors dropped overrun mcast
528 2449949362 2786187 0 0 0 0
529 RX errors: length crc frame fifo missed
530 0 0 0 0 0
531 TX: bytes packets errors dropped carrier collsns
532 178558497 1783945 332 0 332 35172
533 TX errors: aborted fifo window heartbeat
534 0 0 0 332
535 kuznet@alisa:~ $
536 \end{verbatim}
537 These error names are pure Ethernetisms. Other devices
538 may have nonzero values in these fields but they may be
539 interpreted differently.
540
541
542 \section{{\tt ip address} --- protocol address management}
543
544 \paragraph{Abbreviations:} \verb|address|, \verb|addr|, \verb|a|.
545
546 \paragraph{Object:} The \verb|address| is a protocol (IP or IPv6) address attached
547 to a network device. Each device must have at least one address
548 to use the corresponding protocol. It is possible to have several
549 different addresses attached to one device. These addresses are not
550 discriminated, so that the term {\em alias\/} is not quite appropriate
551 for them and we do not use it in this document.
552
553 The \verb|ip addr| command displays addresses and their properties,
554 adds new addresses and deletes old ones.
555
556 \paragraph{Commands:} \verb|add|, \verb|delete|, \verb|flush| and \verb|show|
557 (or \verb|list|).
558
559
560 \subsection{{\tt ip address add} --- add a new protocol address}
561 \label{IP-ADDR-ADD}
562
563 \paragraph{Abbreviations:} \verb|add|, \verb|a|.
564
565 \paragraph{Arguments:}
566
567 \begin{itemize}
568 \item \verb|dev NAME|
569
570 \noindent--- the name of the device to add the address to.
571
572 \item \verb|local ADDRESS| (default)
573
574 --- the address of the interface. The format of the address depends
575 on the protocol. It is a dotted quad for IP and a sequence of hexadecimal halfwords
576 separated by colons for IPv6. The \verb|ADDRESS| may be followed by
577 a slash and a decimal number which encodes the network prefix length.
578
579
580 \item \verb|peer ADDRESS|
581
582 --- the address of the remote endpoint for pointopoint interfaces.
583 Again, the \verb|ADDRESS| may be followed by a slash and a decimal number,
584 encoding the network prefix length. If a peer address is specified,
585 the local address {\em cannot\/} have a prefix length. The network prefix is associated
586 with the peer rather than with the local address.
587
588
589 \item \verb|broadcast ADDRESS|
590
591 --- the broadcast address on the interface.
592
593 It is possible to use the special symbols \verb|'+'| and \verb|'-'|
594 instead of the broadcast address. In this case, the broadcast address
595 is derived by setting/resetting the host bits of the interface prefix.
596
597 \vskip 1mm
598 \begin{NB}
599 Unlike \verb|ifconfig|, the \verb|ip| utility {\em does not\/} set any broadcast
600 address unless explicitly requested.
601 \end{NB}
602
603
604 \item \verb|label NAME|
605
606 --- Each address may be tagged with a label string.
607 In order to preserve compatibility with Linux-2.0 net aliases,
608 this string must coincide with the name of the device or must be prefixed
609 with the device name followed by a colon.
610
611
612 \item \verb|scope SCOPE_VALUE|
613
614 --- the scope of the area where this address is valid.
615 The available scopes are listed in file \verb|/etc/iproute2/rt_scopes|.
616 Predefined scope values are:
617
618 \begin{itemize}
619 \item \verb|global| --- the address is globally valid.
620 \item \verb|site| --- (IPv6 only) the address is site local,
621 i.e.\ it is valid inside this site.
622 \item \verb|link| --- the address is link local, i.e.\
623 it is valid only on this device.
624 \item \verb|host| --- the address is valid only inside this host.
625 \end{itemize}
626
627 Appendix~\ref{ADDR-SEL} (p.\pageref{ADDR-SEL} of this document)
628 contains more details on address scopes.
629
630 \end{itemize}
631
632 \paragraph{Examples:}
633 \begin{itemize}
634 \item \verb|ip addr add 127.0.0.1/8 dev lo brd + scope host|
635
636 --- add the usual loopback address to the loopback device.
637
638 \item \verb|ip addr add 10.0.0.1/24 brd + dev eth0 label eth0:Alias|
639
640 --- add the address 10.0.0.1 with prefix length 24 (i.e.\ netmask
641 \verb|255.255.255.0|), standard broadcast and label \verb|eth0:Alias|
642 to the interface \verb|eth0|.
643 \end{itemize}
644
645
646 \subsection{{\tt ip address delete} --- delete a protocol address}
647
648 \paragraph{Abbreviations:} \verb|delete|, \verb|del|, \verb|d|.
649
650 \paragraph{Arguments:} coincide with the arguments of \verb|ip addr add|.
651 The device name is a required argument. The rest are optional.
652 If no arguments are given, the first address is deleted.
653
654 \paragraph{Examples:}
655 \begin{itemize}
656 \item \verb|ip addr del 127.0.0.1/8 dev lo|
657
658 --- deletes the loopback address from the loopback device.
659 It would be best not to repeat this experiment.
660
661 \item Disable IP on the interface \verb|eth0|:
662 \begin{verbatim}
663 while ip -f inet addr del dev eth0; do
664 : nothing
665 done
666 \end{verbatim}
667 Another method to disable IP on an interface using {\tt ip addr flush}
668 may be found in sec.\ref{IP-ADDR-FLUSH}, p.\pageref{IP-ADDR-FLUSH}.
669
670 \end{itemize}
671
672
673 \subsection{{\tt ip address show} --- display protocol addresses}
674
675 \paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|lst|, \verb|sh|, \verb|ls|,
676 \verb|l|.
677
678 \paragraph{Arguments:}
679
680 \begin{itemize}
681 \item \verb|dev NAME| (default)
682
683 --- the name of the device.
684
685 \item \verb|scope SCOPE_VAL|
686
687 --- only list addresses with this scope.
688
689 \item \verb|to PREFIX|
690
691 --- only list addresses matching this prefix.
692
693 \item \verb|label PATTERN|
694
695 --- only list addresses with labels matching the \verb|PATTERN|.
696 \verb|PATTERN| is a usual shell style pattern.
697
698
699 \item \verb|dynamic| and \verb|permanent|
700
701 --- (IPv6 only) only list addresses installed due to stateless
702 address configuration or only list permanent (not dynamic) addresses.
703
704 \item \verb|tentative|
705
706 --- (IPv6 only) only list addresses which did not pass duplicate
707 address detection.
708
709 \item \verb|deprecated|
710
711 --- (IPv6 only) only list deprecated addresses.
712
713
714 \item \verb|primary| and \verb|secondary|
715
716 --- only list primary (or secondary) addresses.
717
718 \end{itemize}
719
720
721 \paragraph{Output format:}
722
723 \begin{verbatim}
724 kuznet@alisa:~ $ ip addr ls eth0
725 3: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc cbq qlen 100
726 link/ether 00:a0:cc:66:18:78 brd ff:ff:ff:ff:ff:ff
727 inet 193.233.7.90/24 brd 193.233.7.255 scope global eth0
728 inet6 3ffe:2400:0:1:2a0:ccff:fe66:1878/64 scope global dynamic
729 valid_lft forever preferred_lft 604746sec
730 inet6 fe80::2a0:ccff:fe66:1878/10 scope link
731 kuznet@alisa:~ $
732 \end{verbatim}
733
734 The first two lines coincide with the output of \verb|ip link ls|.
735 It is natural to interpret link layer addresses
736 as addresses of the protocol family \verb|AF_PACKET|.
737
738 Then the list of IP and IPv6 addresses follows, accompanied by
739 additional address attributes: scope value (see Sec.\ref{IP-ADDR-ADD},
740 p.\pageref{IP-ADDR-ADD} above), flags and the address label.
741
742 Address flags are set by the kernel and cannot be changed
743 administratively. Currently, the following flags are defined:
744
745 \begin{enumerate}
746 \item \verb|secondary|
747
748 --- the address is not used when selecting the default source address
749 of outgoing packets (cf.\ Appendix~\ref{ADDR-SEL}, p.\pageref{ADDR-SEL}).
750 An IP address becomes secondary if another address with the same
751 prefix bits already exists. The first address is primary.
752 It is the leader of the group of all secondary addresses. When the leader
753 is deleted, all secondaries are purged too.
754
755
756 \item \verb|dynamic|
757
758 --- the address was created due to stateless autoconfiguration~\cite{RFC-ADDRCONF}.
759 In this case the output also contains the lifetimes during which
760 the address is still valid. After \verb|preferred_lft| expires the address is
761 moved to the deprecated state. After \verb|valid_lft| expires the address
762 is finally invalidated.
763
764 \item \verb|deprecated|
765
766 --- the address is deprecated, i.e.\ it is still valid, but cannot
767 be used by newly created connections.
768
769 \item \verb|tentative|
770
771 --- the address is not used because duplicate address detection~\cite{RFC-ADDRCONF}
772 is still not complete or failed.
773
774 \end{enumerate}
775
776
777 \subsection{{\tt ip address flush} --- flush protocol addresses}
778 \label{IP-ADDR-FLUSH}
779
780 \paragraph{Abbreviations:} \verb|flush|, \verb|f|.
781
782 \paragraph{Description:}This command flushes the protocol addresses
783 selected by some criteria.
784
785 \paragraph{Arguments:} This command has the same arguments as \verb|show|.
786 The difference is that it does not run when no arguments are given.
787
788 \paragraph{Warning:} This command (and other \verb|flush| commands
789 described below) is pretty dangerous. If you make a mistake, it will
790 not forgive it, but will cruelly purge all the addresses.
791
792 \paragraph{Statistics:} With the \verb|-statistics| option, the command
793 becomes verbose. It prints out the number of deleted addresses and the number
794 of rounds made to flush the address list. If this option is given
795 twice, \verb|ip addr flush| also dumps all the deleted addresses
796 in the format described in the previous subsection.
797
798 \paragraph{Example:} Delete all the addresses from the private network
799 10.0.0.0/8:
800 \begin{verbatim}
801 netadm@amber:~ # ip -s -s a f to 10/8
802 2: dummy inet 10.7.7.7/16 brd 10.7.255.255 scope global dummy
803 3: eth0 inet 10.10.7.7/16 brd 10.10.255.255 scope global eth0
804 4: eth1 inet 10.8.7.7/16 brd 10.8.255.255 scope global eth1
805
806 *** Round 1, deleting 3 addresses ***
807 *** Flush is complete after 1 round ***
808 netadm@amber:~ #
809 \end{verbatim}
810 Another instructive example is disabling IP on all the Ethernets:
811 \begin{verbatim}
812 netadm@amber:~ # ip -4 addr flush label "eth*"
813 \end{verbatim}
814 And the last example shows how to flush all the IPv6 addresses
815 acquired by the host from stateless address autoconfiguration
816 after you enabled forwarding or disabled autoconfiguration.
817 \begin{verbatim}
818 netadm@amber:~ # ip -6 addr flush dynamic
819 \end{verbatim}
820
821
822
823 \section{{\tt ip neighbour} --- neighbour/arp tables management}
824
825 \paragraph{Abbreviations:} \verb|neighbour|, \verb|neighbor|, \verb|neigh|,
826 \verb|n|.
827
828 \paragraph{Object:} \verb|neighbour| objects establish bindings between protocol
829 addresses and link layer addresses for hosts sharing the same link.
830 Neighbour entries are organized into tables. The IPv4 neighbour table
831 is known by another name --- the ARP table.
832
833 The corresponding commands display neighbour bindings
834 and their properties, add new neighbour entries and delete old ones.
835
836 \paragraph{Commands:} \verb|add|, \verb|change|, \verb|replace|,
837 \verb|delete|, \verb|flush| and \verb|show| (or \verb|list|).
838
839 \paragraph{See also:} Appendix~\ref{PROXY-NEIGH}, p.\pageref{PROXY-NEIGH}
840 describes how to manage proxy ARP/NDISC with the \verb|ip| utility.
841
842
843 \subsection{{\tt ip neighbour add} --- add a new neighbour entry\\
844 {\tt ip neighbour change} --- change an existing entry\\
845 {\tt ip neighbour replace} --- add a new entry or change an existing one}
846
847 \paragraph{Abbreviations:} \verb|add|, \verb|a|; \verb|change|, \verb|chg|;
848 \verb|replace|, \verb|repl|.
849
850 \paragraph{Description:} These commands create new neighbour records
851 or update existing ones.
852
853 \paragraph{Arguments:}
854
855 \begin{itemize}
856 \item \verb|to ADDRESS| (default)
857
858 --- the protocol address of the neighbour. It is either an IPv4 or IPv6 address.
859
860 \item \verb|dev NAME|
861
862 --- the interface to which this neighbour is attached.
863
864
865 \item \verb|lladdr LLADDRESS|
866
867 --- the link layer address of the neighbour. \verb|LLADDRESS| can also be
868 \verb|null|.
869
870 \item \verb|nud NUD_STATE|
871
872 --- the state of the neighbour entry. \verb|nud| is an abbreviation for ``Neighbour
873 Unreachability Detection''. The state can take one of the following values:
874
875 \begin{enumerate}
876 \item \verb|permanent| --- the neighbour entry is valid forever and can only be removed
877 administratively.
878 \item \verb|noarp| --- the neighbour entry is valid. No attempts to validate
879 this entry will be made but it can be removed when its lifetime expires.
880 \item \verb|reachable| --- the neighbour entry is valid until the reachability
881 timeout expires.
882 \item \verb|stale| --- the neighbour entry is valid but suspicious.
883 This option to \verb|ip neigh| does not change the neighbour state if
884 it was valid and the address is not changed by this command.
885 \end{enumerate}
886
887 \end{itemize}
888
889 \paragraph{Examples:}
890 \begin{itemize}
891 \item \verb|ip neigh add 10.0.0.3 lladdr 0:0:0:0:0:1 dev eth0 nud perm|
892
893 --- add a permanent ARP entry for the neighbour 10.0.0.3 on the device \verb|eth0|.
894
895 \item \verb|ip neigh chg 10.0.0.3 dev eth0 nud reachable|
896
897 --- change its state to \verb|reachable|.
898 \end{itemize}
899
900
901 \subsection{{\tt ip neighbour delete} --- delete a neighbour entry}
902
903 \paragraph{Abbreviations:} \verb|delete|, \verb|del|, \verb|d|.
904
905 \paragraph{Description:} This command invalidates a neighbour entry.
906
907 \paragraph{Arguments:} The arguments are the same as with \verb|ip neigh add|,
908 except that \verb|lladdr| and \verb|nud| are ignored.
909
910
911 \paragraph{Example:}
912 \begin{itemize}
913 \item \verb|ip neigh del 10.0.0.3 dev eth0|
914
915 --- invalidate an ARP entry for the neighbour 10.0.0.3 on the device \verb|eth0|.
916
917 \end{itemize}
918
919 \begin{NB}
920 The deleted neighbour entry will not disappear from the tables
921 immediately. If it is in use it cannot be deleted until the last
922 client releases it. Otherwise it will be destroyed during
923 the next garbage collection.
924 \end{NB}
925
926
927 \paragraph{Warning:} Attempts to delete or manually change
928 a \verb|noarp| entry created by the kernel may result in unpredictable behaviour.
929 Particularly, the kernel may try to resolve this address even
930 on a \verb|NOARP| interface or if the address is multicast or broadcast.
931
932
933 \subsection{{\tt ip neighbour show} --- list neighbour entries}
934
935 \paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|sh|, \verb|ls|.
936
937 \paragraph{Description:} This command displays neighbour tables.
938
939 \paragraph{Arguments:}
940
941 \begin{itemize}
942
943 \item \verb|to ADDRESS| (default)
944
945 --- the prefix selecting the neighbours to list.
946
947 \item \verb|dev NAME|
948
949 --- only list the neighbours attached to this device.
950
951 \item \verb|unused|
952
953 --- only list neighbours which are not currently in use.
954
955 \item \verb|nud NUD_STATE|
956
957 --- only list neighbour entries in this state. \verb|NUD_STATE| takes
958 values listed below or the special value \verb|all| which means all states.
959 This option may occur more than once. If this option is absent, \verb|ip|
960 lists all entries except for \verb|none| and \verb|noarp|.
961
962 \end{itemize}
963
964
965 \paragraph{Output format:}
966
967 \begin{verbatim}
968 kuznet@alisa:~ $ ip neigh ls
969 :: dev lo lladdr 00:00:00:00:00:00 nud noarp
970 fe80::200:cff:fe76:3f85 dev eth0 lladdr 00:00:0c:76:3f:85 router \
971 nud stale
972 0.0.0.0 dev lo lladdr 00:00:00:00:00:00 nud noarp
973 193.233.7.254 dev eth0 lladdr 00:00:0c:76:3f:85 nud reachable
974 193.233.7.85 dev eth0 lladdr 00:e0:1e:63:39:00 nud stale
975 kuznet@alisa:~ $
976 \end{verbatim}
977
978 The first word of each line is the protocol address of the neighbour.
979 Then the device name follows. The rest of the line describes the contents of
980 the neighbour entry identified by the pair (device, address).
981
982 \verb|lladdr| is the link layer address of the neighbour.
983
984 \verb|nud| is the state of the ``neighbour unreachability detection'' machine
985 for this entry. The detailed description of the neighbour
986 state machine can be found in~\cite{RFC-NDISC}. Here is the full list
987 of the states with short descriptions:
988
989 \begin{enumerate}
990 \item\verb|none| --- the state of the neighbour is void.
991 \item\verb|incomplete| --- the neighbour is in the process of resolution.
992 \item\verb|reachable| --- the neighbour is valid and apparently reachable.
993 \item\verb|stale| --- the neighbour is valid, but is probably already
994 unreachable, so the kernel will try to check it at the first transmission.
995 \item\verb|delay| --- a packet has been sent to the stale neighbour and the kernel is waiting
996 for confirmation.
997 \item\verb|probe| --- the delay timer expired but no confirmation was received.
998 The kernel has started to probe the neighbour with ARP/NDISC messages.
999 \item\verb|failed| --- resolution has failed.
1000 \item\verb|noarp| --- the neighbour is valid. No attempts to check the entry
1001 will be made.
1002 \item\verb|permanent| --- it is a \verb|noarp| entry, but only the administrator
1003 may remove the entry from the neighbour table.
1004 \end{enumerate}
1005
1006 The link layer address is valid in all states except for \verb|none|,
1007 \verb|failed| and \verb|incomplete|.
1008
1009 IPv6 neighbours can be marked with the additional flag \verb|router|
1010 which means that the neighbour introduced itself as an IPv6 router~\cite{RFC-NDISC}.
1011
1012 \paragraph{Statistics:} The \verb|-statistics| option displays some usage
1013 statistics, f.e.\
1014
1015 \begin{verbatim}
1016 kuznet@alisa:~ $ ip -s n ls 193.233.7.254
1017 193.233.7.254 dev eth0 lladdr 00:00:0c:76:3f:85 ref 5 used 12/13/20 \
1018 nud reachable
1019 kuznet@alisa:~ $
1020 \end{verbatim}
1021
1022 Here \verb|ref| is the number of users of this entry
1023 and \verb|used| is a triplet of time intervals in seconds
1024 separated by slashes. In this case they show that:
1025
1026 \begin{enumerate}
1027 \item the entry was used 12 seconds ago.
1028 \item the entry was confirmed 13 seconds ago.
1029 \item the entry was updated 20 seconds ago.
1030 \end{enumerate}
1031
1032 \subsection{{\tt ip neighbour flush} --- flush neighbour entries}
1033
1034 \paragraph{Abbreviations:} \verb|flush|, \verb|f|.
1035
1036 \paragraph{Description:}This command flushes neighbour tables, selecting
1037 entries to flush by some criteria.
1038
1039 \paragraph{Arguments:} This command has the same arguments as \verb|show|.
1040 The differences are that it does not run when no arguments are given,
1041 and that the default neighbour states to be flushed do not include
1042 \verb|permanent| and \verb|noarp|.
1043
1044
1045 \paragraph{Statistics:} With the \verb|-statistics| option, the command
1046 becomes verbose. It prints out the number of deleted neighbours and the number
1047 of rounds made to flush the neighbour table. If the option is given
1048 twice, \verb|ip neigh flush| also dumps all the deleted neighbours
1049 in the format described in the previous subsection.
1050
1051 \paragraph{Example:}
1052 \begin{verbatim}
1053 netadm@alisa:~ # ip -s -s n f 193.233.7.254
1054 193.233.7.254 dev eth0 lladdr 00:00:0c:76:3f:85 ref 5 used 12/13/20 \
1055 nud reachable
1056
1057 *** Round 1, deleting 1 entries ***
1058 *** Flush is complete after 1 round ***
1059 netadm@alisa:~ #
1060 \end{verbatim}
1061
1062
1063 \section{{\tt ip route} --- routing table management}
1064 \label{IP-ROUTE}
1065
1066 \paragraph{Abbreviations:} \verb|route|, \verb|ro|, \verb|r|.
1067
1068 \paragraph{Object:} \verb|route| entries in the kernel routing tables keep
1069 information about paths to other networked nodes.
1070
1071 Each route entry has a {\em key\/} consisting of a {\em prefix\/}
1072 (i.e.\ a pair containing a network address and the length of its mask) and,
1073 optionally, the TOS value. An IP packet matches the route if the highest
1074 bits of its destination address are equal to the route prefix at least
1075 up to the prefix length and if the TOS of the route is zero or equal to
1076 the TOS of the packet.
1077
1078 If several routes match the packet, the following pruning rules
1079 are used to select the best one (see~\cite{RFC1812}):
1080 \begin{enumerate}
1081 \item The longest matching prefix is selected. All shorter ones
1082 are dropped.
1083
1084 \item If the TOS of some route with the longest prefix is equal to the TOS
1085 of the packet, the routes with different TOS are dropped.
1086
1087 If no exact TOS match was found and routes with TOS=0 exist,
1088 the rest of the routes are pruned.
1089
1090 Otherwise, the route lookup fails.
1091
1092 \item If several routes remain after the previous steps, then
1093 the routes with the best preference values are selected.
1094
1095 \item If we still have several routes, then the {\em first\/} of them
1096 is selected.
1097
1098 \begin{NB}
1099 Note the ambiguity of the last step. Unfortunately, Linux
1100 historically allows such a bizarre situation. The sense of the
1101 word ``first'' depends on the order of route additions and it is practically
1102 impossible to maintain a bundle of such routes in this order.
1103 \end{NB}
1104
1105 For simplicity we will limit ourselves to the case where such a situation
1106 is impossible and routes are uniquely identified by the triplet
1107 \{prefix, tos, preference\}. Actually, it is impossible to create
1108 non-unique routes with \verb|ip| commands described in this section.
1109
1110 One useful exception to this rule is the default route on non-forwarding
1111 hosts. It is ``officially'' allowed to have several fallback routes
1112 when several routers are present on directly connected networks.
1113 In this case, Linux-2.2 makes ``dead gateway detection''~\cite{RFC1122}
1114 controlled by neighbour unreachability detection and by advice
1115 from transport protocols to select a working router, so the order
1116 of the routes is not essential. However, in this case,
1117 fiddling with default routes manually is not recommended. Use the Router Discovery
1118 protocol (see Appendix~\ref{EXAMPLE-SETUP}, p.\pageref{EXAMPLE-SETUP})
1119 instead. Actually, Linux-2.2 IPv6 does not give user level applications
1120 any access to default routes.
1121 \end{enumerate}
1122
1123 Certainly, the steps above are not performed exactly
1124 in this sequence. Instead, the routing table in the kernel is kept
1125 in some data structure to achieve the final result
1126 with minimal cost. However, not depending on a particular
1127 routing algorithm implemented in the kernel, we can summarize
1128 the statements above as: a route is identified by the triplet
1129 \{prefix, tos, preference\}. This {\em key\/} lets us locate
1130 the route in the routing table.
1131
1132 \paragraph{Route attributes:} Each route key refers to a routing
1133 information record containing
1134 the data required to deliver IP packets (f.e.\ output device and
1135 next hop router) and some optional attributes (f.e.\ the path MTU or
1136 the preferred source address when communicating with this destination).
1137 These attributes are described in the following subsection.
1138
1139 \paragraph{Route types:} \label{IP-ROUTE-TYPES}
1140 It is important that the set
1141 of required and optional attributes depends on the route {\em type\/}.
1142 The most important route type
1143 is \verb|unicast|. It describes real paths to other hosts.
1144 As a rule, common routing tables contain only such routes. However,
1145 there are other types of routes with different semantics. The
1146 full list of types understood by Linux-2.2 is:
1147 \begin{itemize}
1148 \item \verb|unicast| --- the route entry describes real paths to the
1149 destinations covered by the route prefix.
1150 \item \verb|unreachable| --- these destinations are unreachable. Packets
1151 are discarded and the ICMP message {\em host unreachable\/} is generated.
1152 The local senders get an \verb|EHOSTUNREACH| error.
1153 \item \verb|blackhole| --- these destinations are unreachable. Packets
1154 are discarded silently. The local senders get an \verb|EINVAL| error.
1155 \item \verb|prohibit| --- these destinations are unreachable. Packets
1156 are discarded and the ICMP message {\em communication administratively
1157 prohibited\/} is generated. The local senders get an \verb|EACCES| error.
1158 \item \verb|local| --- the destinations are assigned to this
1159 host. The packets are looped back and delivered locally.
1160 \item \verb|broadcast| --- the destinations are broadcast addresses.
1161 The packets are sent as link broadcasts.
1162 \item \verb|throw| --- a special control route used together with policy
1163 rules (see sec.\ref{IP-RULE}, p.\pageref{IP-RULE}). If such a route is selected, lookup
1164 in this table is terminated pretending that no route was found.
1165 Without policy routing it is equivalent to the absence of the route in the routing
1166 table. The packets are dropped and the ICMP message {\em net unreachable\/}
1167 is generated. The local senders get an \verb|ENETUNREACH| error.
1168 \item \verb|nat| --- a special NAT route. Destinations covered by the prefix
1169 are considered to be dummy (or external) addresses which require translation
1170 to real (or internal) ones before forwarding. The addresses to translate to
1171 are selected with the attribute \verb|via|. More about NAT is
1172 in Appendix~\ref{ROUTE-NAT}, p.\pageref{ROUTE-NAT}.
1173 \item \verb|anycast| --- ({\em not implemented\/}) the destinations are
1174 {\em anycast\/} addresses assigned to this host. They are mainly equivalent
1175 to \verb|local| with one difference: such addresses are invalid when used
1176 as the source address of any packet.
1177 \item \verb|multicast| --- a special type used for multicast routing.
1178 It is not present in normal routing tables.
1179 \end{itemize}
1180
1181 \paragraph{Route tables:} Linux-2.2 can pack routes into several routing
1182 tables identified by a number in the range from 1 to 255 or by
1183 name from the file \verb|/etc/iproute2/rt_tables|. By default all normal
1184 routes are inserted into the \verb|main| table (ID 254) and the kernel only uses
1185 this table when calculating routes.
1186
1187 Actually, one other table always exists, which is invisible but
1188 even more important. It is the \verb|local| table (ID 255). This table
1189 consists of routes for local and broadcast addresses. The kernel maintains
1190 this table automatically and the administrator usually need not modify it
1191 or even look at it.
1192
1193 The multiple routing tables enter the game when {\em policy routing\/}
1194 is used. See sec.\ref{IP-RULE}, p.\pageref{IP-RULE}.
1195 In this case, the table identifier effectively becomes
1196 one more parameter, which should be added to the triplet
1197 \{prefix, tos, preference\} to uniquely identify the route.
1198
1199
1200 \subsection{{\tt ip route add} --- add a new route\\
1201 {\tt ip route change} --- change a route\\
1202 {\tt ip route replace} --- change a route or add a new one}
1203 \label{IP-ROUTE-ADD}
1204
1205 \paragraph{Abbreviations:} \verb|add|, \verb|a|; \verb|change|, \verb|chg|;
1206 \verb|replace|, \verb|repl|.
1207
1208
1209 \paragraph{Arguments:}
1210 \begin{itemize}
1211 \item \verb|to PREFIX| or \verb|to TYPE PREFIX| (default)
1212
1213 --- the destination prefix of the route. If \verb|TYPE| is omitted,
1214 \verb|ip| assumes type \verb|unicast|. Other values of \verb|TYPE|
1215 are listed above. \verb|PREFIX| is an IP or IPv6 address optionally followed
1216 by a slash and the prefix length. If the length of the prefix is missing,
1217 \verb|ip| assumes a full-length host route. There is also a special
1218 \verb|PREFIX| --- \verb|default| --- which is equivalent to IP \verb|0/0| or
1219 to IPv6 \verb|::/0|.
1220
1221 \item \verb|tos TOS| or \verb|dsfield TOS|
1222
1223 --- the Type Of Service (TOS) key. This key has no associated mask and
1224 the longest match is understood as: First, compare the TOS
1225 of the route and of the packet. If they are not equal, then the packet
1226 may still match a route with a zero TOS. \verb|TOS| is either an 8 bit hexadecimal
1227 number or an identifier from {\tt /etc/iproute2/rt\_dsfield}.
1228
1229
1230 \item \verb|metric NUMBER| or \verb|preference NUMBER|
1231
1232 --- the preference value of the route. \verb|NUMBER| is an arbitrary 32-bit number.
1233
1234 \item \verb|table TABLEID|
1235
1236 --- the table to add this route to.
1237 \verb|TABLEID| may be a number or a string from the file
1238 \verb|/etc/iproute2/rt_tables|. If this parameter is omitted,
1239 \verb|ip| assumes the \verb|main| table, with the exception of
1240 \verb|local|, \verb|broadcast| and \verb|nat| routes, which are
1241 put into the \verb|local| table by default.
1242
1243 \item \verb|dev NAME|
1244
1245 --- the output device name.
1246
1247 \item \verb|via ADDRESS|
1248
1249 --- the address of the nexthop router. Actually, the sense of this field depends
1250 on the route type. For normal \verb|unicast| routes it is either the true nexthop
1251 router or, if it is a direct route installed in BSD compatibility mode,
1252 it can be a local address of the interface.
1253 For NAT routes it is the first address of the block of translated IP destinations.
1254
1255 \item \verb|src ADDRESS|
1256
1257 --- the source address to prefer when sending to the destinations
1258 covered by the route prefix.
1259
1260 \item \verb|realm REALMID|
1261
1262 --- the realm to which this route is assigned.
1263 \verb|REALMID| may be a number or a string from the file
1264 \verb|/etc/iproute2/rt_realms|. Sec.\ref{RT-REALMS} (p.\pageref{RT-REALMS})
1265 contains more information on realms.
1266
1267 \item \verb|mtu MTU| or \verb|mtu lock MTU|
1268
1269 --- the MTU along the path to the destination. If the modifier \verb|lock| is
1270 not used, the MTU may be updated by the kernel due to Path MTU Discovery.
1271 If the modifier \verb|lock| is used, no path MTU discovery will be tried,
1272 all packets will be sent without the DF bit in the IPv4 case
1273 or fragmented to MTU for IPv6.
1274
1275 \item \verb|window NUMBER|
1276
1277 --- the maximal window for TCP to advertise to these destinations,
1278 measured in bytes. It limits maximal data bursts that our TCP
1279 peers are allowed to send to us.
1280
1281 \item \verb|rtt NUMBER|
1282
1283 --- the initial RTT (``Round Trip Time'') estimate.
1284
1285
1286 \item \verb|rttvar NUMBER|
1287
1288 --- \threeonly the initial RTT variance estimate.
1289
1290
1291 \item \verb|ssthresh NUMBER|
1292
1293 --- \threeonly an estimate for the initial slow start threshold.
1294
1295
1296 \item \verb|cwnd NUMBER|
1297
1298 --- \threeonly the clamp for congestion window. It is ignored if the \verb|lock|
1299 flag is not used.
1300
1301
1302 \item \verb|advmss NUMBER|
1303
1304 --- \threeonly the MSS (``Maximal Segment Size'') to advertise to these
1305 destinations when establishing TCP connections. If it is not given,
1306 Linux uses a default value calculated from the first hop device MTU.
1307
1308 \begin{NB}
1309 If the path to these destinations is asymmetric, this guess may be wrong.
1310 \end{NB}
1311
1312 \item \verb|reordering NUMBER|
1313
1314 --- \threeonly Maximal reordering on the path to this destination.
1315 If it is not given, Linux uses the value selected with \verb|sysctl|
1316 variable \verb|net/ipv4/tcp_reordering|.
1317
1318
1319
1320 \item \verb|nexthop NEXTHOP|
1321
1322 --- the nexthop of a multipath route. \verb|NEXTHOP| is a complex value
1323 with its own syntax similar to the top level argument lists:
1324 \begin{itemize}
1325 \item \verb|via ADDRESS| is the nexthop router.
1326 \item \verb|dev NAME| is the output device.
1327 \item \verb|weight NUMBER| is a weight for this element of a multipath
1328 route reflecting its relative bandwidth or quality.
1329 \end{itemize}
1330
1331 \item \verb|scope SCOPE_VAL|
1332
1333 --- the scope of the destinations covered by the route prefix.
1334 \verb|SCOPE_VAL| may be a number or a string from the file
1335 \verb|/etc/iproute2/rt_scopes|.
1336 If this parameter is omitted,
1337 \verb|ip| assumes scope \verb|global| for all gatewayed \verb|unicast|
1338 routes, scope \verb|link| for direct \verb|unicast| and \verb|broadcast| routes
1339 and scope \verb|host| for \verb|local| routes.
1340
1341 \item \verb|protocol RTPROTO|
1342
1343 --- the routing protocol identifier of this route.
1344 \verb|RTPROTO| may be a number or a string from the file
1345 \verb|/etc/iproute2/rt_protos|. If the routing protocol ID is
1346 not given, \verb|ip| assumes protocol \verb|boot| (i.e.\
1347 it assumes the route was added by someone who doesn't
1348 understand what they are doing). Several protocol values have a fixed interpretation.
1349 Namely:
1350 \begin{itemize}
1351 \item \verb|redirect| --- the route was installed due to an ICMP redirect.
1352 \item \verb|kernel| --- the route was installed by the kernel during
1353 autoconfiguration.
1354 \item \verb|boot| --- the route was installed during the bootup sequence.
1355 If a routing daemon starts, it will purge all of them.
1356 \item \verb|static| --- the route was installed by the administrator
1357 to override dynamic routing. Routing daemons will respect such routes
1358 and, probably, even advertise them to their peers.
1359 \item \verb|ra| --- the route was installed by the Router Discovery protocol.
1360 \end{itemize}
1361 The rest of the values are not reserved and the administrator is free
1362 to assign (or not to assign) protocol tags. At least, routing
1363 daemons should take care of setting some unique protocol values,
1364 f.e.\ as they are assigned in \verb|rtnetlink.h| or in \verb|rt_protos|
1365 database.
1366
1367
1368 \item \verb|onlink|
1369
1370 --- pretend that the nexthop is directly attached to this link,
1371 even if it does not match any interface prefix. One application of this
1372 option may be found in~\cite{IP-TUNNELS}.
1373
1374 \item \verb|equalize|
1375
1376 --- allow packet by packet randomization on multipath routes.
1377 Without this modifier, the route will be frozen to one selected
1378 nexthop, so that load splitting will only occur on a per-flow basis.
1379 \verb|equalize| only works if the kernel is patched.
1380
1381
1382 \end{itemize}
1383
1384
1385 \begin{NB}
1386 Actually there are more commands: \verb|prepend| does the same
1387 thing as classic \verb|route add|, i.e.\ adds a route, even if another
1388 route to the same destination exists. Its opposite case is \verb|append|,
1389 which adds the route to the end of the list. Avoid these
1390 features.
1391 \end{NB}
1392 \begin{NB}
1393 More sad news, IPv6 only understands the \verb|append| command correctly.
1394 All the others are translated into \verb|append| commands. Certainly,
1395 this will change in the future.
1396 \end{NB}
1397
1398 \paragraph{Examples:}
1399 \begin{itemize}
1400 \item add a plain route to network 10.0.0/24 via gateway 193.233.7.65
1401 \begin{verbatim}
1402 ip route add 10.0.0/24 via 193.233.7.65
1403 \end{verbatim}
1404 \item change it to a direct route via the \verb|dummy| device
1405 \begin{verbatim}
1406 ip ro chg 10.0.0/24 dev dummy
1407 \end{verbatim}
1408 \item add a default multipath route splitting the load between \verb|ppp0|
1409 and \verb|ppp1|
1410 \begin{verbatim}
1411 ip route add default scope global nexthop dev ppp0 \
1412 nexthop dev ppp1
1413 \end{verbatim}
1414 Note the scope value. It is not necessary but it informs the kernel
1415 that this route is gatewayed rather than direct. Actually, if you
1416 know the addresses of remote endpoints it would be better to use the
1417 \verb|via| parameter.
1418 \item announce that the address 192.203.80.144 is not a real one, but
1419 should be translated to 193.233.7.83 before forwarding
1420 \begin{verbatim}
1421 ip route add nat 192.203.80.144 via 193.233.7.83
1422 \end{verbatim}
1423 Backward translation is set up with policy rules described
1424 in the following section (sec.\ref{IP-RULE}, p.\pageref{IP-RULE}).
1425 \end{itemize}
1426
1427 \subsection{{\tt ip route delete} --- delete a route}
1428
1429 \paragraph{Abbreviations:} \verb|delete|, \verb|del|, \verb|d|.
1430
1431 \paragraph{Arguments:} \verb|ip route del| has the same arguments as
1432 \verb|ip route add|, but their semantics are a bit different.
1433
1434 Key values (\verb|to|, \verb|tos|, \verb|preference| and \verb|table|)
1435 select the route to delete. If optional attributes are present, \verb|ip|
1436 verifies that they coincide with the attributes of the route to delete.
1437 If no route with the given key and attributes was found, \verb|ip route del|
1438 fails.
1439 \begin{NB}
1440 Linux-2.0 had the option to delete a route selected only by prefix address,
1441 ignoring its length (i.e.\ netmask). This option no longer exists
1442 because it was ambiguous. However, look at {\tt ip route flush}
1443 (sec.\ref{IP-ROUTE-FLUSH}, p.\pageref{IP-ROUTE-FLUSH}) which
1444 provides similar and even richer functionality.
1445 \end{NB}
1446
1447 \paragraph{Example:}
1448 \begin{itemize}
1449 \item delete the multipath route created by the command in the previous subsection
1450 \begin{verbatim}
1451 ip route del default scope global nexthop dev ppp0 \
1452 nexthop dev ppp1
1453 \end{verbatim}
1454 \end{itemize}
1455
1456
1457
1458 \subsection{{\tt ip route show} --- list routes}
1459
1460 \paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|sh|, \verb|ls|, \verb|l|.
1461
1462 \paragraph{Description:} The command displays the contents of the routing tables
1463 or the route(s) selected by some criteria.
1464
1465
1466 \paragraph{Arguments:}
1467 \begin{itemize}
1468 \item \verb|to SELECTOR| (default)
1469
1470 --- only select routes from the given range of destinations. \verb|SELECTOR|
1471 consists of an optional modifier (\verb|root|, \verb|match| or \verb|exact|)
1472 and a prefix. \verb|root PREFIX| selects routes with prefixes not shorter
1473 than \verb|PREFIX|. F.e.\ \verb|root 0/0| selects the entire routing table.
1474 \verb|match PREFIX| selects routes with prefixes not longer than
1475 \verb|PREFIX|. F.e.\ \verb|match 10.0/16| selects \verb|10.0/16|,
1476 \verb|10/8| and \verb|0/0|, but it does not select \verb|10.1/16| and
1477 \verb|10.0.0/24|. And \verb|exact PREFIX| (or just \verb|PREFIX|)
1478 selects routes with this exact prefix. If none of these options
1479 is present, \verb|ip| assumes \verb|root 0/0| i.e.\ it lists the entire table.
1480
1481
1482 \item \verb|tos TOS| or \verb|dsfield TOS|
1483
1484 --- only select routes with the given TOS.
1485
1486
1487 \item \verb|table TABLEID|
1488
1489 --- show the routes from this table(s). The default setting is to show
1490 \verb|table| \verb|main|. \verb|TABLEID| may either be the ID of a real table
1491 or one of the special values:
1492 \begin{itemize}
1493 \item \verb|all| --- list all of the tables.
1494 \item \verb|cache| --- dump the routing cache.
1495 \end{itemize}
1496 \begin{NB}
1497 IPv6 has a single table. However, splitting it into \verb|main|, \verb|local|
1498 and \verb|cache| is emulated by the \verb|ip| utility.
1499 \end{NB}
1500
1501 \item \verb|cloned| or \verb|cached|
1502
1503 --- list cloned routes i.e.\ routes which were dynamically forked from
1504 other routes because some route attribute (f.e.\ MTU) was updated.
1505 Actually, it is equivalent to \verb|table cache|.
1506
1507 \item \verb|from SELECTOR|
1508
1509 --- the same syntax as for \verb|to|, but it binds the source address range
1510 rather than destinations. Note that the \verb|from| option only works with
1511 cloned routes.
1512
1513 \item \verb|protocol RTPROTO|
1514
1515 --- only list routes of this protocol.
1516
1517
1518 \item \verb|scope SCOPE_VAL|
1519
1520 --- only list routes with this scope.
1521
1522 \item \verb|type TYPE|
1523
1524 --- only list routes of this type.
1525
1526 \item \verb|dev NAME|
1527
1528 --- only list routes going via this device.
1529
1530 \item \verb|via PREFIX|
1531
1532 --- only list routes going via the nexthop routers selected by \verb|PREFIX|.
1533
1534 \item \verb|src PREFIX|
1535
1536 --- only list routes with preferred source addresses selected
1537 by \verb|PREFIX|.
1538
1539 \item \verb|realm REALMID| or \verb|realms FROMREALM/TOREALM|
1540
1541 --- only list routes with these realms.
1542
1543 \end{itemize}
1544
1545 \paragraph{Examples:} Let us count routes of protocol \verb|gated/bgp|
1546 on a router:
1547 \begin{verbatim}
1548 kuznet@amber:~ $ ip ro ls proto gated/bgp | wc
1549 1413 9891 79010
1550 kuznet@amber:~ $
1551 \end{verbatim}
1552 To count the size of the routing cache, we have to use the \verb|-o| option
1553 because cached attributes can take more than one line of output:
1554 \begin{verbatim}
1555 kuznet@amber:~ $ ip -o ro ls cloned | wc
1556 159 2543 18707
1557 kuznet@amber:~ $
1558 \end{verbatim}
1559
1560
1561 \paragraph{Output format:} The output of this command consists
1562 of per route records separated by line feeds.
1563 However, some records may consist
1564 of more than one line: particularly, this is the case when the route
1565 is cloned or you requested additional statistics. If the
1566 \verb|-o| option was given, then line feeds separating lines inside
1567 records are replaced with the backslash sign.
1568
1569 The output has the same syntax as arguments given to {\tt ip route add},
1570 so that it can be understood easily. F.e.\
1571 \begin{verbatim}
1572 kuznet@amber:~ $ ip ro ls 193.233.7/24
1573 193.233.7.0/24 dev eth0 proto gated/conn scope link \
1574 src 193.233.7.65 realms inr.ac
1575 kuznet@amber:~ $
1576 \end{verbatim}
1577
1578 If you list cloned entries, the output contains other attributes which
1579 are evaluated during route calculation and updated during route
1580 lifetime. An example of the output is:
1581 \begin{verbatim}
1582 kuznet@amber:~ $ ip ro ls 193.233.7.82 tab cache
1583 193.233.7.82 from 193.233.7.82 dev eth0 src 193.233.7.65 \
1584 realms inr.ac/inr.ac
1585 cache <src-direct,redirect> mtu 1500 rtt 300 iif eth0
1586 193.233.7.82 dev eth0 src 193.233.7.65 realms inr.ac
1587 cache mtu 1500 rtt 300
1588 kuznet@amber:~ $
1589 \end{verbatim}
1590 \begin{NB}
1591 \label{NB-strange-route}
1592 The route looks a bit strange, doesn't it? Did you notice that
1593 it is a path from 193.233.7.82 back to 193.233.7.82? Well, you will
1594 see in the section on \verb|ip route get| (p.\pageref{NB-nature-of-strangeness})
1595 how it appeared.
1596 \end{NB}
1597 The second line, starting with the word \verb|cache|, shows
1598 additional attributes which normal routes do not possess.
1599 Cached flags are summarized in angle brackets:
1600 \begin{itemize}
1601 \item \verb|local| --- packets are delivered locally.
1602 It stands for loopback unicast routes, for broadcast routes
1603 and for multicast routes, if this host is a member of the corresponding
1604 group.
1605
1606 \item \verb|reject| --- the path is bad. Any attempt to use it results
1607 in an error. See attribute \verb|error| below (p.\pageref{IP-ROUTE-GET-error}).
1608
1609 \item \verb|mc| --- the destination is multicast.
1610
1611 \item \verb|brd| --- the destination is broadcast.
1612
1613 \item \verb|src-direct| --- the source is on a directly connected
1614 interface.
1615
1616 \item \verb|redirected| --- the route was created by an ICMP Redirect.
1617
1618 \item \verb|redirect| --- packets going via this route will
1619 trigger an ICMP redirect.
1620
1621 \item \verb|fastroute| --- the route is eligible to be used for fastroute.
1622
1623 \item \verb|equalize| --- make packet by packet randomization
1624 along this path.
1625
1626 \item \verb|dst-nat| --- the destination address requires translation.
1627
1628 \item \verb|src-nat| --- the source address requires translation.
1629
1630 \item \verb|masq| --- the source address requires masquerading.
1631 This feature disappeared in linux-2.4.
1632
1633 \item \verb|notify| --- ({\em not implemented}) change/deletion
1634 of this route will trigger RTNETLINK notification.
1635 \end{itemize}
1636
1637 Then some optional attributes follow:
1638 \begin{itemize}
1639 \item \verb|error| --- on \verb|reject| routes it is the error code
1640 returned to local senders when they try to use this route.
1641 These error codes are translated into ICMP error codes, sent to remote
1642 senders, according to the rules described above in the subsection
1643 devoted to route types (p.\pageref{IP-ROUTE-TYPES}).
1644 \label{IP-ROUTE-GET-error}
1645
1646 \item \verb|expires| --- this entry will expire after this timeout.
1647
1648 \item \verb|iif| --- the packets for this path are expected to arrive
1649 on this interface.
1650 \end{itemize}
1651
1652 \paragraph{Statistics:} With the \verb|-statistics| option, more
1653 information about this route is shown:
1654 \begin{itemize}
1655 \item \verb|users| --- the number of users of this entry.
1656 \item \verb|age| --- shows when this route was last used.
1657 \item \verb|used| --- the number of lookups of this route since its creation.
1658 \end{itemize}
1659
1660
1661 \subsection{{\tt ip route flush} --- flush routing tables}
1662 \label{IP-ROUTE-FLUSH}
1663
1664 \paragraph{Abbreviations:} \verb|flush|, \verb|f|.
1665
1666 \paragraph{Description:} this command flushes routes selected
1667 by some criteria.
1668
1669 \paragraph{Arguments:} the arguments have the same syntax and semantics
1670 as the arguments of \verb|ip route show|, but routing tables are not
1671 listed but purged. The only difference is the default action: \verb|show|
1672 dumps all the IP main routing table but \verb|flush| prints the helper page.
1673 The reason for this difference does not require any explanation, does it?
1674
1675
1676 \paragraph{Statistics:} With the \verb|-statistics| option, the command
1677 becomes verbose. It prints out the number of deleted routes and the number
1678 of rounds made to flush the routing table. If the option is given
1679 twice, \verb|ip route flush| also dumps all the deleted routes
1680 in the format described in the previous subsection.
1681
1682 \paragraph{Examples:} The first example flushes all the
1683 gatewayed routes from the main table (f.e.\ after a routing daemon crash).
1684 \begin{verbatim}
1685 netadm@amber:~ # ip -4 ro flush scope global type unicast
1686 \end{verbatim}
1687 This option deserves to be put into a scriptlet \verb|routef|.
1688 \begin{NB}
1689 This option was described in the \verb|route(8)| man page borrowed
1690 from BSD, but was never implemented in Linux.
1691 \end{NB}
1692
1693 The second example flushes all IPv6 cloned routes:
1694 \begin{verbatim}
1695 netadm@amber:~ # ip -6 -s -s ro flush cache
1696 3ffe:2400::220:afff:fef4:c5d1 via 3ffe:2400::220:afff:fef4:c5d1 \
1697 dev eth0 metric 0
1698 cache used 2 age 12sec mtu 1500 rtt 300
1699 3ffe:2400::280:adff:feb7:8034 via 3ffe:2400::280:adff:feb7:8034 \
1700 dev eth0 metric 0
1701 cache used 2 age 15sec mtu 1500 rtt 300
1702 3ffe:2400::280:c8ff:fe59:5bcc via 3ffe:2400::280:c8ff:fe59:5bcc \
1703 dev eth0 metric 0
1704 cache users 1 used 1 age 23sec mtu 1500 rtt 300
1705 3ffe:2400:0:1:2a0:ccff:fe66:1878 via 3ffe:2400:0:1:2a0:ccff:fe66:1878 \
1706 dev eth1 metric 0
1707 cache used 2 age 20sec mtu 1500 rtt 300
1708 3ffe:2400:0:1:a00:20ff:fe71:fb30 via 3ffe:2400:0:1:a00:20ff:fe71:fb30 \
1709 dev eth1 metric 0
1710 cache used 2 age 33sec mtu 1500 rtt 300
1711 ff02::1 via ff02::1 dev eth1 metric 0
1712 cache users 1 used 1 age 45sec mtu 1500 rtt 300
1713
1714 *** Round 1, deleting 6 entries ***
1715 *** Flush is complete after 1 round ***
1716 netadm@amber:~ # ip -6 -s -s ro flush cache
1717 Nothing to flush.
1718 netadm@amber:~ #
1719 \end{verbatim}
1720
1721 The third example flushes BGP routing tables after a \verb|gated|
1722 death.
1723 \begin{verbatim}
1724 netadm@amber:~ # ip ro ls proto gated/bgp | wc
1725 1408 9856 78730
1726 netadm@amber:~ # ip -s ro f proto gated/bgp
1727
1728 *** Round 1, deleting 1408 entries ***
1729 *** Flush is complete after 1 round ***
1730 netadm@amber:~ # ip ro f proto gated/bgp
1731 Nothing to flush.
1732 netadm@amber:~ # ip ro ls proto gated/bgp
1733 netadm@amber:~ #
1734 \end{verbatim}
1735
1736
1737 \subsection{{\tt ip route get} --- get a single route}
1738 \label{IP-ROUTE-GET}
1739
1740 \paragraph{Abbreviations:} \verb|get|, \verb|g|.
1741
1742 \paragraph{Description:} this command gets a single route to a destination
1743 and prints its contents exactly as the kernel sees it.
1744
1745 \paragraph{Arguments:}
1746 \begin{itemize}
1747 \item \verb|to ADDRESS| (default)
1748
1749 --- the destination address.
1750
1751 \item \verb|from ADDRESS|
1752
1753 --- the source address.
1754
1755 \item \verb|tos TOS| or \verb|dsfield TOS|
1756
1757 --- the Type Of Service.
1758
1759 \item \verb|iif NAME|
1760
1761 --- the device from which this packet is expected to arrive.
1762
1763 \item \verb|oif NAME|
1764
1765 --- force the output device on which this packet will be routed.
1766
1767 \item \verb|connected|
1768
1769 --- if no source address (option \verb|from|) was given, relookup
1770 the route with the source set to the preferred address received from the first lookup.
1771 If policy routing is used, it may be a different route.
1772
1773 \end{itemize}
1774
1775 Note that this operation is not equivalent to \verb|ip route show|.
1776 \verb|show| shows existing routes. \verb|get| resolves them and
1777 creates new clones if necessary. Essentially, \verb|get|
1778 is equivalent to sending a packet along this path.
1779 If the \verb|iif| argument is not given, the kernel creates a route
1780 to output packets towards the requested destination.
1781 This is equivalent to pinging the destination
1782 with a subsequent {\tt ip route ls cache}, however, no packets are
1783 actually sent. With the \verb|iif| argument, the kernel pretends
1784 that a packet arrived from this interface and searches for
1785 a path to forward the packet.
1786
1787 \paragraph{Output format:} This command outputs routes in the same
1788 format as \verb|ip route ls|.
1789
1790 \paragraph{Examples:}
1791 \begin{itemize}
1792 \item Find a route to output packets to 193.233.7.82:
1793 \begin{verbatim}
1794 kuznet@amber:~ $ ip route get 193.233.7.82
1795 193.233.7.82 dev eth0 src 193.233.7.65 realms inr.ac
1796 cache mtu 1500 rtt 300
1797 kuznet@amber:~ $
1798 \end{verbatim}
1799
1800 \item Find a route to forward packets arriving on \verb|eth0|
1801 from 193.233.7.82 and destined for 193.233.7.82:
1802 \begin{verbatim}
1803 kuznet@amber:~ $ ip r g 193.233.7.82 from 193.233.7.82 iif eth0
1804 193.233.7.82 from 193.233.7.82 dev eth0 src 193.233.7.65 \
1805 realms inr.ac/inr.ac
1806 cache <src-direct,redirect> mtu 1500 rtt 300 iif eth0
1807 kuznet@amber:~ $
1808 \end{verbatim}
1809 \begin{NB}
1810 \label{NB-nature-of-strangeness}
1811 This is the command that created the funny route from 193.233.7.82
1812 looped back to 193.233.7.82 (cf.\ NB on~p.\pageref{NB-strange-route}).
1813 Note the \verb|redirect| flag on it.
1814 \end{NB}
1815
1816 \item Find a multicast route for packets arriving on \verb|eth0|
1817 from host 193.233.7.82 and destined for multicast group 224.2.127.254
1818 (it is assumed that a multicast routing daemon is running.
1819 In this case, it is \verb|pimd|)
1820 \begin{verbatim}
1821 kuznet@amber:~ $ ip r g 224.2.127.254 from 193.233.7.82 iif eth0
1822 multicast 224.2.127.254 from 193.233.7.82 dev lo \
1823 src 193.233.7.65 realms inr.ac/cosmos
1824 cache <mc> iif eth0 Oifs: eth1 pimreg
1825 kuznet@amber:~ $
1826 \end{verbatim}
1827 This route differs from the ones seen before. It contains a ``normal'' part
1828 and a ``multicast'' part. The normal part is used to deliver (or not to
1829 deliver) the packet to local IP listeners. In this case the router
1830 is not a member
1831 of this group, so that route has no \verb|local| flag and only
1832 forwards packets. The output device for such entries is always loopback.
1833 The multicast part consists of an additional \verb|Oifs:| list showing
1834 the output interfaces.
1835 \end{itemize}
1836
1837
1838 It is time for a more complicated example. Let us add an invalid
1839 gatewayed route for a destination which is really directly connected:
1840 \begin{verbatim}
1841 netadm@alisa:~ # ip route add 193.233.7.98 via 193.233.7.254
1842 netadm@alisa:~ # ip route get 193.233.7.98
1843 193.233.7.98 via 193.233.7.254 dev eth0 src 193.233.7.90
1844 cache mtu 1500 rtt 3072
1845 netadm@alisa:~ #
1846 \end{verbatim}
1847 and probe it with ping:
1848 \begin{verbatim}
1849 netadm@alisa:~ # ping -n 193.233.7.98
1850 PING 193.233.7.98 (193.233.7.98) from 193.233.7.90 : 56 data bytes
1851 From 193.233.7.254: Redirect Host(New nexthop: 193.233.7.98)
1852 64 bytes from 193.233.7.98: icmp_seq=0 ttl=255 time=3.5 ms
1853 From 193.233.7.254: Redirect Host(New nexthop: 193.233.7.98)
1854 64 bytes from 193.233.7.98: icmp_seq=1 ttl=255 time=2.2 ms
1855 64 bytes from 193.233.7.98: icmp_seq=2 ttl=255 time=0.4 ms
1856 64 bytes from 193.233.7.98: icmp_seq=3 ttl=255 time=0.4 ms
1857 64 bytes from 193.233.7.98: icmp_seq=4 ttl=255 time=0.4 ms
1858 ^C
1859 --- 193.233.7.98 ping statistics ---
1860 5 packets transmitted, 5 packets received, 0% packet loss
1861 round-trip min/avg/max = 0.4/1.3/3.5 ms
1862 netadm@alisa:~ #
1863 \end{verbatim}
1864 What happened? Router 193.233.7.254 understood that we have a much
1865 better path to the destination and sent us an ICMP redirect message.
1866 We may retry \verb|ip route get| to see what we have in the routing
1867 tables now:
1868 \begin{verbatim}
1869 netadm@alisa:~ # ip route get 193.233.7.98
1870 193.233.7.98 dev eth0 src 193.233.7.90
1871 cache <redirected> mtu 1500 rtt 3072
1872 netadm@alisa:~ #
1873 \end{verbatim}
1874
1875
1876
1877 \section{{\tt ip rule} --- routing policy database management}
1878 \label{IP-RULE}
1879
1880 \paragraph{Abbreviations:} \verb|rule|, \verb|ru|.
1881
1882 \paragraph{Object:} \verb|rule|s in the routing policy database control
1883 the route selection algorithm.
1884
1885 Classic routing algorithms used in the Internet make routing decisions
1886 based only on the destination address of packets (and in theory,
1887 but not in practice, on the TOS field). The seminal review of classic
1888 routing algorithms and their modifications can be found in~\cite{RFC1812}.
1889
1890 In some circumstances we want to route packets differently depending not only
1891 on destination addresses, but also on other packet fields: source address,
1892 IP protocol, transport protocol ports or even packet payload.
1893 This task is called ``policy routing''.
1894
1895 \begin{NB}
1896 ``policy routing'' $\neq$ ``routing policy''.
1897
1898 \noindent ``policy routing'' $=$ ``cunning routing''.
1899
1900 \noindent ``routing policy'' $=$ ``routing tactics'' or ``routing plan''.
1901 \end{NB}
1902
1903 To solve this task, the conventional destination based routing table, ordered
1904 according to the longest match rule, is replaced with a ``routing policy
1905 database'' (or RPDB), which selects routes
1906 by executing some set of rules. The rules may have lots of keys of different
1907 natures and therefore they have no natural ordering, but one imposed
1908 by the administrator. Linux-2.2 RPDB is a linear list of rules
1909 ordered by numeric priority value.
1910 RPDB explicitly allows matching a few packet fields:
1911
1912 \begin{itemize}
1913 \item packet source address.
1914 \item packet destination address.
1915 \item TOS.
1916 \item incoming interface (which is packet metadata, rather than a packet field).
1917 \end{itemize}
1918
1919 Matching IP protocols and transport ports is also possible,
1920 indirectly, via \verb|ipchains|, by exploiting their ability
1921 to mark some classes of packets with \verb|fwmark|. Therefore,
1922 \verb|fwmark| is also included in the set of keys checked by rules.
1923
1924 Each policy routing rule consists of a {\em selector\/} and an {\em action\/}
1925 predicate. The RPDB is scanned in the order of increasing priority. The selector
1926 of each rule is applied to \{source address, destination address, incoming
1927 interface, tos, fwmark\} and, if the selector matches the packet,
1928 the action is performed. The action predicate may return with success.
1929 In this case, it will either give a route or failure indication
1930 and the RPDB lookup is terminated. Otherwise, the RPDB program
1931 continues on the next rule.
1932
1933 What is the action, semantically? The natural action is to select the
1934 nexthop and the output device. This is what
1935 Cisco IOS~\cite{IOS} does. Let us call it ``match \& set''.
1936 The Linux-2.2 approach is more flexible. The action includes
1937 lookups in destination-based routing tables and selecting
1938 a route from these tables according to the classic longest match algorithm.
1939 The ``match \& set'' approach is the simplest case of the Linux one. It is realized
1940 when a second level routing table contains a single default route.
1941 Recall that Linux-2.2 supports multiple tables
1942 managed with the \verb|ip route| command, described in the previous section.
1943
1944 At startup time the kernel configures the default RPDB consisting of three
1945 rules:
1946
1947 \begin{enumerate}
1948 \item Priority: 0, Selector: match anything, Action: lookup routing
1949 table \verb|local| (ID 255).
1950 The \verb|local| table is a special routing table containing
1951 high priority control routes for local and broadcast addresses.
1952
1953 Rule 0 is special. It cannot be deleted or overridden.
1954
1955
1956 \item Priority: 32766, Selector: match anything, Action: lookup routing
1957 table \verb|main| (ID 254).
1958 The \verb|main| table is the normal routing table containing all non-policy
1959 routes. This rule may be deleted and/or overridden with other
1960 ones by the administrator.
1961
1962 \item Priority: 32767, Selector: match anything, Action: lookup routing
1963 table \verb|default| (ID 253).
1964 The \verb|default| table is empty. It is reserved for some
1965 post-processing if no previous default rules selected the packet.
1966 This rule may also be deleted.
1967
1968 \end{enumerate}
1969
1970 Do not confuse routing tables with rules: rules point to routing tables,
1971 several rules may refer to one routing table and some routing tables
1972 may have no rules pointing to them. If the administrator deletes all the rules
1973 referring to a table, the table is not used, but it still exists
1974 and will disappear only after all the routes contained in it are deleted.
1975
1976
1977 \paragraph{Rule attributes:} Each RPDB entry has additional
1978 attributes. F.e.\ each rule has a pointer to some routing
1979 table. NAT and masquerading rules have an attribute to select new IP
1980 address to translate/masquerade to. Besides that, rules have some
1981 optional attributes, which routes have, namely \verb|realms|.
1982 These values do not override those contained in the routing tables. They
1983 are only used if the route did not select any attributes.
1984
1985
1986 \paragraph{Rule types:} The RPDB may contain rules of the following
1987 types:
1988 \begin{itemize}
1989 \item \verb|unicast| --- the rule prescribes to return the route found
1990 in the routing table referenced by the rule.
1991 \item \verb|blackhole| --- the rule prescribes to silently drop the packet.
1992 \item \verb|unreachable| --- the rule prescribes to generate a ``Network
1993 is unreachable'' error.
1994 \item \verb|prohibit| --- the rule prescribes to generate a
1995 ``Communication is administratively prohibited'' error.
1996 \item \verb|nat| --- the rule prescribes to translate the source address
1997 of the IP packet into some other value. More about NAT is
1998 in Appendix~\ref{ROUTE-NAT}, p.\pageref{ROUTE-NAT}.
1999 \end{itemize}
2000
2001
2002 \paragraph{Commands:} \verb|add|, \verb|delete| and \verb|show|
2003 (or \verb|list|).
2004
2005 \subsection{{\tt ip rule add} --- insert a new rule\\
2006 {\tt ip rule delete} --- delete a rule}
2007 \label{IP-RULE-ADD}
2008
2009 \paragraph{Abbreviations:} \verb|add|, \verb|a|; \verb|delete|, \verb|del|,
2010 \verb|d|.
2011
2012 \paragraph{Arguments:}
2013
2014 \begin{itemize}
2015 \item \verb|type TYPE| (default)
2016
2017 --- the type of this rule. The list of valid types was given in the previous
2018 subsection.
2019
2020 \item \verb|from PREFIX|
2021
2022 --- select the source prefix to match.
2023
2024 \item \verb|to PREFIX|
2025
2026 --- select the destination prefix to match.
2027
2028 \item \verb|iif NAME|
2029
2030 --- select the incoming device to match. If the interface is loopback,
2031 the rule only matches packets originating from this host. This means that you
2032 may create separate routing tables for forwarded and local packets and,
2033 hence, completely segregate them.
2034
2035 \item \verb|tos TOS| or \verb|dsfield TOS|
2036
2037 --- select the TOS value to match.
2038
2039 \item \verb|fwmark MARK|
2040
2041 --- select the \verb|fwmark| value to match.
2042
2043 \item \verb|priority PREFERENCE|
2044
2045 --- the priority of this rule. Each rule should have an explicitly
2046 set {\em unique\/} priority value.
2047 \begin{NB}
2048 Really, for historical reasons \verb|ip rule add| does not require a
2049 priority value and allows them to be non-unique.
2050 If the user does not supply a priority, it is selected by the kernel.
2051 If the user creates a rule with a priority value that
2052 already exists, the kernel does not reject the request. It adds
2053 the new rule before all old rules of the same priority.
2054
2055 It is a mistake in design, no more. And it will be fixed one day,
2056 so do not rely on this feature. Use explicit priorities.
2057 \end{NB}
2058
2059
2060 \item \verb|table TABLEID|
2061
2062 --- the routing table identifier to lookup if the rule selector matches.
2063
2064 \item \verb|realms FROM/TO|
2065
2066 --- Realms to select if the rule matched and the routing table lookup
2067 succeeded. Realm \verb|TO| is only used if the route did not select
2068 any realm.
2069
2070 \item \verb|nat ADDRESS|
2071
2072 --- The base of the IP address block to translate (for source addresses).
2073 The \verb|ADDRESS| may be either the start of the block of NAT addresses
2074 (selected by NAT routes) or in linux-2.2 a local host address (or even zero).
2075 In the last case the router does not translate the packets,
2076 but masquerades them to this address; this feature disappeared in 2.4.
2077 More about NAT is in Appendix~\ref{ROUTE-NAT},
2078 p.\pageref{ROUTE-NAT}.
2079
2080 \end{itemize}
2081
2082 \paragraph{Warning:} Changes to the RPDB made with these commands
2083 do not become active immediately. It is assumed that after
2084 a script finishes a batch of updates, it flushes the routing cache
2085 with \verb|ip route flush cache|.
2086
2087 \paragraph{Examples:}
2088 \begin{itemize}
2089 \item Route packets with source addresses from 192.203.80/24
2090 according to routing table \verb|inr.ruhep|:
2091 \begin{verbatim}
2092 ip ru add from 192.203.80.0/24 table inr.ruhep prio 220
2093 \end{verbatim}
2094
2095 \item Translate packet source address 193.233.7.83 into 192.203.80.144
2096 and route it according to table \#1 (actually, it is \verb|inr.ruhep|):
2097 \begin{verbatim}
2098 ip ru add from 193.233.7.83 nat 192.203.80.144 table 1 prio 320
2099 \end{verbatim}
2100
2101 \item Delete the unused default rule:
2102 \begin{verbatim}
2103 ip ru del prio 32767
2104 \end{verbatim}
2105
2106 \end{itemize}
2107
2108
2109
2110 \subsection{{\tt ip rule show} --- list rules}
2111 \label{IP-RULE-SHOW}
2112
2113 \paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|sh|, \verb|ls|, \verb|l|.
2114
2115
2116 \paragraph{Arguments:} Good news, this is one command that has no arguments.
2117
2118 \paragraph{Output format:}
2119
2120 \begin{verbatim}
2121 kuznet@amber:~ $ ip ru ls
2122 0: from all lookup local
2123 200: from 192.203.80.0/24 to 193.233.7.0/24 lookup main
2124 210: from 192.203.80.0/24 to 192.203.80.0/24 lookup main
2125 220: from 192.203.80.0/24 lookup inr.ruhep realms inr.ruhep/radio-msu
2126 300: from 193.233.7.83 to 193.233.7.0/24 lookup main
2127 310: from 193.233.7.83 to 192.203.80.0/24 lookup main
2128 320: from 193.233.7.83 lookup inr.ruhep map-to 192.203.80.144
2129 32766: from all lookup main
2130 kuznet@amber:~ $
2131 \end{verbatim}
2132
2133 In the first column is the rule priority value followed
2134 by a colon. Then the selectors follow. Each key is prefixed
2135 with the same keyword that was used to create the rule.
2136
2137 The keyword \verb|lookup| is followed by a routing table identifier,
2138 as it is recorded in the file \verb|/etc/iproute2/rt_tables|.
2139
2140 If the rule does NAT (f.e.\ rule \#320), it is shown by the keyword
2141 \verb|map-to| followed by the start of the block of addresses to map.
2142
2143 The sense of this example is pretty simple. The prefixes
2144 192.203.80.0/24 and 193.233.7.0/24 form the internal network, but
2145 they are routed differently when the packets leave it.
2146 Besides that, the host 193.233.7.83 is translated into
2147 another prefix to look like 192.203.80.144 when talking
2148 to the outer world.
2149
2150
2151
2152 \section{{\tt ip maddress} --- multicast addresses management}
2153 \label{IP-MADDR}
2154
2155 \paragraph{Object:} \verb|maddress| objects are multicast addresses.
2156
2157 \paragraph{Commands:} \verb|add|, \verb|delete|, \verb|show| (or \verb|list|).
2158
2159 \subsection{{\tt ip maddress show} --- list multicast addresses}
2160
2161 \paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|sh|, \verb|ls|, \verb|l|.
2162
2163 \paragraph{Arguments:}
2164
2165 \begin{itemize}
2166
2167 \item \verb|dev NAME| (default)
2168
2169 --- the device name.
2170
2171 \end{itemize}
2172
2173 \paragraph{Output format:}
2174
2175 \begin{verbatim}
2176 kuznet@alisa:~ $ ip maddr ls dummy
2177 2: dummy
2178 link 33:33:00:00:00:01
2179 link 01:00:5e:00:00:01
2180 inet 224.0.0.1 users 2
2181 inet6 ff02::1
2182 kuznet@alisa:~ $
2183 \end{verbatim}
2184
2185 The first line of the output shows the interface index and its name.
2186 Then the multicast address list follows. Each line starts with the
2187 protocol identifier. The word \verb|link| denotes a link layer
2188 multicast address.
2189
2190 If a multicast address has more than one user, the number
2191 of users is shown after the \verb|users| keyword.
2192
2193 One additional feature not present in the example above
2194 is the \verb|static| flag, which indicates that the address was joined
2195 with \verb|ip maddr add|. See the following subsection.
2196
2197
2198
2199 \subsection{{\tt ip maddress add} --- add a multicast address\\
2200 {\tt ip maddress delete} --- delete a multicast address}
2201
2202 \paragraph{Abbreviations:} \verb|add|, \verb|a|; \verb|delete|, \verb|del|, \verb|d|.
2203
2204 \paragraph{Description:} these commands attach/detach
2205 a static link layer multicast address to listen on the interface.
2206 Note that it is impossible to join protocol multicast groups
2207 statically. This command only manages link layer addresses.
2208
2209
2210 \paragraph{Arguments:}
2211
2212 \begin{itemize}
2213 \item \verb|address LLADDRESS| (default)
2214
2215 --- the link layer multicast address.
2216
2217 \item \verb|dev NAME|
2218
2219 --- the device to join/leave this multicast address.
2220
2221 \end{itemize}
2222
2223
2224 \paragraph{Example:} Let us continue with the example from the previous subsection.
2225
2226 \begin{verbatim}
2227 netadm@alisa:~ # ip maddr add 33:33:00:00:00:01 dev dummy
2228 netadm@alisa:~ # ip -0 maddr ls dummy
2229 2: dummy
2230 link 33:33:00:00:00:01 users 2 static
2231 link 01:00:5e:00:00:01
2232 netadm@alisa:~ # ip maddr del 33:33:00:00:00:01 dev dummy
2233 \end{verbatim}
2234
2235 \begin{NB}
2236 Neither \verb|ip| nor the kernel checks for multicast address validity.
2237 Particularly, this means that you can try to load a unicast address
2238 instead of a multicast address. Most drivers will ignore such addresses,
2239 but several (f.e.\ Tulip) will intern it to their on-board filter.
2240 The effects may be strange. Namely, the addresses become additional
2241 local link addresses and, if you loaded the address of another host
2242 to the router, wait for duplicated packets on the wire.
2243 It is not a bug, but rather a hole in the API and intra-kernel interfaces.
2244 This feature is really more useful for traffic monitoring, but using it
2245 with Linux-2.2 you {\em have to\/} be sure that the host is not
2246 a router and, especially, that it is not a transparent proxy or masquerading
2247 agent.
2248 \end{NB}
2249
2250
2251
2252 \section{{\tt ip mroute} --- multicast routing cache management}
2253 \label{IP-MROUTE}
2254
2255 \paragraph{Abbreviations:} \verb|mroute|, \verb|mr|.
2256
2257 \paragraph{Object:} \verb|mroute| objects are multicast routing cache
2258 entries created by a user level mrouting daemon
2259 (f.e.\ \verb|pimd| or \verb|mrouted|).
2260
2261 Due to the limitations of the current interface to the multicast routing
2262 engine, it is impossible to change \verb|mroute| objects administratively,
2263 so we may only display them. This limitation will be removed
2264 in the future.
2265
2266 \paragraph{Commands:} \verb|show| (or \verb|list|).
2267
2268
2269 \subsection{{\tt ip mroute show} --- list mroute cache entries}
2270
2271 \paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|sh|, \verb|ls|, \verb|l|.
2272
2273 \paragraph{Arguments:}
2274
2275 \begin{itemize}
2276 \item \verb|to PREFIX| (default)
2277
2278 --- the prefix selecting the destination multicast addresses to list.
2279
2280
2281 \item \verb|iif NAME|
2282
2283 --- the interface on which multicast packets are received.
2284
2285
2286 \item \verb|from PREFIX|
2287
2288 --- the prefix selecting the IP source addresses of the multicast route.
2289
2290
2291 \end{itemize}
2292
2293 \paragraph{Output format:}
2294
2295 \begin{verbatim}
2296 kuznet@amber:~ $ ip mroute ls
2297 (193.232.127.6, 224.0.1.39) Iif: unresolved
2298 (193.232.244.34, 224.0.1.40) Iif: unresolved
2299 (193.233.7.65, 224.66.66.66) Iif: eth0 Oifs: pimreg
2300 kuznet@amber:~ $
2301 \end{verbatim}
2302
2303 Each line shows one (S,G) entry in the multicast routing cache,
2304 where S is the source address and G is the multicast group. \verb|Iif| is
2305 the interface on which multicast packets are expected to arrive.
2306 If the word \verb|unresolved| is there instead of the interface name,
2307 it means that the routing daemon still hasn't resolved this entry.
2308 The keyword \verb|Oifs| is followed by a list of output interfaces, separated
2309 by spaces. If a multicast routing entry is created with non-trivial
2310 TTL scope, administrative distances are appended to the device names
2311 in the \verb|Oifs| list.
2312
2313 \paragraph{Statistics:} The \verb|-statistics| option also prints the
2314 number of packets and bytes forwarded along this route and
2315 the number of packets that arrived on the wrong interface, if this number is not zero.
2316
2317 \begin{verbatim}
2318 kuznet@amber:~ $ ip -s mr ls 224.66/16
2319 (193.233.7.65, 224.66.66.66) Iif: eth0 Oifs: pimreg
2320 9383 packets, 300256 bytes
2321 kuznet@amber:~ $
2322 \end{verbatim}
2323
2324
2325 \section{{\tt ip tunnel} --- tunnel configuration}
2326 \label{IP-TUNNEL}
2327
2328 \paragraph{Abbreviations:} \verb|tunnel|, \verb|tunl|.
2329
2330 \paragraph{Object:} \verb|tunnel| objects are tunnels, encapsulating
2331 packets in IPv4 packets and then sending them over the IP infrastructure.
2332
2333 \paragraph{Commands:} \verb|add|, \verb|delete|, \verb|change|, \verb|show|
2334 (or \verb|list|).
2335
2336 \paragraph{See also:} A more informal discussion of tunneling
2337 over IP and the \verb|ip tunnel| command can be found in~\cite{IP-TUNNELS}.
2338
2339 \subsection{{\tt ip tunnel add} --- add a new tunnel\\
2340 {\tt ip tunnel change} --- change an existing tunnel\\
2341 {\tt ip tunnel delete} --- destroy a tunnel}
2342
2343 \paragraph{Abbreviations:} \verb|add|, \verb|a|; \verb|change|, \verb|chg|;
2344 \verb|delete|, \verb|del|, \verb|d|.
2345
2346
2347 \paragraph{Arguments:}
2348
2349 \begin{itemize}
2350
2351 \item \verb|name NAME| (default)
2352
2353 --- select the tunnel device name.
2354
2355 \item \verb|mode MODE|
2356
2357 --- set the tunnel mode. Three modes are currently available:
2358 \verb|ipip|, \verb|sit| and \verb|gre|.
2359
2360 \item \verb|remote ADDRESS|
2361
2362 --- set the remote endpoint of the tunnel.
2363
2364 \item \verb|local ADDRESS|
2365
2366 --- set the fixed local address for tunneled packets.
2367 It must be an address on another interface of this host.
2368
2369 \item \verb|ttl N|
2370
2371 --- set a fixed TTL \verb|N| on tunneled packets.
2372 \verb|N| is a number in the range 1--255. 0 is a special value
2373 meaning that packets inherit the TTL value.
2374 The default value is: \verb|inherit|.
2375
2376 \item \verb|tos T| or \verb|dsfield T|
2377
2378 --- set a fixed TOS \verb|T| on tunneled packets.
2379 The default value is: \verb|inherit|.
2380
2381
2382
2383 \item \verb|dev NAME|
2384
2385 --- bind the tunnel to the device \verb|NAME| so that
2386 tunneled packets will only be routed via this device and will
2387 not be able to escape to another device when the route to endpoint changes.
2388
2389 \item \verb|nopmtudisc|
2390
2391 --- disable Path MTU Discovery on this tunnel.
2392 It is enabled by default. Note that a fixed ttl is incompatible
2393 with this option: tunnelling with a fixed ttl always makes pmtu discovery.
2394
2395 \item \verb|key K|, \verb|ikey K|, \verb|okey K|
2396
2397 --- (only GRE tunnels) use keyed GRE with key \verb|K|. \verb|K| is
2398 either a number or an IP address-like dotted quad.
2399 The \verb|key| parameter sets the key to use in both directions.
2400 The \verb|ikey| and \verb|okey| parameters set different keys for input and output.
2401
2402
2403 \item \verb|csum|, \verb|icsum|, \verb|ocsum|
2404
2405 --- (only GRE tunnels) generate/require checksums for tunneled packets.
2406 The \verb|ocsum| flag calculates checksums for outgoing packets.
2407 The \verb|icsum| flag requires that all input packets have the correct
2408 checksum. The \verb|csum| flag is equivalent to the combination
2409 ``\verb|icsum| \verb|ocsum|''.
2410
2411 \item \verb|seq|, \verb|iseq|, \verb|oseq|
2412
2413 --- (only GRE tunnels) serialize packets.
2414 The \verb|oseq| flag enables sequencing of outgoing packets.
2415 The \verb|iseq| flag requires that all input packets are serialized.
2416 The \verb|seq| flag is equivalent to the combination ``\verb|iseq| \verb|oseq|''.
2417
2418 \begin{NB}
2419 I think this option does not
2420 work. At least, I did not test it, did not debug it and
2421 do not even understand how it is supposed to work or for what
2422 purpose Cisco planned to use it. Do not use it.
2423 \end{NB}
2424
2425
2426 \end{itemize}
2427
2428 \paragraph{Example:} Create a pointopoint IPv6 tunnel with maximal TTL of 32.
2429 \begin{verbatim}
2430 netadm@amber:~ # ip tunl add Cisco mode sit remote 192.31.7.104 \
2431 local 192.203.80.142 ttl 32
2432 \end{verbatim}
2433
2434 \subsection{{\tt ip tunnel show} --- list tunnels}
2435
2436 \paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|sh|, \verb|ls|, \verb|l|.
2437
2438
2439 \paragraph{Arguments:} None.
2440
2441 \paragraph{Output format:}
2442 \begin{verbatim}
2443 kuznet@amber:~ $ ip tunl ls Cisco
2444 Cisco: ipv6/ip remote 192.31.7.104 local 192.203.80.142 ttl 32
2445 kuznet@amber:~ $
2446 \end{verbatim}
2447 The line starts with the tunnel device name followed by a colon.
2448 Then the tunnel mode follows. The parameters of the tunnel are listed
2449 with the same keywords that were used when creating the tunnel.
2450
2451 \paragraph{Statistics:}
2452
2453 \begin{verbatim}
2454 kuznet@amber:~ $ ip -s tunl ls Cisco
2455 Cisco: ipv6/ip remote 192.31.7.104 local 192.203.80.142 ttl 32
2456 RX: Packets Bytes Errors CsumErrs OutOfSeq Mcasts
2457 12566 1707516 0 0 0 0
2458 TX: Packets Bytes Errors DeadLoop NoRoute NoBufs
2459 13445 1879677 0 0 0 0
2460 kuznet@amber:~ $
2461 \end{verbatim}
2462 Essentially, these numbers are the same as the numbers
2463 printed with {\tt ip -s link show}
2464 (sec.\ref{IP-LINK-SHOW}, p.\pageref{IP-LINK-SHOW}) but the tags are different
2465 to reflect that they are tunnel specific.
2466 \begin{itemize}
2467 \item \verb|CsumErrs| --- the total number of packets dropped
2468 because of checksum failures for a GRE tunnel with checksumming enabled.
2469 \item \verb|OutOfSeq| --- the total number of packets dropped
2470 because they arrived out of sequence for a GRE tunnel with
2471 serialization enabled.
2472 \item \verb|Mcasts| --- the total number of multicast packets
2473 received on a broadcast GRE tunnel.
2474 \item \verb|DeadLoop| --- the total number of packets which were not
2475 transmitted because the tunnel is looped back to itself.
2476 \item \verb|NoRoute| --- the total number of packets which were not
2477 transmitted because there is no IP route to the remote endpoint.
2478 \item \verb|NoBufs| --- the total number of packets which were not
2479 transmitted because the kernel failed to allocate a buffer.
2480 \end{itemize}
2481
2482
2483 \section{{\tt ip monitor} and {\tt rtmon} --- state monitoring}
2484 \label{IP-MONITOR}
2485
2486 The \verb|ip| utility can monitor the state of devices, addresses
2487 and routes continuously. This option has a slightly different format.
2488 Namely,
2489 the \verb|monitor| command is the first in the command line and then
2490 the object list follows:
2491 \begin{verbatim}
2492 ip monitor [ file FILE ] [ all | OBJECT-LIST ]
2493 \end{verbatim}
2494 \verb|OBJECT-LIST| is the list of object types that we want to monitor.
2495 It may contain \verb|link|, \verb|address| and \verb|route|.
2496 If no \verb|file| argument is given, \verb|ip| opens RTNETLINK,
2497 listens on it and dumps state changes in the format described
2498 in previous sections.
2499
2500 If a file name is given, it does not listen on RTNETLINK,
2501 but opens the file containing RTNETLINK messages saved in binary format
2502 and dumps them. Such a history file can be generated with the
2503 \verb|rtmon| utility. This utility has a command line syntax similar to
2504 \verb|ip monitor|.
2505 Ideally, \verb|rtmon| should be started before
2506 the first network configuration command is issued. F.e.\ if
2507 you insert:
2508 \begin{verbatim}
2509 rtmon file /var/log/rtmon.log
2510 \end{verbatim}
2511 in a startup script, you will be able to view the full history
2512 later.
2513
2514 Certainly, it is possible to start \verb|rtmon| at any time.
2515 It prepends the history with the state snapshot dumped at the moment
2516 of starting.
2517
2518
2519 \section{Route realms and policy propagation, {\tt rtacct}}
2520 \label{RT-REALMS}
2521
2522 On routers using OSPF ASE or, especially, the BGP protocol, routing
2523 tables may be huge. If we want to classify or to account for the packets
2524 per route, we will have to keep lots of information. Even worse, if we
2525 want to distinguish the packets not only by their destination, but
2526 also by their source, the task gets quadratic complexity and its solution
2527 is physically impossible.
2528
2529 One approach to propagating the policy from routing protocols
2530 to the forwarding engine has been proposed in~\cite{IOS-BGP-PP}.
2531 Essentially, Cisco Policy Propagation via BGP is based on the fact
2532 that dedicated routers all have the RIB (Routing Information Base)
2533 close to the forwarding engine, so policy routing rules can
2534 check all the route attributes, including ASPATH information
2535 and community strings.
2536
2537 The Linux architecture, splitting the RIB (maintained by a user level
2538 daemon) and the kernel based FIB (Forwarding Information Base),
2539 does not allow such a simple approach.
2540
2541 This turns out to be fortunate, because there is another solution
2542 which allows even more flexible policy and richer semantics.
2543
2544 Namely, routes can be clustered together in user space, based on their
2545 attributes. F.e.\ a BGP router knows route ASPATH, its community;
2546 an OSPF router knows the route tag or its area. The administrator, when adding
2547 routes manually, also knows their nature. Providing that the number of such
2548 aggregates (we call them {\em realms\/}) is low, the task of full
2549 classification both by source and destination becomes quite manageable.
2550
2551 So each route may be assigned to a realm. It is assumed that
2552 this identification is made by a routing daemon, but static routes
2553 can also be handled manually with \verb|ip route| (see sec.\ref{IP-ROUTE},
2554 p.\pageref{IP-ROUTE}).
2555 \begin{NB}
2556 There is a patch to \verb|gated|, allowing classification of routes
2557 to realms with all the set of policy rules implemented in \verb|gated|:
2558 by prefix, by ASPATH, by origin, by tag etc.
2559 \end{NB}
2560
2561 To facilitate the construction (f.e.\ in case the routing
2562 daemon is not aware of realms), missing realms may be completed
2563 with routing policy rules, see sec.~\ref{IP-RULE}, p.\pageref{IP-RULE}.
2564
2565 For each packet the kernel calculates a tuple of realms: source realm
2566 and destination realm, using the following algorithm:
2567
2568 \begin{enumerate}
2569 \item If the route has a realm, the destination realm of the packet is set to it.
2570 \item If the rule has a source realm, the source realm of the packet is set to it.
2571 If the destination realm was not inherited from the route and the rule has a destination realm,
2572 it is also set.
2573 \item If at least one of the realms is still unknown, the kernel finds
2574 the reversed route to the source of the packet.
2575 \item If the source realm is still unknown, get it from the reversed route.
2576 \item If one of the realms is still unknown, swap the realms of reversed
2577 routes and apply step 2 again.
2578 \end{enumerate}
2579
2580 After this procedure is completed we know what realm the packet
2581 arrived from and the realm where it is going to propagate to.
2582 If some of the realms are unknown, they are initialized to zero
2583 (or realm \verb|unknown|).
2584
2585 The main application of realms is the TC \verb|route| classifier~\cite{TC-CREF},
2586 where they are used to help assign packets to traffic classes,
2587 to account, police and schedule them according to this
2588 classification.
2589
2590 A much simpler but still very useful application is incoming packet
2591 accounting by realms. The kernel gathers a packet statistics summary
2592 which can be viewed with the \verb|rtacct| utility.
2593 \begin{verbatim}
2594 kuznet@amber:~ $ rtacct russia
2595 Realm BytesTo PktsTo BytesFrom PktsFrom
2596 russia 20576778 169176 47080168 153805
2597 kuznet@amber:~ $
2598 \end{verbatim}
2599 This shows that this router received 153805 packets from
2600 the realm \verb|russia| and forwarded 169176 packets to \verb|russia|.
2601 The realm \verb|russia| consists of routes with ASPATHs not leaving
2602 Russia.
2603
2604 Note that locally originating packets are not accounted here,
2605 \verb|rtacct| shows incoming packets only. Using the \verb|route|
2606 classifier (see~\cite{TC-CREF}) you can get even more detailed
2607 accounting information about outgoing packets, optionally
2608 summarizing traffic not only by source or destination, but
2609 by any pair of source and destination realms.
2610
2611
2612 \begin{thebibliography}{99}
2613 \addcontentsline{toc}{section}{References}
2614 \bibitem{RFC-NDISC} T.~Narten, E.~Nordmark, W.~Simpson.
2615 ``Neighbor Discovery for IP Version 6 (IPv6)'', RFC-2461.
2616
2617 \bibitem{RFC-ADDRCONF} S.~Thomson, T.~Narten.
2618 ``IPv6 Stateless Address Autoconfiguration'', RFC-2462.
2619
2620 \bibitem{RFC1812} F.~Baker.
2621 ``Requirements for IP Version 4 Routers'', RFC-1812.
2622
2623 \bibitem{RFC1122} R.~T.~Braden.
2624 ``Requirements for Internet hosts --- communication layers'', RFC-1122.
2625
2626 \bibitem{IOS} ``Cisco IOS Release 12.0 Network Protocols
2627 Command Reference, Part 1'' and
2628 ``Cisco IOS Release 12.0 Quality of Service Solutions
2629 Configuration Guide: Configuring Policy-Based Routing'',\\
2630 http://www.cisco.com/univercd/cc/td/doc/product/software/ios120.
2631
2632 \bibitem{IP-TUNNELS} A.~N.~Kuznetsov.
2633 ``Tunnels over IP in Linux-2.2'', \\
2634 In: {\tt ftp://ftp.inr.ac.ru/ip-routing/iproute2-current.tar.gz}.
2635
2636 \bibitem{TC-CREF} A.~N.~Kuznetsov. ``TC Command Reference'',\\
2637 In: {\tt ftp://ftp.inr.ac.ru/ip-routing/iproute2-current.tar.gz}.
2638
2639 \bibitem{IOS-BGP-PP} ``Cisco IOS Release 12.0 Quality of Service Solutions
2640 Configuration Guide: Configuring QoS Policy Propagation via
2641 Border Gateway Protocol'',\\
2642 http://www.cisco.com/univercd/cc/td/doc/product/software/ios120.
2643
2644 \bibitem{RFC-DHCP} R.~Droms.
2645 ``Dynamic Host Configuration Protocol'', RFC-2131.
2646
2647 \end{thebibliography}
2648
2649
2650
2651
2652 \appendix
2653 \addcontentsline{toc}{section}{Appendix}
2654
2655 \section{Source address selection}
2656 \label{ADDR-SEL}
2657
2658 When a host creates an IP packet, it must select some source
2659 address. Correct source address selection is a critical procedure,
2660 because it gives the receiver the information needed to deliver a
2661 reply. If the source is selected incorrectly, in the best case,
2662 the backward path may appear different to the forward one which
2663 is harmful for performance. In the worst case, when the addresses
2664 are administratively scoped, the reply may be lost entirely.
2665
2666 Linux-2.2 selects source addresses using the following algorithm:
2667
2668 \begin{itemize}
2669 \item
2670 The application may select a source address explicitly with the \verb|bind(2)|
2671 syscall or by supplying it to \verb|sendmsg(2)| via the ancillary data object
2672 \verb|IP_PKTINFO|. In this case the kernel only checks the validity
2673 of the address and never tries to ``improve'' an incorrect user choice,
2674 generating an error instead.
2675 \begin{NB}
2676 Never say ``Never''. The sysctl option \verb|ip_dynaddr| breaks
2677 this axiom. It has been made deliberately with the purpose
2678 of automatically reselecting the address on hosts with dynamic dial-out interfaces.
2679 However, this hack {\em must not\/} be used on multihomed hosts
2680 and especially on routers: it would break them.
2681 \end{NB}
2682
2683
2684 \item Otherwise, IP routing tables can contain an explicit source
2685 address hint for this destination. The hint is set with the \verb|src| parameter
2686 to the \verb|ip route| command, sec.\ref{IP-ROUTE}, p.\pageref{IP-ROUTE}.
2687
2688
2689 \item Otherwise, the kernel searches through the list of addresses
2690 attached to the interface through which the packets will be routed.
2691 The search strategies are different for IP and IPv6. Namely:
2692
2693 \begin{itemize}
2694 \item IPv6 searches for the first valid, not deprecated address
2695 with the same scope as the destination.
2696
2697 \item IP searches for the first valid address with a scope wider
2698 than the scope of the destination but it prefers addresses
2699 which fall to the same subnet as the nexthop of the route
2700 to the destination. Unlike IPv6, the scopes of IPv4 destinations
2701 are not encoded in their addresses but are supplied
2702 in routing tables instead (the \verb|scope| parameter to the \verb|ip route| command,
2703 sec.\ref{IP-ROUTE}, p.\pageref{IP-ROUTE}).
2704
2705 \end{itemize}
2706
2707
2708 \item Otherwise, if the scope of the destination is \verb|link| or \verb|host|,
2709 the algorithm fails and returns a zero source address.
2710
2711 \item Otherwise, all interfaces are scanned to search for an address
2712 with an appropriate scope. The loopback device \verb|lo| is always the first
2713 in the search list, so that if an address with global scope (not 127.0.0.1!)
2714 is configured on loopback, it is always preferred.
2715
2716 \end{itemize}
2717
2718
2719 \section{Proxy ARP/NDISC}
2720 \label{PROXY-NEIGH}
2721
2722 Routers may answer ARP/NDISC solicitations on behalf of other hosts.
2723 In Linux-2.2 proxy ARP on an interface may be enabled
2724 by setting the kernel \verb|sysctl| variable
2725 \verb|/proc/sys/net/ipv4/conf/<dev>/proxy_arp| to 1. After this, the router
2726 starts to answer ARP requests on the interface \verb|<dev>|, provided
2727 the route to the requested destination does {\em not\/} go back via the same
2728 device.
2729
2730 The variable \verb|/proc/sys/net/ipv4/conf/all/proxy_arp| enables proxy
2731 ARP on all the IP devices.
2732
2733 However, this approach fails in the case of IPv6 because the router
2734 must join the solicited node multicast address to listen for the corresponding
2735 NDISC queries. It means that proxy NDISC is possible only on a per destination
2736 basis.
2737
2738 Logically, proxy ARP/NDISC is not a kernel task. It can easily be implemented
2739 in user space. However, similar functionality was present in BSD kernels
2740 and in Linux-2.0, so we have to preserve it at least to the extent that
2741 is standardized in BSD.
2742 \begin{NB}
2743 Linux-2.0 ARP had a feature called {\em subnet\/} proxy ARP.
2744 It is replaced with the sysctl flag in Linux-2.2.
2745 \end{NB}
2746
2747
2748 The \verb|ip| utility provides a way to manage proxy ARP/NDISC
2749 with the \verb|ip neigh| command, namely:
2750 \begin{verbatim}
2751 ip neigh add proxy ADDRESS [ dev NAME ]
2752 \end{verbatim}
2753 adds a new proxy ARP/NDISC record and
2754 \begin{verbatim}
2755 ip neigh del proxy ADDRESS [ dev NAME ]
2756 \end{verbatim}
2757 deletes it.
2758
2759 If the name of the device is not given, the router will answer solicitations
2760 for address \verb|ADDRESS| on all devices, otherwise it will only serve
2761 the device \verb|NAME|. Even if the proxy entry is created with
2762 \verb|ip neigh|, the router {\em will not\/} answer a query if the route
2763 to the destination goes back via the interface from which the solicitation
2764 was received.
2765
2766 It is important to emphasize that proxy entries have {\em no\/}
2767 parameters other than these (IP/IPv6 address and optional device).
2768 Particularly, the entry does not store any link layer address.
2769 It always advertises the station address of the interface
2770 on which it sends advertisements (i.e.\ its own station address).
2771
2772 \section{Route NAT status}
2773 \label{ROUTE-NAT}
2774
2775 NAT (or ``Network Address Translation'') remaps some parts
2776 of the IP address space into other ones. Linux-2.2 route NAT is supposed
2777 to be used to facilitate policy routing by rewriting addresses
2778 to other routing domains or to help while renumbering sites
2779 to another prefix.
2780
2781 \paragraph{What it is not:}
2782 It is necessary to emphasize that {\em it is not supposed\/}
2783 to be used to compress address space or to split load.
2784 This is not missing functionality but a design principle.
2785 Route NAT is {\em stateless\/}. It does not hold any state
2786 about translated sessions. This means that it handles any number
2787 of sessions flawlessly. But it also means that it is {\em static\/}.
2788 It cannot detect the moment when the last TCP client stops
2789 using an address. For the same reason, it will not help to split
2790 load between several servers.
2791 \begin{NB}
2792 It is a pretty commonly held belief that it is useful to split load between
2793 several servers with NAT. This is a mistake. All you get from this
2794 is the requirement that the router keep the state of all the TCP connections
2795 going via it. Well, if the router is so powerful, run apache on it. 8)
2796 \end{NB}
2797
2798 The second feature: it does not touch packet payload,
2799 does not try to ``improve'' broken protocols by looking
2800 through its data and mangling it. It mangles IP addresses,
2801 only IP addresses and nothing but IP addresses.
2802 This, too, is not missing functionality.
2803
2804 To summarize: if you need to compress address space or keep
2805 active FTP clients happy, your choice is not route NAT but masquerading,
2806 port forwarding, NAPT etc.
2807 \begin{NB}
2808 By the way, you may also want to look at
2809 \verb|http://www.suse.com/~mha/HyperNews/get/linux-ip-nat.html|
2810 \end{NB}
2811
2812
2813 \paragraph{How it works.}
2814 Some part of the address space is reserved for dummy addresses
2815 which will look for all the world like some host addresses
2816 inside your network. No other hosts may use these addresses,
2817 however other routers may also be configured to translate them.
2818 \begin{NB}
2819 A great advantage of route NAT is that it may be used not
2820 only in stub networks but in environments with arbitrarily complicated
2821 structure. It does not firewall, it {\em forwards.}
2822 \end{NB}
2823 These addresses are selected by the \verb|ip route| command
2824 (sec.\ref{IP-ROUTE-ADD}, p.\pageref{IP-ROUTE-ADD}). F.e.\
2825 \begin{verbatim}
2826 ip route add nat 192.203.80.144 via 193.233.7.83
2827 \end{verbatim}
2828 states that the single address 192.203.80.144 is a dummy NAT address.
2829 For all the world it looks like a host address inside our network.
2830 For neighbouring hosts and routers it looks like the local address
2831 of the translating router. The router answers ARP for it, advertises
2832 this address as routed via it, {\em et al\/}. When the router
2833 receives a packet destined for 192.203.80.144, it replaces
2834 this address with 193.233.7.83 which is the address of some real
2835 host and forwards the packet. If you need to remap
2836 blocks of addresses, you may use a command like:
2837 \begin{verbatim}
2838 ip route add nat 192.203.80.192/26 via 193.233.7.64
2839 \end{verbatim}
2840 This command will map a block of 64 addresses 192.203.80.192--255 to
2841 193.233.7.64--127.
2842
2843 When an internal host (193.233.7.83 in the example above)
2844 sends something to the outer world and these packets are forwarded
2845 by our router, it should translate the source address 193.233.7.83
2846 into 192.203.80.144. This task is solved by setting a special
2847 policy rule (sec.\ref{IP-RULE-ADD}, p.\pageref{IP-RULE-ADD}):
2848 \begin{verbatim}
2849 ip rule add prio 320 from 193.233.7.83 nat 192.203.80.144
2850 \end{verbatim}
2851 This rule says that the source address 193.233.7.83
2852 should be translated into 192.203.80.144 before forwarding.
2853 It is important that the address after the \verb|nat| keyword
2854 is some NAT address, declared by {\tt ip route add nat}.
2855 If it is just a random address the router will not map to it.
2856 \begin{NB}
2857 The exception is when the address is a local address of this
2858 router (or 0.0.0.0) and masquerading is configured in the linux-2.2
2859 kernel. In this case the router will masquerade the packets as this address.
2860 If 0.0.0.0 is selected, the result is equivalent to one
2861 obtained with firewalling rules. Otherwise, you have the way
2862 to order Linux to masquerade to this fixed address.
2863 The NAT mechanism used in linux-2.4 is more flexible than
2864 masquerading, so this feature has lost its meaning and has been disabled.
2865 \end{NB}
2866
2867 If the network has non-trivial internal structure, it is
2868 useful and even necessary to add rules disabling translation
2869 when a packet does not leave this network. Let us return to the
2870 example from sec.\ref{IP-RULE-SHOW} (p.\pageref{IP-RULE-SHOW}).
2871 \begin{verbatim}
2872 300: from 193.233.7.83 to 193.233.7.0/24 lookup main
2873 310: from 193.233.7.83 to 192.203.80.0/24 lookup main
2874 320: from 193.233.7.83 lookup inr.ruhep map-to 192.203.80.144
2875 \end{verbatim}
2876 This block of rules causes normal forwarding when
2877 packets from 193.233.7.83 do not leave networks 193.233.7/24
2878 and 192.203.80/24. Also, if the \verb|inr.ruhep| table does not
2879 contain a route to the destination (which means that the routing
2880 domain owning addresses from 192.203.80/24 is dead), no translation
2881 will occur. Otherwise, the packets are translated.
2882
2883 \paragraph{How to only translate selected ports:}
2884 If you only want to translate selected ports (f.e.\ http)
2885 and leave the rest intact, you may use \verb|ipchains|
2886 to \verb|fwmark| a class of packets.
2887 Suppose you did and all the packets from 193.233.7.83
2888 destined for port 80 are marked with marker 0x1234 in input fwchain.
2889 In this case you may replace rule \#320 with:
2890 \begin{verbatim}
2891 320: from 193.233.7.83 fwmark 1234 lookup main map-to 192.203.80.144
2892 \end{verbatim}
2893 and translation will only be enabled for outgoing http requests.
2894
2895 \section{Example: minimal host setup}
2896 \label{EXAMPLE-SETUP}
2897
2898 The following script gives an example of a fault safe
2899 setup of IP (and IPv6, if it is compiled into the kernel)
2900 in the common case of a node attached to a single broadcast
2901 network. A more advanced script, which may be used both on multihomed
2902 hosts and on routers, is described in the following
2903 section.
2904
2905 The utilities used in the script may be found in the
2906 directory ftp://ftp.inr.ac.ru/ip-routing/:
2907 \begin{enumerate}
2908 \item \verb|ip| --- package \verb|iproute2|.
2909 \item \verb|arping| --- package \verb|iputils|.
2910 \item \verb|rdisc| --- package \verb|iputils|.
2911 \end{enumerate}
2912 \begin{NB}
2913 It also refers to a DHCP client, \verb|dhcpcd|. I should refrain from
2914 recommending a good DHCP client to use. All that I can
2915 say is that ISC \verb|dhcp-2.0b1pl6| patched with the patch that
2916 can be found in the \verb|dhcp.bootp.rarp| subdirectory of
2917 the same ftp site {\em does\/} work,
2918 at least on Ethernet and Token Ring.
2919 \end{NB}
2920
2921 \begin{verbatim}
2922 #! /bin/bash
2923 \end{verbatim}
2924 \begin{flushleft}
2925 \# {\bf Usage: \verb|ifone ADDRESS[/PREFIX-LENGTH] [DEVICE]|}\\
2926 \# {\bf Parameters:}\\
2927 \# \$1 --- Static IP address, optionally followed by prefix length.\\
2928 \# \$2 --- Device name. If it is missing, \verb|eth0| is assumed.\\
2929 \# F.e.\ \verb|ifone 193.233.7.90|
2930 \end{flushleft}
2931 \begin{verbatim}
2932 dev=$2
2933 : ${dev:=eth0}
2934 ipaddr=
2935 \end{verbatim}
2936 \# Parse IP address, splitting prefix length.
2937 \begin{verbatim}
2938 if [ "$1" != "" ]; then
2939 ipaddr=${1%/*}
2940 if [ "$1" != "$ipaddr" ]; then
2941 pfxlen=${1#*/}
2942 fi
2943 : ${pfxlen:=24}
2944 fi
2945 pfx="${ipaddr}/${pfxlen}"
2946 \end{verbatim}
2947
2948 \begin{flushleft}
2949 \# {\bf Step 0} --- enable loopback.\\
2950 \#\\
2951 \# This step is necessary on any networked box before attempting\\
2952 \# to configure any other device.\\
2953 \end{flushleft}
2954 \begin{verbatim}
2955 ip link set up dev lo
2956 ip addr add 127.0.0.1/8 dev lo brd + scope host
2957 \end{verbatim}
2958 \begin{flushleft}
2959 \# IPv6 autoconfigures itself on loopback.\\
2960 \#\\
2961 \# If user gave loopback as device, we add the address as alias and exit.
2962 \end{flushleft}
2963 \begin{verbatim}
2964 if [ "$dev" = "lo" ]; then
2965 if [ "$ipaddr" != "" -a "$ipaddr" != "127.0.0.1" ]; then
2966 ip address add $ipaddr dev $dev
2967 exit $?
2968 fi
2969 exit 0
2970 fi
2971 \end{verbatim}
2972
2973 \noindent\# {\bf Step 1} --- enable device \verb|$dev|
2974
2975 \begin{verbatim}
2976 if ! ip link set up dev $dev ; then
2977 echo "Cannot enable interface $dev. Aborting." 1>&2
2978 exit 1
2979 fi
2980 \end{verbatim}
2981 \begin{flushleft}
2982 \# The interface is \verb|UP|. IPv6 started stateless autoconfiguration itself,\\
2983 \# and its configuration finishes here. However,\\
2984 \# IP still needs some static preconfigured address.
2985 \end{flushleft}
2986 \begin{verbatim}
2987 if [ "$ipaddr" = "" ]; then
2988 echo "No address for $dev is configured, trying DHCP..." 1>&2
2989 dhcpcd
2990 exit $?
2991 fi
2992 \end{verbatim}
2993
2994 \begin{flushleft}
2995 \# {\bf Step 2} --- IP Duplicate Address Detection~\cite{RFC-DHCP}.\\
2996 \# Send two probes and wait for result for 3 seconds.\\
2997 \# If the interface opens slower f.e.\ due to long media detection,\\
2998 \# you want to increase the timeout.\\
2999 \end{flushleft}
3000 \begin{verbatim}
3001 if ! arping -q -c 2 -w 3 -D -I $dev $ipaddr ; then
3002 echo "Address $ipaddr is busy, trying DHCP..." 1>&2
3003 dhcpcd
3004 exit $?
3005 fi
3006 \end{verbatim}
3007 \begin{flushleft}
3008 \# OK, the address is unique, we may add it on the interface.\\
3009 \#\\
3010 \# {\bf Step 3} --- Configure the address on the interface.
3011 \end{flushleft}
3012
3013 \begin{verbatim}
3014 if ! ip address add $pfx brd + dev $dev; then
3015 echo "Failed to add $pfx on $dev, trying DHCP..." 1>&2
3016 dhcpcd
3017 exit $?
3018 fi
3019 \end{verbatim}
3020
3021 \noindent\# {\bf Step 4} --- Announce our presence on the link.
3022 \begin{verbatim}
3023 arping -A -c 1 -I $dev $ipaddr
3024 noarp=$?
3025 ( sleep 2;
3026 arping -U -c 1 -I $dev $ipaddr ) >& /dev/null </dev/null &
3027 \end{verbatim}
3028
3029 \begin{flushleft}
3030 \# {\bf Step 5} (optional) --- Add some control routes.\\
3031 \#\\
3032 \# 1. Prohibit link local multicast addresses.\\
3033 \# 2. Prohibit link local (alias, limited) broadcast.\\
3034 \# 3. Add default multicast route.
3035 \end{flushleft}
3036 \begin{verbatim}
3037 ip route add unreachable 224.0.0.0/24
3038 ip route add unreachable 255.255.255.255
3039 if [ `ip link ls $dev | grep -c MULTICAST` -ge 1 ]; then
3040 ip route add 224.0.0.0/4 dev $dev scope global
3041 fi
3042 \end{verbatim}
3043
3044 \begin{flushleft}
3045 \# {\bf Step 6} --- Add fallback default route with huge metric.\\
3046 \# If a proxy ARP server is present on the interface, we will be\\
3047 \# able to talk to all the Internet without further configuration.\\
3048 \# It is not so cheap though and we still hope that this route\\
3049 \# will be overridden by a more correct one from rdisc.\\
3050 \# Do not perform this step if the device is not ARPable,\\
3051 \# because dead nexthop detection does not work on such devices.
3052 \end{flushleft}
3053 \begin{verbatim}
3054 if [ "$noarp" = "0" ]; then
3055 ip ro add default dev $dev metric 30000 scope global
3056 fi
3057 \end{verbatim}
3058
3059 \begin{flushleft}
3060 \# {\bf Step 7} --- Restart router discovery and exit.
3061 \end{flushleft}
3062 \begin{verbatim}
3063 killall -HUP rdisc || rdisc -fs
3064 exit 0
3065 \end{verbatim}
3066
3067
3068 \section{Example: {\protect\tt ifcfg} --- interface address management}
3069 \label{EXAMPLE-IFCFG}
3070
3071 This is a simplistic script replacing one option of \verb|ifconfig|,
3072 namely, IP address management. It not only adds
3073 addresses, but also carries out Duplicate Address Detection~\cite{RFC-DHCP},
3074 sends unsolicited ARP to update the caches of other hosts sharing
3075 the interface, adds some control routes and restarts Router Discovery
3076 when it is necessary.
3077
3078 I strongly recommend using it {\em instead\/} of \verb|ifconfig| both
3079 on hosts and on routers.
3080
3081 \begin{verbatim}
3082 #! /bin/bash
3083 \end{verbatim}
3084 \begin{flushleft}
3085 \# {\bf Usage: \verb?ifcfg DEVICE[:ALIAS] [add|del] ADDRESS[/LENGTH] [PEER]?}\\
3086 \# {\bf Parameters:}\\
3087 \# ---Device name. It may have alias suffix, separated by colon.\\
3088 \# ---Command: add, delete or stop.\\
3089 \# ---IP address, optionally followed by prefix length.\\
3090 \# ---Optional peer address for pointopoint interfaces.\\
3091 \# F.e.\ \verb|ifcfg eth0 193.233.7.90/24|
3092
3093 \noindent\# This function determines whether this machine is a router or a host.\\
3094 \# It returns 0 if the host is apparently not a router.
3095 \end{flushleft}
3096 \begin{verbatim}
3097 CheckForwarding () {
3098 local sbase fwd
3099 sbase=/proc/sys/net/ipv4/conf
3100 fwd=0
3101 if [ -d $sbase ]; then
3102 for dir in $sbase/*/forwarding; do
3103 fwd=$[$fwd + `cat $dir`]
3104 done
3105 else
3106 fwd=2
3107 fi
3108 return $fwd
3109 }
3110 \end{verbatim}
3111 \begin{flushleft}
3112 \# This function restarts Router Discovery.\\
3113 \end{flushleft}
3114 \begin{verbatim}
3115 RestartRDISC () {
3116 killall -HUP rdisc || rdisc -fs
3117 }
3118 \end{verbatim}
3119 \begin{flushleft}
3120 \# Calculate ABC ``natural'' mask length\\
3121 \# Arg: \$1 = dotquad address
3122 \end{flushleft}
3123 \begin{verbatim}
3124 ABCMaskLen () {
3125 local class;
3126 class=${1%%.*}
3127 if [ $class -eq 0 -o $class -ge 224 ]; then return 0
3128 elif [ $class -ge 192 ]; then return 24
3129 elif [ $class -ge 128 ]; then return 16
3130 else return 8 ; fi
3131 }
3132 \end{verbatim}
3133
3134
3135 \begin{flushleft}
3136 \# {\bf MAIN()}\\
3137 \#\\
3138 \# Strip alias suffix separated by colon.
3139 \end{flushleft}
3140 \begin{verbatim}
3141 label="label $1"
3142 ldev=$1
3143 dev=${1%:*}
3144 if [ "$dev" = "" -o "$1" = "help" ]; then
3145 echo "Usage: ifcfg DEV [[add|del [ADDR[/LEN]] [PEER] | stop]" 1>&2
3146 echo " add - add new address" 1>&2
3147 echo " del - delete address" 1>&2
3148 echo " stop - completely disable IP" 1>&2
3149 exit 1
3150 fi
3151 shift
3152
3153 CheckForwarding
3154 fwd=$?
3155 \end{verbatim}
3156 \begin{flushleft}
3157 \# Parse command. If it is ``stop'', flush and exit.
3158 \end{flushleft}
3159 \begin{verbatim}
3160 deleting=0
3161 case "$1" in
3162 add) shift ;;
3163 stop)
3164 if [ "$ldev" != "$dev" ]; then
3165 echo "Cannot stop alias $ldev" 1>&2
3166 exit 1;
3167 fi
3168 ip -4 addr flush dev $dev $label || exit 1
3169 if [ $fwd -eq 0 ]; then RestartRDISC; fi
3170 exit 0 ;;
3171 del*)
3172 deleting=1; shift ;;
3173 *)
3174 esac
3175 \end{verbatim}
3176 \begin{flushleft}
3177 \# Parse prefix, split prefix length, separated by slash.
3178 \end{flushleft}
3179 \begin{verbatim}
3180 ipaddr=
3181 pfxlen=
3182 if [ "$1" != "" ]; then
3183 ipaddr=${1%/*}
3184 if [ "$1" != "$ipaddr" ]; then
3185 pfxlen=${1#*/}
3186 fi
3187 if [ "$ipaddr" = "" ]; then
3188 echo "$1 is bad IP address." 1>&2
3189 exit 1
3190 fi
3191 fi
3192 shift
3193 \end{verbatim}
3194 \begin{flushleft}
3195 \# If peer address is present, prefix length is 32.\\
3196 \# Otherwise, if prefix length was not given, guess it.
3197 \end{flushleft}
3198 \begin{verbatim}
3199 peer=$1
3200 if [ -z "$peer" ]; then
3201 if [ -z "$pfxlen" ]; then
3202 ABCMaskLen $ipaddr
3203 pfxlen=$?
3204 fi
3205 pfx="$ipaddr/$pfxlen"
3206 else
3207 case "$pfxlen" in
3208 ""|32) ;;
3209 *) echo "Peer address with non-trivial netmask." 1>&2
3210 exit 1 ;;
3211 esac
3212 pfx="$ipaddr peer $peer"
3213 fi
3214 if [ "$ldev" = "$dev" -a "$ipaddr" != "" ]; then label=
3215 fi
3216 \end{verbatim}
3217 \begin{flushleft}
3218 \# If deletion was requested, delete the address and restart RDISC
3219 \end{flushleft}
3220 \begin{verbatim}
3221 if [ $deleting -ne 0 ]; then
3222 if ! ip addr del $pfx dev $dev $label; then exit 1; fi
3223 [ $fwd -eq 0 ] && RestartRDISC
3224 exit 0
3225 fi
3226 \end{verbatim}
3227 \begin{flushleft}
3228 \# Start interface initialization.\\
3229 \#\\
3230 \# {\bf Step 0} --- enable device \verb|$dev|
3231 \end{flushleft}
3232 \begin{verbatim}
3233 if ! ip link set up dev $dev ; then
3234 echo "Error: cannot enable interface $dev." 1>&2
3235 exit 1
3236 fi
3237 if [ "$ipaddr" = "" ]; then exit 0; fi
3238 \end{verbatim}
3239 \begin{flushleft}
3240 \# {\bf Step 1} --- IP Duplicate Address Detection~\cite{RFC-DHCP}.\\
3241 \# Send two probes and wait for result for 3 seconds.\\
3242 \# If the interface comes up slowly, e.g.\ due to long media detection,\\
3243 \# you may want to increase the timeout.
3244 \end{flushleft}
3245 \begin{verbatim}
3246 if ! arping -q -c 2 -w 3 -D -I $dev $ipaddr ; then
3247 echo "Error: some host already uses address $ipaddr on $dev." 1>&2
3248 exit 1
3249 fi
3250 \end{verbatim}
3251 \begin{flushleft}
3252 \# OK, the address is unique. We may add it to the interface.\\
3253 \#\\
3254 \# {\bf Step 2} --- Configure the address on the interface.
3255 \end{flushleft}
3256 \begin{verbatim}
3257 if ! ip address add $pfx brd + dev $dev $label; then
3258 echo "Error: failed to add $pfx on $dev." 1>&2
3259 exit 1
3260 fi
3261 \end{verbatim}
3262 \noindent\# {\bf Step 3} --- Announce our presence on the link
3263 \begin{verbatim}
3264 arping -q -A -c 1 -I $dev $ipaddr
3265 noarp=$? # exit status of the arping call on the previous line
3266 ( sleep 2
3267 arping -q -U -c 1 -I $dev $ipaddr ) </dev/null >/dev/null 2>&1 &
3268 \end{verbatim}
3269 \begin{flushleft}
3270 \# {\bf Step 4} (optional) --- Add some control routes.\\
3271 \#\\
3272 \# 1. Prohibit link local multicast addresses.\\
3273 \# 2. Prohibit link local (a.k.a.\ limited) broadcast.\\
3274 \# 3. Add default multicast route.
3275 \end{flushleft}
3276 \begin{verbatim}
3277 ip route add unreachable 224.0.0.0/24 > /dev/null 2>&1
3278 ip route add unreachable 255.255.255.255 > /dev/null 2>&1
3279 if ip link ls $dev | grep -q MULTICAST ; then
3280 ip route add 224.0.0.0/4 dev $dev scope global > /dev/null 2>&1
3281 fi
3282 \end{verbatim}
3283 \begin{flushleft}
3284 \# {\bf Step 5} --- Add fallback default route with huge metric.\\
3285 \# If a proxy ARP server is present on the interface, we will be\\
3286 \# able to reach the whole Internet without further configuration.\\
3287 \# Do not perform this step on a router or if the device is not ARPable,\\
3288 \# because dead nexthop detection does not work on them.
3289 \end{flushleft}
3290 \begin{verbatim}
3291 if [ $fwd -eq 0 ]; then
3292 if [ $noarp -eq 0 ]; then
3293 ip ro append default dev $dev metric 30000 scope global
3294 else
3295 if [ -n "$peer" ] && ping -q -c 2 -w 4 $peer ; then
3296 ip ro append default via $peer dev $dev metric 30001
3297 fi
3298 fi
3299 RestartRDISC
3300 fi
3301
3302 exit 0
3303 \end{verbatim}
3304 \begin{flushleft}
3305 \# End of {\bf MAIN()}
3306 \end{flushleft}
3307
3308
3309 \end{document}