2 * Copyright (c) 2009, 2010 Nicira Networks.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
21 #include <arpa/inet.h>
23 #include <linux/if_tun.h>
25 #include <linux/types.h>
26 #include <linux/ethtool.h>
27 #include <linux/pkt_sched.h>
28 #include <linux/rtnetlink.h>
29 #include <linux/sockios.h>
30 #include <linux/version.h>
31 #include <sys/types.h>
32 #include <sys/ioctl.h>
33 #include <sys/socket.h>
34 #include <netpacket/packet.h>
35 #include <net/ethernet.h>
37 #include <linux/if_tunnel.h>
38 #include <net/if_arp.h>
39 #include <net/if_packet.h>
40 #include <net/route.h>
41 #include <netinet/in.h>
48 #include "dynamic-string.h"
49 #include "fatal-signal.h"
50 #include "netdev-provider.h"
51 #include "netdev-vport.h"
54 #include "openflow/openflow.h"
55 #include "openvswitch/gre.h"
57 #include "poll-loop.h"
58 #include "rtnetlink.h"
59 #include "socket-util.h"
63 #define THIS_MODULE VLM_netdev_linux
66 /* These were introduced in Linux 2.6.14, so they might be missing if we have
68 #ifndef ADVERTISED_Pause
69 #define ADVERTISED_Pause (1 << 13)
71 #ifndef ADVERTISED_Asym_Pause
72 #define ADVERTISED_Asym_Pause (1 << 14)
75 static struct rtnetlink_notifier netdev_linux_cache_notifier
;
76 static int cache_notifier_refcount
;
79 VALID_IFINDEX
= 1 << 0,
80 VALID_ETHERADDR
= 1 << 1,
84 VALID_CARRIER
= 1 << 5,
85 VALID_IS_PSEUDO
= 1 << 6, /* Represents is_internal and is_tap. */
86 VALID_POLICING
= 1 << 7,
87 VALID_HAVE_VPORT_STATS
= 1 << 8
95 struct netdev_dev_linux
{
96 struct netdev_dev netdev_dev
;
98 struct shash_node
*shash_node
;
99 unsigned int cache_valid
;
101 /* The following are figured out "on demand" only. They are only valid
102 * when the corresponding VALID_* bit in 'cache_valid' is set. */
104 uint8_t etheraddr
[ETH_ADDR_LEN
];
105 struct in_addr address
, netmask
;
109 bool is_internal
; /* Is this an openvswitch internal device? */
110 bool is_tap
; /* Is this a tuntap device? */
111 uint32_t kbits_rate
; /* Policing data. */
112 uint32_t kbits_burst
;
113 bool have_vport_stats
;
116 struct tap_state tap
;
120 struct netdev_linux
{
121 struct netdev netdev
;
125 /* An AF_INET socket (used for ioctl operations). */
126 static int af_inet_sock
= -1;
128 /* A Netlink routing socket that is not subscribed to any multicast groups. */
129 static struct nl_sock
*rtnl_sock
;
131 struct netdev_linux_notifier
{
132 struct netdev_notifier notifier
;
136 static struct shash netdev_linux_notifiers
=
137 SHASH_INITIALIZER(&netdev_linux_notifiers
);
138 static struct rtnetlink_notifier netdev_linux_poll_notifier
;
140 /* This is set pretty low because we probably won't learn anything from the
141 * additional log messages. */
142 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 20);
144 static int netdev_linux_init(void);
146 static int netdev_linux_do_ethtool(const char *name
, struct ethtool_cmd
*,
147 int cmd
, const char *cmd_name
);
148 static int netdev_linux_do_ioctl(const char *name
, struct ifreq
*, int cmd
,
149 const char *cmd_name
);
150 static int netdev_linux_get_ipv4(const struct netdev
*, struct in_addr
*,
151 int cmd
, const char *cmd_name
);
152 static int get_flags(const struct netdev
*, int *flagsp
);
153 static int set_flags(struct netdev
*, int flags
);
154 static int do_get_ifindex(const char *netdev_name
);
155 static int get_ifindex(const struct netdev
*, int *ifindexp
);
156 static int do_set_addr(struct netdev
*netdev
,
157 int ioctl_nr
, const char *ioctl_name
,
158 struct in_addr addr
);
159 static int get_etheraddr(const char *netdev_name
, uint8_t ea
[ETH_ADDR_LEN
]);
160 static int set_etheraddr(const char *netdev_name
, int hwaddr_family
,
161 const uint8_t[ETH_ADDR_LEN
]);
162 static int get_stats_via_netlink(int ifindex
, struct netdev_stats
*stats
);
163 static int get_stats_via_proc(const char *netdev_name
, struct netdev_stats
*stats
);
/* Reports whether 'netdev_class' uses netdev_linux_init() as its 'init'
 * callback.  The *_cast() helpers in this file assert this before converting
 * a generic netdev or netdev_dev pointer to its Linux-specific container. */
166 is_netdev_linux_class(const struct netdev_class
*netdev_class
)
168 return netdev_class
->init
== netdev_linux_init
;
171 static struct netdev_dev_linux
*
172 netdev_dev_linux_cast(const struct netdev_dev
*netdev_dev
)
174 const struct netdev_class
*netdev_class
= netdev_dev_get_class(netdev_dev
);
175 assert(is_netdev_linux_class(netdev_class
));
177 return CONTAINER_OF(netdev_dev
, struct netdev_dev_linux
, netdev_dev
);
180 static struct netdev_linux
*
181 netdev_linux_cast(const struct netdev
*netdev
)
183 struct netdev_dev
*netdev_dev
= netdev_get_dev(netdev
);
184 const struct netdev_class
*netdev_class
= netdev_dev_get_class(netdev_dev
);
185 assert(is_netdev_linux_class(netdev_class
));
187 return CONTAINER_OF(netdev
, struct netdev_linux
, netdev
);
191 netdev_linux_init(void)
193 static int status
= -1;
195 /* Create AF_INET socket. */
196 af_inet_sock
= socket(AF_INET
, SOCK_DGRAM
, 0);
197 status
= af_inet_sock
>= 0 ? 0 : errno
;
199 VLOG_ERR("failed to create inet socket: %s", strerror(status
));
202 /* Create rtnetlink socket. */
204 status
= nl_sock_create(NETLINK_ROUTE
, 0, 0, 0, &rtnl_sock
);
206 VLOG_ERR_RL(&rl
, "failed to create rtnetlink socket: %s",
215 netdev_linux_run(void)
217 rtnetlink_notifier_run();
221 netdev_linux_wait(void)
223 rtnetlink_notifier_wait();
227 netdev_linux_cache_cb(const struct rtnetlink_change
*change
,
228 void *aux OVS_UNUSED
)
230 struct netdev_dev_linux
*dev
;
232 struct netdev_dev
*base_dev
= netdev_dev_from_name(change
->ifname
);
234 const struct netdev_class
*netdev_class
=
235 netdev_dev_get_class(base_dev
);
237 if (is_netdev_linux_class(netdev_class
)) {
238 dev
= netdev_dev_linux_cast(base_dev
);
239 dev
->cache_valid
= 0;
243 struct shash device_shash
;
244 struct shash_node
*node
;
246 shash_init(&device_shash
);
247 netdev_dev_get_devices(&netdev_linux_class
, &device_shash
);
248 SHASH_FOR_EACH (node
, &device_shash
) {
250 dev
->cache_valid
= 0;
252 shash_destroy(&device_shash
);
256 /* Creates the netdev device of 'type' with 'name'. */
258 netdev_linux_create_system(const char *name
, const char *type OVS_UNUSED
,
259 const struct shash
*args
, struct netdev_dev
**netdev_devp
)
261 struct netdev_dev_linux
*netdev_dev
;
264 if (!shash_is_empty(args
)) {
265 VLOG_WARN("%s: arguments for system devices should be empty", name
);
268 if (!cache_notifier_refcount
) {
269 error
= rtnetlink_notifier_register(&netdev_linux_cache_notifier
,
270 netdev_linux_cache_cb
, NULL
);
275 cache_notifier_refcount
++;
277 netdev_dev
= xzalloc(sizeof *netdev_dev
);
278 netdev_dev_init(&netdev_dev
->netdev_dev
, name
, &netdev_linux_class
);
280 *netdev_devp
= &netdev_dev
->netdev_dev
;
284 /* For most types of netdevs we open the device for each call of
285 * netdev_open(). However, this is not the case with tap devices,
286 * since it is only possible to open the device once. In this
287 * situation we share a single file descriptor, and consequently
288 * buffers, across all readers. Therefore once data is read it will
289 * be unavailable to other reads for tap devices. */
291 netdev_linux_create_tap(const char *name
, const char *type OVS_UNUSED
,
292 const struct shash
*args
, struct netdev_dev
**netdev_devp
)
294 struct netdev_dev_linux
*netdev_dev
;
295 struct tap_state
*state
;
296 static const char tap_dev
[] = "/dev/net/tun";
300 if (!shash_is_empty(args
)) {
301 VLOG_WARN("%s: arguments for TAP devices should be empty", name
);
304 netdev_dev
= xzalloc(sizeof *netdev_dev
);
305 state
= &netdev_dev
->state
.tap
;
307 /* Open tap device. */
308 state
->fd
= open(tap_dev
, O_RDWR
);
311 VLOG_WARN("opening \"%s\" failed: %s", tap_dev
, strerror(error
));
315 /* Create tap device. */
316 ifr
.ifr_flags
= IFF_TAP
| IFF_NO_PI
;
317 strncpy(ifr
.ifr_name
, name
, sizeof ifr
.ifr_name
);
318 if (ioctl(state
->fd
, TUNSETIFF
, &ifr
) == -1) {
319 VLOG_WARN("%s: creating tap device failed: %s", name
,
325 /* Make non-blocking. */
326 error
= set_nonblocking(state
->fd
);
331 netdev_dev_init(&netdev_dev
->netdev_dev
, name
, &netdev_tap_class
);
332 *netdev_devp
= &netdev_dev
->netdev_dev
;
341 destroy_tap(struct netdev_dev_linux
*netdev_dev
)
343 struct tap_state
*state
= &netdev_dev
->state
.tap
;
345 if (state
->fd
>= 0) {
350 /* Destroys the netdev device 'netdev_dev_'. */
352 netdev_linux_destroy(struct netdev_dev
*netdev_dev_
)
354 struct netdev_dev_linux
*netdev_dev
= netdev_dev_linux_cast(netdev_dev_
);
355 const char *type
= netdev_dev_get_type(netdev_dev_
);
357 if (!strcmp(type
, "system")) {
358 cache_notifier_refcount
--;
360 if (!cache_notifier_refcount
) {
361 rtnetlink_notifier_unregister(&netdev_linux_cache_notifier
);
363 } else if (!strcmp(type
, "tap")) {
364 destroy_tap(netdev_dev
);
371 netdev_linux_open(struct netdev_dev
*netdev_dev_
, int ethertype
,
372 struct netdev
**netdevp
)
374 struct netdev_dev_linux
*netdev_dev
= netdev_dev_linux_cast(netdev_dev_
);
375 struct netdev_linux
*netdev
;
376 enum netdev_flags flags
;
379 /* Allocate network device. */
380 netdev
= xzalloc(sizeof *netdev
);
382 netdev_init(&netdev
->netdev
, netdev_dev_
);
384 error
= netdev_get_flags(&netdev
->netdev
, &flags
);
385 if (error
== ENODEV
) {
389 if (!strcmp(netdev_dev_get_type(netdev_dev_
), "tap") &&
390 !netdev_dev
->state
.tap
.opened
) {
392 /* We assume that the first user of the tap device is the primary user
393 * and give them the tap FD. Subsequent users probably just expect
394 * this to be a system device so open it normally to avoid send/receive
395 * directions appearing to be reversed. */
396 netdev
->fd
= netdev_dev
->state
.tap
.fd
;
397 netdev_dev
->state
.tap
.opened
= true;
398 } else if (ethertype
!= NETDEV_ETH_TYPE_NONE
) {
399 struct sockaddr_ll sll
;
403 /* Create file descriptor. */
404 protocol
= (ethertype
== NETDEV_ETH_TYPE_ANY
? ETH_P_ALL
405 : ethertype
== NETDEV_ETH_TYPE_802_2
? ETH_P_802_2
407 netdev
->fd
= socket(PF_PACKET
, SOCK_RAW
, htons(protocol
));
408 if (netdev
->fd
< 0) {
413 /* Set non-blocking mode. */
414 error
= set_nonblocking(netdev
->fd
);
419 /* Get ethernet device index. */
420 error
= get_ifindex(&netdev
->netdev
, &ifindex
);
425 /* Bind to specific ethernet device. */
426 memset(&sll
, 0, sizeof sll
);
427 sll
.sll_family
= AF_PACKET
;
428 sll
.sll_ifindex
= ifindex
;
430 (struct sockaddr
*) &sll
, sizeof sll
) < 0) {
432 VLOG_ERR("bind to %s failed: %s", netdev_dev_get_name(netdev_dev_
),
437 /* Between the socket() and bind() calls above, the socket receives all
438 * packets of the requested type on all system interfaces. We do not
439 * want to receive that data, but there is no way to avoid it. So we
440 * must now drain out the receive queue. */
441 error
= drain_rcvbuf(netdev
->fd
);
447 *netdevp
= &netdev
->netdev
;
451 netdev_uninit(&netdev
->netdev
, true);
455 /* Closes and destroys 'netdev'. */
457 netdev_linux_close(struct netdev
*netdev_
)
459 struct netdev_linux
*netdev
= netdev_linux_cast(netdev_
);
/* The body of this test is reached only for devices whose type is not "tap";
 * netdev_linux_open() may hand a tap netdev the descriptor stored in the
 * shared device state rather than one of its own.
 *
 * NOTE(review): this tests 'fd > 0', whereas netdev_linux_recv(),
 * netdev_linux_recv_wait() and netdev_linux_send() use 'fd < 0' / 'fd >= 0'
 * to mean "no descriptor" / "has descriptor".  A descriptor numbered 0 would
 * be skipped here -- confirm whether 'fd >= 0' was intended. */
461 if (netdev
->fd
> 0 && strcmp(netdev_get_type(netdev_
), "tap")) {
467 /* Appends the names of all known network devices to 'svec'. */
469 netdev_linux_enumerate(struct svec
*svec
)
471 struct if_nameindex
*names
;
473 names
= if_nameindex();
477 for (i
= 0; names
[i
].if_name
!= NULL
; i
++) {
478 svec_add(svec
, names
[i
].if_name
);
480 if_freenameindex(names
);
483 VLOG_WARN("could not obtain list of network device names: %s",
/* Calls read() on 'netdev_''s file descriptor to fill the 'size'-byte buffer
 * at 'data'.  A netdev whose descriptor is negative has nothing to read from.
 * read() failures other than EINTR and EAGAIN are logged (rate-limited). */
490 netdev_linux_recv(struct netdev
*netdev_
, void *data
, size_t size
)
492 struct netdev_linux
*netdev
= netdev_linux_cast(netdev_
);
494 if (netdev
->fd
< 0) {
495 /* Device was opened with NETDEV_ETH_TYPE_NONE. */
500 ssize_t retval
= read(netdev
->fd
, data
, size
);
503 } else if (errno
!= EINTR
) {
504 if (errno
!= EAGAIN
) {
/* NOTE(review): the format string expects the device name first and the
 * error text second, but the arguments below are passed in the opposite
 * order, so the log line reads "on <error>: <device>".  Compare the
 * corresponding message in netdev_linux_send(), which passes the name
 * first. */
505 VLOG_WARN_RL(&rl
, "error receiving Ethernet packet on %s: %s",
506 strerror(errno
), netdev_get_name(netdev_
));
513 /* Registers with the poll loop to wake up from the next call to poll_block()
514 * when a packet is ready to be received with netdev_recv() on 'netdev'. */
516 netdev_linux_recv_wait(struct netdev
*netdev_
)
518 struct netdev_linux
*netdev
= netdev_linux_cast(netdev_
);
519 if (netdev
->fd
>= 0) {
520 poll_fd_wait(netdev
->fd
, POLLIN
);
524 /* Discards all packets waiting to be received from 'netdev'. */
526 netdev_linux_drain(struct netdev
*netdev_
)
528 struct netdev_linux
*netdev
= netdev_linux_cast(netdev_
);
529 if (netdev
->fd
< 0) {
531 } else if (!strcmp(netdev_get_type(netdev_
), "tap")) {
533 int error
= netdev_linux_do_ioctl(netdev_get_name(netdev_
), &ifr
,
534 SIOCGIFTXQLEN
, "SIOCGIFTXQLEN");
538 drain_fd(netdev
->fd
, ifr
.ifr_qlen
);
541 return drain_rcvbuf(netdev
->fd
);
545 /* Sends the 'size' bytes at 'data' on 'netdev'. Returns 0 if successful,
546 * otherwise a positive errno value. Returns EAGAIN without blocking if the
547 * packet cannot be queued immediately. Returns EMSGSIZE if a partial packet
548 * was transmitted or if the packet is too big or too small for the device.
550 * The caller retains ownership of 'data' in all cases.
552 * The kernel maintains a packet transmission queue, so the caller is not
553 * expected to do additional queuing of packets. */
555 netdev_linux_send(struct netdev
*netdev_
, const void *data
, size_t size
)
557 struct netdev_linux
*netdev
= netdev_linux_cast(netdev_
);
559 /* XXX should support sending even if 'ethertype' was NETDEV_ETH_TYPE_NONE.
561 if (netdev
->fd
< 0) {
566 ssize_t retval
= write(netdev
->fd
, data
, size
);
568 /* The Linux AF_PACKET implementation never blocks waiting for room
569 * for packets, instead returning ENOBUFS. Translate this into
570 * EAGAIN for the caller. */
571 if (errno
== ENOBUFS
) {
573 } else if (errno
== EINTR
) {
575 } else if (errno
!= EAGAIN
) {
576 VLOG_WARN_RL(&rl
, "error sending Ethernet packet on %s: %s",
577 netdev_get_name(netdev_
), strerror(errno
));
580 } else if (retval
!= size
) {
581 VLOG_WARN_RL(&rl
, "sent partial Ethernet packet (%zd bytes of "
582 "%zu) on %s", retval
, size
, netdev_get_name(netdev_
));
590 /* Registers with the poll loop to wake up from the next call to poll_block()
591 * when the packet transmission queue has sufficient room to transmit a packet
592 * with netdev_send().
594 * The kernel maintains a packet transmission queue, so the client is not
595 * expected to do additional queuing of packets. Thus, this function is
596 * unlikely to ever be used. It is included for completeness. */
598 netdev_linux_send_wait(struct netdev
*netdev_
)
600 struct netdev_linux
*netdev
= netdev_linux_cast(netdev_
);
601 if (netdev
->fd
< 0) {
603 } else if (strcmp(netdev_get_type(netdev_
), "tap")) {
604 poll_fd_wait(netdev
->fd
, POLLOUT
);
606 /* TAP device always accepts packets.*/
607 poll_immediate_wake();
611 /* Attempts to set 'netdev''s MAC address to 'mac'. Returns 0 if successful,
612 * otherwise a positive errno value. */
614 netdev_linux_set_etheraddr(struct netdev
*netdev_
,
615 const uint8_t mac
[ETH_ADDR_LEN
])
617 struct netdev_dev_linux
*netdev_dev
=
618 netdev_dev_linux_cast(netdev_get_dev(netdev_
));
621 if (!(netdev_dev
->cache_valid
& VALID_ETHERADDR
)
622 || !eth_addr_equals(netdev_dev
->etheraddr
, mac
)) {
623 error
= set_etheraddr(netdev_get_name(netdev_
), ARPHRD_ETHER
, mac
);
625 netdev_dev
->cache_valid
|= VALID_ETHERADDR
;
626 memcpy(netdev_dev
->etheraddr
, mac
, ETH_ADDR_LEN
);
634 /* Copies 'netdev''s MAC address into the caller-supplied buffer 'mac',
635 * refreshing the cached address first if it is not currently valid. */
637 netdev_linux_get_etheraddr(const struct netdev
*netdev_
,
638 uint8_t mac
[ETH_ADDR_LEN
])
640 struct netdev_dev_linux
*netdev_dev
=
641 netdev_dev_linux_cast(netdev_get_dev(netdev_
));
642 if (!(netdev_dev
->cache_valid
& VALID_ETHERADDR
)) {
643 int error
= get_etheraddr(netdev_get_name(netdev_
),
644 netdev_dev
->etheraddr
);
648 netdev_dev
->cache_valid
|= VALID_ETHERADDR
;
650 memcpy(mac
, netdev_dev
->etheraddr
, ETH_ADDR_LEN
);
654 /* Returns the maximum size of transmitted (and received) packets on 'netdev',
655 * in bytes, not including the hardware header; thus, this is typically 1500
656 * bytes for Ethernet devices. */
658 netdev_linux_get_mtu(const struct netdev
*netdev_
, int *mtup
)
660 struct netdev_dev_linux
*netdev_dev
=
661 netdev_dev_linux_cast(netdev_get_dev(netdev_
));
662 if (!(netdev_dev
->cache_valid
& VALID_MTU
)) {
666 error
= netdev_linux_do_ioctl(netdev_get_name(netdev_
), &ifr
,
667 SIOCGIFMTU
, "SIOCGIFMTU");
671 netdev_dev
->mtu
= ifr
.ifr_mtu
;
672 netdev_dev
->cache_valid
|= VALID_MTU
;
674 *mtup
= netdev_dev
->mtu
;
678 /* Returns the ifindex of 'netdev', if successful, as a positive number.
679 * On failure, returns a negative errno value. */
681 netdev_linux_get_ifindex(const struct netdev
*netdev
)
685 error
= get_ifindex(netdev
, &ifindex
);
686 return error
? -error
: ifindex
;
690 netdev_linux_get_carrier(const struct netdev
*netdev_
, bool *carrier
)
692 struct netdev_dev_linux
*netdev_dev
=
693 netdev_dev_linux_cast(netdev_get_dev(netdev_
));
698 if (!(netdev_dev
->cache_valid
& VALID_CARRIER
)) {
702 fn
= xasprintf("/sys/class/net/%s/carrier",
703 netdev_get_name(netdev_
));
704 fd
= open(fn
, O_RDONLY
);
707 VLOG_WARN_RL(&rl
, "%s: open failed: %s", fn
, strerror(error
));
711 retval
= read(fd
, line
, sizeof line
);
714 if (error
== EINVAL
) {
715 /* This is the normal return value when we try to check carrier
716 * if the network device is not up. */
718 VLOG_WARN_RL(&rl
, "%s: read failed: %s", fn
, strerror(error
));
721 } else if (retval
== 0) {
723 VLOG_WARN_RL(&rl
, "%s: unexpected end of file", fn
);
727 if (line
[0] != '0' && line
[0] != '1') {
729 VLOG_WARN_RL(&rl
, "%s: value is %c (expected 0 or 1)",
733 netdev_dev
->carrier
= line
[0] != '0';
734 netdev_dev
->cache_valid
|= VALID_CARRIER
;
736 *carrier
= netdev_dev
->carrier
;
747 /* Check whether we can use RTM_GETLINK to get network device statistics.
748 * In pre-2.6.19 kernels, this was only available if wireless extensions were
751 check_for_working_netlink_stats(void)
753 /* Decide on the netdev_get_stats() implementation to use. Netlink is
754 * preferable, so if that works, we'll use it. */
755 int ifindex
= do_get_ifindex("lo");
757 VLOG_WARN("failed to get ifindex for lo, "
758 "obtaining netdev stats from proc");
761 struct netdev_stats stats
;
762 int error
= get_stats_via_netlink(ifindex
, &stats
);
764 VLOG_DBG("obtaining netdev stats via rtnetlink");
767 VLOG_INFO("RTM_GETLINK failed (%s), obtaining netdev stats "
768 "via proc (you are probably running a pre-2.6.19 "
769 "kernel)", strerror(error
));
775 /* Brings the 'is_internal' and 'is_tap' members of 'netdev_dev' up-to-date. */
777 netdev_linux_update_is_pseudo(struct netdev_dev_linux
*netdev_dev
)
779 if (!(netdev_dev
->cache_valid
& VALID_IS_PSEUDO
)) {
780 const char *name
= netdev_dev_get_name(&netdev_dev
->netdev_dev
);
781 const char *type
= netdev_dev_get_type(&netdev_dev
->netdev_dev
);
783 netdev_dev
->is_tap
= !strcmp(type
, "tap");
784 netdev_dev
->is_internal
= false;
785 if (!netdev_dev
->is_tap
) {
786 struct ethtool_drvinfo drvinfo
;
789 memset(&drvinfo
, 0, sizeof drvinfo
);
790 error
= netdev_linux_do_ethtool(name
,
791 (struct ethtool_cmd
*)&drvinfo
,
795 if (!error
&& !strcmp(drvinfo
.driver
, "openvswitch")) {
796 netdev_dev
->is_internal
= true;
800 netdev_dev
->cache_valid
|= VALID_IS_PSEUDO
;
805 swap_uint64(uint64_t *a
, uint64_t *b
)
812 /* Retrieves current device stats for 'netdev'. */
814 netdev_linux_get_stats(const struct netdev
*netdev_
,
815 struct netdev_stats
*stats
)
817 struct netdev_dev_linux
*netdev_dev
=
818 netdev_dev_linux_cast(netdev_get_dev(netdev_
));
819 static int use_netlink_stats
= -1;
822 COVERAGE_INC(netdev_get_stats
);
824 if (netdev_dev
->have_vport_stats
||
825 !(netdev_dev
->cache_valid
& VALID_HAVE_VPORT_STATS
)) {
827 error
= netdev_vport_get_stats(netdev_
, stats
);
828 netdev_dev
->have_vport_stats
= !error
;
829 netdev_dev
->cache_valid
|= VALID_HAVE_VPORT_STATS
;
832 if (!netdev_dev
->have_vport_stats
) {
833 if (use_netlink_stats
< 0) {
834 use_netlink_stats
= check_for_working_netlink_stats();
836 if (use_netlink_stats
) {
839 error
= get_ifindex(netdev_
, &ifindex
);
841 error
= get_stats_via_netlink(ifindex
, stats
);
844 error
= get_stats_via_proc(netdev_get_name(netdev_
), stats
);
848 /* If this port is an internal port then the transmit and receive stats
849 * will appear to be swapped relative to the other ports since we are the
850 * one sending the data, not a remote computer. For consistency, we swap
851 * them back here. This does not apply if we are getting stats from the
852 * vport layer because it always tracks stats from the perspective of the
854 netdev_linux_update_is_pseudo(netdev_dev
);
855 if (!error
&& !netdev_dev
->have_vport_stats
&&
856 (netdev_dev
->is_internal
|| netdev_dev
->is_tap
)) {
857 swap_uint64(&stats
->rx_packets
, &stats
->tx_packets
);
858 swap_uint64(&stats
->rx_bytes
, &stats
->tx_bytes
);
859 swap_uint64(&stats
->rx_errors
, &stats
->tx_errors
);
860 swap_uint64(&stats
->rx_dropped
, &stats
->tx_dropped
);
861 stats
->rx_length_errors
= 0;
862 stats
->rx_over_errors
= 0;
863 stats
->rx_crc_errors
= 0;
864 stats
->rx_frame_errors
= 0;
865 stats
->rx_fifo_errors
= 0;
866 stats
->rx_missed_errors
= 0;
867 stats
->tx_aborted_errors
= 0;
868 stats
->tx_carrier_errors
= 0;
869 stats
->tx_fifo_errors
= 0;
870 stats
->tx_heartbeat_errors
= 0;
871 stats
->tx_window_errors
= 0;
877 /* Stores the features supported by 'netdev' into each of '*current',
878 * '*advertised', '*supported', and '*peer' that are non-null. Each value is a
879 * bitmap of "enum ofp_port_features" bits, in host byte order. Returns 0 if
880 * successful, otherwise a positive errno value. */
882 netdev_linux_get_features(struct netdev
*netdev
,
883 uint32_t *current
, uint32_t *advertised
,
884 uint32_t *supported
, uint32_t *peer
)
886 struct ethtool_cmd ecmd
;
889 memset(&ecmd
, 0, sizeof ecmd
);
890 error
= netdev_linux_do_ethtool(netdev_get_name(netdev
), &ecmd
,
891 ETHTOOL_GSET
, "ETHTOOL_GSET");
896 /* Supported features. */
898 if (ecmd
.supported
& SUPPORTED_10baseT_Half
) {
899 *supported
|= OFPPF_10MB_HD
;
901 if (ecmd
.supported
& SUPPORTED_10baseT_Full
) {
902 *supported
|= OFPPF_10MB_FD
;
904 if (ecmd
.supported
& SUPPORTED_100baseT_Half
) {
905 *supported
|= OFPPF_100MB_HD
;
907 if (ecmd
.supported
& SUPPORTED_100baseT_Full
) {
908 *supported
|= OFPPF_100MB_FD
;
910 if (ecmd
.supported
& SUPPORTED_1000baseT_Half
) {
911 *supported
|= OFPPF_1GB_HD
;
913 if (ecmd
.supported
& SUPPORTED_1000baseT_Full
) {
914 *supported
|= OFPPF_1GB_FD
;
916 if (ecmd
.supported
& SUPPORTED_10000baseT_Full
) {
917 *supported
|= OFPPF_10GB_FD
;
919 if (ecmd
.supported
& SUPPORTED_TP
) {
920 *supported
|= OFPPF_COPPER
;
922 if (ecmd
.supported
& SUPPORTED_FIBRE
) {
923 *supported
|= OFPPF_FIBER
;
925 if (ecmd
.supported
& SUPPORTED_Autoneg
) {
926 *supported
|= OFPPF_AUTONEG
;
928 if (ecmd
.supported
& SUPPORTED_Pause
) {
929 *supported
|= OFPPF_PAUSE
;
931 if (ecmd
.supported
& SUPPORTED_Asym_Pause
) {
932 *supported
|= OFPPF_PAUSE_ASYM
;
935 /* Advertised features. */
937 if (ecmd
.advertising
& ADVERTISED_10baseT_Half
) {
938 *advertised
|= OFPPF_10MB_HD
;
940 if (ecmd
.advertising
& ADVERTISED_10baseT_Full
) {
941 *advertised
|= OFPPF_10MB_FD
;
943 if (ecmd
.advertising
& ADVERTISED_100baseT_Half
) {
944 *advertised
|= OFPPF_100MB_HD
;
946 if (ecmd
.advertising
& ADVERTISED_100baseT_Full
) {
947 *advertised
|= OFPPF_100MB_FD
;
949 if (ecmd
.advertising
& ADVERTISED_1000baseT_Half
) {
950 *advertised
|= OFPPF_1GB_HD
;
952 if (ecmd
.advertising
& ADVERTISED_1000baseT_Full
) {
953 *advertised
|= OFPPF_1GB_FD
;
955 if (ecmd
.advertising
& ADVERTISED_10000baseT_Full
) {
956 *advertised
|= OFPPF_10GB_FD
;
958 if (ecmd
.advertising
& ADVERTISED_TP
) {
959 *advertised
|= OFPPF_COPPER
;
961 if (ecmd
.advertising
& ADVERTISED_FIBRE
) {
962 *advertised
|= OFPPF_FIBER
;
964 if (ecmd
.advertising
& ADVERTISED_Autoneg
) {
965 *advertised
|= OFPPF_AUTONEG
;
967 if (ecmd
.advertising
& ADVERTISED_Pause
) {
968 *advertised
|= OFPPF_PAUSE
;
970 if (ecmd
.advertising
& ADVERTISED_Asym_Pause
) {
971 *advertised
|= OFPPF_PAUSE_ASYM
;
974 /* Current settings. */
975 if (ecmd
.speed
== SPEED_10
) {
976 *current
= ecmd
.duplex
? OFPPF_10MB_FD
: OFPPF_10MB_HD
;
977 } else if (ecmd
.speed
== SPEED_100
) {
978 *current
= ecmd
.duplex
? OFPPF_100MB_FD
: OFPPF_100MB_HD
;
979 } else if (ecmd
.speed
== SPEED_1000
) {
980 *current
= ecmd
.duplex
? OFPPF_1GB_FD
: OFPPF_1GB_HD
;
981 } else if (ecmd
.speed
== SPEED_10000
) {
982 *current
= OFPPF_10GB_FD
;
987 if (ecmd
.port
== PORT_TP
) {
988 *current
|= OFPPF_COPPER
;
989 } else if (ecmd
.port
== PORT_FIBRE
) {
990 *current
|= OFPPF_FIBER
;
994 *current
|= OFPPF_AUTONEG
;
997 /* Peer advertisements. */
1003 /* Set the features advertised by 'netdev' to 'advertise'. */
1005 netdev_linux_set_advertisements(struct netdev
*netdev
, uint32_t advertise
)
1007 struct ethtool_cmd ecmd
;
1010 memset(&ecmd
, 0, sizeof ecmd
);
1011 error
= netdev_linux_do_ethtool(netdev_get_name(netdev
), &ecmd
,
1012 ETHTOOL_GSET
, "ETHTOOL_GSET");
1017 ecmd
.advertising
= 0;
1018 if (advertise
& OFPPF_10MB_HD
) {
1019 ecmd
.advertising
|= ADVERTISED_10baseT_Half
;
1021 if (advertise
& OFPPF_10MB_FD
) {
1022 ecmd
.advertising
|= ADVERTISED_10baseT_Full
;
1024 if (advertise
& OFPPF_100MB_HD
) {
1025 ecmd
.advertising
|= ADVERTISED_100baseT_Half
;
1027 if (advertise
& OFPPF_100MB_FD
) {
1028 ecmd
.advertising
|= ADVERTISED_100baseT_Full
;
1030 if (advertise
& OFPPF_1GB_HD
) {
1031 ecmd
.advertising
|= ADVERTISED_1000baseT_Half
;
1033 if (advertise
& OFPPF_1GB_FD
) {
1034 ecmd
.advertising
|= ADVERTISED_1000baseT_Full
;
1036 if (advertise
& OFPPF_10GB_FD
) {
1037 ecmd
.advertising
|= ADVERTISED_10000baseT_Full
;
1039 if (advertise
& OFPPF_COPPER
) {
1040 ecmd
.advertising
|= ADVERTISED_TP
;
1042 if (advertise
& OFPPF_FIBER
) {
1043 ecmd
.advertising
|= ADVERTISED_FIBRE
;
1045 if (advertise
& OFPPF_AUTONEG
) {
1046 ecmd
.advertising
|= ADVERTISED_Autoneg
;
1048 if (advertise
& OFPPF_PAUSE
) {
1049 ecmd
.advertising
|= ADVERTISED_Pause
;
1051 if (advertise
& OFPPF_PAUSE_ASYM
) {
1052 ecmd
.advertising
|= ADVERTISED_Asym_Pause
;
1054 return netdev_linux_do_ethtool(netdev_get_name(netdev
), &ecmd
,
1055 ETHTOOL_SSET
, "ETHTOOL_SSET");
1058 /* If 'netdev_name' is the name of a VLAN network device (e.g. one created with
1059 * vconfig(8)), sets '*vlan_vid' to the VLAN VID associated with that device
1060 * and returns 0. Otherwise returns an errno value (specifically ENOENT if
1061 * 'netdev_name' is the name of a network device that is not a VLAN device) and
1062 * sets '*vlan_vid' to -1. */
1064 netdev_linux_get_vlan_vid(const struct netdev
*netdev
, int *vlan_vid
)
1066 const char *netdev_name
= netdev_get_name(netdev
);
1067 struct ds line
= DS_EMPTY_INITIALIZER
;
1068 FILE *stream
= NULL
;
1072 COVERAGE_INC(netdev_get_vlan_vid
);
/* The VID is read from the per-device file under /proc/net/vlan. */
1073 fn
= xasprintf("/proc/net/vlan/%s", netdev_name
);
1074 stream
= fopen(fn
, "r");
/* A nonzero result from ds_get_line() is handled as a failure; ferror()
 * then selects between the "error reading" and "end of file" messages. */
1080 if (ds_get_line(&line
, stream
)) {
1081 if (ferror(stream
)) {
1083 VLOG_ERR_RL(&rl
, "error reading \"%s\": %s", fn
, strerror(errno
));
1086 VLOG_ERR_RL(&rl
, "unexpected end of file reading \"%s\"", fn
);
/* NOTE(review): sscanf() returns EOF (a nonzero value) when the input ends
 * before the first conversion, which this '!' test does not treat as a parse
 * error.  Comparing the result against 1 would cover that case as well. */
1091 if (!sscanf(ds_cstr(&line
), "%*s VID: %d", vlan_vid
)) {
1093 VLOG_ERR_RL(&rl
, "parse error reading \"%s\" line 1: \"%s\"",
1094 fn
, ds_cstr(&line
));
/* Shell command templates that netdev_linux_set_policing() expands with
 * snprintf() and runs through system().  POLICE_ADD_CMD takes the device
 * name; POLICE_CONFIG_CMD takes the device name followed by the rate and
 * burst values.
 *
 * NOTE(review): netdev_linux_set_policing() supplies uint32_t values for the
 * two %d conversions, and the device name is inserted into the shell command
 * line unquoted -- confirm that both are acceptable for the names and values
 * callers can pass. */
1112 #define POLICE_ADD_CMD "/sbin/tc qdisc add dev %s handle ffff: ingress"
1113 #define POLICE_CONFIG_CMD "/sbin/tc filter add dev %s parent ffff: protocol ip prio 50 u32 match ip src 0.0.0.0/0 police rate %dkbit burst %dk mtu 65535 drop flowid :1"
1115 /* Remove ingress policing from 'netdev'. Returns 0 if successful, otherwise a
1116 * positive errno value.
1118 * This function is equivalent to running
1119 * /sbin/tc qdisc del dev %s handle ffff: ingress
1120 * but it is much, much faster.
1123 netdev_linux_remove_policing(struct netdev
*netdev
)
1125 struct netdev_dev_linux
*netdev_dev
=
1126 netdev_dev_linux_cast(netdev_get_dev(netdev
));
1127 const char *netdev_name
= netdev_get_name(netdev
);
1129 struct ofpbuf request
;
1130 struct ofpbuf
*reply
;
1131 struct tcmsg
*tcmsg
;
1135 error
= get_ifindex(netdev
, &ifindex
);
1140 ofpbuf_init(&request
, 0);
1141 nl_msg_put_nlmsghdr(&request
, sizeof *tcmsg
, RTM_DELQDISC
, NLM_F_REQUEST
);
1142 tcmsg
= ofpbuf_put_zeros(&request
, sizeof *tcmsg
);
1143 tcmsg
->tcm_family
= AF_UNSPEC
;
1144 tcmsg
->tcm_ifindex
= ifindex
;
1145 tcmsg
->tcm_handle
= 0xffff0000;
1146 tcmsg
->tcm_parent
= TC_H_INGRESS
;
1147 nl_msg_put_string(&request
, TCA_KIND
, "ingress");
1148 nl_msg_put_unspec(&request
, TCA_OPTIONS
, NULL
, 0);
1149 error
= nl_sock_transact(rtnl_sock
, &request
, &reply
);
1150 ofpbuf_uninit(&request
);
1151 ofpbuf_delete(reply
);
1152 if (error
&& error
!= ENOENT
&& error
!= EINVAL
) {
1153 VLOG_WARN_RL(&rl
, "%s: removing policing failed: %s",
1154 netdev_name
, strerror(error
));
1158 netdev_dev
->kbits_rate
= 0;
1159 netdev_dev
->kbits_burst
= 0;
1160 netdev_dev
->cache_valid
|= VALID_POLICING
;
1164 /* Attempts to set input rate limiting (policing) policy. */
1166 netdev_linux_set_policing(struct netdev
*netdev
,
1167 uint32_t kbits_rate
, uint32_t kbits_burst
)
1169 struct netdev_dev_linux
*netdev_dev
=
1170 netdev_dev_linux_cast(netdev_get_dev(netdev
));
1171 const char *netdev_name
= netdev_get_name(netdev
);
1174 COVERAGE_INC(netdev_set_policing
);
1176 kbits_burst
= (!kbits_rate
? 0 /* Force to 0 if no rate specified. */
1177 : !kbits_burst
? 1000 /* Default to 1000 kbits if 0. */
1178 : kbits_burst
); /* Stick with user-specified value. */
1180 if (netdev_dev
->cache_valid
& VALID_POLICING
1181 && netdev_dev
->kbits_rate
== kbits_rate
1182 && netdev_dev
->kbits_burst
== kbits_burst
) {
1183 /* Assume that settings haven't changed since we last set them. */
1187 netdev_linux_remove_policing(netdev
);
1189 snprintf(command
, sizeof(command
), POLICE_ADD_CMD
, netdev_name
);
1190 if (system(command
) != 0) {
1191 VLOG_WARN_RL(&rl
, "%s: problem adding policing", netdev_name
);
1195 snprintf(command
, sizeof(command
), POLICE_CONFIG_CMD
, netdev_name
,
1196 kbits_rate
, kbits_burst
);
1197 if (system(command
) != 0) {
1198 VLOG_WARN_RL(&rl
, "%s: problem configuring policing",
1203 netdev_dev
->kbits_rate
= kbits_rate
;
1204 netdev_dev
->kbits_burst
= kbits_burst
;
1205 netdev_dev
->cache_valid
|= VALID_POLICING
;
1212 netdev_linux_get_in4(const struct netdev
*netdev_
,
1213 struct in_addr
*address
, struct in_addr
*netmask
)
1215 struct netdev_dev_linux
*netdev_dev
=
1216 netdev_dev_linux_cast(netdev_get_dev(netdev_
));
1218 if (!(netdev_dev
->cache_valid
& VALID_IN4
)) {
1221 error
= netdev_linux_get_ipv4(netdev_
, &netdev_dev
->address
,
1222 SIOCGIFADDR
, "SIOCGIFADDR");
1227 error
= netdev_linux_get_ipv4(netdev_
, &netdev_dev
->netmask
,
1228 SIOCGIFNETMASK
, "SIOCGIFNETMASK");
1233 netdev_dev
->cache_valid
|= VALID_IN4
;
1235 *address
= netdev_dev
->address
;
1236 *netmask
= netdev_dev
->netmask
;
1237 return address
->s_addr
== INADDR_ANY
? EADDRNOTAVAIL
: 0;
1241 netdev_linux_set_in4(struct netdev
*netdev_
, struct in_addr address
,
1242 struct in_addr netmask
)
1244 struct netdev_dev_linux
*netdev_dev
=
1245 netdev_dev_linux_cast(netdev_get_dev(netdev_
));
1248 error
= do_set_addr(netdev_
, SIOCSIFADDR
, "SIOCSIFADDR", address
);
1250 netdev_dev
->cache_valid
|= VALID_IN4
;
1251 netdev_dev
->address
= address
;
1252 netdev_dev
->netmask
= netmask
;
1253 if (address
.s_addr
!= INADDR_ANY
) {
1254 error
= do_set_addr(netdev_
, SIOCSIFNETMASK
,
1255 "SIOCSIFNETMASK", netmask
);
/* Parses one 'line' in the format of /proc/net/if_inet6: 32 hex digits of
 * IPv6 address, four hex fields that are skipped, and an interface name.
 *
 * On success stores the 16 address bytes in '*in6', stores the interface name
 * (at most 16 characters plus terminator) in 'ifname', and returns true.
 * Returns false if 'line' does not match; the outputs may then be partially
 * written. */
static bool
parse_if_inet6_line(const char *line,
                    struct in6_addr *in6, char ifname[16 + 1])
{
    uint8_t *s6 = in6->s6_addr;
    int n_fields;

#define X8 "%2"SCNx8
    n_fields = sscanf(line,
                      " "X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8
                      "%*x %*x %*x %*x %16s\n",
                      &s6[0], &s6[1], &s6[2], &s6[3],
                      &s6[4], &s6[5], &s6[6], &s6[7],
                      &s6[8], &s6[9], &s6[10], &s6[11],
                      &s6[12], &s6[13], &s6[14], &s6[15],
                      ifname);
#undef X8   /* Keep the helper macro local to this function. */

    /* 16 address bytes plus the interface name. */
    return n_fields == 17;
}
1277 /* If 'netdev' has an assigned IPv6 address, sets '*in6' to that address (if
1278 * 'in6' is non-null) and returns true. Otherwise, returns false. */
1280 netdev_linux_get_in6(const struct netdev
*netdev_
, struct in6_addr
*in6
)
1282 struct netdev_dev_linux
*netdev_dev
=
1283 netdev_dev_linux_cast(netdev_get_dev(netdev_
));
1284 if (!(netdev_dev
->cache_valid
& VALID_IN6
)) {
1288 netdev_dev
->in6
= in6addr_any
;
1290 file
= fopen("/proc/net/if_inet6", "r");
1292 const char *name
= netdev_get_name(netdev_
);
1293 while (fgets(line
, sizeof line
, file
)) {
1294 struct in6_addr in6
;
1295 char ifname
[16 + 1];
1296 if (parse_if_inet6_line(line
, &in6
, ifname
)
1297 && !strcmp(name
, ifname
))
1299 netdev_dev
->in6
= in6
;
1305 netdev_dev
->cache_valid
|= VALID_IN6
;
1307 *in6
= netdev_dev
->in6
;
/* Fills '*sa' with an AF_INET socket address holding 'addr' and port 0.  All
 * other bytes of '*sa' are zeroed. */
static void
make_in4_sockaddr(struct sockaddr *sa, struct in_addr addr)
{
    struct sockaddr_in in4;

    /* Build the IPv4 form in a scratch variable first. */
    memset(&in4, 0, sizeof in4);
    in4.sin_port = 0;
    in4.sin_addr = addr;
    in4.sin_family = AF_INET;

    /* Then overlay it on the generic sockaddr. */
    memset(sa, 0, sizeof *sa);
    memcpy(sa, &in4, sizeof in4);
}
1325 do_set_addr(struct netdev
*netdev
,
1326 int ioctl_nr
, const char *ioctl_name
, struct in_addr addr
)
1329 strncpy(ifr
.ifr_name
, netdev_get_name(netdev
), sizeof ifr
.ifr_name
);
1330 make_in4_sockaddr(&ifr
.ifr_addr
, addr
);
1332 return netdev_linux_do_ioctl(netdev_get_name(netdev
), &ifr
, ioctl_nr
,
1336 /* Adds 'router' as a default IP gateway. */
1338 netdev_linux_add_router(struct netdev
*netdev OVS_UNUSED
, struct in_addr router
)
1340 struct in_addr any
= { INADDR_ANY
};
1344 memset(&rt
, 0, sizeof rt
);
1345 make_in4_sockaddr(&rt
.rt_dst
, any
);
1346 make_in4_sockaddr(&rt
.rt_gateway
, router
);
1347 make_in4_sockaddr(&rt
.rt_genmask
, any
);
1348 rt
.rt_flags
= RTF_UP
| RTF_GATEWAY
;
1349 COVERAGE_INC(netdev_add_router
);
1350 error
= ioctl(af_inet_sock
, SIOCADDRT
, &rt
) < 0 ? errno
: 0;
1352 VLOG_WARN("ioctl(SIOCADDRT): %s", strerror(error
));
1358 netdev_linux_get_next_hop(const struct in_addr
*host
, struct in_addr
*next_hop
,
1361 static const char fn
[] = "/proc/net/route";
1366 *netdev_name
= NULL
;
1367 stream
= fopen(fn
, "r");
1368 if (stream
== NULL
) {
1369 VLOG_WARN_RL(&rl
, "%s: open failed: %s", fn
, strerror(errno
));
1374 while (fgets(line
, sizeof line
, stream
)) {
1377 uint32_t dest
, gateway
, mask
;
1378 int refcnt
, metric
, mtu
;
1379 unsigned int flags
, use
, window
, irtt
;
1382 "%16s %"SCNx32
" %"SCNx32
" %04X %d %u %d %"SCNx32
1384 iface
, &dest
, &gateway
, &flags
, &refcnt
,
1385 &use
, &metric
, &mask
, &mtu
, &window
, &irtt
) != 11) {
1387 VLOG_WARN_RL(&rl
, "%s: could not parse line %d: %s",
1391 if (!(flags
& RTF_UP
)) {
1392 /* Skip routes that aren't up. */
1396 /* The output of 'dest', 'mask', and 'gateway' were given in
1397 * network byte order, so we don't need need any endian
1398 * conversions here. */
1399 if ((dest
& mask
) == (host
->s_addr
& mask
)) {
1401 /* The host is directly reachable. */
1402 next_hop
->s_addr
= 0;
1404 /* To reach the host, we must go through a gateway. */
1405 next_hop
->s_addr
= gateway
;
1407 *netdev_name
= xstrdup(iface
);
1418 /* Looks up the ARP table entry for 'ip' on 'netdev'. If one exists and can be
1419 * successfully retrieved, it stores the corresponding MAC address in 'mac' and
1420 * returns 0. Otherwise, it returns a positive errno value; in particular,
1421 * ENXIO indicates that there is not ARP table entry for 'ip' on 'netdev'. */
1423 netdev_linux_arp_lookup(const struct netdev
*netdev
,
1424 uint32_t ip
, uint8_t mac
[ETH_ADDR_LEN
])
1427 struct sockaddr_in sin
;
1430 memset(&r
, 0, sizeof r
);
1431 sin
.sin_family
= AF_INET
;
1432 sin
.sin_addr
.s_addr
= ip
;
1434 memcpy(&r
.arp_pa
, &sin
, sizeof sin
);
1435 r
.arp_ha
.sa_family
= ARPHRD_ETHER
;
1437 strncpy(r
.arp_dev
, netdev_get_name(netdev
), sizeof r
.arp_dev
);
1438 COVERAGE_INC(netdev_arp_lookup
);
1439 retval
= ioctl(af_inet_sock
, SIOCGARP
, &r
) < 0 ? errno
: 0;
1441 memcpy(mac
, r
.arp_ha
.sa_data
, ETH_ADDR_LEN
);
1442 } else if (retval
!= ENXIO
) {
1443 VLOG_WARN_RL(&rl
, "%s: could not look up ARP entry for "IP_FMT
": %s",
1444 netdev_get_name(netdev
), IP_ARGS(&ip
), strerror(retval
));
1450 nd_to_iff_flags(enum netdev_flags nd
)
1453 if (nd
& NETDEV_UP
) {
1456 if (nd
& NETDEV_PROMISC
) {
1463 iff_to_nd_flags(int iff
)
1465 enum netdev_flags nd
= 0;
1469 if (iff
& IFF_PROMISC
) {
1470 nd
|= NETDEV_PROMISC
;
1476 netdev_linux_update_flags(struct netdev
*netdev
, enum netdev_flags off
,
1477 enum netdev_flags on
, enum netdev_flags
*old_flagsp
)
1479 int old_flags
, new_flags
;
1482 error
= get_flags(netdev
, &old_flags
);
1484 *old_flagsp
= iff_to_nd_flags(old_flags
);
1485 new_flags
= (old_flags
& ~nd_to_iff_flags(off
)) | nd_to_iff_flags(on
);
1486 if (new_flags
!= old_flags
) {
1487 error
= set_flags(netdev
, new_flags
);
1494 poll_notify(struct list
*list
)
1496 struct netdev_linux_notifier
*notifier
;
1497 LIST_FOR_EACH (notifier
, struct netdev_linux_notifier
, node
, list
) {
1498 struct netdev_notifier
*n
= ¬ifier
->notifier
;
1504 netdev_linux_poll_cb(const struct rtnetlink_change
*change
,
1505 void *aux OVS_UNUSED
)
1508 struct list
*list
= shash_find_data(&netdev_linux_notifiers
,
1514 struct shash_node
*node
;
1515 SHASH_FOR_EACH (node
, &netdev_linux_notifiers
) {
1516 poll_notify(node
->data
);
1522 netdev_linux_poll_add(struct netdev
*netdev
,
1523 void (*cb
)(struct netdev_notifier
*), void *aux
,
1524 struct netdev_notifier
**notifierp
)
1526 const char *netdev_name
= netdev_get_name(netdev
);
1527 struct netdev_linux_notifier
*notifier
;
1530 if (shash_is_empty(&netdev_linux_notifiers
)) {
1531 int error
= rtnetlink_notifier_register(&netdev_linux_poll_notifier
,
1532 netdev_linux_poll_cb
, NULL
);
1538 list
= shash_find_data(&netdev_linux_notifiers
, netdev_name
);
1540 list
= xmalloc(sizeof *list
);
1542 shash_add(&netdev_linux_notifiers
, netdev_name
, list
);
1545 notifier
= xmalloc(sizeof *notifier
);
1546 netdev_notifier_init(¬ifier
->notifier
, netdev
, cb
, aux
);
1547 list_push_back(list
, ¬ifier
->node
);
1548 *notifierp
= ¬ifier
->notifier
;
1553 netdev_linux_poll_remove(struct netdev_notifier
*notifier_
)
1555 struct netdev_linux_notifier
*notifier
=
1556 CONTAINER_OF(notifier_
, struct netdev_linux_notifier
, notifier
);
1559 /* Remove 'notifier' from its list. */
1560 list
= list_remove(¬ifier
->node
);
1561 if (list_is_empty(list
)) {
1562 /* The list is now empty. Remove it from the hash and free it. */
1563 const char *netdev_name
= netdev_get_name(notifier
->notifier
.netdev
);
1564 shash_delete(&netdev_linux_notifiers
,
1565 shash_find(&netdev_linux_notifiers
, netdev_name
));
1570 /* If that was the last notifier, unregister. */
1571 if (shash_is_empty(&netdev_linux_notifiers
)) {
1572 rtnetlink_notifier_unregister(&netdev_linux_poll_notifier
);
/* Callback table ('struct netdev_class') for this provider, initialized
 * positionally from the netdev_linux_* functions in this file.
 *
 * Compared with 'netdev_tap_class' below, this table uses
 * netdev_linux_create_system, netdev_linux_enumerate and
 * netdev_vport_set_stats where the tap table has netdev_linux_create_tap,
 * NULL and NULL.
 *
 * NOTE(review): entries are matched to members purely by position, so any
 * change must be checked against the member order of 'struct netdev_class'
 * (declared elsewhere, presumably netdev-provider.h -- confirm). */
1576 const struct netdev_class netdev_linux_class
= {
1583 netdev_linux_create_system
,
1584 netdev_linux_destroy
,
1585 NULL
, /* reconfigure */
1590 netdev_linux_enumerate
,
1593 netdev_linux_recv_wait
,
1597 netdev_linux_send_wait
,
1599 netdev_linux_set_etheraddr
,
1600 netdev_linux_get_etheraddr
,
1601 netdev_linux_get_mtu
,
1602 netdev_linux_get_ifindex
,
1603 netdev_linux_get_carrier
,
1604 netdev_linux_get_stats
,
1605 netdev_vport_set_stats
,
1607 netdev_linux_get_features
,
1608 netdev_linux_set_advertisements
,
1609 netdev_linux_get_vlan_vid
,
1610 netdev_linux_set_policing
,
1612 netdev_linux_get_in4
,
1613 netdev_linux_set_in4
,
1614 netdev_linux_get_in6
,
1615 netdev_linux_add_router
,
1616 netdev_linux_get_next_hop
,
1617 netdev_linux_arp_lookup
,
1619 netdev_linux_update_flags
,
1621 netdev_linux_poll_add
,
1622 netdev_linux_poll_remove
,
/* Callback table ('struct netdev_class') for tap devices, initialized
 * positionally.  It shares every visible callback with 'netdev_linux_class'
 * except that devices are created with netdev_linux_create_tap and the
 * enumerate and set_stats slots are NULL.
 *
 * NOTE(review): entries are matched to members purely by position, so any
 * change must be checked against the member order of 'struct netdev_class'
 * (declared elsewhere, presumably netdev-provider.h -- confirm). */
1625 const struct netdev_class netdev_tap_class
= {
1632 netdev_linux_create_tap
,
1633 netdev_linux_destroy
,
1634 NULL
, /* reconfigure */
1639 NULL
, /* enumerate */
1642 netdev_linux_recv_wait
,
1646 netdev_linux_send_wait
,
1648 netdev_linux_set_etheraddr
,
1649 netdev_linux_get_etheraddr
,
1650 netdev_linux_get_mtu
,
1651 netdev_linux_get_ifindex
,
1652 netdev_linux_get_carrier
,
1653 netdev_linux_get_stats
,
1654 NULL
, /* set_stats */
1656 netdev_linux_get_features
,
1657 netdev_linux_set_advertisements
,
1658 netdev_linux_get_vlan_vid
,
1659 netdev_linux_set_policing
,
1661 netdev_linux_get_in4
,
1662 netdev_linux_set_in4
,
1663 netdev_linux_get_in6
,
1664 netdev_linux_add_router
,
1665 netdev_linux_get_next_hop
,
1666 netdev_linux_arp_lookup
,
1668 netdev_linux_update_flags
,
1670 netdev_linux_poll_add
,
1671 netdev_linux_poll_remove
,
1676 get_stats_via_netlink(int ifindex
, struct netdev_stats
*stats
)
1678 /* Policy for RTNLGRP_LINK messages.
1680 * There are *many* more fields in these messages, but currently we only
1681 * care about these fields. */
1682 static const struct nl_policy rtnlgrp_link_policy
[] = {
1683 [IFLA_IFNAME
] = { .type
= NL_A_STRING
, .optional
= false },
1684 [IFLA_STATS
] = { .type
= NL_A_UNSPEC
, .optional
= true,
1685 .min_len
= sizeof(struct rtnl_link_stats
) },
1688 struct ofpbuf request
;
1689 struct ofpbuf
*reply
;
1690 struct ifinfomsg
*ifi
;
1691 const struct rtnl_link_stats
*rtnl_stats
;
1692 struct nlattr
*attrs
[ARRAY_SIZE(rtnlgrp_link_policy
)];
1695 ofpbuf_init(&request
, 0);
1696 nl_msg_put_nlmsghdr(&request
, sizeof *ifi
, RTM_GETLINK
, NLM_F_REQUEST
);
1697 ifi
= ofpbuf_put_zeros(&request
, sizeof *ifi
);
1698 ifi
->ifi_family
= PF_UNSPEC
;
1699 ifi
->ifi_index
= ifindex
;
1700 error
= nl_sock_transact(rtnl_sock
, &request
, &reply
);
1701 ofpbuf_uninit(&request
);
1706 if (!nl_policy_parse(reply
, NLMSG_HDRLEN
+ sizeof(struct ifinfomsg
),
1707 rtnlgrp_link_policy
,
1708 attrs
, ARRAY_SIZE(rtnlgrp_link_policy
))) {
1709 ofpbuf_delete(reply
);
1713 if (!attrs
[IFLA_STATS
]) {
1714 VLOG_WARN_RL(&rl
, "RTM_GETLINK reply lacks stats");
1715 ofpbuf_delete(reply
);
1719 rtnl_stats
= nl_attr_get(attrs
[IFLA_STATS
]);
1720 stats
->rx_packets
= rtnl_stats
->rx_packets
;
1721 stats
->tx_packets
= rtnl_stats
->tx_packets
;
1722 stats
->rx_bytes
= rtnl_stats
->rx_bytes
;
1723 stats
->tx_bytes
= rtnl_stats
->tx_bytes
;
1724 stats
->rx_errors
= rtnl_stats
->rx_errors
;
1725 stats
->tx_errors
= rtnl_stats
->tx_errors
;
1726 stats
->rx_dropped
= rtnl_stats
->rx_dropped
;
1727 stats
->tx_dropped
= rtnl_stats
->tx_dropped
;
1728 stats
->multicast
= rtnl_stats
->multicast
;
1729 stats
->collisions
= rtnl_stats
->collisions
;
1730 stats
->rx_length_errors
= rtnl_stats
->rx_length_errors
;
1731 stats
->rx_over_errors
= rtnl_stats
->rx_over_errors
;
1732 stats
->rx_crc_errors
= rtnl_stats
->rx_crc_errors
;
1733 stats
->rx_frame_errors
= rtnl_stats
->rx_frame_errors
;
1734 stats
->rx_fifo_errors
= rtnl_stats
->rx_fifo_errors
;
1735 stats
->rx_missed_errors
= rtnl_stats
->rx_missed_errors
;
1736 stats
->tx_aborted_errors
= rtnl_stats
->tx_aborted_errors
;
1737 stats
->tx_carrier_errors
= rtnl_stats
->tx_carrier_errors
;
1738 stats
->tx_fifo_errors
= rtnl_stats
->tx_fifo_errors
;
1739 stats
->tx_heartbeat_errors
= rtnl_stats
->tx_heartbeat_errors
;
1740 stats
->tx_window_errors
= rtnl_stats
->tx_window_errors
;
1742 ofpbuf_delete(reply
);
1748 get_stats_via_proc(const char *netdev_name
, struct netdev_stats
*stats
)
1750 static const char fn
[] = "/proc/net/dev";
1755 stream
= fopen(fn
, "r");
1757 VLOG_WARN_RL(&rl
, "%s: open failed: %s", fn
, strerror(errno
));
1762 while (fgets(line
, sizeof line
, stream
)) {
1765 #define X64 "%"SCNu64
1768 X64 X64 X64 X64 X64 X64 X64
"%*u"
1769 X64 X64 X64 X64 X64 X64 X64
"%*u",
1775 &stats
->rx_fifo_errors
,
1776 &stats
->rx_frame_errors
,
1782 &stats
->tx_fifo_errors
,
1784 &stats
->tx_carrier_errors
) != 15) {
1785 VLOG_WARN_RL(&rl
, "%s:%d: parse error", fn
, ln
);
1786 } else if (!strcmp(devname
, netdev_name
)) {
1787 stats
->rx_length_errors
= UINT64_MAX
;
1788 stats
->rx_over_errors
= UINT64_MAX
;
1789 stats
->rx_crc_errors
= UINT64_MAX
;
1790 stats
->rx_missed_errors
= UINT64_MAX
;
1791 stats
->tx_aborted_errors
= UINT64_MAX
;
1792 stats
->tx_heartbeat_errors
= UINT64_MAX
;
1793 stats
->tx_window_errors
= UINT64_MAX
;
1799 VLOG_WARN_RL(&rl
, "%s: no stats for %s", fn
, netdev_name
);
1805 get_flags(const struct netdev
*netdev
, int *flags
)
1810 error
= netdev_linux_do_ioctl(netdev_get_name(netdev
), &ifr
, SIOCGIFFLAGS
,
1812 *flags
= ifr
.ifr_flags
;
1817 set_flags(struct netdev
*netdev
, int flags
)
1821 ifr
.ifr_flags
= flags
;
1822 return netdev_linux_do_ioctl(netdev_get_name(netdev
), &ifr
, SIOCSIFFLAGS
,
1827 do_get_ifindex(const char *netdev_name
)
1831 strncpy(ifr
.ifr_name
, netdev_name
, sizeof ifr
.ifr_name
);
1832 COVERAGE_INC(netdev_get_ifindex
);
1833 if (ioctl(af_inet_sock
, SIOCGIFINDEX
, &ifr
) < 0) {
1834 VLOG_WARN_RL(&rl
, "ioctl(SIOCGIFINDEX) on %s device failed: %s",
1835 netdev_name
, strerror(errno
));
1838 return ifr
.ifr_ifindex
;
1842 get_ifindex(const struct netdev
*netdev_
, int *ifindexp
)
1844 struct netdev_dev_linux
*netdev_dev
=
1845 netdev_dev_linux_cast(netdev_get_dev(netdev_
));
1847 if (!(netdev_dev
->cache_valid
& VALID_IFINDEX
)) {
1848 int ifindex
= do_get_ifindex(netdev_get_name(netdev_
));
1852 netdev_dev
->cache_valid
|= VALID_IFINDEX
;
1853 netdev_dev
->ifindex
= ifindex
;
1855 *ifindexp
= netdev_dev
->ifindex
;
1860 get_etheraddr(const char *netdev_name
, uint8_t ea
[ETH_ADDR_LEN
])
1865 memset(&ifr
, 0, sizeof ifr
);
1866 strncpy(ifr
.ifr_name
, netdev_name
, sizeof ifr
.ifr_name
);
1867 COVERAGE_INC(netdev_get_hwaddr
);
1868 if (ioctl(af_inet_sock
, SIOCGIFHWADDR
, &ifr
) < 0) {
1869 VLOG_ERR("ioctl(SIOCGIFHWADDR) on %s device failed: %s",
1870 netdev_name
, strerror(errno
));
1873 hwaddr_family
= ifr
.ifr_hwaddr
.sa_family
;
1874 if (hwaddr_family
!= AF_UNSPEC
&& hwaddr_family
!= ARPHRD_ETHER
) {
1875 VLOG_WARN("%s device has unknown hardware address family %d",
1876 netdev_name
, hwaddr_family
);
1878 memcpy(ea
, ifr
.ifr_hwaddr
.sa_data
, ETH_ADDR_LEN
);
1883 set_etheraddr(const char *netdev_name
, int hwaddr_family
,
1884 const uint8_t mac
[ETH_ADDR_LEN
])
1888 memset(&ifr
, 0, sizeof ifr
);
1889 strncpy(ifr
.ifr_name
, netdev_name
, sizeof ifr
.ifr_name
);
1890 ifr
.ifr_hwaddr
.sa_family
= hwaddr_family
;
1891 memcpy(ifr
.ifr_hwaddr
.sa_data
, mac
, ETH_ADDR_LEN
);
1892 COVERAGE_INC(netdev_set_hwaddr
);
1893 if (ioctl(af_inet_sock
, SIOCSIFHWADDR
, &ifr
) < 0) {
1894 VLOG_ERR("ioctl(SIOCSIFHWADDR) on %s device failed: %s",
1895 netdev_name
, strerror(errno
));
1902 netdev_linux_do_ethtool(const char *name
, struct ethtool_cmd
*ecmd
,
1903 int cmd
, const char *cmd_name
)
1907 memset(&ifr
, 0, sizeof ifr
);
1908 strncpy(ifr
.ifr_name
, name
, sizeof ifr
.ifr_name
);
1909 ifr
.ifr_data
= (caddr_t
) ecmd
;
1912 COVERAGE_INC(netdev_ethtool
);
1913 if (ioctl(af_inet_sock
, SIOCETHTOOL
, &ifr
) == 0) {
1916 if (errno
!= EOPNOTSUPP
) {
1917 VLOG_WARN_RL(&rl
, "ethtool command %s on network device %s "
1918 "failed: %s", cmd_name
, name
, strerror(errno
));
1920 /* The device doesn't support this operation. That's pretty
1921 * common, so there's no point in logging anything. */
1928 netdev_linux_do_ioctl(const char *name
, struct ifreq
*ifr
, int cmd
,
1929 const char *cmd_name
)
1931 strncpy(ifr
->ifr_name
, name
, sizeof ifr
->ifr_name
);
1932 if (ioctl(af_inet_sock
, cmd
, ifr
) == -1) {
1933 VLOG_DBG_RL(&rl
, "%s: ioctl(%s) failed: %s", name
, cmd_name
,
1941 netdev_linux_get_ipv4(const struct netdev
*netdev
, struct in_addr
*ip
,
1942 int cmd
, const char *cmd_name
)
1947 ifr
.ifr_addr
.sa_family
= AF_INET
;
1948 error
= netdev_linux_do_ioctl(netdev_get_name(netdev
), &ifr
, cmd
, cmd_name
);
1950 const struct sockaddr_in
*sin
= (struct sockaddr_in
*) &ifr
.ifr_addr
;
1951 *ip
= sin
->sin_addr
;