From: David Ahern Date: Thu, 6 Aug 2020 16:21:35 +0000 (+0000) Subject: Merge branch 'main' into next X-Git-Url: https://git.proxmox.com/?a=commitdiff_plain;h=e572e3af0d24718cc10546609784d81530a60141;hp=53159d81156f7fab53b97ceb4750d8b9c3c10fcc;p=mirror_iproute2.git Merge branch 'main' into next Conflicts: bridge/fdb.c man/man8/bridge.8 Signed-off-by: David Ahern --- diff --git a/bridge/fdb.c b/bridge/fdb.c index 06a2254b..37465e46 100644 --- a/bridge/fdb.c +++ b/bridge/fdb.c @@ -38,9 +38,9 @@ static void usage(void) fprintf(stderr, "Usage: bridge fdb { add | append | del | replace } ADDR dev DEV\n" " [ self ] [ master ] [ use ] [ router ] [ extern_learn ]\n" - " [ sticky ] [ local | static | dynamic ] [ dst IPADDR ]\n" - " [ vlan VID ] [ port PORT] [ vni VNI ] [ via DEV ]\n" - " [ src_vni VNI ]\n" + " [ sticky ] [ local | static | dynamic ] [ vlan VID ]\n" + " { [ dst IPADDR ] [ port PORT] [ vni VNI ] | [ nhid NHID ] }\n" + " [ via DEV ] [ src_vni VNI ]\n" " bridge fdb [ show [ br BRDEV ] [ brport DEV ] [ vlan VID ]\n" " [ state STATE ] [ dynamic ] ]\n" " bridge fdb get [ to ] LLADDR [ br BRDEV ] { brport | dev } DEV\n" @@ -245,6 +245,10 @@ int print_fdb(struct nlmsghdr *n, void *arg) ll_index_to_name(ifindex)); } + if (tb[NDA_NH_ID]) + print_uint(PRINT_ANY, "nhid", "nhid %u ", + rta_getattr_u32(tb[NDA_NH_ID])); + if (tb[NDA_LINK_NETNSID]) print_uint(PRINT_ANY, "linkNetNsId", "link-netnsid %d ", @@ -400,6 +404,7 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv) unsigned int via = 0; char *endptr; short vid = -1; + __u32 nhid = 0; while (argc > 0) { if (strcmp(*argv, "dev") == 0) { @@ -411,6 +416,10 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv) duparg2("dst", *argv); get_addr(&dst, *argv, preferred_family); dst_ok = 1; + } else if (strcmp(*argv, "nhid") == 0) { + NEXT_ARG(); + if (get_u32(&nhid, *argv, 0)) + invarg("\"id\" value is invalid\n", *argv); } else if (strcmp(*argv, "port") == 0) { NEXT_ARG(); @@ -485,6 +494,11 @@ 
static int fdb_modify(int cmd, int flags, int argc, char **argv) return -1; } + if (nhid && (dst_ok || port || vni != ~0)) { + fprintf(stderr, "dst, port, vni are mutually exclusive with nhid\n"); + return -1; + } + /* Assume self */ if (!(req.ndm.ndm_flags&(NTF_SELF|NTF_MASTER))) req.ndm.ndm_flags |= NTF_SELF; @@ -506,6 +520,8 @@ static int fdb_modify(int cmd, int flags, int argc, char **argv) if (vid >= 0) addattr16(&req.n, sizeof(req), NDA_VLAN, vid); + if (nhid > 0) + addattr32(&req.n, sizeof(req), NDA_NH_ID, nhid); if (port) { unsigned short dport; diff --git a/devlink/devlink.c b/devlink/devlink.c index 0982faef..8ec96c01 100644 --- a/devlink/devlink.c +++ b/devlink/devlink.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "version.h" #include "list.h" @@ -300,6 +301,7 @@ static void ifname_map_free(struct ifname_map *ifname_map) #define DL_OPT_TRAP_POLICER_RATE BIT(35) #define DL_OPT_TRAP_POLICER_BURST BIT(36) #define DL_OPT_HEALTH_REPORTER_AUTO_DUMP BIT(37) +#define DL_OPT_PORT_FUNCTION_HW_ADDR BIT(38) struct dl_opts { uint64_t present; /* flags of present items */ @@ -345,6 +347,8 @@ struct dl_opts { uint32_t trap_policer_id; uint64_t trap_policer_rate; uint64_t trap_policer_burst; + char port_function_hw_addr[MAX_ADDR_LEN]; + uint32_t port_function_hw_addr_len; }; struct dl { @@ -436,6 +440,127 @@ static void __pr_out_indent_newline(struct dl *dl) pr_out(" "); } +static bool is_binary_eol(int i) +{ + return !(i%16); +} + +static void pr_out_binary_value(struct dl *dl, uint8_t *data, uint32_t len) +{ + int i = 0; + + while (i < len) { + if (dl->json_output) + print_int(PRINT_JSON, NULL, NULL, data[i]); + else + pr_out("%02x ", data[i]); + i++; + if (!dl->json_output && is_binary_eol(i)) + __pr_out_newline(); + } + if (!dl->json_output && !is_binary_eol(i)) + __pr_out_newline(); +} + +static void pr_out_name(struct dl *dl, const char *name) +{ + __pr_out_indent_newline(dl); + if (dl->json_output) + print_string(PRINT_JSON, name, NULL, NULL); 
+ else + pr_out("%s:", name); +} + +static void pr_out_u64(struct dl *dl, const char *name, uint64_t val) +{ + __pr_out_indent_newline(dl); + if (val == (uint64_t) -1) + return print_string_name_value(name, "unlimited"); + + if (dl->json_output) + print_u64(PRINT_JSON, name, NULL, val); + else + pr_out("%s %"PRIu64, name, val); +} + +static void pr_out_section_start(struct dl *dl, const char *name) +{ + if (dl->json_output) { + open_json_object(NULL); + open_json_object(name); + } +} + +static void pr_out_section_end(struct dl *dl) +{ + if (dl->json_output) { + if (dl->arr_last.present) + close_json_array(PRINT_JSON, NULL); + close_json_object(); + close_json_object(); + } +} + +static void pr_out_array_start(struct dl *dl, const char *name) +{ + if (dl->json_output) { + open_json_array(PRINT_JSON, name); + } else { + __pr_out_indent_inc(); + __pr_out_newline(); + pr_out("%s:", name); + __pr_out_indent_inc(); + __pr_out_newline(); + } +} + +static void pr_out_array_end(struct dl *dl) +{ + if (dl->json_output) { + close_json_array(PRINT_JSON, NULL); + } else { + __pr_out_indent_dec(); + __pr_out_indent_dec(); + } +} + +static void pr_out_object_start(struct dl *dl, const char *name) +{ + if (dl->json_output) { + open_json_object(name); + } else { + __pr_out_indent_inc(); + __pr_out_newline(); + pr_out("%s:", name); + __pr_out_indent_inc(); + __pr_out_newline(); + } +} + +static void pr_out_object_end(struct dl *dl) +{ + if (dl->json_output) { + close_json_object(); + } else { + __pr_out_indent_dec(); + __pr_out_indent_dec(); + } +} + +static void pr_out_entry_start(struct dl *dl) +{ + if (dl->json_output) + open_json_object(NULL); +} + +static void pr_out_entry_end(struct dl *dl) +{ + if (dl->json_output) + close_json_object(); + else + __pr_out_newline(); +} + static void check_indent_newline(struct dl *dl) { __pr_out_indent_newline(dl); @@ -456,6 +581,8 @@ static const enum mnl_attr_data_type devlink_policy[DEVLINK_ATTR_MAX + 1] = { 
[DEVLINK_ATTR_PORT_NETDEV_IFINDEX] = MNL_TYPE_U32, [DEVLINK_ATTR_PORT_NETDEV_NAME] = MNL_TYPE_NUL_STRING, [DEVLINK_ATTR_PORT_IBDEV_NAME] = MNL_TYPE_NUL_STRING, + [DEVLINK_ATTR_PORT_LANES] = MNL_TYPE_U32, + [DEVLINK_ATTR_PORT_SPLITTABLE] = MNL_TYPE_U8, [DEVLINK_ATTR_SB_INDEX] = MNL_TYPE_U32, [DEVLINK_ATTR_SB_SIZE] = MNL_TYPE_U32, [DEVLINK_ATTR_SB_INGRESS_POOL_COUNT] = MNL_TYPE_U16, @@ -523,6 +650,7 @@ static const enum mnl_attr_data_type devlink_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_REGION_CHUNK_LEN] = MNL_TYPE_U64, [DEVLINK_ATTR_INFO_DRIVER_NAME] = MNL_TYPE_STRING, [DEVLINK_ATTR_INFO_SERIAL_NUMBER] = MNL_TYPE_STRING, + [DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER] = MNL_TYPE_STRING, [DEVLINK_ATTR_INFO_VERSION_FIXED] = MNL_TYPE_NESTED, [DEVLINK_ATTR_INFO_VERSION_RUNNING] = MNL_TYPE_NESTED, [DEVLINK_ATTR_INFO_VERSION_STORED] = MNL_TYPE_NESTED, @@ -594,6 +722,30 @@ static int attr_stats_cb(const struct nlattr *attr, void *data) return MNL_CB_OK; } +static const enum mnl_attr_data_type +devlink_function_policy[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1] = { + [DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR ] = MNL_TYPE_BINARY, +}; + +static int function_attr_cb(const struct nlattr *attr, void *data) +{ + const struct nlattr **tb = data; + int type; + + /* Allow the tool to work on top of newer kernels that might contain + * more attributes. 
+ */ + if (mnl_attr_type_valid(attr, DEVLINK_PORT_FUNCTION_ATTR_MAX) < 0) + return MNL_CB_OK; + + type = mnl_attr_get_type(attr); + if (mnl_attr_validate(attr, devlink_function_policy[type]) < 0) + return MNL_CB_ERROR; + + tb[type] = attr; + return MNL_CB_OK; +} + static int ifname_map_cb(const struct nlmsghdr *nlh, void *data) { struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {}; @@ -1165,6 +1317,17 @@ static int trap_action_get(const char *actionstr, return 0; } +static int hw_addr_parse(const char *addrstr, char *hw_addr, uint32_t *len) +{ + int alen; + + alen = ll_addr_a2n(hw_addr, MAX_ADDR_LEN, addrstr); + if (alen < 0) + return -EINVAL; + *len = alen; + return 0; +} + struct dl_args_metadata { uint64_t o_flag; char err_msg[DL_ARGS_REQUIRED_MAX_ERR_LEN]; @@ -1195,6 +1358,7 @@ static const struct dl_args_metadata dl_args_required[] = { {DL_OPT_HEALTH_REPORTER_NAME, "Reporter's name is expected."}, {DL_OPT_TRAP_NAME, "Trap's name is expected."}, {DL_OPT_TRAP_GROUP_NAME, "Trap group's name is expected."}, + {DL_OPT_PORT_FUNCTION_HW_ADDR, "Port function's hardware address is expected."}, }; static int dl_args_finding_required_validate(uint64_t o_required, @@ -1561,6 +1725,20 @@ static int dl_argv_parse(struct dl *dl, uint64_t o_required, if (err) return err; o_found |= DL_OPT_TRAP_POLICER_BURST; + } else if (dl_argv_match(dl, "hw_addr") && + (o_all & DL_OPT_PORT_FUNCTION_HW_ADDR)) { + const char *addrstr; + + dl_arg_inc(dl); + err = dl_argv_str(dl, &addrstr); + if (err) + return err; + err = hw_addr_parse(addrstr, opts->port_function_hw_addr, + &opts->port_function_hw_addr_len); + if (err) + return err; + o_found |= DL_OPT_PORT_FUNCTION_HW_ADDR; + } else { pr_err("Unknown option \"%s\"\n", dl_argv(dl)); return -EINVAL; @@ -1577,6 +1755,18 @@ static int dl_argv_parse(struct dl *dl, uint64_t o_required, return dl_args_finding_required_validate(o_required, o_found); } +static void +dl_function_attr_put(struct nlmsghdr *nlh, const struct dl_opts *opts) +{ + struct nlattr 
*nest; + + nest = mnl_attr_nest_start(nlh, DEVLINK_ATTR_PORT_FUNCTION); + mnl_attr_put(nlh, DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, + opts->port_function_hw_addr_len, + opts->port_function_hw_addr); + mnl_attr_nest_end(nlh, nest); +} + static void dl_opts_put(struct nlmsghdr *nlh, struct dl *dl) { struct dl_opts *opts = &dl->opts; @@ -1700,6 +1890,8 @@ static void dl_opts_put(struct nlmsghdr *nlh, struct dl *dl) if (opts->present & DL_OPT_TRAP_POLICER_BURST) mnl_attr_put_u64(nlh, DEVLINK_ATTR_TRAP_POLICER_BURST, opts->trap_policer_burst); + if (opts->present & DL_OPT_PORT_FUNCTION_HW_ADDR) + dl_function_attr_put(nlh, opts); } static int dl_argv_parse_put(struct nlmsghdr *nlh, struct dl *dl, @@ -1923,7 +2115,19 @@ static void __pr_out_port_handle_start(struct dl *dl, const char *bus_name, open_json_object(buf); } } else { - pr_out("%s:", buf); + if (array) { + if (should_arr_last_port_handle_end(dl, bus_name, dev_name, port_index)) + __pr_out_indent_dec(); + if (should_arr_last_port_handle_start(dl, bus_name, + dev_name, port_index)) { + pr_out("%s:", buf); + __pr_out_newline(); + __pr_out_indent_inc(); + arr_last_port_handle_set(dl, bus_name, dev_name, port_index); + } + } else { + pr_out("%s:", buf); + } } } @@ -1959,49 +2163,6 @@ static void pr_out_port_handle_end(struct dl *dl) pr_out("\n"); } -static void pr_out_u64(struct dl *dl, const char *name, uint64_t val) -{ - __pr_out_indent_newline(dl); - if (val == (uint64_t) -1) - return print_string_name_value(name, "unlimited"); - - if (dl->json_output) - print_u64(PRINT_JSON, name, NULL, val); - else - pr_out("%s %"PRIu64, name, val); -} - -static bool is_binary_eol(int i) -{ - return !(i%16); -} - -static void pr_out_binary_value(struct dl *dl, uint8_t *data, uint32_t len) -{ - int i = 0; - - while (i < len) { - if (dl->json_output) - print_int(PRINT_JSON, NULL, NULL, data[i]); - else - pr_out("%02x ", data[i]); - i++; - if (!dl->json_output && is_binary_eol(i)) - __pr_out_newline(); - } - if (!dl->json_output && 
!is_binary_eol(i)) - __pr_out_newline(); -} - -static void pr_out_name(struct dl *dl, const char *name) -{ - __pr_out_indent_newline(dl); - if (dl->json_output) - print_string(PRINT_JSON, name, NULL, NULL); - else - pr_out("%s:", name); -} - static void pr_out_region_chunk_start(struct dl *dl, uint64_t addr) { if (dl->json_output) { @@ -2043,84 +2204,6 @@ static void pr_out_region_chunk(struct dl *dl, uint8_t *data, uint32_t len, pr_out_region_chunk_end(dl); } -static void pr_out_section_start(struct dl *dl, const char *name) -{ - if (dl->json_output) { - open_json_object(NULL); - open_json_object(name); - } -} - -static void pr_out_section_end(struct dl *dl) -{ - if (dl->json_output) { - if (dl->arr_last.present) - close_json_array(PRINT_JSON, NULL); - close_json_object(); - close_json_object(); - } -} - -static void pr_out_array_start(struct dl *dl, const char *name) -{ - if (dl->json_output) { - open_json_array(PRINT_JSON, name); - } else { - __pr_out_indent_inc(); - __pr_out_newline(); - pr_out("%s:", name); - __pr_out_indent_inc(); - __pr_out_newline(); - } -} - -static void pr_out_array_end(struct dl *dl) -{ - if (dl->json_output) { - close_json_array(PRINT_JSON, NULL); - } else { - __pr_out_indent_dec(); - __pr_out_indent_dec(); - } -} - -static void pr_out_object_start(struct dl *dl, const char *name) -{ - if (dl->json_output) { - open_json_object(name); - } else { - __pr_out_indent_inc(); - __pr_out_newline(); - pr_out("%s:", name); - __pr_out_indent_inc(); - __pr_out_newline(); - } -} - -static void pr_out_object_end(struct dl *dl) -{ - if (dl->json_output) { - close_json_object(); - } else { - __pr_out_indent_dec(); - __pr_out_indent_dec(); - } -} - -static void pr_out_entry_start(struct dl *dl) -{ - if (dl->json_output) - open_json_object(NULL); -} - -static void pr_out_entry_end(struct dl *dl) -{ - if (dl->json_output) - close_json_object(); - else - __pr_out_newline(); -} - static void pr_out_stats(struct dl *dl, struct nlattr *nla_stats) { struct 
nlattr *tb[DEVLINK_ATTR_STATS_MAX + 1] = {}; @@ -2899,6 +2982,16 @@ static void pr_out_info(struct dl *dl, const struct nlmsghdr *nlh, print_string(PRINT_ANY, "serial_number", "serial_number %s", mnl_attr_get_str(nla_sn)); } + + if (tb[DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER]) { + struct nlattr *nla_bsn = tb[DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER]; + + if (!dl->json_output) + __pr_out_newline(); + check_indent_newline(dl); + print_string(PRINT_ANY, "board.serial_number", "board.serial_number %s", + mnl_attr_get_str(nla_bsn)); + } __pr_out_indent_dec(); if (has_versions) { @@ -2934,6 +3027,7 @@ static int cmd_versions_show_cb(const struct nlmsghdr *nlh, void *data) tb[DEVLINK_ATTR_INFO_VERSION_STORED]; has_info = tb[DEVLINK_ATTR_INFO_DRIVER_NAME] || tb[DEVLINK_ATTR_INFO_SERIAL_NUMBER] || + tb[DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER] || has_versions; if (has_info) @@ -3210,6 +3304,8 @@ static void cmd_port_help(void) pr_err(" devlink port set DEV/PORT_INDEX [ type { eth | ib | auto} ]\n"); pr_err(" devlink port split DEV/PORT_INDEX count COUNT\n"); pr_err(" devlink port unsplit DEV/PORT_INDEX\n"); + pr_err(" devlink port function set DEV/PORT_INDEX [ hw_addr ADDR ]\n"); + pr_err(" devlink port health show [ DEV/PORT_INDEX reporter REPORTER_NAME ]\n"); } static const char *port_type_name(uint32_t type) @@ -3257,6 +3353,37 @@ static void pr_out_port_pfvf_num(struct dl *dl, struct nlattr **tb) } } +static void pr_out_port_function(struct dl *dl, struct nlattr **tb_port) +{ + struct nlattr *tb[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1] = {}; + unsigned char *data; + SPRINT_BUF(hw_addr); + uint32_t len; + int err; + + if (!tb_port[DEVLINK_ATTR_PORT_FUNCTION]) + return; + + err = mnl_attr_parse_nested(tb_port[DEVLINK_ATTR_PORT_FUNCTION], + function_attr_cb, tb); + if (err != MNL_CB_OK) + return; + + if (!tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR]) + return; + + len = mnl_attr_get_payload_len(tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR]); + data = 
mnl_attr_get_payload(tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR]); + + pr_out_object_start(dl, "function"); + check_indent_newline(dl); + print_string(PRINT_ANY, "hw_addr", "hw_addr %s", + ll_addr_n2a(data, len, 0, hw_addr, sizeof(hw_addr))); + if (!dl->json_output) + __pr_out_indent_dec(); + pr_out_object_end(dl); +} + static void pr_out_port(struct dl *dl, struct nlattr **tb) { struct nlattr *pt_attr = tb[DEVLINK_ATTR_PORT_TYPE]; @@ -3310,6 +3437,14 @@ static void pr_out_port(struct dl *dl, struct nlattr **tb) if (tb[DEVLINK_ATTR_PORT_SPLIT_GROUP]) print_uint(PRINT_ANY, "split_group", " split_group %u", mnl_attr_get_u32(tb[DEVLINK_ATTR_PORT_SPLIT_GROUP])); + if (tb[DEVLINK_ATTR_PORT_SPLITTABLE]) + print_bool(PRINT_ANY, "splittable", " splittable %s", + mnl_attr_get_u8(tb[DEVLINK_ATTR_PORT_SPLITTABLE])); + if (tb[DEVLINK_ATTR_PORT_LANES]) + print_uint(PRINT_ANY, "lanes", " lanes %u", + mnl_attr_get_u32(tb[DEVLINK_ATTR_PORT_LANES])); + + pr_out_port_function(dl, tb); pr_out_port_handle_end(dl); } @@ -3395,6 +3530,41 @@ static int cmd_port_unsplit(struct dl *dl) return _mnlg_socket_sndrcv(dl->nlg, nlh, NULL, NULL); } +static void cmd_port_function_help(void) +{ + pr_err("Usage: devlink port function set DEV/PORT_INDEX [ hw_addr ADDR ]\n"); +} + +static int cmd_port_function_set(struct dl *dl) +{ + struct nlmsghdr *nlh; + int err; + + nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_PORT_SET, NLM_F_REQUEST | NLM_F_ACK); + + err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLEP | DL_OPT_PORT_FUNCTION_HW_ADDR, 0); + if (err) + return err; + + return _mnlg_socket_sndrcv(dl->nlg, nlh, NULL, NULL); +} + +static int cmd_port_function(struct dl *dl) +{ + if (dl_argv_match(dl, "help") || dl_no_arg(dl)) { + cmd_port_function_help(); + return 0; + } else if (dl_argv_match(dl, "set")) { + dl_arg_inc(dl); + return cmd_port_function_set(dl); + } + pr_err("Command \"%s\" not found\n", dl_argv(dl)); + return -ENOENT; +} + +static int cmd_health(struct dl *dl); +static int __cmd_health_show(struct 
dl *dl, bool show_device, bool show_port); + static int cmd_port(struct dl *dl) { if (dl_argv_match(dl, "help")) { @@ -3413,6 +3583,18 @@ static int cmd_port(struct dl *dl) } else if (dl_argv_match(dl, "unsplit")) { dl_arg_inc(dl); return cmd_port_unsplit(dl); + } else if (dl_argv_match(dl, "function")) { + dl_arg_inc(dl); + return cmd_port_function(dl); + } else if (dl_argv_match(dl, "health")) { + dl_arg_inc(dl); + if (dl_argv_match(dl, "list") || dl_no_arg(dl) + || (dl_argv_match(dl, "show") && dl_argc(dl) == 1)) { + dl_arg_inc(dl); + return __cmd_health_show(dl, false, true); + } else { + return cmd_health(dl); + } } pr_err("Command \"%s\" not found\n", dl_argv(dl)); return -ENOENT; @@ -4345,7 +4527,8 @@ static void pr_out_flash_update(struct dl *dl, struct nlattr **tb) } static void pr_out_region(struct dl *dl, struct nlattr **tb); -static void pr_out_health(struct dl *dl, struct nlattr **tb_health); +static void pr_out_health(struct dl *dl, struct nlattr **tb_health, + bool show_device, bool show_port); static void pr_out_trap(struct dl *dl, struct nlattr **tb, bool array); static void pr_out_trap_group(struct dl *dl, struct nlattr **tb, bool array); static void pr_out_trap_policer(struct dl *dl, struct nlattr **tb, bool array); @@ -4424,7 +4607,7 @@ static int cmd_mon_show_cb(const struct nlmsghdr *nlh, void *data) !tb[DEVLINK_ATTR_HEALTH_REPORTER]) return MNL_CB_ERROR; pr_out_mon_header(genl->cmd); - pr_out_health(dl, tb); + pr_out_health(dl, tb, true, true); pr_out_mon_footer(); break; case DEVLINK_CMD_TRAP_GET: /* fall through */ @@ -6569,7 +6752,7 @@ static int cmd_health_set_params(struct dl *dl) nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_HEALTH_REPORTER_SET, NLM_F_REQUEST | NLM_F_ACK); - err = dl_argv_parse(dl, DL_OPT_HANDLE | DL_OPT_HEALTH_REPORTER_NAME, + err = dl_argv_parse(dl, DL_OPT_HANDLE | DL_OPT_HANDLEP | DL_OPT_HEALTH_REPORTER_NAME, DL_OPT_HEALTH_REPORTER_GRACEFUL_PERIOD | DL_OPT_HEALTH_REPORTER_AUTO_RECOVER | 
DL_OPT_HEALTH_REPORTER_AUTO_DUMP); @@ -6589,7 +6772,8 @@ static int cmd_health_dump_clear(struct dl *dl) NLM_F_REQUEST | NLM_F_ACK); err = dl_argv_parse_put(nlh, dl, - DL_OPT_HANDLE | DL_OPT_HEALTH_REPORTER_NAME, 0); + DL_OPT_HANDLE | DL_OPT_HANDLEP | + DL_OPT_HEALTH_REPORTER_NAME, 0); if (err) return err; @@ -6836,7 +7020,8 @@ static int cmd_health_object_common(struct dl *dl, uint8_t cmd, uint16_t flags) nlh = mnlg_msg_prepare(dl->nlg, cmd, flags | NLM_F_REQUEST | NLM_F_ACK); err = dl_argv_parse_put(nlh, dl, - DL_OPT_HANDLE | DL_OPT_HEALTH_REPORTER_NAME, 0); + DL_OPT_HANDLE | DL_OPT_HANDLEP | + DL_OPT_HEALTH_REPORTER_NAME, 0); if (err) return err; @@ -6869,7 +7054,8 @@ static int cmd_health_recover(struct dl *dl) NLM_F_REQUEST | NLM_F_ACK); err = dl_argv_parse_put(nlh, dl, - DL_OPT_HANDLE | DL_OPT_HEALTH_REPORTER_NAME, 0); + DL_OPT_HANDLE | DL_OPT_HANDLEP | + DL_OPT_HEALTH_REPORTER_NAME, 0); if (err) return err; @@ -6943,7 +7129,8 @@ static void pr_out_dump_report_timestamp(struct dl *dl, const struct nlattr *att print_string(PRINT_ANY, "last_dump_time", " last_dump_time %s", dump_time); } -static void pr_out_health(struct dl *dl, struct nlattr **tb_health) +static void pr_out_health(struct dl *dl, struct nlattr **tb_health, + bool print_device, bool print_port) { struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {}; enum devlink_health_reporter_state state; @@ -6960,7 +7147,20 @@ static void pr_out_health(struct dl *dl, struct nlattr **tb_health) !tb[DEVLINK_ATTR_HEALTH_REPORTER_STATE]) return; - pr_out_handle_start_arr(dl, tb_health); + if (!print_device && !print_port) + return; + if (print_port) { + if (!print_device && !tb_health[DEVLINK_ATTR_PORT_INDEX]) + return; + else if (tb_health[DEVLINK_ATTR_PORT_INDEX]) + pr_out_port_handle_start_arr(dl, tb_health, false); + } + if (print_device) { + if (!print_port && tb_health[DEVLINK_ATTR_PORT_INDEX]) + return; + else if (!tb_health[DEVLINK_ATTR_PORT_INDEX]) + pr_out_handle_start_arr(dl, tb_health); + } 
check_indent_newline(dl); print_string(PRINT_ANY, "reporter", "reporter %s", @@ -6994,25 +7194,33 @@ static void pr_out_health(struct dl *dl, struct nlattr **tb_health) pr_out_handle_end(dl); } +struct health_ctx { + struct dl *dl; + bool show_device; + bool show_port; +}; + static int cmd_health_show_cb(const struct nlmsghdr *nlh, void *data) { struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh); struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {}; - struct dl *dl = data; + struct health_ctx *ctx = data; + struct dl *dl = ctx->dl; mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb); if (!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME] || !tb[DEVLINK_ATTR_HEALTH_REPORTER]) return MNL_CB_ERROR; - pr_out_health(dl, tb); + pr_out_health(dl, tb, ctx->show_device, ctx->show_port); return MNL_CB_OK; } -static int cmd_health_show(struct dl *dl) +static int __cmd_health_show(struct dl *dl, bool show_device, bool show_port) { struct nlmsghdr *nlh; + struct health_ctx ctx = { dl, show_device, show_port }; uint16_t flags = NLM_F_REQUEST | NLM_F_ACK; int err; @@ -7022,27 +7230,28 @@ static int cmd_health_show(struct dl *dl) flags); if (dl_argc(dl) > 0) { + ctx.show_port = true; err = dl_argv_parse_put(nlh, dl, - DL_OPT_HANDLE | + DL_OPT_HANDLE | DL_OPT_HANDLEP | DL_OPT_HEALTH_REPORTER_NAME, 0); if (err) return err; } pr_out_section_start(dl, "health"); - err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_health_show_cb, dl); + err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_health_show_cb, &ctx); pr_out_section_end(dl); return err; } static void cmd_health_help(void) { - pr_err("Usage: devlink health show [ dev DEV reporter REPORTER_NAME ]\n"); - pr_err(" devlink health recover DEV reporter REPORTER_NAME\n"); - pr_err(" devlink health diagnose DEV reporter REPORTER_NAME\n"); - pr_err(" devlink health dump show DEV reporter REPORTER_NAME\n"); - pr_err(" devlink health dump clear DEV reporter REPORTER_NAME\n"); - pr_err(" devlink health set DEV reporter REPORTER_NAME\n"); + pr_err("Usage: 
devlink health show [ { DEV | DEV/PORT_INDEX } reporter REPORTER_NAME ]\n"); + pr_err(" devlink health recover { DEV | DEV/PORT_INDEX } reporter REPORTER_NAME\n"); + pr_err(" devlink health diagnose { DEV | DEV/PORT_INDEX } reporter REPORTER_NAME\n"); + pr_err(" devlink health dump show { DEV | DEV/PORT_INDEX } reporter REPORTER_NAME\n"); + pr_err(" devlink health dump clear { DEV | DEV/PORT_INDEX } reporter REPORTER_NAME\n"); + pr_err(" devlink health set { DEV | DEV/PORT_INDEX } reporter REPORTER_NAME\n"); pr_err(" [ grace_period MSEC ]\n"); pr_err(" [ auto_recover { true | false } ]\n"); pr_err(" [ auto_dump { true | false } ]\n"); @@ -7056,7 +7265,7 @@ static int cmd_health(struct dl *dl) } else if (dl_argv_match(dl, "show") || dl_argv_match(dl, "list") || dl_no_arg(dl)) { dl_arg_inc(dl); - return cmd_health_show(dl); + return __cmd_health_show(dl, true, true); } else if (dl_argv_match(dl, "recover")) { dl_arg_inc(dl); return cmd_health_recover(dl); diff --git a/etc/iproute2/rt_protos b/etc/iproute2/rt_protos index b3a0ec8f..7cafddc1 100644 --- a/etc/iproute2/rt_protos +++ b/etc/iproute2/rt_protos @@ -14,7 +14,8 @@ 13 dnrouted 14 xorp 15 ntk -16 dhcp +16 dhcp +18 keepalived 42 babel 186 bgp 187 isis diff --git a/include/uapi/linux/atmioc.h b/include/uapi/linux/atmioc.h index cd7655e4..a9030bcc 100644 --- a/include/uapi/linux/atmioc.h +++ b/include/uapi/linux/atmioc.h @@ -5,7 +5,7 @@ /* - * See http://icawww1.epfl.ch/linux-atm/magic.html for the complete list of + * See https://icawww1.epfl.ch/linux-atm/magic.html for the complete list of * "magic" ioctl numbers. 
*/ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f21232ed..378eda20 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -189,6 +189,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_STRUCT_OPS, BPF_PROG_TYPE_EXT, BPF_PROG_TYPE_LSM, + BPF_PROG_TYPE_SK_LOOKUP, }; enum bpf_attach_type { @@ -226,6 +227,9 @@ enum bpf_attach_type { BPF_CGROUP_INET4_GETSOCKNAME, BPF_CGROUP_INET6_GETSOCKNAME, BPF_XDP_DEVMAP, + BPF_CGROUP_INET_SOCK_RELEASE, + BPF_XDP_CPUMAP, + BPF_SK_LOOKUP, __MAX_BPF_ATTACH_TYPE }; @@ -653,7 +657,7 @@ union bpf_attr { * Map value associated to *key*, or **NULL** if no entry was * found. * - * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) + * long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) * Description * Add or update the value of the entry associated to *key* in * *map* with *value*. *flags* is one of: @@ -671,13 +675,13 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_delete_elem(struct bpf_map *map, const void *key) + * long bpf_map_delete_elem(struct bpf_map *map, const void *key) * Description * Delete entry with *key* from *map*. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr) * Description * For tracing programs, safely attempt to read *size* bytes from * kernel space address *unsafe_ptr* and store the data in *dst*. @@ -695,7 +699,7 @@ union bpf_attr { * Return * Current *ktime*. * - * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...) + * long bpf_trace_printk(const char *fmt, u32 fmt_size, ...) * Description * This helper is a "printk()-like" facility for debugging. 
It * prints a message defined by format *fmt* (of size *fmt_size*) @@ -775,7 +779,7 @@ union bpf_attr { * Return * The SMP id of the processor running the program. * - * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) + * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) * Description * Store *len* bytes from address *from* into the packet * associated to *skb*, at *offset*. *flags* are a combination of @@ -792,7 +796,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size) + * long bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size) * Description * Recompute the layer 3 (e.g. IP) checksum for the packet * associated to *skb*. Computation is incremental, so the helper @@ -817,7 +821,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags) + * long bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags) * Description * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the * packet associated to *skb*. Computation is incremental, so the @@ -849,7 +853,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index) + * long bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index) * Description * This special helper is used to trigger a "tail call", or in * other words, to jump into another eBPF program. The same stack @@ -880,7 +884,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. 
* - * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags) + * long bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags) * Description * Clone and redirect the packet associated to *skb* to another * net device of index *ifindex*. Both ingress and egress @@ -916,7 +920,7 @@ union bpf_attr { * A 64-bit integer containing the current GID and UID, and * created as such: *current_gid* **<< 32 \|** *current_uid*. * - * int bpf_get_current_comm(void *buf, u32 size_of_buf) + * long bpf_get_current_comm(void *buf, u32 size_of_buf) * Description * Copy the **comm** attribute of the current task into *buf* of * *size_of_buf*. The **comm** attribute contains the name of @@ -953,7 +957,7 @@ union bpf_attr { * Return * The classid, or 0 for the default unconfigured classid. * - * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) + * long bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) * Description * Push a *vlan_tci* (VLAN tag control information) of protocol * *vlan_proto* to the packet associated to *skb*, then update @@ -969,7 +973,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_vlan_pop(struct sk_buff *skb) + * long bpf_skb_vlan_pop(struct sk_buff *skb) * Description * Pop a VLAN header from the packet associated to *skb*. * @@ -981,7 +985,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) + * long bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) * Description * Get tunnel metadata. This helper takes a pointer *key* to an * empty **struct bpf_tunnel_key** of **size**, that will be @@ -1032,7 +1036,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. 
* - * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) + * long bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) * Description * Populate tunnel metadata for packet associated to *skb.* The * tunnel metadata is set to the contents of *key*, of *size*. The @@ -1098,7 +1102,7 @@ union bpf_attr { * The value of the perf event counter read from the map, or a * negative error code in case of failure. * - * int bpf_redirect(u32 ifindex, u64 flags) + * long bpf_redirect(u32 ifindex, u64 flags) * Description * Redirect the packet to another net device of index *ifindex*. * This helper is somewhat similar to **bpf_clone_redirect**\ @@ -1145,7 +1149,7 @@ union bpf_attr { * The realm of the route for the packet associated to *skb*, or 0 * if none was found. * - * int bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * long bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) * Description * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf @@ -1190,7 +1194,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len) + * long bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len) * Description * This helper was provided as an easy way to load data from a * packet. It can be used to load *len* bytes from *offset* from @@ -1207,7 +1211,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags) + * long bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags) * Description * Walk a user or a kernel stack and return its id. 
To achieve * this, the helper needs *ctx*, which is a pointer to the context @@ -1276,7 +1280,7 @@ union bpf_attr { * The checksum result, or a negative error code in case of * failure. * - * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) + * long bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) * Description * Retrieve tunnel options metadata for the packet associated to * *skb*, and store the raw tunnel option data to the buffer *opt* @@ -1294,7 +1298,7 @@ union bpf_attr { * Return * The size of the option data retrieved. * - * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) + * long bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) * Description * Set tunnel options metadata for the packet associated to *skb* * to the option data contained in the raw buffer *opt* of *size*. @@ -1304,7 +1308,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags) + * long bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags) * Description * Change the protocol of the *skb* to *proto*. Currently * supported are transition from IPv4 to IPv6, and from IPv6 to @@ -1331,7 +1335,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_change_type(struct sk_buff *skb, u32 type) + * long bpf_skb_change_type(struct sk_buff *skb, u32 type) * Description * Change the packet type for the packet associated to *skb*. This * comes down to setting *skb*\ **->pkt_type** to *type*, except @@ -1358,7 +1362,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. 
* - * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index) + * long bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index) * Description * Check whether *skb* is a descendant of the cgroup2 held by * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. @@ -1389,7 +1393,7 @@ union bpf_attr { * Return * A pointer to the current task struct. * - * int bpf_probe_write_user(void *dst, const void *src, u32 len) + * long bpf_probe_write_user(void *dst, const void *src, u32 len) * Description * Attempt in a safe way to write *len* bytes from the buffer * *src* to *dst* in memory. It only works for threads that are in @@ -1408,7 +1412,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index) + * long bpf_current_task_under_cgroup(struct bpf_map *map, u32 index) * Description * Check whether the probe is being run in the context of a given * subset of the cgroup2 hierarchy. The cgroup2 to test is held by @@ -1420,7 +1424,7 @@ union bpf_attr { * * 1, if the *skb* task does not belong to the cgroup2. * * A negative error code, if an error occurred. * - * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) + * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) * Description * Resize (trim or grow) the packet associated to *skb* to the * new *len*. The *flags* are reserved for future usage, and must @@ -1444,7 +1448,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_pull_data(struct sk_buff *skb, u32 len) + * long bpf_skb_pull_data(struct sk_buff *skb, u32 len) * Description * Pull in non-linear data in case the *skb* is non-linear and not * all of *len* are part of the linear section.
Make *len* bytes @@ -1500,7 +1504,7 @@ union bpf_attr { * recalculation the next time the kernel tries to access this * hash or when the **bpf_get_hash_recalc**\ () helper is called. * - * int bpf_get_numa_node_id(void) + * long bpf_get_numa_node_id(void) * Description * Return the id of the current NUMA node. The primary use case * for this helper is the selection of sockets for the local NUMA @@ -1511,7 +1515,7 @@ union bpf_attr { * Return * The id of current NUMA node. * - * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags) + * long bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags) * Description * Grows headroom of packet associated to *skb* and adjusts the * offset of the MAC header accordingly, adding *len* bytes of @@ -1532,7 +1536,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta) + * long bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta) * Description * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that * it is possible to use a negative value for *delta*. This helper @@ -1547,7 +1551,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr) * Description * Copy a NUL terminated string from an unsafe kernel address * *unsafe_ptr* to *dst*. See **bpf_probe_read_kernel_str**\ () for @@ -1595,14 +1599,14 @@ union bpf_attr { * is returned (note that **overflowuid** might also be the actual * UID value for the socket). * - * u32 bpf_set_hash(struct sk_buff *skb, u32 hash) + * long bpf_set_hash(struct sk_buff *skb, u32 hash) * Description * Set the full hash for *skb* (set the field *skb*\ **->hash**) * to value *hash*. 
* Return * 0 * - * int bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) + * long bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) * Description * Emulate a call to **setsockopt()** on the socket associated to * *bpf_socket*, which must be a full socket. The *level* at @@ -1621,16 +1625,19 @@ union bpf_attr { * * * **SOL_SOCKET**, which supports the following *optname*\ s: * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**, - * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**. + * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**, + * **SO_BINDTODEVICE**, **SO_KEEPALIVE**. * * **IPPROTO_TCP**, which supports the following *optname*\ s: * **TCP_CONGESTION**, **TCP_BPF_IW**, - * **TCP_BPF_SNDCWND_CLAMP**. + * **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**, + * **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**, + * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**. * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) + * long bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) * Description * Grow or shrink the room for data in the packet associated to * *skb* by *len_diff*, and according to the selected *mode*. @@ -1676,7 +1683,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) + * long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) * Description * Redirect the packet to the endpoint referenced by *map* at * index *key*. Depending on its type, this *map* can contain @@ -1697,7 +1704,7 @@ union bpf_attr { * **XDP_REDIRECT** on success, or the value of the two lower bits * of the *flags* argument on error. 
* - * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) + * long bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) * Description * Redirect the packet to the socket referenced by *map* (of type * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and @@ -1708,7 +1715,7 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) + * long bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) * Description * Add an entry to, or update a *map* referencing sockets. The * *skops* is used as a new value for the entry associated to @@ -1727,7 +1734,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta) + * long bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta) * Description * Adjust the address pointed by *xdp_md*\ **->data_meta** by * *delta* (which can be positive or negative). Note that this @@ -1756,7 +1763,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size) + * long bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size) * Description * Read the value of a perf event counter, and store it into *buf* * of size *buf_size*. This helper relies on a *map* of type @@ -1806,7 +1813,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. 
* - * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size) + * long bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size) * Description * For an eBPF program attached to a perf event, retrieve the * value of the event counter associated to *ctx* and store it in @@ -1817,7 +1824,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) + * long bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) * Description * Emulate a call to **getsockopt()** on the socket associated to * *bpf_socket*, which must be a full socket. The *level* at @@ -1842,7 +1849,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_override_return(struct pt_regs *regs, u64 rc) + * long bpf_override_return(struct pt_regs *regs, u64 rc) * Description * Used for error injection, this helper uses kprobes to override * the return value of the probed function, and to set it to *rc*. @@ -1867,7 +1874,7 @@ union bpf_attr { * Return * 0 * - * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval) + * long bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval) * Description * Attempt to set the value of the **bpf_sock_ops_cb_flags** field * for the full TCP socket associated to *bpf_sock_ops* to @@ -1911,7 +1918,7 @@ union bpf_attr { * be set is returned (which comes down to 0 if all bits were set * as required). * - * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags) + * long bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags) * Description * This helper is used in programs implementing policies at the * socket level. If the message *msg* is allowed to pass (i.e.
if @@ -1925,7 +1932,7 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes) + * long bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes) * Description * For socket policies, apply the verdict of the eBPF program to * the next *bytes* (number of bytes) of message *msg*. @@ -1959,7 +1966,7 @@ union bpf_attr { * Return * 0 * - * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes) + * long bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes) * Description * For socket policies, prevent the execution of the verdict eBPF * program for message *msg* until *bytes* (byte number) have been @@ -1977,7 +1984,7 @@ union bpf_attr { * Return * 0 * - * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags) + * long bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags) * Description * For socket policies, pull in non-linear data from user space * for *msg* and set pointers *msg*\ **->data** and *msg*\ @@ -2008,7 +2015,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) + * long bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) * Description * Bind the socket associated to *ctx* to the address pointed by * *addr*, of length *addr_len*. This allows for making outgoing @@ -2026,7 +2033,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta) + * long bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta) * Description * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is * possible to both shrink and grow the packet tail. @@ -2040,7 +2047,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. 
* - * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags) + * long bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags) * Description * Retrieve the XFRM state (IP transform framework, see also * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*. @@ -2056,7 +2063,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags) + * long bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags) * Description * Return a user or a kernel stack in bpf program provided buffer. * To achieve this, the helper needs *ctx*, which is a pointer @@ -2089,7 +2096,7 @@ union bpf_attr { * A non-negative value equal to or less than *size* on success, * or a negative error in case of failure. * - * int bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header) + * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header) * Description * This helper is similar to **bpf_skb_load_bytes**\ () in that * it provides an easy way to load *len* bytes from *offset* @@ -2111,7 +2118,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags) + * long bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags) * Description * Do FIB lookup in kernel tables using parameters in *params*. 
* If lookup is successful and result shows packet is to be @@ -2142,7 +2149,7 @@ union bpf_attr { * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the * packet is not forwarded or needs assist from full stack * - * int bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) + * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) * Description * Add an entry to, or update a sockhash *map* referencing sockets. * The *skops* is used as a new value for the entry associated to @@ -2161,7 +2168,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags) + * long bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags) * Description * This helper is used in programs implementing policies at the * socket level. If the message *msg* is allowed to pass (i.e. if @@ -2175,7 +2182,7 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags) + * long bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags) * Description * This helper is used in programs implementing policies at the * skb socket level. If the sk_buff *skb* is allowed to pass (i.e. @@ -2189,7 +2196,7 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) + * long bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) * Description * Encapsulate the packet associated to *skb* within a Layer 3 * protocol header. This header is provided in the buffer at @@ -2226,7 +2233,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. 
* - * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len) + * long bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len) * Description * Store *len* bytes from address *from* into the packet * associated to *skb*, at *offset*. Only the flags, tag and TLVs @@ -2241,7 +2248,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta) + * long bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta) * Description * Adjust the size allocated to TLVs in the outermost IPv6 * Segment Routing Header contained in the packet associated to @@ -2257,7 +2264,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len) + * long bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len) * Description * Apply an IPv6 Segment Routing action of type *action* to the * packet associated to *skb*. Each action takes a parameter @@ -2286,7 +2293,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_rc_repeat(void *ctx) + * long bpf_rc_repeat(void *ctx) * Description * This helper is used in programs implementing IR decoding, to * report a successfully decoded repeat key message. This delays @@ -2305,7 +2312,7 @@ union bpf_attr { * Return * 0 * - * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) + * long bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) * Description * This helper is used in programs implementing IR decoding, to * report a successfully decoded key press with *scancode*, @@ -2370,7 +2377,7 @@ union bpf_attr { * Return * A pointer to the local storage area. 
* - * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) + * long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) * Description * Select a **SO_REUSEPORT** socket from a * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. @@ -2415,7 +2422,7 @@ union bpf_attr { * Look for an IPv6 socket. * * If the *netns* is a negative signed 32-bit integer, then the - * socket lookup table in the netns associated with the *ctx* will + * socket lookup table in the netns associated with the *ctx* * will be used. For the TC hooks, this is the netns of the device * in the skb. For socket hooks, this is the netns of the socket. * If *netns* is any other signed 32-bit value greater than or @@ -2452,7 +2459,7 @@ union bpf_attr { * Look for an IPv6 socket. * * If the *netns* is a negative signed 32-bit integer, then the - * socket lookup table in the netns associated with the *ctx* will + * socket lookup table in the netns associated with the *ctx* * will be used. For the TC hooks, this is the netns of the device * in the skb. For socket hooks, this is the netns of the socket. * If *netns* is any other signed 32-bit value greater than or @@ -2471,7 +2478,7 @@ union bpf_attr { * result is from *reuse*\ **->socks**\ [] using the hash of the * tuple. * - * int bpf_sk_release(struct bpf_sock *sock) + * long bpf_sk_release(struct bpf_sock *sock) * Description * Release the reference held by *sock*. *sock* must be a * non-**NULL** pointer that was returned from @@ -2479,7 +2486,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) + * long bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) * Description * Push an element *value* in *map*. *flags* is one of: * @@ -2489,19 +2496,19 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. 
* - * int bpf_map_pop_elem(struct bpf_map *map, void *value) + * long bpf_map_pop_elem(struct bpf_map *map, void *value) * Description * Pop an element from *map*. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_peek_elem(struct bpf_map *map, void *value) + * long bpf_map_peek_elem(struct bpf_map *map, void *value) * Description * Get an element from *map* without removing it. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) + * long bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) * Description * For socket policies, insert *len* bytes into *msg* at offset * *start*. @@ -2517,7 +2524,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) + * long bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) * Description * Will remove *len* bytes from a *msg* starting at byte *start*. * This may result in **ENOMEM** errors under certain situations if @@ -2529,7 +2536,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y) + * long bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y) * Description * This helper is used in programs implementing IR decoding, to * report a successfully decoded pointer movement. @@ -2543,7 +2550,7 @@ union bpf_attr { * Return * 0 * - * int bpf_spin_lock(struct bpf_spin_lock *lock) + * long bpf_spin_lock(struct bpf_spin_lock *lock) * Description * Acquire a spinlock represented by the pointer *lock*, which is * stored as part of a value of a map. 
Taking the lock allows to @@ -2591,7 +2598,7 @@ union bpf_attr { * Return * 0 * - * int bpf_spin_unlock(struct bpf_spin_lock *lock) + * long bpf_spin_unlock(struct bpf_spin_lock *lock) * Description * Release the *lock* previously locked by a call to * **bpf_spin_lock**\ (\ *lock*\ ). @@ -2614,7 +2621,7 @@ union bpf_attr { * A **struct bpf_tcp_sock** pointer on success, or **NULL** in * case of failure. * - * int bpf_skb_ecn_set_ce(struct sk_buff *skb) + * long bpf_skb_ecn_set_ce(struct sk_buff *skb) * Description * Set ECN (Explicit Congestion Notification) field of IP header * to **CE** (Congestion Encountered) if current value is **ECT** @@ -2651,7 +2658,7 @@ union bpf_attr { * result is from *reuse*\ **->socks**\ [] using the hash of the * tuple. * - * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * long bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) * Description * Check whether *iph* and *th* contain a valid SYN cookie ACK for * the listening socket in *sk*. @@ -2666,7 +2673,7 @@ union bpf_attr { * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative * error otherwise. * - * int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags) + * long bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags) * Description * Get name of sysctl in /proc/sys/ and copy it into provided by * program buffer *buf* of size *buf_len*. @@ -2682,7 +2689,7 @@ union bpf_attr { * **-E2BIG** if the buffer wasn't big enough (*buf* will contain * truncated name in this case). * - * int bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) + * long bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) * Description * Get current value of sysctl as it is presented in /proc/sys * (incl. 
newline, etc), and copy it as a string into provided @@ -2701,7 +2708,7 @@ union bpf_attr { * **-EINVAL** if current value was unavailable, e.g. because * sysctl is uninitialized and read returns -EIO for it. * - * int bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) + * long bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) * Description * Get new value being written by user space to sysctl (before * the actual write happens) and copy it as a string into @@ -2718,7 +2725,7 @@ union bpf_attr { * * **-EINVAL** if sysctl is being read. * - * int bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len) + * long bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len) * Description * Override new value being written by user space to sysctl with * value provided by program in buffer *buf* of size *buf_len*. @@ -2735,7 +2742,7 @@ union bpf_attr { * * **-EINVAL** if sysctl is being read. * - * int bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res) + * long bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res) * Description * Convert the initial part of the string from buffer *buf* of * size *buf_len* to a long integer according to the given base @@ -2759,7 +2766,7 @@ union bpf_attr { * * **-ERANGE** if resulting value was out of range. * - * int bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res) + * long bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res) * Description * Convert the initial part of the string from buffer *buf* of * size *buf_len* to an unsigned long integer according to the @@ -2810,7 +2817,7 @@ union bpf_attr { * **NULL** if not found or there was an error in adding * a new bpf-local-storage. 
* - * int bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk) + * long bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk) * Description * Delete a bpf-local-storage from a *sk*. * Return @@ -2818,7 +2825,7 @@ union bpf_attr { * * **-ENOENT** if the bpf-local-storage cannot be found. * - * int bpf_send_signal(u32 sig) + * long bpf_send_signal(u32 sig) * Description * Send signal *sig* to the process of the current task. * The signal may be delivered to any of this process's threads. @@ -2859,7 +2866,7 @@ union bpf_attr { * * **-EPROTONOSUPPORT** IP packet version is not 4 or 6 * - * int bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * long bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) * Description * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf @@ -2883,21 +2890,21 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr) * Description * Safely attempt to read *size* bytes from user space address * *unsafe_ptr* and store the data in *dst*. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr) * Description * Safely attempt to read *size* bytes from kernel space address * *unsafe_ptr* and store the data in *dst*. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr) * Description * Copy a NUL terminated string from an unsafe user address * *unsafe_ptr* to *dst*. 
The *size* should include the @@ -2941,7 +2948,7 @@ union bpf_attr { * including the trailing NUL character. On error, a negative * value. * - * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr) * Description * Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr* * to *dst*. Same semantics as with **bpf_probe_read_user_str**\ () apply. @@ -2949,14 +2956,14 @@ union bpf_attr { * On success, the strictly positive length of the string, including * the trailing NUL character. On error, a negative value. * - * int bpf_tcp_send_ack(void *tp, u32 rcv_nxt) + * long bpf_tcp_send_ack(void *tp, u32 rcv_nxt) * Description * Send out a tcp-ack. *tp* is the in-kernel struct **tcp_sock**. * *rcv_nxt* is the ack_seq to be sent out. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_send_signal_thread(u32 sig) + * long bpf_send_signal_thread(u32 sig) * Description * Send signal *sig* to the thread corresponding to the current task. * Return @@ -2976,7 +2983,7 @@ union bpf_attr { * Return * The 64 bit jiffies * - * int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags) + * long bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags) * Description * For an eBPF program attached to a perf event, retrieve the * branch records (**struct perf_branch_entry**) associated to *ctx* @@ -2995,7 +3002,7 @@ union bpf_attr { * * **-ENOENT** if architecture does not support branch records. * - * int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size) + * long bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size) * Description * Returns 0 on success, values for *pid* and *tgid* as seen from the current * *namespace* will be returned in *nsdata*. 
@@ -3007,7 +3014,7 @@ union bpf_attr { * * **-ENOENT** if pidns does not exists for the current task. * - * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * long bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) * Description * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf @@ -3062,8 +3069,12 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * int bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags) + * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags) * Description + * Helper is overloaded depending on BPF program type. This + * description applies to **BPF_PROG_TYPE_SCHED_CLS** and + * **BPF_PROG_TYPE_SCHED_ACT** programs. + * * Assign the *sk* to the *skb*. When combined with appropriate * routing configuration to receive the packet towards the socket, * will cause *skb* to be delivered to the specified socket. @@ -3089,6 +3100,56 @@ union bpf_attr { * **-ESOCKTNOSUPPORT** if the socket type is not supported * (reuseport). * + * long bpf_sk_assign(struct bpf_sk_lookup *ctx, struct bpf_sock *sk, u64 flags) + * Description + * Helper is overloaded depending on BPF program type. This + * description applies to **BPF_PROG_TYPE_SK_LOOKUP** programs. + * + * Select the *sk* as a result of a socket lookup. + * + * For the operation to succeed passed socket must be compatible + * with the packet description provided by the *ctx* object. + * + * L4 protocol (**IPPROTO_TCP** or **IPPROTO_UDP**) must + * be an exact match. While IP family (**AF_INET** or + * **AF_INET6**) must be compatible, that is IPv6 sockets + * that are not v6-only can be selected for IPv4 packets. + * + * Only TCP listeners and UDP unconnected sockets can be + * selected. *sk* can also be NULL to reset any previous + * selection. 
+ * + * *flags* argument can be a combination of the following values: + * + * * **BPF_SK_LOOKUP_F_REPLACE** to override the previous + * socket selection, potentially done by a BPF program + * that ran before us. + * + * * **BPF_SK_LOOKUP_F_NO_REUSEPORT** to skip + * load-balancing within reuseport group for the socket + * being selected. + * + * On success *ctx->sk* will point to the selected socket. + * + * Return + * 0 on success, or a negative errno in case of failure. + * + * * **-EAFNOSUPPORT** if socket family (*sk->family*) is + * not compatible with packet family (*ctx->family*). + * + * * **-EEXIST** if socket has been already selected, + * potentially by another program, and + * **BPF_SK_LOOKUP_F_REPLACE** flag was not specified. + * + * * **-EINVAL** if unsupported flags were specified. + * + * * **-EPROTOTYPE** if socket L4 protocol + * (*sk->protocol*) doesn't match packet protocol + * (*ctx->protocol*). + * + * * **-ESOCKTNOSUPPORT** if socket is not in allowed + * state (TCP listening or UDP unconnected). + * * u64 bpf_ktime_get_boot_ns(void) * Description * Return the time elapsed since system boot, in nanoseconds. @@ -3097,7 +3158,7 @@ union bpf_attr { * Return * Current *ktime*. * - * int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len) + * long bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len) * Description * **bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print * out the format string. @@ -3126,7 +3187,7 @@ union bpf_attr { * * **-EOVERFLOW** if an overflow happened: The same object will be tried again. * - * int bpf_seq_write(struct seq_file *m, const void *data, u32 len) + * long bpf_seq_write(struct seq_file *m, const void *data, u32 len) * Description * **bpf_seq_write**\ () uses seq_file **seq_write**\ () to write the data. * The *m* represents the seq_file.
The *data* and *len* represent the @@ -3222,7 +3283,7 @@ union bpf_attr { * Return * Requested value, or 0, if *flags* are not recognized. * - * int bpf_csum_level(struct sk_buff *skb, u64 level) + * long bpf_csum_level(struct sk_buff *skb, u64 level) * Description * Change the skbs checksum level by one layer up or down, or * reset it entirely to none in order to have the stack perform @@ -3253,6 +3314,69 @@ union bpf_attr { * case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level * is returned or the error code -EACCES in case the skb is not * subject to CHECKSUM_UNNECESSARY. + * + * struct tcp6_sock *bpf_skc_to_tcp6_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *tcp6_sock* pointer. + * Return + * *sk* if casting is valid, or NULL otherwise. + * + * struct tcp_sock *bpf_skc_to_tcp_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *tcp_sock* pointer. + * Return + * *sk* if casting is valid, or NULL otherwise. + * + * struct tcp_timewait_sock *bpf_skc_to_tcp_timewait_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *tcp_timewait_sock* pointer. + * Return + * *sk* if casting is valid, or NULL otherwise. + * + * struct tcp_request_sock *bpf_skc_to_tcp_request_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer. + * Return + * *sk* if casting is valid, or NULL otherwise. + * + * struct udp6_sock *bpf_skc_to_udp6_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *udp6_sock* pointer. + * Return + * *sk* if casting is valid, or NULL otherwise. + * + * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags) + * Description + * Return a user or a kernel stack in bpf program provided buffer. + * To achieve this, the helper needs *task*, which is a valid + * pointer to struct task_struct. To store the stacktrace, the + * bpf program provides *buf* with a nonnegative *size*. 
+ * + * The last argument, *flags*, holds the number of stack frames to + * skip (from 0 to 255), masked with + * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set + * the following flags: + * + * **BPF_F_USER_STACK** + * Collect a user space stack instead of a kernel stack. + * **BPF_F_USER_BUILD_ID** + * Collect buildid+offset instead of ips for user stack, + * only valid if **BPF_F_USER_STACK** is also specified. + * + * **bpf_get_task_stack**\ () can collect up to + * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject + * to sufficiently large buffer size. Note that + * this limit can be controlled with the **sysctl** program, and + * that it should be manually increased in order to profile long + * user stacks (such as stacks for Java programs). To do so, use: + * + * :: + * + * # sysctl kernel.perf_event_max_stack=<new value> + * Return + * A non-negative value equal to or less than *size* on success, + * or a negative error in case of failure. + * */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3390,7 +3514,14 @@ union bpf_attr { FN(ringbuf_submit), \ FN(ringbuf_discard), \ FN(ringbuf_query), \ - FN(csum_level), + FN(csum_level), \ + FN(skc_to_tcp6_sock), \ + FN(skc_to_tcp_sock), \ + FN(skc_to_tcp_timewait_sock), \ + FN(skc_to_tcp_request_sock), \ + FN(skc_to_udp6_sock), \ + FN(get_task_stack), \ + /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -3532,6 +3663,12 @@ enum { BPF_RINGBUF_HDR_SZ = 8, }; +/* BPF_FUNC_sk_assign flags in bpf_sk_lookup context. */ +enum { + BPF_SK_LOOKUP_F_REPLACE = (1ULL << 0), + BPF_SK_LOOKUP_F_NO_REUSEPORT = (1ULL << 1), +}; + /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, @@ -3775,6 +3912,19 @@ struct bpf_devmap_val { } bpf_prog; }; +/* CPUMAP map-value layout + * + * The struct data-layout of map-value is a configuration interface. + * New members can only be added to the end of this structure.
+ */ +struct bpf_cpumap_val { + __u32 qsize; /* queue size to remote target CPU */ + union { + int fd; /* prog fd on map write */ + __u32 id; /* prog id on map read */ + } bpf_prog; +}; + enum sk_action { SK_DROP = 0, SK_PASS, @@ -3912,7 +4062,7 @@ struct bpf_link_info { /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed * by user and intended to be used by socket (e.g. to bind to, depends on - * attach attach type). + * attach type). */ struct bpf_sock_addr { __u32 user_family; /* Allows 4-byte read, but no write. */ @@ -4261,4 +4411,19 @@ struct bpf_pidns_info { __u32 pid; __u32 tgid; }; + +/* User accessible data for SK_LOOKUP programs. Add new fields at the end. */ +struct bpf_sk_lookup { + __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */ + + __u32 family; /* Protocol family (AF_INET, AF_INET6) */ + __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */ + __u32 remote_ip4; /* Network byte order */ + __u32 remote_ip6[4]; /* Network byte order */ + __u32 remote_port; /* Network byte order */ + __u32 local_ip4; /* Network byte order */ + __u32 local_ip6[4]; /* Network byte order */ + __u32 local_port; /* Host byte order */ +}; + #endif /* __LINUX_BPF_H__ */ diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 7b600bf1..b7f23faa 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -451,6 +451,13 @@ enum devlink_attr { DEVLINK_ATTR_TRAP_POLICER_RATE, /* u64 */ DEVLINK_ATTR_TRAP_POLICER_BURST, /* u64 */ + DEVLINK_ATTR_PORT_FUNCTION, /* nested */ + + DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER, /* string */ + + DEVLINK_ATTR_PORT_LANES, /* u32 */ + DEVLINK_ATTR_PORT_SPLITTABLE, /* u8 */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, @@ -497,4 +504,12 @@ enum devlink_resource_unit { DEVLINK_RESOURCE_UNIT_ENTRY, }; +enum devlink_port_function_attr { + DEVLINK_PORT_FUNCTION_ATTR_UNSPEC, + DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, /* binary */ + 
+ __DEVLINK_PORT_FUNCTION_ATTR_MAX, + DEVLINK_PORT_FUNCTION_ATTR_MAX = __DEVLINK_PORT_FUNCTION_ATTR_MAX - 1 +}; + #endif /* _LINUX_DEVLINK_H_ */ diff --git a/include/uapi/linux/icmpv6.h b/include/uapi/linux/icmpv6.h index 1dc7cc67..70b283ff 100644 --- a/include/uapi/linux/icmpv6.h +++ b/include/uapi/linux/icmpv6.h @@ -68,6 +68,7 @@ struct icmp6hdr { #define icmp6_mtu icmp6_dataun.un_data32[0] #define icmp6_unused icmp6_dataun.un_data32[0] #define icmp6_maxdelay icmp6_dataun.un_data16[0] +#define icmp6_datagram_len icmp6_dataun.un_data8[0] #define icmp6_router icmp6_dataun.u_nd_advt.router #define icmp6_solicited icmp6_dataun.u_nd_advt.solicited #define icmp6_override icmp6_dataun.u_nd_advt.override diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 73e2d862..0490db9a 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -166,6 +166,10 @@ enum { IFLA_BRIDGE_MRP_RING_STATE, IFLA_BRIDGE_MRP_RING_ROLE, IFLA_BRIDGE_MRP_START_TEST, + IFLA_BRIDGE_MRP_INFO, + IFLA_BRIDGE_MRP_IN_ROLE, + IFLA_BRIDGE_MRP_IN_STATE, + IFLA_BRIDGE_MRP_START_IN_TEST, __IFLA_BRIDGE_MRP_MAX, }; @@ -228,6 +232,58 @@ enum { #define IFLA_BRIDGE_MRP_START_TEST_MAX (__IFLA_BRIDGE_MRP_START_TEST_MAX - 1) +enum { + IFLA_BRIDGE_MRP_INFO_UNSPEC, + IFLA_BRIDGE_MRP_INFO_RING_ID, + IFLA_BRIDGE_MRP_INFO_P_IFINDEX, + IFLA_BRIDGE_MRP_INFO_S_IFINDEX, + IFLA_BRIDGE_MRP_INFO_PRIO, + IFLA_BRIDGE_MRP_INFO_RING_STATE, + IFLA_BRIDGE_MRP_INFO_RING_ROLE, + IFLA_BRIDGE_MRP_INFO_TEST_INTERVAL, + IFLA_BRIDGE_MRP_INFO_TEST_MAX_MISS, + IFLA_BRIDGE_MRP_INFO_TEST_MONITOR, + IFLA_BRIDGE_MRP_INFO_I_IFINDEX, + IFLA_BRIDGE_MRP_INFO_IN_STATE, + IFLA_BRIDGE_MRP_INFO_IN_ROLE, + IFLA_BRIDGE_MRP_INFO_IN_TEST_INTERVAL, + IFLA_BRIDGE_MRP_INFO_IN_TEST_MAX_MISS, + __IFLA_BRIDGE_MRP_INFO_MAX, +}; + +#define IFLA_BRIDGE_MRP_INFO_MAX (__IFLA_BRIDGE_MRP_INFO_MAX - 1) + +enum { + IFLA_BRIDGE_MRP_IN_STATE_UNSPEC, + IFLA_BRIDGE_MRP_IN_STATE_IN_ID, + IFLA_BRIDGE_MRP_IN_STATE_STATE, + 
__IFLA_BRIDGE_MRP_IN_STATE_MAX, +}; + +#define IFLA_BRIDGE_MRP_IN_STATE_MAX (__IFLA_BRIDGE_MRP_IN_STATE_MAX - 1) + +enum { + IFLA_BRIDGE_MRP_IN_ROLE_UNSPEC, + IFLA_BRIDGE_MRP_IN_ROLE_RING_ID, + IFLA_BRIDGE_MRP_IN_ROLE_IN_ID, + IFLA_BRIDGE_MRP_IN_ROLE_ROLE, + IFLA_BRIDGE_MRP_IN_ROLE_I_IFINDEX, + __IFLA_BRIDGE_MRP_IN_ROLE_MAX, +}; + +#define IFLA_BRIDGE_MRP_IN_ROLE_MAX (__IFLA_BRIDGE_MRP_IN_ROLE_MAX - 1) + +enum { + IFLA_BRIDGE_MRP_START_IN_TEST_UNSPEC, + IFLA_BRIDGE_MRP_START_IN_TEST_IN_ID, + IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL, + IFLA_BRIDGE_MRP_START_IN_TEST_MAX_MISS, + IFLA_BRIDGE_MRP_START_IN_TEST_PERIOD, + __IFLA_BRIDGE_MRP_START_IN_TEST_MAX, +}; + +#define IFLA_BRIDGE_MRP_START_IN_TEST_MAX (__IFLA_BRIDGE_MRP_START_IN_TEST_MAX - 1) + struct br_mrp_instance { __u32 ring_id; __u32 p_ifindex; @@ -253,6 +309,25 @@ struct br_mrp_start_test { __u32 monitor; }; +struct br_mrp_in_state { + __u32 in_state; + __u16 in_id; +}; + +struct br_mrp_in_role { + __u32 ring_id; + __u32 in_role; + __u32 i_ifindex; + __u16 in_id; +}; + +struct br_mrp_start_in_test { + __u32 interval; + __u32 max_miss; + __u32 period; + __u16 in_id; +}; + struct bridge_stp_xstats { __u64 transition_blk; __u64 transition_fwd; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index a8901a39..b1bdcfb8 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -170,12 +170,22 @@ enum { IFLA_PROP_LIST, IFLA_ALT_IFNAME, /* Alternative ifname */ IFLA_PERM_ADDRESS, + IFLA_PROTO_DOWN_REASON, __IFLA_MAX }; #define IFLA_MAX (__IFLA_MAX - 1) +enum { + IFLA_PROTO_DOWN_REASON_UNSPEC, + IFLA_PROTO_DOWN_REASON_MASK, /* u32, mask for reason bits */ + IFLA_PROTO_DOWN_REASON_VALUE, /* u32, reason bit value */ + + __IFLA_PROTO_DOWN_REASON_CNT, + IFLA_PROTO_DOWN_REASON_MAX = __IFLA_PROTO_DOWN_REASON_CNT - 1 +}; + /* backwards compatibility for userspace */ #define IFLA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg)))) #define IFLA_PAYLOAD(n) 
NLMSG_PAYLOAD(n,sizeof(struct ifinfomsg)) @@ -342,6 +352,7 @@ enum { IFLA_BRPORT_ISOLATED, IFLA_BRPORT_BACKUP_PORT, IFLA_BRPORT_MRP_RING_OPEN, + IFLA_BRPORT_MRP_IN_OPEN, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) @@ -904,7 +915,14 @@ enum { #define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1) -/* HSR section */ +/* HSR/PRP section, both uses same interface */ + +/* Different redundancy protocols for hsr device */ +enum { + HSR_PROTOCOL_HSR, + HSR_PROTOCOL_PRP, + HSR_PROTOCOL_MAX, +}; enum { IFLA_HSR_UNSPEC, @@ -914,6 +932,9 @@ enum { IFLA_HSR_SUPERVISION_ADDR, /* Supervision frame multicast addr */ IFLA_HSR_SEQ_NR, IFLA_HSR_VERSION, /* HSR version */ + IFLA_HSR_PROTOCOL, /* Indicate different protocol than + * HSR. For example PRP. + */ __IFLA_HSR_MAX, }; diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index ca59dc76..0f68bcff 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -123,6 +123,7 @@ struct in_addr { #define IP_CHECKSUM 23 #define IP_BIND_ADDRESS_NO_PORT 24 #define IP_RECVFRAGSIZE 25 +#define IP_RECVERR_RFC4884 26 /* IP_MTU_DISCOVER values */ #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index d016ac9f..7e3a58e6 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -179,6 +179,7 @@ struct in6_flowlabel_req { #define IPV6_LEAVE_ANYCAST 28 #define IPV6_MULTICAST_ALL 29 #define IPV6_ROUTER_ALERT_ISOLATE 30 +#define IPV6_RECVERR_RFC4884 31 /* IPV6_MTU_DISCOVER values */ #define IPV6_PMTUDISC_DONT 0 diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index f009abf1..cd83b4f8 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -65,6 +65,7 @@ enum { INET_DIAG_REQ_NONE, INET_DIAG_REQ_BYTECODE, INET_DIAG_REQ_SK_BPF_STORAGES, + INET_DIAG_REQ_PROTOCOL, __INET_DIAG_REQ_MAX, }; diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 009b8f0b..32181230 
100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -86,4 +86,21 @@ enum { __MPTCP_PM_CMD_AFTER_LAST }; +#define MPTCP_INFO_FLAG_FALLBACK _BITUL(0) +#define MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED _BITUL(1) + +struct mptcp_info { + __u8 mptcpi_subflows; + __u8 mptcpi_add_addr_signal; + __u8 mptcpi_add_addr_accepted; + __u8 mptcpi_subflows_max; + __u8 mptcpi_add_addr_signal_max; + __u8 mptcpi_add_addr_accepted_max; + __u32 mptcpi_flags; + __u32 mptcpi_token; + __u64 mptcpi_write_seq; + __u64 mptcpi_snd_una; + __u64 mptcpi_rcv_nxt; +}; + #endif /* _MPTCP_H */ diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index eefcda8c..dc8b7220 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -30,6 +30,7 @@ enum { NDA_SRC_VNI, NDA_PROTOCOL, /* Originator of entry */ NDA_NH_ID, + NDA_FDB_EXT_ATTRS, __NDA_MAX }; @@ -172,4 +173,27 @@ enum { }; #define NDTA_MAX (__NDTA_MAX - 1) + /* FDB activity notification bits used in NFEA_ACTIVITY_NOTIFY: + * - FDB_NOTIFY_BIT - notify on activity/expire for any entry + * - FDB_NOTIFY_INACTIVE_BIT - mark as inactive to avoid multiple notifications + */ +enum { + FDB_NOTIFY_BIT = (1 << 0), + FDB_NOTIFY_INACTIVE_BIT = (1 << 1) +}; + +/* embedded into NDA_FDB_EXT_ATTRS: + * [NDA_FDB_EXT_ATTRS] = { + * [NFEA_ACTIVITY_NOTIFY] + * ... 
+ * } + */ +enum { + NFEA_UNSPEC, + NFEA_ACTIVITY_NOTIFY, + NFEA_DONT_REFRESH, + __NFEA_MAX +}; +#define NFEA_MAX (__NFEA_MAX - 1) + #endif diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 7576209d..ee95f42f 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -578,6 +578,9 @@ enum { TCA_FLOWER_KEY_MPLS_OPTS, + TCA_FLOWER_KEY_HASH, /* u32 */ + TCA_FLOWER_KEY_HASH_MASK, /* u32 */ + __TCA_FLOWER_MAX, }; diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index a95f3ae7..9e7c2c60 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -257,6 +257,8 @@ enum { TCA_RED_STAB, TCA_RED_MAX_P, TCA_RED_FLAGS, /* bitfield32 */ + TCA_RED_EARLY_DROP_BLOCK, /* u32 */ + TCA_RED_MARK_BLOCK, /* u32 */ __TCA_RED_MAX, }; diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index bcb1ba4d..5ad84e66 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -257,12 +257,12 @@ enum { /* rtm_protocol */ -#define RTPROT_UNSPEC 0 -#define RTPROT_REDIRECT 1 /* Route installed by ICMP redirects; - not used by current IPv4 */ -#define RTPROT_KERNEL 2 /* Route installed by kernel */ -#define RTPROT_BOOT 3 /* Route installed during boot */ -#define RTPROT_STATIC 4 /* Route installed by administrator */ +#define RTPROT_UNSPEC 0 +#define RTPROT_REDIRECT 1 /* Route installed by ICMP redirects; + not used by current IPv4 */ +#define RTPROT_KERNEL 2 /* Route installed by kernel */ +#define RTPROT_BOOT 3 /* Route installed during boot */ +#define RTPROT_STATIC 4 /* Route installed by administrator */ /* Values of protocol >= RTPROT_STATIC are not interpreted by kernel; they are just passed from user and back as is. @@ -271,22 +271,23 @@ enum { avoid conflicts. 
*/ -#define RTPROT_GATED 8 /* Apparently, GateD */ -#define RTPROT_RA 9 /* RDISC/ND router advertisements */ -#define RTPROT_MRT 10 /* Merit MRT */ -#define RTPROT_ZEBRA 11 /* Zebra */ -#define RTPROT_BIRD 12 /* BIRD */ -#define RTPROT_DNROUTED 13 /* DECnet routing daemon */ -#define RTPROT_XORP 14 /* XORP */ -#define RTPROT_NTK 15 /* Netsukuku */ -#define RTPROT_DHCP 16 /* DHCP client */ -#define RTPROT_MROUTED 17 /* Multicast daemon */ -#define RTPROT_BABEL 42 /* Babel daemon */ -#define RTPROT_BGP 186 /* BGP Routes */ -#define RTPROT_ISIS 187 /* ISIS Routes */ -#define RTPROT_OSPF 188 /* OSPF Routes */ -#define RTPROT_RIP 189 /* RIP Routes */ -#define RTPROT_EIGRP 192 /* EIGRP Routes */ +#define RTPROT_GATED 8 /* Apparently, GateD */ +#define RTPROT_RA 9 /* RDISC/ND router advertisements */ +#define RTPROT_MRT 10 /* Merit MRT */ +#define RTPROT_ZEBRA 11 /* Zebra */ +#define RTPROT_BIRD 12 /* BIRD */ +#define RTPROT_DNROUTED 13 /* DECnet routing daemon */ +#define RTPROT_XORP 14 /* XORP */ +#define RTPROT_NTK 15 /* Netsukuku */ +#define RTPROT_DHCP 16 /* DHCP client */ +#define RTPROT_MROUTED 17 /* Multicast daemon */ +#define RTPROT_KEEPALIVED 18 /* Keepalived daemon */ +#define RTPROT_BABEL 42 /* Babel daemon */ +#define RTPROT_BGP 186 /* BGP Routes */ +#define RTPROT_ISIS 187 /* ISIS Routes */ +#define RTPROT_OSPF 188 /* OSPF Routes */ +#define RTPROT_RIP 189 /* RIP Routes */ +#define RTPROT_EIGRP 192 /* EIGRP Routes */ /* rtm_scope @@ -775,6 +776,7 @@ enum { #define RTEXT_FILTER_BRVLAN (1 << 1) #define RTEXT_FILTER_BRVLAN_COMPRESSED (1 << 2) #define RTEXT_FILTER_SKIP_STATS (1 << 3) +#define RTEXT_FILTER_MRP (1 << 4) /* End of information exported to user level */ diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 7d91f4de..cee9f8e6 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -287,6 +287,7 @@ enum LINUX_MIB_TCPFASTOPENPASSIVEALTKEY, /* TCPFastOpenPassiveAltKey */ LINUX_MIB_TCPTIMEOUTREHASH, /* 
TCPTimeoutRehash */ LINUX_MIB_TCPDUPLICATEDATAREHASH, /* TCPDuplicateDataRehash */ + LINUX_MIB_TCPDSACKRECVSEGS, /* TCPDSACKRecvSegs */ __LINUX_MIB_MAX }; diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index f4e1003b..ee670e88 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -313,6 +313,7 @@ enum { TCP_NLA_SRTT, /* smoothed RTT in usecs */ TCP_NLA_TIMEOUT_REHASH, /* Timeout-triggered rehash attempts */ TCP_NLA_BYTES_NOTSENT, /* Bytes in write queue not yet sent */ + TCP_NLA_EDT, /* Earliest departure time (CLOCK_MONOTONIC) */ }; /* for TCP_MD5SIG socket option */ diff --git a/include/uapi/linux/xdp_diag.h b/include/uapi/linux/xdp_diag.h index 78b2591a..66b9973b 100644 --- a/include/uapi/linux/xdp_diag.h +++ b/include/uapi/linux/xdp_diag.h @@ -30,6 +30,7 @@ struct xdp_diag_msg { #define XDP_SHOW_RING_CFG (1 << 1) #define XDP_SHOW_UMEM (1 << 2) #define XDP_SHOW_MEMINFO (1 << 3) +#define XDP_SHOW_STATS (1 << 4) enum { XDP_DIAG_NONE, @@ -41,6 +42,7 @@ enum { XDP_DIAG_UMEM_FILL_RING, XDP_DIAG_UMEM_COMPLETION_RING, XDP_DIAG_MEMINFO, + XDP_DIAG_STATS, __XDP_DIAG_MAX, }; @@ -69,4 +71,13 @@ struct xdp_diag_umem { __u32 refs; }; +struct xdp_diag_stats { + __u64 n_rx_dropped; + __u64 n_rx_invalid; + __u64 n_rx_full; + __u64 n_fill_ring_empty; + __u64 n_tx_invalid; + __u64 n_tx_ring_empty; +}; + #endif /* _LINUX_XDP_DIAG_H */ diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 50450f3f..eec67a2b 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -387,6 +387,7 @@ struct xfrm_usersa_info { }; #define XFRM_SA_XFLAG_DONT_ENCAP_DSCP 1 +#define XFRM_SA_XFLAG_OSEQ_MAY_WRAP 2 struct xfrm_usersa_id { xfrm_address_t daddr; diff --git a/ip/ipnexthop.c b/ip/ipnexthop.c index c33cef0c..22c66491 100644 --- a/ip/ipnexthop.c +++ b/ip/ipnexthop.c @@ -20,6 +20,7 @@ static struct { unsigned int ifindex; unsigned int master; unsigned int proto; + unsigned int fdb; } filter; enum { @@ -39,7 +40,7 @@ static void 
usage(void) " ip nexthop { add | replace } id ID NH [ protocol ID ]\n" " ip nexthop { get| del } id ID\n" "SELECTOR := [ id ID ] [ dev DEV ] [ vrf NAME ] [ master DEV ]\n" - " [ groups ]\n" + " [ groups ] [ fdb ]\n" "NH := { blackhole | [ via ADDRESS ] [ dev DEV ] [ onlink ]\n" " [ encap ENCAPTYPE ENCAPHDR ] | group GROUP ] }\n" "GROUP := [ id[,weight]>//... ]\n" @@ -70,6 +71,12 @@ static int nh_dump_filter(struct nlmsghdr *nlh, int reqlen) return err; } + if (filter.fdb) { + err = addattr_l(nlh, reqlen, NHA_FDB, NULL, 0); + if (err) + return err; + } + return 0; } @@ -259,6 +266,9 @@ int print_nexthop(struct nlmsghdr *n, void *arg) if (tb[NHA_OIF]) print_rt_flags(fp, nhm->nh_flags); + if (tb[NHA_FDB]) + print_null(PRINT_ANY, "fdb", "fdb", NULL); + print_string(PRINT_FP, NULL, "%s", "\n"); close_json_object(); fflush(fp); @@ -385,6 +395,8 @@ static int ipnh_modify(int cmd, unsigned int flags, int argc, char **argv) addattr_l(&req.n, sizeof(req), NHA_BLACKHOLE, NULL, 0); if (req.nhm.nh_family == AF_UNSPEC) req.nhm.nh_family = AF_INET; + } else if (!strcmp(*argv, "fdb")) { + addattr_l(&req.n, sizeof(req), NHA_FDB, NULL, 0); } else if (!strcmp(*argv, "onlink")) { nh_flags |= RTNH_F_ONLINK; } else if (!strcmp(*argv, "group")) { @@ -487,6 +499,8 @@ static int ipnh_list_flush(int argc, char **argv, int action) if (get_unsigned(&proto, *argv, 0)) invarg("invalid protocol value", *argv); filter.proto = proto; + } else if (!matches(*argv, "fdb")) { + filter.fdb = 1; } else if (matches(*argv, "help") == 0) { usage(); } else { diff --git a/ip/ipxfrm.c b/ip/ipxfrm.c index fec206ab..cac8ba25 100644 --- a/ip/ipxfrm.c +++ b/ip/ipxfrm.c @@ -953,6 +953,9 @@ void xfrm_state_info_print(struct xfrm_usersa_info *xsinfo, XFRM_FLAG_PRINT(fp, extra_flags, XFRM_SA_XFLAG_DONT_ENCAP_DSCP, "dont-encap-dscp"); + XFRM_FLAG_PRINT(fp, extra_flags, + XFRM_SA_XFLAG_OSEQ_MAY_WRAP, + "oseq-may-wrap"); if (extra_flags) fprintf(fp, "%x", extra_flags); } diff --git a/ip/xfrm_state.c b/ip/xfrm_state.c 
index f4bf3356..ddf784ca 100644 --- a/ip/xfrm_state.c +++ b/ip/xfrm_state.c @@ -104,7 +104,7 @@ static void usage(void) "FLAG-LIST := [ FLAG-LIST ] FLAG\n" "FLAG := noecn | decap-dscp | nopmtudisc | wildrecv | icmp | af-unspec | align4 | esn\n" "EXTRA-FLAG-LIST := [ EXTRA-FLAG-LIST ] EXTRA-FLAG\n" - "EXTRA-FLAG := dont-encap-dscp\n" + "EXTRA-FLAG := dont-encap-dscp | oseq-may-wrap\n" "SELECTOR := [ src ADDR[/PLEN] ] [ dst ADDR[/PLEN] ] [ dev DEV ] [ UPSPEC ]\n" "UPSPEC := proto { { tcp | udp | sctp | dccp } [ sport PORT ] [ dport PORT ] |\n" " { icmp | ipv6-icmp | mobility-header } [ type NUMBER ] [ code NUMBER ] |\n" @@ -253,6 +253,8 @@ static int xfrm_state_extra_flag_parse(__u32 *extra_flags, int *argcp, char ***a while (1) { if (strcmp(*argv, "dont-encap-dscp") == 0) *extra_flags |= XFRM_SA_XFLAG_DONT_ENCAP_DSCP; + else if (strcmp(*argv, "oseq-may-wrap") == 0) + *extra_flags |= XFRM_SA_XFLAG_OSEQ_MAY_WRAP; else { PREV_ARG(); /* back track */ break; diff --git a/lib/rt_names.c b/lib/rt_names.c index 41cccfb8..c40d2e77 100644 --- a/lib/rt_names.c +++ b/lib/rt_names.c @@ -120,27 +120,28 @@ static void rtnl_tab_initialize(const char *file, char **tab, int size) } static char *rtnl_rtprot_tab[256] = { - [RTPROT_UNSPEC] = "unspec", - [RTPROT_REDIRECT] = "redirect", - [RTPROT_KERNEL] = "kernel", - [RTPROT_BOOT] = "boot", - [RTPROT_STATIC] = "static", - - [RTPROT_GATED] = "gated", - [RTPROT_RA] = "ra", - [RTPROT_MRT] = "mrt", - [RTPROT_ZEBRA] = "zebra", - [RTPROT_BIRD] = "bird", - [RTPROT_BABEL] = "babel", - [RTPROT_DNROUTED] = "dnrouted", - [RTPROT_XORP] = "xorp", - [RTPROT_NTK] = "ntk", - [RTPROT_DHCP] = "dhcp", - [RTPROT_BGP] = "bgp", - [RTPROT_ISIS] = "isis", - [RTPROT_OSPF] = "ospf", - [RTPROT_RIP] = "rip", - [RTPROT_EIGRP] = "eigrp", + [RTPROT_UNSPEC] = "unspec", + [RTPROT_REDIRECT] = "redirect", + [RTPROT_KERNEL] = "kernel", + [RTPROT_BOOT] = "boot", + [RTPROT_STATIC] = "static", + + [RTPROT_GATED] = "gated", + [RTPROT_RA] = "ra", + [RTPROT_MRT] = "mrt", + 
[RTPROT_ZEBRA] = "zebra", + [RTPROT_BIRD] = "bird", + [RTPROT_BABEL] = "babel", + [RTPROT_DNROUTED] = "dnrouted", + [RTPROT_XORP] = "xorp", + [RTPROT_NTK] = "ntk", + [RTPROT_DHCP] = "dhcp", + [RTPROT_KEEPALIVED] = "keepalived", + [RTPROT_BGP] = "bgp", + [RTPROT_ISIS] = "isis", + [RTPROT_OSPF] = "ospf", + [RTPROT_RIP] = "rip", + [RTPROT_EIGRP] = "eigrp", }; diff --git a/man/man8/bridge.8 b/man/man8/bridge.8 index 08c53f27..b0600576 100644 --- a/man/man8/bridge.8 +++ b/man/man8/bridge.8 @@ -67,18 +67,18 @@ bridge \- show / manipulate bridge addresses and devices .IR DEV " { " .BR local " | " static " | " dynamic " } [ " .BR self " ] [ " master " ] [ " router " ] [ " use " ] [ " extern_learn " ] [ " sticky " ] [ " +.B src_vni +.IR VNI " ] { [" .B dst .IR IPADDR " ] [ " -.B src_vni -.IR VNI " ] [" .B vni .IR VNI " ] [" .B port .IR PORT " ] [" .B via -.IR DEVICE " ] [" -.B src_vni -.IR VNI " ]" +.IR DEVICE " ] | " +.B nhid +.IR NHID " } " .ti -8 .BR "bridge fdb" " [ [ " show " ] [ " @@ -588,6 +588,11 @@ device name of the outgoing interface for the VXLAN device driver to reach the remote VXLAN tunnel endpoint. +.TP +.BI nhid " NHID " +ecmp nexthop group for the VXLAN device driver +to reach remote VXLAN tunnel endpoints. + .SS bridge fdb append - append a forwarding database entry This command adds a new fdb entry with an already known .IR LLADDR . 
diff --git a/man/man8/devlink-health.8 b/man/man8/devlink-health.8 index 215f549b..47b96135 100644 --- a/man/man8/devlink-health.8 +++ b/man/man8/devlink-health.8 @@ -19,37 +19,37 @@ devlink-health \- devlink health reporting and recovery .ti -8 .B devlink health show -.RI "[ " DEV "" +.RI "[ { " DEV " | " DEV/PORT_INDEX " }" .B reporter .RI ""REPORTER " ] " .ti -8 .B devlink health recover -.RI "" DEV "" +.RI "{ " DEV " | " DEV/PORT_INDEX " }" .B reporter .RI "" REPORTER "" .ti -8 .B devlink health diagnose -.RI "" DEV "" +.RI "{ " DEV " | " DEV/PORT_INDEX " }" .B reporter .RI "" REPORTER "" .ti -8 .B devlink health dump show -.RI "" DEV "" +.RI "{ " DEV " | " DEV/PORT_INDEX " }" .B reporter .RI "" REPORTER "" .ti -8 .B devlink health dump clear -.RI "" DEV "" +.RI "{ " DEV " | " DEV/PORT_INDEX " }" .B reporter .RI "" REPORTER "" .ti -8 .B devlink health set -.RI "" DEV "" +.RI "{ " DEV " | " DEV/PORT_INDEX " }" .B reporter .RI "" REPORTER "" [ @@ -64,15 +64,19 @@ devlink-health \- devlink health reporting and recovery .B devlink health help .SH "DESCRIPTION" -.SS devlink health show - Show status and configuration on all supported reporters on all devlink devices. +.SS devlink health show - Show status and configuration on all supported reporters. +Displays info about reporters registered on devlink devices and ports. .PP .I "DEV" - specifies the devlink device. +.br +.I DEV/PORT_INDEX +- specifies the devlink port. .PP .I "REPORTER" -- specifies the reporter's name registered on the devlink device. +- specifies the reporter's name registered on specified devlink device or port. .SS devlink health recover - Initiate a recovery operation on a reporter. This action performs a recovery and increases the recoveries counter on success. @@ -80,20 +84,26 @@ This action performs a recovery and increases the recoveries counter on success. .PP .I "DEV" - specifies the devlink device. +.br +.I DEV/PORT_INDEX +- specifies the devlink port. 
.PP .I "REPORTER" -- specifies the reporter's name registered on the devlink device. +- specifies the reporter's name registered on specified devlink device or port. .SS devlink health diagnose - Retrieve diagnostics data on a reporter. .PP -.I "DEV" +.I DEV - specifies the devlink device. +.br +.I DEV/PORT_INDEX +- specifies the devlink port. .PP .I "REPORTER" -- specifies the reporter's name registered on the devlink device. +- specifies the reporter's name registered on specified devlink device or port. .SS devlink health dump show - Display the last saved dump. @@ -111,10 +121,13 @@ reporter reports on an error or manually at the user's request. .PP .I "DEV" - specifies the devlink device. +.br +.I DEV/PORT_INDEX +- specifies the devlink port. .PP .I "REPORTER" -- specifies the reporter's name registered on the devlink device. +- specifies the reporter's name registered on specified devlink device or port. .SS devlink health dump clear - Delete the saved dump. Deleting the saved dump enables a generation of a new dump on @@ -126,10 +139,13 @@ the next "devlink health dump show" command. .PP .I "DEV" - specifies the devlink device. +.br +.I DEV/PORT_INDEX +- specifies the devlink port. .PP .I "REPORTER" -- specifies the reporter's name registered on the devlink device. +- specifies the reporter's name registered on specified devlink device or port. .SS devlink health set - Configure health reporter. Please note that some params are not supported on a reporter which @@ -138,10 +154,13 @@ doesn't support a recovery or dump method. .PP .I "DEV" - specifies the devlink device. +.br +.I DEV/PORT_INDEX +- specifies the devlink port. .PP .I "REPORTER" -- specifies the reporter's name registered on the devlink device. +- specifies the reporter's name registered on specified devlink device or port. .TP .BI grace_period " MSEC " @@ -159,38 +178,55 @@ Indicates whether the devlink should execute automatic dump on error. 
.PP devlink health show .RS 4 -List status and configuration of available reporters on devices. +List status and configuration of available reporters on devices and ports. +.RE +.PP +devlink health show pci/0000:00:09.0/1 reporter tx +.RS 4 +List status and configuration of tx reporter registered on port on pci/0000:00:09.0/1 .RE .PP -devlink health recover pci/0000:00:09.0 reporter tx +devlink health recover pci/0000:00:09.0 reporter fw_fatal .RS 4 -Initiate recovery on tx reporter registered on pci/0000:00:09.0. +Initiate recovery on fw_fatal reporter registered on device on pci/0000:00:09.0. .RE .PP -devlink health diagnose pci/0000:00:09.0 reporter tx +devlink health recover pci/0000:00:09.0/1 reporter tx +.RS 4 +Initiate recovery on tx reporter registered on port on pci/0000:00:09.0/1. +.RE +.PP +devlink health diagnose pci/0000:00:09.0 reporter fw .RS 4 List diagnostics data on the specified device and reporter. .RE .PP -devlink health dump show pci/0000:00:09.0 reporter tx +devlink health dump show pci/0000:00:09.0/1 reporter tx .RS 4 -Display the last saved dump on the specified device and reporter. +Display the last saved dump on the specified port and reporter. .RE .PP -devlink health dump clear pci/0000:00:09.0 reporter tx +devlink health dump clear pci/0000:00:09.0/1 reporter tx .RS 4 -Delete saved dump on the specified device and reporter. +Delete saved dump on the specified port and reporter. .RE .PP -devlink health set pci/0000:00:09.0 reporter tx grace_period 3500 +devlink health set pci/0000:00:09.0 reporter fw_fatal grace_period 3500 .RS 4 Set time interval between auto recoveries to minimum of 3500 msec on the specified device and reporter. .RE .PP -devlink health set pci/0000:00:09.0 reporter tx auto_recover false +devlink health set pci/0000:00:09.0/1 reporter tx grace_period 3500 +.RS 4 +Set time interval between auto recoveries to minimum of 3500 msec on +the specified port and reporter. 
+.RE +.PP +devlink health set pci/0000:00:09.0 reporter fw_fatal auto_recover false .RS 4 Turn off auto recovery on the specified device and reporter. + .RE .SH SEE ALSO .BR devlink (8), diff --git a/man/man8/devlink-port.8 b/man/man8/devlink-port.8 index 188bffb7..966faae6 100644 --- a/man/man8/devlink-port.8 +++ b/man/man8/devlink-port.8 @@ -39,6 +39,10 @@ devlink-port \- devlink port configuration .B devlink port show .RI "[ " DEV/PORT_INDEX " ]" +.ti -8 +.B devlink port health +.RI "{ " show " | " recover " | " diagnose " | " dump " | " set " }" + .ti -8 .B devlink port help @@ -91,6 +95,10 @@ Could be performed on any split port of the same split group. - specifies the devlink port to show. If this argument is omitted all ports are listed. +.SS devlink port health - devlink health reporting and recovery +Is an alias for +.BR devlink-health (8). + .SH "EXAMPLES" .PP devlink port show @@ -117,12 +125,23 @@ devlink port unsplit pci/0000:01:00.0/1 .RS 4 Unplit the specified previously split devlink port. .RE +.PP +devlink port health show +.RS 4 +Shows status and configuration of all supported reporters registered on all devlink ports. +.RE +.PP +devlink port health show pci/0000:01:00.0/1 reporter tx +.RS 4 +Shows status and configuration of tx reporter registered on pci/0000:01:00.0/1 devlink port. 
+.RE .SH SEE ALSO .BR devlink (8), .BR devlink-dev (8), .BR devlink-sb (8), .BR devlink-monitor (8), +.BR devlink-health (8), .br .SH AUTHOR diff --git a/man/man8/ip-nexthop.8 b/man/man8/ip-nexthop.8 index 68164f3c..4d55f4db 100644 --- a/man/man8/ip-nexthop.8 +++ b/man/man8/ip-nexthop.8 @@ -38,7 +38,8 @@ ip-nexthop \- nexthop object management .IR NAME " ] [ " .B master .IR DEV " ] [ " -.BR groups " ] " +.BR groups " ] [ " +.BR fdb " ]" .ti -8 .IR NH " := { " @@ -49,9 +50,11 @@ ip-nexthop \- nexthop object management .IR DEV " ] [ " .BR onlink " ] [ " .B encap -.IR ENCAP " ] | " +.IR ENCAP " ] [ " +.BR fdb " ] | " .B group -.IR GROUP " } " +.IR GROUP " [ " +.BR fdb " ] } " .ti -8 .IR ENCAP " := [ " @@ -125,6 +128,13 @@ weight (id,weight) and a '/' as a separator between entries. .TP .B blackhole create a blackhole nexthop +.TP +.B fdb +nexthop and nexthop groups for use with layer-2 fdb entries. +A fdb nexthop group can only have fdb nexthops. +Example: Used to represent a vxlan remote vtep ip. layer-2 vxlan +fdb entry pointing to an ecmp nexthop group containing multiple +remote vtep ips. .RE .TP @@ -148,6 +158,9 @@ show the nexthops using devices enslaved to given master device .TP .BI groups show only nexthop groups +.TP +.BI fdb +show only fdb nexthops and nexthop groups .RE .TP ip nexthop flush @@ -186,6 +199,17 @@ ip nexthop add id 4 group 1,5/2,11 Adds a nexthop with id 4. The nexthop is a group using nexthops with ids 1 and 2 with nexthop 1 at weight 5 and nexthop 2 at weight 11. .RE +.PP +ip nexthop add id 5 via 192.168.1.2 fdb +.RS 4 +Adds a fdb nexthop with id 5. +.RE +.PP +ip nexthop add id 7 group 5/6 fdb +.RS 4 +Adds a fdb nexthop group with id 7. A fdb nexthop group can only have +fdb nexthops. 
+.RE .SH SEE ALSO .br .BR ip (8) diff --git a/man/man8/ip-xfrm.8 b/man/man8/ip-xfrm.8 index aa28db49..4fa31651 100644 --- a/man/man8/ip-xfrm.8 +++ b/man/man8/ip-xfrm.8 @@ -217,7 +217,7 @@ ip-xfrm \- transform configuration .ti -8 .IR EXTRA-FLAG " := " -.B dont-encap-dscp +.BR dont-encap-dscp " | " oseq-may-wrap .ti -8 .BR "ip xfrm policy" " { " add " | " update " }" diff --git a/man/man8/rdma-resource.8 b/man/man8/rdma-resource.8 index 05030d0a..8d0d14c6 100644 --- a/man/man8/rdma-resource.8 +++ b/man/man8/rdma-resource.8 @@ -83,6 +83,11 @@ rdma res show qp link mlx5_4/1 lqpn 0-6 Limit to specific Local QPNs. .RE .PP +rdma res show qp link mlx5_4/1 lqpn 6 -r +.RS 4 +Driver specific details in raw format. +.RE +.PP rdma resource show cm_id dst-port 7174 .RS 4 Show CM_IDs with destination ip port of 7174. diff --git a/man/man8/rdma.8 b/man/man8/rdma.8 index 221bf334..c9e5d50d 100644 --- a/man/man8/rdma.8 +++ b/man/man8/rdma.8 @@ -51,6 +51,10 @@ If there were any errors during execution of the commands, the application retur .BR "\-d" , " --details" Output detailed information. Adding a second \-d includes driver-specific details. +.TP +.BR "\-r" , " --raw" +Output includes driver-specific details in raw format. + .TP .BR "\-p" , " --pretty" When combined with -j generate a pretty JSON output. diff --git a/man/man8/tc-red.8 b/man/man8/tc-red.8 index b5aaa986..662e4d8b 100644 --- a/man/man8/tc-red.8 +++ b/man/man8/tc-red.8 @@ -17,7 +17,11 @@ packets rate .B ] [ probability chance -.B ] [ adaptive ] +.B ] [ adaptive ] [ qevent early_drop block +index +.B ] [ qevent mark block +index +.B ] .SH DESCRIPTION Random Early Detection is a classless qdisc which manages its queue size @@ -134,6 +138,18 @@ Goal of Adaptive RED is to make 'probability' dynamic value between 1% and 50% t .B (max - min) / 2 .fi +.SH QEVENTS +See tc (8) for some general notes about qevents. 
The RED qdisc supports the +following qevents: + +.TP +early_drop +The associated block is executed when packets are early-dropped. This includes +non-ECT packets in ECN mode. +.TP +mark +The associated block is executed when packets are marked in ECN mode. + .SH EXAMPLE .P diff --git a/man/man8/tc.8 b/man/man8/tc.8 index 305bc569..7e9019f5 100644 --- a/man/man8/tc.8 +++ b/man/man8/tc.8 @@ -258,6 +258,25 @@ Traffic control filter that matches every packet. See .BR tc-matchall (8) for details. +.SH QEVENTS +Qdiscs may invoke user-configured actions when certain interesting events +take place in the qdisc. Each qevent can either be unused, or can have a +block attached to it. To this block are then attached filters using the "tc +block BLOCK_IDX" syntax. The block is executed when the qevent associated +with the attachment point takes place. For example, packet could be +dropped, or delayed, etc., depending on the qdisc and the qevent in +question. + +For example: +.PP +.RS +tc qdisc add dev eth0 root handle 1: red limit 500K avpkt 1K \\ + qevent early_drop block 10 +.RE +.RS +tc filter add block 10 matchall action mirred egress mirror dev eth1 +.RE + .SH CLASSLESS QDISCS The classless qdiscs are: .TP diff --git a/misc/ss.c b/misc/ss.c index 35066bf6..e5565725 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -63,6 +63,10 @@ #define AF_VSOCK PF_VSOCK #endif +#ifndef IPPROTO_MPTCP +#define IPPROTO_MPTCP 262 +#endif + #define BUF_CHUNK (1024 * 1024) /* Buffer chunk allocation size */ #define BUF_CHUNKS_MAX 5 /* Maximum number of allocated buffer chunks */ #define LEN_ALIGN(x) (((x) + 1) & ~1) @@ -189,6 +193,7 @@ static const char *dg_proto; enum { TCP_DB, + MPTCP_DB, DCCP_DB, UDP_DB, RAW_DB, @@ -209,7 +214,7 @@ enum { #define PACKET_DBM ((1<mptcpi_subflows) + out(" subflows:%d", s->mptcpi_subflows); + if (s->mptcpi_add_addr_signal) + out(" add_addr_signal:%d", s->mptcpi_add_addr_signal); + if (s->mptcpi_add_addr_signal) + out(" add_addr_accepted:%d", 
s->mptcpi_add_addr_accepted); + if (s->mptcpi_subflows_max) + out(" subflows_max:%d", s->mptcpi_subflows_max); + if (s->mptcpi_add_addr_signal_max) + out(" add_addr_signal_max:%d", s->mptcpi_add_addr_signal_max); + if (s->mptcpi_add_addr_accepted_max) + out(" add_addr_accepted_max:%d", s->mptcpi_add_addr_accepted_max); + if (s->mptcpi_flags & MPTCP_INFO_FLAG_FALLBACK) + out(" fallback"); + if (s->mptcpi_flags & MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED) + out(" remote_key"); + if (s->mptcpi_token) + out(" token:%x", s->mptcpi_token); + if (s->mptcpi_write_seq) + out(" write_seq:%llx", s->mptcpi_write_seq); + if (s->mptcpi_snd_una) + out(" snd_una:%llx", s->mptcpi_snd_una); + if (s->mptcpi_rcv_nxt) + out(" rcv_nxt:%llx", s->mptcpi_rcv_nxt); +} + +static void mptcp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r, + struct rtattr *tb[]) +{ + print_skmeminfo(tb, INET_DIAG_SKMEMINFO); + + if (tb[INET_DIAG_INFO]) { + struct mptcp_info *info; + int len = RTA_PAYLOAD(tb[INET_DIAG_INFO]); + + /* workaround for older kernels with less fields */ + if (len < sizeof(*info)) { + info = alloca(sizeof(*info)); + memcpy(info, RTA_DATA(tb[INET_DIAG_INFO]), len); + memset((char *)info + len, 0, sizeof(*info) - len); + } else + info = RTA_DATA(tb[INET_DIAG_INFO]); + + mptcp_stats_print(info); + } +} + static const char *format_host_sa(struct sockaddr_storage *sa) { union { @@ -3277,6 +3338,8 @@ static int inet_show_sock(struct nlmsghdr *nlh, out("\n\t"); if (s->type == IPPROTO_SCTP) sctp_show_info(nlh, r, tb); + else if (s->type == IPPROTO_MPTCP) + mptcp_show_info(nlh, r, tb); else tcp_show_info(nlh, r, tb); } @@ -3365,9 +3428,11 @@ static int sockdiag_send(int family, int fd, int protocol, struct filter *f) DIAG_REQUEST(req, struct inet_diag_req_v2 r); char *bc = NULL; int bclen; + __u32 proto; struct msghdr msg; - struct rtattr rta; - struct iovec iov[3]; + struct rtattr rta_bc; + struct rtattr rta_proto; + struct iovec iov[5]; int iovlen = 1; if (family == PF_UNSPEC) @@ 
-3400,15 +3465,26 @@ static int sockdiag_send(int family, int fd, int protocol, struct filter *f) if (f->f) { bclen = ssfilter_bytecompile(f->f, &bc); if (bclen) { - rta.rta_type = INET_DIAG_REQ_BYTECODE; - rta.rta_len = RTA_LENGTH(bclen); - iov[1] = (struct iovec){ &rta, sizeof(rta) }; + rta_bc.rta_type = INET_DIAG_REQ_BYTECODE; + rta_bc.rta_len = RTA_LENGTH(bclen); + iov[1] = (struct iovec){ &rta_bc, sizeof(rta_bc) }; iov[2] = (struct iovec){ bc, bclen }; req.nlh.nlmsg_len += RTA_LENGTH(bclen); iovlen = 3; } } + /* put extended protocol attribute, if required */ + if (protocol > 255) { + rta_proto.rta_type = INET_DIAG_REQ_PROTOCOL; + rta_proto.rta_len = RTA_LENGTH(sizeof(proto)); + proto = protocol; + iov[iovlen] = (struct iovec){ &rta_proto, sizeof(rta_proto) }; + iov[iovlen + 1] = (struct iovec){ &proto, sizeof(proto) }; + req.nlh.nlmsg_len += RTA_LENGTH(sizeof(proto)); + iovlen += 2; + } + msg = (struct msghdr) { .msg_name = (void *)&nladdr, .msg_namelen = sizeof(nladdr), @@ -3668,6 +3744,18 @@ outerr: } while (0); } +static int mptcp_show(struct filter *f) +{ + if (!filter_af_get(f, AF_INET) && !filter_af_get(f, AF_INET6)) + return 0; + + if (!getenv("PROC_NET_MPTCP") && !getenv("PROC_ROOT") + && inet_show_netlink(f, NULL, IPPROTO_MPTCP) == 0) + return 0; + + return 0; +} + static int dccp_show(struct filter *f) { if (!filter_af_get(f, AF_INET) && !filter_af_get(f, AF_INET6)) @@ -5108,6 +5196,7 @@ static void _usage(FILE *dest) " -6, --ipv6 display only IP version 6 sockets\n" " -0, --packet display PACKET sockets\n" " -t, --tcp display only TCP sockets\n" +" -M, --mptcp display only MPTCP sockets\n" " -S, --sctp display only SCTP sockets\n" " -u, --udp display only UDP sockets\n" " -d, --dccp display only DCCP sockets\n" @@ -5123,7 +5212,7 @@ static void _usage(FILE *dest) " -O, --oneline socket's data printed on a single line\n" "\n" " -A, --query=QUERY, --socket=QUERY\n" -" QUERY := 
{all|inet|tcp|udp|raw|unix|unix_dgram|unix_stream|unix_seqpacket|packet|netlink|vsock_stream|vsock_dgram|tipc}[,QUERY]\n" +" QUERY := {all|inet|tcp|mptcp|udp|raw|unix|unix_dgram|unix_stream|unix_seqpacket|packet|netlink|vsock_stream|vsock_dgram|tipc}[,QUERY]\n" "\n" " -D, --diag=FILE Dump raw information about TCP sockets to FILE\n" " -F, --filter=FILE read filter information from FILE\n" @@ -5250,6 +5339,7 @@ static const struct option long_opts[] = { { "kill", 0, 0, 'K' }, { "no-header", 0, 0, 'H' }, { "xdp", 0, 0, OPT_XDPSOCK}, + { "mptcp", 0, 0, 'M' }, { "oneline", 0, 0, 'O' }, { 0 } @@ -5266,7 +5356,7 @@ int main(int argc, char *argv[]) int state_filter = 0; while ((ch = getopt_long(argc, argv, - "dhaletuwxnro460spbEf:miA:D:F:vVzZN:KHSO", + "dhaletuwxnro460spbEf:mMiA:D:F:vVzZN:KHSO", long_opts, NULL)) != EOF) { switch (ch) { case 'n': @@ -5341,6 +5431,9 @@ int main(int argc, char *argv[]) case OPT_XDPSOCK: filter_af_set(¤t_filter, AF_XDP); break; + case 'M': + filter_db_set(¤t_filter, MPTCP_DB, true); + break; case 'f': if (strcmp(optarg, "inet") == 0) filter_af_set(¤t_filter, AF_INET); @@ -5566,6 +5659,8 @@ int main(int argc, char *argv[]) tipc_show(¤t_filter); if (current_filter.dbs & (1<= 0) { switch (opt) { case 'V': @@ -143,6 +145,9 @@ int main(int argc, char **argv) else show_details = true; break; + case 'r': + show_raw = true; + break; case 'j': json_output = 1; break; @@ -172,6 +177,7 @@ int main(int argc, char **argv) rd.show_driver_details = show_driver_details; rd.json_output = json_output; rd.pretty_output = pretty; + rd.show_raw = show_raw; err = rd_init(&rd, filename); if (err) diff --git a/rdma/rdma.h b/rdma/rdma.h index 735b1bf7..a6c6bdea 100644 --- a/rdma/rdma.h +++ b/rdma/rdma.h @@ -57,8 +57,9 @@ struct rd { int argc; char **argv; char *filename; - bool show_details; - bool show_driver_details; + uint8_t show_details:1; + uint8_t show_driver_details:1; + uint8_t show_raw:1; struct list_head dev_map_list; uint32_t dev_idx; uint32_t port_idx; 
@@ -134,9 +135,11 @@ int rd_attr_check(const struct nlattr *attr, int *typep); * Print helpers */ void print_driver_table(struct rd *rd, struct nlattr *tb); +void print_raw_data(struct rd *rd, struct nlattr **nla_line); void newline(struct rd *rd); void newline_indent(struct rd *rd); void print_on_off(struct rd *rd, const char *key_str, bool on); +void print_raw_data(struct rd *rd, struct nlattr **nla_line); #define MAX_LINE_LENGTH 80 #endif /* _RDMA_TOOL_H_ */ diff --git a/rdma/res-cq.c b/rdma/res-cq.c index e1efe3ba..313f929a 100644 --- a/rdma/res-cq.c +++ b/rdma/res-cq.c @@ -39,6 +39,20 @@ static void print_cq_dim_setting(struct rd *rd, struct nlattr *attr) print_on_off(rd, "adaptive-moderation", dim_setting); } +static int res_cq_line_raw(struct rd *rd, const char *name, int idx, + struct nlattr **nla_line) +{ + if (!nla_line[RDMA_NLDEV_ATTR_RES_RAW]) + return MNL_CB_ERROR; + + open_json_object(NULL); + print_dev(rd, idx, name); + print_raw_data(rd, nla_line); + newline(rd); + + return MNL_CB_OK; +} + static int res_cq_line(struct rd *rd, const char *name, int idx, struct nlattr **nla_line) { @@ -128,7 +142,8 @@ int res_cq_idx_parse_cb(const struct nlmsghdr *nlh, void *data) name = mnl_attr_get_str(tb[RDMA_NLDEV_ATTR_DEV_NAME]); idx = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); - return res_cq_line(rd, name, idx, tb); + return (rd->show_raw) ? res_cq_line_raw(rd, name, idx, tb) : + res_cq_line(rd, name, idx, tb); } int res_cq_parse_cb(const struct nlmsghdr *nlh, void *data) @@ -156,7 +171,8 @@ int res_cq_parse_cb(const struct nlmsghdr *nlh, void *data) if (ret != MNL_CB_OK) break; - ret = res_cq_line(rd, name, idx, nla_line); + ret = (rd->show_raw) ? 
res_cq_line_raw(rd, name, idx, nla_line) : + res_cq_line(rd, name, idx, nla_line); if (ret != MNL_CB_OK) break; diff --git a/rdma/res-mr.c b/rdma/res-mr.c index c1366035..1bf73f3a 100644 --- a/rdma/res-mr.c +++ b/rdma/res-mr.c @@ -7,6 +7,20 @@ #include "res.h" #include +static int res_mr_line_raw(struct rd *rd, const char *name, int idx, + struct nlattr **nla_line) +{ + if (!nla_line[RDMA_NLDEV_ATTR_RES_RAW]) + return MNL_CB_ERROR; + + open_json_object(NULL); + print_dev(rd, idx, name); + print_raw_data(rd, nla_line); + newline(rd); + + return MNL_CB_OK; +} + static int res_mr_line(struct rd *rd, const char *name, int idx, struct nlattr **nla_line) { @@ -69,6 +83,7 @@ static int res_mr_line(struct rd *rd, const char *name, int idx, print_comm(rd, comm, nla_line); print_driver_table(rd, nla_line[RDMA_NLDEV_ATTR_DRIVER]); + print_raw_data(rd, nla_line); newline(rd); out: @@ -91,7 +106,8 @@ int res_mr_idx_parse_cb(const struct nlmsghdr *nlh, void *data) name = mnl_attr_get_str(tb[RDMA_NLDEV_ATTR_DEV_NAME]); idx = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); - return res_mr_line(rd, name, idx, tb); + return (rd->show_raw) ? res_mr_line_raw(rd, name, idx, tb) : + res_mr_line(rd, name, idx, tb); } int res_mr_parse_cb(const struct nlmsghdr *nlh, void *data) @@ -119,7 +135,8 @@ int res_mr_parse_cb(const struct nlmsghdr *nlh, void *data) if (ret != MNL_CB_OK) break; - ret = res_mr_line(rd, name, idx, nla_line); + ret = (rd->show_raw) ? 
res_mr_line_raw(rd, name, idx, nla_line) : + res_mr_line(rd, name, idx, nla_line); if (ret != MNL_CB_OK) break; } diff --git a/rdma/res-qp.c b/rdma/res-qp.c index 801cfca9..a38be399 100644 --- a/rdma/res-qp.c +++ b/rdma/res-qp.c @@ -64,6 +64,20 @@ static void print_pathmig(struct rd *rd, uint32_t val, struct nlattr **nla_line) "path-mig-state %s ", path_mig_to_str(val)); } +static int res_qp_line_raw(struct rd *rd, const char *name, int idx, + struct nlattr **nla_line) +{ + if (!nla_line[RDMA_NLDEV_ATTR_RES_RAW]) + return MNL_CB_ERROR; + + open_json_object(NULL); + print_link(rd, idx, name, rd->port_idx, nla_line); + print_raw_data(rd, nla_line); + newline(rd); + + return MNL_CB_OK; +} + static int res_qp_line(struct rd *rd, const char *name, int idx, struct nlattr **nla_line) { @@ -184,7 +198,8 @@ int res_qp_idx_parse_cb(const struct nlmsghdr *nlh, void *data) name = mnl_attr_get_str(tb[RDMA_NLDEV_ATTR_DEV_NAME]); idx = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); - return res_qp_line(rd, name, idx, tb); + return (rd->show_raw) ? res_qp_line_raw(rd, name, idx, tb) : + res_qp_line(rd, name, idx, tb); } int res_qp_parse_cb(const struct nlmsghdr *nlh, void *data) @@ -212,7 +227,8 @@ int res_qp_parse_cb(const struct nlmsghdr *nlh, void *data) if (ret != MNL_CB_OK) break; - ret = res_qp_line(rd, name, idx, nla_line); + ret = (rd->show_raw) ? 
res_qp_line_raw(rd, name, idx, nla_line) : + res_qp_line(rd, name, idx, nla_line); if (ret != MNL_CB_OK) break; } diff --git a/rdma/res.h b/rdma/res.h index 525171fc..70ce5758 100644 --- a/rdma/res.h +++ b/rdma/res.h @@ -23,24 +23,44 @@ int res_cm_id_idx_parse_cb(const struct nlmsghdr *nlh, void *data); int res_qp_parse_cb(const struct nlmsghdr *nlh, void *data); int res_qp_idx_parse_cb(const struct nlmsghdr *nlh, void *data); +static inline uint32_t res_get_command(uint32_t command, struct rd *rd) +{ + if (!rd->show_raw) + return command; + + switch (command) { + case RDMA_NLDEV_CMD_RES_QP_GET: + return RDMA_NLDEV_CMD_RES_QP_GET_RAW; + case RDMA_NLDEV_CMD_RES_CQ_GET: + return RDMA_NLDEV_CMD_RES_CQ_GET_RAW; + case RDMA_NLDEV_CMD_RES_MR_GET: + return RDMA_NLDEV_CMD_RES_MR_GET_RAW; + default: + return command; + } +} + #define RES_FUNC(name, command, valid_filters, strict_port, id) \ static inline int _##name(struct rd *rd) \ { \ - uint32_t idx; \ + uint32_t idx, _command; \ int ret; \ + _command = res_get_command(command, rd); \ if (id) { \ ret = rd_doit_index(rd, &idx); \ if (ret) { \ rd->suppress_errors = true; \ - ret = _res_send_idx_msg(rd, command, \ + ret = _res_send_idx_msg(rd, _command, \ name##_idx_parse_cb, \ idx, id); \ - if (!ret) \ + if (!ret || rd->show_raw) \ return ret; \ - /* Fallback for old systems without .doit callbacks */ \ + /* Fallback for old systems without .doit callbacks. \ + * Kernel that supports raw, for sure supports doit. 
\ + */ \ } \ } \ - return _res_send_msg(rd, command, name##_parse_cb); \ + return _res_send_msg(rd, _command, name##_parse_cb); \ } \ static inline int name(struct rd *rd) \ { \ diff --git a/rdma/utils.c b/rdma/utils.c index e25c3adf..4d3de4fa 100644 --- a/rdma/utils.c +++ b/rdma/utils.c @@ -450,6 +450,7 @@ static const enum mnl_attr_data_type nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_STAT_RES] = MNL_TYPE_U32, [RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK] = MNL_TYPE_U32, [RDMA_NLDEV_ATTR_DEV_DIM] = MNL_TYPE_U8, + [RDMA_NLDEV_ATTR_RES_RAW] = MNL_TYPE_BINARY, }; int rd_attr_check(const struct nlattr *attr, int *typep) @@ -890,6 +891,25 @@ static int print_driver_entry(struct rd *rd, struct nlattr *key_attr, return ret; } +void print_raw_data(struct rd *rd, struct nlattr **nla_line) +{ + uint8_t *data; + uint32_t len; + int i = 0; + + if (!rd->show_raw) + return; + + len = mnl_attr_get_payload_len(nla_line[RDMA_NLDEV_ATTR_RES_RAW]); + data = mnl_attr_get_payload(nla_line[RDMA_NLDEV_ATTR_RES_RAW]); + open_json_array(PRINT_JSON, "data"); + while (i < len) { + print_color_uint(PRINT_ANY, COLOR_NONE, NULL, "%d", data[i]); + i++; + } + close_json_array(PRINT_ANY, ">"); +} + void print_driver_table(struct rd *rd, struct nlattr *tb) { int print_type = RDMA_NLDEV_PRINT_TYPE_UNSPEC; diff --git a/tc/Makefile b/tc/Makefile index 79c9c1dd..5a517af2 100644 --- a/tc/Makefile +++ b/tc/Makefile @@ -122,6 +122,7 @@ TCLIB += tc_red.o TCLIB += tc_cbq.o TCLIB += tc_estimator.o TCLIB += tc_stab.o +TCLIB += tc_qevent.o CFLAGS += -DCONFIG_GACT -DCONFIG_GACT_PROB ifneq ($(IPT_LIB_DIR),) diff --git a/tc/m_police.c b/tc/m_police.c index a5bc20c0..83b25db4 100644 --- a/tc/m_police.c +++ b/tc/m_police.c @@ -161,24 +161,24 @@ action_ctrl_ok: return -1; /* Must at least do late binding, use TB or ewma policing */ - if (!rate64 && !avrate && !p.index) { - fprintf(stderr, "\"rate\" or \"avrate\" MUST be specified.\n"); + if (!rate64 && !avrate && !p.index && !mtu) { + fprintf(stderr, "'rate' or 
'avrate' or 'mtu' MUST be specified.\n"); return -1; } /* When the TB policer is used, burst is required */ if (rate64 && !buffer && !avrate) { - fprintf(stderr, "\"burst\" requires \"rate\".\n"); + fprintf(stderr, "'burst' requires 'rate'.\n"); return -1; } if (prate64) { if (!rate64) { - fprintf(stderr, "\"peakrate\" requires \"rate\".\n"); + fprintf(stderr, "'peakrate' requires 'rate'.\n"); return -1; } if (!mtu) { - fprintf(stderr, "\"mtu\" is required, if \"peakrate\" is requested.\n"); + fprintf(stderr, "'mtu' is required, if 'peakrate' is requested.\n"); return -1; } } diff --git a/tc/q_red.c b/tc/q_red.c index 53181c82..df788f8f 100644 --- a/tc/q_red.c +++ b/tc/q_red.c @@ -22,6 +22,7 @@ #include "utils.h" #include "tc_util.h" +#include "tc_qevent.h" #include "tc_red.h" @@ -30,11 +31,20 @@ static void explain(void) fprintf(stderr, "Usage: ... red limit BYTES [min BYTES] [max BYTES] avpkt BYTES [burst PACKETS]\n" " [adaptive] [probability PROBABILITY] [bandwidth KBPS]\n" - " [ecn] [harddrop] [nodrop]\n"); + " [ecn] [harddrop] [nodrop]\n" + " [qevent early_drop block IDX] [qevent mark block IDX]\n"); } #define RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP) +static struct qevent_plain qe_early_drop = {}; +static struct qevent_plain qe_mark = {}; +static struct qevent_util qevents[] = { + QEVENT("early_drop", plain, &qe_early_drop, TCA_RED_EARLY_DROP_BLOCK), + QEVENT("mark", plain, &qe_mark, TCA_RED_MARK_BLOCK), + {}, +}; + static int red_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n, const char *dev) { @@ -51,6 +61,8 @@ static int red_parse_opt(struct qdisc_util *qu, int argc, char **argv, __u32 max_P; struct rtattr *tail; + qevents_init(qevents); + while (argc > 0) { if (strcmp(*argv, "limit") == 0) { NEXT_ARG(); @@ -109,6 +121,11 @@ static int red_parse_opt(struct qdisc_util *qu, int argc, char **argv, flags_bf.value |= TC_RED_ADAPTATIVE; } else if (strcmp(*argv, "adaptive") == 0) { flags_bf.value |= 
TC_RED_ADAPTATIVE; + } else if (matches(*argv, "qevent") == 0) { + NEXT_ARG(); + if (qevent_parse(qevents, &argc, &argv)) + return -1; + continue; } else if (strcmp(*argv, "help") == 0) { explain(); return -1; @@ -162,6 +179,8 @@ static int red_parse_opt(struct qdisc_util *qu, int argc, char **argv, max_P = probability * pow(2, 32); addattr_l(n, 1024, TCA_RED_MAX_P, &max_P, sizeof(max_P)); addattr_l(n, 1024, TCA_RED_FLAGS, &flags_bf, sizeof(flags_bf)); + if (qevents_dump(qevents, n)) + return -1; addattr_nest_end(n, tail); return 0; } @@ -217,6 +236,11 @@ static int red_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) print_uint(PRINT_ANY, "Scell_log", "Scell_log %u", qopt->Scell_log); } + + qevents_init(qevents); + if (qevents_read(qevents, tb)) + return -1; + qevents_print(qevents, f); return 0; } @@ -240,10 +264,27 @@ static int red_print_xstats(struct qdisc_util *qu, FILE *f, struct rtattr *xstat return 0; } +static int red_has_block(struct qdisc_util *qu, struct rtattr *opt, __u32 block_idx, bool *p_has) +{ + struct rtattr *tb[TCA_RED_MAX + 1]; + + if (opt == NULL) + return 0; + + parse_rtattr_nested(tb, TCA_RED_MAX, opt); + + qevents_init(qevents); + if (qevents_read(qevents, tb)) + return -1; + + *p_has = qevents_have_block(qevents, block_idx); + return 0; +} struct qdisc_util red_qdisc_util = { .id = "red", .parse_qopt = red_parse_opt, .print_qopt = red_print_opt, .print_xstats = red_print_xstats, + .has_block = red_has_block, }; diff --git a/tc/tc_qdisc.c b/tc/tc_qdisc.c index 8eb08c34..b79029d9 100644 --- a/tc/tc_qdisc.c +++ b/tc/tc_qdisc.c @@ -478,6 +478,9 @@ static int tc_qdisc_block_exists_cb(struct nlmsghdr *n, void *arg) struct tcmsg *t = NLMSG_DATA(n); struct rtattr *tb[TCA_MAX+1]; int len = n->nlmsg_len; + struct qdisc_util *q; + const char *kind; + int err; if (n->nlmsg_type != RTM_NEWQDISC) return 0; @@ -506,6 +509,21 @@ static int tc_qdisc_block_exists_cb(struct nlmsghdr *n, void *arg) if (block == ctx->block_index) ctx->found = 
true; } + + kind = rta_getattr_str(tb[TCA_KIND]); + q = get_qdisc_kind(kind); + if (!q) + return -1; + if (q->has_block) { + bool found = false; + + err = q->has_block(q, tb[TCA_OPTIONS], ctx->block_index, &found); + if (err) + return err; + if (found) + ctx->found = true; + } + return 0; } diff --git a/tc/tc_qevent.c b/tc/tc_qevent.c new file mode 100644 index 00000000..34568070 --- /dev/null +++ b/tc/tc_qevent.c @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +/* + * Helpers for handling qevents. + */ + +#include <stdio.h> +#include <string.h> + +#include "tc_qevent.h" +#include "utils.h" + +void qevents_init(struct qevent_util *qevents) +{ + if (!qevents) + return; + + for (; qevents->id; qevents++) + memset(qevents->data, 0, qevents->data_size); +} + +int qevent_parse(struct qevent_util *qevents, int *p_argc, char ***p_argv) +{ + char **argv = *p_argv; + int argc = *p_argc; + const char *name = *argv; + int err; + + if (!qevents) + goto out; + + for (; qevents->id; qevents++) { + if (strcmp(name, qevents->id) == 0) { + NEXT_ARG(); + err = qevents->parse_qevent(qevents, &argc, &argv); + if (err) + return err; + + *p_argc = argc; + *p_argv = argv; + return 0; + } + } + +out: + fprintf(stderr, "Unknown qevent `%s'\n", name); + return -1; +} + +int qevents_read(struct qevent_util *qevents, struct rtattr **tb) +{ + int err; + + if (!qevents) + return 0; + + for (; qevents->id; qevents++) { + if (tb[qevents->attr]) { + err = qevents->read_qevent(qevents, tb); + if (err) + return err; + } + } + + return 0; +} + +void qevents_print(struct qevent_util *qevents, FILE *f) +{ + int first = true; + + if (!qevents) + return; + + for (; qevents->id; qevents++) { + struct qevent_base *qeb = qevents->data; + + if (qeb->block_idx) { + if (first) { + open_json_array(PRINT_JSON, "qevents"); + first = false; + } + + open_json_object(NULL); + print_string(PRINT_ANY, "kind", "qevent %s", qevents->id); + qevents->print_qevent(qevents, f); + print_string(PRINT_FP, NULL, "%s", " "); 
+ close_json_object(); + } + } + + if (!first) + close_json_array(PRINT_ANY, ""); +} + +bool qevents_have_block(struct qevent_util *qevents, __u32 block_idx) +{ + if (!qevents) + return false; + + for (; qevents->id; qevents++) { + struct qevent_base *qeb = qevents->data; + + if (qeb->block_idx == block_idx) + return true; + } + + return false; +} + +int qevents_dump(struct qevent_util *qevents, struct nlmsghdr *n) +{ + int err; + + if (!qevents) + return 0; + + for (; qevents->id; qevents++) { + struct qevent_base *qeb = qevents->data; + + if (qeb->block_idx) { + err = qevents->dump_qevent(qevents, n); + if (err) + return err; + } + } + + return 0; +} + +static int parse_block_idx(const char *arg, struct qevent_base *qeb) +{ + if (qeb->block_idx) { + fprintf(stderr, "Qevent block index already specified\n"); + return -1; + } + + if (get_unsigned(&qeb->block_idx, arg, 10) || !qeb->block_idx) { + fprintf(stderr, "Illegal qevent block index\n"); + return -1; + } + + return 0; +} + +static int read_block_idx(struct rtattr *attr, struct qevent_base *qeb) +{ + if (qeb->block_idx) { + fprintf(stderr, "Qevent block index already specified\n"); + return -1; + } + + qeb->block_idx = rta_getattr_u32(attr); + if (!qeb->block_idx) { + fprintf(stderr, "Illegal qevent block index\n"); + return -1; + } + + return 0; +} + +static void print_block_idx(FILE *f, __u32 block_idx) +{ + print_uint(PRINT_ANY, "block", " block %u", block_idx); +} + +int qevent_parse_plain(struct qevent_util *qu, int *p_argc, char ***p_argv) +{ + struct qevent_plain *qe = qu->data; + char **argv = *p_argv; + int argc = *p_argc; + + if (qe->base.block_idx) { + fprintf(stderr, "Duplicate qevent\n"); + return -1; + } + + while (argc > 0) { + if (strcmp(*argv, "block") == 0) { + NEXT_ARG(); + if (parse_block_idx(*argv, &qe->base)) + return -1; + } else { + break; + } + NEXT_ARG_FWD(); + } + + if (!qe->base.block_idx) { + fprintf(stderr, "Unspecified qevent block index\n"); + return -1; + } + + *p_argc = argc; 
+ *p_argv = argv; + return 0; +} + +int qevent_read_plain(struct qevent_util *qu, struct rtattr **tb) +{ + struct qevent_plain *qe = qu->data; + + return read_block_idx(tb[qu->attr], &qe->base); +} + +void qevent_print_plain(struct qevent_util *qu, FILE *f) +{ + struct qevent_plain *qe = qu->data; + + print_block_idx(f, qe->base.block_idx); +} + +int qevent_dump_plain(struct qevent_util *qu, struct nlmsghdr *n) +{ + struct qevent_plain *qe = qu->data; + + return addattr32(n, 1024, qu->attr, qe->base.block_idx); +} diff --git a/tc/tc_qevent.h b/tc/tc_qevent.h new file mode 100644 index 00000000..d60c3f75 --- /dev/null +++ b/tc/tc_qevent.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TC_QEVENT_H_ +#define _TC_QEVENT_H_ + +#include +#include +#include + +struct qevent_base { + __u32 block_idx; +}; + +struct qevent_util { + const char *id; + int (*parse_qevent)(struct qevent_util *qu, int *argc, char ***argv); + int (*read_qevent)(struct qevent_util *qu, struct rtattr **tb); + void (*print_qevent)(struct qevent_util *qu, FILE *f); + int (*dump_qevent)(struct qevent_util *qu, struct nlmsghdr *n); + size_t data_size; + void *data; + int attr; +}; + +#define QEVENT(_name, _form, _data, _attr) \ + { \ + .id = _name, \ + .parse_qevent = qevent_parse_##_form, \ + .read_qevent = qevent_read_##_form, \ + .print_qevent = qevent_print_##_form, \ + .dump_qevent = qevent_dump_##_form, \ + .data_size = sizeof(struct qevent_##_form), \ + .data = _data, \ + .attr = _attr, \ + } + +void qevents_init(struct qevent_util *qevents); +int qevent_parse(struct qevent_util *qevents, int *p_argc, char ***p_argv); +int qevents_read(struct qevent_util *qevents, struct rtattr **tb); +int qevents_dump(struct qevent_util *qevents, struct nlmsghdr *n); +void qevents_print(struct qevent_util *qevents, FILE *f); +bool qevents_have_block(struct qevent_util *qevents, __u32 block_idx); + +struct qevent_plain { + struct qevent_base base; +}; +int qevent_parse_plain(struct qevent_util 
*qu, int *p_argc, char ***p_argv); +int qevent_read_plain(struct qevent_util *qu, struct rtattr **tb); +void qevent_print_plain(struct qevent_util *qu, FILE *f); +int qevent_dump_plain(struct qevent_util *qu, struct nlmsghdr *n); + +#endif diff --git a/tc/tc_util.h b/tc/tc_util.h index edc39138..c8af4e95 100644 --- a/tc/tc_util.h +++ b/tc/tc_util.h @@ -5,6 +5,7 @@ #define MAX_MSG 16384 #include #include +#include #include #include @@ -40,6 +41,7 @@ struct qdisc_util { int (*parse_copt)(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n, const char *dev); int (*print_copt)(struct qdisc_util *qu, FILE *f, struct rtattr *opt); + int (*has_block)(struct qdisc_util *qu, struct rtattr *opt, __u32 block_idx, bool *p_has); }; extern __u16 f_proto;