1 // SPDX-License-Identifier: GPL-2.0
3 /* net/sched/sch_etf.c Earliest TxTime First queueing discipline.
5 * Authors: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
6 * Vinicius Costa Gomes <vinicius.gomes@intel.com>
 */
9 #include <linux/module.h>
10 #include <linux/types.h>
11 #include <linux/kernel.h>
12 #include <linux/string.h>
13 #include <linux/errno.h>
14 #include <linux/errqueue.h>
15 #include <linux/rbtree.h>
16 #include <linux/skbuff.h>
17 #include <linux/posix-timers.h>
18 #include <net/netlink.h>
19 #include <net/sch_generic.h>
20 #include <net/pkt_sched.h>
// Flag helpers: each one masks a single TC_ETF_* bit out of the flags member of the
// object that x points to. The result is the masked value, not a normalized 0 or 1.
// In this file they are applied to the struct tc_etf_qopt received in etf_init.
23 #define DEADLINE_MODE_IS_ON(x) ((x)->flags & TC_ETF_DEADLINE_MODE_ON)
24 #define OFFLOAD_IS_ON(x) ((x)->flags & TC_ETF_OFFLOAD_ON)
25 #define SKIP_SOCK_CHECK_IS_SET(x) ((x)->flags & TC_ETF_SKIP_SOCK_CHECK)
// Private state of one ETF qdisc instance. Every handler in this file obtains it with
// qdisc_priv(sch), and etf_qdisc_ops.priv_size is sizeof this struct.
// NOTE(review): code in this file also reads and writes members named clockid, queue,
// offload, deadline_mode and skip_sock_check, none of which is declared in the lines
// visible here, and the closing brace is absent as well. This copy looks truncated;
// confirm against the complete file.
27 struct etf_sched_data
{
// Amount subtracted from a packet txtime when the watchdog is armed and when the
// dequeue path computes the start of its transmit window.
33 s32 delta
; /* in ns */
34 ktime_t last
; /* The txtime of the last skb sent to the netdevice. */
// rb-tree holding the queued skbs ordered by tstamp; its first node is fetched with
// rb_first_cached().
35 struct rb_root_cached head
;
// Timer handle that this file arms and cancels through the qdisc_watchdog_* calls.
36 struct qdisc_watchdog watchdog
;
// Clock read function; etf_init assigns one of the ktime_get* helpers to it.
37 ktime_t (*get_time
)(void);
// Netlink attribute policy handed to nla_parse_nested_deprecated() in etf_init.
// The only entry visible here is TCA_ETF_PARMS, whose len is the size of
// struct tc_etf_qopt.
// NOTE(review): the terminator of this initializer is not present in this copy.
40 static const struct nla_policy etf_policy
[TCA_ETF_MAX
+ 1] = {
41 [TCA_ETF_PARMS
] = { .len
= sizeof(struct tc_etf_qopt
) },
// Sanity-checks the user supplied qdisc options.
//   qopt:   options to check (clockid and delta are the members inspected here)
//   extack: receives a human readable message for each failed check
// Three conditions are tested in order: clockid below zero, clockid different from
// CLOCK_TAI, and delta below zero.
// NOTE(review): the statements that follow each NL_SET_ERR_MSG, and the final return,
// are not present in this copy, so the exact error codes cannot be stated from here.
// NOTE(review): the delta test is a less-than-zero comparison, so a delta of zero passes
// even though the message wording says positive; confirm whether zero is intended.
44 static inline int validate_input_params(struct tc_etf_qopt
*qopt
,
45 struct netlink_ext_ack
*extack
)
47 /* Check if params comply to the following rules:
48 * * Clockid and delta must be valid.
50 * * Dynamic clockids are not supported.
52 * * Delta must be a positive integer.
54 * Also note that for the HW offload case, we must
55 * expect that system clocks have been synchronized to PHC.
57 if (qopt
->clockid
< 0) {
58 NL_SET_ERR_MSG(extack
, "Dynamic clockids are not supported");
62 if (qopt
->clockid
!= CLOCK_TAI
) {
63 NL_SET_ERR_MSG(extack
, "Invalid clockid. CLOCK_TAI must be used");
67 if (qopt
->delta
< 0) {
68 NL_SET_ERR_MSG(extack
, "Delta must be positive");
// Decides whether an skb may be queued on this qdisc.
//   sch:  the qdisc, used to reach its private etf_sched_data
//   nskb: candidate packet; its tstamp is taken as the requested txtime and its sk
//         as the originating socket
// Visible checks: the skip_sock_check setting of the qdisc, the SOCK_TXTIME flag on
// the socket, socket clockid versus qdisc clockid, socket deadline mode versus qdisc
// deadline mode, and finally whether txtime lies before now or before q->last.
// NOTE(review): the branch bodies, the declaration and assignment of now, and the
// return statements are missing from this copy; the outcome of each test is therefore
// not stated here.
// NOTE(review): sk is passed to sock_flag() and dereferenced below, yet no NULL test
// on sk is visible in these lines; confirm that one exists in the missing lines.
75 static bool is_packet_valid(struct Qdisc
*sch
, struct sk_buff
*nskb
)
77 struct etf_sched_data
*q
= qdisc_priv(sch
);
78 ktime_t txtime
= nskb
->tstamp
;
79 struct sock
*sk
= nskb
->sk
;
82 if (q
->skip_sock_check
)
88 if (!sock_flag(sk
, SOCK_TXTIME
))
91 /* We don't perform crosstimestamping.
92 * Drop if packet's clockid differs from qdisc's.
94 if (sk
->sk_clockid
!= q
->clockid
)
97 if (sk
->sk_txtime_deadline_mode
!= q
->deadline_mode
)
102 if (ktime_before(txtime
, now
) || ktime_before(txtime
, q
->last
))
// Peek handler (installed as .peek in etf_qdisc_ops): looks up the first node of the
// rb-tree rooted at q->head with rb_first_cached().
//   sch: the qdisc whose tree is inspected
// NOTE(review): the declaration of p, the handling of an empty tree and the return
// statement are not present in this copy. Callers in this file treat the result as a
// struct sk_buff pointer.
108 static struct sk_buff
*etf_peek_timesortedlist(struct Qdisc
*sch
)
110 struct etf_sched_data
*q
= qdisc_priv(sch
);
113 p
= rb_first_cached(&q
->head
);
// Re-arms the qdisc watchdog for the packet currently at the front of the tree.
//   sch: the qdisc whose watchdog is adjusted
// Visible steps: peek the first skb, cancel the watchdog, compute
// next = skb->tstamp - q->delta, and schedule the watchdog at that time in ns.
// NOTE(review): the declaration of next and the conditions that select between the
// cancel and the schedule are missing from this copy; presumably the cancel applies
// when no skb is queued -- confirm.
120 static void reset_watchdog(struct Qdisc
*sch
)
122 struct etf_sched_data
*q
= qdisc_priv(sch
);
123 struct sk_buff
*skb
= etf_peek_timesortedlist(sch
);
127 qdisc_watchdog_cancel(&q
->watchdog
);
131 next
= ktime_sub_ns(skb
->tstamp
, q
->delta
);
132 qdisc_watchdog_schedule_ns(&q
->watchdog
, ktime_to_ns(next
));
// Reports a txtime related failure on the error queue of the sending socket.
//   skb:  packet the error refers to; its tstamp is copied into the report
//   err:  value stored in ee_errno
//   code: value stored in ee_code
// Visible steps: test that skb->sk exists and has sk_txtime_report_errors set, clone
// the skb with GFP_ATOMIC, fill the sock_exterr_skb area of the clone, then hand the
// clone to sock_queue_err_skb().
// NOTE(review): the statements guarded by the first test, the check of the skb_clone
// result and the handling of a sock_queue_err_skb failure are missing from this copy.
135 static void report_sock_error(struct sk_buff
*skb
, u32 err
, u8 code
)
137 struct sock_exterr_skb
*serr
;
138 struct sk_buff
*clone
;
139 ktime_t txtime
= skb
->tstamp
;
141 if (!skb
->sk
|| !(skb
->sk
->sk_txtime_report_errors
))
144 clone
= skb_clone(skb
, GFP_ATOMIC
);
148 serr
= SKB_EXT_ERR(clone
);
149 serr
->ee
.ee_errno
= err
;
150 serr
->ee
.ee_origin
= SO_EE_ORIGIN_TXTIME
;
151 serr
->ee
.ee_type
= 0;
152 serr
->ee
.ee_code
= code
;
// The txtime is split across two fields: the bits above bit 31 go to ee_data and the
// plain assignment below stores the value into ee_info.
154 serr
->ee
.ee_data
= (txtime
>> 32); /* high part of tstamp */
155 serr
->ee
.ee_info
= txtime
; /* low part of tstamp */
157 if (sock_queue_err_skb(skb
->sk
, clone
))
// Enqueue handler (installed as .enqueue in etf_qdisc_ops): inserts nskb into the
// rb-tree ordered by tstamp.
//   nskb:    packet to queue
//   sch:     the qdisc
//   to_free: list that qdisc_drop() uses for a rejected packet
// A packet rejected by is_packet_valid() is reported to its socket with EINVAL and
// SO_EE_CODE_TXTIME_INVALID_PARAM and the result of qdisc_drop() is returned.
// Otherwise the visible code descends the tree (right when the new txtime is after
// the tstamp of the visited skb, left otherwise), links and colors the node, bumps
// the backlog statistics and returns NET_XMIT_SUCCESS.
// NOTE(review): the loop header, the declaration of skb, the update of leftmost, the
// queue length increment and the call that re-arms the watchdog are not present in
// this copy.
161 static int etf_enqueue_timesortedlist(struct sk_buff
*nskb
, struct Qdisc
*sch
,
162 struct sk_buff
**to_free
)
164 struct etf_sched_data
*q
= qdisc_priv(sch
);
165 struct rb_node
**p
= &q
->head
.rb_root
.rb_node
, *parent
= NULL
;
166 ktime_t txtime
= nskb
->tstamp
;
167 bool leftmost
= true;
169 if (!is_packet_valid(sch
, nskb
)) {
170 report_sock_error(nskb
, EINVAL
,
171 SO_EE_CODE_TXTIME_INVALID_PARAM
);
172 return qdisc_drop(nskb
, sch
, to_free
);
179 skb
= rb_to_skb(parent
);
180 if (ktime_after(txtime
, skb
->tstamp
)) {
181 p
= &parent
->rb_right
;
184 p
= &parent
->rb_left
;
187 rb_link_node(&nskb
->rbnode
, parent
, p
);
188 rb_insert_color_cached(&nskb
->rbnode
, &q
->head
, leftmost
);
190 qdisc_qstats_backlog_inc(sch
, nskb
);
193 /* Now we may need to re-arm the qdisc watchdog for the next packet. */
196 return NET_XMIT_SUCCESS
;
// Drops queued packets starting at skb and walking forward through the tree.
//   sch: the qdisc
//   skb: first packet to examine
// For each visited packet the visible code compares its tstamp against now, erases
// it from the tree, restores skb->dev to the qdisc device, reports ECANCELED with
// SO_EE_CODE_TXTIME_MISSED to the socket, decrements the backlog, calls qdisc_drop()
// and counts an overlimit. The collected packets are freed with kfree_skb_list().
// NOTE(review): the end of the parameter list is cut off in this copy (the call site
// in the dequeue path passes a third argument, now). The statement guarded by the
// ktime_after test and the remaining field resets are missing too.
199 static void timesortedlist_drop(struct Qdisc
*sch
, struct sk_buff
*skb
,
202 struct etf_sched_data
*q
= qdisc_priv(sch
);
203 struct sk_buff
*to_free
= NULL
;
204 struct sk_buff
*tmp
= NULL
;
206 skb_rbtree_walk_from_safe(skb
, tmp
) {
207 if (ktime_after(skb
->tstamp
, now
))
210 rb_erase_cached(&skb
->rbnode
, &q
->head
);
212 /* The rbnode field in the skb re-uses these fields, now that
213 * we are done with the rbnode, reset them.
217 skb
->dev
= qdisc_dev(sch
);
219 report_sock_error(skb
, ECANCELED
, SO_EE_CODE_TXTIME_MISSED
);
221 qdisc_qstats_backlog_dec(sch
, skb
);
222 qdisc_drop(skb
, sch
, &to_free
);
223 qdisc_qstats_overlimit(sch
);
227 kfree_skb_list(to_free
);
// Takes skb out of the tree because it is about to be handed to the device.
//   sch: the qdisc
//   skb: packet to unlink
// Visible steps: erase the node, restore skb->dev to the qdisc device, decrement the
// backlog, update the byte and packet statistics, and record the tstamp of the packet
// in q->last (is_packet_valid later compares new packets against that value).
// NOTE(review): further field resets and the queue length update appear to be missing
// from this copy.
230 static void timesortedlist_remove(struct Qdisc
*sch
, struct sk_buff
*skb
)
232 struct etf_sched_data
*q
= qdisc_priv(sch
);
234 rb_erase_cached(&skb
->rbnode
, &q
->head
);
236 /* The rbnode field in the skb re-uses these fields, now that
237 * we are done with the rbnode, reset them.
241 skb
->dev
= qdisc_dev(sch
);
243 qdisc_qstats_backlog_dec(sch
, skb
);
245 qdisc_bstats_update(sch
, skb
);
247 q
->last
= skb
->tstamp
;
// Dequeue handler (installed as .dequeue in etf_qdisc_ops).
//   sch: the qdisc
// Visible logic on the packet at the front of the tree:
//   - tstamp before now: timesortedlist_drop() is called from that packet onwards;
//   - deadline mode: the packet is removed right away;
//   - otherwise: next = tstamp - delta is computed and the packet is removed only
//     when now is after next.
// NOTE(review): the declarations of skb, now and next, the empty-queue handling, the
// tstamp rewrite mentioned by the deadline mode comment, the watchdog re-arm and the
// return statement are not present in this copy.
252 static struct sk_buff
*etf_dequeue_timesortedlist(struct Qdisc
*sch
)
254 struct etf_sched_data
*q
= qdisc_priv(sch
);
258 skb
= etf_peek_timesortedlist(sch
);
264 /* Drop if packet has expired while in queue. */
265 if (ktime_before(skb
->tstamp
, now
)) {
266 timesortedlist_drop(sch
, skb
, now
);
271 /* When in deadline mode, dequeue as soon as possible and change the
272 * txtime from deadline to (now + delta).
274 if (q
->deadline_mode
) {
275 timesortedlist_remove(sch
, skb
);
280 next
= ktime_sub_ns(skb
->tstamp
, q
->delta
);
282 /* Dequeue only if now is within the [txtime - delta, txtime] range. */
283 if (ktime_after(now
, next
))
284 timesortedlist_remove(sch
, skb
);
289 /* Now we may need to re-arm the qdisc watchdog for the next packet. */
// Asks the driver to turn ETF offload off for the tx queue recorded in q->queue.
//   dev: network device owning the queue
//   q:   private qdisc state
// The request is a zero-initialized struct tc_etf_qopt_offload with only queue filled
// in, sent through ndo_setup_tc with TC_SETUP_QDISC_ETF. Nothing is sent when the
// driver has no ndo_setup_tc; a warning is printed on the failure path.
// NOTE(review): the declaration of err, any early exit, the statement guarded by the
// ndo_setup_tc test, the test of err and the argument of pr_warn are missing from
// this copy.
295 static void etf_disable_offload(struct net_device
*dev
,
296 struct etf_sched_data
*q
)
298 struct tc_etf_qopt_offload etf
= { };
299 const struct net_device_ops
*ops
;
305 ops
= dev
->netdev_ops
;
306 if (!ops
->ndo_setup_tc
)
309 etf
.queue
= q
->queue
;
312 err
= ops
->ndo_setup_tc(dev
, TC_SETUP_QDISC_ETF
, &etf
);
314 pr_warn("Couldn't disable ETF offload for queue %d\n",
// Asks the driver to turn ETF offload on for the tx queue recorded in q->queue.
//   dev:    network device owning the queue
//   q:      private qdisc state
//   extack: receives a message when the device lacks ndo_setup_tc or the call fails
// The request goes through ndo_setup_tc with TC_SETUP_QDISC_ETF.
// NOTE(review): the declaration of err, the field of the request that distinguishes
// enable from disable, the test of err and every return statement are missing from
// this copy, so the returned codes cannot be stated from here.
318 static int etf_enable_offload(struct net_device
*dev
, struct etf_sched_data
*q
,
319 struct netlink_ext_ack
*extack
)
321 const struct net_device_ops
*ops
= dev
->netdev_ops
;
322 struct tc_etf_qopt_offload etf
= { };
328 if (!ops
->ndo_setup_tc
) {
329 NL_SET_ERR_MSG(extack
, "Specified device does not support ETF offload");
333 etf
.queue
= q
->queue
;
336 err
= ops
->ndo_setup_tc(dev
, TC_SETUP_QDISC_ETF
, &etf
);
338 NL_SET_ERR_MSG(extack
, "Specified device failed to setup ETF hardware offload");
// Sets up a new ETF qdisc from netlink options.
//   sch:    the qdisc being initialized
//   opt:    nested netlink attribute carrying the options
//   extack: receives a message for each failure
// Visible flow: parse opt against etf_policy, require TCA_ETF_PARMS, log the requested
// values with pr_debug, run validate_input_params(), derive the tx queue index,
// enable offload when requested, copy the parameters into the private state, select
// the clock read function from clockid, and initialize the watchdog on that clock.
// NOTE(review): the declaration of err, the tests on opt and on each err value, the
// remaining case labels of the switch and all return statements are missing from
// this copy.
345 static int etf_init(struct Qdisc
*sch
, struct nlattr
*opt
,
346 struct netlink_ext_ack
*extack
)
348 struct etf_sched_data
*q
= qdisc_priv(sch
);
349 struct net_device
*dev
= qdisc_dev(sch
);
350 struct nlattr
*tb
[TCA_ETF_MAX
+ 1];
351 struct tc_etf_qopt
*qopt
;
355 NL_SET_ERR_MSG(extack
,
356 "Missing ETF qdisc options which are mandatory");
360 err
= nla_parse_nested_deprecated(tb
, TCA_ETF_MAX
, opt
, etf_policy
,
365 if (!tb
[TCA_ETF_PARMS
]) {
366 NL_SET_ERR_MSG(extack
, "Missing mandatory ETF parameters");
370 qopt
= nla_data(tb
[TCA_ETF_PARMS
]);
372 pr_debug("delta %d clockid %d offload %s deadline %s\n",
373 qopt
->delta
, qopt
->clockid
,
374 OFFLOAD_IS_ON(qopt
) ? "on" : "off",
375 DEADLINE_MODE_IS_ON(qopt
) ? "on" : "off");
377 err
= validate_input_params(qopt
, extack
);
// Pointer difference between the queue of this qdisc and tx queue 0 of the device,
// which yields the index of the queue this qdisc is attached to.
381 q
->queue
= sch
->dev_queue
- netdev_get_tx_queue(dev
, 0);
383 if (OFFLOAD_IS_ON(qopt
)) {
384 err
= etf_enable_offload(dev
, q
, extack
);
389 /* Everything went OK, save the parameters used. */
390 q
->delta
= qopt
->delta
;
391 q
->clockid
= qopt
->clockid
;
392 q
->offload
= OFFLOAD_IS_ON(qopt
);
393 q
->deadline_mode
= DEADLINE_MODE_IS_ON(qopt
);
394 q
->skip_sock_check
= SKIP_SOCK_CHECK_IS_SET(qopt
);
// NOTE(review): validate_input_params in this file flags every clockid other than
// CLOCK_TAI, so the assignments below other than ktime_get_clocktai look unreachable
// if that result aborts initialization -- confirm.
396 switch (q
->clockid
) {
398 q
->get_time
= ktime_get_real
;
400 case CLOCK_MONOTONIC
:
401 q
->get_time
= ktime_get
;
404 q
->get_time
= ktime_get_boottime
;
407 q
->get_time
= ktime_get_clocktai
;
410 NL_SET_ERR_MSG(extack
, "Clockid is not supported");
414 qdisc_watchdog_init_clockid(&q
->watchdog
, sch
, q
->clockid
);
// Empties the rb-tree of queued packets.
//   sch: the qdisc whose tree is cleared
// Starting from the first node, each skb is erased from the tree and released with
// rtnl_kfree_skbs().
// NOTE(review): the loop header and the step that advances p to the following node
// are missing from this copy.
419 static void timesortedlist_clear(struct Qdisc
*sch
)
421 struct etf_sched_data
*q
= qdisc_priv(sch
);
422 struct rb_node
*p
= rb_first_cached(&q
->head
);
425 struct sk_buff
*skb
= rb_to_skb(p
);
429 rb_erase_cached(&skb
->rbnode
, &q
->head
);
430 rtnl_kfree_skbs(skb
, skb
);
// Returns the qdisc to an empty state.
//   sch: the qdisc to reset
// The watchdog is cancelled only when its qdisc back pointer equals sch, which this
// code uses as the sign that the watchdog was initialized. Then the rb-tree and the
// generic sch->q queue are both cleared and the backlog counter is zeroed.
// NOTE(review): some statements after the backlog reset appear to be missing from
// this copy.
435 static void etf_reset(struct Qdisc
*sch
)
437 struct etf_sched_data
*q
= qdisc_priv(sch
);
439 /* Only cancel watchdog if it's been initialized. */
440 if (q
->watchdog
.qdisc
== sch
)
441 qdisc_watchdog_cancel(&q
->watchdog
);
443 /* No matter which mode we are on, it's safe to clear both lists. */
444 timesortedlist_clear(sch
);
445 __qdisc_reset_queue(&sch
->q
);
447 sch
->qstats
.backlog
= 0;
// Destroy handler (installed as .destroy in etf_qdisc_ops).
//   sch: the qdisc being torn down
// Cancels the watchdog when it was initialized for this qdisc (same back pointer test
// as in etf_reset), then calls etf_disable_offload() for the device.
// NOTE(review): no test of q->offload is visible before the disable call in these
// lines; confirm whether the helper itself skips the request when offload was off.
453 static void etf_destroy(struct Qdisc
*sch
)
455 struct etf_sched_data
*q
= qdisc_priv(sch
);
456 struct net_device
*dev
= qdisc_dev(sch
);
458 /* Only cancel watchdog if it's been initialized. */
459 if (q
->watchdog
.qdisc
== sch
)
460 qdisc_watchdog_cancel(&q
->watchdog
);
462 etf_disable_offload(dev
, q
);
// Serializes the current configuration into a netlink message.
//   sch: the qdisc to describe
//   skb: message buffer being filled
// A TCA_OPTIONS nest is opened, a zero-initialized struct tc_etf_qopt is filled from
// the private state (delta, clockid and one flag bit per enabled feature) and added
// as TCA_ETF_PARMS. On success the result of nla_nest_end() is returned; the failure
// path cancels the nest.
// NOTE(review): the declaration and NULL test of nest, the condition guarding the
// TC_ETF_OFFLOAD_ON bit, the nla_put_failure label and its return value are missing
// from this copy.
465 static int etf_dump(struct Qdisc
*sch
, struct sk_buff
*skb
)
467 struct etf_sched_data
*q
= qdisc_priv(sch
);
468 struct tc_etf_qopt opt
= { };
471 nest
= nla_nest_start_noflag(skb
, TCA_OPTIONS
);
473 goto nla_put_failure
;
475 opt
.delta
= q
->delta
;
476 opt
.clockid
= q
->clockid
;
478 opt
.flags
|= TC_ETF_OFFLOAD_ON
;
480 if (q
->deadline_mode
)
481 opt
.flags
|= TC_ETF_DEADLINE_MODE_ON
;
483 if (q
->skip_sock_check
)
484 opt
.flags
|= TC_ETF_SKIP_SOCK_CHECK
;
486 if (nla_put(skb
, TCA_ETF_PARMS
, sizeof(opt
), &opt
))
487 goto nla_put_failure
;
489 return nla_nest_end(skb
, nest
);
492 nla_nest_cancel(skb
, nest
);
// Operations table registered by etf_module_init and unregistered by etf_module_exit.
// It wires the enqueue, dequeue, peek and destroy handlers defined in this file and
// sizes the private area as struct etf_sched_data.
// NOTE(review): etf_init, etf_reset and etf_dump are defined in this file but no
// entry referring to them is visible here, and the initializer has no terminator;
// those lines appear to be missing from this copy.
496 static struct Qdisc_ops etf_qdisc_ops __read_mostly
= {
498 .priv_size
= sizeof(struct etf_sched_data
),
499 .enqueue
= etf_enqueue_timesortedlist
,
500 .dequeue
= etf_dequeue_timesortedlist
,
501 .peek
= etf_peek_timesortedlist
,
504 .destroy
= etf_destroy
,
506 .owner
= THIS_MODULE
,
// Module entry point: registers etf_qdisc_ops and returns the result of
// register_qdisc() unchanged.
509 static int __init
etf_module_init(void)
511 return register_qdisc(&etf_qdisc_ops
);
// Module exit point: unregisters etf_qdisc_ops.
514 static void __exit
etf_module_exit(void)
516 unregister_qdisc(&etf_qdisc_ops
);
// Hook the two functions above in as the module load and unload callbacks and declare
// the module license.
518 module_init(etf_module_init
)
519 module_exit(etf_module_exit
)
520 MODULE_LICENSE("GPL");