kernel/bpf/cgroup.c

   1 /*
   2  * Functions to manage eBPF programs attached to cgroups
   3  *
   4  * Copyright (c) 2016 Daniel Mack
   5  *
   6  * This file is subject to the terms and conditions of version 2 of the GNU
   7  * General Public License.  See the file COPYING in the main directory of the
   8  * Linux distribution for more details.
   9  */
  10
  11 #include <linux/kernel.h>
  12 #include <linux/atomic.h>
  13 #include <linux/cgroup.h>
  14 #include <linux/slab.h>
  15 #include <linux/bpf.h>
  16 #include <linux/bpf-cgroup.h>
  17 #include <net/sock.h>
  18
/*
 * Static key, initially false. Within this file it is incremented once for
 * every program that gets attached to a cgroup and decremented once for
 * every attached program that is released again.
 */
DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
EXPORT_SYMBOL(cgroup_bpf_enabled_key);
  21
  22 /**
  23  * cgroup_bpf_put() - put references of all bpf programs
  24  * @cgrp: the cgroup to modify
  25  */
  26 void cgroup_bpf_put(struct cgroup *cgrp)
  27 {
  28         unsigned int type;
  29
  30         for (type = 0; type < ARRAY_SIZE(cgrp->bpf.prog); type++) {
  31                 struct bpf_prog *prog = cgrp->bpf.prog[type];
  32
  33                 if (prog) {
  34                         bpf_prog_put(prog);
  35                         static_branch_dec(&cgroup_bpf_enabled_key);
  36                 }
  37         }
  38 }
  39
  40 /**
  41  * cgroup_bpf_inherit() - inherit effective programs from parent
  42  * @cgrp: the cgroup to modify
  43  * @parent: the parent to inherit from
  44  */
  45 void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent)
  46 {
  47         unsigned int type;
  48
  49         for (type = 0; type < ARRAY_SIZE(cgrp->bpf.effective); type++) {
  50                 struct bpf_prog *e;
  51
  52                 e = rcu_dereference_protected(parent->bpf.effective[type],
  53                                               lockdep_is_held(&cgroup_mutex));
  54                 rcu_assign_pointer(cgrp->bpf.effective[type], e);
  55                 cgrp->bpf.disallow_override[type] = parent->bpf.disallow_override[type];
  56         }
  57 }
  58
  59 /**
  60  * __cgroup_bpf_update() - Update the pinned program of a cgroup, and
  61  *                         propagate the change to descendants
  62  * @cgrp: The cgroup which descendants to traverse
  63  * @parent: The parent of @cgrp, or %NULL if @cgrp is the root
  64  * @prog: A new program to pin
  65  * @type: Type of pinning operation (ingress/egress)
  66  *
  67  * Each cgroup has a set of two pointers for bpf programs; one for eBPF
  68  * programs it owns, and which is effective for execution.
  69  *
  70  * If @prog is not %NULL, this function attaches a new program to the cgroup
  71  * and releases the one that is currently attached, if any. @prog is then made
  72  * the effective program of type @type in that cgroup.
  73  *
  74  * If @prog is %NULL, the currently attached program of type @type is released,
  75  * and the effective program of the parent cgroup (if any) is inherited to
  76  * @cgrp.
  77  *
  78  * Then, the descendants of @cgrp are walked and the effective program for
  79  * each of them is set to the effective program of @cgrp unless the
  80  * descendant has its own program attached, in which case the subbranch is
  81  * skipped. This ensures that delegated subcgroups with own programs are left
  82  * untouched.
  83  *
  84  * Must be called with cgroup_mutex held.
  85  */
  86 int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent,
  87                         struct bpf_prog *prog, enum bpf_attach_type type,
  88                         bool new_overridable)
  89 {
  90         struct bpf_prog *old_prog, *effective = NULL;
  91         struct cgroup_subsys_state *pos;
  92         bool overridable = true;
  93
  94         if (parent) {
  95                 overridable = !parent->bpf.disallow_override[type];
  96                 effective = rcu_dereference_protected(parent->bpf.effective[type],
  97                                                       lockdep_is_held(&cgroup_mutex));
  98         }
  99
 100         if (prog && effective && !overridable)
 101                 /* if parent has non-overridable prog attached, disallow
 102                  * attaching new programs to descendent cgroup
 103                  */
 104                 return -EPERM;
 105
 106         if (prog && effective && overridable != new_overridable)
 107                 /* if parent has overridable prog attached, only
 108                  * allow overridable programs in descendent cgroup
 109                  */
 110                 return -EPERM;
 111
 112         old_prog = cgrp->bpf.prog[type];
 113
 114         if (prog) {
 115                 overridable = new_overridable;
 116                 effective = prog;
 117                 if (old_prog &&
 118                     cgrp->bpf.disallow_override[type] == new_overridable)
 119                         /* disallow attaching non-overridable on top
 120                          * of existing overridable in this cgroup
 121                          * and vice versa
 122                          */
 123                         return -EPERM;
 124         }
 125
 126         if (!prog && !old_prog)
 127                 /* report error when trying to detach and nothing is attached */
 128                 return -ENOENT;
 129
 130         cgrp->bpf.prog[type] = prog;
 131
 132         css_for_each_descendant_pre(pos, &cgrp->self) {
 133                 struct cgroup *desc = container_of(pos, struct cgroup, self);
 134
 135                 /* skip the subtree if the descendant has its own program */
 136                 if (desc->bpf.prog[type] && desc != cgrp) {
 137                         pos = css_rightmost_descendant(pos);
 138                 } else {
 139                         rcu_assign_pointer(desc->bpf.effective[type],
 140                                            effective);
 141                         desc->bpf.disallow_override[type] = !overridable;
 142                 }
 143         }
 144
 145         if (prog)
 146                 static_branch_inc(&cgroup_bpf_enabled_key);
 147
 148         if (old_prog) {
 149                 bpf_prog_put(old_prog);
 150                 static_branch_dec(&cgroup_bpf_enabled_key);
 151         }
 152         return 0;
 153 }
 154
 155 /**
 156  * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
 157  * @sk: The socket sending or receiving traffic
 158  * @skb: The skb that is being sent or received
 159  * @type: The type of program to be exectuted
 160  *
 161  * If no socket is passed, or the socket is not of type INET or INET6,
 162  * this function does nothing and returns 0.
 163  *
 164  * The program type passed in via @type must be suitable for network
 165  * filtering. No further check is performed to assert that.
 166  *
 167  * This function will return %-EPERM if any if an attached program was found
 168  * and if it returned != 1 during execution. In all other cases, 0 is returned.
 169  */
 170 int __cgroup_bpf_run_filter_skb(struct sock *sk,
 171                                 struct sk_buff *skb,
 172                                 enum bpf_attach_type type)
 173 {
 174         struct bpf_prog *prog;
 175         struct cgroup *cgrp;
 176         int ret = 0;
 177
 178         if (!sk || !sk_fullsock(sk))
 179                 return 0;
 180
 181         if (sk->sk_family != AF_INET &&
 182             sk->sk_family != AF_INET6)
 183                 return 0;
 184
 185         cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
 186
 187         rcu_read_lock();
 188
 189         prog = rcu_dereference(cgrp->bpf.effective[type]);
 190         if (prog) {
 191                 unsigned int offset = skb->data - skb_network_header(skb);
 192                 struct sock *save_sk = skb->sk;
 193
 194                 skb->sk = sk;
 195                 __skb_push(skb, offset);
 196                 ret = bpf_prog_run_save_cb(prog, skb) == 1 ? 0 : -EPERM;
 197                 __skb_pull(skb, offset);
 198                 skb->sk = save_sk;
 199         }
 200
 201         rcu_read_unlock();
 202
 203         return ret;
 204 }
 205 EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
 206
 207 /**
 208  * __cgroup_bpf_run_filter_sk() - Run a program on a sock
 209  * @sk: sock structure to manipulate
 210  * @type: The type of program to be exectuted
 211  *
 212  * socket is passed is expected to be of type INET or INET6.
 213  *
 214  * The program type passed in via @type must be suitable for sock
 215  * filtering. No further check is performed to assert that.
 216  *
 217  * This function will return %-EPERM if any if an attached program was found
 218  * and if it returned != 1 during execution. In all other cases, 0 is returned.
 219  */
 220 int __cgroup_bpf_run_filter_sk(struct sock *sk,
 221                                enum bpf_attach_type type)
 222 {
 223         struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
 224         struct bpf_prog *prog;
 225         int ret = 0;
 226
 227
 228         rcu_read_lock();
 229
 230         prog = rcu_dereference(cgrp->bpf.effective[type]);
 231         if (prog)
 232                 ret = BPF_PROG_RUN(prog, sk) == 1 ? 0 : -EPERM;
 233
 234         rcu_read_unlock();
 235
 236         return ret;
 237 }
 238 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
 239
 240 /**
 241  * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock
 242  * @sk: socket to get cgroup from
 243  * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
 244  * sk with connection information (IP addresses, etc.) May not contain
 245  * cgroup info if it is a req sock.
 246  * @type: The type of program to be exectuted
 247  *
 248  * socket passed is expected to be of type INET or INET6.
 249  *
 250  * The program type passed in via @type must be suitable for sock_ops
 251  * filtering. No further check is performed to assert that.
 252  *
 253  * This function will return %-EPERM if any if an attached program was found
 254  * and if it returned != 1 during execution. In all other cases, 0 is returned.
 255  */
 256 int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
 257                                      struct bpf_sock_ops_kern *sock_ops,
 258                                      enum bpf_attach_type type)
 259 {
 260         struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
 261         struct bpf_prog *prog;
 262         int ret = 0;
 263
 264
 265         rcu_read_lock();
 266
 267         prog = rcu_dereference(cgrp->bpf.effective[type]);
 268         if (prog)
 269                 ret = BPF_PROG_RUN(prog, sock_ops) == 1 ? 0 : -EPERM;
 270
 271         rcu_read_unlock();
 272
 273         return ret;
 274 }
 275 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);