kernel/padata.c

   1 /*
   2  * padata.c - generic interface to process data streams in parallel
   3  *
   4  * See Documentation/padata.txt for an api documentation.
   5  *
   6  * Copyright (C) 2008, 2009 secunet Security Networks AG
   7  * Copyright (C) 2008, 2009 Steffen Klassert <steffen.klassert@secunet.com>
   8  *
   9  * This program is free software; you can redistribute it and/or modify it
  10  * under the terms and conditions of the GNU General Public License,
  11  * version 2, as published by the Free Software Foundation.
  12  *
  13  * This program is distributed in the hope it will be useful, but WITHOUT
  14  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  16  * more details.
  17  *
  18  * You should have received a copy of the GNU General Public License along with
  19  * this program; if not, write to the Free Software Foundation, Inc.,
  20  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  21  */
  22
  23 #include <linux/export.h>
  24 #include <linux/cpumask.h>
  25 #include <linux/err.h>
  26 #include <linux/cpu.h>
  27 #include <linux/padata.h>
  28 #include <linux/mutex.h>
  29 #include <linux/sched.h>
  30 #include <linux/slab.h>
  31 #include <linux/sysfs.h>
  32 #include <linux/rcupdate.h>
  33 #include <linux/module.h>
  34
  35 #define MAX_OBJ_NUM 1000
  36
  37 static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
  38 {
  39         int cpu, target_cpu;
  40
  41         target_cpu = cpumask_first(pd->cpumask.pcpu);
  42         for (cpu = 0; cpu < cpu_index; cpu++)
  43                 target_cpu = cpumask_next(target_cpu, pd->cpumask.pcpu);
  44
  45         return target_cpu;
  46 }
  47
  48 static int padata_cpu_hash(struct parallel_data *pd)
  49 {
  50         unsigned int seq_nr;
  51         int cpu_index;
  52
  53         /*
  54          * Hash the sequence numbers to the cpus by taking
  55          * seq_nr mod. number of cpus in use.
  56          */
  57
  58         seq_nr = atomic_inc_return(&pd->seq_nr);
  59         cpu_index = seq_nr % cpumask_weight(pd->cpumask.pcpu);
  60
  61         return padata_index_to_cpu(pd, cpu_index);
  62 }
  63
  64 static void padata_parallel_worker(struct work_struct *parallel_work)
  65 {
  66         struct padata_parallel_queue *pqueue;
  67         LIST_HEAD(local_list);
  68
  69         local_bh_disable();
  70         pqueue = container_of(parallel_work,
  71                               struct padata_parallel_queue, work);
  72
  73         spin_lock(&pqueue->parallel.lock);
  74         list_replace_init(&pqueue->parallel.list, &local_list);
  75         spin_unlock(&pqueue->parallel.lock);
  76
  77         while (!list_empty(&local_list)) {
  78                 struct padata_priv *padata;
  79
  80                 padata = list_entry(local_list.next,
  81                                     struct padata_priv, list);
  82
  83                 list_del_init(&padata->list);
  84
  85                 padata->parallel(padata);
  86         }
  87
  88         local_bh_enable();
  89 }
  90
  91 /**
  92  * padata_do_parallel - padata parallelization function
  93  *
  94  * @pinst: padata instance
  95  * @padata: object to be parallelized
  96  * @cb_cpu: cpu the serialization callback function will run on,
  97  *          must be in the serial cpumask of padata(i.e. cpumask.cbcpu).
  98  *
  99  * The parallelization callback function will run with BHs off.
 100  * Note: Every object which is parallelized by padata_do_parallel
 101  * must be seen by padata_do_serial.
 102  */
 103 int padata_do_parallel(struct padata_instance *pinst,
 104                        struct padata_priv *padata, int cb_cpu)
 105 {
 106         int target_cpu, err;
 107         struct padata_parallel_queue *queue;
 108         struct parallel_data *pd;
 109
 110         rcu_read_lock_bh();
 111
 112         pd = rcu_dereference_bh(pinst->pd);
 113
 114         err = -EINVAL;
 115         if (!(pinst->flags & PADATA_INIT) || pinst->flags & PADATA_INVALID)
 116                 goto out;
 117
 118         if (!cpumask_test_cpu(cb_cpu, pd->cpumask.cbcpu))
 119                 goto out;
 120
 121         err =  -EBUSY;
 122         if ((pinst->flags & PADATA_RESET))
 123                 goto out;
 124
 125         if (atomic_read(&pd->refcnt) >= MAX_OBJ_NUM)
 126                 goto out;
 127
 128         err = 0;
 129         atomic_inc(&pd->refcnt);
 130         padata->pd = pd;
 131         padata->cb_cpu = cb_cpu;
 132
 133         target_cpu = padata_cpu_hash(pd);
 134         padata->cpu = target_cpu;
 135         queue = per_cpu_ptr(pd->pqueue, target_cpu);
 136
 137         spin_lock(&queue->parallel.lock);
 138         list_add_tail(&padata->list, &queue->parallel.list);
 139         spin_unlock(&queue->parallel.lock);
 140
 141         queue_work_on(target_cpu, pinst->wq, &queue->work);
 142
 143 out:
 144         rcu_read_unlock_bh();
 145
 146         return err;
 147 }
 148 EXPORT_SYMBOL(padata_do_parallel);
 149
 150 /*
 151  * padata_get_next - Get the next object that needs serialization.
 152  *
 153  * Return values are:
 154  *
 155  * A pointer to the control struct of the next object that needs
 156  * serialization, if present in one of the percpu reorder queues.
 157  *
 158  * -EINPROGRESS, if the next object that needs serialization will
 159  *  be parallel processed by another cpu and is not yet present in
 160  *  the cpu's reorder queue.
 161  *
 162  * -ENODATA, if this cpu has to do the parallel processing for
 163  *  the next object.
 164  */
 165 static struct padata_priv *padata_get_next(struct parallel_data *pd)
 166 {
 167         int cpu, num_cpus;
 168         unsigned int next_nr, next_index;
 169         struct padata_parallel_queue *next_queue;
 170         struct padata_priv *padata;
 171         struct padata_list *reorder;
 172
 173         num_cpus = cpumask_weight(pd->cpumask.pcpu);
 174
 175         /*
 176          * Calculate the percpu reorder queue and the sequence
 177          * number of the next object.
 178          */
 179         next_nr = pd->processed;
 180         next_index = next_nr % num_cpus;
 181         cpu = padata_index_to_cpu(pd, next_index);
 182         next_queue = per_cpu_ptr(pd->pqueue, cpu);
 183
 184         reorder = &next_queue->reorder;
 185
 186         spin_lock(&reorder->lock);
 187         if (!list_empty(&reorder->list)) {
 188                 padata = list_entry(reorder->list.next,
 189                                     struct padata_priv, list);
 190
 191                 list_del_init(&padata->list);
 192                 atomic_dec(&pd->reorder_objects);
 193
 194                 pd->processed++;
 195
 196                 spin_unlock(&reorder->lock);
 197                 goto out;
 198         }
 199         spin_unlock(&reorder->lock);
 200
 201         if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) {
 202                 padata = ERR_PTR(-ENODATA);
 203                 goto out;
 204         }
 205
 206         padata = ERR_PTR(-EINPROGRESS);
 207 out:
 208         return padata;
 209 }
 210
 211 static void padata_reorder(struct parallel_data *pd)
 212 {
 213         int cb_cpu;
 214         struct padata_priv *padata;
 215         struct padata_serial_queue *squeue;
 216         struct padata_instance *pinst = pd->pinst;
 217
 218         /*
 219          * We need to ensure that only one cpu can work on dequeueing of
 220          * the reorder queue the time. Calculating in which percpu reorder
 221          * queue the next object will arrive takes some time. A spinlock
 222          * would be highly contended. Also it is not clear in which order
 223          * the objects arrive to the reorder queues. So a cpu could wait to
 224          * get the lock just to notice that there is nothing to do at the
 225          * moment. Therefore we use a trylock and let the holder of the lock
 226          * care for all the objects enqueued during the holdtime of the lock.
 227          */
 228         if (!spin_trylock_bh(&pd->lock))
 229                 return;
 230
 231         while (1) {
 232                 padata = padata_get_next(pd);
 233
 234                 /*
 235                  * If the next object that needs serialization is parallel
 236                  * processed by another cpu and is still on it's way to the
 237                  * cpu's reorder queue, nothing to do for now.
 238                  */
 239                 if (PTR_ERR(padata) == -EINPROGRESS)
 240                         break;
 241
 242                 /*
 243                  * This cpu has to do the parallel processing of the next
 244                  * object. It's waiting in the cpu's parallelization queue,
 245                  * so exit immediately.
 246                  */
 247                 if (PTR_ERR(padata) == -ENODATA) {
 248                         del_timer(&pd->timer);
 249                         spin_unlock_bh(&pd->lock);
 250                         return;
 251                 }
 252
 253                 cb_cpu = padata->cb_cpu;
 254                 squeue = per_cpu_ptr(pd->squeue, cb_cpu);
 255
 256                 spin_lock(&squeue->serial.lock);
 257                 list_add_tail(&padata->list, &squeue->serial.list);
 258                 spin_unlock(&squeue->serial.lock);
 259
 260                 queue_work_on(cb_cpu, pinst->wq, &squeue->work);
 261         }
 262
 263         spin_unlock_bh(&pd->lock);
 264
 265         /*
 266          * The next object that needs serialization might have arrived to
 267          * the reorder queues in the meantime, we will be called again
 268          * from the timer function if no one else cares for it.
 269          *
 270          * Ensure reorder_objects is read after pd->lock is dropped so we see
 271          * an increment from another task in padata_do_serial.  Pairs with
 272          * smp_mb__after_atomic in padata_do_serial.
 273          */
 274         smp_mb();
 275         if (atomic_read(&pd->reorder_objects)
 276                         && !(pinst->flags & PADATA_RESET))
 277                 mod_timer(&pd->timer, jiffies + HZ);
 278         else
 279                 del_timer(&pd->timer);
 280
 281         return;
 282 }
 283
 284 static void invoke_padata_reorder(struct work_struct *work)
 285 {
 286         struct padata_parallel_queue *pqueue;
 287         struct parallel_data *pd;
 288
 289         local_bh_disable();
 290         pqueue = container_of(work, struct padata_parallel_queue, reorder_work);
 291         pd = pqueue->pd;
 292         padata_reorder(pd);
 293         local_bh_enable();
 294 }
 295
 296 static void padata_reorder_timer(struct timer_list *t)
 297 {
 298         struct parallel_data *pd = from_timer(pd, t, timer);
 299         unsigned int weight;
 300         int target_cpu, cpu;
 301
 302         cpu = get_cpu();
 303
 304         /* We don't lock pd here to not interfere with parallel processing
 305          * padata_reorder() calls on other CPUs. We just need any CPU out of
 306          * the cpumask.pcpu set. It would be nice if it's the right one but
 307          * it doesn't matter if we're off to the next one by using an outdated
 308          * pd->processed value.
 309          */
 310         weight = cpumask_weight(pd->cpumask.pcpu);
 311         target_cpu = padata_index_to_cpu(pd, pd->processed % weight);
 312
 313         /* ensure to call the reorder callback on the correct CPU */
 314         if (cpu != target_cpu) {
 315                 struct padata_parallel_queue *pqueue;
 316                 struct padata_instance *pinst;
 317
 318                 /* The timer function is serialized wrt itself -- no locking
 319                  * needed.
 320                  */
 321                 pinst = pd->pinst;
 322                 pqueue = per_cpu_ptr(pd->pqueue, target_cpu);
 323                 queue_work_on(target_cpu, pinst->wq, &pqueue->reorder_work);
 324         } else {
 325                 padata_reorder(pd);
 326         }
 327
 328         put_cpu();
 329 }
 330
 331 static void padata_serial_worker(struct work_struct *serial_work)
 332 {
 333         struct padata_serial_queue *squeue;
 334         struct parallel_data *pd;
 335         LIST_HEAD(local_list);
 336
 337         local_bh_disable();
 338         squeue = container_of(serial_work, struct padata_serial_queue, work);
 339         pd = squeue->pd;
 340
 341         spin_lock(&squeue->serial.lock);
 342         list_replace_init(&squeue->serial.list, &local_list);
 343         spin_unlock(&squeue->serial.lock);
 344
 345         while (!list_empty(&local_list)) {
 346                 struct padata_priv *padata;
 347
 348                 padata = list_entry(local_list.next,
 349                                     struct padata_priv, list);
 350
 351                 list_del_init(&padata->list);
 352
 353                 padata->serial(padata);
 354                 atomic_dec(&pd->refcnt);
 355         }
 356         local_bh_enable();
 357 }
 358
 359 /**
 360  * padata_do_serial - padata serialization function
 361  *
 362  * @padata: object to be serialized.
 363  *
 364  * padata_do_serial must be called for every parallelized object.
 365  * The serialization callback function will run with BHs off.
 366  */
 367 void padata_do_serial(struct padata_priv *padata)
 368 {
 369         int cpu;
 370         struct padata_parallel_queue *pqueue;
 371         struct parallel_data *pd;
 372         int reorder_via_wq = 0;
 373
 374         pd = padata->pd;
 375
 376         cpu = get_cpu();
 377
 378         /* We need to run on the same CPU padata_do_parallel(.., padata, ..)
 379          * was called on -- or, at least, enqueue the padata object into the
 380          * correct per-cpu queue.
 381          */
 382         if (cpu != padata->cpu) {
 383                 reorder_via_wq = 1;
 384                 cpu = padata->cpu;
 385         }
 386
 387         pqueue = per_cpu_ptr(pd->pqueue, cpu);
 388
 389         spin_lock(&pqueue->reorder.lock);
 390         atomic_inc(&pd->reorder_objects);
 391         list_add_tail(&padata->list, &pqueue->reorder.list);
 392         spin_unlock(&pqueue->reorder.lock);
 393
 394         /*
 395          * Ensure the atomic_inc of reorder_objects above is ordered correctly
 396          * with the trylock of pd->lock in padata_reorder.  Pairs with smp_mb
 397          * in padata_reorder.
 398          */
 399         smp_mb__after_atomic();
 400
 401         put_cpu();
 402
 403         /* If we're running on the wrong CPU, call padata_reorder() via a
 404          * kernel worker.
 405          */
 406         if (reorder_via_wq)
 407                 queue_work_on(cpu, pd->pinst->wq, &pqueue->reorder_work);
 408         else
 409                 padata_reorder(pd);
 410 }
 411 EXPORT_SYMBOL(padata_do_serial);
 412
 413 static int padata_setup_cpumasks(struct parallel_data *pd,
 414                                  const struct cpumask *pcpumask,
 415                                  const struct cpumask *cbcpumask)
 416 {
 417         if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL))
 418                 return -ENOMEM;
 419
 420         cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask);
 421         if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) {
 422                 free_cpumask_var(pd->cpumask.pcpu);
 423                 return -ENOMEM;
 424         }
 425
 426         cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_online_mask);
 427         return 0;
 428 }
 429
 430 static void __padata_list_init(struct padata_list *pd_list)
 431 {
 432         INIT_LIST_HEAD(&pd_list->list);
 433         spin_lock_init(&pd_list->lock);
 434 }
 435
 436 /* Initialize all percpu queues used by serial workers */
 437 static void padata_init_squeues(struct parallel_data *pd)
 438 {
 439         int cpu;
 440         struct padata_serial_queue *squeue;
 441
 442         for_each_cpu(cpu, pd->cpumask.cbcpu) {
 443                 squeue = per_cpu_ptr(pd->squeue, cpu);
 444                 squeue->pd = pd;
 445                 __padata_list_init(&squeue->serial);
 446                 INIT_WORK(&squeue->work, padata_serial_worker);
 447         }
 448 }
 449
 450 /* Initialize all percpu queues used by parallel workers */
 451 static void padata_init_pqueues(struct parallel_data *pd)
 452 {
 453         int cpu_index, cpu;
 454         struct padata_parallel_queue *pqueue;
 455
 456         cpu_index = 0;
 457         for_each_possible_cpu(cpu) {
 458                 pqueue = per_cpu_ptr(pd->pqueue, cpu);
 459
 460                 if (!cpumask_test_cpu(cpu, pd->cpumask.pcpu)) {
 461                         pqueue->cpu_index = -1;
 462                         continue;
 463                 }
 464
 465                 pqueue->pd = pd;
 466                 pqueue->cpu_index = cpu_index;
 467                 cpu_index++;
 468
 469                 __padata_list_init(&pqueue->reorder);
 470                 __padata_list_init(&pqueue->parallel);
 471                 INIT_WORK(&pqueue->work, padata_parallel_worker);
 472                 INIT_WORK(&pqueue->reorder_work, invoke_padata_reorder);
 473                 atomic_set(&pqueue->num_obj, 0);
 474         }
 475 }
 476
 477 /* Allocate and initialize the internal cpumask dependend resources. */
 478 static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
 479                                              const struct cpumask *pcpumask,
 480                                              const struct cpumask *cbcpumask)
 481 {
 482         struct parallel_data *pd;
 483
 484         pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL);
 485         if (!pd)
 486                 goto err;
 487
 488         pd->pqueue = alloc_percpu(struct padata_parallel_queue);
 489         if (!pd->pqueue)
 490                 goto err_free_pd;
 491
 492         pd->squeue = alloc_percpu(struct padata_serial_queue);
 493         if (!pd->squeue)
 494                 goto err_free_pqueue;
 495         if (padata_setup_cpumasks(pd, pcpumask, cbcpumask) < 0)
 496                 goto err_free_squeue;
 497
 498         padata_init_pqueues(pd);
 499         padata_init_squeues(pd);
 500         timer_setup(&pd->timer, padata_reorder_timer, 0);
 501         atomic_set(&pd->seq_nr, -1);
 502         atomic_set(&pd->reorder_objects, 0);
 503         atomic_set(&pd->refcnt, 0);
 504         pd->pinst = pinst;
 505         spin_lock_init(&pd->lock);
 506
 507         return pd;
 508
 509 err_free_squeue:
 510         free_percpu(pd->squeue);
 511 err_free_pqueue:
 512         free_percpu(pd->pqueue);
 513 err_free_pd:
 514         kfree(pd);
 515 err:
 516         return NULL;
 517 }
 518
 519 static void padata_free_pd(struct parallel_data *pd)
 520 {
 521         free_cpumask_var(pd->cpumask.pcpu);
 522         free_cpumask_var(pd->cpumask.cbcpu);
 523         free_percpu(pd->pqueue);
 524         free_percpu(pd->squeue);
 525         kfree(pd);
 526 }
 527
 528 /* Flush all objects out of the padata queues. */
 529 static void padata_flush_queues(struct parallel_data *pd)
 530 {
 531         int cpu;
 532         struct padata_parallel_queue *pqueue;
 533         struct padata_serial_queue *squeue;
 534
 535         for_each_cpu(cpu, pd->cpumask.pcpu) {
 536                 pqueue = per_cpu_ptr(pd->pqueue, cpu);
 537                 flush_work(&pqueue->work);
 538         }
 539
 540         del_timer_sync(&pd->timer);
 541
 542         if (atomic_read(&pd->reorder_objects))
 543                 padata_reorder(pd);
 544
 545         for_each_cpu(cpu, pd->cpumask.cbcpu) {
 546                 squeue = per_cpu_ptr(pd->squeue, cpu);
 547                 flush_work(&squeue->work);
 548         }
 549
 550         BUG_ON(atomic_read(&pd->refcnt) != 0);
 551 }
 552
 553 static void __padata_start(struct padata_instance *pinst)
 554 {
 555         pinst->flags |= PADATA_INIT;
 556 }
 557
 558 static void __padata_stop(struct padata_instance *pinst)
 559 {
 560         if (!(pinst->flags & PADATA_INIT))
 561                 return;
 562
 563         pinst->flags &= ~PADATA_INIT;
 564
 565         synchronize_rcu();
 566
 567         get_online_cpus();
 568         padata_flush_queues(pinst->pd);
 569         put_online_cpus();
 570 }
 571
 572 /* Replace the internal control structure with a new one. */
 573 static void padata_replace(struct padata_instance *pinst,
 574                            struct parallel_data *pd_new)
 575 {
 576         struct parallel_data *pd_old = pinst->pd;
 577         int notification_mask = 0;
 578
 579         pinst->flags |= PADATA_RESET;
 580
 581         rcu_assign_pointer(pinst->pd, pd_new);
 582
 583         synchronize_rcu();
 584
 585         if (!cpumask_equal(pd_old->cpumask.pcpu, pd_new->cpumask.pcpu))
 586                 notification_mask |= PADATA_CPU_PARALLEL;
 587         if (!cpumask_equal(pd_old->cpumask.cbcpu, pd_new->cpumask.cbcpu))
 588                 notification_mask |= PADATA_CPU_SERIAL;
 589
 590         padata_flush_queues(pd_old);
 591         padata_free_pd(pd_old);
 592
 593         if (notification_mask)
 594                 blocking_notifier_call_chain(&pinst->cpumask_change_notifier,
 595                                              notification_mask,
 596                                              &pd_new->cpumask);
 597
 598         pinst->flags &= ~PADATA_RESET;
 599 }
 600
 601 /**
 602  * padata_register_cpumask_notifier - Registers a notifier that will be called
 603  *                             if either pcpu or cbcpu or both cpumasks change.
 604  *
 605  * @pinst: A poineter to padata instance
 606  * @nblock: A pointer to notifier block.
 607  */
 608 int padata_register_cpumask_notifier(struct padata_instance *pinst,
 609                                      struct notifier_block *nblock)
 610 {
 611         return blocking_notifier_chain_register(&pinst->cpumask_change_notifier,
 612                                                 nblock);
 613 }
 614 EXPORT_SYMBOL(padata_register_cpumask_notifier);
 615
 616 /**
 617  * padata_unregister_cpumask_notifier - Unregisters cpumask notifier
 618  *        registered earlier  using padata_register_cpumask_notifier
 619  *
 620  * @pinst: A pointer to data instance.
 621  * @nlock: A pointer to notifier block.
 622  */
 623 int padata_unregister_cpumask_notifier(struct padata_instance *pinst,
 624                                        struct notifier_block *nblock)
 625 {
 626         return blocking_notifier_chain_unregister(
 627                 &pinst->cpumask_change_notifier,
 628                 nblock);
 629 }
 630 EXPORT_SYMBOL(padata_unregister_cpumask_notifier);
 631
 632
 633 /* If cpumask contains no active cpu, we mark the instance as invalid. */
 634 static bool padata_validate_cpumask(struct padata_instance *pinst,
 635                                     const struct cpumask *cpumask)
 636 {
 637         if (!cpumask_intersects(cpumask, cpu_online_mask)) {
 638                 pinst->flags |= PADATA_INVALID;
 639                 return false;
 640         }
 641
 642         pinst->flags &= ~PADATA_INVALID;
 643         return true;
 644 }
 645
 646 static int __padata_set_cpumasks(struct padata_instance *pinst,
 647                                  cpumask_var_t pcpumask,
 648                                  cpumask_var_t cbcpumask)
 649 {
 650         int valid;
 651         struct parallel_data *pd;
 652
 653         valid = padata_validate_cpumask(pinst, pcpumask);
 654         if (!valid) {
 655                 __padata_stop(pinst);
 656                 goto out_replace;
 657         }
 658
 659         valid = padata_validate_cpumask(pinst, cbcpumask);
 660         if (!valid)
 661                 __padata_stop(pinst);
 662
 663 out_replace:
 664         pd = padata_alloc_pd(pinst, pcpumask, cbcpumask);
 665         if (!pd)
 666                 return -ENOMEM;
 667
 668         cpumask_copy(pinst->cpumask.pcpu, pcpumask);
 669         cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
 670
 671         padata_replace(pinst, pd);
 672
 673         if (valid)
 674                 __padata_start(pinst);
 675
 676         return 0;
 677 }
 678
 679 /**
 680  * padata_set_cpumask: Sets specified by @cpumask_type cpumask to the value
 681  *                     equivalent to @cpumask.
 682  *
 683  * @pinst: padata instance
 684  * @cpumask_type: PADATA_CPU_SERIAL or PADATA_CPU_PARALLEL corresponding
 685  *                to parallel and serial cpumasks respectively.
 686  * @cpumask: the cpumask to use
 687  */
 688 int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
 689                        cpumask_var_t cpumask)
 690 {
 691         struct cpumask *serial_mask, *parallel_mask;
 692         int err = -EINVAL;
 693
 694         mutex_lock(&pinst->lock);
 695         get_online_cpus();
 696
 697         switch (cpumask_type) {
 698         case PADATA_CPU_PARALLEL:
 699                 serial_mask = pinst->cpumask.cbcpu;
 700                 parallel_mask = cpumask;
 701                 break;
 702         case PADATA_CPU_SERIAL:
 703                 parallel_mask = pinst->cpumask.pcpu;
 704                 serial_mask = cpumask;
 705                 break;
 706         default:
 707                  goto out;
 708         }
 709
 710         err =  __padata_set_cpumasks(pinst, parallel_mask, serial_mask);
 711
 712 out:
 713         put_online_cpus();
 714         mutex_unlock(&pinst->lock);
 715
 716         return err;
 717 }
 718 EXPORT_SYMBOL(padata_set_cpumask);
 719
 720 /**
 721  * padata_start - start the parallel processing
 722  *
 723  * @pinst: padata instance to start
 724  */
 725 int padata_start(struct padata_instance *pinst)
 726 {
 727         int err = 0;
 728
 729         mutex_lock(&pinst->lock);
 730
 731         if (pinst->flags & PADATA_INVALID)
 732                 err = -EINVAL;
 733
 734          __padata_start(pinst);
 735
 736         mutex_unlock(&pinst->lock);
 737
 738         return err;
 739 }
 740 EXPORT_SYMBOL(padata_start);
 741
 742 /**
 743  * padata_stop - stop the parallel processing
 744  *
 745  * @pinst: padata instance to stop
 746  */
 747 void padata_stop(struct padata_instance *pinst)
 748 {
 749         mutex_lock(&pinst->lock);
 750         __padata_stop(pinst);
 751         mutex_unlock(&pinst->lock);
 752 }
 753 EXPORT_SYMBOL(padata_stop);
 754
 755 #ifdef CONFIG_HOTPLUG_CPU
 756
 757 static int __padata_add_cpu(struct padata_instance *pinst, int cpu)
 758 {
 759         struct parallel_data *pd;
 760
 761         if (cpumask_test_cpu(cpu, cpu_online_mask)) {
 762                 pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu,
 763                                      pinst->cpumask.cbcpu);
 764                 if (!pd)
 765                         return -ENOMEM;
 766
 767                 padata_replace(pinst, pd);
 768
 769                 if (padata_validate_cpumask(pinst, pinst->cpumask.pcpu) &&
 770                     padata_validate_cpumask(pinst, pinst->cpumask.cbcpu))
 771                         __padata_start(pinst);
 772         }
 773
 774         return 0;
 775 }
 776
 777 static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
 778 {
 779         struct parallel_data *pd = NULL;
 780
 781         if (cpumask_test_cpu(cpu, cpu_online_mask)) {
 782
 783                 if (!padata_validate_cpumask(pinst, pinst->cpumask.pcpu) ||
 784                     !padata_validate_cpumask(pinst, pinst->cpumask.cbcpu))
 785                         __padata_stop(pinst);
 786
 787                 pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu,
 788                                      pinst->cpumask.cbcpu);
 789                 if (!pd)
 790                         return -ENOMEM;
 791
 792                 padata_replace(pinst, pd);
 793
 794                 cpumask_clear_cpu(cpu, pd->cpumask.cbcpu);
 795                 cpumask_clear_cpu(cpu, pd->cpumask.pcpu);
 796         }
 797
 798         return 0;
 799 }
 800
 801  /**
 802  * padata_remove_cpu - remove a cpu from the one or both(serial and parallel)
 803  *                     padata cpumasks.
 804  *
 805  * @pinst: padata instance
 806  * @cpu: cpu to remove
 807  * @mask: bitmask specifying from which cpumask @cpu should be removed
 808  *        The @mask may be any combination of the following flags:
 809  *          PADATA_CPU_SERIAL   - serial cpumask
 810  *          PADATA_CPU_PARALLEL - parallel cpumask
 811  */
 812 int padata_remove_cpu(struct padata_instance *pinst, int cpu, int mask)
 813 {
 814         int err;
 815
 816         if (!(mask & (PADATA_CPU_SERIAL | PADATA_CPU_PARALLEL)))
 817                 return -EINVAL;
 818
 819         mutex_lock(&pinst->lock);
 820
 821         get_online_cpus();
 822         if (mask & PADATA_CPU_SERIAL)
 823                 cpumask_clear_cpu(cpu, pinst->cpumask.cbcpu);
 824         if (mask & PADATA_CPU_PARALLEL)
 825                 cpumask_clear_cpu(cpu, pinst->cpumask.pcpu);
 826
 827         err = __padata_remove_cpu(pinst, cpu);
 828         put_online_cpus();
 829
 830         mutex_unlock(&pinst->lock);
 831
 832         return err;
 833 }
 834 EXPORT_SYMBOL(padata_remove_cpu);
 835
 836 static inline int pinst_has_cpu(struct padata_instance *pinst, int cpu)
 837 {
 838         return cpumask_test_cpu(cpu, pinst->cpumask.pcpu) ||
 839                 cpumask_test_cpu(cpu, pinst->cpumask.cbcpu);
 840 }
 841
 842 static int padata_cpu_online(unsigned int cpu, struct hlist_node *node)
 843 {
 844         struct padata_instance *pinst;
 845         int ret;
 846
 847         pinst = hlist_entry_safe(node, struct padata_instance, node);
 848         if (!pinst_has_cpu(pinst, cpu))
 849                 return 0;
 850
 851         mutex_lock(&pinst->lock);
 852         ret = __padata_add_cpu(pinst, cpu);
 853         mutex_unlock(&pinst->lock);
 854         return ret;
 855 }
 856
 857 static int padata_cpu_prep_down(unsigned int cpu, struct hlist_node *node)
 858 {
 859         struct padata_instance *pinst;
 860         int ret;
 861
 862         pinst = hlist_entry_safe(node, struct padata_instance, node);
 863         if (!pinst_has_cpu(pinst, cpu))
 864                 return 0;
 865
 866         mutex_lock(&pinst->lock);
 867         ret = __padata_remove_cpu(pinst, cpu);
 868         mutex_unlock(&pinst->lock);
 869         return ret;
 870 }
 871
 872 static enum cpuhp_state hp_online;
 873 #endif
 874
 875 static void __padata_free(struct padata_instance *pinst)
 876 {
 877 #ifdef CONFIG_HOTPLUG_CPU
 878         cpuhp_state_remove_instance_nocalls(hp_online, &pinst->node);
 879 #endif
 880
 881         padata_stop(pinst);
 882         padata_free_pd(pinst->pd);
 883         free_cpumask_var(pinst->cpumask.pcpu);
 884         free_cpumask_var(pinst->cpumask.cbcpu);
 885         kfree(pinst);
 886 }
 887
 888 #define kobj2pinst(_kobj)                                       \
 889         container_of(_kobj, struct padata_instance, kobj)
 890 #define attr2pentry(_attr)                                      \
 891         container_of(_attr, struct padata_sysfs_entry, attr)
 892
 893 static void padata_sysfs_release(struct kobject *kobj)
 894 {
 895         struct padata_instance *pinst = kobj2pinst(kobj);
 896         __padata_free(pinst);
 897 }
 898
 899 struct padata_sysfs_entry {
 900         struct attribute attr;
 901         ssize_t (*show)(struct padata_instance *, struct attribute *, char *);
 902         ssize_t (*store)(struct padata_instance *, struct attribute *,
 903                          const char *, size_t);
 904 };
 905
 906 static ssize_t show_cpumask(struct padata_instance *pinst,
 907                             struct attribute *attr,  char *buf)
 908 {
 909         struct cpumask *cpumask;
 910         ssize_t len;
 911
 912         mutex_lock(&pinst->lock);
 913         if (!strcmp(attr->name, "serial_cpumask"))
 914                 cpumask = pinst->cpumask.cbcpu;
 915         else
 916                 cpumask = pinst->cpumask.pcpu;
 917
 918         len = snprintf(buf, PAGE_SIZE, "%*pb\n",
 919                        nr_cpu_ids, cpumask_bits(cpumask));
 920         mutex_unlock(&pinst->lock);
 921         return len < PAGE_SIZE ? len : -EINVAL;
 922 }
 923
 924 static ssize_t store_cpumask(struct padata_instance *pinst,
 925                              struct attribute *attr,
 926                              const char *buf, size_t count)
 927 {
 928         cpumask_var_t new_cpumask;
 929         ssize_t ret;
 930         int mask_type;
 931
 932         if (!alloc_cpumask_var(&new_cpumask, GFP_KERNEL))
 933                 return -ENOMEM;
 934
 935         ret = bitmap_parse(buf, count, cpumask_bits(new_cpumask),
 936                            nr_cpumask_bits);
 937         if (ret < 0)
 938                 goto out;
 939
 940         mask_type = !strcmp(attr->name, "serial_cpumask") ?
 941                 PADATA_CPU_SERIAL : PADATA_CPU_PARALLEL;
 942         ret = padata_set_cpumask(pinst, mask_type, new_cpumask);
 943         if (!ret)
 944                 ret = count;
 945
 946 out:
 947         free_cpumask_var(new_cpumask);
 948         return ret;
 949 }
 950
/*
 * Define a static padata_sysfs_entry named <_name>_attr with mode 0644
 * and the given show and store handlers.
 */
#define PADATA_ATTR_RW(_name, _show_name, _store_name)          \
        static struct padata_sysfs_entry _name##_attr =         \
                __ATTR(_name, 0644, _show_name, _store_name)
/*
 * Define a static padata_sysfs_entry named <_name>_attr with mode 0400,
 * the given show handler and no store handler (NULL).
 */
#define PADATA_ATTR_RO(_name, _show_name)               \
        static struct padata_sysfs_entry _name##_attr = \
                __ATTR(_name, 0400, _show_name, NULL)

/* Both cpumask attributes share the same pair of handlers. */
PADATA_ATTR_RW(serial_cpumask, show_cpumask, store_cpumask);
PADATA_ATTR_RW(parallel_cpumask, show_cpumask, store_cpumask);
 960
/*
 * Padata sysfs provides the following objects:
 * serial_cpumask   [RW] - cpumask for serial workers
 * parallel_cpumask [RW] - cpumask for parallel workers
 *
 * Both attributes are served by show_cpumask()/store_cpumask(), which
 * tell them apart by attribute name. The array is NULL-terminated and
 * is installed as the default attribute set of padata_attr_type.
 */
static struct attribute *padata_default_attrs[] = {
        &serial_cpumask_attr.attr,
        &parallel_cpumask_attr.attr,
        NULL,
};
 971
 972 static ssize_t padata_sysfs_show(struct kobject *kobj,
 973                                  struct attribute *attr, char *buf)
 974 {
 975         struct padata_instance *pinst;
 976         struct padata_sysfs_entry *pentry;
 977         ssize_t ret = -EIO;
 978
 979         pinst = kobj2pinst(kobj);
 980         pentry = attr2pentry(attr);
 981         if (pentry->show)
 982                 ret = pentry->show(pinst, attr, buf);
 983
 984         return ret;
 985 }
 986
 987 static ssize_t padata_sysfs_store(struct kobject *kobj, struct attribute *attr,
 988                                   const char *buf, size_t count)
 989 {
 990         struct padata_instance *pinst;
 991         struct padata_sysfs_entry *pentry;
 992         ssize_t ret = -EIO;
 993
 994         pinst = kobj2pinst(kobj);
 995         pentry = attr2pentry(attr);
 996         if (pentry->show)
 997                 ret = pentry->store(pinst, attr, buf, count);
 998
 999         return ret;
1000 }
1001
/*
 * sysfs operations for padata kobjects: both entry points forward to
 * the per-attribute handlers stored in struct padata_sysfs_entry.
 */
static const struct sysfs_ops padata_sysfs_ops = {
        .show = padata_sysfs_show,
        .store = padata_sysfs_store,
};
1006
/*
 * kobject type used for padata instances (see kobject_init() in
 * padata_alloc()): wires up the sysfs operations, the default cpumask
 * attributes and the release hook that frees the instance.
 */
static struct kobj_type padata_attr_type = {
        .sysfs_ops = &padata_sysfs_ops,
        .default_attrs = padata_default_attrs,
        .release = padata_sysfs_release,
};
1012
1013 /**
1014  * padata_alloc - allocate and initialize a padata instance and specify
1015  *                cpumasks for serial and parallel workers.
1016  *
1017  * @wq: workqueue to use for the allocated padata instance
1018  * @pcpumask: cpumask that will be used for padata parallelization
1019  * @cbcpumask: cpumask that will be used for padata serialization
1020  *
1021  * Must be called from a cpus_read_lock() protected region
1022  */
1023 static struct padata_instance *padata_alloc(struct workqueue_struct *wq,
1024                                             const struct cpumask *pcpumask,
1025                                             const struct cpumask *cbcpumask)
1026 {
1027         struct padata_instance *pinst;
1028         struct parallel_data *pd = NULL;
1029
1030         pinst = kzalloc(sizeof(struct padata_instance), GFP_KERNEL);
1031         if (!pinst)
1032                 goto err;
1033
1034         if (!alloc_cpumask_var(&pinst->cpumask.pcpu, GFP_KERNEL))
1035                 goto err_free_inst;
1036         if (!alloc_cpumask_var(&pinst->cpumask.cbcpu, GFP_KERNEL)) {
1037                 free_cpumask_var(pinst->cpumask.pcpu);
1038                 goto err_free_inst;
1039         }
1040         if (!padata_validate_cpumask(pinst, pcpumask) ||
1041             !padata_validate_cpumask(pinst, cbcpumask))
1042                 goto err_free_masks;
1043
1044         pd = padata_alloc_pd(pinst, pcpumask, cbcpumask);
1045         if (!pd)
1046                 goto err_free_masks;
1047
1048         rcu_assign_pointer(pinst->pd, pd);
1049
1050         pinst->wq = wq;
1051
1052         cpumask_copy(pinst->cpumask.pcpu, pcpumask);
1053         cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
1054
1055         pinst->flags = 0;
1056
1057         BLOCKING_INIT_NOTIFIER_HEAD(&pinst->cpumask_change_notifier);
1058         kobject_init(&pinst->kobj, &padata_attr_type);
1059         mutex_init(&pinst->lock);
1060
1061 #ifdef CONFIG_HOTPLUG_CPU
1062         cpuhp_state_add_instance_nocalls_cpuslocked(hp_online, &pinst->node);
1063 #endif
1064         return pinst;
1065
1066 err_free_masks:
1067         free_cpumask_var(pinst->cpumask.pcpu);
1068         free_cpumask_var(pinst->cpumask.cbcpu);
1069 err_free_inst:
1070         kfree(pinst);
1071 err:
1072         return NULL;
1073 }
1074
1075 /**
1076  * padata_alloc_possible - Allocate and initialize padata instance.
1077  *                         Use the cpu_possible_mask for serial and
1078  *                         parallel workers.
1079  *
1080  * @wq: workqueue to use for the allocated padata instance
1081  *
1082  * Must be called from a cpus_read_lock() protected region
1083  */
1084 struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq)
1085 {
1086         lockdep_assert_cpus_held();
1087         return padata_alloc(wq, cpu_possible_mask, cpu_possible_mask);
1088 }
1089 EXPORT_SYMBOL(padata_alloc_possible);
1090
1091 /**
1092  * padata_free - free a padata instance
1093  *
1094  * @padata_inst: padata instance to free
1095  */
1096 void padata_free(struct padata_instance *pinst)
1097 {
1098         kobject_put(&pinst->kobj);
1099 }
1100 EXPORT_SYMBOL(padata_free);
1101
1102 #ifdef CONFIG_HOTPLUG_CPU
1103
1104 static __init int padata_driver_init(void)
1105 {
1106         int ret;
1107
1108         ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "padata:online",
1109                                       padata_cpu_online,
1110                                       padata_cpu_prep_down);
1111         if (ret < 0)
1112                 return ret;
1113         hp_online = ret;
1114         return 0;
1115 }
1116 module_init(padata_driver_init);
1117
1118 static __exit void padata_driver_exit(void)
1119 {
1120         cpuhp_remove_multi_state(hp_online);
1121 }
1122 module_exit(padata_driver_exit);
1123 #endif