aoe: use a kernel thread for transmissions
mirror_ubuntu-bionic-kernel.git: drivers/block/aoe/aoecmd.c
1 /* Copyright (c) 2007 Coraid, Inc. See COPYING for GPL terms. */
2 /*
3 * aoecmd.c
4 * Filesystem request handling methods
5 */
6
7 #include <linux/ata.h>
8 #include <linux/slab.h>
9 #include <linux/hdreg.h>
10 #include <linux/blkdev.h>
11 #include <linux/skbuff.h>
12 #include <linux/netdevice.h>
13 #include <linux/genhd.h>
14 #include <linux/moduleparam.h>
15 #include <linux/workqueue.h>
16 #include <linux/kthread.h>
17 #include <net/net_namespace.h>
18 #include <asm/unaligned.h>
19 #include <linux/uio.h>
20 #include "aoe.h"
21
22 #define MAXIOC (8192) /* default meant to avoid most soft lockups */
23
24 static void ktcomplete(struct frame *, struct sk_buff *);
25
26 static struct buf *nextbuf(struct aoedev *);
27
28 static int aoe_deadsecs = 60 * 3;
29 module_param(aoe_deadsecs, int, 0644);
30 MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev.");
31
32 static int aoe_maxout = 16;
33 module_param(aoe_maxout, int, 0644);
34 MODULE_PARM_DESC(aoe_maxout,
35 "Only aoe_maxout outstanding packets for every MAC on eX.Y.");
36
37 static wait_queue_head_t ktiowq;
38 static struct ktstate kts;
39
40 /* io completion queue */
41 static struct {
42 struct list_head head;
43 spinlock_t lock;
44 } iocq;
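/*
 * ktcomplete() queues completed responses here from network receive
 * context; the "aoe_ktio" kernel thread drains the queue through
 * ktio()/ktiocomplete() so the heavy completion work runs in process
 * context.
 */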
45
46 static struct sk_buff *
47 new_skb(ulong len)
48 {
49 struct sk_buff *skb;
50
51 skb = alloc_skb(len, GFP_ATOMIC);
52 if (skb) {
53 skb_reset_mac_header(skb);
54 skb_reset_network_header(skb);
55 skb->protocol = __constant_htons(ETH_P_AOE);
56 skb_checksum_none_assert(skb);
57 }
58 return skb;
59 }
60
61 static struct frame *
62 getframe(struct aoetgt *t, u32 tag)
63 {
64 struct frame *f;
65 struct list_head *head, *pos, *nx;
66 u32 n;
67
68 n = tag % NFACTIVE;
69 head = &t->factive[n];
70 list_for_each_safe(pos, nx, head) {
71 f = list_entry(pos, struct frame, head);
72 if (f->tag == tag) {
73 list_del(pos);
74 return f;
75 }
76 }
77 return NULL;
78 }
79
80 /*
81 * Leave the top bit clear so we have tagspace for userland.
82 * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
83 * This driver reserves tag -1 to mean "unused frame."
84 */
85 static int
86 newtag(struct aoetgt *t)
87 {
88 register ulong n;
89
90 n = jiffies & 0xffff;
91 return n |= (++t->lasttag & 0x7fff) << 16;
92 }
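/*
 * Resulting tag layout:
 *   bit  31      always clear (tagspace left for userland)
 *   bits 30..16  per-target sequence number (t->lasttag & 0x7fff)
 *   bits 15..0   jiffies at transmit time, read back by tsince()
 */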
93
94 static u32
95 aoehdr_atainit(struct aoedev *d, struct aoetgt *t, struct aoe_hdr *h)
96 {
97 u32 host_tag = newtag(t);
98
99 memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
100 memcpy(h->dst, t->addr, sizeof h->dst);
101 h->type = __constant_cpu_to_be16(ETH_P_AOE);
102 h->verfl = AOE_HVER;
103 h->major = cpu_to_be16(d->aoemajor);
104 h->minor = d->aoeminor;
105 h->cmd = AOECMD_ATA;
106 h->tag = cpu_to_be32(host_tag);
107
108 return host_tag;
109 }
110
111 static inline void
112 put_lba(struct aoe_atahdr *ah, sector_t lba)
113 {
114 ah->lba0 = lba;
115 ah->lba1 = lba >>= 8;
116 ah->lba2 = lba >>= 8;
117 ah->lba3 = lba >>= 8;
118 ah->lba4 = lba >>= 8;
119 ah->lba5 = lba >>= 8;
120 }
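/*
 * put_lba() stores the LBA least-significant byte first: lba0 holds
 * bits 7..0, up through lba5 holding bits 47..40.  For non-EXT
 * commands aoecmd_ata_rw() masks lba3 down to 28-bit addressing
 * afterwards.
 */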
121
122 static void
123 ifrotate(struct aoetgt *t)
124 {
125 t->ifp++;
126 if (t->ifp >= &t->ifs[NAOEIFS] || t->ifp->nd == NULL)
127 t->ifp = t->ifs;
128 if (t->ifp->nd == NULL) {
129 printk(KERN_INFO "aoe: no interface to rotate to\n");
130 BUG();
131 }
132 }
133
134 static void
135 skb_pool_put(struct aoedev *d, struct sk_buff *skb)
136 {
137 __skb_queue_tail(&d->skbpool, skb);
138 }
139
140 static struct sk_buff *
141 skb_pool_get(struct aoedev *d)
142 {
143 struct sk_buff *skb = skb_peek(&d->skbpool);
144
145 if (skb && atomic_read(&skb_shinfo(skb)->dataref) == 1) {
146 __skb_unlink(skb, &d->skbpool);
147 return skb;
148 }
149 if (skb_queue_len(&d->skbpool) < NSKBPOOLMAX &&
150 (skb = new_skb(ETH_ZLEN)))
151 return skb;
152
153 return NULL;
154 }
155
156 void
157 aoe_freetframe(struct frame *f)
158 {
159 struct aoetgt *t;
160
161 t = f->t;
162 f->buf = NULL;
163 f->bv = NULL;
164 f->r_skb = NULL;
165 list_add(&f->head, &t->ffree);
166 }
167
168 static struct frame *
169 newtframe(struct aoedev *d, struct aoetgt *t)
170 {
171 struct frame *f;
172 struct sk_buff *skb;
173 struct list_head *pos;
174
175 if (list_empty(&t->ffree)) {
176 if (t->falloc >= NSKBPOOLMAX*2)
177 return NULL;
178 f = kcalloc(1, sizeof(*f), GFP_ATOMIC);
179 if (f == NULL)
180 return NULL;
181 t->falloc++;
182 f->t = t;
183 } else {
184 pos = t->ffree.next;
185 list_del(pos);
186 f = list_entry(pos, struct frame, head);
187 }
188
189 skb = f->skb;
190 if (skb == NULL) {
191 f->skb = skb = new_skb(ETH_ZLEN);
192 if (!skb) {
193 bail: aoe_freetframe(f);
194 return NULL;
195 }
196 }
197
198 if (atomic_read(&skb_shinfo(skb)->dataref) != 1) {
199 skb = skb_pool_get(d);
200 if (skb == NULL)
201 goto bail;
202 skb_pool_put(d, f->skb);
203 f->skb = skb;
204 }
205
206 skb->truesize -= skb->data_len;
207 skb_shinfo(skb)->nr_frags = skb->data_len = 0;
208 skb_trim(skb, 0);
209 return f;
210 }
211
212 static struct frame *
213 newframe(struct aoedev *d)
214 {
215 struct frame *f;
216 struct aoetgt *t, **tt;
217 int totout = 0;
218
219 if (d->targets[0] == NULL) { /* shouldn't happen, but I'm paranoid */
220 printk(KERN_ERR "aoe: NULL TARGETS!\n");
221 return NULL;
222 }
223 tt = d->tgt; /* last used target */
224 for (;;) {
225 tt++;
226 if (tt >= &d->targets[NTARGETS] || !*tt)
227 tt = d->targets;
228 t = *tt;
229 totout += t->nout;
230 if (t->nout < t->maxout
231 && t != d->htgt
232 && t->ifp->nd) {
233 f = newtframe(d, t);
234 if (f) {
235 d->tgt = tt;
236 ifrotate(t);
237 return f;
238 }
239 }
240 if (tt == d->tgt) /* we've looped and found nada */
241 break;
242 }
243 if (totout == 0) {
244 d->kicked++;
245 d->flags |= DEVFL_KICKME;
246 }
247 return NULL;
248 }
249
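/*
 * Map the data described by a bio_vec array into the skb's page
 * fragments, starting at page offset "off" within the first vector,
 * so write payloads are transmitted without copying.
 */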
250 static void
251 skb_fillup(struct sk_buff *skb, struct bio_vec *bv, ulong off, ulong cnt)
252 {
253 int frag = 0;
254 ulong fcnt;
255 loop:
256 fcnt = bv->bv_len - (off - bv->bv_offset);
257 if (fcnt > cnt)
258 fcnt = cnt;
259 skb_fill_page_desc(skb, frag++, bv->bv_page, off, fcnt);
260 cnt -= fcnt;
261 if (cnt <= 0)
262 return;
263 bv++;
264 off = bv->bv_offset;
265 goto loop;
266 }
267
268 static void
269 fhash(struct frame *f)
270 {
271 struct aoetgt *t = f->t;
272 u32 n;
273
274 n = f->tag % NFACTIVE;
275 list_add_tail(&f->head, &t->factive[n]);
276 }
277
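/*
 * Build and transmit one AoE ATA frame for the next chunk of the
 * device's current buf: at most maxbcnt (or DEFAULTBCNT) bytes per
 * frame, capped by the remaining residual.  Returns 1 if a frame went
 * out, 0 when there is no work or no frame available, so
 * aoecmd_work() can simply loop until it returns 0.
 */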
278 static int
279 aoecmd_ata_rw(struct aoedev *d)
280 {
281 struct frame *f;
282 struct aoe_hdr *h;
283 struct aoe_atahdr *ah;
284 struct buf *buf;
285 struct bio_vec *bv;
286 struct aoetgt *t;
287 struct sk_buff *skb;
288 struct sk_buff_head queue;
289 ulong bcnt, fbcnt;
290 char writebit, extbit;
291
292 writebit = 0x10;
293 extbit = 0x4;
294
295 buf = nextbuf(d);
296 if (buf == NULL)
297 return 0;
298 f = newframe(d);
299 if (f == NULL)
300 return 0;
301 t = *d->tgt;
302 bv = buf->bv;
303 bcnt = t->ifp->maxbcnt;
304 if (bcnt == 0)
305 bcnt = DEFAULTBCNT;
306 if (bcnt > buf->resid)
307 bcnt = buf->resid;
308 fbcnt = bcnt;
309 f->bv = buf->bv;
310 f->bv_off = f->bv->bv_offset + (f->bv->bv_len - buf->bv_resid);
311 do {
312 if (fbcnt < buf->bv_resid) {
313 buf->bv_resid -= fbcnt;
314 buf->resid -= fbcnt;
315 break;
316 }
317 fbcnt -= buf->bv_resid;
318 buf->resid -= buf->bv_resid;
319 if (buf->resid == 0) {
320 d->ip.buf = NULL;
321 break;
322 }
323 buf->bv++;
324 buf->bv_resid = buf->bv->bv_len;
325 WARN_ON(buf->bv_resid == 0);
326 } while (fbcnt);
327
328 /* initialize the headers & frame */
329 skb = f->skb;
330 h = (struct aoe_hdr *) skb_mac_header(skb);
331 ah = (struct aoe_atahdr *) (h+1);
332 skb_put(skb, sizeof *h + sizeof *ah);
333 memset(h, 0, skb->len);
334 f->tag = aoehdr_atainit(d, t, h);
335 fhash(f);
336 t->nout++;
337 f->waited = 0;
338 f->buf = buf;
339 f->bcnt = bcnt;
340 f->lba = buf->sector;
341
342 /* set up ata header */
343 ah->scnt = bcnt >> 9;
344 put_lba(ah, buf->sector);
345 if (d->flags & DEVFL_EXT) {
346 ah->aflags |= AOEAFL_EXT;
347 } else {
348 extbit = 0;
349 ah->lba3 &= 0x0f;
350 ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
351 }
352 if (bio_data_dir(buf->bio) == WRITE) {
353 skb_fillup(skb, f->bv, f->bv_off, bcnt);
354 ah->aflags |= AOEAFL_WRITE;
355 skb->len += bcnt;
356 skb->data_len = bcnt;
357 skb->truesize += bcnt;
358 t->wpkts++;
359 } else {
360 t->rpkts++;
361 writebit = 0;
362 }
363
364 ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit;
365
366 /* mark all tracking fields and load out */
367 buf->nframesout += 1;
368 buf->sector += bcnt >> 9;
369
370 skb->dev = t->ifp->nd;
371 skb = skb_clone(skb, GFP_ATOMIC);
372 if (skb) {
373 __skb_queue_head_init(&queue);
374 __skb_queue_tail(&queue, skb);
375 aoenet_xmit(&queue);
376 }
377 return 1;
378 }
379
380 /* some callers cannot sleep, and they can call this function,
381 * transmitting the packets later, when interrupts are on
382 */
383 static void
384 aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *queue)
385 {
386 struct aoe_hdr *h;
387 struct aoe_cfghdr *ch;
388 struct sk_buff *skb;
389 struct net_device *ifp;
390
391 rcu_read_lock();
392 for_each_netdev_rcu(&init_net, ifp) {
393 dev_hold(ifp);
394 if (!is_aoe_netif(ifp))
395 goto cont;
396
397 skb = new_skb(sizeof *h + sizeof *ch);
398 if (skb == NULL) {
399 printk(KERN_INFO "aoe: skb alloc failure\n");
400 goto cont;
401 }
402 skb_put(skb, sizeof *h + sizeof *ch);
403 skb->dev = ifp;
404 __skb_queue_tail(queue, skb);
405 h = (struct aoe_hdr *) skb_mac_header(skb);
406 memset(h, 0, sizeof *h + sizeof *ch);
407
408 memset(h->dst, 0xff, sizeof h->dst);
409 memcpy(h->src, ifp->dev_addr, sizeof h->src);
410 h->type = __constant_cpu_to_be16(ETH_P_AOE);
411 h->verfl = AOE_HVER;
412 h->major = cpu_to_be16(aoemajor);
413 h->minor = aoeminor;
414 h->cmd = AOECMD_CFG;
415
416 cont:
417 dev_put(ifp);
418 }
419 rcu_read_unlock();
420 }
421
422 static void
423 resend(struct aoedev *d, struct frame *f)
424 {
425 struct sk_buff *skb;
426 struct sk_buff_head queue;
427 struct aoe_hdr *h;
428 struct aoe_atahdr *ah;
429 struct aoetgt *t;
430 char buf[128];
431 u32 n;
432
433 t = f->t;
434 ifrotate(t);
435 n = newtag(t);
436 skb = f->skb;
437 h = (struct aoe_hdr *) skb_mac_header(skb);
438 ah = (struct aoe_atahdr *) (h+1);
439
440 snprintf(buf, sizeof buf,
441 "%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n",
442 "retransmit", d->aoemajor, d->aoeminor, f->tag, jiffies, n,
443 h->src, h->dst, t->nout);
444 aoechr_error(buf);
445
446 f->tag = n;
447 fhash(f);
448 h->tag = cpu_to_be32(n);
449 memcpy(h->dst, t->addr, sizeof h->dst);
450 memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
451
452 skb->dev = t->ifp->nd;
453 skb = skb_clone(skb, GFP_ATOMIC);
454 if (skb == NULL)
455 return;
456 __skb_queue_head_init(&queue);
457 __skb_queue_tail(&queue, skb);
458 aoenet_xmit(&queue);
459 }
460
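/*
 * Jiffies elapsed since the transmit tick recorded in the low 16 bits
 * of the tag, computed modulo 2^16.
 */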
461 static int
462 tsince(u32 tag)
463 {
464 int n;
465
466 n = jiffies & 0xffff;
467 n -= tag & 0xffff;
468 if (n < 0)
469 n += 1<<16;
470 return n;
471 }
472
473 static struct aoeif *
474 getif(struct aoetgt *t, struct net_device *nd)
475 {
476 struct aoeif *p, *e;
477
478 p = t->ifs;
479 e = p + NAOEIFS;
480 for (; p < e; p++)
481 if (p->nd == nd)
482 return p;
483 return NULL;
484 }
485
486 static struct aoeif *
487 addif(struct aoetgt *t, struct net_device *nd)
488 {
489 struct aoeif *p;
490
491 p = getif(t, NULL);
492 if (!p)
493 return NULL;
494 p->nd = nd;
495 p->maxbcnt = DEFAULTBCNT;
496 p->lost = 0;
497 p->lostjumbo = 0;
498 return p;
499 }
500
501 static void
502 ejectif(struct aoetgt *t, struct aoeif *ifp)
503 {
504 struct aoeif *e;
505 ulong n;
506
507 e = t->ifs + NAOEIFS - 1;
508 n = (e - ifp) * sizeof *ifp;
509 memmove(ifp, ifp+1, n);
510 e->nd = NULL;
511 }
512
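/*
 * Move every frame still active on the target being helped (d->htgt)
 * onto newly allocated frames for other targets and retransmit them.
 * Returns 0 if a replacement frame cannot be obtained, 1 after the
 * helped target has been drained and its interfaces cleared.
 */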
513 static int
514 sthtith(struct aoedev *d)
515 {
516 struct frame *f, *nf;
517 struct list_head *nx, *pos, *head;
518 struct sk_buff *skb;
519 struct aoetgt *ht = d->htgt;
520 int i;
521
522 for (i = 0; i < NFACTIVE; i++) {
523 head = &ht->factive[i];
524 list_for_each_safe(pos, nx, head) {
525 f = list_entry(pos, struct frame, head);
526 nf = newframe(d);
527 if (!nf)
528 return 0;
529
530 /* remove frame from active list */
531 list_del(pos);
532
533 /* reassign all pertinent bits to new outbound frame */
534 skb = nf->skb;
535 nf->skb = f->skb;
536 nf->buf = f->buf;
537 nf->bcnt = f->bcnt;
538 nf->lba = f->lba;
539 nf->bv = f->bv;
540 nf->bv_off = f->bv_off;
541 nf->waited = 0;
542 f->skb = skb;
543 aoe_freetframe(f);
544 ht->nout--;
545 nf->t->nout++;
546 resend(d, nf);
547 }
548 }
549 /* he's clean, he's useless. take away his interfaces */
550 memset(ht->ifs, 0, sizeof ht->ifs);
551 d->htgt = NULL;
552 return 1;
553 }
554
555 static inline unsigned char
556 ata_scnt(unsigned char *packet) {
557 struct aoe_hdr *h;
558 struct aoe_atahdr *ah;
559
560 h = (struct aoe_hdr *) packet;
561 ah = (struct aoe_atahdr *) (h+1);
562 return ah->scnt;
563 }
564
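/*
 * Per-device timer, rearmed every TIMERTICK.  Frames outstanding for
 * longer than ~150% of the round-trip average are retransmitted; once
 * a frame has waited more than aoe_deadsecs the whole device is
 * failed via aoedev_downdev().  The per-target window (maxout) is
 * also grown or shrunk here.
 */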
565 static void
566 rexmit_timer(ulong vp)
567 {
568 struct aoedev *d;
569 struct aoetgt *t, **tt, **te;
570 struct aoeif *ifp;
571 struct frame *f;
572 struct list_head *head, *pos, *nx;
573 LIST_HEAD(flist);
574 register long timeout;
575 ulong flags, n;
576 int i;
577
578 d = (struct aoedev *) vp;
579
580 /* timeout is always ~150% of the moving average */
581 timeout = d->rttavg;
582 timeout += timeout >> 1;
583
584 spin_lock_irqsave(&d->lock, flags);
585
586 if (d->flags & DEVFL_TKILL) {
587 spin_unlock_irqrestore(&d->lock, flags);
588 return;
589 }
590
591 /* collect all frames to rexmit into flist */
592 tt = d->targets;
593 te = tt + NTARGETS;
594 for (; tt < te && *tt; tt++) {
595 t = *tt;
596 for (i = 0; i < NFACTIVE; i++) {
597 head = &t->factive[i];
598 list_for_each_safe(pos, nx, head) {
599 f = list_entry(pos, struct frame, head);
600 if (tsince(f->tag) < timeout)
601 continue;
602 /* move to flist for later processing */
603 list_move_tail(pos, &flist);
604 }
605 }
606
607 /* window check */
608 if (t->nout == t->maxout
609 && t->maxout < t->nframes
610 && (jiffies - t->lastwadj)/HZ > 10) {
611 t->maxout++;
612 t->lastwadj = jiffies;
613 }
614 }
615
616 if (!list_empty(&flist)) { /* retransmissions necessary */
617 n = d->rttavg <<= 1;
618 if (n > MAXTIMER)
619 d->rttavg = MAXTIMER;
620 }
621
622 /* process expired frames */
623 while (!list_empty(&flist)) {
624 pos = flist.next;
625 f = list_entry(pos, struct frame, head);
626 n = f->waited += timeout;
627 n /= HZ;
628 if (n > aoe_deadsecs) {
629 /* Waited too long. Device failure.
630 * Hang all frames on first hash bucket for downdev
631 * to clean up.
632 */
633 list_splice(&flist, &f->t->factive[0]);
634 aoedev_downdev(d);
635 break;
636 }
637 list_del(pos);
638
639 t = f->t;
640 if (n > HELPWAIT) {
641 /* see if another target can help */
642 if (d->ntargets > 1)
643 d->htgt = t;
644 }
645 if (t->nout == t->maxout) {
646 if (t->maxout > 1)
647 t->maxout--;
648 t->lastwadj = jiffies;
649 }
650
651 ifp = getif(t, f->skb->dev);
652 if (ifp && ++ifp->lost > (t->nframes << 1)
653 && (ifp != t->ifs || t->ifs[1].nd)) {
654 ejectif(t, ifp);
655 ifp = NULL;
656 }
657 resend(d, f);
658 }
659
660 if ((d->flags & DEVFL_KICKME || d->htgt) && d->blkq) {
661 d->flags &= ~DEVFL_KICKME;
662 d->blkq->request_fn(d->blkq);
663 }
664
665 d->timer.expires = jiffies + TIMERTICK;
666 add_timer(&d->timer);
667
668 spin_unlock_irqrestore(&d->lock, flags);
669 }
670
671 static unsigned long
672 rqbiocnt(struct request *r)
673 {
674 struct bio *bio;
675 unsigned long n = 0;
676
677 __rq_for_each_bio(bio, r)
678 n++;
679 return n;
680 }
681
682 /* This can be removed if we are certain that no users of the block
683 * layer will ever use zero-count pages in bios. Otherwise we have to
684 * protect against the put_page sometimes done by the network layer.
685 *
686 * See http://oss.sgi.com/archives/xfs/2007-01/msg00594.html for
687 * discussion.
688 *
689 * We cannot use get_page in the workaround, because it insists on a
690 * positive page count as a precondition. So we use _count directly.
691 */
692 static void
693 bio_pageinc(struct bio *bio)
694 {
695 struct bio_vec *bv;
696 struct page *page;
697 int i;
698
699 bio_for_each_segment(bv, bio, i) {
700 page = bv->bv_page;
701 /* Non-zero page count for non-head members of
702 * compound pages is no longer allowed by the kernel,
703 * but this has never been seen here.
704 */
705 if (unlikely(PageCompound(page)))
706 if (compound_trans_head(page) != page) {
707 pr_crit("page tail used for block I/O\n");
708 BUG();
709 }
710 atomic_inc(&page->_count);
711 }
712 }
713
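/*
 * Drop the page references taken by bio_pageinc(); called from
 * aoe_end_buf() once the buf is complete.
 */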
714 static void
715 bio_pagedec(struct bio *bio)
716 {
717 struct bio_vec *bv;
718 int i;
719
720 bio_for_each_segment(bv, bio, i)
721 atomic_dec(&bv->bv_page->_count);
722 }
723
724 static void
725 bufinit(struct buf *buf, struct request *rq, struct bio *bio)
726 {
727 struct bio_vec *bv;
728
729 memset(buf, 0, sizeof(*buf));
730 buf->rq = rq;
731 buf->bio = bio;
732 buf->resid = bio->bi_size;
733 buf->sector = bio->bi_sector;
734 bio_pageinc(bio);
735 buf->bv = bv = &bio->bi_io_vec[bio->bi_idx];
736 buf->bv_resid = bv->bv_len;
737 WARN_ON(buf->bv_resid == 0);
738 }
739
740 static struct buf *
741 nextbuf(struct aoedev *d)
742 {
743 struct request *rq;
744 struct request_queue *q;
745 struct buf *buf;
746 struct bio *bio;
747
748 q = d->blkq;
749 if (q == NULL)
750 return NULL; /* initializing */
751 if (d->ip.buf)
752 return d->ip.buf;
753 rq = d->ip.rq;
754 if (rq == NULL) {
755 rq = blk_peek_request(q);
756 if (rq == NULL)
757 return NULL;
758 blk_start_request(rq);
759 d->ip.rq = rq;
760 d->ip.nxbio = rq->bio;
761 rq->special = (void *) rqbiocnt(rq);
762 }
763 buf = mempool_alloc(d->bufpool, GFP_ATOMIC);
764 if (buf == NULL) {
765 pr_err("aoe: nextbuf: unable to mempool_alloc!\n");
766 return NULL;
767 }
768 bio = d->ip.nxbio;
769 bufinit(buf, rq, bio);
770 bio = bio->bi_next;
771 d->ip.nxbio = bio;
772 if (bio == NULL)
773 d->ip.rq = NULL;
774 return d->ip.buf = buf;
775 }
776
777 /* enters with d->lock held */
778 void
779 aoecmd_work(struct aoedev *d)
780 {
781 if (d->htgt && !sthtith(d))
782 return;
783 while (aoecmd_ata_rw(d))
784 ;
785 }
786
787 /* this function performs work that has been deferred until sleeping is OK
788 */
789 void
790 aoecmd_sleepwork(struct work_struct *work)
791 {
792 struct aoedev *d = container_of(work, struct aoedev, work);
793
794 if (d->flags & DEVFL_GDALLOC)
795 aoeblk_gdalloc(d);
796
797 if (d->flags & DEVFL_NEWSIZE) {
798 struct block_device *bd;
799 unsigned long flags;
800 u64 ssize;
801
802 ssize = get_capacity(d->gd);
803 bd = bdget_disk(d->gd, 0);
804
805 if (bd) {
806 mutex_lock(&bd->bd_inode->i_mutex);
807 i_size_write(bd->bd_inode, (loff_t)ssize<<9);
808 mutex_unlock(&bd->bd_inode->i_mutex);
809 bdput(bd);
810 }
811 spin_lock_irqsave(&d->lock, flags);
812 d->flags |= DEVFL_UP;
813 d->flags &= ~DEVFL_NEWSIZE;
814 spin_unlock_irqrestore(&d->lock, flags);
815 }
816 }
817
818 static void
819 ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
820 {
821 u64 ssize;
822 u16 n;
823
824 /* word 83: command set supported */
825 n = get_unaligned_le16(&id[83 << 1]);
826
827 /* word 86: command set/feature enabled */
828 n |= get_unaligned_le16(&id[86 << 1]);
829
830 if (n & (1<<10)) { /* bit 10: LBA 48 */
831 d->flags |= DEVFL_EXT;
832
833 /* word 100: number lba48 sectors */
834 ssize = get_unaligned_le64(&id[100 << 1]);
835
836 /* set as in ide-disk.c:init_idedisk_capacity */
837 d->geo.cylinders = ssize;
838 d->geo.cylinders /= (255 * 63);
839 d->geo.heads = 255;
840 d->geo.sectors = 63;
841 } else {
842 d->flags &= ~DEVFL_EXT;
843
844 /* number lba28 sectors */
845 ssize = get_unaligned_le32(&id[60 << 1]);
846
847 /* NOTE: obsolete in ATA 6 */
848 d->geo.cylinders = get_unaligned_le16(&id[54 << 1]);
849 d->geo.heads = get_unaligned_le16(&id[55 << 1]);
850 d->geo.sectors = get_unaligned_le16(&id[56 << 1]);
851 }
852
853 if (d->ssize != ssize)
854 printk(KERN_INFO
855 "aoe: %pm e%ld.%d v%04x has %llu sectors\n",
856 t->addr,
857 d->aoemajor, d->aoeminor,
858 d->fw_ver, (long long)ssize);
859 d->ssize = ssize;
860 d->geo.start = 0;
861 if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
862 return;
863 if (d->gd != NULL) {
864 set_capacity(d->gd, ssize);
865 d->flags |= DEVFL_NEWSIZE;
866 } else
867 d->flags |= DEVFL_GDALLOC;
868 schedule_work(&d->work);
869 }
870
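/*
 * aoecmd_ata_rsp() passes a negative rtt when a response matches no
 * outstanding frame; that case also adjusts d->mintimer.  Either way
 * the clamped magnitude is folded into the moving average with gain
 * 1/4, per the Jacobson & Karels reference cited below.
 */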
871 static void
872 calc_rttavg(struct aoedev *d, int rtt)
873 {
874 register long n;
875
876 n = rtt;
877 if (n < 0) {
878 n = -rtt;
879 if (n < MINTIMER)
880 n = MINTIMER;
881 else if (n > MAXTIMER)
882 n = MAXTIMER;
883 d->mintimer += (n - d->mintimer) >> 1;
884 } else if (n < d->mintimer)
885 n = d->mintimer;
886 else if (n > MAXTIMER)
887 n = MAXTIMER;
888
889 /* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
890 n -= d->rttavg;
891 d->rttavg += n >> 2;
892 }
893
894 static struct aoetgt *
895 gettgt(struct aoedev *d, char *addr)
896 {
897 struct aoetgt **t, **e;
898
899 t = d->targets;
900 e = t + NTARGETS;
901 for (; t < e && *t; t++)
902 if (memcmp((*t)->addr, addr, sizeof((*t)->addr)) == 0)
903 return *t;
904 return NULL;
905 }
906
907 static void
908 bvcpy(struct bio_vec *bv, ulong off, struct sk_buff *skb, long cnt)
909 {
910 ulong fcnt;
911 char *p;
912 int soff = 0;
913 loop:
914 fcnt = bv->bv_len - (off - bv->bv_offset);
915 if (fcnt > cnt)
916 fcnt = cnt;
917 p = page_address(bv->bv_page) + off;
918 skb_copy_bits(skb, soff, p, fcnt);
919 soff += fcnt;
920 cnt -= fcnt;
921 if (cnt <= 0)
922 return;
923 bv++;
924 off = bv->bv_offset;
925 goto loop;
926 }
927
928 void
929 aoe_end_request(struct aoedev *d, struct request *rq, int fastfail)
930 {
931 struct bio *bio;
932 int bok;
933 struct request_queue *q;
934
935 q = d->blkq;
936 if (rq == d->ip.rq)
937 d->ip.rq = NULL;
938 do {
939 bio = rq->bio;
940 bok = !fastfail && test_bit(BIO_UPTODATE, &bio->bi_flags);
941 } while (__blk_end_request(rq, bok ? 0 : -EIO, bio->bi_size));
942
943 /* cf. http://lkml.org/lkml/2006/10/31/28 */
944 if (!fastfail)
945 q->request_fn(q);
946 }
947
948 static void
949 aoe_end_buf(struct aoedev *d, struct buf *buf)
950 {
951 struct request *rq;
952 unsigned long n;
953
954 if (buf == d->ip.buf)
955 d->ip.buf = NULL;
956 rq = buf->rq;
957 bio_pagedec(buf->bio);
958 mempool_free(buf, d->bufpool);
959 n = (unsigned long) rq->special;
960 rq->special = (void *) --n;
961 if (n == 0)
962 aoe_end_request(d, rq, 0);
963 }
964
965 static void
966 ktiocomplete(struct frame *f)
967 {
968 struct aoe_hdr *hin, *hout;
969 struct aoe_atahdr *ahin, *ahout;
970 struct buf *buf;
971 struct sk_buff *skb;
972 struct aoetgt *t;
973 struct aoeif *ifp;
974 struct aoedev *d;
975 long n;
976
977 if (f == NULL)
978 return;
979
980 t = f->t;
981 d = t->d;
982
983 hout = (struct aoe_hdr *) skb_mac_header(f->skb);
984 ahout = (struct aoe_atahdr *) (hout+1);
985 buf = f->buf;
986 skb = f->r_skb;
987 if (skb == NULL)
988 goto noskb; /* just fail the buf. */
989
990 hin = (struct aoe_hdr *) skb->data;
991 skb_pull(skb, sizeof(*hin));
992 ahin = (struct aoe_atahdr *) skb->data;
993 skb_pull(skb, sizeof(*ahin));
994 if (ahin->cmdstat & 0xa9) { /* these bits cleared on success */
995 pr_err("aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%d\n",
996 ahout->cmdstat, ahin->cmdstat,
997 d->aoemajor, d->aoeminor);
998 noskb: if (buf)
999 clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
1000 goto badrsp;
1001 }
1002
1003 n = ahout->scnt << 9;
1004 switch (ahout->cmdstat) {
1005 case ATA_CMD_PIO_READ:
1006 case ATA_CMD_PIO_READ_EXT:
1007 if (skb->len < n) {
1008 pr_err("aoe: runt data size in read. skb->len=%d need=%ld\n",
1009 skb->len, n);
1010 clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
1011 break;
1012 }
1013 bvcpy(f->bv, f->bv_off, skb, n);
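/* no break: reads fall through to the interface bookkeeping below */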
1014 case ATA_CMD_PIO_WRITE:
1015 case ATA_CMD_PIO_WRITE_EXT:
1016 spin_lock_irq(&d->lock);
1017 ifp = getif(t, skb->dev);
1018 if (ifp) {
1019 ifp->lost = 0;
1020 if (n > DEFAULTBCNT)
1021 ifp->lostjumbo = 0;
1022 }
1023 if (d->htgt == t) /* I'll help myself, thank you. */
1024 d->htgt = NULL;
1025 spin_unlock_irq(&d->lock);
1026 break;
1027 case ATA_CMD_ID_ATA:
1028 if (skb->len < 512) {
1029 pr_info("aoe: runt data size in ataid. skb->len=%d\n",
1030 skb->len);
1031 break;
1032 }
1033 if (skb_linearize(skb))
1034 break;
1035 spin_lock_irq(&d->lock);
1036 ataid_complete(d, t, skb->data);
1037 spin_unlock_irq(&d->lock);
1038 break;
1039 default:
1040 pr_info("aoe: unrecognized ata command %2.2Xh for %d.%d\n",
1041 ahout->cmdstat,
1042 be16_to_cpu(get_unaligned(&hin->major)),
1043 hin->minor);
1044 }
1045 badrsp:
1046 spin_lock_irq(&d->lock);
1047
1048 aoe_freetframe(f);
1049
1050 if (buf && --buf->nframesout == 0 && buf->resid == 0)
1051 aoe_end_buf(d, buf);
1052
1053 aoecmd_work(d);
1054
1055 spin_unlock_irq(&d->lock);
1056 aoedev_put(d);
1057 dev_kfree_skb(skb);
1058 }
1059
1060 /* Enters with iocq.lock held.
1061 * Returns true iff responses needing processing remain.
1062 */
1063 static int
1064 ktio(void)
1065 {
1066 struct frame *f;
1067 struct list_head *pos;
1068 int i;
1069
1070 for (i = 0; ; ++i) {
1071 if (i == MAXIOC)
1072 return 1;
1073 if (list_empty(&iocq.head))
1074 return 0;
1075 pos = iocq.head.next;
1076 list_del(pos);
1077 spin_unlock_irq(&iocq.lock);
1078 f = list_entry(pos, struct frame, head);
1079 ktiocomplete(f);
1080 spin_lock_irq(&iocq.lock);
1081 }
1082 }
1083
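/*
 * Generic worker loop: k->fn() is invoked with k->lock held; while it
 * reports more work (ktio() returns 1 after MAXIOC completions) the
 * thread yields with cond_resched(), otherwise it sleeps on k->waitq
 * until ktcomplete() wakes it.
 */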
1084 static int
1085 kthread(void *vp)
1086 {
1087 struct ktstate *k;
1088 DECLARE_WAITQUEUE(wait, current);
1089 int more;
1090
1091 k = vp;
1092 current->flags |= PF_NOFREEZE;
1093 set_user_nice(current, -10);
1094 complete(&k->rendez); /* tell spawner we're running */
1095 do {
1096 spin_lock_irq(k->lock);
1097 more = k->fn();
1098 if (!more) {
1099 add_wait_queue(k->waitq, &wait);
1100 __set_current_state(TASK_INTERRUPTIBLE);
1101 }
1102 spin_unlock_irq(k->lock);
1103 if (!more) {
1104 schedule();
1105 remove_wait_queue(k->waitq, &wait);
1106 } else
1107 cond_resched();
1108 } while (!kthread_should_stop());
1109 complete(&k->rendez); /* tell spawner we're stopping */
1110 return 0;
1111 }
1112
1113 void
1114 aoe_ktstop(struct ktstate *k)
1115 {
1116 kthread_stop(k->task);
1117 wait_for_completion(&k->rendez);
1118 }
1119
1120 int
1121 aoe_ktstart(struct ktstate *k)
1122 {
1123 struct task_struct *task;
1124
1125 init_completion(&k->rendez);
1126 task = kthread_run(kthread, k, k->name);
1127 if (task == NULL || IS_ERR(task))
1128 return -ENOMEM;
1129 k->task = task;
1130 wait_for_completion(&k->rendez); /* allow kthread to start */
1131 init_completion(&k->rendez); /* for waiting for exit later */
1132 return 0;
1133 }
1134
1135 /* pass it off to kthreads for processing */
1136 static void
1137 ktcomplete(struct frame *f, struct sk_buff *skb)
1138 {
1139 ulong flags;
1140
1141 f->r_skb = skb;
1142 spin_lock_irqsave(&iocq.lock, flags);
1143 list_add_tail(&f->head, &iocq.head);
1144 spin_unlock_irqrestore(&iocq.lock, flags);
1145 wake_up(&ktiowq);
1146 }
1147
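/*
 * Network receive path: under d->lock, match the response tag to an
 * outstanding frame, update the RTT estimate, decrement the target's
 * outstanding count and kick aoecmd_work(); the data copy and request
 * completion are deferred to the ktio thread via ktcomplete().
 * Returns NULL when the skb has been taken over, or the skb itself
 * when no device or frame matches.
 */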
1148 struct sk_buff *
1149 aoecmd_ata_rsp(struct sk_buff *skb)
1150 {
1151 struct aoedev *d;
1152 struct aoe_hdr *h;
1153 struct frame *f;
1154 struct aoetgt *t;
1155 u32 n;
1156 ulong flags;
1157 char ebuf[128];
1158 u16 aoemajor;
1159
1160 h = (struct aoe_hdr *) skb->data;
1161 aoemajor = be16_to_cpu(get_unaligned(&h->major));
1162 d = aoedev_by_aoeaddr(aoemajor, h->minor);
1163 if (d == NULL) {
1164 snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
1165 "for unknown device %d.%d\n",
1166 aoemajor, h->minor);
1167 aoechr_error(ebuf);
1168 return skb;
1169 }
1170
1171 spin_lock_irqsave(&d->lock, flags);
1172
1173 n = be32_to_cpu(get_unaligned(&h->tag));
1174 t = gettgt(d, h->src);
1175 if (t == NULL) {
1176 printk(KERN_INFO "aoe: can't find target e%ld.%d:%pm\n",
1177 d->aoemajor, d->aoeminor, h->src);
1178 spin_unlock_irqrestore(&d->lock, flags);
1179 aoedev_put(d);
1180 return skb;
1181 }
1182 f = getframe(t, n);
1183 if (f == NULL) {
1184 calc_rttavg(d, -tsince(n));
1185 spin_unlock_irqrestore(&d->lock, flags);
1186 aoedev_put(d);
1187 snprintf(ebuf, sizeof ebuf,
1188 "%15s e%d.%d tag=%08x@%08lx\n",
1189 "unexpected rsp",
1190 get_unaligned_be16(&h->major),
1191 h->minor,
1192 get_unaligned_be32(&h->tag),
1193 jiffies);
1194 aoechr_error(ebuf);
1195 return skb;
1196 }
1197 calc_rttavg(d, tsince(f->tag));
1198 t->nout--;
1199 aoecmd_work(d);
1200
1201 spin_unlock_irqrestore(&d->lock, flags);
1202
1203 ktcomplete(f, skb);
1204
1205 /*
1206 * Note here that we do not perform an aoedev_put, as we are
1207 * leaving this reference for the ktio to release.
1208 */
1209 return NULL;
1210 }
1211
1212 void
1213 aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
1214 {
1215 struct sk_buff_head queue;
1216
1217 __skb_queue_head_init(&queue);
1218 aoecmd_cfg_pkts(aoemajor, aoeminor, &queue);
1219 aoenet_xmit(&queue);
1220 }
1221
1222 struct sk_buff *
1223 aoecmd_ata_id(struct aoedev *d)
1224 {
1225 struct aoe_hdr *h;
1226 struct aoe_atahdr *ah;
1227 struct frame *f;
1228 struct sk_buff *skb;
1229 struct aoetgt *t;
1230
1231 f = newframe(d);
1232 if (f == NULL)
1233 return NULL;
1234
1235 t = *d->tgt;
1236
1237 /* initialize the headers & frame */
1238 skb = f->skb;
1239 h = (struct aoe_hdr *) skb_mac_header(skb);
1240 ah = (struct aoe_atahdr *) (h+1);
1241 skb_put(skb, sizeof *h + sizeof *ah);
1242 memset(h, 0, skb->len);
1243 f->tag = aoehdr_atainit(d, t, h);
1244 fhash(f);
1245 t->nout++;
1246 f->waited = 0;
1247
1248 /* set up ata header */
1249 ah->scnt = 1;
1250 ah->cmdstat = ATA_CMD_ID_ATA;
1251 ah->lba3 = 0xa0;
1252
1253 skb->dev = t->ifp->nd;
1254
1255 d->rttavg = MAXTIMER;
1256 d->timer.function = rexmit_timer;
1257
1258 return skb_clone(skb, GFP_ATOMIC);
1259 }
1260
1261 static struct aoetgt *
1262 addtgt(struct aoedev *d, char *addr, ulong nframes)
1263 {
1264 struct aoetgt *t, **tt, **te;
1265 int i;
1266
1267 tt = d->targets;
1268 te = tt + NTARGETS;
1269 for (; tt < te && *tt; tt++)
1270 ;
1271
1272 if (tt == te) {
1273 printk(KERN_INFO
1274 "aoe: device addtgt failure; too many targets\n");
1275 return NULL;
1276 }
1277 t = kzalloc(sizeof(*t), GFP_ATOMIC);
1278 if (!t) {
1279 printk(KERN_INFO "aoe: cannot allocate memory to add target\n");
1280 return NULL;
1281 }
1282
1283 d->ntargets++;
1284 t->nframes = nframes;
1285 t->d = d;
1286 memcpy(t->addr, addr, sizeof t->addr);
1287 t->ifp = t->ifs;
1288 t->maxout = t->nframes;
1289 INIT_LIST_HEAD(&t->ffree);
1290 for (i = 0; i < NFACTIVE; ++i)
1291 INIT_LIST_HEAD(&t->factive[i]);
1292 return *tt = t;
1293 }
1294
1295 void
1296 aoecmd_cfg_rsp(struct sk_buff *skb)
1297 {
1298 struct aoedev *d;
1299 struct aoe_hdr *h;
1300 struct aoe_cfghdr *ch;
1301 struct aoetgt *t;
1302 struct aoeif *ifp;
1303 ulong flags, sysminor, aoemajor;
1304 struct sk_buff *sl;
1305 struct sk_buff_head queue;
1306 u16 n;
1307
1308 sl = NULL;
1309 h = (struct aoe_hdr *) skb_mac_header(skb);
1310 ch = (struct aoe_cfghdr *) (h+1);
1311
1312 /*
1313 * Enough people have their dip switches set backwards to
1314 * warrant a loud message for this special case.
1315 */
1316 aoemajor = get_unaligned_be16(&h->major);
1317 if (aoemajor == 0xfff) {
1318 printk(KERN_ERR "aoe: Warning: shelf address is all ones. "
1319 "Check shelf dip switches.\n");
1320 return;
1321 }
1322
1323 sysminor = SYSMINOR(aoemajor, h->minor);
1324 if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
1325 printk(KERN_INFO "aoe: e%ld.%d: minor number too large\n",
1326 aoemajor, (int) h->minor);
1327 return;
1328 }
1329
1330 n = be16_to_cpu(ch->bufcnt);
1331 if (n > aoe_maxout) /* keep it reasonable */
1332 n = aoe_maxout;
1333
1334 d = aoedev_by_sysminor_m(sysminor);
1335 if (d == NULL) {
1336 printk(KERN_INFO "aoe: device sysminor_m failure\n");
1337 return;
1338 }
1339
1340 spin_lock_irqsave(&d->lock, flags);
1341
1342 t = gettgt(d, h->src);
1343 if (!t) {
1344 t = addtgt(d, h->src, n);
1345 if (!t)
1346 goto bail;
1347 }
1348 ifp = getif(t, skb->dev);
1349 if (!ifp) {
1350 ifp = addif(t, skb->dev);
1351 if (!ifp) {
1352 printk(KERN_INFO
1353 "aoe: device addif failure; "
1354 "too many interfaces?\n");
1355 goto bail;
1356 }
1357 }
1358 if (ifp->maxbcnt) {
1359 n = ifp->nd->mtu;
1360 n -= sizeof (struct aoe_hdr) + sizeof (struct aoe_atahdr);
1361 n /= 512;
1362 if (n > ch->scnt)
1363 n = ch->scnt;
1364 n = n ? n * 512 : DEFAULTBCNT;
1365 if (n != ifp->maxbcnt) {
1366 printk(KERN_INFO
1367 "aoe: e%ld.%d: setting %d%s%s:%pm\n",
1368 d->aoemajor, d->aoeminor, n,
1369 " byte data frames on ", ifp->nd->name,
1370 t->addr);
1371 ifp->maxbcnt = n;
1372 }
1373 }
1374
1375 /* don't change users' perspective */
1376 if (d->nopen == 0) {
1377 d->fw_ver = be16_to_cpu(ch->fwver);
1378 sl = aoecmd_ata_id(d);
1379 }
1380 bail:
1381 spin_unlock_irqrestore(&d->lock, flags);
1382 aoedev_put(d);
1383 if (sl) {
1384 __skb_queue_head_init(&queue);
1385 __skb_queue_tail(&queue, sl);
1386 aoenet_xmit(&queue);
1387 }
1388 }
1389
1390 void
1391 aoecmd_cleanslate(struct aoedev *d)
1392 {
1393 struct aoetgt **t, **te;
1394 struct aoeif *p, *e;
1395
1396 d->mintimer = MINTIMER;
1397
1398 t = d->targets;
1399 te = t + NTARGETS;
1400 for (; t < te && *t; t++) {
1401 (*t)->maxout = (*t)->nframes;
1402 p = (*t)->ifs;
1403 e = p + NAOEIFS;
1404 for (; p < e; p++) {
1405 p->lostjumbo = 0;
1406 p->lost = 0;
1407 p->maxbcnt = DEFAULTBCNT;
1408 }
1409 }
1410 }
1411
1412 void
1413 aoe_failbuf(struct aoedev *d, struct buf *buf)
1414 {
1415 if (buf == NULL)
1416 return;
1417 buf->resid = 0;
1418 clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
1419 if (buf->nframesout == 0)
1420 aoe_end_buf(d, buf);
1421 }
1422
1423 void
1424 aoe_flush_iocq(void)
1425 {
1426 struct frame *f;
1427 struct aoedev *d;
1428 LIST_HEAD(flist);
1429 struct list_head *pos;
1430 struct sk_buff *skb;
1431 ulong flags;
1432
1433 spin_lock_irqsave(&iocq.lock, flags);
1434 list_splice_init(&iocq.head, &flist);
1435 spin_unlock_irqrestore(&iocq.lock, flags);
1436 while (!list_empty(&flist)) {
1437 pos = flist.next;
1438 list_del(pos);
1439 f = list_entry(pos, struct frame, head);
1440 d = f->t->d;
1441 skb = f->r_skb;
1442 spin_lock_irqsave(&d->lock, flags);
1443 if (f->buf) {
1444 f->buf->nframesout--;
1445 aoe_failbuf(d, f->buf);
1446 }
1447 aoe_freetframe(f);
1448 spin_unlock_irqrestore(&d->lock, flags);
1449 dev_kfree_skb(skb);
1450 aoedev_put(d);
1451 }
1452 }
1453
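/*
 * Module init: wire ktio() to the shared iocq lock and the ktiowq
 * wait queue, then start the "aoe_ktio" thread with aoe_ktstart().
 * aoecmd_exit() stops the thread and flushes anything left on iocq.
 */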
1454 int __init
1455 aoecmd_init(void)
1456 {
1457 INIT_LIST_HEAD(&iocq.head);
1458 spin_lock_init(&iocq.lock);
1459 init_waitqueue_head(&ktiowq);
1460 kts.name = "aoe_ktio";
1461 kts.fn = ktio;
1462 kts.waitq = &ktiowq;
1463 kts.lock = &iocq.lock;
1464 return aoe_ktstart(&kts);
1465 }
1466
1467 void
1468 aoecmd_exit(void)
1469 {
1470 aoe_ktstop(&kts);
1471 aoe_flush_iocq();
1472 }