1/* ldc.c: Logical Domain Channel link-layer protocol driver.
2 *
3 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
4 */
5
6#include <linux/kernel.h>
7#include <linux/export.h>
8#include <linux/slab.h>
9#include <linux/spinlock.h>
10#include <linux/delay.h>
11#include <linux/errno.h>
12#include <linux/string.h>
13#include <linux/scatterlist.h>
14#include <linux/interrupt.h>
15#include <linux/list.h>
16#include <linux/init.h>
17#include <linux/bitmap.h>
18#include <linux/hash.h>
19#include <linux/iommu-common.h>
20
21#include <asm/hypervisor.h>
22#include <asm/iommu.h>
23#include <asm/page.h>
24#include <asm/ldc.h>
25#include <asm/mdesc.h>
26
27#define DRV_MODULE_NAME "ldc"
28#define PFX DRV_MODULE_NAME ": "
29#define DRV_MODULE_VERSION "1.1"
30#define DRV_MODULE_RELDATE "July 22, 2008"
31
32#define COOKIE_PGSZ_CODE 0xf000000000000000ULL
33#define COOKIE_PGSZ_CODE_SHIFT 60ULL
34
35static DEFINE_PER_CPU(unsigned int, ldc_pool_hash);
36
37static char version[] =
38 DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
39#define LDC_PACKET_SIZE 64
40
41/* Packet header layout for unreliable and reliable mode frames.
42 * When in RAW mode, packets are simply straight 64-byte payloads
43 * with no headers.
44 */
45struct ldc_packet {
46 u8 type;
47#define LDC_CTRL 0x01
48#define LDC_DATA 0x02
49#define LDC_ERR 0x10
50
51 u8 stype;
52#define LDC_INFO 0x01
53#define LDC_ACK 0x02
54#define LDC_NACK 0x04
55
56 u8 ctrl;
57#define LDC_VERS 0x01 /* Link Version */
58#define LDC_RTS 0x02 /* Request To Send */
59#define LDC_RTR 0x03 /* Ready To Receive */
60#define LDC_RDX 0x04 /* Ready for Data eXchange */
61#define LDC_CTRL_MSK 0x0f
62
63 u8 env;
64#define LDC_LEN 0x3f
65#define LDC_FRAG_MASK 0xc0
66#define LDC_START 0x40
67#define LDC_STOP 0x80
68
69 u32 seqid;
70
71 union {
72 u8 u_data[LDC_PACKET_SIZE - 8];
73 struct {
74 u32 pad;
75 u32 ackid;
76 u8 r_data[LDC_PACKET_SIZE - 8 - 8];
77 } r;
78 } u;
79};
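/* Illustrative byte layout of a non-RAW frame, derived from the structure
 * above (added for clarity, not part of the original source):
 *
 *	offset  0	type
 *	offset  1	stype
 *	offset  2	ctrl
 *	offset  3	env
 *	offset  4-7	seqid
 *	offset  8-63	u.u_data[]   (unreliable-mode payload, 56 bytes)
 *	offset  8-11	u.r.pad
 *	offset 12-15	u.r.ackid
 *	offset 16-63	u.r.r_data[] (reliable/stream-mode payload, 48 bytes)
 */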
80
81struct ldc_version {
82 u16 major;
83 u16 minor;
84};
85
86/* Ordered from highest major to lowest. */
87static struct ldc_version ver_arr[] = {
88 { .major = 1, .minor = 0 },
89};
90
91#define LDC_DEFAULT_MTU (4 * LDC_PACKET_SIZE)
92#define LDC_DEFAULT_NUM_ENTRIES (PAGE_SIZE / LDC_PACKET_SIZE)
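/* Worked numbers, for illustration only (assuming the usual sparc64 8K
 * PAGE_SIZE): LDC_DEFAULT_NUM_ENTRIES = 8192 / 64 = 128 packets, so each
 * default TX/RX queue occupies exactly one page, and LDC_DEFAULT_MTU is
 * 4 * 64 = 256 bytes.
 */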
93
94struct ldc_channel;
95
96struct ldc_mode_ops {
97 int (*write)(struct ldc_channel *, const void *, unsigned int);
98 int (*read)(struct ldc_channel *, void *, unsigned int);
99};
100
101static const struct ldc_mode_ops raw_ops;
102static const struct ldc_mode_ops nonraw_ops;
103static const struct ldc_mode_ops stream_ops;
104
105int ldom_domaining_enabled;
106
107struct ldc_iommu {
108 /* Protects ldc_unmap. */
109 spinlock_t lock;
110 struct ldc_mtable_entry *page_table;
111 struct iommu_table iommu_table;
112};
113
114struct ldc_channel {
115 /* Protects all operations that depend upon channel state. */
116 spinlock_t lock;
117
118 unsigned long id;
119
120 u8 *mssbuf;
121 u32 mssbuf_len;
122 u32 mssbuf_off;
123
124 struct ldc_packet *tx_base;
125 unsigned long tx_head;
126 unsigned long tx_tail;
127 unsigned long tx_num_entries;
128 unsigned long tx_ra;
129
130 unsigned long tx_acked;
131
132 struct ldc_packet *rx_base;
133 unsigned long rx_head;
134 unsigned long rx_tail;
135 unsigned long rx_num_entries;
136 unsigned long rx_ra;
137
138 u32 rcv_nxt;
139 u32 snd_nxt;
140
141 unsigned long chan_state;
142
143 struct ldc_channel_config cfg;
144 void *event_arg;
145
146 const struct ldc_mode_ops *mops;
147
148 struct ldc_iommu iommu;
149
150 struct ldc_version ver;
151
152 u8 hs_state;
153#define LDC_HS_CLOSED 0x00
154#define LDC_HS_OPEN 0x01
155#define LDC_HS_GOTVERS 0x02
156#define LDC_HS_SENTRTR 0x03
157#define LDC_HS_GOTRTR 0x04
158#define LDC_HS_COMPLETE 0x10
159
160 u8 flags;
161#define LDC_FLAG_ALLOCED_QUEUES 0x01
162#define LDC_FLAG_REGISTERED_QUEUES 0x02
163#define LDC_FLAG_REGISTERED_IRQS 0x04
164#define LDC_FLAG_RESET 0x10
165
166 u8 mss;
167 u8 state;
168
169#define LDC_IRQ_NAME_MAX 32
170 char rx_irq_name[LDC_IRQ_NAME_MAX];
171 char tx_irq_name[LDC_IRQ_NAME_MAX];
172
173 struct hlist_head mh_list;
174
175 struct hlist_node list;
176};
177
178#define ldcdbg(TYPE, f, a...) \
179do { if (lp->cfg.debug & LDC_DEBUG_##TYPE) \
180 printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \
181} while (0)
182
183static const char *state_to_str(u8 state)
184{
185 switch (state) {
186 case LDC_STATE_INVALID:
187 return "INVALID";
188 case LDC_STATE_INIT:
189 return "INIT";
190 case LDC_STATE_BOUND:
191 return "BOUND";
192 case LDC_STATE_READY:
193 return "READY";
194 case LDC_STATE_CONNECTED:
195 return "CONNECTED";
196 default:
197 return "<UNKNOWN>";
198 }
199}
200
201static void ldc_set_state(struct ldc_channel *lp, u8 state)
202{
203 ldcdbg(STATE, "STATE (%s) --> (%s)\n",
204 state_to_str(lp->state),
205 state_to_str(state));
206
207 lp->state = state;
208}
209
210static unsigned long __advance(unsigned long off, unsigned long num_entries)
211{
212 off += LDC_PACKET_SIZE;
213 if (off == (num_entries * LDC_PACKET_SIZE))
214 off = 0;
215
216 return off;
217}
218
219static unsigned long rx_advance(struct ldc_channel *lp, unsigned long off)
220{
221 return __advance(off, lp->rx_num_entries);
222}
223
224static unsigned long tx_advance(struct ldc_channel *lp, unsigned long off)
225{
226 return __advance(off, lp->tx_num_entries);
227}
228
229static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp,
230 unsigned long *new_tail)
231{
232 struct ldc_packet *p;
233 unsigned long t;
234
235 t = tx_advance(lp, lp->tx_tail);
236 if (t == lp->tx_head)
237 return NULL;
238
239 *new_tail = t;
240
241 p = lp->tx_base;
242 return p + (lp->tx_tail / LDC_PACKET_SIZE);
243}
244
245/* When we are in reliable or stream mode, we have to track the next packet
246 * we haven't gotten an ACK for in the TX queue using tx_acked. We have
247 * to be careful not to stomp over the queue past that point. During
248 * the handshake, we don't have TX data packets pending in the queue
249 * and that's why handshake_get_tx_packet() need not be mindful of
250 * lp->tx_acked.
251 */
252static unsigned long head_for_data(struct ldc_channel *lp)
253{
254 if (lp->cfg.mode == LDC_MODE_STREAM)
255 return lp->tx_acked;
256 return lp->tx_head;
257}
258
259static int tx_has_space_for(struct ldc_channel *lp, unsigned int size)
260{
261 unsigned long limit, tail, new_tail, diff;
262 unsigned int mss;
263
264 limit = head_for_data(lp);
265 tail = lp->tx_tail;
266 new_tail = tx_advance(lp, tail);
267 if (new_tail == limit)
268 return 0;
269
270 if (limit > new_tail)
271 diff = limit - new_tail;
272 else
273 diff = (limit +
274 ((lp->tx_num_entries * LDC_PACKET_SIZE) - new_tail));
275 diff /= LDC_PACKET_SIZE;
276 mss = lp->mss;
277
278 if (diff * mss < size)
279 return 0;
280
281 return 1;
282}
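/* Worked example of the space check above (illustrative, not from the
 * original source): with tx_num_entries = 128 the queue spans 0x2000 bytes.
 * If head_for_data() returns 0x800 and tx_tail is 0x7c0, new_tail advances
 * to 0x800 == limit, so the queue is treated as full.  With tx_tail at
 * 0x400 instead, new_tail is 0x440 and diff = (0x800 - 0x440) / 64 = 15
 * free packets, i.e. room for writes of up to 15 * lp->mss bytes.
 */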
283
284static struct ldc_packet *data_get_tx_packet(struct ldc_channel *lp,
285 unsigned long *new_tail)
286{
287 struct ldc_packet *p;
288 unsigned long h, t;
289
290 h = head_for_data(lp);
291 t = tx_advance(lp, lp->tx_tail);
292 if (t == h)
293 return NULL;
294
295 *new_tail = t;
296
297 p = lp->tx_base;
298 return p + (lp->tx_tail / LDC_PACKET_SIZE);
299}
300
301static int set_tx_tail(struct ldc_channel *lp, unsigned long tail)
302{
303 unsigned long orig_tail = lp->tx_tail;
304 int limit = 1000;
305
306 lp->tx_tail = tail;
307 while (limit-- > 0) {
308 unsigned long err;
309
310 err = sun4v_ldc_tx_set_qtail(lp->id, tail);
311 if (!err)
312 return 0;
313
314 if (err != HV_EWOULDBLOCK) {
315 lp->tx_tail = orig_tail;
316 return -EINVAL;
317 }
318 udelay(1);
319 }
320
321 lp->tx_tail = orig_tail;
322 return -EBUSY;
323}
324
325/* This just updates the head value in the hypervisor using
326 * a polling loop with a timeout. The caller takes care of
327 * updating software state representing the head change, if any.
328 */
329static int __set_rx_head(struct ldc_channel *lp, unsigned long head)
330{
331 int limit = 1000;
332
333 while (limit-- > 0) {
334 unsigned long err;
335
336 err = sun4v_ldc_rx_set_qhead(lp->id, head);
337 if (!err)
338 return 0;
339
340 if (err != HV_EWOULDBLOCK)
341 return -EINVAL;
342
343 udelay(1);
344 }
345
346 return -EBUSY;
347}
348
349static int send_tx_packet(struct ldc_channel *lp,
350 struct ldc_packet *p,
351 unsigned long new_tail)
352{
353 BUG_ON(p != (lp->tx_base + (lp->tx_tail / LDC_PACKET_SIZE)));
354
355 return set_tx_tail(lp, new_tail);
356}
357
358static struct ldc_packet *handshake_compose_ctrl(struct ldc_channel *lp,
359 u8 stype, u8 ctrl,
360 void *data, int dlen,
361 unsigned long *new_tail)
362{
363 struct ldc_packet *p = handshake_get_tx_packet(lp, new_tail);
364
365 if (p) {
366 memset(p, 0, sizeof(*p));
367 p->type = LDC_CTRL;
368 p->stype = stype;
369 p->ctrl = ctrl;
370 if (data)
371 memcpy(p->u.u_data, data, dlen);
372 }
373 return p;
374}
375
376static int start_handshake(struct ldc_channel *lp)
377{
378 struct ldc_packet *p;
379 struct ldc_version *ver;
380 unsigned long new_tail;
381
382 ver = &ver_arr[0];
383
384 ldcdbg(HS, "SEND VER INFO maj[%u] min[%u]\n",
385 ver->major, ver->minor);
386
387 p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
388 ver, sizeof(*ver), &new_tail);
389 if (p) {
390 int err = send_tx_packet(lp, p, new_tail);
391 if (!err)
392 lp->flags &= ~LDC_FLAG_RESET;
393 return err;
394 }
395 return -EBUSY;
396}
397
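/* Handshake message flow, as implemented by the send_*() and process_*()
 * helpers in this file (summary added for clarity; it is not part of the
 * original source):
 *
 *	side A                            side B
 *	------                            ------
 *	CTRL/INFO VERS (major, minor) --->
 *	                              <--- CTRL/ACK VERS (or NACK carrying
 *	                                   a counter-proposed version)
 *	CTRL/INFO RTS (env = mode)    --->
 *	                              <--- CTRL/INFO RTR
 *	CTRL/INFO RDX                 --->
 *
 * Side A completes the handshake when it processes RTR (and then sends
 * RDX); side B completes when it processes RDX.  Both sides then set
 * hs_state = LDC_HS_COMPLETE, enter LDC_STATE_CONNECTED and report
 * LDC_EVENT_UP.
 */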
398static int send_version_nack(struct ldc_channel *lp,
399 u16 major, u16 minor)
400{
401 struct ldc_packet *p;
402 struct ldc_version ver;
403 unsigned long new_tail;
404
405 ver.major = major;
406 ver.minor = minor;
407
408 p = handshake_compose_ctrl(lp, LDC_NACK, LDC_VERS,
409 &ver, sizeof(ver), &new_tail);
410 if (p) {
411 ldcdbg(HS, "SEND VER NACK maj[%u] min[%u]\n",
412 ver.major, ver.minor);
413
414 return send_tx_packet(lp, p, new_tail);
415 }
416 return -EBUSY;
417}
418
419static int send_version_ack(struct ldc_channel *lp,
420 struct ldc_version *vp)
421{
422 struct ldc_packet *p;
423 unsigned long new_tail;
424
425 p = handshake_compose_ctrl(lp, LDC_ACK, LDC_VERS,
426 vp, sizeof(*vp), &new_tail);
427 if (p) {
428 ldcdbg(HS, "SEND VER ACK maj[%u] min[%u]\n",
429 vp->major, vp->minor);
430
431 return send_tx_packet(lp, p, new_tail);
432 }
433 return -EBUSY;
434}
435
436static int send_rts(struct ldc_channel *lp)
437{
438 struct ldc_packet *p;
439 unsigned long new_tail;
440
441 p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTS, NULL, 0,
442 &new_tail);
443 if (p) {
444 p->env = lp->cfg.mode;
445 p->seqid = 0;
446 lp->rcv_nxt = 0;
447
448 ldcdbg(HS, "SEND RTS env[0x%x] seqid[0x%x]\n",
449 p->env, p->seqid);
450
451 return send_tx_packet(lp, p, new_tail);
452 }
453 return -EBUSY;
454}
455
456static int send_rtr(struct ldc_channel *lp)
457{
458 struct ldc_packet *p;
459 unsigned long new_tail;
460
461 p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTR, NULL, 0,
462 &new_tail);
463 if (p) {
464 p->env = lp->cfg.mode;
465 p->seqid = 0;
466
467 ldcdbg(HS, "SEND RTR env[0x%x] seqid[0x%x]\n",
468 p->env, p->seqid);
469
470 return send_tx_packet(lp, p, new_tail);
471 }
472 return -EBUSY;
473}
474
475static int send_rdx(struct ldc_channel *lp)
476{
477 struct ldc_packet *p;
478 unsigned long new_tail;
479
480 p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RDX, NULL, 0,
481 &new_tail);
482 if (p) {
483 p->env = 0;
484 p->seqid = ++lp->snd_nxt;
485 p->u.r.ackid = lp->rcv_nxt;
486
487 ldcdbg(HS, "SEND RDX env[0x%x] seqid[0x%x] ackid[0x%x]\n",
488 p->env, p->seqid, p->u.r.ackid);
489
490 return send_tx_packet(lp, p, new_tail);
491 }
492 return -EBUSY;
493}
494
495static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt)
496{
497 struct ldc_packet *p;
498 unsigned long new_tail;
499 int err;
500
501 p = data_get_tx_packet(lp, &new_tail);
502 if (!p)
503 return -EBUSY;
504 memset(p, 0, sizeof(*p));
505 p->type = data_pkt->type;
506 p->stype = LDC_NACK;
507 p->ctrl = data_pkt->ctrl & LDC_CTRL_MSK;
508 p->seqid = lp->snd_nxt + 1;
509 p->u.r.ackid = lp->rcv_nxt;
510
511 ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n",
512 p->type, p->ctrl, p->seqid, p->u.r.ackid);
513
514 err = send_tx_packet(lp, p, new_tail);
515 if (!err)
516 lp->snd_nxt++;
517
518 return err;
519}
520
521static int ldc_abort(struct ldc_channel *lp)
522{
523 unsigned long hv_err;
524
525 ldcdbg(STATE, "ABORT\n");
526
527 /* We report but do not act upon the hypervisor errors because
528 * there really isn't much we can do if they fail at this point.
529 */
530 hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
531 if (hv_err)
532 printk(KERN_ERR PFX "ldc_abort: "
533 "sun4v_ldc_tx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
534 lp->id, lp->tx_ra, lp->tx_num_entries, hv_err);
535
536 hv_err = sun4v_ldc_tx_get_state(lp->id,
537 &lp->tx_head,
538 &lp->tx_tail,
539 &lp->chan_state);
540 if (hv_err)
541 printk(KERN_ERR PFX "ldc_abort: "
542 "sun4v_ldc_tx_get_state(%lx,...) failed, err=%lu\n",
543 lp->id, hv_err);
544
545 hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
546 if (hv_err)
547 printk(KERN_ERR PFX "ldc_abort: "
548 "sun4v_ldc_rx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
549 lp->id, lp->rx_ra, lp->rx_num_entries, hv_err);
550
551 /* Refetch the RX queue state as well, because we could be invoked
552 * here in the queue processing context.
553 */
554 hv_err = sun4v_ldc_rx_get_state(lp->id,
555 &lp->rx_head,
556 &lp->rx_tail,
557 &lp->chan_state);
558 if (hv_err)
559 printk(KERN_ERR PFX "ldc_abort: "
560 "sun4v_ldc_rx_get_state(%lx,...) failed, err=%lu\n",
561 lp->id, hv_err);
562
563 return -ECONNRESET;
564}
565
566static struct ldc_version *find_by_major(u16 major)
567{
568 struct ldc_version *ret = NULL;
569 int i;
570
571 for (i = 0; i < ARRAY_SIZE(ver_arr); i++) {
572 struct ldc_version *v = &ver_arr[i];
573 if (v->major <= major) {
574 ret = v;
575 break;
576 }
577 }
578 return ret;
579}
580
581static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp)
582{
583 struct ldc_version *vap;
584 int err;
585
586 ldcdbg(HS, "GOT VERSION INFO major[%x] minor[%x]\n",
587 vp->major, vp->minor);
588
589 if (lp->hs_state == LDC_HS_GOTVERS) {
590 lp->hs_state = LDC_HS_OPEN;
591 memset(&lp->ver, 0, sizeof(lp->ver));
592 }
593
594 vap = find_by_major(vp->major);
595 if (!vap) {
596 err = send_version_nack(lp, 0, 0);
597 } else if (vap->major != vp->major) {
598 err = send_version_nack(lp, vap->major, vap->minor);
599 } else {
600 struct ldc_version ver = *vp;
601 if (ver.minor > vap->minor)
602 ver.minor = vap->minor;
603 err = send_version_ack(lp, &ver);
604 if (!err) {
605 lp->ver = ver;
606 lp->hs_state = LDC_HS_GOTVERS;
607 }
608 }
609 if (err)
610 return ldc_abort(lp);
611
612 return 0;
613}
614
615static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp)
616{
617 ldcdbg(HS, "GOT VERSION ACK major[%x] minor[%x]\n",
618 vp->major, vp->minor);
619
620 if (lp->hs_state == LDC_HS_GOTVERS) {
621 if (lp->ver.major != vp->major ||
622 lp->ver.minor != vp->minor)
623 return ldc_abort(lp);
624 } else {
625 lp->ver = *vp;
626 lp->hs_state = LDC_HS_GOTVERS;
627 }
628 if (send_rts(lp))
629 return ldc_abort(lp);
630 return 0;
631}
632
633static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp)
634{
635 struct ldc_version *vap;
636 struct ldc_packet *p;
637 unsigned long new_tail;
638
639 if (vp->major == 0 && vp->minor == 0)
640 return ldc_abort(lp);
641
642 vap = find_by_major(vp->major);
643 if (!vap)
644 return ldc_abort(lp);
645
646 p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
647 vap, sizeof(*vap),
648 &new_tail);
649 if (!p)
650 return ldc_abort(lp);
651
652 return send_tx_packet(lp, p, new_tail);
653}
654
655static int process_version(struct ldc_channel *lp,
656 struct ldc_packet *p)
657{
658 struct ldc_version *vp;
659
660 vp = (struct ldc_version *) p->u.u_data;
661
662 switch (p->stype) {
663 case LDC_INFO:
664 return process_ver_info(lp, vp);
665
666 case LDC_ACK:
667 return process_ver_ack(lp, vp);
668
669 case LDC_NACK:
670 return process_ver_nack(lp, vp);
671
672 default:
673 return ldc_abort(lp);
674 }
675}
676
677static int process_rts(struct ldc_channel *lp,
678 struct ldc_packet *p)
679{
680 ldcdbg(HS, "GOT RTS stype[%x] seqid[%x] env[%x]\n",
681 p->stype, p->seqid, p->env);
682
683 if (p->stype != LDC_INFO ||
684 lp->hs_state != LDC_HS_GOTVERS ||
685 p->env != lp->cfg.mode)
686 return ldc_abort(lp);
687
688 lp->snd_nxt = p->seqid;
689 lp->rcv_nxt = p->seqid;
690 lp->hs_state = LDC_HS_SENTRTR;
691 if (send_rtr(lp))
692 return ldc_abort(lp);
693
694 return 0;
695}
696
697static int process_rtr(struct ldc_channel *lp,
698 struct ldc_packet *p)
699{
700 ldcdbg(HS, "GOT RTR stype[%x] seqid[%x] env[%x]\n",
701 p->stype, p->seqid, p->env);
702
703 if (p->stype != LDC_INFO ||
704 p->env != lp->cfg.mode)
705 return ldc_abort(lp);
706
707 lp->snd_nxt = p->seqid;
708 lp->hs_state = LDC_HS_COMPLETE;
709 ldc_set_state(lp, LDC_STATE_CONNECTED);
710 send_rdx(lp);
711
712 return LDC_EVENT_UP;
713}
714
715static int rx_seq_ok(struct ldc_channel *lp, u32 seqid)
716{
717 return lp->rcv_nxt + 1 == seqid;
718}
719
720static int process_rdx(struct ldc_channel *lp,
721 struct ldc_packet *p)
722{
723 ldcdbg(HS, "GOT RDX stype[%x] seqid[%x] env[%x] ackid[%x]\n",
724 p->stype, p->seqid, p->env, p->u.r.ackid);
725
726 if (p->stype != LDC_INFO ||
727 !(rx_seq_ok(lp, p->seqid)))
728 return ldc_abort(lp);
729
730 lp->rcv_nxt = p->seqid;
731
732 lp->hs_state = LDC_HS_COMPLETE;
733 ldc_set_state(lp, LDC_STATE_CONNECTED);
734
735 return LDC_EVENT_UP;
736}
737
738static int process_control_frame(struct ldc_channel *lp,
739 struct ldc_packet *p)
740{
741 switch (p->ctrl) {
742 case LDC_VERS:
743 return process_version(lp, p);
744
745 case LDC_RTS:
746 return process_rts(lp, p);
747
748 case LDC_RTR:
749 return process_rtr(lp, p);
750
751 case LDC_RDX:
752 return process_rdx(lp, p);
753
754 default:
755 return ldc_abort(lp);
756 }
757}
758
759static int process_error_frame(struct ldc_channel *lp,
760 struct ldc_packet *p)
761{
762 return ldc_abort(lp);
763}
764
765static int process_data_ack(struct ldc_channel *lp,
766 struct ldc_packet *ack)
767{
768 unsigned long head = lp->tx_acked;
769 u32 ackid = ack->u.r.ackid;
770
771 while (1) {
772 struct ldc_packet *p = lp->tx_base + (head / LDC_PACKET_SIZE);
773
774 head = tx_advance(lp, head);
775
776 if (p->seqid == ackid) {
777 lp->tx_acked = head;
778 return 0;
779 }
780 if (head == lp->tx_tail)
781 return ldc_abort(lp);
782 }
783
784 return 0;
785}
786
787static void send_events(struct ldc_channel *lp, unsigned int event_mask)
788{
789 if (event_mask & LDC_EVENT_RESET)
790 lp->cfg.event(lp->event_arg, LDC_EVENT_RESET);
791 if (event_mask & LDC_EVENT_UP)
792 lp->cfg.event(lp->event_arg, LDC_EVENT_UP);
793 if (event_mask & LDC_EVENT_DATA_READY)
794 lp->cfg.event(lp->event_arg, LDC_EVENT_DATA_READY);
795}
796
797static irqreturn_t ldc_rx(int irq, void *dev_id)
798{
799 struct ldc_channel *lp = dev_id;
800 unsigned long orig_state, flags;
801 unsigned int event_mask;
802
803 spin_lock_irqsave(&lp->lock, flags);
804
805 orig_state = lp->chan_state;
806
807 /* We should probably check for hypervisor errors here and
808 * reset the LDC channel if we get one.
809 */
810 sun4v_ldc_rx_get_state(lp->id,
811 &lp->rx_head,
812 &lp->rx_tail,
813 &lp->chan_state);
814
815 ldcdbg(RX, "RX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
816 orig_state, lp->chan_state, lp->rx_head, lp->rx_tail);
817
818 event_mask = 0;
819
820 if (lp->cfg.mode == LDC_MODE_RAW &&
821 lp->chan_state == LDC_CHANNEL_UP) {
822 lp->hs_state = LDC_HS_COMPLETE;
823 ldc_set_state(lp, LDC_STATE_CONNECTED);
824
825 event_mask |= LDC_EVENT_UP;
826
827 orig_state = lp->chan_state;
828 }
829
830 /* If we are in reset state, flush the RX queue and ignore
831 * everything.
832 */
833 if (lp->flags & LDC_FLAG_RESET) {
834 (void) __set_rx_head(lp, lp->rx_tail);
835 goto out;
836 }
837
838 /* Once we finish the handshake, we let the ldc_read()
839 * paths do all of the control frame and state management.
840 * Just trigger the callback.
841 */
842 if (lp->hs_state == LDC_HS_COMPLETE) {
843handshake_complete:
844 if (lp->chan_state != orig_state) {
845 unsigned int event = LDC_EVENT_RESET;
846
847 if (lp->chan_state == LDC_CHANNEL_UP)
848 event = LDC_EVENT_UP;
849
850 event_mask |= event;
851 }
852 if (lp->rx_head != lp->rx_tail)
853 event_mask |= LDC_EVENT_DATA_READY;
854
855 goto out;
856 }
857
858 if (lp->chan_state != orig_state)
859 goto out;
860
861 while (lp->rx_head != lp->rx_tail) {
862 struct ldc_packet *p;
863 unsigned long new;
864 int err;
865
866 p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
867
868 switch (p->type) {
869 case LDC_CTRL:
870 err = process_control_frame(lp, p);
871 if (err > 0)
872 event_mask |= err;
873 break;
874
875 case LDC_DATA:
876 event_mask |= LDC_EVENT_DATA_READY;
877 err = 0;
878 break;
879
880 case LDC_ERR:
881 err = process_error_frame(lp, p);
882 break;
883
884 default:
885 err = ldc_abort(lp);
886 break;
887 }
888
889 if (err < 0)
890 break;
891
892 new = lp->rx_head;
893 new += LDC_PACKET_SIZE;
894 if (new == (lp->rx_num_entries * LDC_PACKET_SIZE))
895 new = 0;
896 lp->rx_head = new;
897
898 err = __set_rx_head(lp, new);
899 if (err < 0) {
900 (void) ldc_abort(lp);
901 break;
902 }
903 if (lp->hs_state == LDC_HS_COMPLETE)
904 goto handshake_complete;
905 }
906
907out:
908 spin_unlock_irqrestore(&lp->lock, flags);
909
910 send_events(lp, event_mask);
911
912 return IRQ_HANDLED;
913}
914
915static irqreturn_t ldc_tx(int irq, void *dev_id)
916{
917 struct ldc_channel *lp = dev_id;
918 unsigned long flags, orig_state;
919 unsigned int event_mask = 0;
920
921 spin_lock_irqsave(&lp->lock, flags);
922
923 orig_state = lp->chan_state;
924
925 /* We should probably check for hypervisor errors here and
926 * reset the LDC channel if we get one.
927 */
928 sun4v_ldc_tx_get_state(lp->id,
929 &lp->tx_head,
930 &lp->tx_tail,
931 &lp->chan_state);
932
933 ldcdbg(TX, " TX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
934 orig_state, lp->chan_state, lp->tx_head, lp->tx_tail);
935
936 if (lp->cfg.mode == LDC_MODE_RAW &&
937 lp->chan_state == LDC_CHANNEL_UP) {
938 lp->hs_state = LDC_HS_COMPLETE;
939 ldc_set_state(lp, LDC_STATE_CONNECTED);
940
941 event_mask |= LDC_EVENT_UP;
942 }
943
944 spin_unlock_irqrestore(&lp->lock, flags);
945
946 send_events(lp, event_mask);
947
948 return IRQ_HANDLED;
949}
950
951/* XXX ldc_alloc() and ldc_free() need to run under a mutex so
952 * XXX that addition and removal from the ldc_channel_list has
953 * XXX atomicity, otherwise the __ldc_channel_exists() check is
954 * XXX totally pointless as another thread can slip into ldc_alloc()
955 * XXX and add a channel with the same ID. There also needs to be
956 * XXX a spinlock for ldc_channel_list.
957 */
958static HLIST_HEAD(ldc_channel_list);
959
960static int __ldc_channel_exists(unsigned long id)
961{
962 struct ldc_channel *lp;
963
964 hlist_for_each_entry(lp, &ldc_channel_list, list) {
965 if (lp->id == id)
966 return 1;
967 }
968 return 0;
969}
970
971static int alloc_queue(const char *name, unsigned long num_entries,
972 struct ldc_packet **base, unsigned long *ra)
973{
974 unsigned long size, order;
975 void *q;
976
977 size = num_entries * LDC_PACKET_SIZE;
978 order = get_order(size);
979
980 q = (void *) __get_free_pages(GFP_KERNEL, order);
981 if (!q) {
982 printk(KERN_ERR PFX "Alloc of %s queue failed with "
983 "size=%lu order=%lu\n", name, size, order);
984 return -ENOMEM;
985 }
986
987 memset(q, 0, PAGE_SIZE << order);
988
989 *base = q;
990 *ra = __pa(q);
991
992 return 0;
993}
994
995static void free_queue(unsigned long num_entries, struct ldc_packet *q)
996{
997 unsigned long size, order;
998
999 if (!q)
1000 return;
1001
1002 size = num_entries * LDC_PACKET_SIZE;
1003 order = get_order(size);
1004
1005 free_pages((unsigned long)q, order);
1006}
1007
1008static unsigned long ldc_cookie_to_index(u64 cookie, void *arg)
1009{
1010 u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;
1011 /* struct ldc_iommu *ldc_iommu = (struct ldc_iommu *)arg; */
1012
1013 cookie &= ~COOKIE_PGSZ_CODE;
1014
1015 return (cookie >> (13ULL + (szcode * 3ULL)));
1016}
1017
1018struct ldc_demap_arg {
1019 struct ldc_iommu *ldc_iommu;
1020 u64 cookie;
1021 unsigned long id;
1022};
1023
1024static void ldc_demap(void *arg, unsigned long entry, unsigned long npages)
1025{
1026 struct ldc_demap_arg *ldc_demap_arg = arg;
1027 struct ldc_iommu *iommu = ldc_demap_arg->ldc_iommu;
1028 unsigned long id = ldc_demap_arg->id;
1029 u64 cookie = ldc_demap_arg->cookie;
1030 struct ldc_mtable_entry *base;
1031 unsigned long i, shift;
1032
1033 shift = (cookie >> COOKIE_PGSZ_CODE_SHIFT) * 3;
1034 base = iommu->page_table + entry;
1035 for (i = 0; i < npages; i++) {
1036 if (base->cookie)
1037 sun4v_ldc_revoke(id, cookie + (i << shift),
1038 base->cookie);
1039 base->mte = 0;
1040 }
1041}
1042
1043/* XXX Make this configurable... XXX */
1044#define LDC_IOTABLE_SIZE (8 * 1024)
1045
1046struct iommu_tbl_ops ldc_iommu_ops = {
1047 .cookie_to_index = ldc_cookie_to_index,
1048 .demap = ldc_demap,
1049};
1050
1051static void setup_ldc_pool_hash(void)
1052{
1053 unsigned int i;
1054 static bool do_once;
1055
1056 if (do_once)
1057 return;
1058 do_once = true;
1059 for_each_possible_cpu(i)
1060 per_cpu(ldc_pool_hash, i) = hash_32(i, IOMMU_POOL_HASHBITS);
1061}
1062
1063
1064static int ldc_iommu_init(const char *name, struct ldc_channel *lp)
1065{
1066 unsigned long sz, num_tsb_entries, tsbsize, order;
1067 struct ldc_iommu *ldc_iommu = &lp->iommu;
1068 struct iommu_table *iommu = &ldc_iommu->iommu_table;
1069 struct ldc_mtable_entry *table;
1070 unsigned long hv_err;
1071 int err;
1072
1073 num_tsb_entries = LDC_IOTABLE_SIZE;
1074 tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1075 setup_ldc_pool_hash();
1076 spin_lock_init(&ldc_iommu->lock);
1077
1078 sz = num_tsb_entries / 8;
1079 sz = (sz + 7UL) & ~7UL;
1080 iommu->map = kzalloc(sz, GFP_KERNEL);
1081 if (!iommu->map) {
1082 printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz);
1083 return -ENOMEM;
1084 }
1085 iommu_tbl_pool_init(iommu, num_tsb_entries, PAGE_SHIFT,
1086 &ldc_iommu_ops, false, 1);
1087
1088 order = get_order(tsbsize);
1089
1090 table = (struct ldc_mtable_entry *)
1091 __get_free_pages(GFP_KERNEL, order);
1092 err = -ENOMEM;
1093 if (!table) {
1094 printk(KERN_ERR PFX "Alloc of MTE table failed, "
1095 "size=%lu order=%lu\n", tsbsize, order);
1096 goto out_free_map;
1097 }
1098
1099 memset(table, 0, PAGE_SIZE << order);
1100
1101 ldc_iommu->page_table = table;
1102
1103 hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table),
1104 num_tsb_entries);
1105 err = -EINVAL;
1106 if (hv_err)
1107 goto out_free_table;
1108
1109 return 0;
1110
1111out_free_table:
1112 free_pages((unsigned long) table, order);
1113 ldc_iommu->page_table = NULL;
1114
1115out_free_map:
1116 kfree(iommu->map);
1117 iommu->map = NULL;
1118
1119 return err;
1120}
1121
1122static void ldc_iommu_release(struct ldc_channel *lp)
1123{
1124 struct ldc_iommu *ldc_iommu = &lp->iommu;
1125 struct iommu_table *iommu = &ldc_iommu->iommu_table;
1126 unsigned long num_tsb_entries, tsbsize, order;
1127
1128 (void) sun4v_ldc_set_map_table(lp->id, 0, 0);
1129
1130 num_tsb_entries = iommu->poolsize * iommu->nr_pools;
1131 tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1132 order = get_order(tsbsize);
1133
1134 free_pages((unsigned long) ldc_iommu->page_table, order);
1135 ldc_iommu->page_table = NULL;
1136
1137 kfree(iommu->map);
1138 iommu->map = NULL;
1139}
1140
1141struct ldc_channel *ldc_alloc(unsigned long id,
1142 const struct ldc_channel_config *cfgp,
1143 void *event_arg,
1144 const char *name)
1145{
1146 struct ldc_channel *lp;
1147 const struct ldc_mode_ops *mops;
1148 unsigned long dummy1, dummy2, hv_err;
1149 u8 mss, *mssbuf;
1150 int err;
1151
1152 err = -ENODEV;
1153 if (!ldom_domaining_enabled)
1154 goto out_err;
1155
1156 err = -EINVAL;
1157 if (!cfgp)
1158 goto out_err;
1159 if (!name)
1160 goto out_err;
1161
1162 switch (cfgp->mode) {
1163 case LDC_MODE_RAW:
1164 mops = &raw_ops;
1165 mss = LDC_PACKET_SIZE;
1166 break;
1167
1168 case LDC_MODE_UNRELIABLE:
1169 mops = &nonraw_ops;
1170 mss = LDC_PACKET_SIZE - 8;
1171 break;
1172
1173 case LDC_MODE_STREAM:
1174 mops = &stream_ops;
1175 mss = LDC_PACKET_SIZE - 8 - 8;
1176 break;
1177
1178 default:
1179 goto out_err;
1180 }
1181
1182 if (!cfgp->event || !event_arg || !cfgp->rx_irq || !cfgp->tx_irq)
1183 goto out_err;
1184
1185 hv_err = sun4v_ldc_tx_qinfo(id, &dummy1, &dummy2);
1186 err = -ENODEV;
1187 if (hv_err == HV_ECHANNEL)
1188 goto out_err;
1189
1190 err = -EEXIST;
1191 if (__ldc_channel_exists(id))
1192 goto out_err;
1193
1194 mssbuf = NULL;
1195
1196 lp = kzalloc(sizeof(*lp), GFP_KERNEL);
1197 err = -ENOMEM;
1198 if (!lp)
1199 goto out_err;
1200
1201 spin_lock_init(&lp->lock);
1202
1203 lp->id = id;
1204
1205 err = ldc_iommu_init(name, lp);
1206 if (err)
1207 goto out_free_ldc;
1208
1209 lp->mops = mops;
1210 lp->mss = mss;
1211
1212 lp->cfg = *cfgp;
1213 if (!lp->cfg.mtu)
1214 lp->cfg.mtu = LDC_DEFAULT_MTU;
1215
1216 if (lp->cfg.mode == LDC_MODE_STREAM) {
1217 mssbuf = kzalloc(lp->cfg.mtu, GFP_KERNEL);
1218 if (!mssbuf) {
1219 err = -ENOMEM;
1220 goto out_free_iommu;
1221 }
1222 lp->mssbuf = mssbuf;
1223 }
1224
1225 lp->event_arg = event_arg;
1226
1227 /* XXX allow setting via ldc_channel_config to override defaults
1228 * XXX or use some formula based upon mtu
1229 */
1230 lp->tx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1231 lp->rx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1232
1233 err = alloc_queue("TX", lp->tx_num_entries,
1234 &lp->tx_base, &lp->tx_ra);
1235 if (err)
1236 goto out_free_mssbuf;
1237
1238 err = alloc_queue("RX", lp->rx_num_entries,
1239 &lp->rx_base, &lp->rx_ra);
1240 if (err)
1241 goto out_free_txq;
1242
1243 lp->flags |= LDC_FLAG_ALLOCED_QUEUES;
1244
1245 lp->hs_state = LDC_HS_CLOSED;
1246 ldc_set_state(lp, LDC_STATE_INIT);
1247
1248 INIT_HLIST_NODE(&lp->list);
1249 hlist_add_head(&lp->list, &ldc_channel_list);
1250
1251 INIT_HLIST_HEAD(&lp->mh_list);
1252
1253 snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
1254 snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
1255
1256 err = request_irq(lp->cfg.rx_irq, ldc_rx, 0,
1257 lp->rx_irq_name, lp);
1258 if (err)
1259 goto out_free_txq;
1260
1261 err = request_irq(lp->cfg.tx_irq, ldc_tx, 0,
1262 lp->tx_irq_name, lp);
1263 if (err) {
1264 free_irq(lp->cfg.rx_irq, lp);
1265 goto out_free_txq;
1266 }
1267
1268 return lp;
1269
1270out_free_txq:
1271 free_queue(lp->tx_num_entries, lp->tx_base);
1272
1273out_free_mssbuf:
1274 kfree(mssbuf);
1275
1276out_free_iommu:
1277 ldc_iommu_release(lp);
1278
1279out_free_ldc:
1280 kfree(lp);
1281
1282out_err:
1283 return ERR_PTR(err);
1284}
1285EXPORT_SYMBOL(ldc_alloc);
1286
1287void ldc_unbind(struct ldc_channel *lp)
1288{
1289 if (lp->flags & LDC_FLAG_REGISTERED_IRQS) {
1290 free_irq(lp->cfg.rx_irq, lp);
1291 free_irq(lp->cfg.tx_irq, lp);
1292 lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
1293 }
1294
1295 if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) {
1296 sun4v_ldc_tx_qconf(lp->id, 0, 0);
1297 sun4v_ldc_rx_qconf(lp->id, 0, 0);
1298 lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1299 }
1300 if (lp->flags & LDC_FLAG_ALLOCED_QUEUES) {
1301 free_queue(lp->tx_num_entries, lp->tx_base);
1302 free_queue(lp->rx_num_entries, lp->rx_base);
1303 lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES;
1304 }
1305
1306 ldc_set_state(lp, LDC_STATE_INIT);
1307}
1308EXPORT_SYMBOL(ldc_unbind);
1309
1310void ldc_free(struct ldc_channel *lp)
1311{
1312 ldc_unbind(lp);
1313 hlist_del(&lp->list);
1314 kfree(lp->mssbuf);
1315 ldc_iommu_release(lp);
1316
1317 kfree(lp);
1318}
1319EXPORT_SYMBOL(ldc_free);
1320
1321/* Bind the channel. This registers the LDC queues with
1322 * the hypervisor and puts the channel into a pseudo-listening
1323 * state. This does not initiate a handshake, ldc_connect() does
1324 * that.
1325 */
1326int ldc_bind(struct ldc_channel *lp)
1327{
1328 unsigned long hv_err, flags;
1329 int err = -EINVAL;
1330
1331 if (lp->state != LDC_STATE_INIT)
1332 return -EINVAL;
1333
1334 spin_lock_irqsave(&lp->lock, flags);
1335
1336 enable_irq(lp->cfg.rx_irq);
1337 enable_irq(lp->cfg.tx_irq);
1338
1339 lp->flags |= LDC_FLAG_REGISTERED_IRQS;
1340
1341 err = -ENODEV;
1342 hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1343 if (hv_err)
1344 goto out_free_irqs;
1345
1346 hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1347 if (hv_err)
1348 goto out_free_irqs;
1349
1350 hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1351 if (hv_err)
1352 goto out_unmap_tx;
1353
1354 hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1355 if (hv_err)
1356 goto out_unmap_tx;
1357
1358 lp->flags |= LDC_FLAG_REGISTERED_QUEUES;
1359
1360 hv_err = sun4v_ldc_tx_get_state(lp->id,
1361 &lp->tx_head,
1362 &lp->tx_tail,
1363 &lp->chan_state);
1364 err = -EBUSY;
1365 if (hv_err)
1366 goto out_unmap_rx;
1367
1368 lp->tx_acked = lp->tx_head;
1369
1370 lp->hs_state = LDC_HS_OPEN;
1371 ldc_set_state(lp, LDC_STATE_BOUND);
1372
1373 spin_unlock_irqrestore(&lp->lock, flags);
1374
1375 return 0;
1376
1377out_unmap_rx:
1378 lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1379 sun4v_ldc_rx_qconf(lp->id, 0, 0);
1380
1381out_unmap_tx:
1382 sun4v_ldc_tx_qconf(lp->id, 0, 0);
1383
1384out_free_irqs:
1385 lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
1386 free_irq(lp->cfg.tx_irq, lp);
1387 free_irq(lp->cfg.rx_irq, lp);
1388
1389 spin_unlock_irqrestore(&lp->lock, flags);
1390
1391 return err;
1392}
1393EXPORT_SYMBOL(ldc_bind);
1394
1395int ldc_connect(struct ldc_channel *lp)
1396{
1397 unsigned long flags;
1398 int err;
1399
1400 if (lp->cfg.mode == LDC_MODE_RAW)
1401 return -EINVAL;
1402
1403 spin_lock_irqsave(&lp->lock, flags);
1404
1405 if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1406 !(lp->flags & LDC_FLAG_REGISTERED_QUEUES) ||
1407 lp->hs_state != LDC_HS_OPEN)
1408 err = ((lp->hs_state > LDC_HS_OPEN) ? 0 : -EINVAL);
1409 else
1410 err = start_handshake(lp);
1411
1412 spin_unlock_irqrestore(&lp->lock, flags);
1413
1414 return err;
1415}
1416EXPORT_SYMBOL(ldc_connect);
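/* Typical consumer call sequence (a hypothetical sketch based only on the
 * API exported from this file; the port structure, callback name and the
 * callback's exact prototype are assumptions made for illustration):
 *
 *	static void my_event(void *arg, int event)
 *	{
 *		struct my_port *port = arg;
 *
 *		if (event == LDC_EVENT_UP)
 *			;	// handshake finished, channel usable
 *		if (event == LDC_EVENT_DATA_READY)
 *			;	// drain the RX queue with ldc_read()
 *	}
 *
 *	cfg.event  = my_event;
 *	cfg.mode   = LDC_MODE_UNRELIABLE;
 *	cfg.rx_irq = rx_ino;
 *	cfg.tx_irq = tx_ino;
 *	lp = ldc_alloc(channel_id, &cfg, port, "my-port");
 *	err = ldc_bind(lp);	// register queues, pseudo-listening state
 *	err = ldc_connect(lp);	// kick off the version/RTS/RTR/RDX handshake
 */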
1417
1418int ldc_disconnect(struct ldc_channel *lp)
1419{
1420 unsigned long hv_err, flags;
1421 int err;
1422
1423 if (lp->cfg.mode == LDC_MODE_RAW)
1424 return -EINVAL;
1425
1426 if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1427 !(lp->flags & LDC_FLAG_REGISTERED_QUEUES))
1428 return -EINVAL;
1429
1430 spin_lock_irqsave(&lp->lock, flags);
1431
1432 err = -ENODEV;
1433 hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1434 if (hv_err)
1435 goto out_err;
1436
1437 hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1438 if (hv_err)
1439 goto out_err;
1440
1441 hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1442 if (hv_err)
1443 goto out_err;
1444
1445 hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1446 if (hv_err)
1447 goto out_err;
1448
1449 ldc_set_state(lp, LDC_STATE_BOUND);
1450 lp->hs_state = LDC_HS_OPEN;
1451 lp->flags |= LDC_FLAG_RESET;
1452
1453 spin_unlock_irqrestore(&lp->lock, flags);
1454
1455 return 0;
1456
1457out_err:
1458 sun4v_ldc_tx_qconf(lp->id, 0, 0);
1459 sun4v_ldc_rx_qconf(lp->id, 0, 0);
1460 free_irq(lp->cfg.tx_irq, lp);
1461 free_irq(lp->cfg.rx_irq, lp);
1462 lp->flags &= ~(LDC_FLAG_REGISTERED_IRQS |
1463 LDC_FLAG_REGISTERED_QUEUES);
1464 ldc_set_state(lp, LDC_STATE_INIT);
1465
1466 spin_unlock_irqrestore(&lp->lock, flags);
1467
1468 return err;
1469}
1470EXPORT_SYMBOL(ldc_disconnect);
1471
1472int ldc_state(struct ldc_channel *lp)
1473{
1474 return lp->state;
1475}
1476EXPORT_SYMBOL(ldc_state);
1477
1478static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size)
1479{
1480 struct ldc_packet *p;
1481 unsigned long new_tail;
1482 int err;
1483
1484 if (size > LDC_PACKET_SIZE)
1485 return -EMSGSIZE;
1486
1487 p = data_get_tx_packet(lp, &new_tail);
1488 if (!p)
1489 return -EAGAIN;
1490
1491 memcpy(p, buf, size);
1492
1493 err = send_tx_packet(lp, p, new_tail);
1494 if (!err)
1495 err = size;
1496
1497 return err;
1498}
1499
1500static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size)
1501{
1502 struct ldc_packet *p;
1503 unsigned long hv_err, new;
1504 int err;
1505
1506 if (size < LDC_PACKET_SIZE)
1507 return -EINVAL;
1508
1509 hv_err = sun4v_ldc_rx_get_state(lp->id,
1510 &lp->rx_head,
1511 &lp->rx_tail,
1512 &lp->chan_state);
1513 if (hv_err)
1514 return ldc_abort(lp);
1515
1516 if (lp->chan_state == LDC_CHANNEL_DOWN ||
1517 lp->chan_state == LDC_CHANNEL_RESETTING)
1518 return -ECONNRESET;
1519
1520 if (lp->rx_head == lp->rx_tail)
1521 return 0;
1522
1523 p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
1524 memcpy(buf, p, LDC_PACKET_SIZE);
1525
1526 new = rx_advance(lp, lp->rx_head);
1527 lp->rx_head = new;
1528
1529 err = __set_rx_head(lp, new);
1530 if (err < 0)
1531 err = -ECONNRESET;
1532 else
1533 err = LDC_PACKET_SIZE;
1534
1535 return err;
1536}
1537
1538static const struct ldc_mode_ops raw_ops = {
1539 .write = write_raw,
1540 .read = read_raw,
1541};
1542
1543static int write_nonraw(struct ldc_channel *lp, const void *buf,
1544 unsigned int size)
1545{
1546 unsigned long hv_err, tail;
1547 unsigned int copied;
1548 u32 seq;
1549 int err;
1550
1551 hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
1552 &lp->chan_state);
1553 if (unlikely(hv_err))
1554 return -EBUSY;
1555
1556 if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
1557 return ldc_abort(lp);
1558
1559 if (!tx_has_space_for(lp, size))
1560 return -EAGAIN;
1561
1562 seq = lp->snd_nxt;
1563 copied = 0;
1564 tail = lp->tx_tail;
1565 while (copied < size) {
1566 struct ldc_packet *p = lp->tx_base + (tail / LDC_PACKET_SIZE);
1567 u8 *data = ((lp->cfg.mode == LDC_MODE_UNRELIABLE) ?
1568 p->u.u_data :
1569 p->u.r.r_data);
1570 int data_len;
1571
1572 p->type = LDC_DATA;
1573 p->stype = LDC_INFO;
1574 p->ctrl = 0;
1575
1576 data_len = size - copied;
1577 if (data_len > lp->mss)
1578 data_len = lp->mss;
1579
1580 BUG_ON(data_len > LDC_LEN);
1581
1582 p->env = (data_len |
1583 (copied == 0 ? LDC_START : 0) |
1584 (data_len == size - copied ? LDC_STOP : 0));
1585
1586 p->seqid = ++seq;
1587
1588 ldcdbg(DATA, "SENT DATA [%02x:%02x:%02x:%02x:%08x]\n",
1589 p->type,
1590 p->stype,
1591 p->ctrl,
1592 p->env,
1593 p->seqid);
1594
1595 memcpy(data, buf, data_len);
1596 buf += data_len;
1597 copied += data_len;
1598
1599 tail = tx_advance(lp, tail);
1600 }
1601
1602 err = set_tx_tail(lp, tail);
1603 if (!err) {
1604 lp->snd_nxt = seq;
1605 err = size;
1606 }
1607
1608 return err;
1609}
1610
1611static int rx_bad_seq(struct ldc_channel *lp, struct ldc_packet *p,
1612 struct ldc_packet *first_frag)
1613{
1614 int err;
1615
1616 if (first_frag)
1617 lp->rcv_nxt = first_frag->seqid - 1;
1618
1619 err = send_data_nack(lp, p);
1620 if (err)
1621 return err;
1622
1623 err = __set_rx_head(lp, lp->rx_tail);
1624 if (err < 0)
1625 return ldc_abort(lp);
1626
1627 return 0;
1628}
1629
1630static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p)
1631{
1632 if (p->stype & LDC_ACK) {
1633 int err = process_data_ack(lp, p);
1634 if (err)
1635 return err;
1636 }
1637 if (p->stype & LDC_NACK)
1638 return ldc_abort(lp);
1639
1640 return 0;
1641}
1642
1643static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head)
1644{
1645 unsigned long dummy;
1646 int limit = 1000;
1647
1648 ldcdbg(DATA, "DATA WAIT cur_head[%lx] rx_head[%lx] rx_tail[%lx]\n",
1649 cur_head, lp->rx_head, lp->rx_tail);
1650 while (limit-- > 0) {
1651 unsigned long hv_err;
1652
1653 hv_err = sun4v_ldc_rx_get_state(lp->id,
1654 &dummy,
1655 &lp->rx_tail,
1656 &lp->chan_state);
1657 if (hv_err)
1658 return ldc_abort(lp);
1659
1660 if (lp->chan_state == LDC_CHANNEL_DOWN ||
1661 lp->chan_state == LDC_CHANNEL_RESETTING)
1662 return -ECONNRESET;
1663
1664 if (cur_head != lp->rx_tail) {
1665 ldcdbg(DATA, "DATA WAIT DONE "
1666 "head[%lx] tail[%lx] chan_state[%lx]\n",
1667 dummy, lp->rx_tail, lp->chan_state);
1668 return 0;
1669 }
1670
1671 udelay(1);
1672 }
1673 return -EAGAIN;
1674}
1675
1676static int rx_set_head(struct ldc_channel *lp, unsigned long head)
1677{
1678 int err = __set_rx_head(lp, head);
1679
1680 if (err < 0)
1681 return ldc_abort(lp);
1682
1683 lp->rx_head = head;
1684 return 0;
1685}
1686
1687static void send_data_ack(struct ldc_channel *lp)
1688{
1689 unsigned long new_tail;
1690 struct ldc_packet *p;
1691
1692 p = data_get_tx_packet(lp, &new_tail);
1693 if (likely(p)) {
1694 int err;
1695
1696 memset(p, 0, sizeof(*p));
1697 p->type = LDC_DATA;
1698 p->stype = LDC_ACK;
1699 p->ctrl = 0;
1700 p->seqid = lp->snd_nxt + 1;
1701 p->u.r.ackid = lp->rcv_nxt;
1702
1703 err = send_tx_packet(lp, p, new_tail);
1704 if (!err)
1705 lp->snd_nxt++;
1706 }
1707}
1708
1709static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size)
1710{
1711 struct ldc_packet *first_frag;
1712 unsigned long hv_err, new;
1713 int err, copied;
1714
1715 hv_err = sun4v_ldc_rx_get_state(lp->id,
1716 &lp->rx_head,
1717 &lp->rx_tail,
1718 &lp->chan_state);
1719 if (hv_err)
1720 return ldc_abort(lp);
1721
1722 if (lp->chan_state == LDC_CHANNEL_DOWN ||
1723 lp->chan_state == LDC_CHANNEL_RESETTING)
1724 return -ECONNRESET;
1725
1726 if (lp->rx_head == lp->rx_tail)
1727 return 0;
1728
1729 first_frag = NULL;
1730 copied = err = 0;
1731 new = lp->rx_head;
1732 while (1) {
1733 struct ldc_packet *p;
1734 int pkt_len;
1735
1736 BUG_ON(new == lp->rx_tail);
1737 p = lp->rx_base + (new / LDC_PACKET_SIZE);
1738
1739 ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x:%08x] "
1740 "rcv_nxt[%08x]\n",
1741 p->type,
1742 p->stype,
1743 p->ctrl,
1744 p->env,
1745 p->seqid,
1746 p->u.r.ackid,
1747 lp->rcv_nxt);
1748
1749 if (unlikely(!rx_seq_ok(lp, p->seqid))) {
1750 err = rx_bad_seq(lp, p, first_frag);
1751 copied = 0;
1752 break;
1753 }
1754
1755 if (p->type & LDC_CTRL) {
1756 err = process_control_frame(lp, p);
1757 if (err < 0)
1758 break;
1759 err = 0;
1760 }
1761
1762 lp->rcv_nxt = p->seqid;
1763
1764 if (!(p->type & LDC_DATA)) {
1765 new = rx_advance(lp, new);
1766 goto no_data;
1767 }
1768 if (p->stype & (LDC_ACK | LDC_NACK)) {
1769 err = data_ack_nack(lp, p);
1770 if (err)
1771 break;
1772 }
1773 if (!(p->stype & LDC_INFO)) {
1774 new = rx_advance(lp, new);
1775 err = rx_set_head(lp, new);
1776 if (err)
1777 break;
1778 goto no_data;
1779 }
1780
1781 pkt_len = p->env & LDC_LEN;
1782
1783 /* Every initial packet starts with the START bit set.
1784 *
1785 * Singleton packets will have both START+STOP set.
1786 *
1787 * Fragments will have START set in the first frame, STOP
1788 * set in the last frame, and neither bit set in middle
1789 * frames of the packet.
1790 *
1791 * Therefore if we are at the beginning of a packet and
1792 * we don't see START, or we are in the middle of a fragmented
1793 * packet and do see START, we are unsynchronized and should
1794 * flush the RX queue.
1795 */
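/* Illustrative example (not from the original source): a 150-byte write in
 * UNRELIABLE mode (mss = 56) arrives as three frames with env = 0x78
 * (START | 56), 0x38 (56) and 0xa6 (STOP | 38).
 */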
1796 if ((first_frag == NULL && !(p->env & LDC_START)) ||
1797 (first_frag != NULL && (p->env & LDC_START))) {
1798 if (!first_frag)
1799 new = rx_advance(lp, new);
1800
1801 err = rx_set_head(lp, new);
1802 if (err)
1803 break;
1804
1805 if (!first_frag)
1806 goto no_data;
1807 }
1808 if (!first_frag)
1809 first_frag = p;
1810
1811 if (pkt_len > size - copied) {
1812 /* User didn't give us a big enough buffer,
1813 * what to do? This is a pretty serious error.
1814 *
1815 * Since we haven't updated the RX ring head to
1816 * consume any of the packets, signal the error
1817 * to the user and just leave the RX ring alone.
1818 *
1819 * This seems the best behavior because this allows
1820 * a user of the LDC layer to start with a small
1821 * RX buffer for ldc_read() calls and use -EMSGSIZE
1822 * as a cue to enlarge its read buffer.
1823 */
1824 err = -EMSGSIZE;
1825 break;
1826 }
1827
1828 /* Ok, we are gonna eat this one. */
1829 new = rx_advance(lp, new);
1830
1831 memcpy(buf,
1832 (lp->cfg.mode == LDC_MODE_UNRELIABLE ?
1833 p->u.u_data : p->u.r.r_data), pkt_len);
1834 buf += pkt_len;
1835 copied += pkt_len;
1836
1837 if (p->env & LDC_STOP)
1838 break;
1839
1840no_data:
1841 if (new == lp->rx_tail) {
1842 err = rx_data_wait(lp, new);
1843 if (err)
1844 break;
1845 }
1846 }
1847
1848 if (!err)
1849 err = rx_set_head(lp, new);
1850
1851 if (err && first_frag)
1852 lp->rcv_nxt = first_frag->seqid - 1;
1853
1854 if (!err) {
1855 err = copied;
1856 if (err > 0 && lp->cfg.mode != LDC_MODE_UNRELIABLE)
1857 send_data_ack(lp);
1858 }
1859
1860 return err;
1861}
1862
1863static const struct ldc_mode_ops nonraw_ops = {
1864 .write = write_nonraw,
1865 .read = read_nonraw,
1866};
1867
1868static int write_stream(struct ldc_channel *lp, const void *buf,
1869 unsigned int size)
1870{
1871 if (size > lp->cfg.mtu)
1872 size = lp->cfg.mtu;
1873 return write_nonraw(lp, buf, size);
1874}
1875
1876static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size)
1877{
1878 if (!lp->mssbuf_len) {
1879 int err = read_nonraw(lp, lp->mssbuf, lp->cfg.mtu);
1880 if (err < 0)
1881 return err;
1882
1883 lp->mssbuf_len = err;
1884 lp->mssbuf_off = 0;
1885 }
1886
1887 if (size > lp->mssbuf_len)
1888 size = lp->mssbuf_len;
1889 memcpy(buf, lp->mssbuf + lp->mssbuf_off, size);
1890
1891 lp->mssbuf_off += size;
1892 lp->mssbuf_len -= size;
1893
1894 return size;
1895}
1896
1897static const struct ldc_mode_ops stream_ops = {
1898 .write = write_stream,
1899 .read = read_stream,
1900};
1901
1902int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size)
1903{
1904 unsigned long flags;
1905 int err;
1906
1907 if (!buf)
1908 return -EINVAL;
1909
1910 if (!size)
1911 return 0;
1912
1913 spin_lock_irqsave(&lp->lock, flags);
1914
1915 if (lp->hs_state != LDC_HS_COMPLETE)
1916 err = -ENOTCONN;
1917 else
1918 err = lp->mops->write(lp, buf, size);
1919
1920 spin_unlock_irqrestore(&lp->lock, flags);
1921
1922 return err;
1923}
1924EXPORT_SYMBOL(ldc_write);
1925
1926int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size)
1927{
1928 unsigned long flags;
1929 int err;
1930
1931 if (!buf)
1932 return -EINVAL;
1933
1934 if (!size)
1935 return 0;
1936
1937 spin_lock_irqsave(&lp->lock, flags);
1938
1939 if (lp->hs_state != LDC_HS_COMPLETE)
1940 err = -ENOTCONN;
1941 else
1942 err = lp->mops->read(lp, buf, size);
1943
1944 spin_unlock_irqrestore(&lp->lock, flags);
1945
1946 return err;
1947}
1948EXPORT_SYMBOL(ldc_read);
1949
1950static u64 pagesize_code(void)
1951{
1952 switch (PAGE_SIZE) {
1953 default:
1954 case (8ULL * 1024ULL):
1955 return 0;
1956 case (64ULL * 1024ULL):
1957 return 1;
1958 case (512ULL * 1024ULL):
1959 return 2;
1960 case (4ULL * 1024ULL * 1024ULL):
1961 return 3;
1962 case (32ULL * 1024ULL * 1024ULL):
1963 return 4;
1964 case (256ULL * 1024ULL * 1024ULL):
1965 return 5;
1966 }
1967}
1968
1969static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset)
1970{
1971 return ((pgsz_code << COOKIE_PGSZ_CODE_SHIFT) |
1972 (index << PAGE_SHIFT) |
1973 page_offset);
1974}
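/* Example cookie (an illustration assuming the common 8K base page size):
 * for pte_idx 5 and a page offset of 0x18, make_cookie(5, 0, 0x18) yields
 * (5 << 13) | 0x18 = 0xa018, and ldc_cookie_to_index() recovers entry 5 by
 * shifting right by 13 since the page-size code in the top bits is 0.
 */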
1975
1976
1977static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
1978 unsigned long npages)
1979{
1980 long entry;
1981
1982 entry = iommu_tbl_range_alloc(NULL, &iommu->iommu_table, npages,
1983 NULL, __this_cpu_read(ldc_pool_hash));
1984 if (unlikely(entry < 0))
1985 return NULL;
1986
1987 return iommu->page_table + entry;
1988}
1989
1990static u64 perm_to_mte(unsigned int map_perm)
1991{
1992 u64 mte_base;
1993
1994 mte_base = pagesize_code();
1995
1996 if (map_perm & LDC_MAP_SHADOW) {
1997 if (map_perm & LDC_MAP_R)
1998 mte_base |= LDC_MTE_COPY_R;
1999 if (map_perm & LDC_MAP_W)
2000 mte_base |= LDC_MTE_COPY_W;
2001 }
2002 if (map_perm & LDC_MAP_DIRECT) {
2003 if (map_perm & LDC_MAP_R)
2004 mte_base |= LDC_MTE_READ;
2005 if (map_perm & LDC_MAP_W)
2006 mte_base |= LDC_MTE_WRITE;
2007 if (map_perm & LDC_MAP_X)
2008 mte_base |= LDC_MTE_EXEC;
2009 }
2010 if (map_perm & LDC_MAP_IO) {
2011 if (map_perm & LDC_MAP_R)
2012 mte_base |= LDC_MTE_IOMMU_R;
2013 if (map_perm & LDC_MAP_W)
2014 mte_base |= LDC_MTE_IOMMU_W;
2015 }
2016
2017 return mte_base;
2018}
2019
2020static int pages_in_region(unsigned long base, long len)
2021{
2022 int count = 0;
2023
2024 do {
2025 unsigned long new = (base + PAGE_SIZE) & PAGE_MASK;
2026
2027 len -= (new - base);
2028 base = new;
2029 count++;
2030 } while (len > 0);
2031
2032 return count;
2033}
2034
2035struct cookie_state {
2036 struct ldc_mtable_entry *page_table;
2037 struct ldc_trans_cookie *cookies;
2038 u64 mte_base;
2039 u64 prev_cookie;
2040 u32 pte_idx;
2041 u32 nc;
2042};
2043
2044static void fill_cookies(struct cookie_state *sp, unsigned long pa,
2045 unsigned long off, unsigned long len)
2046{
2047 do {
2048 unsigned long tlen, new = pa + PAGE_SIZE;
2049 u64 this_cookie;
2050
2051 sp->page_table[sp->pte_idx].mte = sp->mte_base | pa;
2052
2053 tlen = PAGE_SIZE;
2054 if (off)
2055 tlen = PAGE_SIZE - off;
2056 if (tlen > len)
2057 tlen = len;
2058
2059 this_cookie = make_cookie(sp->pte_idx,
2060 pagesize_code(), off);
2061
2062 off = 0;
2063
2064 if (this_cookie == sp->prev_cookie) {
2065 sp->cookies[sp->nc - 1].cookie_size += tlen;
2066 } else {
2067 sp->cookies[sp->nc].cookie_addr = this_cookie;
2068 sp->cookies[sp->nc].cookie_size = tlen;
2069 sp->nc++;
2070 }
2071 sp->prev_cookie = this_cookie + tlen;
2072
2073 sp->pte_idx++;
2074
2075 len -= tlen;
2076 pa = new;
2077 } while (len > 0);
2078}
2079
2080static int sg_count_one(struct scatterlist *sg)
2081{
2082 unsigned long base = page_to_pfn(sg_page(sg)) << PAGE_SHIFT;
2083 long len = sg->length;
2084
2085 if ((sg->offset | len) & (8UL - 1))
2086 return -EFAULT;
2087
2088 return pages_in_region(base + sg->offset, len);
2089}
2090
2091static int sg_count_pages(struct scatterlist *sg, int num_sg)
2092{
2093 int count;
2094 int i;
2095
2096 count = 0;
2097 for (i = 0; i < num_sg; i++) {
2098 int err = sg_count_one(sg + i);
2099 if (err < 0)
2100 return err;
2101 count += err;
2102 }
2103
2104 return count;
2105}
2106
2107int ldc_map_sg(struct ldc_channel *lp,
2108 struct scatterlist *sg, int num_sg,
2109 struct ldc_trans_cookie *cookies, int ncookies,
2110 unsigned int map_perm)
2111{
2112 unsigned long i, npages;
2113 struct ldc_mtable_entry *base;
2114 struct cookie_state state;
2115 struct ldc_iommu *iommu;
2116 int err;
2117
2118 if (map_perm & ~LDC_MAP_ALL)
2119 return -EINVAL;
2120
2121 err = sg_count_pages(sg, num_sg);
2122 if (err < 0)
2123 return err;
2124
2125 npages = err;
2126 if (err > ncookies)
2127 return -EMSGSIZE;
2128
2129 iommu = &lp->iommu;
2130
2131 base = alloc_npages(iommu, npages);
2132
2133 if (!base)
2134 return -ENOMEM;
2135
2136 state.page_table = iommu->page_table;
2137 state.cookies = cookies;
2138 state.mte_base = perm_to_mte(map_perm);
2139 state.prev_cookie = ~(u64)0;
2140 state.pte_idx = (base - iommu->page_table);
2141 state.nc = 0;
2142
2143 for (i = 0; i < num_sg; i++)
2144 fill_cookies(&state, page_to_pfn(sg_page(&sg[i])) << PAGE_SHIFT,
2145 sg[i].offset, sg[i].length);
2146
2147 return state.nc;
2148}
2149EXPORT_SYMBOL(ldc_map_sg);
2150
2151int ldc_map_single(struct ldc_channel *lp,
2152 void *buf, unsigned int len,
2153 struct ldc_trans_cookie *cookies, int ncookies,
2154 unsigned int map_perm)
2155{
2156 unsigned long npages, pa;
2157 struct ldc_mtable_entry *base;
2158 struct cookie_state state;
2159 struct ldc_iommu *iommu;
2160
2161 if ((map_perm & ~LDC_MAP_ALL) || (ncookies < 1))
2162 return -EINVAL;
2163
2164 pa = __pa(buf);
2165 if ((pa | len) & (8UL - 1))
2166 return -EFAULT;
2167
2168 npages = pages_in_region(pa, len);
2169
2170 iommu = &lp->iommu;
2171
2172 base = alloc_npages(iommu, npages);
2173
2174 if (!base)
2175 return -ENOMEM;
2176
2177 state.page_table = iommu->page_table;
2178 state.cookies = cookies;
2179 state.mte_base = perm_to_mte(map_perm);
2180 state.prev_cookie = ~(u64)0;
2181 state.pte_idx = (base - iommu->page_table);
2182 state.nc = 0;
2183 fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len);
2184 BUG_ON(state.nc > ncookies);
2185
2186 return state.nc;
2187}
2188EXPORT_SYMBOL(ldc_map_single);
2189
2190
2191static void free_npages(unsigned long id, struct ldc_iommu *iommu,
2192 u64 cookie, u64 size)
2193{
2194 unsigned long npages;
2195 struct ldc_demap_arg demap_arg;
2196
2197 demap_arg.ldc_iommu = iommu;
2198 demap_arg.cookie = cookie;
2199 demap_arg.id = id;
2200
2201 npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;
2202 iommu_tbl_range_free(&iommu->iommu_table, cookie, npages, true,
2203 &demap_arg);
2204
2205}
2206
2207void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
2208 int ncookies)
2209{
2210 struct ldc_iommu *iommu = &lp->iommu;
2211 int i;
2212 unsigned long flags;
2213
2214 spin_lock_irqsave(&iommu->lock, flags);
2215 for (i = 0; i < ncookies; i++) {
2216 u64 addr = cookies[i].cookie_addr;
2217 u64 size = cookies[i].cookie_size;
2218
2219 free_npages(lp->id, iommu, addr, size);
2220 }
2221 spin_unlock_irqrestore(&iommu->lock, flags);
2222}
2223EXPORT_SYMBOL(ldc_unmap);
2224
2225int ldc_copy(struct ldc_channel *lp, int copy_dir,
2226 void *buf, unsigned int len, unsigned long offset,
2227 struct ldc_trans_cookie *cookies, int ncookies)
2228{
2229 unsigned int orig_len;
2230 unsigned long ra;
2231 int i;
2232
2233 if (copy_dir != LDC_COPY_IN && copy_dir != LDC_COPY_OUT) {
2234 printk(KERN_ERR PFX "ldc_copy: ID[%lu] Bad copy_dir[%d]\n",
2235 lp->id, copy_dir);
2236 return -EINVAL;
2237 }
2238
2239 ra = __pa(buf);
2240 if ((ra | len | offset) & (8UL - 1)) {
2241 printk(KERN_ERR PFX "ldc_copy: ID[%lu] Unaligned buffer "
2242 "ra[%lx] len[%x] offset[%lx]\n",
2243 lp->id, ra, len, offset);
2244 return -EFAULT;
2245 }
2246
2247 if (lp->hs_state != LDC_HS_COMPLETE ||
2248 (lp->flags & LDC_FLAG_RESET)) {
2249 printk(KERN_ERR PFX "ldc_copy: ID[%lu] Link down hs_state[%x] "
2250 "flags[%x]\n", lp->id, lp->hs_state, lp->flags);
2251 return -ECONNRESET;
2252 }
2253
2254 orig_len = len;
2255 for (i = 0; i < ncookies; i++) {
2256 unsigned long cookie_raddr = cookies[i].cookie_addr;
2257 unsigned long this_len = cookies[i].cookie_size;
2258 unsigned long actual_len;
2259
2260 if (unlikely(offset)) {
2261 unsigned long this_off = offset;
2262
2263 if (this_off > this_len)
2264 this_off = this_len;
2265
2266 offset -= this_off;
2267 this_len -= this_off;
2268 if (!this_len)
2269 continue;
2270 cookie_raddr += this_off;
2271 }
2272
2273 if (this_len > len)
2274 this_len = len;
2275
2276 while (1) {
2277 unsigned long hv_err;
2278
2279 hv_err = sun4v_ldc_copy(lp->id, copy_dir,
2280 cookie_raddr, ra,
2281 this_len, &actual_len);
2282 if (unlikely(hv_err)) {
2283 printk(KERN_ERR PFX "ldc_copy: ID[%lu] "
2284 "HV error %lu\n",
2285 lp->id, hv_err);
2286 if (lp->hs_state != LDC_HS_COMPLETE ||
2287 (lp->flags & LDC_FLAG_RESET))
2288 return -ECONNRESET;
2289 else
2290 return -EFAULT;
2291 }
2292
2293 cookie_raddr += actual_len;
2294 ra += actual_len;
2295 len -= actual_len;
2296 if (actual_len == this_len)
2297 break;
2298
2299 this_len -= actual_len;
2300 }
2301
2302 if (!len)
2303 break;
2304 }
2305
2306 /* It is caller policy what to do about short copies.
2307 * For example, a networking driver can declare the
2308 * packet a runt and drop it.
2309 */
2310
2311 return orig_len - len;
2312}
2313EXPORT_SYMBOL(ldc_copy);
2314
2315void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
2316 struct ldc_trans_cookie *cookies, int *ncookies,
2317 unsigned int map_perm)
2318{
2319 void *buf;
2320 int err;
2321
2322 if (len & (8UL - 1))
2323 return ERR_PTR(-EINVAL);
2324
2325 buf = kzalloc(len, GFP_KERNEL);
2326 if (!buf)
2327 return ERR_PTR(-ENOMEM);
2328
2329 err = ldc_map_single(lp, buf, len, cookies, *ncookies, map_perm);
2330 if (err < 0) {
2331 kfree(buf);
2332 return ERR_PTR(err);
2333 }
2334 *ncookies = err;
2335
2336 return buf;
2337}
2338EXPORT_SYMBOL(ldc_alloc_exp_dring);
2339
2340void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len,
2341 struct ldc_trans_cookie *cookies, int ncookies)
2342{
2343 ldc_unmap(lp, cookies, ncookies);
2344 kfree(buf);
2345}
2346EXPORT_SYMBOL(ldc_free_exp_dring);
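/* Hypothetical usage sketch for the two helpers above (buffer size, cookie
 * count and surrounding code are made up for illustration; error handling
 * is elided):
 *
 *	struct ldc_trans_cookie cookies[4];
 *	int ncookies = ARRAY_SIZE(cookies);
 *	void *dring;
 *
 *	dring = ldc_alloc_exp_dring(lp, 8192, cookies, &ncookies,
 *				    LDC_MAP_SHADOW | LDC_MAP_R | LDC_MAP_W);
 *	if (IS_ERR(dring))
 *		return PTR_ERR(dring);
 *	// ... export the returned cookies to the peer over the channel ...
 *	ldc_free_exp_dring(lp, dring, 8192, cookies, ncookies);
 */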
2347
2348static int __init ldc_init(void)
2349{
2350 unsigned long major, minor;
2351 struct mdesc_handle *hp;
2352 const u64 *v;
2353 int err;
2354 u64 mp;
2355
2356 hp = mdesc_grab();
2357 if (!hp)
2358 return -ENODEV;
2359
2360 mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
2361 err = -ENODEV;
2362 if (mp == MDESC_NODE_NULL)
2363 goto out;
2364
2365 v = mdesc_get_property(hp, mp, "domaining-enabled", NULL);
2366 if (!v)
2367 goto out;
2368
2369 major = 1;
2370 minor = 0;
2371 if (sun4v_hvapi_register(HV_GRP_LDOM, major, &minor)) {
2372 printk(KERN_INFO PFX "Could not register LDOM hvapi.\n");
2373 goto out;
2374 }
2375
2376 printk(KERN_INFO "%s", version);
2377
2378 if (!*v) {
2379 printk(KERN_INFO PFX "Domaining disabled.\n");
2380 goto out;
2381 }
2382 ldom_domaining_enabled = 1;
2383 err = 0;
2384
2385out:
2386 mdesc_release(hp);
2387 return err;
2388}
2389
2390core_initcall(ldc_init);