1 /* pci_sun4v.c: SUN4V specific PCI controller support.
3 * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
6 #include <linux/kernel.h>
7 #include <linux/types.h>
9 #include <linux/init.h>
10 #include <linux/slab.h>
11 #include <linux/interrupt.h>
12 #include <linux/percpu.h>
13 #include <linux/irq.h>
14 #include <linux/msi.h>
17 #include <asm/iommu.h>
20 #include <asm/pstate.h>
21 #include <asm/oplib.h>
22 #include <asm/hypervisor.h>
26 #include "iommu_common.h"
28 #include "pci_sun4v.h"
30 #define PGLIST_NENTS (PAGE_SIZE / sizeof(u64))
32 struct pci_iommu_batch
{
33 struct pci_dev
*pdev
; /* Device mapping is for. */
34 unsigned long prot
; /* IOMMU page protections */
35 unsigned long entry
; /* Index into IOTSB. */
36 u64
*pglist
; /* List of physical pages */
37 unsigned long npages
; /* Number of pages in list. */
40 static DEFINE_PER_CPU(struct pci_iommu_batch
, pci_iommu_batch
);
42 /* Interrupts must be disabled. */
43 static inline void pci_iommu_batch_start(struct pci_dev
*pdev
, unsigned long prot
, unsigned long entry
)
45 struct pci_iommu_batch
*p
= &__get_cpu_var(pci_iommu_batch
);
53 /* Interrupts must be disabled. */
54 static long pci_iommu_batch_flush(struct pci_iommu_batch
*p
)
56 struct pci_pbm_info
*pbm
= p
->pdev
->dev
.archdata
.host_controller
;
57 unsigned long devhandle
= pbm
->devhandle
;
58 unsigned long prot
= p
->prot
;
59 unsigned long entry
= p
->entry
;
60 u64
*pglist
= p
->pglist
;
61 unsigned long npages
= p
->npages
;
66 num
= pci_sun4v_iommu_map(devhandle
, HV_PCI_TSBID(0, entry
),
67 npages
, prot
, __pa(pglist
));
68 if (unlikely(num
< 0)) {
69 if (printk_ratelimit())
70 printk("pci_iommu_batch_flush: IOMMU map of "
71 "[%08lx:%08lx:%lx:%lx:%lx] failed with "
73 devhandle
, HV_PCI_TSBID(0, entry
),
74 npages
, prot
, __pa(pglist
), num
);
89 /* Interrupts must be disabled. */
90 static inline long pci_iommu_batch_add(u64 phys_page
)
92 struct pci_iommu_batch
*p
= &__get_cpu_var(pci_iommu_batch
);
94 BUG_ON(p
->npages
>= PGLIST_NENTS
);
96 p
->pglist
[p
->npages
++] = phys_page
;
97 if (p
->npages
== PGLIST_NENTS
)
98 return pci_iommu_batch_flush(p
);
103 /* Interrupts must be disabled. */
104 static inline long pci_iommu_batch_end(void)
106 struct pci_iommu_batch
*p
= &__get_cpu_var(pci_iommu_batch
);
108 BUG_ON(p
->npages
>= PGLIST_NENTS
);
110 return pci_iommu_batch_flush(p
);
113 static long pci_arena_alloc(struct pci_iommu_arena
*arena
, unsigned long npages
)
115 unsigned long n
, i
, start
, end
, limit
;
118 limit
= arena
->limit
;
123 n
= find_next_zero_bit(arena
->map
, limit
, start
);
125 if (unlikely(end
>= limit
)) {
126 if (likely(pass
< 1)) {
132 /* Scanned the whole thing, give up. */
137 for (i
= n
; i
< end
; i
++) {
138 if (test_bit(i
, arena
->map
)) {
144 for (i
= n
; i
< end
; i
++)
145 __set_bit(i
, arena
->map
);
152 static void pci_arena_free(struct pci_iommu_arena
*arena
, unsigned long base
, unsigned long npages
)
156 for (i
= base
; i
< (base
+ npages
); i
++)
157 __clear_bit(i
, arena
->map
);
160 static void *pci_4v_alloc_consistent(struct pci_dev
*pdev
, size_t size
, dma_addr_t
*dma_addrp
, gfp_t gfp
)
162 struct pci_iommu
*iommu
;
163 unsigned long flags
, order
, first_page
, npages
, n
;
167 size
= IO_PAGE_ALIGN(size
);
168 order
= get_order(size
);
169 if (unlikely(order
>= MAX_ORDER
))
172 npages
= size
>> IO_PAGE_SHIFT
;
174 first_page
= __get_free_pages(gfp
, order
);
175 if (unlikely(first_page
== 0UL))
178 memset((char *)first_page
, 0, PAGE_SIZE
<< order
);
180 iommu
= pdev
->dev
.archdata
.iommu
;
182 spin_lock_irqsave(&iommu
->lock
, flags
);
183 entry
= pci_arena_alloc(&iommu
->arena
, npages
);
184 spin_unlock_irqrestore(&iommu
->lock
, flags
);
186 if (unlikely(entry
< 0L))
187 goto arena_alloc_fail
;
189 *dma_addrp
= (iommu
->page_table_map_base
+
190 (entry
<< IO_PAGE_SHIFT
));
191 ret
= (void *) first_page
;
192 first_page
= __pa(first_page
);
194 local_irq_save(flags
);
196 pci_iommu_batch_start(pdev
,
197 (HV_PCI_MAP_ATTR_READ
|
198 HV_PCI_MAP_ATTR_WRITE
),
201 for (n
= 0; n
< npages
; n
++) {
202 long err
= pci_iommu_batch_add(first_page
+ (n
* PAGE_SIZE
));
203 if (unlikely(err
< 0L))
207 if (unlikely(pci_iommu_batch_end() < 0L))
210 local_irq_restore(flags
);
215 /* Interrupts are disabled. */
216 spin_lock(&iommu
->lock
);
217 pci_arena_free(&iommu
->arena
, entry
, npages
);
218 spin_unlock_irqrestore(&iommu
->lock
, flags
);
221 free_pages(first_page
, order
);
225 static void pci_4v_free_consistent(struct pci_dev
*pdev
, size_t size
, void *cpu
, dma_addr_t dvma
)
227 struct pci_pbm_info
*pbm
;
228 struct pci_iommu
*iommu
;
229 unsigned long flags
, order
, npages
, entry
;
232 npages
= IO_PAGE_ALIGN(size
) >> IO_PAGE_SHIFT
;
233 iommu
= pdev
->dev
.archdata
.iommu
;
234 pbm
= pdev
->dev
.archdata
.host_controller
;
235 devhandle
= pbm
->devhandle
;
236 entry
= ((dvma
- iommu
->page_table_map_base
) >> IO_PAGE_SHIFT
);
238 spin_lock_irqsave(&iommu
->lock
, flags
);
240 pci_arena_free(&iommu
->arena
, entry
, npages
);
245 num
= pci_sun4v_iommu_demap(devhandle
, HV_PCI_TSBID(0, entry
),
249 } while (npages
!= 0);
251 spin_unlock_irqrestore(&iommu
->lock
, flags
);
253 order
= get_order(size
);
255 free_pages((unsigned long)cpu
, order
);
258 static dma_addr_t
pci_4v_map_single(struct pci_dev
*pdev
, void *ptr
, size_t sz
, int direction
)
260 struct pci_iommu
*iommu
;
261 unsigned long flags
, npages
, oaddr
;
262 unsigned long i
, base_paddr
;
267 iommu
= pdev
->dev
.archdata
.iommu
;
269 if (unlikely(direction
== PCI_DMA_NONE
))
272 oaddr
= (unsigned long)ptr
;
273 npages
= IO_PAGE_ALIGN(oaddr
+ sz
) - (oaddr
& IO_PAGE_MASK
);
274 npages
>>= IO_PAGE_SHIFT
;
276 spin_lock_irqsave(&iommu
->lock
, flags
);
277 entry
= pci_arena_alloc(&iommu
->arena
, npages
);
278 spin_unlock_irqrestore(&iommu
->lock
, flags
);
280 if (unlikely(entry
< 0L))
283 bus_addr
= (iommu
->page_table_map_base
+
284 (entry
<< IO_PAGE_SHIFT
));
285 ret
= bus_addr
| (oaddr
& ~IO_PAGE_MASK
);
286 base_paddr
= __pa(oaddr
& IO_PAGE_MASK
);
287 prot
= HV_PCI_MAP_ATTR_READ
;
288 if (direction
!= PCI_DMA_TODEVICE
)
289 prot
|= HV_PCI_MAP_ATTR_WRITE
;
291 local_irq_save(flags
);
293 pci_iommu_batch_start(pdev
, prot
, entry
);
295 for (i
= 0; i
< npages
; i
++, base_paddr
+= IO_PAGE_SIZE
) {
296 long err
= pci_iommu_batch_add(base_paddr
);
297 if (unlikely(err
< 0L))
300 if (unlikely(pci_iommu_batch_end() < 0L))
303 local_irq_restore(flags
);
308 if (printk_ratelimit())
310 return PCI_DMA_ERROR_CODE
;
313 /* Interrupts are disabled. */
314 spin_lock(&iommu
->lock
);
315 pci_arena_free(&iommu
->arena
, entry
, npages
);
316 spin_unlock_irqrestore(&iommu
->lock
, flags
);
318 return PCI_DMA_ERROR_CODE
;
321 static void pci_4v_unmap_single(struct pci_dev
*pdev
, dma_addr_t bus_addr
, size_t sz
, int direction
)
323 struct pci_pbm_info
*pbm
;
324 struct pci_iommu
*iommu
;
325 unsigned long flags
, npages
;
329 if (unlikely(direction
== PCI_DMA_NONE
)) {
330 if (printk_ratelimit())
335 iommu
= pdev
->dev
.archdata
.iommu
;
336 pbm
= pdev
->dev
.archdata
.host_controller
;
337 devhandle
= pbm
->devhandle
;
339 npages
= IO_PAGE_ALIGN(bus_addr
+ sz
) - (bus_addr
& IO_PAGE_MASK
);
340 npages
>>= IO_PAGE_SHIFT
;
341 bus_addr
&= IO_PAGE_MASK
;
343 spin_lock_irqsave(&iommu
->lock
, flags
);
345 entry
= (bus_addr
- iommu
->page_table_map_base
) >> IO_PAGE_SHIFT
;
346 pci_arena_free(&iommu
->arena
, entry
, npages
);
351 num
= pci_sun4v_iommu_demap(devhandle
, HV_PCI_TSBID(0, entry
),
355 } while (npages
!= 0);
357 spin_unlock_irqrestore(&iommu
->lock
, flags
);
360 #define SG_ENT_PHYS_ADDRESS(SG) \
361 (__pa(page_address((SG)->page)) + (SG)->offset)
363 static inline long fill_sg(long entry
, struct pci_dev
*pdev
,
364 struct scatterlist
*sg
,
365 int nused
, int nelems
, unsigned long prot
)
367 struct scatterlist
*dma_sg
= sg
;
368 struct scatterlist
*sg_end
= sg
+ nelems
;
372 local_irq_save(flags
);
374 pci_iommu_batch_start(pdev
, prot
, entry
);
376 for (i
= 0; i
< nused
; i
++) {
377 unsigned long pteval
= ~0UL;
380 dma_npages
= ((dma_sg
->dma_address
& (IO_PAGE_SIZE
- 1UL)) +
382 ((IO_PAGE_SIZE
- 1UL))) >> IO_PAGE_SHIFT
;
384 unsigned long offset
;
387 /* If we are here, we know we have at least one
388 * more page to map. So walk forward until we
389 * hit a page crossing, and begin creating new
390 * mappings from that spot.
395 tmp
= SG_ENT_PHYS_ADDRESS(sg
);
397 if (((tmp
^ pteval
) >> IO_PAGE_SHIFT
) != 0UL) {
398 pteval
= tmp
& IO_PAGE_MASK
;
399 offset
= tmp
& (IO_PAGE_SIZE
- 1UL);
402 if (((tmp
^ (tmp
+ len
- 1UL)) >> IO_PAGE_SHIFT
) != 0UL) {
403 pteval
= (tmp
+ IO_PAGE_SIZE
) & IO_PAGE_MASK
;
405 len
-= (IO_PAGE_SIZE
- (tmp
& (IO_PAGE_SIZE
- 1UL)));
411 pteval
= (pteval
& IOPTE_PAGE
);
415 err
= pci_iommu_batch_add(pteval
);
416 if (unlikely(err
< 0L))
417 goto iommu_map_failed
;
419 pteval
+= IO_PAGE_SIZE
;
420 len
-= (IO_PAGE_SIZE
- offset
);
425 pteval
= (pteval
& IOPTE_PAGE
) + len
;
428 /* Skip over any tail mappings we've fully mapped,
429 * adjusting pteval along the way. Stop when we
430 * detect a page crossing event.
432 while (sg
< sg_end
&&
433 (pteval
<< (64 - IO_PAGE_SHIFT
)) != 0UL &&
434 (pteval
== SG_ENT_PHYS_ADDRESS(sg
)) &&
436 (SG_ENT_PHYS_ADDRESS(sg
) + sg
->length
- 1UL)) >> IO_PAGE_SHIFT
) == 0UL) {
437 pteval
+= sg
->length
;
440 if ((pteval
<< (64 - IO_PAGE_SHIFT
)) == 0UL)
442 } while (dma_npages
!= 0);
446 if (unlikely(pci_iommu_batch_end() < 0L))
447 goto iommu_map_failed
;
449 local_irq_restore(flags
);
453 local_irq_restore(flags
);
457 static int pci_4v_map_sg(struct pci_dev
*pdev
, struct scatterlist
*sglist
, int nelems
, int direction
)
459 struct pci_iommu
*iommu
;
460 unsigned long flags
, npages
, prot
;
462 struct scatterlist
*sgtmp
;
466 /* Fast path single entry scatterlists. */
468 sglist
->dma_address
=
469 pci_4v_map_single(pdev
,
470 (page_address(sglist
->page
) + sglist
->offset
),
471 sglist
->length
, direction
);
472 if (unlikely(sglist
->dma_address
== PCI_DMA_ERROR_CODE
))
474 sglist
->dma_length
= sglist
->length
;
478 iommu
= pdev
->dev
.archdata
.iommu
;
480 if (unlikely(direction
== PCI_DMA_NONE
))
483 /* Step 1: Prepare scatter list. */
484 npages
= prepare_sg(sglist
, nelems
);
486 /* Step 2: Allocate a cluster and context, if necessary. */
487 spin_lock_irqsave(&iommu
->lock
, flags
);
488 entry
= pci_arena_alloc(&iommu
->arena
, npages
);
489 spin_unlock_irqrestore(&iommu
->lock
, flags
);
491 if (unlikely(entry
< 0L))
494 dma_base
= iommu
->page_table_map_base
+
495 (entry
<< IO_PAGE_SHIFT
);
497 /* Step 3: Normalize DMA addresses. */
501 while (used
&& sgtmp
->dma_length
) {
502 sgtmp
->dma_address
+= dma_base
;
506 used
= nelems
- used
;
508 /* Step 4: Create the mappings. */
509 prot
= HV_PCI_MAP_ATTR_READ
;
510 if (direction
!= PCI_DMA_TODEVICE
)
511 prot
|= HV_PCI_MAP_ATTR_WRITE
;
513 err
= fill_sg(entry
, pdev
, sglist
, used
, nelems
, prot
);
514 if (unlikely(err
< 0L))
515 goto iommu_map_failed
;
520 if (printk_ratelimit())
525 spin_lock_irqsave(&iommu
->lock
, flags
);
526 pci_arena_free(&iommu
->arena
, entry
, npages
);
527 spin_unlock_irqrestore(&iommu
->lock
, flags
);
532 static void pci_4v_unmap_sg(struct pci_dev
*pdev
, struct scatterlist
*sglist
, int nelems
, int direction
)
534 struct pci_pbm_info
*pbm
;
535 struct pci_iommu
*iommu
;
536 unsigned long flags
, i
, npages
;
538 u32 devhandle
, bus_addr
;
540 if (unlikely(direction
== PCI_DMA_NONE
)) {
541 if (printk_ratelimit())
545 iommu
= pdev
->dev
.archdata
.iommu
;
546 pbm
= pdev
->dev
.archdata
.host_controller
;
547 devhandle
= pbm
->devhandle
;
549 bus_addr
= sglist
->dma_address
& IO_PAGE_MASK
;
551 for (i
= 1; i
< nelems
; i
++)
552 if (sglist
[i
].dma_length
== 0)
555 npages
= (IO_PAGE_ALIGN(sglist
[i
].dma_address
+ sglist
[i
].dma_length
) -
556 bus_addr
) >> IO_PAGE_SHIFT
;
558 entry
= ((bus_addr
- iommu
->page_table_map_base
) >> IO_PAGE_SHIFT
);
560 spin_lock_irqsave(&iommu
->lock
, flags
);
562 pci_arena_free(&iommu
->arena
, entry
, npages
);
567 num
= pci_sun4v_iommu_demap(devhandle
, HV_PCI_TSBID(0, entry
),
571 } while (npages
!= 0);
573 spin_unlock_irqrestore(&iommu
->lock
, flags
);
576 static void pci_4v_dma_sync_single_for_cpu(struct pci_dev
*pdev
, dma_addr_t bus_addr
, size_t sz
, int direction
)
578 /* Nothing to do... */
/* SUN4V IOMMU mappings are coherent; no CPU sync work is required. */
static void pci_4v_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
{
	/* Nothing to do... */
}
586 const struct pci_iommu_ops pci_sun4v_iommu_ops
= {
587 .alloc_consistent
= pci_4v_alloc_consistent
,
588 .free_consistent
= pci_4v_free_consistent
,
589 .map_single
= pci_4v_map_single
,
590 .unmap_single
= pci_4v_unmap_single
,
591 .map_sg
= pci_4v_map_sg
,
592 .unmap_sg
= pci_4v_unmap_sg
,
593 .dma_sync_single_for_cpu
= pci_4v_dma_sync_single_for_cpu
,
594 .dma_sync_sg_for_cpu
= pci_4v_dma_sync_sg_for_cpu
,
597 static inline int pci_sun4v_out_of_range(struct pci_pbm_info
*pbm
, unsigned int bus
, unsigned int device
, unsigned int func
)
599 if (bus
< pbm
->pci_first_busno
||
600 bus
> pbm
->pci_last_busno
)
605 static int pci_sun4v_read_pci_cfg(struct pci_bus
*bus_dev
, unsigned int devfn
,
606 int where
, int size
, u32
*value
)
608 struct pci_pbm_info
*pbm
= bus_dev
->sysdata
;
609 u32 devhandle
= pbm
->devhandle
;
610 unsigned int bus
= bus_dev
->number
;
611 unsigned int device
= PCI_SLOT(devfn
);
612 unsigned int func
= PCI_FUNC(devfn
);
615 if (bus_dev
== pbm
->pci_bus
&& devfn
== 0x00)
616 return pci_host_bridge_read_pci_cfg(bus_dev
, devfn
, where
,
618 if (pci_sun4v_out_of_range(pbm
, bus
, device
, func
)) {
621 ret
= pci_sun4v_config_get(devhandle
,
622 HV_PCI_DEVICE_BUILD(bus
, device
, func
),
625 printk("rcfg: [%x:%x:%x:%d]=[%lx]\n",
626 devhandle
, HV_PCI_DEVICE_BUILD(bus
, device
, func
),
635 *value
= ret
& 0xffff;
638 *value
= ret
& 0xffffffff;
643 return PCIBIOS_SUCCESSFUL
;
646 static int pci_sun4v_write_pci_cfg(struct pci_bus
*bus_dev
, unsigned int devfn
,
647 int where
, int size
, u32 value
)
649 struct pci_pbm_info
*pbm
= bus_dev
->sysdata
;
650 u32 devhandle
= pbm
->devhandle
;
651 unsigned int bus
= bus_dev
->number
;
652 unsigned int device
= PCI_SLOT(devfn
);
653 unsigned int func
= PCI_FUNC(devfn
);
656 if (bus_dev
== pbm
->pci_bus
&& devfn
== 0x00)
657 return pci_host_bridge_write_pci_cfg(bus_dev
, devfn
, where
,
659 if (pci_sun4v_out_of_range(pbm
, bus
, device
, func
)) {
662 ret
= pci_sun4v_config_put(devhandle
,
663 HV_PCI_DEVICE_BUILD(bus
, device
, func
),
666 printk("wcfg: [%x:%x:%x:%d] v[%x] == [%lx]\n",
667 devhandle
, HV_PCI_DEVICE_BUILD(bus
, device
, func
),
668 where
, size
, value
, ret
);
671 return PCIBIOS_SUCCESSFUL
;
674 static struct pci_ops pci_sun4v_ops
= {
675 .read
= pci_sun4v_read_pci_cfg
,
676 .write
= pci_sun4v_write_pci_cfg
,
680 static void pbm_scan_bus(struct pci_controller_info
*p
,
681 struct pci_pbm_info
*pbm
)
683 pbm
->pci_bus
= pci_scan_one_pbm(pbm
);
686 static void pci_sun4v_scan_bus(struct pci_controller_info
*p
)
688 struct property
*prop
;
689 struct device_node
*dp
;
691 if ((dp
= p
->pbm_A
.prom_node
) != NULL
) {
692 prop
= of_find_property(dp
, "66mhz-capable", NULL
);
693 p
->pbm_A
.is_66mhz_capable
= (prop
!= NULL
);
695 pbm_scan_bus(p
, &p
->pbm_A
);
697 if ((dp
= p
->pbm_B
.prom_node
) != NULL
) {
698 prop
= of_find_property(dp
, "66mhz-capable", NULL
);
699 p
->pbm_B
.is_66mhz_capable
= (prop
!= NULL
);
701 pbm_scan_bus(p
, &p
->pbm_B
);
704 /* XXX register error interrupt handlers XXX */
707 static unsigned long probe_existing_entries(struct pci_pbm_info
*pbm
,
708 struct pci_iommu
*iommu
)
710 struct pci_iommu_arena
*arena
= &iommu
->arena
;
711 unsigned long i
, cnt
= 0;
714 devhandle
= pbm
->devhandle
;
715 for (i
= 0; i
< arena
->limit
; i
++) {
716 unsigned long ret
, io_attrs
, ra
;
718 ret
= pci_sun4v_iommu_getmap(devhandle
,
722 if (page_in_phys_avail(ra
)) {
723 pci_sun4v_iommu_demap(devhandle
,
724 HV_PCI_TSBID(0, i
), 1);
727 __set_bit(i
, arena
->map
);
735 static void pci_sun4v_iommu_init(struct pci_pbm_info
*pbm
)
737 struct pci_iommu
*iommu
= pbm
->iommu
;
738 struct property
*prop
;
739 unsigned long num_tsb_entries
, sz
;
740 u32 vdma
[2], dma_mask
, dma_offset
;
743 prop
= of_find_property(pbm
->prom_node
, "virtual-dma", NULL
);
745 u32
*val
= prop
->value
;
750 /* No property, use default values. */
751 vdma
[0] = 0x80000000;
752 vdma
[1] = 0x80000000;
758 dma_mask
|= 0x1fffffff;
763 dma_mask
|= 0x3fffffff;
768 dma_mask
|= 0x7fffffff;
773 prom_printf("PCI-SUN4V: strange virtual-dma size.\n");
777 tsbsize
*= (8 * 1024);
779 num_tsb_entries
= tsbsize
/ sizeof(iopte_t
);
781 dma_offset
= vdma
[0];
783 /* Setup initial software IOMMU state. */
784 spin_lock_init(&iommu
->lock
);
785 iommu
->ctx_lowest_free
= 1;
786 iommu
->page_table_map_base
= dma_offset
;
787 iommu
->dma_addr_mask
= dma_mask
;
789 /* Allocate and initialize the free area map. */
790 sz
= num_tsb_entries
/ 8;
791 sz
= (sz
+ 7UL) & ~7UL;
792 iommu
->arena
.map
= kzalloc(sz
, GFP_KERNEL
);
793 if (!iommu
->arena
.map
) {
794 prom_printf("PCI_IOMMU: Error, kmalloc(arena.map) failed.\n");
797 iommu
->arena
.limit
= num_tsb_entries
;
799 sz
= probe_existing_entries(pbm
, iommu
);
801 printk("%s: Imported %lu TSB entries from OBP\n",
805 static void pci_sun4v_get_bus_range(struct pci_pbm_info
*pbm
)
807 struct property
*prop
;
808 unsigned int *busrange
;
810 prop
= of_find_property(pbm
->prom_node
, "bus-range", NULL
);
812 busrange
= prop
->value
;
814 pbm
->pci_first_busno
= busrange
[0];
815 pbm
->pci_last_busno
= busrange
[1];
819 #ifdef CONFIG_PCI_MSI
820 struct pci_sun4v_msiq_entry
{
822 #define MSIQ_VERSION_MASK 0xffffffff00000000UL
823 #define MSIQ_VERSION_SHIFT 32
824 #define MSIQ_TYPE_MASK 0x00000000000000ffUL
825 #define MSIQ_TYPE_SHIFT 0
826 #define MSIQ_TYPE_NONE 0x00
827 #define MSIQ_TYPE_MSG 0x01
828 #define MSIQ_TYPE_MSI32 0x02
829 #define MSIQ_TYPE_MSI64 0x03
830 #define MSIQ_TYPE_INTX 0x08
831 #define MSIQ_TYPE_NONE2 0xff
836 u64 req_id
; /* bus/device/func */
837 #define MSIQ_REQID_BUS_MASK 0xff00UL
838 #define MSIQ_REQID_BUS_SHIFT 8
839 #define MSIQ_REQID_DEVICE_MASK 0x00f8UL
840 #define MSIQ_REQID_DEVICE_SHIFT 3
841 #define MSIQ_REQID_FUNC_MASK 0x0007UL
842 #define MSIQ_REQID_FUNC_SHIFT 0
846 /* The format of this value is message type dependant.
847 * For MSI bits 15:0 are the data from the MSI packet.
848 * For MSI-X bits 31:0 are the data from the MSI packet.
849 * For MSG, the message code and message routing code where:
850 * bits 39:32 is the bus/device/fn of the msg target-id
851 * bits 18:16 is the message routing code
852 * bits 7:0 is the message code
853 * For INTx the low order 2-bits are:
864 /* For now this just runs as a pre-handler for the real interrupt handler.
865 * So we just walk through the queue and ACK all the entries, update the
866 * head pointer, and return.
868 * In the longer term it would be nice to do something more integrated
869 * wherein we can pass in some of this MSI info to the drivers. This
870 * would be most useful for PCIe fabric error messages, although we could
871 * invoke those directly from the loop here in order to pass the info around.
873 static void pci_sun4v_msi_prehandler(unsigned int ino
, void *data1
, void *data2
)
875 struct pci_pbm_info
*pbm
= data1
;
876 struct pci_sun4v_msiq_entry
*base
, *ep
;
877 unsigned long msiqid
, orig_head
, head
, type
, err
;
879 msiqid
= (unsigned long) data2
;
882 err
= pci_sun4v_msiq_gethead(pbm
->devhandle
, msiqid
, &head
);
886 if (unlikely(head
>= (pbm
->msiq_ent_count
* sizeof(struct pci_sun4v_msiq_entry
))))
889 head
/= sizeof(struct pci_sun4v_msiq_entry
);
891 base
= (pbm
->msi_queues
+ ((msiqid
- pbm
->msiq_first
) *
892 (pbm
->msiq_ent_count
*
893 sizeof(struct pci_sun4v_msiq_entry
))));
895 while ((ep
->version_type
& MSIQ_TYPE_MASK
) != 0) {
896 type
= (ep
->version_type
& MSIQ_TYPE_MASK
) >> MSIQ_TYPE_SHIFT
;
897 if (unlikely(type
!= MSIQ_TYPE_MSI32
&&
898 type
!= MSIQ_TYPE_MSI64
))
901 pci_sun4v_msi_setstate(pbm
->devhandle
,
902 ep
->msi_data
/* msi_num */,
905 /* Clear the entry. */
906 ep
->version_type
&= ~MSIQ_TYPE_MASK
;
908 /* Go to next entry in ring. */
910 if (head
>= pbm
->msiq_ent_count
)
915 if (likely(head
!= orig_head
)) {
916 /* ACK entries by updating head pointer. */
917 head
*= sizeof(struct pci_sun4v_msiq_entry
);
918 err
= pci_sun4v_msiq_sethead(pbm
->devhandle
, msiqid
, head
);
925 printk(KERN_EMERG
"MSI: Hypervisor set head gives error %lu\n", err
);
929 printk(KERN_EMERG
"MSI: Hypervisor get head gives error %lu\n", err
);
932 printk(KERN_EMERG
"MSI: devhandle[%x] msiqid[%lx] head[%lu]\n",
933 pbm
->devhandle
, msiqid
, head
);
937 printk(KERN_EMERG
"MSI: Hypervisor gives bad offset %lx max(%lx)\n",
938 head
, pbm
->msiq_ent_count
* sizeof(struct pci_sun4v_msiq_entry
));
942 printk(KERN_EMERG
"MSI: Entry has bad type %lx\n", type
);
946 static int msi_bitmap_alloc(struct pci_pbm_info
*pbm
)
948 unsigned long size
, bits_per_ulong
;
950 bits_per_ulong
= sizeof(unsigned long) * 8;
951 size
= (pbm
->msi_num
+ (bits_per_ulong
- 1)) & ~(bits_per_ulong
- 1);
953 BUG_ON(size
% sizeof(unsigned long));
955 pbm
->msi_bitmap
= kzalloc(size
, GFP_KERNEL
);
956 if (!pbm
->msi_bitmap
)
962 static void msi_bitmap_free(struct pci_pbm_info
*pbm
)
964 kfree(pbm
->msi_bitmap
);
965 pbm
->msi_bitmap
= NULL
;
968 static int msi_queue_alloc(struct pci_pbm_info
*pbm
)
970 unsigned long q_size
, alloc_size
, pages
, order
;
973 q_size
= pbm
->msiq_ent_count
* sizeof(struct pci_sun4v_msiq_entry
);
974 alloc_size
= (pbm
->msiq_num
* q_size
);
975 order
= get_order(alloc_size
);
976 pages
= __get_free_pages(GFP_KERNEL
| __GFP_COMP
, order
);
978 printk(KERN_ERR
"MSI: Cannot allocate MSI queues (o=%lu).\n",
982 memset((char *)pages
, 0, PAGE_SIZE
<< order
);
983 pbm
->msi_queues
= (void *) pages
;
985 for (i
= 0; i
< pbm
->msiq_num
; i
++) {
986 unsigned long err
, base
= __pa(pages
+ (i
* q_size
));
987 unsigned long ret1
, ret2
;
989 err
= pci_sun4v_msiq_conf(pbm
->devhandle
,
991 base
, pbm
->msiq_ent_count
);
993 printk(KERN_ERR
"MSI: msiq register fails (err=%lu)\n",
998 err
= pci_sun4v_msiq_info(pbm
->devhandle
,
1002 printk(KERN_ERR
"MSI: Cannot read msiq (err=%lu)\n",
1006 if (ret1
!= base
|| ret2
!= pbm
->msiq_ent_count
) {
1007 printk(KERN_ERR
"MSI: Bogus qconf "
1008 "expected[%lx:%x] got[%lx:%lx]\n",
1009 base
, pbm
->msiq_ent_count
,
1018 free_pages(pages
, order
);
1022 static void pci_sun4v_msi_init(struct pci_pbm_info
*pbm
)
1027 val
= of_get_property(pbm
->prom_node
, "#msi-eqs", &len
);
1028 if (!val
|| len
!= 4)
1030 pbm
->msiq_num
= *val
;
1031 if (pbm
->msiq_num
) {
1032 const struct msiq_prop
{
1037 const struct msi_range_prop
{
1041 const struct addr_range_prop
{
1050 val
= of_get_property(pbm
->prom_node
, "msi-eq-size", &len
);
1051 if (!val
|| len
!= 4)
1054 pbm
->msiq_ent_count
= *val
;
1056 mqp
= of_get_property(pbm
->prom_node
,
1057 "msi-eq-to-devino", &len
);
1058 if (!mqp
|| len
!= sizeof(struct msiq_prop
))
1061 pbm
->msiq_first
= mqp
->first_msiq
;
1062 pbm
->msiq_first_devino
= mqp
->first_devino
;
1064 val
= of_get_property(pbm
->prom_node
, "#msi", &len
);
1065 if (!val
|| len
!= 4)
1067 pbm
->msi_num
= *val
;
1069 mrng
= of_get_property(pbm
->prom_node
, "msi-ranges", &len
);
1070 if (!mrng
|| len
!= sizeof(struct msi_range_prop
))
1072 pbm
->msi_first
= mrng
->first_msi
;
1074 val
= of_get_property(pbm
->prom_node
, "msi-data-mask", &len
);
1075 if (!val
|| len
!= 4)
1077 pbm
->msi_data_mask
= *val
;
1079 val
= of_get_property(pbm
->prom_node
, "msix-data-width", &len
);
1080 if (!val
|| len
!= 4)
1082 pbm
->msix_data_width
= *val
;
1084 arng
= of_get_property(pbm
->prom_node
, "msi-address-ranges",
1086 if (!arng
|| len
!= sizeof(struct addr_range_prop
))
1088 pbm
->msi32_start
= ((u64
)arng
->msi32_high
<< 32) |
1089 (u64
) arng
->msi32_low
;
1090 pbm
->msi64_start
= ((u64
)arng
->msi64_high
<< 32) |
1091 (u64
) arng
->msi64_low
;
1092 pbm
->msi32_len
= arng
->msi32_len
;
1093 pbm
->msi64_len
= arng
->msi64_len
;
1095 if (msi_bitmap_alloc(pbm
))
1098 if (msi_queue_alloc(pbm
)) {
1099 msi_bitmap_free(pbm
);
1103 printk(KERN_INFO
"%s: MSI Queue first[%u] num[%u] count[%u] "
1106 pbm
->msiq_first
, pbm
->msiq_num
,
1107 pbm
->msiq_ent_count
,
1108 pbm
->msiq_first_devino
);
1109 printk(KERN_INFO
"%s: MSI first[%u] num[%u] mask[0x%x] "
1112 pbm
->msi_first
, pbm
->msi_num
, pbm
->msi_data_mask
,
1113 pbm
->msix_data_width
);
1114 printk(KERN_INFO
"%s: MSI addr32[0x%lx:0x%x] "
1115 "addr64[0x%lx:0x%x]\n",
1117 pbm
->msi32_start
, pbm
->msi32_len
,
1118 pbm
->msi64_start
, pbm
->msi64_len
);
1119 printk(KERN_INFO
"%s: MSI queues at RA [%p]\n",
1128 printk(KERN_INFO
"%s: No MSI support.\n", pbm
->name
);
1131 static int alloc_msi(struct pci_pbm_info
*pbm
)
1135 for (i
= 0; i
< pbm
->msi_num
; i
++) {
1136 if (!test_and_set_bit(i
, pbm
->msi_bitmap
))
1137 return i
+ pbm
->msi_first
;
1143 static void free_msi(struct pci_pbm_info
*pbm
, int msi_num
)
1145 msi_num
-= pbm
->msi_first
;
1146 clear_bit(msi_num
, pbm
->msi_bitmap
);
1149 static int pci_sun4v_setup_msi_irq(unsigned int *virt_irq_p
,
1150 struct pci_dev
*pdev
,
1151 struct msi_desc
*entry
)
1153 struct pci_pbm_info
*pbm
= pdev
->dev
.archdata
.host_controller
;
1154 unsigned long devino
, msiqid
;
1160 msi_num
= alloc_msi(pbm
);
1164 devino
= sun4v_build_msi(pbm
->devhandle
, virt_irq_p
,
1165 pbm
->msiq_first_devino
,
1166 (pbm
->msiq_first_devino
+
1172 set_irq_msi(*virt_irq_p
, entry
);
1174 msiqid
= ((devino
- pbm
->msiq_first_devino
) +
1178 if (pci_sun4v_msiq_setstate(pbm
->devhandle
, msiqid
, HV_MSIQSTATE_IDLE
))
1182 if (pci_sun4v_msiq_setvalid(pbm
->devhandle
, msiqid
, HV_MSIQ_VALID
))
1185 if (pci_sun4v_msi_setmsiq(pbm
->devhandle
,
1187 (entry
->msi_attrib
.is_64
?
1188 HV_MSITYPE_MSI64
: HV_MSITYPE_MSI32
)))
1191 if (pci_sun4v_msi_setstate(pbm
->devhandle
, msi_num
, HV_MSISTATE_IDLE
))
1194 if (pci_sun4v_msi_setvalid(pbm
->devhandle
, msi_num
, HV_MSIVALID_VALID
))
1197 pdev
->dev
.archdata
.msi_num
= msi_num
;
1199 if (entry
->msi_attrib
.is_64
) {
1200 msg
.address_hi
= pbm
->msi64_start
>> 32;
1201 msg
.address_lo
= pbm
->msi64_start
& 0xffffffff;
1204 msg
.address_lo
= pbm
->msi32_start
;
1207 write_msi_msg(*virt_irq_p
, &msg
);
1209 irq_install_pre_handler(*virt_irq_p
,
1210 pci_sun4v_msi_prehandler
,
1211 pbm
, (void *) msiqid
);
1216 free_msi(pbm
, msi_num
);
1217 sun4v_destroy_msi(*virt_irq_p
);
1223 static void pci_sun4v_teardown_msi_irq(unsigned int virt_irq
,
1224 struct pci_dev
*pdev
)
1226 struct pci_pbm_info
*pbm
= pdev
->dev
.archdata
.host_controller
;
1227 unsigned long msiqid
, err
;
1228 unsigned int msi_num
;
1230 msi_num
= pdev
->dev
.archdata
.msi_num
;
1231 err
= pci_sun4v_msi_getmsiq(pbm
->devhandle
, msi_num
, &msiqid
);
1233 printk(KERN_ERR
"%s: getmsiq gives error %lu\n",
1238 pci_sun4v_msi_setvalid(pbm
->devhandle
, msi_num
, HV_MSIVALID_INVALID
);
1239 pci_sun4v_msiq_setvalid(pbm
->devhandle
, msiqid
, HV_MSIQ_INVALID
);
1241 free_msi(pbm
, msi_num
);
1243 /* The sun4v_destroy_msi() will liberate the devino and thus the MSIQ
1246 sun4v_destroy_msi(virt_irq
);
1248 #else /* CONFIG_PCI_MSI */
/* MSI disabled in this configuration; nothing to set up. */
static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
{
}
1252 #endif /* !(CONFIG_PCI_MSI) */
1254 static void pci_sun4v_pbm_init(struct pci_controller_info
*p
, struct device_node
*dp
, u32 devhandle
)
1256 struct pci_pbm_info
*pbm
;
1258 if (devhandle
& 0x40)
1264 pbm
->prom_node
= dp
;
1266 pbm
->devhandle
= devhandle
;
1268 pbm
->name
= dp
->full_name
;
1270 printk("%s: SUN4V PCI Bus Module\n", pbm
->name
);
1272 pci_determine_mem_io_space(pbm
);
1274 pci_sun4v_get_bus_range(pbm
);
1275 pci_sun4v_iommu_init(pbm
);
1276 pci_sun4v_msi_init(pbm
);
1279 void sun4v_pci_init(struct device_node
*dp
, char *model_name
)
1281 struct pci_controller_info
*p
;
1282 struct pci_iommu
*iommu
;
1283 struct property
*prop
;
1284 struct linux_prom64_registers
*regs
;
1288 prop
= of_find_property(dp
, "reg", NULL
);
1291 devhandle
= (regs
->phys_addr
>> 32UL) & 0x0fffffff;
1293 for (p
= pci_controller_root
; p
; p
= p
->next
) {
1294 struct pci_pbm_info
*pbm
;
1296 if (p
->pbm_A
.prom_node
&& p
->pbm_B
.prom_node
)
1299 pbm
= (p
->pbm_A
.prom_node
?
1303 if (pbm
->devhandle
== (devhandle
^ 0x40)) {
1304 pci_sun4v_pbm_init(p
, dp
, devhandle
);
1309 for_each_possible_cpu(i
) {
1310 unsigned long page
= get_zeroed_page(GFP_ATOMIC
);
1313 goto fatal_memory_error
;
1315 per_cpu(pci_iommu_batch
, i
).pglist
= (u64
*) page
;
1318 p
= kzalloc(sizeof(struct pci_controller_info
), GFP_ATOMIC
);
1320 goto fatal_memory_error
;
1322 iommu
= kzalloc(sizeof(struct pci_iommu
), GFP_ATOMIC
);
1324 goto fatal_memory_error
;
1326 p
->pbm_A
.iommu
= iommu
;
1328 iommu
= kzalloc(sizeof(struct pci_iommu
), GFP_ATOMIC
);
1330 goto fatal_memory_error
;
1332 p
->pbm_B
.iommu
= iommu
;
1334 p
->next
= pci_controller_root
;
1335 pci_controller_root
= p
;
1337 p
->index
= pci_num_controllers
++;
1339 p
->scan_bus
= pci_sun4v_scan_bus
;
1340 #ifdef CONFIG_PCI_MSI
1341 p
->setup_msi_irq
= pci_sun4v_setup_msi_irq
;
1342 p
->teardown_msi_irq
= pci_sun4v_teardown_msi_irq
;
1344 p
->pci_ops
= &pci_sun4v_ops
;
1346 /* Like PSYCHO and SCHIZO we have a 2GB aligned area
1349 pci_memspace_mask
= 0x7fffffffUL
;
1351 pci_sun4v_pbm_init(p
, dp
, devhandle
);
1355 prom_printf("SUN4V_PCI: Fatal memory allocation error.\n");