]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * arch/parisc/lib/io.c | |
3 | * | |
4 | * Copyright (c) Matthew Wilcox 2001 for Hewlett-Packard | |
5 | * Copyright (c) Randolph Chung 2001 <tausq@debian.org> | |
6 | * | |
7 | * IO accessing functions which shouldn't be inlined because they're too big | |
8 | */ | |
9 | ||
10 | #include <linux/kernel.h> | |
11 | #include <linux/module.h> | |
12 | #include <asm/io.h> | |
13 | ||
14 | /* Copies a block of memory to a device in an efficient manner. | |
15 | * Assumes the device can cope with 32-bit transfers. If it can't, | |
16 | * don't use this function. | |
17 | */ | |
18 | void memcpy_toio(volatile void __iomem *dst, const void *src, int count) | |
19 | { | |
20 | if (((unsigned long)dst & 3) != ((unsigned long)src & 3)) | |
21 | goto bytecopy; | |
22 | while ((unsigned long)dst & 3) { | |
23 | writeb(*(char *)src, dst++); | |
24 | src++; | |
25 | count--; | |
26 | } | |
27 | while (count > 3) { | |
28 | __raw_writel(*(u32 *)src, dst); | |
29 | src += 4; | |
30 | dst += 4; | |
31 | count -= 4; | |
32 | } | |
33 | bytecopy: | |
34 | while (count--) { | |
35 | writeb(*(char *)src, dst++); | |
36 | src++; | |
37 | } | |
38 | } | |
39 | ||
40 | /* | |
41 | ** Copies a block of memory from a device in an efficient manner. | |
42 | ** Assumes the device can cope with 32-bit transfers. If it can't, | |
43 | ** don't use this function. | |
44 | ** | |
45 | ** CR16 counts on C3000 reading 256 bytes from Symbios 896 RAM: | |
46 | ** 27341/64 = 427 cyc per int | |
47 | ** 61311/128 = 478 cyc per short | |
48 | ** 122637/256 = 479 cyc per byte | |
49 | ** Ergo bus latencies dominant (not transfer size). | |
50 | ** Minimize total number of transfers at cost of CPU cycles. | |
51 | ** TODO: only look at src alignment and adjust the stores to dest. | |
52 | */ | |
53 | void memcpy_fromio(void *dst, const volatile void __iomem *src, int count) | |
54 | { | |
55 | /* first compare alignment of src/dst */ | |
56 | if ( (((unsigned long)dst ^ (unsigned long)src) & 1) || (count < 2) ) | |
57 | goto bytecopy; | |
58 | ||
59 | if ( (((unsigned long)dst ^ (unsigned long)src) & 2) || (count < 4) ) | |
60 | goto shortcopy; | |
61 | ||
62 | /* Then check for misaligned start address */ | |
63 | if ((unsigned long)src & 1) { | |
64 | *(u8 *)dst = readb(src); | |
65 | src++; | |
66 | dst++; | |
67 | count--; | |
68 | if (count < 2) goto bytecopy; | |
69 | } | |
70 | ||
71 | if ((unsigned long)src & 2) { | |
72 | *(u16 *)dst = __raw_readw(src); | |
73 | src += 2; | |
74 | dst += 2; | |
75 | count -= 2; | |
76 | } | |
77 | ||
78 | while (count > 3) { | |
79 | *(u32 *)dst = __raw_readl(src); | |
80 | dst += 4; | |
81 | src += 4; | |
82 | count -= 4; | |
83 | } | |
84 | ||
85 | shortcopy: | |
86 | while (count > 1) { | |
87 | *(u16 *)dst = __raw_readw(src); | |
88 | src += 2; | |
89 | dst += 2; | |
90 | count -= 2; | |
91 | } | |
92 | ||
93 | bytecopy: | |
94 | while (count--) { | |
95 | *(char *)dst = readb(src); | |
96 | src++; | |
97 | dst++; | |
98 | } | |
99 | } | |
100 | ||
101 | /* Sets a block of memory on a device to a given value. | |
102 | * Assumes the device can cope with 32-bit transfers. If it can't, | |
103 | * don't use this function. | |
104 | */ | |
105 | void memset_io(volatile void __iomem *addr, unsigned char val, int count) | |
106 | { | |
107 | u32 val32 = (val << 24) | (val << 16) | (val << 8) | val; | |
108 | while ((unsigned long)addr & 3) { | |
109 | writeb(val, addr++); | |
110 | count--; | |
111 | } | |
112 | while (count > 3) { | |
113 | __raw_writel(val32, addr); | |
114 | addr += 4; | |
115 | count -= 4; | |
116 | } | |
117 | while (count--) { | |
118 | writeb(val, addr++); | |
119 | } | |
120 | } | |
121 | ||
/*
 * Read COUNT 8-bit bytes from port PORT into memory starting at DST.
 *
 * Bytes are stored one at a time until the destination pointer is
 * 32-bit aligned; after that, four port reads are packed (first byte
 * in the most significant position) into a single 32-bit store, and
 * whatever is left over is stored bytewise again.
 */
void insb(unsigned long port, void *dst, unsigned long count)
{
	unsigned char *out = dst;

	/* Head: single bytes up to the next 32-bit boundary. */
	for (; (unsigned long)out & 0x3; count--) {
		if (count == 0)
			return;
		*out = inb(port);
		out++;
	}

	/* Body: one aligned 32-bit store per four port reads. */
	for (; count >= 4; count -= 4) {
		unsigned int word;

		word = inb(port) << 24;
		word |= inb(port) << 16;
		word |= inb(port) << 8;
		word |= inb(port);
		*(unsigned int *)out = word;
		out += 4;
	}

	/* Tail: up to three remaining bytes. */
	for (; count != 0; count--) {
		*out = inb(port);
		out++;
	}
}
157 | ||
158 | ||
/*
 * Read COUNT 16-bit words from port PORT into memory starting at DST.
 * This is used by the IDE driver to read disk sectors.  Performance is
 * important, but the interfaces seems to be slow: just using the
 * inlined version of the inw() breaks things.
 *
 * The code below copes with any alignment of DST:
 *  - 32-bit aligned: two port reads are merged into one 32-bit store;
 *  - 16-bit aligned: one leading 16-bit store, then as above;
 *  - odd address:    16-bit stores only, carrying one byte over from
 *                    each port read to the next store.
 *
 * Every value from inw() goes through cpu_to_le16() before it is
 * stored.  NOTE(review): presumably this cancels a byte swap done
 * inside inw() so the data lands in memory in port order -- confirm
 * against asm/io.h.
 */
void insw(unsigned long port, void *dst, unsigned long count)
{
	unsigned char *out = dst;
	unsigned int cur, next;

	if (count == 0)
		return;

	if ((unsigned long)out & 0x1) {
		/* I don't bother with 32bit transfers
		 * in this case, 16bit will have to do -- DE */
		cur = cpu_to_le16(inw(port));
		*out = cur >> 8;
		out++;

		/* Low byte of the previous word + high byte of the new one. */
		for (count--; count != 0; count--) {
			next = cpu_to_le16(inw(port));
			*(unsigned short *)out = (cur & 0xff) << 8 | (next >> 8);
			out += 2;
			cur = next;
		}

		*out = cur & 0xff;
		return;
	}

	if ((unsigned long)out & 0x2) {
		/* 16-bit aligned only: one word brings us to a 32-bit boundary. */
		*(unsigned short *)out = cpu_to_le16(inw(port));
		out += 2;
		count--;
	}

	/* Destination is now 32-bit aligned. */
	for (; count >= 2; count -= 2) {
		cur = cpu_to_le16(inw(port)) << 16;
		cur |= cpu_to_le16(inw(port));
		*(unsigned int *)out = cur;
		out += 4;
	}

	if (count != 0)
		*(unsigned short *)out = cpu_to_le16(inw(port));
}
229 | ||
230 | ||
231 | ||
/*
 * Read COUNT 32-bit words from port PORT into memory starting at DST.
 * Now works with any alignment in DST.  Performance is important, but
 * the interfaces seems to be slow: just using the inlined version of
 * the inl() breaks things.
 *
 * For an unaligned DST the first port value is split so that the
 * pointer reaches a 32-bit boundary; every later store is a full
 * aligned 32-bit word built from the leftover of the previous port
 * value and the leading part of the next one; the final leftover is
 * flushed with 8/16-bit stores.
 *
 * Every value from inl() goes through cpu_to_le32() before it is
 * stored.  NOTE(review): presumably this cancels a byte swap done
 * inside inl() -- confirm against asm/io.h.
 */
void insl(unsigned long port, void *dst, unsigned long count)
{
	unsigned char *out = dst;
	unsigned int cur, next;

	if (count == 0)
		return;

	switch ((unsigned long)dst & 0x3) {
	case 0x00:			/* Buffer 32-bit aligned */
		for (; count != 0; count--) {
			*(unsigned int *)out = cpu_to_le32(inl(port));
			out += 4;
		}
		break;

	case 0x02:			/* Buffer 16-bit aligned */
		cur = cpu_to_le32(inl(port));
		*(unsigned short *)out = cur >> 16;
		out += 2;

		/* Carry the low 16 bits into the next aligned store. */
		for (count--; count != 0; count--) {
			next = cpu_to_le32(inl(port));
			*(unsigned int *)out = (cur & 0xffff) << 16 | (next >> 16);
			out += 4;
			cur = next;
		}
		*(unsigned short *)out = cur & 0xffff;
		break;

	case 0x01:			/* Buffer 8-bit aligned */
		cur = cpu_to_le32(inl(port));
		*out = cur >> 24;
		out++;
		*(unsigned short *)out = (cur >> 8) & 0xffff;
		out += 2;

		/* Carry the low 8 bits into the next aligned store. */
		for (count--; count != 0; count--) {
			next = cpu_to_le32(inl(port));
			*(unsigned int *)out = (cur & 0xff) << 24 | (next >> 8);
			out += 4;
			cur = next;
		}
		*out = cur & 0xff;
		break;

	case 0x03:			/* Buffer 8-bit aligned */
		cur = cpu_to_le32(inl(port));
		*out = cur >> 24;
		out++;

		/* Carry the low 24 bits into the next aligned store. */
		for (count--; count != 0; count--) {
			next = cpu_to_le32(inl(port));
			*(unsigned int *)out = (cur & 0xffffff) << 8 | next >> 24;
			out += 4;
			cur = next;
		}
		*(unsigned short *)out = (cur >> 8) & 0xffff;
		out += 2;
		*out = cur & 0xff;
		break;
	}
}
310 | ||
311 | ||
/*
 * Like insb but in the opposite direction: write COUNT bytes, taken
 * from memory starting at SRC, to port PORT one byte at a time.
 * Don't worry as much about doing aligned memory transfers:
 * doing byte reads the "slow" way isn't nearly as slow as
 * doing byte writes the slow way (no r-m-w cycle).
 */
void outsb(unsigned long port, const void *src, unsigned long count)
{
	const unsigned char *in = src;

	for (; count != 0; count--) {
		outb(*in, port);
		in++;
	}
}
329 | ||
/*
 * Like insw but in the opposite direction.  This is used by the IDE
 * driver to write disk sectors.  Performance is important, but the
 * interfaces seems to be slow: just using the inlined version of the
 * outw() breaks things.
 *
 * Writes COUNT 16-bit words, taken from memory starting at SRC, to
 * port PORT.  Any alignment of SRC is handled:
 *  - 32-bit aligned: one 32-bit load feeds two port writes;
 *  - 16-bit aligned: one leading 16-bit load, then as above;
 *  - odd address:    one leading byte, aligned 16-bit loads with a
 *                    byte carried between them, one trailing byte.
 *
 * Every value goes through le16_to_cpu() before outw().
 * NOTE(review): presumably this cancels a byte swap done inside
 * outw() so the data reaches the port in memory order -- confirm
 * against asm/io.h.
 */
void outsw(unsigned long port, const void *src, unsigned long count)
{
	const unsigned char *p = src;
	unsigned int l, l2;

	if (!count)
		return;

	/*
	 * Odd source address.  Both residues 1 and 3 (mod 4) land here;
	 * after the leading byte the pointer is 16-bit aligned either way.
	 */
	if ((unsigned long)p & 0x1) {
		/* I don't bother with 32bit transfers
		 * in this case, 16bit will have to do -- DE */
		l = (unsigned int)*p << 8;
		p++;
		count--;

		while (count) {
			count--;
			l2 = *(const unsigned short *)p;
			p += 2;
			outw(le16_to_cpu(l | l2 >> 8), port);
			/* Keep only the byte not sent yet, as the next high byte. */
			l = (l2 & 0xff) << 8;
		}

		/* The trailing byte is the LOW half of the last word. */
		l2 = *p;
		outw(le16_to_cpu(l | l2), port);
		return;
	}

	if ((unsigned long)p & 0x2) {
		/* 16-bit aligned only: one word brings us to a 32-bit boundary. */
		outw(le16_to_cpu(*(const unsigned short *)p), port);
		p += 2;
		count--;
	}

	/* Source is now 32-bit aligned. */
	while (count >= 2) {
		count -= 2;
		l = *(const unsigned int *)p;
		p += 4;
		outw(le16_to_cpu(l >> 16), port);
		outw(le16_to_cpu(l & 0xffff), port);
	}

	if (count)
		outw(le16_to_cpu(*(const unsigned short *)p), port);
}
400 | ||
401 | ||
/*
 * Like insl but in the opposite direction.  This is used by the IDE
 * driver to write disk sectors.  Works with any alignment in SRC.
 * Performance is important, but the interfaces seems to be slow:
 * just using the inlined version of the outl() breaks things.
 *
 * Writes COUNT 32-bit words, taken from memory starting at SRC, to
 * port PORT.  For an unaligned SRC the leading 1-3 bytes are loaded
 * with 8/16-bit accesses, the bulk with aligned 32-bit loads whose
 * bytes are carried over into the next output word, and the trailing
 * bytes with 8/16-bit accesses again.
 *
 * Every value goes through le32_to_cpu() before outl().
 * NOTE(review): presumably this cancels a byte swap done inside
 * outl() -- confirm against asm/io.h.
 */
void outsl(unsigned long port, const void *src, unsigned long count)
{
	const unsigned char *p = src;
	unsigned int l, l2;

	if (!count)
		return;

	switch ((unsigned long)p & 0x3) {
	case 0x00:			/* Buffer 32-bit aligned */
		while (count--) {
			outl(le32_to_cpu(*(const unsigned int *)p), port);
			p += 4;
		}
		break;

	case 0x02:			/* Buffer 16-bit aligned */
		--count;

		l = *(const unsigned short *)p;
		p += 2;

		while (count--) {
			l2 = *(const unsigned int *)p;
			p += 4;
			/* l << 16 discards everything but the carried low half. */
			outl(le32_to_cpu(l << 16 | l2 >> 16), port);
			l = l2;
		}
		l2 = *(const unsigned short *)p;
		outl(le32_to_cpu(l << 16 | l2), port);
		break;

	case 0x01:			/* Buffer 8-bit aligned */
		--count;

		/* Three leading bytes fill bits 31..8 of the first word. */
		l = (unsigned int)*p << 24;
		p++;
		l |= (unsigned int)*(const unsigned short *)p << 8;
		p += 2;

		while (count--) {
			l2 = *(const unsigned int *)p;
			p += 4;
			outl(le32_to_cpu(l | l2 >> 24), port);
			l = l2 << 8;
		}
		l2 = *p;
		outl(le32_to_cpu(l | l2), port);
		break;

	case 0x03:			/* Buffer 8-bit aligned */
		--count;

		/* One leading byte fills bits 31..24 of the first word. */
		l = (unsigned int)*p << 24;
		p++;

		while (count--) {
			l2 = *(const unsigned int *)p;
			p += 4;
			outl(le32_to_cpu(l | l2 >> 8), port);
			l = l2 << 24;
		}

		/*
		 * Three trailing bytes belong in bits 23..0: the 16-bit load
		 * goes to bits 23..8 (shift by 8, not 16, or it would collide
		 * with the carried byte in bits 31..24) and the last byte to
		 * bits 7..0.
		 */
		l2 = (unsigned int)*(const unsigned short *)p << 8;
		p += 2;
		l2 |= *p;
		outl(le32_to_cpu(l | l2), port);
		break;
	}
}
482 | ||
/*
 * Export the six string port I/O routines defined in this file.
 * NOTE(review): memcpy_toio(), memcpy_fromio() and memset_io() are not
 * exported here -- presumably that happens elsewhere; confirm.
 */
483 | EXPORT_SYMBOL(insb); | |
484 | EXPORT_SYMBOL(insw); | |
485 | EXPORT_SYMBOL(insl); | |
486 | EXPORT_SYMBOL(outsb); | |
487 | EXPORT_SYMBOL(outsw); | |
488 | EXPORT_SYMBOL(outsl); |