]>
Commit | Line | Data |
---|---|---|
1da177e4 | 1 | /* Copyright 2002 Andi Kleen, SuSE Labs */ |
8d379dad | 2 | |
8d379dad JB |
3 | #include <linux/linkage.h> |
4 | #include <asm/dwarf2.h> | |
5 | ||
1da177e4 LT |
6 | /* |
7 | * ISO C memset - set a memory block to a byte value. | |
8 | * | |
9 | * rdi destination | |
10 | * rsi value (char) | |
11 | * rdx count (bytes) | |
12 | * | |
13 | * rax original destination | |
14 | */ | |
7269e881 JB |
15 | .section .altinstr_replacement, "ax", @progbits |
16 | .Lmemset_c: /* rep-string variant: rdi = destination, sil = fill byte, rdx = byte count; returns the original destination in rax */ | |
8d379dad JB |
17 | movq %rdi,%r9 /* r9 = original destination, restored into rax before ret */ |
18 | movl %edx,%r8d | |
19 | andl $7,%r8d /* r8d = count % 8 (trailing bytes); only the low 3 bits of the count matter here */ | |
20 | movq %rdx,%rcx /* take the full 64-bit count so sizes of 4GiB and above are not truncated */ | |
21 | shrq $3,%rcx /* rcx = count / 8 = number of qword stores */ | |
22 | /* expand byte value: replicate the fill byte into all 8 bytes of rax */ | |
23 | movzbl %sil,%esi | |
24 | movabs $0x0101010101010101,%rax | |
25 | mulq %rsi /* with rax, clobbers rdx */ | |
26 | rep stosq | |
27 | movl %r8d,%ecx /* 32-bit write zero-extends into rcx: remaining 0..7 bytes */ | |
28 | rep stosb | |
29 | movq %r9,%rax | |
30 | ret | |
7269e881 JB |
31 | .Lmemset_e: |
32 | .previous | |
8d379dad JB |
33 | |
34 | ENTRY(memset) | |
35 | ENTRY(__memset) | |
36 | CFI_STARTPROC | |
7bcd3f34 AK |
37 | movq %rdi,%r10 /* r10 = original destination, returned in rax at .Lende */ |
38 | movq %rdx,%r11 /* r11 = byte count; rdx itself is clobbered by the mul below */ | |
39 | ||
40 | /* expand byte value: replicate the fill byte into all 8 bytes of rax */ | |
41 | movzbl %sil,%ecx | |
42 | movabs $0x0101010101010101,%rax | |
43 | mul %rcx /* with rax, clobbers rdx */ | |
44 | ||
45 | /* align dst */ | |
46 | movl %edi,%r9d | |
47 | andl $7,%r9d /* r9d = destination % 8; non-zero means rdi is not 8-byte aligned */ | |
48 | jnz .Lbad_alignment | |
8d379dad | 49 | CFI_REMEMBER_STATE |
7bcd3f34 AK |
50 | .Lafter_bad_alignment: |
51 | ||
52 | movq %r11,%rcx /* use the full 64-bit count so sizes of 4GiB and above are not truncated */ | |
53 | shrq $6,%rcx /* rcx = number of whole 64-byte chunks */ | |
54 | jz .Lhandle_tail | |
55 | ||
56 | .p2align 4 | |
57 | .Lloop_64: | |
58 | decq %rcx /* sets ZF for the jnz below; the mov/lea in between leave flags untouched */ | |
59 | movq %rax,(%rdi) | |
60 | movq %rax,8(%rdi) | |
61 | movq %rax,16(%rdi) | |
62 | movq %rax,24(%rdi) | |
63 | movq %rax,32(%rdi) | |
64 | movq %rax,40(%rdi) | |
65 | movq %rax,48(%rdi) | |
66 | movq %rax,56(%rdi) | |
67 | leaq 64(%rdi),%rdi | |
68 | jnz .Lloop_64 | |
69 | ||
70 | /* Handle tail in loops. The loops should be faster than hard | |
71 | to predict jump tables. */ | |
72 | .p2align 4 | |
73 | .Lhandle_tail: | |
74 | movl %r11d,%ecx /* only bits 3..5 of the count are needed, so 32 bits suffice */ | |
75 | andl $63&(~7),%ecx /* ecx = bytes left in whole qwords after the 64-byte chunks */ | |
76 | jz .Lhandle_7 | |
77 | shrl $3,%ecx /* convert that byte count into a qword count (1..7) */ | |
78 | .p2align 4 | |
79 | .Lloop_8: | |
80 | decl %ecx | |
81 | movq %rax,(%rdi) | |
82 | leaq 8(%rdi),%rdi | |
83 | jnz .Lloop_8 | |
84 | ||
85 | .Lhandle_7: | |
86 | movl %r11d,%ecx | |
87 | andl $7,%ecx /* ecx = final 0..7 bytes, stored one at a time */ | |
88 | jz .Lende | |
89 | .p2align 4 | |
90 | .Lloop_1: | |
91 | decl %ecx | |
92 | movb %al,(%rdi) | |
93 | leaq 1(%rdi),%rdi | |
94 | jnz .Lloop_1 | |
95 | ||
96 | .Lende: | |
97 | movq %r10,%rax /* return the original destination */ | |
98 | ret | |
99 | ||
8d379dad | 100 | CFI_RESTORE_STATE |
7bcd3f34 AK |
101 | .Lbad_alignment: |
102 | cmpq $7,%r11 /* unsigned compare: with 7 bytes or fewer, go straight to the byte loop */ | |
103 | jbe .Lhandle_7 | |
104 | movq %rax,(%rdi) /* unaligned store */ | |
105 | movq $8,%r8 | |
106 | subq %r9,%r8 /* r8 = 8 - (destination % 8): bytes up to the next 8-byte boundary */ | |
107 | addq %r8,%rdi /* advance the destination to that boundary */ | |
108 | subq %r8,%r11 /* those bytes were already covered by the unaligned store */ | |
109 | jmp .Lafter_bad_alignment | |
8d379dad JB |
110 | .Lfinal: |
111 | CFI_ENDPROC | |
112 | ENDPROC(memset) | |
113 | ENDPROC(__memset) | |
7bcd3f34 AK |
114 | |
115 | /* Some CPUs run faster using the string instructions. | |
116 | It is also a lot simpler. Use this when possible */ | |
117 | ||
118 | #include <asm/cpufeature.h> | |
119 | ||
120 | .section .altinstructions,"a" /* one record pairing memset with the .Lmemset_c replacement; NOTE(review): presumably consumed by the kernel's alternatives patching, confirm field layout against struct alt_instr */ | |
121 | .align 8 | |
8d379dad | 122 | .quad memset /* address of the original code */ |
7269e881 | 123 | .quad .Lmemset_c /* address of the replacement code */ |
83a7a2ad | 124 | .word X86_FEATURE_REP_GOOD /* CPU feature flag associated with this record */ |
8d379dad | 125 | .byte .Lfinal - memset /* byte length of the original code; stored in one byte, so it must stay below 256 */ |
7269e881 | 126 | .byte .Lmemset_e - .Lmemset_c /* byte length of the replacement code */ |
7bcd3f34 | 127 | .previous |