]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | # Licensed to the Apache Software Foundation (ASF) under one |
2 | # or more contributor license agreements. See the NOTICE file | |
3 | # distributed with this work for additional information | |
4 | # regarding copyright ownership. The ASF licenses this file | |
5 | # to you under the Apache License, Version 2.0 (the | |
6 | # "License"); you may not use this file except in compliance | |
7 | # with the License. You may obtain a copy of the License at | |
8 | # | |
9 | # http://www.apache.org/licenses/LICENSE-2.0 | |
10 | # | |
11 | # Unless required by applicable law or agreed to in writing, software | |
12 | # distributed under the License is distributed on an "AS IS" BASIS, | |
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | # See the License for the specific language governing permissions and | |
15 | # limitations under the License. | |
16 | ||
# Go toolchain commands.
GO_BUILD := go build
GO_GEN   := go generate
GO_TEST  := go test
# Resolved once at parse time: the directory six levels above the working directory.
GOPATH   := $(realpath ../../../../../..)
21 | ||
# For yasm compatibility, rewrite single-operand rotate instructions in place:
# "ro[lr] <reg>" at the end of a line becomes "ro[lr] <reg>, 1".
# The doubled dollar sign is make's escape for a literal "$" (regex end-of-line).
PERL_FIXUP_ROTATE := perl -i -pe 's/(ro[rl]\s+\w{2,3})$$/\1, 1/'
24 | ||
# Tools used by the assembly rules in this file.
C2GOASM := c2goasm -a -f
CC      := clang

# Flags passed to every $(CC) -S invocation; headers are looked up in _lib.
C_FLAGS := \
	-target x86_64-unknown-none \
	-masm=intel \
	-mno-red-zone \
	-mstackrealign \
	-mllvm -inline-threshold=1000 \
	-fno-asynchronous-unwind-tables \
	-fno-exceptions \
	-fno-rtti \
	-O3 \
	-fno-builtin \
	-ffast-math \
	-fno-jump-tables \
	-I_lib

# Extra flags selecting the instruction set for each generated variant.
ASM_FLAGS_AVX2 := -mavx2 -mfma -mllvm -force-vector-width=32
ASM_FLAGS_SSE4 := -msse4
31 | ||
# Non-test Go sources under this directory, skipping the _lib subtree.
# The explicit -print applies only to the right-hand side of -o, so the pruned
# ./_lib directory itself is no longer emitted into the list.
GO_SOURCES := $(shell find . -path ./_lib -prune -o -name '*.go' -not -name '*_test.go' -print)

# Non-test Go sources plus assembly (*.s) files, skipping the _lib subtree.
# The two -name tests must be OR-ed inside \( \): adjacent find tests are AND-ed,
# and no file name can match both '*.go' and '*.s', which left this list empty.
ALL_SOURCES := $(shell find . -path ./_lib -prune -o \( -name '*.go' -o -name '*.s' \) -not -name '*_test.go' -print)
34 | ||
# Go assembly files required by the `assembly` target: an AVX2 and an SSE4
# variant for each of float64, int64 and uint64.
INTEL_SOURCES := $(foreach t,float64 int64 uint64,$(t)_avx2_amd64.s $(t)_sse4_amd64.s)
39 | ||
# `assembly` is a command name, not a file. The special target must be spelled
# .PHONY; a misspelling is treated as an ordinary target and has no effect.
.PHONY: assembly

# Build every Go assembly file listed in INTEL_SOURCES.
assembly: $(INTEL_SOURCES)
43 | ||
# `generate` is a command name, not a file; declare it phony so a stray file
# called "generate" can never make the target look up to date.
.PHONY: generate

# Render the Go sources for each element type from the shared templates.
# For every type the first command renders the generic, amd64, s390x, noasm and
# test files; the next two render the SIMD wrapper with arch=avx2 and arch=sse4.
generate: ../bin/tmpl
	../bin/tmpl -i -data=float64.tmpldata type.go.tmpl=float64.go type_amd64.go.tmpl=float64_amd64.go type_s390x.go.tmpl=float64_s390x.go type_noasm.go.tmpl=float64_noasm.go type_test.go.tmpl=float64_test.go
	../bin/tmpl -i -data=float64.tmpldata -d arch=avx2 type_simd_amd64.go.tmpl=float64_avx2_amd64.go
	../bin/tmpl -i -data=float64.tmpldata -d arch=sse4 type_simd_amd64.go.tmpl=float64_sse4_amd64.go
	../bin/tmpl -i -data=int64.tmpldata type.go.tmpl=int64.go type_amd64.go.tmpl=int64_amd64.go type_s390x.go.tmpl=int64_s390x.go type_noasm.go.tmpl=int64_noasm.go type_test.go.tmpl=int64_test.go
	../bin/tmpl -i -data=int64.tmpldata -d arch=avx2 type_simd_amd64.go.tmpl=int64_avx2_amd64.go
	../bin/tmpl -i -data=int64.tmpldata -d arch=sse4 type_simd_amd64.go.tmpl=int64_sse4_amd64.go
	../bin/tmpl -i -data=uint64.tmpldata type.go.tmpl=uint64.go type_amd64.go.tmpl=uint64_amd64.go type_s390x.go.tmpl=uint64_s390x.go type_noasm.go.tmpl=uint64_noasm.go type_test.go.tmpl=uint64_test.go
	../bin/tmpl -i -data=uint64.tmpldata -d arch=avx2 type_simd_amd64.go.tmpl=uint64_avx2_amd64.go
	../bin/tmpl -i -data=uint64.tmpldata -d arch=sse4 type_simd_amd64.go.tmpl=uint64_sse4_amd64.go
54 | ||
# Compile _lib/float64.c to assembly with the AVX2 flags, then apply the rotate
# fix-up to the output. `&&` makes a compiler failure fail the recipe; with `;`
# only the fix-up's exit status was seen by make.
_lib/float64_avx2.s: _lib/float64.c
	$(CC) -S $(C_FLAGS) $(ASM_FLAGS_AVX2) $< -o $@ && $(PERL_FIXUP_ROTATE) $@

# Same as above, using the SSE4 flags.
_lib/float64_sse4.s: _lib/float64.c
	$(CC) -S $(C_FLAGS) $(ASM_FLAGS_SSE4) $< -o $@ && $(PERL_FIXUP_ROTATE) $@

# Run c2goasm on the compiler output to produce the Go assembly file.
# $(C2GOASM) already carries -a -f, so the flags are not repeated here.
float64_avx2_amd64.s: _lib/float64_avx2.s
	$(C2GOASM) $< $@

float64_sse4_amd64.s: _lib/float64_sse4.s
	$(C2GOASM) $< $@
66 | ||
# Compile _lib/int64.c to assembly with the AVX2 flags, then apply the rotate
# fix-up to the output. `&&` makes a compiler failure fail the recipe; with `;`
# only the fix-up's exit status was seen by make.
_lib/int64_avx2.s: _lib/int64.c
	$(CC) -S $(C_FLAGS) $(ASM_FLAGS_AVX2) $< -o $@ && $(PERL_FIXUP_ROTATE) $@

# Same as above, using the SSE4 flags.
_lib/int64_sse4.s: _lib/int64.c
	$(CC) -S $(C_FLAGS) $(ASM_FLAGS_SSE4) $< -o $@ && $(PERL_FIXUP_ROTATE) $@

# Run c2goasm on the compiler output to produce the Go assembly file.
# $(C2GOASM) already carries -a -f, so the flags are not repeated here.
int64_avx2_amd64.s: _lib/int64_avx2.s
	$(C2GOASM) $< $@

int64_sse4_amd64.s: _lib/int64_sse4.s
	$(C2GOASM) $< $@
78 | ||
# Compile _lib/uint64.c to assembly with the AVX2 flags, then apply the rotate
# fix-up to the output. `&&` makes a compiler failure fail the recipe; with `;`
# only the fix-up's exit status was seen by make.
_lib/uint64_avx2.s: _lib/uint64.c
	$(CC) -S $(C_FLAGS) $(ASM_FLAGS_AVX2) $< -o $@ && $(PERL_FIXUP_ROTATE) $@

# Same as above, using the SSE4 flags.
_lib/uint64_sse4.s: _lib/uint64.c
	$(CC) -S $(C_FLAGS) $(ASM_FLAGS_SSE4) $< -o $@ && $(PERL_FIXUP_ROTATE) $@

# Run c2goasm on the compiler output to produce the Go assembly file.
# $(C2GOASM) already carries -a -f, so the flags are not repeated here.
uint64_avx2_amd64.s: _lib/uint64_avx2.s
	$(C2GOASM) $< $@

uint64_sse4_amd64.s: _lib/uint64_sse4.s
	$(C2GOASM) $< $@
90 |