]> git.proxmox.com Git - mirror_qemu.git/blob - target/hexagon/imported/mpy.idef
Merge remote-tracking branch 'remotes/quic/tags/pull-hex-20210629' into staging
[mirror_qemu.git] / target / hexagon / imported / mpy.idef
1 /*
2 * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, see <http://www.gnu.org/licenses/>.
16 */
17
18 /*
19 * Multiply Instructions
20 */
21
22
/*
 * STD_SP_MODES: expands to eight Q6INSN definitions named
 * M2_<TAG>_{hh,hl,lh,ll}_{s0,s1}.
 *  - hh/hl/lh/ll selects halfword 1 (H) or halfword 0 (L) of RsV and RtV
 *    through fGETHALF.
 *  - The _s1 variants append ":<<1" to the syntax string and wrap the
 *    product in fSCALE(1, ...); the _s0 variants use the product directly.
 *  - Every body has the shape DST = SATSEM(RNDSEM(ACCSEM <product>)).
 *    ACCSEM is pasted textually in front of the product, so callers pass
 *    things like "RxV+" / "RxV-" or leave it empty.
 *  - OSEM is pasted after the syntax string (a string literal or empty),
 *    ATR is forwarded as the attributes argument, and the description
 *    argument is always "".
 */
23 #define STD_SP_MODES(TAG,OPER,ATR,DST,ACCSEM,SEM,OSEM,SATSEM,RNDSEM)\
24 Q6INSN(M2_##TAG##_hh_s0, OPER"(Rs.H32,Rt.H32)"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM( fGETHALF(1,RsV),fGETHALF(1,RtV))));})\
25 Q6INSN(M2_##TAG##_hh_s1, OPER"(Rs.H32,Rt.H32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETHALF(1,RsV),fGETHALF(1,RtV)))));})\
26 Q6INSN(M2_##TAG##_hl_s0, OPER"(Rs.H32,Rt.L32)"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM( fGETHALF(1,RsV),fGETHALF(0,RtV))));})\
27 Q6INSN(M2_##TAG##_hl_s1, OPER"(Rs.H32,Rt.L32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETHALF(1,RsV),fGETHALF(0,RtV)))));})\
28 Q6INSN(M2_##TAG##_lh_s0, OPER"(Rs.L32,Rt.H32)"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM( fGETHALF(0,RsV),fGETHALF(1,RtV))));})\
29 Q6INSN(M2_##TAG##_lh_s1, OPER"(Rs.L32,Rt.H32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETHALF(0,RsV),fGETHALF(1,RtV)))));})\
30 Q6INSN(M2_##TAG##_ll_s0, OPER"(Rs.L32,Rt.L32)"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM( fGETHALF(0,RsV),fGETHALF(0,RtV))));})\
31 Q6INSN(M2_##TAG##_ll_s1, OPER"(Rs.L32,Rt.L32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETHALF(0,RsV),fGETHALF(0,RtV)))));})
32
33 /*****************************************************/
34 /* multiply 16x16->32 signed instructions */
35 /*****************************************************/
/*
 * Instantiations of STD_SP_MODES; each line yields eight instructions.
 * Argument order: TAG, OPER, ATR (left empty on every line here), DST,
 * ACCSEM, SEM, OSEM, SATSEM, RNDSEM.
 *  - *_acc / *_nac pass "RxV+" / "RxV-" (or the Rxx forms) as ACCSEM so the
 *    product is added to or subtracted from the destination.
 *  - Lines with ":sat" in OSEM pass fSAT as SATSEM; the others pass fPASS.
 *  - Lines with ":rnd" in OSEM pass fROUND as RNDSEM; the others pass fPASS.
 *  - The mpyd* lines target the Rdd/Rxx destinations instead of Rd/Rx.
 * All lines use fMPY16SS as the multiply.
 */
36 STD_SP_MODES(mpy_acc, "Rx32+=mpy", ,RxV,RxV+ ,fMPY16SS, ,fPASS,fPASS)
37 STD_SP_MODES(mpy_nac, "Rx32-=mpy", ,RxV,RxV- ,fMPY16SS, ,fPASS,fPASS)
38 STD_SP_MODES(mpy_acc_sat,"Rx32+=mpy", ,RxV,RxV+ ,fMPY16SS,":sat" ,fSAT, fPASS)
39 STD_SP_MODES(mpy_nac_sat,"Rx32-=mpy", ,RxV,RxV- ,fMPY16SS,":sat" ,fSAT, fPASS)
40 STD_SP_MODES(mpy, "Rd32=mpy", ,RdV, ,fMPY16SS, ,fPASS,fPASS)
41 STD_SP_MODES(mpy_sat, "Rd32=mpy", ,RdV, ,fMPY16SS,":sat" ,fSAT, fPASS)
42 STD_SP_MODES(mpy_rnd, "Rd32=mpy", ,RdV, ,fMPY16SS,":rnd" ,fPASS,fROUND)
43 STD_SP_MODES(mpy_sat_rnd,"Rd32=mpy", ,RdV, ,fMPY16SS,":rnd:sat",fSAT, fROUND)
44 STD_SP_MODES(mpyd_acc, "Rxx32+=mpy",,RxxV,RxxV+ ,fMPY16SS, ,fPASS,fPASS)
45 STD_SP_MODES(mpyd_nac, "Rxx32-=mpy",,RxxV,RxxV- ,fMPY16SS, ,fPASS,fPASS)
46 STD_SP_MODES(mpyd, "Rdd32=mpy", ,RddV, ,fMPY16SS, ,fPASS,fPASS)
47 STD_SP_MODES(mpyd_rnd, "Rdd32=mpy", ,RddV, ,fMPY16SS,":rnd" ,fPASS,fROUND)
48
49
50 /*****************************************************/
51 /* multiply 16x16->32 unsigned instructions */
52 /*****************************************************/
/*
 * STD_USP_MODES: same expansion shape as the signed STD_SP_MODES macro
 * (eight Q6INSN definitions M2_<TAG>_{hh,hl,lh,ll}_{s0,s1}), except that
 * the operand halfwords are fetched with fGETUHALF instead of fGETHALF.
 * Every body is DST = SATSEM(RNDSEM(ACCSEM <product>)); the _s1 variants
 * wrap the product in fSCALE(1, ...) and append ":<<1" to the syntax.
 */
53 #define STD_USP_MODES(TAG,OPER,ATR,DST,ACCSEM,SEM,OSEM,SATSEM,RNDSEM)\
54 Q6INSN(M2_##TAG##_hh_s0, OPER"(Rs.H32,Rt.H32)"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM( fGETUHALF(1,RsV),fGETUHALF(1,RtV))));})\
55 Q6INSN(M2_##TAG##_hh_s1, OPER"(Rs.H32,Rt.H32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETUHALF(1,RsV),fGETUHALF(1,RtV)))));})\
56 Q6INSN(M2_##TAG##_hl_s0, OPER"(Rs.H32,Rt.L32)"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM( fGETUHALF(1,RsV),fGETUHALF(0,RtV))));})\
57 Q6INSN(M2_##TAG##_hl_s1, OPER"(Rs.H32,Rt.L32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETUHALF(1,RsV),fGETUHALF(0,RtV)))));})\
58 Q6INSN(M2_##TAG##_lh_s0, OPER"(Rs.L32,Rt.H32)"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM( fGETUHALF(0,RsV),fGETUHALF(1,RtV))));})\
59 Q6INSN(M2_##TAG##_lh_s1, OPER"(Rs.L32,Rt.H32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETUHALF(0,RsV),fGETUHALF(1,RtV)))));})\
60 Q6INSN(M2_##TAG##_ll_s0, OPER"(Rs.L32,Rt.L32)"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM( fGETUHALF(0,RsV),fGETUHALF(0,RtV))));})\
61 Q6INSN(M2_##TAG##_ll_s1, OPER"(Rs.L32,Rt.L32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETUHALF(0,RsV),fGETUHALF(0,RtV)))));})
62
/*
 * Instantiations of STD_USP_MODES; each line yields eight instructions.
 * All use fMPY16UU as the multiply and fPASS for both SATSEM and RNDSEM,
 * with an empty OSEM. The *_acc / *_nac lines pass "RxV+" / "RxV-" (or the
 * Rxx forms) as ACCSEM. Note that mpyu and mpyud pass ATTRIBS() as ATR
 * while the accumulate forms leave ATR empty.
 */
63 STD_USP_MODES(mpyu_acc, "Rx32+=mpyu", ,RxV,RxV+ ,fMPY16UU, ,fPASS,fPASS)
64 STD_USP_MODES(mpyu_nac, "Rx32-=mpyu", ,RxV,RxV- ,fMPY16UU, ,fPASS,fPASS)
65 STD_USP_MODES(mpyu, "Rd32=mpyu", ATTRIBS() ,RdV, ,fMPY16UU, ,fPASS,fPASS)
66 STD_USP_MODES(mpyud_acc, "Rxx32+=mpyu",,RxxV,RxxV+,fMPY16UU, ,fPASS,fPASS)
67 STD_USP_MODES(mpyud_nac, "Rxx32-=mpyu",,RxxV,RxxV-,fMPY16UU, ,fPASS,fPASS)
68 STD_USP_MODES(mpyud, "Rdd32=mpyu", ATTRIBS() ,RddV, ,fMPY16UU, ,fPASS,fPASS)
69
70 /**********************************************/
71 /* mpy 32x#u8->32 */
72 /**********************************************/
73
/*
 * M2_mpysip: Rd = Rs * #u8.
 * fIMMEXT(uiV) is applied to the immediate before it is used (fIMMEXT is
 * defined outside this file; presumably immediate-extension handling).
 */
74 Q6INSN(M2_mpysip,"Rd32=+mpyi(Rs32,#u8)",ATTRIBS(A_ARCHV2),
75 "32-bit Multiply by unsigned immediate",
76 { fIMMEXT(uiV); RdV=RsV*uiV; })
77
/*
 * M2_mpysin: Rd = Rs * (-#u8), i.e. multiply by the negated immediate.
 * NOTE(review): unlike M2_mpysip/M2_macsip/M2_macsin, this body does not
 * call fIMMEXT(uiV) - confirm against the ISA manual that this form is
 * intentionally not extendable.
 */
78 Q6INSN(M2_mpysin,"Rd32=-mpyi(Rs32,#u8)",ATTRIBS(A_ARCHV2),
79 "32-bit Multiply by unsigned immediate, negate result",
80 { RdV=RsV*-uiV; })
81
/*
 * M2_macsip: Rx = Rx + (Rs * #u8). fIMMEXT(uiV) is applied to the
 * immediate before the multiply.
 */
82 Q6INSN(M2_macsip,"Rx32+=mpyi(Rs32,#u8)",ATTRIBS(A_ARCHV2),
83 "32-bit Multiply-Add by unsigned immediate",
84 { fIMMEXT(uiV); RxV=RxV + (RsV*uiV);})
85
/*
 * M2_macsin: Rx = Rx - (Rs * #u8). fIMMEXT(uiV) is applied to the
 * immediate before the multiply.
 */
86 Q6INSN(M2_macsin,"Rx32-=mpyi(Rs32,#u8)",ATTRIBS(A_ARCHV2),
87 "32-bit Multiply-Subtract by unsigned immediate",
88 { fIMMEXT(uiV); RxV=RxV - (RsV*uiV);})
89
90
91 /**********************************************/
92 /* multiply/mac 32x32->64 instructions */
93 /**********************************************/
/*
 * Signed 32x32 multiply into a register pair via fMPY32SS(RsV,RtV):
 *   M2_dpmpyss_s0      Rdd  = product
 *   M2_dpmpyss_acc_s0  Rxx += product
 *   M2_dpmpyss_nac_s0  Rxx -= product
 */
94 Q6INSN(M2_dpmpyss_s0, "Rdd32=mpy(Rs32,Rt32)", ATTRIBS(),"Multiply 32x32",{RddV=fMPY32SS(RsV,RtV);})
95 Q6INSN(M2_dpmpyss_acc_s0,"Rxx32+=mpy(Rs32,Rt32)",ATTRIBS(),"Multiply 32x32",{RxxV= RxxV + fMPY32SS(RsV,RtV);})
96 Q6INSN(M2_dpmpyss_nac_s0,"Rxx32-=mpy(Rs32,Rt32)",ATTRIBS(),"Multiply 32x32",{RxxV= RxxV - fMPY32SS(RsV,RtV);})
97
/*
 * Unsigned 32x32 multiply into a register pair. Both operands go through
 * fCAST4u before fMPY32UU:
 *   M2_dpmpyuu_s0      Rdd  = product
 *   M2_dpmpyuu_acc_s0  Rxx += product
 *   M2_dpmpyuu_nac_s0  Rxx -= product
 */
98 Q6INSN(M2_dpmpyuu_s0, "Rdd32=mpyu(Rs32,Rt32)", ATTRIBS(),"Multiply 32x32",{RddV=fMPY32UU(fCAST4u(RsV),fCAST4u(RtV));})
99 Q6INSN(M2_dpmpyuu_acc_s0,"Rxx32+=mpyu(Rs32,Rt32)",ATTRIBS(),"Multiply 32x32",{RxxV= RxxV + fMPY32UU(fCAST4u(RsV),fCAST4u(RtV));})
100 Q6INSN(M2_dpmpyuu_nac_s0,"Rxx32-=mpyu(Rs32,Rt32)",ATTRIBS(),"Multiply 32x32",{RxxV= RxxV - fMPY32UU(fCAST4u(RsV),fCAST4u(RtV));})
101
102
103 /******************************************************/
104 /* multiply/mac 32x32->32 (upper) instructions */
105 /******************************************************/
/*
 * 32x32 multiplies that keep an upper slice of the product in Rd:
 *   M2_mpy_up          fMPY32SS product >> 32
 *   M2_mpy_up_s1       fMPY32SS product >> 31   (":<<1" syntax)
 *   M2_mpy_up_s1_sat   fSAT(fMPY32SS product >> 31)
 *   M2_mpyu_up         fMPY32UU product >> 32, operands via fCAST4u
 *   M2_mpysu_up        fMPY32SU product >> 32, only Rt goes through fCAST4u
 *   M2_dpmpyss_rnd_s0  (fMPY32SS product + 0x80000000) >> 32, i.e. the
 *                      rounding constant is added before the shift
 */
106 Q6INSN(M2_mpy_up, "Rd32=mpy(Rs32,Rt32)", ATTRIBS(),"Multiply 32x32",{RdV=fMPY32SS(RsV,RtV)>>32;})
107 Q6INSN(M2_mpy_up_s1, "Rd32=mpy(Rs32,Rt32):<<1", ATTRIBS(),"Multiply 32x32",{RdV=fMPY32SS(RsV,RtV)>>31;})
108 Q6INSN(M2_mpy_up_s1_sat, "Rd32=mpy(Rs32,Rt32):<<1:sat", ATTRIBS(),"Multiply 32x32",{RdV=fSAT(fMPY32SS(RsV,RtV)>>31);})
109 Q6INSN(M2_mpyu_up, "Rd32=mpyu(Rs32,Rt32)", ATTRIBS(),"Multiply 32x32",{RdV=fMPY32UU(fCAST4u(RsV),fCAST4u(RtV))>>32;})
110 Q6INSN(M2_mpysu_up, "Rd32=mpysu(Rs32,Rt32)", ATTRIBS(),"Multiply 32x32",{RdV=fMPY32SU(RsV,fCAST4u(RtV))>>32;})
111 Q6INSN(M2_dpmpyss_rnd_s0,"Rd32=mpy(Rs32,Rt32):rnd", ATTRIBS(),"Multiply 32x32",{RdV=(fMPY32SS(RsV,RtV)+fCONSTLL(0x80000000))>>32;})
112
/*
 * Accumulating forms of the ":<<1:sat" upper multiply. The accumulator is
 * widened with fSE32_64(RxV), the fMPY32SS product shifted right by 31 is
 * added (mac) or subtracted (nac), and the result is passed through fSAT.
 */
113 Q6INSN(M4_mac_up_s1_sat, "Rx32+=mpy(Rs32,Rt32):<<1:sat", ATTRIBS(),"Multiply 32x32",{RxV=fSAT( (fSE32_64(RxV)) + (fMPY32SS(RsV,RtV)>>31));})
114 Q6INSN(M4_nac_up_s1_sat, "Rx32-=mpy(Rs32,Rt32):<<1:sat", ATTRIBS(),"Multiply 32x32",{RxV=fSAT( (fSE32_64(RxV)) - (fMPY32SS(RsV,RtV)>>31));})
115
116
117 /**********************************************/
118 /* 32x32->32 multiply (lower) */
119 /**********************************************/
120
/* M2_mpyi: Rd = Rs * Rt, using the plain C multiply on the register values. */
121 Q6INSN(M2_mpyi,"Rd32=mpyi(Rs32,Rt32)",ATTRIBS(),
122 "Multiply Integer",
123 { RdV=RsV*RtV;})
124
/* M2_maci: Rx = Rx + Rs * Rt. */
125 Q6INSN(M2_maci,"Rx32+=mpyi(Rs32,Rt32)",ATTRIBS(A_ARCHV2),
126 "Multiply-Accumulate Integer",
127 { RxV=RxV + RsV*RtV;})
128
/* M2_mnaci: Rx = Rx - Rs * Rt. */
129 Q6INSN(M2_mnaci,"Rx32-=mpyi(Rs32,Rt32)",ATTRIBS(A_ARCHV2),
130 "Multiply-Neg-Accumulate Integer",
131 { RxV=RxV - RsV*RtV;})
132
133 /****** WHY ARE THESE IN MPY.IDEF? **********/
134
/* M2_acci: Rx = Rx + Rs + Rt (no multiply involved). */
135 Q6INSN(M2_acci,"Rx32+=add(Rs32,Rt32)",ATTRIBS(A_ARCHV2),
136 "Add with accumulate",
137 { RxV=RxV + RsV + RtV;})
138
/* M2_accii: Rx = Rx + Rs + #s8; fIMMEXT(siV) is applied to the immediate first. */
139 Q6INSN(M2_accii,"Rx32+=add(Rs32,#s8)",ATTRIBS(A_ARCHV2),
140 "Add with accumulate",
141 { fIMMEXT(siV); RxV=RxV + RsV + siV;})
142
/* M2_nacci: Rx = Rx - (Rs + Rt). */
143 Q6INSN(M2_nacci,"Rx32-=add(Rs32,Rt32)",ATTRIBS(A_ARCHV2),
144 "Add with neg accumulate",
145 { RxV=RxV - (RsV + RtV);})
146
/* M2_naccii: Rx = Rx - (Rs + #s8); fIMMEXT(siV) is applied to the immediate first. */
147 Q6INSN(M2_naccii,"Rx32-=add(Rs32,#s8)",ATTRIBS(A_ARCHV2),
148 "Add with neg accumulate",
149 { fIMMEXT(siV); RxV=RxV - (RsV + siV);})
150
/*
 * M2_subacc: Rx = Rx + Rt - Rs. Note the operand order in the syntax
 * string is sub(Rt32,Rs32), so Rs is the subtrahend.
 */
151 Q6INSN(M2_subacc,"Rx32+=sub(Rt32,Rs32)",ATTRIBS(A_ARCHV2),
152 "Sub with accumulate",
153 { RxV=RxV + RtV - RsV;})
154
155
156
157
/*
 * M4_mpyrr_addr: Ry = Ru + Rs * Ry. Ry is both a multiplicand and the
 * destination. No immediate operand is involved, so the description says
 * register/register rather than the copy-pasted "immed" wording.
 */
158 Q6INSN(M4_mpyrr_addr,"Ry32=add(Ru32,mpyi(Ry32,Rs32))",ATTRIBS(),
159 "Mpy by register and add register",
160 { RyV = RuV + RsV*RyV;})
161
/*
 * M4_mpyri_addr_u2: Rd = Ru + Rs * #u6:2. The multiplier is an immediate
 * but the addend is register Ru, so the description is corrected to say
 * "add register". This form does not call fIMMEXT.
 */
162 Q6INSN(M4_mpyri_addr_u2,"Rd32=add(Ru32,mpyi(#u6:2,Rs32))",ATTRIBS(),
163 "Mpy by immed and add register",
164 { RdV = RuV + RsV*uiV;})
165
/*
 * M4_mpyri_addr: Rd = Ru + Rs * #u6, with fIMMEXT(uiV) applied to the
 * immediate first. The addend is register Ru, so the description is
 * corrected to say "add register".
 */
166 Q6INSN(M4_mpyri_addr,"Rd32=add(Ru32,mpyi(Rs32,#u6))",ATTRIBS(),
167 "Mpy by immed and add register",
168 { fIMMEXT(uiV); RdV = RuV + RsV*uiV;})
169
170
171
/*
 * M4_mpyri_addi: Rd = uiV + Rs * UiV. Two immediates are involved:
 * presumably uiV is the "#u6" addend and UiV the "#U6" multiplier (the
 * operand-to-variable mapping is established outside this file).
 * fIMMEXT is applied only to uiV.
 */
172 Q6INSN(M4_mpyri_addi,"Rd32=add(#u6,mpyi(Rs32,#U6))",ATTRIBS(),
173 "Mpy by immed and add immed",
174 { fIMMEXT(uiV); RdV = uiV + RsV*UiV;})
175
176
177
/*
 * M4_mpyrr_addi: Rd = #u6 + Rs * Rt, with fIMMEXT(uiV) applied to the
 * immediate addend first. The multiply is register by register, so the
 * description is corrected to say "Mpy by register".
 */
178 Q6INSN(M4_mpyrr_addi,"Rd32=add(#u6,mpyi(Rs32,Rt32))",ATTRIBS(),
179 "Mpy by register and add immed",
180 { fIMMEXT(uiV); RdV = uiV + RsV*RtV;})
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198 /**********************************************/
199 /* vector mac 2x[16x16 -> 32] */
200 /**********************************************/
201
/*
 * vmac_sema is #undef'd and redefined before each instruction group below.
 * This version: word i of Rdd = fSAT(fSCALE(N, Rs.half[i] * Rt.half[i]))
 * for i = 0,1, using fMPY16SS on fGETHALF operands.
 * N is the fSCALE amount: 0 for the _s0 form, 1 for the ":<<1" _s1 form.
 */
202 #undef vmac_sema
203 #define vmac_sema(N)\
204 { fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV)))));\
205 fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)))));\
206 }
207 Q6INSN(M2_vmpy2s_s0,"Rdd32=vmpyh(Rs32,Rt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0))
208 Q6INSN(M2_vmpy2s_s1,"Rdd32=vmpyh(Rs32,Rt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1))
209
210
/*
 * Accumulating variant: word i of Rxx =
 *   fSAT(old word i of Rxx + fSCALE(N, Rs.half[i] * Rt.half[i])), i = 0,1.
 * Products use fMPY16SS on fGETHALF operands; N is 0 (_s0) or 1 (_s1).
 */
211 #undef vmac_sema
212 #define vmac_sema(N)\
213 { fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV)))));\
214 fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)))));\
215 }
216 Q6INSN(M2_vmac2s_s0,"Rxx32+=vmpyh(Rs32,Rt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0))
217 Q6INSN(M2_vmac2s_s1,"Rxx32+=vmpyh(Rs32,Rt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1))
218
/*
 * Mixed-sign variant of the two-halfword multiply: the Rs halfword is read
 * with fGETHALF, the Rt halfword with fGETUHALF, and the product uses
 * fMPY16SU. Word i of Rdd = fSAT(fSCALE(N, product_i)), i = 0,1.
 */
219 #undef vmac_sema
220 #define vmac_sema(N)\
221 { fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SU(fGETHALF(0,RsV),fGETUHALF(0,RtV)))));\
222 fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SU(fGETHALF(1,RsV),fGETUHALF(1,RtV)))));\
223 }
224 Q6INSN(M2_vmpy2su_s0,"Rdd32=vmpyhsu(Rs32,Rt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0))
225 Q6INSN(M2_vmpy2su_s1,"Rdd32=vmpyhsu(Rs32,Rt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1))
226
227
/*
 * Accumulating mixed-sign variant: word i of Rxx =
 *   fSAT(old word i of Rxx + fSCALE(N, fMPY16SU(Rs.half[i], Rt.uhalf[i]))),
 * i = 0,1.
 */
228 #undef vmac_sema
229 #define vmac_sema(N)\
230 { fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SU(fGETHALF(0,RsV),fGETUHALF(0,RtV)))));\
231 fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SU(fGETHALF(1,RsV),fGETUHALF(1,RtV)))));\
232 }
233 Q6INSN(M2_vmac2su_s0,"Rxx32+=vmpyhsu(Rs32,Rt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0))
234 Q6INSN(M2_vmac2su_s1,"Rxx32+=vmpyhsu(Rs32,Rt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1))
235
236
237
/*
 * Round-and-pack variant writing a single 32-bit Rd. For i = 1,0:
 *   half i of Rd = halfword 1 of fSAT(fSCALE(N, Rs.half[i]*Rt.half[i]) + 0x8000)
 * i.e. 0x8000 is added to the scaled product, the sum goes through fSAT,
 * and fGETHALF(1, ...) of that result is stored.
 */
238 #undef vmac_sema
239 #define vmac_sema(N)\
240 { fSETHALF(1,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV))) + 0x8000))));\
241 fSETHALF(0,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV))) + 0x8000))));\
242 }
243 Q6INSN(M2_vmpy2s_s0pack,"Rd32=vmpyh(Rs32,Rt32):rnd:sat",ATTRIBS(A_ARCHV2),"Vector Multiply",vmac_sema(0))
244 Q6INSN(M2_vmpy2s_s1pack,"Rd32=vmpyh(Rs32,Rt32):<<1:rnd:sat",ATTRIBS(A_ARCHV2),"Vector Multiply",vmac_sema(1))
245
246
/*
 * Accumulate without fSAT or fSCALE: word i of Rxx += Rs.half[i]*Rt.half[i]
 * (fMPY16SS), i = 0,1. The macro parameter N is not referenced in this body;
 * the single user passes 0.
 */
247 #undef vmac_sema
248 #define vmac_sema(N)\
249 { fSETWORD(0,RxxV,fGETWORD(0,RxxV) + fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV)));\
250 fSETWORD(1,RxxV,fGETWORD(1,RxxV) + fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)));\
251 }
252 Q6INSN(M2_vmac2,"Rxx32+=vmpyh(Rs32,Rt32)",ATTRIBS(A_ARCHV2),"Vector Multiply",vmac_sema(0))
253
/*
 * "Even halfword" multiply on 64-bit sources: halfwords 0 and 2 of Rss and
 * Rtt are multiplied (fMPY16SS), scaled by N and passed through fSAT.
 * Halfword 0 feeds word 0 of Rdd, halfword 2 feeds word 1.
 */
254 #undef vmac_sema
255 #define vmac_sema(N)\
256 { fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)))));\
257 fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)))));\
258 }
259 Q6INSN(M2_vmpy2es_s0,"Rdd32=vmpyeh(Rss32,Rtt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0))
260 Q6INSN(M2_vmpy2es_s1,"Rdd32=vmpyeh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1))
261
/*
 * Accumulating "even halfword" multiply with fSAT: the scaled product of
 * halfword 0 (resp. 2) of Rss and Rtt is added to word 0 (resp. 1) of Rxx
 * and the sum is passed through fSAT.
 */
262 #undef vmac_sema
263 #define vmac_sema(N)\
264 { fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)))));\
265 fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)))));\
266 }
267 Q6INSN(M2_vmac2es_s0,"Rxx32+=vmpyeh(Rss32,Rtt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0))
268 Q6INSN(M2_vmac2es_s1,"Rxx32+=vmpyeh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1))
269
/*
 * Accumulating "even halfword" multiply without fSAT or fSCALE: products
 * of halfwords 0 and 2 are added to words 0 and 1 of Rxx. N is not
 * referenced in this body; the single user passes 0.
 */
270 #undef vmac_sema
271 #define vmac_sema(N)\
272 { fSETWORD(0,RxxV,fGETWORD(0,RxxV) + fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)));\
273 fSETWORD(1,RxxV,fGETWORD(1,RxxV) + fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)));\
274 }
275 Q6INSN(M2_vmac2es,"Rxx32+=vmpyeh(Rss32,Rtt32)",ATTRIBS(A_ARCHV2),"Vector Multiply",vmac_sema(0))
276
277
278
279
280 /********************************************************/
281 /* vrmpyh, aka Big Mac, aka Mac Daddy, aka Mac-ac-ac-ac */
282 /* vector mac 4x[16x16] + 64 ->64 */
283 /********************************************************/
284
285
/*
 * Reducing multiply-accumulate: Rxx = Rxx + sum over i = 0..3 of
 * fMPY16SS(Rss.half[i], Rtt.half[i]). The whole 64-bit Rxx is the
 * accumulator; N is not referenced in this body.
 */
286 #undef vmac_sema
287 #define vmac_sema(N)\
288 { RxxV = RxxV + fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV))\
289 + fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV))\
290 + fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV))\
291 + fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\
292 }
293 Q6INSN(M2_vrmac_s0,"Rxx32+=vrmpyh(Rss32,Rtt32)",ATTRIBS(),"Vector Multiply",vmac_sema(0))
294
/*
 * Reducing multiply: Rdd = sum over i = 0..3 of
 * fMPY16SS(Rss.half[i], Rtt.half[i]). Non-accumulating counterpart of
 * M2_vrmac_s0; N is not referenced in this body.
 */
295 #undef vmac_sema
296 #define vmac_sema(N)\
297 { RddV = fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV))\
298 + fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV))\
299 + fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV))\
300 + fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\
301 }
302 Q6INSN(M2_vrmpy_s0,"Rdd32=vrmpyh(Rss32,Rtt32)",ATTRIBS(),"Vector Multiply",vmac_sema(0))
303
304
305
306 /******************************************************/
307 /* vector dual macs. just like complex */
308 /******************************************************/
309
310
311 /* With round&pack */
/*
 * dmpy_sema (round & pack form): each output halfword of Rd is built from
 * a pair of adjacent source halfwords:
 *   half 0 of Rd <- halfwords 0 and 1 of Rss/Rtt
 *   half 1 of Rd <- halfwords 2 and 3 of Rss/Rtt
 * The two fMPY16SS products are each scaled by N, summed together with
 * 0x8000, passed through fSAT, and fGETHALF(1, ...) of that is stored.
 */
312 #undef dmpy_sema
313 #define dmpy_sema(N)\
314 { fSETHALF(0,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV))) + \
315 fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV))) + 0x8000))));\
316 fSETHALF(1,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV))) + \
317 fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV))) + 0x8000))));\
318 }
319 Q6INSN(M2_vdmpyrs_s0,"Rd32=vdmpy(Rss32,Rtt32):rnd:sat",ATTRIBS(), "vector dual mac w/ round&pack",dmpy_sema(0))
320 Q6INSN(M2_vdmpyrs_s1,"Rd32=vdmpy(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"vector dual mac w/ round&pack",dmpy_sema(1))
321
322
323
324
325
326 /******************************************************/
327 /* vector byte multiplies */
328 /******************************************************/
329
330
/*
 * M5_vrmpybuu: bytes are read from both Rss and Rtt with fGETUBYTE.
 *   word 0 of Rdd = sum of products of bytes 0..3
 *   word 1 of Rdd = sum of products of bytes 4..7
 * The products use fMPY16SS on the fGETUBYTE results.
 */
331 Q6INSN(M5_vrmpybuu,"Rdd32=vrmpybu(Rss32,Rtt32)",ATTRIBS(),
332 "vector dual mpy bytes",
333 {
334 fSETWORD(0,RddV,(fMPY16SS(fGETUBYTE(0,RssV),fGETUBYTE(0,RttV)) +
335 fMPY16SS(fGETUBYTE(1,RssV),fGETUBYTE(1,RttV)) +
336 fMPY16SS(fGETUBYTE(2,RssV),fGETUBYTE(2,RttV)) +
337 fMPY16SS(fGETUBYTE(3,RssV),fGETUBYTE(3,RttV))));
338 fSETWORD(1,RddV,(fMPY16SS(fGETUBYTE(4,RssV),fGETUBYTE(4,RttV)) +
339 fMPY16SS(fGETUBYTE(5,RssV),fGETUBYTE(5,RttV)) +
340 fMPY16SS(fGETUBYTE(6,RssV),fGETUBYTE(6,RttV)) +
341 fMPY16SS(fGETUBYTE(7,RssV),fGETUBYTE(7,RttV))));
342 })
343
/*
 * M5_vrmacbuu: accumulating form of M5_vrmpybuu.
 *   word 0 of Rxx += sum of fGETUBYTE products of bytes 0..3
 *   word 1 of Rxx += sum of fGETUBYTE products of bytes 4..7
 */
344 Q6INSN(M5_vrmacbuu,"Rxx32+=vrmpybu(Rss32,Rtt32)",ATTRIBS(),
345 "vector dual mac bytes",
346 {
347 fSETWORD(0,RxxV,(fGETWORD(0,RxxV) +
348 fMPY16SS(fGETUBYTE(0,RssV),fGETUBYTE(0,RttV)) +
349 fMPY16SS(fGETUBYTE(1,RssV),fGETUBYTE(1,RttV)) +
350 fMPY16SS(fGETUBYTE(2,RssV),fGETUBYTE(2,RttV)) +
351 fMPY16SS(fGETUBYTE(3,RssV),fGETUBYTE(3,RttV))));
352 fSETWORD(1,RxxV,(fGETWORD(1,RxxV) +
353 fMPY16SS(fGETUBYTE(4,RssV),fGETUBYTE(4,RttV)) +
354 fMPY16SS(fGETUBYTE(5,RssV),fGETUBYTE(5,RttV)) +
355 fMPY16SS(fGETUBYTE(6,RssV),fGETUBYTE(6,RttV)) +
356 fMPY16SS(fGETUBYTE(7,RssV),fGETUBYTE(7,RttV))));
357 })
358
359
/*
 * M5_vrmpybsu: Rss bytes are read with fGETBYTE and Rtt bytes with
 * fGETUBYTE.
 *   word 0 of Rdd = sum of products of bytes 0..3
 *   word 1 of Rdd = sum of products of bytes 4..7
 */
360 Q6INSN(M5_vrmpybsu,"Rdd32=vrmpybsu(Rss32,Rtt32)",ATTRIBS(),
361 "vector dual mpy bytes",
362 {
363 fSETWORD(0,RddV,(fMPY16SS(fGETBYTE(0,RssV),fGETUBYTE(0,RttV)) +
364 fMPY16SS(fGETBYTE(1,RssV),fGETUBYTE(1,RttV)) +
365 fMPY16SS(fGETBYTE(2,RssV),fGETUBYTE(2,RttV)) +
366 fMPY16SS(fGETBYTE(3,RssV),fGETUBYTE(3,RttV))));
367 fSETWORD(1,RddV,(fMPY16SS(fGETBYTE(4,RssV),fGETUBYTE(4,RttV)) +
368 fMPY16SS(fGETBYTE(5,RssV),fGETUBYTE(5,RttV)) +
369 fMPY16SS(fGETBYTE(6,RssV),fGETUBYTE(6,RttV)) +
370 fMPY16SS(fGETBYTE(7,RssV),fGETUBYTE(7,RttV))));
371 })
372
/*
 * M5_vrmacbsu: accumulating form of M5_vrmpybsu (Rss via fGETBYTE, Rtt via
 * fGETUBYTE).
 *   word 0 of Rxx += sum of products of bytes 0..3
 *   word 1 of Rxx += sum of products of bytes 4..7
 */
373 Q6INSN(M5_vrmacbsu,"Rxx32+=vrmpybsu(Rss32,Rtt32)",ATTRIBS(),
374 "vector dual mac bytes",
375 {
376 fSETWORD(0,RxxV,(fGETWORD(0,RxxV) +
377 fMPY16SS(fGETBYTE(0,RssV),fGETUBYTE(0,RttV)) +
378 fMPY16SS(fGETBYTE(1,RssV),fGETUBYTE(1,RttV)) +
379 fMPY16SS(fGETBYTE(2,RssV),fGETUBYTE(2,RttV)) +
380 fMPY16SS(fGETBYTE(3,RssV),fGETUBYTE(3,RttV))));
381 fSETWORD(1,RxxV,(fGETWORD(1,RxxV) +
382 fMPY16SS(fGETBYTE(4,RssV),fGETUBYTE(4,RttV)) +
383 fMPY16SS(fGETBYTE(5,RssV),fGETUBYTE(5,RttV)) +
384 fMPY16SS(fGETBYTE(6,RssV),fGETUBYTE(6,RttV)) +
385 fMPY16SS(fGETBYTE(7,RssV),fGETUBYTE(7,RttV))));
386 })
387
388
/*
 * M5_vmpybuu: element-wise byte multiply of 32-bit sources into a pair.
 * Halfword i of Rdd = fMPY16SS(fGETUBYTE(i,Rs), fGETUBYTE(i,Rt)), i = 0..3.
 */
389 Q6INSN(M5_vmpybuu,"Rdd32=vmpybu(Rs32,Rt32)",ATTRIBS(),
390 "vector mpy bytes",
391 {
392 fSETHALF(0,RddV,(fMPY16SS(fGETUBYTE(0,RsV),fGETUBYTE(0,RtV))));
393 fSETHALF(1,RddV,(fMPY16SS(fGETUBYTE(1,RsV),fGETUBYTE(1,RtV))));
394 fSETHALF(2,RddV,(fMPY16SS(fGETUBYTE(2,RsV),fGETUBYTE(2,RtV))));
395 fSETHALF(3,RddV,(fMPY16SS(fGETUBYTE(3,RsV),fGETUBYTE(3,RtV))));
396 })
397
/*
 * M5_vmpybsu: element-wise byte multiply, Rs bytes via fGETBYTE and Rt
 * bytes via fGETUBYTE.
 * Halfword i of Rdd = fMPY16SS(fGETBYTE(i,Rs), fGETUBYTE(i,Rt)), i = 0..3.
 */
398 Q6INSN(M5_vmpybsu,"Rdd32=vmpybsu(Rs32,Rt32)",ATTRIBS(),
399 "vector mpy bytes",
400 {
401 fSETHALF(0,RddV,(fMPY16SS(fGETBYTE(0,RsV),fGETUBYTE(0,RtV))));
402 fSETHALF(1,RddV,(fMPY16SS(fGETBYTE(1,RsV),fGETUBYTE(1,RtV))));
403 fSETHALF(2,RddV,(fMPY16SS(fGETBYTE(2,RsV),fGETUBYTE(2,RtV))));
404 fSETHALF(3,RddV,(fMPY16SS(fGETBYTE(3,RsV),fGETUBYTE(3,RtV))));
405 })
406
407
/*
 * M5_vmacbuu: accumulating form of M5_vmpybuu.
 * Halfword i of Rxx += fMPY16SS(fGETUBYTE(i,Rs), fGETUBYTE(i,Rt)), i = 0..3.
 * No fSAT is applied to the halfword sums.
 */
408 Q6INSN(M5_vmacbuu,"Rxx32+=vmpybu(Rs32,Rt32)",ATTRIBS(),
409 "vector mac bytes",
410 {
411 fSETHALF(0,RxxV,(fGETHALF(0,RxxV)+fMPY16SS(fGETUBYTE(0,RsV),fGETUBYTE(0,RtV))));
412 fSETHALF(1,RxxV,(fGETHALF(1,RxxV)+fMPY16SS(fGETUBYTE(1,RsV),fGETUBYTE(1,RtV))));
413 fSETHALF(2,RxxV,(fGETHALF(2,RxxV)+fMPY16SS(fGETUBYTE(2,RsV),fGETUBYTE(2,RtV))));
414 fSETHALF(3,RxxV,(fGETHALF(3,RxxV)+fMPY16SS(fGETUBYTE(3,RsV),fGETUBYTE(3,RtV))));
415 })
416
/*
 * M5_vmacbsu: accumulating form of M5_vmpybsu.
 * Halfword i of Rxx += fMPY16SS(fGETBYTE(i,Rs), fGETUBYTE(i,Rt)), i = 0..3.
 * No fSAT is applied to the halfword sums.
 */
417 Q6INSN(M5_vmacbsu,"Rxx32+=vmpybsu(Rs32,Rt32)",ATTRIBS(),
418 "vector mac bytes",
419 {
420 fSETHALF(0,RxxV,(fGETHALF(0,RxxV)+fMPY16SS(fGETBYTE(0,RsV),fGETUBYTE(0,RtV))));
421 fSETHALF(1,RxxV,(fGETHALF(1,RxxV)+fMPY16SS(fGETBYTE(1,RsV),fGETUBYTE(1,RtV))));
422 fSETHALF(2,RxxV,(fGETHALF(2,RxxV)+fMPY16SS(fGETBYTE(2,RsV),fGETUBYTE(2,RtV))));
423 fSETHALF(3,RxxV,(fGETHALF(3,RxxV)+fMPY16SS(fGETBYTE(3,RsV),fGETUBYTE(3,RtV))));
424 })
425
426
427
/*
 * M5_vdmpybsu: each output halfword i (0..3) of Rdd is
 *   fSATN(16, product of byte 2i + product of byte 2i+1)
 * with Rss bytes read via fGETBYTE and Rtt bytes via fGETUBYTE.
 */
428 Q6INSN(M5_vdmpybsu,"Rdd32=vdmpybsu(Rss32,Rtt32):sat",ATTRIBS(),
429 "vector quad mpy bytes",
430 {
431 fSETHALF(0,RddV,fSATN(16,(fMPY16SS(fGETBYTE(0,RssV),fGETUBYTE(0,RttV)) +
432 fMPY16SS(fGETBYTE(1,RssV),fGETUBYTE(1,RttV)))));
433 fSETHALF(1,RddV,fSATN(16,(fMPY16SS(fGETBYTE(2,RssV),fGETUBYTE(2,RttV)) +
434 fMPY16SS(fGETBYTE(3,RssV),fGETUBYTE(3,RttV)))));
435 fSETHALF(2,RddV,fSATN(16,(fMPY16SS(fGETBYTE(4,RssV),fGETUBYTE(4,RttV)) +
436 fMPY16SS(fGETBYTE(5,RssV),fGETUBYTE(5,RttV)))));
437 fSETHALF(3,RddV,fSATN(16,(fMPY16SS(fGETBYTE(6,RssV),fGETUBYTE(6,RttV)) +
438 fMPY16SS(fGETBYTE(7,RssV),fGETUBYTE(7,RttV)))));
439 })
440
441
/*
 * M5_vdmacbsu: accumulating form of M5_vdmpybsu. Each halfword i (0..3) of
 * Rxx becomes
 *   fSATN(16, old halfword i + product of byte 2i + product of byte 2i+1)
 * with Rss bytes read via fGETBYTE and Rtt bytes via fGETUBYTE.
 */
442 Q6INSN(M5_vdmacbsu,"Rxx32+=vdmpybsu(Rss32,Rtt32):sat",ATTRIBS(),
443 "vector quad mac bytes",
444 {
445 fSETHALF(0,RxxV,fSATN(16,(fGETHALF(0,RxxV) +
446 fMPY16SS(fGETBYTE(0,RssV),fGETUBYTE(0,RttV)) +
447 fMPY16SS(fGETBYTE(1,RssV),fGETUBYTE(1,RttV)))));
448 fSETHALF(1,RxxV,fSATN(16,(fGETHALF(1,RxxV) +
449 fMPY16SS(fGETBYTE(2,RssV),fGETUBYTE(2,RttV)) +
450 fMPY16SS(fGETBYTE(3,RssV),fGETUBYTE(3,RttV)))));
451 fSETHALF(2,RxxV,fSATN(16,(fGETHALF(2,RxxV) +
452 fMPY16SS(fGETBYTE(4,RssV),fGETUBYTE(4,RttV)) +
453 fMPY16SS(fGETBYTE(5,RssV),fGETUBYTE(5,RttV)))));
454 fSETHALF(3,RxxV,fSATN(16,(fGETHALF(3,RxxV) +
455 fMPY16SS(fGETBYTE(6,RssV),fGETUBYTE(6,RttV)) +
456 fMPY16SS(fGETBYTE(7,RssV),fGETUBYTE(7,RttV)))));
457 })
458
459
460
461 /* Full version */
/*
 * dmpy_sema (accumulating, full-width form):
 *   word 0 of Rxx = fSAT(old word 0 + scaled product of halfword 0
 *                                    + scaled product of halfword 1)
 *   word 1 of Rxx = fSAT(old word 1 + scaled product of halfword 2
 *                                    + scaled product of halfword 3)
 * Products use fMPY16SS; N is the fSCALE amount (0 or 1).
 */
462 #undef dmpy_sema
463 #define dmpy_sema(N)\
464 { fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV))) + \
465 fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)))));\
466 fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV))) + \
467 fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV)))));\
468 }
469 Q6INSN(M2_vdmacs_s0,"Rxx32+=vdmpy(Rss32,Rtt32):sat",ATTRIBS(), "",dmpy_sema(0))
470 Q6INSN(M2_vdmacs_s1,"Rxx32+=vdmpy(Rss32,Rtt32):<<1:sat",ATTRIBS(),"",dmpy_sema(1))
471
/*
 * dmpy_sema (non-accumulating, full-width form):
 *   word 0 of Rdd = fSAT(scaled product of halfword 0 + scaled product of halfword 1)
 *   word 1 of Rdd = fSAT(scaled product of halfword 2 + scaled product of halfword 3)
 * Products use fMPY16SS; N is the fSCALE amount (0 or 1).
 */
472 #undef dmpy_sema
473 #define dmpy_sema(N)\
474 { fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV))) + \
475 fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)))));\
476 fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV))) + \
477 fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV)))));\
478 }
479
480 Q6INSN(M2_vdmpys_s0,"Rdd32=vdmpy(Rss32,Rtt32):sat",ATTRIBS(), "",dmpy_sema(0))
481 Q6INSN(M2_vdmpys_s1,"Rdd32=vdmpy(Rss32,Rtt32):<<1:sat",ATTRIBS(),"",dmpy_sema(1))
482
483
484
485 /******************************************************/
486 /* complex multiply/mac with */
487 /* real&imag are packed together and always saturated */
488 /* to protect against overflow. */
489 /******************************************************/
490
/*
 * cmpy_sema (round & pack form). CONJMINUS and CONJPLUS are operator
 * tokens pasted between the two scaled products:
 *   half 1 of Rd <- (Rs.h1*Rt.h0) CONJMINUS (Rs.h0*Rt.h1)
 *   half 0 of Rd <- (Rs.h0*Rt.h0) CONJPLUS  (Rs.h1*Rt.h1)
 * In each case 0x8000 is added, the sum goes through fSAT, and
 * fGETHALF(1, ...) of the result is stored.
 * The plain forms pass (+,-); the "Rt32*" forms pass (-,+). Note that the
 * parameter names describe the operator used in the "Rt32*" case, not the
 * plain one.
 */
491 #undef cmpy_sema
492 #define cmpy_sema(N,CONJMINUS,CONJPLUS)\
493 { fSETHALF(1,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV))) CONJMINUS \
494 fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV))) + 0x8000))));\
495 fSETHALF(0,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV))) CONJPLUS \
496 fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV))) + 0x8000))));\
497 }
498 Q6INSN(M2_cmpyrs_s0,"Rd32=cmpy(Rs32,Rt32):rnd:sat",ATTRIBS(), "Complex Multiply",cmpy_sema(0,+,-))
499 Q6INSN(M2_cmpyrs_s1,"Rd32=cmpy(Rs32,Rt32):<<1:rnd:sat",ATTRIBS(),"Complex Multiply",cmpy_sema(1,+,-))
500
501
502 Q6INSN(M2_cmpyrsc_s0,"Rd32=cmpy(Rs32,Rt32*):rnd:sat",ATTRIBS(A_ARCHV2), "Complex Multiply",cmpy_sema(0,-,+))
503 Q6INSN(M2_cmpyrsc_s1,"Rd32=cmpy(Rs32,Rt32*):<<1:rnd:sat",ATTRIBS(A_ARCHV2),"Complex Multiply",cmpy_sema(1,-,+))
504
505
/*
 * cmpy_sema (accumulating form into Rxx):
 *   word 1 = fSAT(old word 1 + (Rs.h1*Rt.h0) CONJMINUS (Rs.h0*Rt.h1))
 *   word 0 = fSAT(old word 0 + (Rs.h0*Rt.h0) CONJPLUS  (Rs.h1*Rt.h1))
 * Each product is fSCALE(N, fMPY16SS(...)). The plain forms pass (+,-);
 * the "Rt32*" forms pass (-,+).
 */
506 #undef cmpy_sema
507 #define cmpy_sema(N,CONJMINUS,CONJPLUS)\
508 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV))) CONJMINUS \
509 fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV)))));\
510 fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV))) CONJPLUS \
511 fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)))));\
512 }
513 Q6INSN(M2_cmacs_s0,"Rxx32+=cmpy(Rs32,Rt32):sat",ATTRIBS(), "Complex Multiply",cmpy_sema(0,+,-))
514 Q6INSN(M2_cmacs_s1,"Rxx32+=cmpy(Rs32,Rt32):<<1:sat",ATTRIBS(),"Complex Multiply",cmpy_sema(1,+,-))
515
516 /* EJP: Need mac versions w/ CONJ T? */
517 Q6INSN(M2_cmacsc_s0,"Rxx32+=cmpy(Rs32,Rt32*):sat",ATTRIBS(A_ARCHV2), "Complex Multiply",cmpy_sema(0,-,+))
518 Q6INSN(M2_cmacsc_s1,"Rxx32+=cmpy(Rs32,Rt32*):<<1:sat",ATTRIBS(A_ARCHV2),"Complex Multiply",cmpy_sema(1,-,+))
519
520
/*
 * cmpy_sema (non-accumulating form into Rdd):
 *   word 1 = fSAT((Rs.h1*Rt.h0) CONJMINUS (Rs.h0*Rt.h1))
 *   word 0 = fSAT((Rs.h0*Rt.h0) CONJPLUS  (Rs.h1*Rt.h1))
 * Each product is fSCALE(N, fMPY16SS(...)). The plain forms pass (+,-);
 * the "Rt32*" forms pass (-,+).
 */
521 #undef cmpy_sema
522 #define cmpy_sema(N,CONJMINUS,CONJPLUS)\
523 { fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV))) CONJMINUS \
524 fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV)))));\
525 fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV))) CONJPLUS \
526 fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)))));\
527 }
528
529 Q6INSN(M2_cmpys_s0,"Rdd32=cmpy(Rs32,Rt32):sat",ATTRIBS(), "Complex Multiply",cmpy_sema(0,+,-))
530 Q6INSN(M2_cmpys_s1,"Rdd32=cmpy(Rs32,Rt32):<<1:sat",ATTRIBS(),"Complex Multiply",cmpy_sema(1,+,-))
531
532 Q6INSN(M2_cmpysc_s0,"Rdd32=cmpy(Rs32,Rt32*):sat",ATTRIBS(A_ARCHV2), "Complex Multiply",cmpy_sema(0,-,+))
533 Q6INSN(M2_cmpysc_s1,"Rdd32=cmpy(Rs32,Rt32*):<<1:sat",ATTRIBS(A_ARCHV2),"Complex Multiply",cmpy_sema(1,-,+))
534
535
536
/*
 * cmpy_sema (negative-accumulate form into Rxx). The combined product term
 * is parenthesised and subtracted from the accumulator word:
 *   word 1 = fSAT(old word 1 - ((Rs.h1*Rt.h0) CONJMINUS (Rs.h0*Rt.h1)))
 *   word 0 = fSAT(old word 0 - ((Rs.h0*Rt.h0) CONJPLUS  (Rs.h1*Rt.h1)))
 * Each product is fSCALE(N, fMPY16SS(...)). The plain forms pass (+,-);
 * the "Rt32*" forms pass (-,+).
 */
537 #undef cmpy_sema
538 #define cmpy_sema(N,CONJMINUS,CONJPLUS)\
539 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) - (fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV))) CONJMINUS \
540 fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV))))));\
541 fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) - (fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV))) CONJPLUS \
542 fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV))))));\
543 }
544 Q6INSN(M2_cnacs_s0,"Rxx32-=cmpy(Rs32,Rt32):sat",ATTRIBS(A_ARCHV2), "Complex Multiply",cmpy_sema(0,+,-))
545 Q6INSN(M2_cnacs_s1,"Rxx32-=cmpy(Rs32,Rt32):<<1:sat",ATTRIBS(A_ARCHV2),"Complex Multiply",cmpy_sema(1,+,-))
546
547 /* EJP: need CONJ versions? */
548 Q6INSN(M2_cnacsc_s0,"Rxx32-=cmpy(Rs32,Rt32*):sat",ATTRIBS(A_ARCHV2), "Complex Multiply",cmpy_sema(0,-,+))
549 Q6INSN(M2_cnacsc_s1,"Rxx32-=cmpy(Rs32,Rt32*):<<1:sat",ATTRIBS(A_ARCHV2),"Complex Multiply",cmpy_sema(1,-,+))
550
551
552 /******************************************************/
553 /* complex interpolation */
554 /* Given a pair of complex values, scale by a,b, sum */
555 /* Saturate/shift1 and round/pack */
556 /******************************************************/
557
/*
 * vrcmpys_sema (non-accumulating form into Rdd). INWORD is a 32-bit
 * expression whose halfwords 0 and 1 act as the two multipliers:
 *   word 1 = fSAT(Rss.h1 * INWORD.h0 + Rss.h3 * INWORD.h1)
 *   word 0 = fSAT(Rss.h0 * INWORD.h0 + Rss.h2 * INWORD.h1)
 * Each product is fSCALE(N, fMPY16SS(...)).
 * The ":hi" form passes fGETWORD(1,RttV) as INWORD and the ":lo" form
 * passes fGETWORD(0,RttV); both users pass N = 1.
 */
558 #undef vrcmpys_sema
559 #define vrcmpys_sema(N,INWORD) \
560 { fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,INWORD))) + \
561 fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(1,INWORD)))));\
562 fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,INWORD))) + \
563 fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(1,INWORD)))));\
564 }
565
566
567
568 Q6INSN(M2_vrcmpys_s1_h,"Rdd32=vrcmpys(Rss32,Rtt32):<<1:sat:raw:hi",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(1,RttV)))
569 Q6INSN(M2_vrcmpys_s1_l,"Rdd32=vrcmpys(Rss32,Rtt32):<<1:sat:raw:lo",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(0,RttV)))
570
/*
 * vrcmpys_sema (accumulating form into Rxx):
 *   word 1 = fSAT(old word 1 + Rss.h1 * INWORD.h0 + Rss.h3 * INWORD.h1)
 *   word 0 = fSAT(old word 0 + Rss.h0 * INWORD.h0 + Rss.h2 * INWORD.h1)
 * Each product is fSCALE(N, fMPY16SS(...)). INWORD is fGETWORD(1,RttV) for
 * the ":hi" form and fGETWORD(0,RttV) for the ":lo" form; both pass N = 1.
 */
571 #undef vrcmpys_sema
572 #define vrcmpys_sema(N,INWORD) \
573 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,INWORD))) + \
574 fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(1,INWORD)))));\
575 fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,INWORD))) + \
576 fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(1,INWORD)))));\
577 }
578
579
580
581 Q6INSN(M2_vrcmpys_acc_s1_h,"Rxx32+=vrcmpys(Rss32,Rtt32):<<1:sat:raw:hi",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(1,RttV)))
582 Q6INSN(M2_vrcmpys_acc_s1_l,"Rxx32+=vrcmpys(Rss32,Rtt32):<<1:sat:raw:lo",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(0,RttV)))
583
/*
 * vrcmpys_sema (round & pack form into a 32-bit Rd):
 *   half 1 of Rd <- Rss.h1 * INWORD.h0 + Rss.h3 * INWORD.h1
 *   half 0 of Rd <- Rss.h0 * INWORD.h0 + Rss.h2 * INWORD.h1
 * Each product is fSCALE(N, fMPY16SS(...)); 0x8000 is added to the sum,
 * the result goes through fSAT, and fGETHALF(1, ...) of that is stored.
 * INWORD is fGETWORD(1,RttV) for ":hi" and fGETWORD(0,RttV) for ":lo".
 */
584 #undef vrcmpys_sema
585 #define vrcmpys_sema(N,INWORD) \
586 { fSETHALF(1,RdV,fGETHALF(1,fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,INWORD))) + \
587 fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(1,INWORD))) + 0x8000)));\
588 fSETHALF(0,RdV,fGETHALF(1,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,INWORD))) + \
589 fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(1,INWORD))) + 0x8000)));\
590 }
591
592 Q6INSN(M2_vrcmpys_s1rp_h,"Rd32=vrcmpys(Rss32,Rtt32):<<1:rnd:sat:raw:hi",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(1,RttV)))
593 Q6INSN(M2_vrcmpys_s1rp_l,"Rd32=vrcmpys(Rss32,Rtt32):<<1:rnd:sat:raw:lo",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(0,RttV)))
594
595 /**************************************************************/
596 /* mixed mode 32x16 vector dual multiplies */
597 /* */
598 /**************************************************************/
599
600 /* SIGNED 32 x SIGNED 16 */
601
602
/*
 * mixmpy_sema (accumulating, "weh" = even halfwords of Rtt):
 *   word 1 of Rxx = fSAT(old word 1 + (fSCALE(N, Rss.w1 * Rtt.h2) >> 16))
 *   word 0 of Rxx = fSAT(old word 0 + (fSCALE(N, Rss.w0 * Rtt.h0) >> 16))
 * Products use fMPY3216SS (word operand from Rss, halfword from Rtt).
 */
603 #undef mixmpy_sema
604 #define mixmpy_sema(N)\
605 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(2,RttV))))>>16)) ); \
606 fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RttV))))>>16)) ); \
607 }
608 Q6INSN(M2_mmacls_s0,"Rxx32+=vmpyweh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
609 Q6INSN(M2_mmacls_s1,"Rxx32+=vmpyweh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
610
/*
 * mixmpy_sema (accumulating, "woh" = odd halfwords of Rtt):
 *   word 1 of Rxx = fSAT(old word 1 + (fSCALE(N, Rss.w1 * Rtt.h3) >> 16))
 *   word 0 of Rxx = fSAT(old word 0 + (fSCALE(N, Rss.w0 * Rtt.h1) >> 16))
 * Products use fMPY3216SS.
 */
611 #undef mixmpy_sema
612 #define mixmpy_sema(N)\
613 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(3,RttV))))>>16) )); \
614 fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RttV))))>>16 ))); \
615 }
616 Q6INSN(M2_mmachs_s0,"Rxx32+=vmpywoh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
617 Q6INSN(M2_mmachs_s1,"Rxx32+=vmpywoh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
618
/*
 * mixmpy_sema (non-accumulating, "weh" = even halfwords of Rtt):
 *   word 1 of Rdd = fSAT(fSCALE(N, Rss.w1 * Rtt.h2) >> 16)
 *   word 0 of Rdd = fSAT(fSCALE(N, Rss.w0 * Rtt.h0) >> 16)
 * Products use fMPY3216SS.
 */
619 #undef mixmpy_sema
620 #define mixmpy_sema(N)\
621 { fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(2,RttV))))>>16)); \
622 fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RttV))))>>16)); \
623 }
624 Q6INSN(M2_mmpyl_s0,"Rdd32=vmpyweh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
625 Q6INSN(M2_mmpyl_s1,"Rdd32=vmpyweh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
626
/*
 * mixmpy_sema (non-accumulating, "woh" = odd halfwords of Rtt):
 *   word 1 of Rdd = fSAT(fSCALE(N, Rss.w1 * Rtt.h3) >> 16)
 *   word 0 of Rdd = fSAT(fSCALE(N, Rss.w0 * Rtt.h1) >> 16)
 * Products use fMPY3216SS.
 */
627 #undef mixmpy_sema
628 #define mixmpy_sema(N)\
629 { fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(3,RttV))))>>16)); \
630 fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RttV))))>>16)); \
631 }
632 Q6INSN(M2_mmpyh_s0,"Rdd32=vmpywoh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
633 Q6INSN(M2_mmpyh_s1,"Rdd32=vmpywoh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
634
635
636 /* With rounding */
637
/*
 * mixmpy_sema (accumulating, rounding, "weh"): same as the non-rounding
 * M2_mmacls form except that 0x8000 is added to the scaled product before
 * the >>16:
 *   word 1 of Rxx = fSAT(old word 1 + ((fSCALE(N, Rss.w1*Rtt.h2) + 0x8000) >> 16))
 *   word 0 of Rxx = fSAT(old word 0 + ((fSCALE(N, Rss.w0*Rtt.h0) + 0x8000) >> 16))
 */
638 #undef mixmpy_sema
639 #define mixmpy_sema(N)\
640 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(2,RttV)))+0x8000)>>16)) ); \
641 fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RttV)))+0x8000)>>16)) ); \
642 }
643 Q6INSN(M2_mmacls_rs0,"Rxx32+=vmpyweh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
644 Q6INSN(M2_mmacls_rs1,"Rxx32+=vmpyweh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
645
/*
 * mixmpy_sema (accumulating, rounding, "woh"): 0x8000 is added to the
 * scaled product before the >>16:
 *   word 1 of Rxx = fSAT(old word 1 + ((fSCALE(N, Rss.w1*Rtt.h3) + 0x8000) >> 16))
 *   word 0 of Rxx = fSAT(old word 0 + ((fSCALE(N, Rss.w0*Rtt.h1) + 0x8000) >> 16))
 */
646 #undef mixmpy_sema
647 #define mixmpy_sema(N)\
648 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(3,RttV)))+0x8000)>>16) )); \
649 fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RttV)))+0x8000)>>16 ))); \
650 }
651 Q6INSN(M2_mmachs_rs0,"Rxx32+=vmpywoh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
652 Q6INSN(M2_mmachs_rs1,"Rxx32+=vmpywoh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
653
/*
 * mixmpy_sema (non-accumulating, rounding, "weh"):
 *   word 1 of Rdd = fSAT((fSCALE(N, Rss.w1*Rtt.h2) + 0x8000) >> 16)
 *   word 0 of Rdd = fSAT((fSCALE(N, Rss.w0*Rtt.h0) + 0x8000) >> 16)
 */
654 #undef mixmpy_sema
655 #define mixmpy_sema(N)\
656 { fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(2,RttV)))+0x8000)>>16)); \
657 fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RttV)))+0x8000)>>16)); \
658 }
659 Q6INSN(M2_mmpyl_rs0,"Rdd32=vmpyweh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
660 Q6INSN(M2_mmpyl_rs1,"Rdd32=vmpyweh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
661
/*
 * Non-accumulating vmpywoh with rounding and saturation.
 * Word i of Rdd = fSAT((fSCALE(N, product) + 0x8000) >> 16), with word 1 of
 * Rss paired with halfword 3 of Rtt and word 0 of Rss with halfword 1.
 * N is 0 for the plain syntax and 1 for the ":<<1" syntax.
 */
662 #undef mixmpy_sema
663 #define mixmpy_sema(N)\
664 { fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(3,RttV)))+0x8000)>>16)); \
665 fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RttV)))+0x8000)>>16)); \
666 }
667 Q6INSN(M2_mmpyh_rs0,"Rdd32=vmpywoh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
668 Q6INSN(M2_mmpyh_rs1,"Rdd32=vmpywoh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
669
670
/*
 * vrmpyweh: reducing form that sums two scaled products into one 64-bit
 * destination. mixmpy_sema now takes three parameters:
 *   DEST   - destination register value (RddV or RxxV)
 *   EQUALS - assignment operator pasted into the body ("=" or "+=")
 *   N      - first argument of fSCALE (0 plain, 1 for ":<<1")
 * The two products pair word 1 of Rss with halfword 2 of Rtt and word 0 of
 * Rss with halfword 0 of Rtt. Unlike the vmpyweh forms there is no fSAT and
 * no right shift in this body.
 */
671 #undef mixmpy_sema
672 #define mixmpy_sema(DEST,EQUALS,N)\
673 { DEST EQUALS fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(2,RttV))) + fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RttV)));}
674
675 Q6INSN(M4_vrmpyeh_s0,"Rdd32=vrmpyweh(Rss32,Rtt32)",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(RddV,=,0))
676 Q6INSN(M4_vrmpyeh_s1,"Rdd32=vrmpyweh(Rss32,Rtt32):<<1",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(RddV,=,1))
677 Q6INSN(M4_vrmpyeh_acc_s0,"Rxx32+=vrmpyweh(Rss32,Rtt32)",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(RxxV,+=,0))
678 Q6INSN(M4_vrmpyeh_acc_s1,"Rxx32+=vrmpyweh(Rss32,Rtt32):<<1",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(RxxV,+=,1))
679
/*
 * vrmpywoh: reducing form using the odd halfwords of Rtt.
 * mixmpy_sema(DEST,EQUALS,N) assigns (EQUALS is "=" or "+=") to DEST the sum
 * of two fSCALE(N, ...) products: word 1 of Rss with halfword 3 of Rtt, and
 * word 0 of Rss with halfword 1 of Rtt. No fSAT and no right shift are
 * applied in this body. N is 0 for the plain syntax and 1 for ":<<1".
 */
680 #undef mixmpy_sema
681 #define mixmpy_sema(DEST,EQUALS,N)\
682 { DEST EQUALS fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(3,RttV))) + fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RttV)));}
683
684 Q6INSN(M4_vrmpyoh_s0,"Rdd32=vrmpywoh(Rss32,Rtt32)",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(RddV,=,0))
685 Q6INSN(M4_vrmpyoh_s1,"Rdd32=vrmpywoh(Rss32,Rtt32):<<1",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(RddV,=,1))
686 Q6INSN(M4_vrmpyoh_acc_s0,"Rxx32+=vrmpywoh(Rss32,Rtt32)",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(RxxV,+=,0))
687 Q6INSN(M4_vrmpyoh_acc_s1,"Rxx32+=vrmpywoh(Rss32,Rtt32):<<1",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(RxxV,+=,1))
688
689
690
691
692
693
/*
 * Scalar 32 x 16 multiply: Rd = mpy(Rs, Rt.L/H):<<1[:rnd]:sat.
 * mixmpy_sema(N,H,RND) parameters:
 *   N   - first argument of fSCALE (all four users below pass 1)
 *   H   - halfword of Rt to use: 0 for Rt.L32, 1 for Rt.H32
 *   RND - text pasted after the scaled product before the >>16; "+0x8000"
 *         for the ":rnd" forms, empty for the non-rounding forms
 * The shifted value is passed to fSAT and written to Rd.
 */
694 #undef mixmpy_sema
695 #define mixmpy_sema(N,H,RND)\
696 { RdV = fSAT((fSCALE(N,fMPY3216SS(RsV,fGETHALF(H,RtV)))RND)>>16); \
697 }
698 Q6INSN(M2_hmmpyl_rs1,"Rd32=mpy(Rs32,Rt.L32):<<1:rnd:sat",ATTRIBS(A_ARCHV2),"Mixed Precision Multiply",mixmpy_sema(1,0,+0x8000))
699 Q6INSN(M2_hmmpyh_rs1,"Rd32=mpy(Rs32,Rt.H32):<<1:rnd:sat",ATTRIBS(A_ARCHV2),"Mixed Precision Multiply",mixmpy_sema(1,1,+0x8000))
700 Q6INSN(M2_hmmpyl_s1,"Rd32=mpy(Rs32,Rt.L32):<<1:sat",ATTRIBS(A_ARCHV2),"Mixed Precision Multiply",mixmpy_sema(1,0,))
701 Q6INSN(M2_hmmpyh_s1,"Rd32=mpy(Rs32,Rt.H32):<<1:sat",ATTRIBS(A_ARCHV2),"Mixed Precision Multiply",mixmpy_sema(1,1,))
702
703
704
705
706
707
708
709
710
711 /* SIGNED 32 x UNSIGNED 16 */
712
/*
 * Accumulating vmpyweuh, saturating, no rounding.
 * Uses fMPY3216SU with halfwords read through fGETUHALF. Word 1 of Rss pairs
 * with halfword 2 of Rtt, word 0 of Rss with halfword 0. The fSCALE(N, ...)
 * product is shifted right by 16, added to the matching word of Rxx, and
 * the sum is passed to fSAT.
 * N is 0 for the plain syntax and 1 for the ":<<1" syntax.
 */
713 #undef mixmpy_sema
714 #define mixmpy_sema(N)\
715 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(2,RttV))))>>16)) ); \
716 fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(0,RttV))))>>16)) ); \
717 }
718 Q6INSN(M2_mmaculs_s0,"Rxx32+=vmpyweuh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
719 Q6INSN(M2_mmaculs_s1,"Rxx32+=vmpyweuh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
720
/*
 * Accumulating vmpywouh, saturating, no rounding.
 * Same shape as the vmpyweuh accumulate form but with the odd halfwords of
 * Rtt: word 1 of Rss pairs with halfword 3, word 0 of Rss with halfword 1.
 * N is 0 for the plain syntax and 1 for the ":<<1" syntax.
 */
721 #undef mixmpy_sema
722 #define mixmpy_sema(N)\
723 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(3,RttV))))>>16) )); \
724 fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(1,RttV))))>>16 ))); \
725 }
726 Q6INSN(M2_mmacuhs_s0,"Rxx32+=vmpywouh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
727 Q6INSN(M2_mmacuhs_s1,"Rxx32+=vmpywouh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
728
/*
 * Non-accumulating vmpyweuh, saturating, no rounding.
 * Word i of Rdd = fSAT(fSCALE(N, product) >> 16), using fMPY3216SU with
 * word 1 of Rss and halfword 2 of Rtt, and word 0 of Rss and halfword 0 of
 * Rtt (halfwords read through fGETUHALF).
 * N is 0 for the plain syntax and 1 for the ":<<1" syntax.
 */
729 #undef mixmpy_sema
730 #define mixmpy_sema(N)\
731 { fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(2,RttV))))>>16)); \
732 fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(0,RttV))))>>16)); \
733 }
734 Q6INSN(M2_mmpyul_s0,"Rdd32=vmpyweuh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
735 Q6INSN(M2_mmpyul_s1,"Rdd32=vmpyweuh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
736
/*
 * Non-accumulating vmpywouh, saturating, no rounding.
 * Word i of Rdd = fSAT(fSCALE(N, product) >> 16), using fMPY3216SU with
 * word 1 of Rss and halfword 3 of Rtt, and word 0 of Rss and halfword 1 of
 * Rtt (halfwords read through fGETUHALF).
 * N is 0 for the plain syntax and 1 for the ":<<1" syntax.
 */
737 #undef mixmpy_sema
738 #define mixmpy_sema(N)\
739 { fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(3,RttV))))>>16)); \
740 fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(1,RttV))))>>16)); \
741 }
742 Q6INSN(M2_mmpyuh_s0,"Rdd32=vmpywouh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
743 Q6INSN(M2_mmpyuh_s1,"Rdd32=vmpywouh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
744
745
746 /* With rounding */
747
/*
 * Accumulating vmpyweuh with rounding and saturation.
 * Word i of Rxx = fSAT(old word i + ((fSCALE(N, product) + 0x8000) >> 16)),
 * using fMPY3216SU with word 1 of Rss and halfword 2 of Rtt, and word 0 of
 * Rss and halfword 0 of Rtt. 0x8000 is half of 1 << 16, added before the
 * right shift for the ":rnd" forms.
 * N is 0 for the plain syntax and 1 for the ":<<1" syntax.
 */
748 #undef mixmpy_sema
749 #define mixmpy_sema(N)\
750 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(2,RttV)))+0x8000)>>16)) ); \
751 fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(0,RttV)))+0x8000)>>16)) ); \
752 }
753 Q6INSN(M2_mmaculs_rs0,"Rxx32+=vmpyweuh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
754 Q6INSN(M2_mmaculs_rs1,"Rxx32+=vmpyweuh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
755
/*
 * Accumulating vmpywouh with rounding and saturation.
 * Word i of Rxx = fSAT(old word i + ((fSCALE(N, product) + 0x8000) >> 16)),
 * using fMPY3216SU with word 1 of Rss and halfword 3 of Rtt, and word 0 of
 * Rss and halfword 1 of Rtt.
 * N is 0 for the plain syntax and 1 for the ":<<1" syntax.
 */
756 #undef mixmpy_sema
757 #define mixmpy_sema(N)\
758 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(3,RttV)))+0x8000)>>16) )); \
759 fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(1,RttV)))+0x8000)>>16 ))); \
760 }
761 Q6INSN(M2_mmacuhs_rs0,"Rxx32+=vmpywouh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
762 Q6INSN(M2_mmacuhs_rs1,"Rxx32+=vmpywouh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
763
/*
 * Non-accumulating vmpyweuh with rounding and saturation.
 * Word i of Rdd = fSAT((fSCALE(N, product) + 0x8000) >> 16), using
 * fMPY3216SU with word 1 of Rss and halfword 2 of Rtt, and word 0 of Rss and
 * halfword 0 of Rtt.
 * N is 0 for the plain syntax and 1 for the ":<<1" syntax.
 */
764 #undef mixmpy_sema
765 #define mixmpy_sema(N)\
766 { fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(2,RttV)))+0x8000)>>16)); \
767 fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(0,RttV)))+0x8000)>>16)); \
768 }
769 Q6INSN(M2_mmpyul_rs0,"Rdd32=vmpyweuh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
770 Q6INSN(M2_mmpyul_rs1,"Rdd32=vmpyweuh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
771
/*
 * Non-accumulating vmpywouh with rounding and saturation.
 * Word i of Rdd = fSAT((fSCALE(N, product) + 0x8000) >> 16), using
 * fMPY3216SU with word 1 of Rss and halfword 3 of Rtt, and word 0 of Rss and
 * halfword 1 of Rtt.
 * N is 0 for the plain syntax and 1 for the ":<<1" syntax.
 */
772 #undef mixmpy_sema
773 #define mixmpy_sema(N)\
774 { fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(3,RttV)))+0x8000)>>16)); \
775 fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(1,RttV)))+0x8000)>>16)); \
776 }
777 Q6INSN(M2_mmpyuh_rs0,"Rdd32=vmpywouh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0))
778 Q6INSN(M2_mmpyuh_rs1,"Rdd32=vmpywouh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
779
780
781 /**************************************************************/
782 /* complex mac with full 64-bit accum - no sat, no shift */
783 /* either do real or imag, never both */
784 /**************************************************************/
785
/*
 * Rxx += vrcmpyi(Rss, Rtt): adds four 16x16 cross products to the 64-bit
 * accumulator. With s[i]/t[i] denoting halfword i of Rss/Rtt:
 *   Rxx = Rxx + s[1]*t[0] + s[0]*t[1] + s[3]*t[2] + s[2]*t[3]
 */
786 Q6INSN(M2_vrcmaci_s0,"Rxx32+=vrcmpyi(Rss32,Rtt32)",ATTRIBS(),"Vector Complex Mac Imaginary",
787 {
788 RxxV = RxxV + fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,RttV)) + \
789 fMPY16SS(fGETHALF(0,RssV),fGETHALF(1,RttV)) + \
790 fMPY16SS(fGETHALF(3,RssV),fGETHALF(2,RttV)) + \
791 fMPY16SS(fGETHALF(2,RssV),fGETHALF(3,RttV));\
792 })
793
/*
 * Rxx += vrcmpyr(Rss, Rtt): adds same-index 16x16 products with alternating
 * sign to the 64-bit accumulator. With s[i]/t[i] denoting halfword i:
 *   Rxx = Rxx + s[0]*t[0] - s[1]*t[1] + s[2]*t[2] - s[3]*t[3]
 */
794 Q6INSN(M2_vrcmacr_s0,"Rxx32+=vrcmpyr(Rss32,Rtt32)",ATTRIBS(),"Vector Complex Mac Real",
795 { RxxV = RxxV + fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)) - \
796 fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)) + \
797 fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)) - \
798 fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\
799 })
800
/*
 * Rxx += vrcmpyi(Rss, Rtt*): the "Rtt*" form of the imaginary accumulate.
 * Compared with the non-starred form, the s[0]*t[1] and s[2]*t[3] terms are
 * subtracted instead of added:
 *   Rxx = Rxx + s[1]*t[0] - s[0]*t[1] + s[3]*t[2] - s[2]*t[3]
 */
801 Q6INSN(M2_vrcmaci_s0c,"Rxx32+=vrcmpyi(Rss32,Rtt32*)",ATTRIBS(A_ARCHV2),"Vector Complex Mac Imaginary",
802 {
803 RxxV = RxxV + fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,RttV)) - \
804 fMPY16SS(fGETHALF(0,RssV),fGETHALF(1,RttV)) + \
805 fMPY16SS(fGETHALF(3,RssV),fGETHALF(2,RttV)) - \
806 fMPY16SS(fGETHALF(2,RssV),fGETHALF(3,RttV));\
807 })
808
/*
 * Rxx += vrcmpyr(Rss, Rtt*): the "Rtt*" form of the real accumulate.
 * All four same-index products are added:
 *   Rxx = Rxx + s[0]*t[0] + s[1]*t[1] + s[2]*t[2] + s[3]*t[3]
 */
809 Q6INSN(M2_vrcmacr_s0c,"Rxx32+=vrcmpyr(Rss32,Rtt32*)",ATTRIBS(A_ARCHV2),"Vector Complex Mac Real",
810 { RxxV = RxxV + fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)) + \
811 fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)) + \
812 fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)) + \
813 fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\
814 })
815
/*
 * Rxx += cmpyi(Rs, Rt): single-pair form on 32-bit sources.
 *   Rxx = Rxx + s[1]*t[0] + s[0]*t[1]
 * where s[i]/t[i] are halfword i of Rs/Rt.
 */
816 Q6INSN(M2_cmaci_s0,"Rxx32+=cmpyi(Rs32,Rt32)",ATTRIBS(),"Vector Complex Mac Imaginary",
817 {
818 RxxV = RxxV + fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV)) + \
819 fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV));
820 })
821
/*
 * Rxx += cmpyr(Rs, Rt): single-pair form on 32-bit sources.
 *   Rxx = Rxx + s[0]*t[0] - s[1]*t[1]
 * where s[i]/t[i] are halfword i of Rs/Rt.
 */
822 Q6INSN(M2_cmacr_s0,"Rxx32+=cmpyr(Rs32,Rt32)",ATTRIBS(),"Vector Complex Mac Real",
823 { RxxV = RxxV + fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV)) - \
824 fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV));
825 })
826
827
/*
 * Rdd = vrcmpyi(Rss, Rtt): non-accumulating sum of four cross products.
 *   Rdd = s[1]*t[0] + s[0]*t[1] + s[3]*t[2] + s[2]*t[3]
 * where s[i]/t[i] are halfword i of Rss/Rtt.
 */
828 Q6INSN(M2_vrcmpyi_s0,"Rdd32=vrcmpyi(Rss32,Rtt32)",ATTRIBS(),"Vector Complex Mpy Imaginary",
829 {
830 RddV = fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,RttV)) + \
831 fMPY16SS(fGETHALF(0,RssV),fGETHALF(1,RttV)) + \
832 fMPY16SS(fGETHALF(3,RssV),fGETHALF(2,RttV)) + \
833 fMPY16SS(fGETHALF(2,RssV),fGETHALF(3,RttV));\
834 })
835
/*
 * Rdd = vrcmpyr(Rss, Rtt): non-accumulating sum of same-index products with
 * alternating sign.
 *   Rdd = s[0]*t[0] - s[1]*t[1] + s[2]*t[2] - s[3]*t[3]
 */
836 Q6INSN(M2_vrcmpyr_s0,"Rdd32=vrcmpyr(Rss32,Rtt32)",ATTRIBS(),"Vector Complex Mpy Real",
837 { RddV = fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)) - \
838 fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)) + \
839 fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)) - \
840 fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\
841 })
842
/*
 * Rdd = vrcmpyi(Rss, Rtt*): "Rtt*" form of the non-accumulating imaginary
 * multiply; the s[0]*t[1] and s[2]*t[3] terms are subtracted.
 *   Rdd = s[1]*t[0] - s[0]*t[1] + s[3]*t[2] - s[2]*t[3]
 */
843 Q6INSN(M2_vrcmpyi_s0c,"Rdd32=vrcmpyi(Rss32,Rtt32*)",ATTRIBS(A_ARCHV2),"Vector Complex Mpy Imaginary",
844 {
845 RddV = fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,RttV)) - \
846 fMPY16SS(fGETHALF(0,RssV),fGETHALF(1,RttV)) + \
847 fMPY16SS(fGETHALF(3,RssV),fGETHALF(2,RttV)) - \
848 fMPY16SS(fGETHALF(2,RssV),fGETHALF(3,RttV));\
849 })
850
/*
 * Rdd = vrcmpyr(Rss, Rtt*): "Rtt*" form of the non-accumulating real
 * multiply; all four same-index products are added.
 *   Rdd = s[0]*t[0] + s[1]*t[1] + s[2]*t[2] + s[3]*t[3]
 */
851 Q6INSN(M2_vrcmpyr_s0c,"Rdd32=vrcmpyr(Rss32,Rtt32*)",ATTRIBS(A_ARCHV2),"Vector Complex Mpy Real",
852 { RddV = fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)) + \
853 fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)) + \
854 fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)) + \
855 fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\
856 })
857
/*
 * Rdd = cmpyi(Rs, Rt): single-pair, non-accumulating form.
 *   Rdd = s[1]*t[0] + s[0]*t[1]
 * where s[i]/t[i] are halfword i of Rs/Rt.
 */
858 Q6INSN(M2_cmpyi_s0,"Rdd32=cmpyi(Rs32,Rt32)",ATTRIBS(),"Vector Complex Mpy Imaginary",
859 {
860 RddV = fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV)) + \
861 fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV));
862 })
863
/*
 * Rdd = cmpyr(Rs, Rt): single-pair, non-accumulating form.
 *   Rdd = s[0]*t[0] - s[1]*t[1]
 * where s[i]/t[i] are halfword i of Rs/Rt.
 */
864 Q6INSN(M2_cmpyr_s0,"Rdd32=cmpyr(Rs32,Rt32)",ATTRIBS(),"Vector Complex Mpy Real",
865 { RddV = fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV)) - \
866 fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV));
867 })
868
869
870 /**************************************************************/
871 /* Complex mpy/mac with 2x32 bit accum, sat, shift */
872 /* 32x16 real or imag */
873 /**************************************************************/
874
875 #if 1
876
/*
 * Rd = cmpyiwh(Rss, Rt):<<1:rnd:sat
 *   Rd = fSAT((w[0]*h[1] + w[1]*h[0] + 0x4000) >> 15)
 * where w[i] is word i of Rss and h[i] is halfword i of Rt. 0x4000 is half
 * of 1 << 15, added before the right shift for rounding.
 */
877 Q6INSN(M4_cmpyi_wh,"Rd32=cmpyiwh(Rss32,Rt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Complex Multiply",
878 {
879 RdV = fSAT( ( fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RtV))
880 + fMPY3216SS(fGETWORD(1,RssV),fGETHALF(0,RtV))
881 + 0x4000)>>15);
882 })
883
884
/*
 * Rd = cmpyrwh(Rss, Rt):<<1:rnd:sat
 *   Rd = fSAT((w[0]*h[0] - w[1]*h[1] + 0x4000) >> 15)
 * where w[i] is word i of Rss and h[i] is halfword i of Rt. 0x4000 is half
 * of 1 << 15, added before the right shift for rounding.
 */
885 Q6INSN(M4_cmpyr_wh,"Rd32=cmpyrwh(Rss32,Rt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Complex Multiply",
886 {
887 RdV = fSAT( ( fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RtV))
888 - fMPY3216SS(fGETWORD(1,RssV),fGETHALF(1,RtV))
889 + 0x4000)>>15);
890 })
891
/*
 * Rd = cmpyiwh(Rss, Rt*):<<1:rnd:sat ("Rt*" form)
 *   Rd = fSAT((w[1]*h[0] - w[0]*h[1] + 0x4000) >> 15)
 * where w[i] is word i of Rss and h[i] is halfword i of Rt.
 */
892 Q6INSN(M4_cmpyi_whc,"Rd32=cmpyiwh(Rss32,Rt32*):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Complex Multiply",
893 {
894 RdV = fSAT( ( fMPY3216SS(fGETWORD(1,RssV),fGETHALF(0,RtV))
895 - fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RtV))
896 + 0x4000)>>15);
897 })
898
899
/*
 * Rd = cmpyrwh(Rss, Rt*):<<1:rnd:sat ("Rt*" form)
 *   Rd = fSAT((w[0]*h[0] + w[1]*h[1] + 0x4000) >> 15)
 * where w[i] is word i of Rss and h[i] is halfword i of Rt.
 */
900 Q6INSN(M4_cmpyr_whc,"Rd32=cmpyrwh(Rss32,Rt32*):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Complex Multiply",
901 {
902 RdV = fSAT( ( fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RtV))
903 + fMPY3216SS(fGETWORD(1,RssV),fGETHALF(1,RtV))
904 + 0x4000)>>15);
905 })
906
907
908 #endif
909
910 /**************************************************************/
911 /* Vector mpy/mac with 2x32 bit accum, sat, shift */
912 /* either do real or imag, never both */
913 /**************************************************************/
914
/*
 * VCMPYSEMI(DST,ACC0,ACC1,SHIFT,SAT): semantics for the imaginary half of
 * the vcmpy/vcmac pair.
 *   DST   - destination register value
 *   ACC0  - text pasted in front of the scaled sum for word 0 (may be empty)
 *   ACC1  - same for word 1
 *   SHIFT - first argument of fSCALE
 *   SAT   - macro applied to the final value before it is stored
 * Word 0 is built from s[1]*t[0] + s[0]*t[1] and word 1 from
 * s[3]*t[2] + s[2]*t[3], where s[i]/t[i] are halfword i of Rss/Rtt.
 * The last line of the definition ends in a line continuation, so the line
 * that follows the macro must stay empty.
 */
915 #undef VCMPYSEMI
916 #define VCMPYSEMI(DST,ACC0,ACC1,SHIFT,SAT) \
917 fSETWORD(0,DST,SAT(ACC0 fSCALE(SHIFT,fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,RttV)) + \
918 fMPY16SS(fGETHALF(0,RssV),fGETHALF(1,RttV))))); \
919 fSETWORD(1,DST,SAT(ACC1 fSCALE(SHIFT,fMPY16SS(fGETHALF(3,RssV),fGETHALF(2,RttV)) + \
920 fMPY16SS(fGETHALF(2,RssV),fGETHALF(3,RttV))))); \
921
/*
 * VCMPYSEMR(DST,ACC0,ACC1,SHIFT,SAT): semantics for the real half of the
 * vcmpy/vcmac pair. Parameters have the same roles as in VCMPYSEMI.
 * Word 0 is built from s[0]*t[0] - s[1]*t[1] and word 1 from
 * s[2]*t[2] - s[3]*t[3], where s[i]/t[i] are halfword i of Rss/Rtt.
 * The last line of the definition ends in a line continuation, so the line
 * that follows the macro must stay empty.
 */
922 #undef VCMPYSEMR
923 #define VCMPYSEMR(DST,ACC0,ACC1,SHIFT,SAT) \
924 fSETWORD(0,DST,SAT(ACC0 fSCALE(SHIFT,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)) - \
925 fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV))))); \
926 fSETWORD(1,DST,SAT(ACC1 fSCALE(SHIFT,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)) - \
927 fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV))))); \
928
929
/*
 * VCMPYIR: emit the imaginary ("...i", vcmpyi) and real ("...r", vcmpyr)
 * instructions of one vcmpy/vcmac variant.
 *   TAGBASE           - tag stem; the instructions are M2_<TAGBASE>i and
 *                       M2_<TAGBASE>r
 *   DSTSYN / DSTVAL   - destination as it appears in the syntax string and
 *                       as used in the semantics
 *   ACCSEM            - assignment operator text in the syntax ("=" or "+=")
 *   ACCVAL0 / ACCVAL1 - accumulate prefixes forwarded to VCMPYSEMI/VCMPYSEMR
 *   SHIFTSYN/SHIFTVAL - shift suffix in the syntax and the matching fSCALE
 *                       argument
 *   SATSYN / SATVAL   - saturation suffix in the syntax and the matching
 *                       saturation macro
 * The description of the "r" instruction reads "Real"; it previously
 * repeated the imaginary description.
 */
930 #undef VCMPYIR
931 #define VCMPYIR(TAGBASE,DSTSYN,DSTVAL,ACCSEM,ACCVAL0,ACCVAL1,SHIFTSYN,SHIFTVAL,SATSYN,SATVAL) \
932 Q6INSN(M2_##TAGBASE##i,DSTSYN ACCSEM "vcmpyi(Rss32,Rtt32)" SHIFTSYN SATSYN,ATTRIBS(A_ARCHV2), \
933 "Vector Complex Multiply Imaginary", { VCMPYSEMI(DSTVAL,ACCVAL0,ACCVAL1,SHIFTVAL,SATVAL); }) \
934 Q6INSN(M2_##TAGBASE##r,DSTSYN ACCSEM "vcmpyr(Rss32,Rtt32)" SHIFTSYN SATSYN,ATTRIBS(A_ARCHV2), \
935 "Vector Complex Multiply Real", { VCMPYSEMR(DSTVAL,ACCVAL0,ACCVAL1,SHIFTVAL,SATVAL); })
936
937
/*
 * Instantiate the vcmpyi/vcmpyr pairs:
 *   vcmpy_s0_sat_ : Rdd =  ...       :sat  (fSCALE argument 0, no accumulate)
 *   vcmpy_s1_sat_ : Rdd =  ... :<<1  :sat  (fSCALE argument 1, no accumulate)
 *   vcmac_s0_sat_ : Rxx += ...       :sat  (each word of Rxx is added to the
 *                                           matching result word inside fSAT)
 * The empty ACCVAL0/ACCVAL1 arguments in the first two lines are intentional.
 */
938 VCMPYIR(vcmpy_s0_sat_,"Rdd32",RddV,"=",,,"",0,":sat",fSAT)
939 VCMPYIR(vcmpy_s1_sat_,"Rdd32",RddV,"=",,,":<<1",1,":sat",fSAT)
940 VCMPYIR(vcmac_s0_sat_,"Rxx32",RxxV,"+=",fGETWORD(0,RxxV) + ,fGETWORD(1,RxxV) + ,"",0,":sat",fSAT)
941
942
943 /**********************************************************************
944 * Rotation -- by 0, 90, 180, or 270 means mult by 1, J, -1, -J *
945 *********************************************************************/
946
/*
 * Rdd = vcrotate(Rss, Rt): rotate two halfword pairs of Rss independently.
 * The selector fEXTRACTU_RANGE(RtV,1,0) controls halfwords 0/1 and
 * fEXTRACTU_RANGE(RtV,3,2) controls halfwords 2/3. For each pair (lo, hi):
 *   selector 0: (lo, hi)                    - unchanged
 *   selector 1: (hi, fSATH(-lo))            - multiply by -J
 *   selector 2: (fSATH(-hi), lo)            - multiply by J
 *   selector 3: (fSATH(-lo), fSATH(-hi))    - multiply by -1
 * Every negated value is passed through fSATH before it is stored. The
 * fHIDE'd check in the final else branch only guards against an impossible
 * selector value.
 */
947 Q6INSN(S2_vcrotate,"Rdd32=vcrotate(Rss32,Rt32)",ATTRIBS(A_ARCHV2),"Rotate complex value by multiple of PI/2",
948 {
949 fHIDE(size1u_t tmp;)
950 tmp = fEXTRACTU_RANGE(RtV,1,0);
951 if (tmp == 0) { /* No rotation */
952 fSETHALF(0,RddV,fGETHALF(0,RssV));
953 fSETHALF(1,RddV,fGETHALF(1,RssV));
954 } else if (tmp == 1) { /* Multiply by -J */
955 fSETHALF(0,RddV,fGETHALF(1,RssV));
956 fSETHALF(1,RddV,fSATH(-fGETHALF(0,RssV)));
957 } else if (tmp == 2) { /* Multiply by J */
958 fSETHALF(0,RddV,fSATH(-fGETHALF(1,RssV)));
959 fSETHALF(1,RddV,fGETHALF(0,RssV));
960 } else { /* Multiply by -1 */
961 fHIDE(if (tmp != 3) fatal("C is broken");)
962 fSETHALF(0,RddV,fSATH(-fGETHALF(0,RssV)));
963 fSETHALF(1,RddV,fSATH(-fGETHALF(1,RssV)));
964 }
965 tmp = fEXTRACTU_RANGE(RtV,3,2);
966 if (tmp == 0) { /* No rotation */
967 fSETHALF(2,RddV,fGETHALF(2,RssV));
968 fSETHALF(3,RddV,fGETHALF(3,RssV));
969 } else if (tmp == 1) { /* Multiply by -J */
970 fSETHALF(2,RddV,fGETHALF(3,RssV));
971 fSETHALF(3,RddV,fSATH(-fGETHALF(2,RssV)));
972 } else if (tmp == 2) { /* Multiply by J */
973 fSETHALF(2,RddV,fSATH(-fGETHALF(3,RssV)));
974 fSETHALF(3,RddV,fGETHALF(2,RssV));
975 } else { /* Multiply by -1 */
976 fHIDE(if (tmp != 3) fatal("C is broken");)
977 fSETHALF(2,RddV,fSATH(-fGETHALF(2,RssV)));
978 fSETHALF(3,RddV,fSATH(-fGETHALF(3,RssV)));
979 }
980 })
981
982
/*
 * Rxx += vrcrotate(Rss, Rt, #u2): rotate-and-reduce with accumulate.
 * The control byte is fGETUBYTE(uiV, RtV), i.e. the byte of Rt selected by
 * the immediate. The loop walks the four byte pairs (i, i+1) of Rss; the low
 * two bits of the control value choose the rotation for the current pair
 * and the value is shifted right by 2 after each pair.
 * Each rotated pair is accumulated into sumr/sumi; at the end sumr is added
 * to word 0 of Rxx and sumi to word 1.
 */
983 Q6INSN(S4_vrcrotate_acc,"Rxx32+=vrcrotate(Rss32,Rt32,#u2)",ATTRIBS(),"Rotate and Reduce Bytes",
984 {
985 fHIDE(int i; int tmpr; int tmpi; unsigned int control;)
986 fHIDE(int sumr; int sumi;)
987 sumr = 0;
988 sumi = 0;
989 control = fGETUBYTE(uiV,RtV);
990 for (i = 0; i < 8; i += 2) {
991 tmpr = fGETBYTE(i ,RssV);
992 tmpi = fGETBYTE(i+1,RssV);
993 switch (control & 3) {
994 case 0: /* No Rotation */
995 sumr += tmpr;
996 sumi += tmpi;
997 break;
998 case 1: /* Multiply by -J */
999 sumr += tmpi;
1000 sumi -= tmpr;
1001 break;
1002 case 2: /* Multiply by J */
1003 sumr -= tmpi;
1004 sumi += tmpr;
1005 break;
1006 case 3: /* Multiply by -1 */
1007 sumr -= tmpr;
1008 sumi -= tmpi;
1009 break;
1010 fHIDE(default: fatal("C is broken!");)
1011 }
1012 control = control >> 2;
1013 }
1014 fSETWORD(0,RxxV,fGETWORD(0,RxxV) + sumr);
1015 fSETWORD(1,RxxV,fGETWORD(1,RxxV) + sumi);
1016 })
1017
/*
 * Rdd = vrcrotate(Rss, Rt, #u2): rotate-and-reduce without accumulate.
 * The control byte is fGETUBYTE(uiV, RtV), i.e. the byte of Rt selected by
 * the immediate. The loop walks the four byte pairs (i, i+1) of Rss; the low
 * two bits of the control value choose the rotation for the current pair
 * and the value is shifted right by 2 after each pair.
 * The rotated pairs are summed into sumr/sumi, which are then written to
 * word 0 and word 1 of Rdd respectively.
 */
1018 Q6INSN(S4_vrcrotate,"Rdd32=vrcrotate(Rss32,Rt32,#u2)",ATTRIBS(),"Rotate and Reduce Bytes",
1019 {
1020 fHIDE(int i; int tmpr; int tmpi; unsigned int control;)
1021 fHIDE(int sumr; int sumi;)
1022 sumr = 0;
1023 sumi = 0;
1024 control = fGETUBYTE(uiV,RtV);
1025 for (i = 0; i < 8; i += 2) {
1026 tmpr = fGETBYTE(i ,RssV);
1027 tmpi = fGETBYTE(i+1,RssV);
1028 switch (control & 3) {
1029 case 0: /* No Rotation */
1030 sumr += tmpr;
1031 sumi += tmpi;
1032 break;
1033 case 1: /* Multiply by -J */
1034 sumr += tmpi;
1035 sumi -= tmpr;
1036 break;
1037 case 2: /* Multiply by J */
1038 sumr -= tmpi;
1039 sumi += tmpr;
1040 break;
1041 case 3: /* Multiply by -1 */
1042 sumr -= tmpr;
1043 sumi -= tmpi;
1044 break;
1045 fHIDE(default: fatal("C is broken!");)
1046 }
1047 control = control >> 2;
1048 }
1049 fSETWORD(0,RddV,sumr);
1050 fSETWORD(1,RddV,sumi);
1051 })
1052
1053
/*
 * Rdd = vcnegh(Rss, Rt): per-halfword conditional negate.
 * For i = 0..3, if fGETBIT(i, RtV) is set, halfword i of Rdd is
 * fSATH(-halfword i of Rss); otherwise the halfword is copied unchanged.
 */
1054 Q6INSN(S2_vcnegh,"Rdd32=vcnegh(Rss32,Rt32)",ATTRIBS(),"Conditional Negate halfwords",
1055 {
1056 fHIDE(int i;)
1057 for (i = 0; i < 4; i++) {
1058 if (fGETBIT(i,RtV)) {
1059 fSETHALF(i,RddV,fSATH(-fGETHALF(i,RssV)));
1060 } else {
1061 fSETHALF(i,RddV,fGETHALF(i,RssV));
1062 }
1063 }
1064 })
1065
/*
 * Rxx += vrcnegh(Rss, Rt): reducing conditional negate.
 * For i = 0..3, halfword i of Rss is added to the 64-bit accumulator Rxx,
 * negated first when fGETBIT(i, RtV) is set. No saturation macro is applied
 * in this body.
 */
1066 Q6INSN(S2_vrcnegh,"Rxx32+=vrcnegh(Rss32,Rt32)",ATTRIBS(),"Vector Reduce Conditional Negate halfwords",
1067 {
1068 fHIDE(int i;)
1069 for (i = 0; i < 4; i++) {
1070 if (fGETBIT(i,RtV)) {
1071 RxxV += -fGETHALF(i,RssV);
1072 } else {
1073 RxxV += fGETHALF(i,RssV);
1074 }
1075 }
1076 })
1077
1078
1079 /**********************************************************************
1080 * Finite-field multiplies. Written by David Hoyle *
1081 *********************************************************************/
1082
/*
 * Rdd = pmpyw(Rs, Rt): shift-and-XOR (polynomial) multiply.
 * For every set bit i of y (taken from Rt), x << i is XORed into prod, so
 * partial products are combined without carries. x is declared unsigned
 * long long so the shifted operand keeps the bits above bit 31. The 64-bit
 * product is written to Rdd.
 */
1083 Q6INSN(M4_pmpyw,"Rdd32=pmpyw(Rs32,Rt32)",ATTRIBS(),"Polynomial 32bit Multiplication with Addition in GF(2)",
1084 {
1085 fHIDE(int i; unsigned int y;)
1086 fHIDE(unsigned long long x; unsigned long long prod;)
1087 x = fGETUWORD(0, RsV);
1088 y = fGETUWORD(0, RtV);
1089
1090 prod = 0;
1091 for(i=0; i < 32; i++) {
1092 if((y >> i) & 1) prod ^= (x << i);
1093 }
1094 RddV = prod;
1095 })
1096
/*
 * Rdd = vpmpyh(Rs, Rt): two independent 16-bit shift-and-XOR (polynomial)
 * multiplies.
 *   prod0 = halfword 0 of Rs (x0) times halfword 0 of Rt (y0)
 *   prod1 = halfword 1 of Rs (x1) times halfword 1 of Rt (y1)
 * The two products are interleaved in the destination: halfwords 0 and 2 of
 * Rdd receive halfwords 0 and 1 of prod0; halfwords 1 and 3 of Rdd receive
 * halfwords 0 and 1 of prod1.
 */
1097 Q6INSN(M4_vpmpyh,"Rdd32=vpmpyh(Rs32,Rt32)",ATTRIBS(),"Dual Polynomial 16bit Multiplication with Addition in GF(2)",
1098 {
1099 fHIDE(int i; unsigned int x0; unsigned int x1;)
1100 fHIDE(unsigned int y0; unsigned int y1;)
1101 fHIDE(unsigned int prod0; unsigned int prod1;)
1102
1103 x0 = fGETUHALF(0, RsV);
1104 x1 = fGETUHALF(1, RsV);
1105 y0 = fGETUHALF(0, RtV);
1106 y1 = fGETUHALF(1, RtV);
1107
1108 prod0 = prod1 = 0;
1109 for(i=0; i < 16; i++) {
1110 if((y0 >> i) & 1) prod0 ^= (x0 << i);
1111 if((y1 >> i) & 1) prod1 ^= (x1 << i);
1112 }
1113 fSETHALF(0,RddV,fGETUHALF(0,prod0));
1114 fSETHALF(1,RddV,fGETUHALF(0,prod1));
1115 fSETHALF(2,RddV,fGETUHALF(1,prod0));
1116 fSETHALF(3,RddV,fGETUHALF(1,prod1));
1117 })
1118
/*
 * Rxx ^= pmpyw(Rs, Rt): accumulating form of the shift-and-XOR (polynomial)
 * multiply. The product is built exactly as in the non-accumulating form
 * (x << i XORed into prod for each set bit i of y) and is then XORed into
 * the existing 64-bit value of Rxx.
 */
1119 Q6INSN(M4_pmpyw_acc,"Rxx32^=pmpyw(Rs32,Rt32)",ATTRIBS(),"Polynomial 32bit Multiplication with Addition in GF(2)",
1120 {
1121 fHIDE(int i; unsigned int y;)
1122 fHIDE(unsigned long long x; unsigned long long prod;)
1123 x = fGETUWORD(0, RsV);
1124 y = fGETUWORD(0, RtV);
1125
1126 prod = 0;
1127 for(i=0; i < 32; i++) {
1128 if((y >> i) & 1) prod ^= (x << i);
1129 }
1130 RxxV ^= prod;
1131 })
1132
/*
 * Rxx ^= vpmpyh(Rs, Rt): accumulating form of the dual 16-bit shift-and-XOR
 * (polynomial) multiply. prod0/prod1 are computed from halfword 0 and
 * halfword 1 of Rs/Rt respectively, then XORed into Rxx with the same
 * interleaving as the non-accumulating form: halfwords 0 and 2 of Rxx take
 * halfwords 0 and 1 of prod0; halfwords 1 and 3 take halfwords 0 and 1 of
 * prod1.
 */
1133 Q6INSN(M4_vpmpyh_acc,"Rxx32^=vpmpyh(Rs32,Rt32)",ATTRIBS(),"Dual Polynomial 16bit Multiplication with Addition in GF(2)",
1134 {
1135 fHIDE(int i; unsigned int x0; unsigned int x1;)
1136 fHIDE(unsigned int y0; unsigned int y1;)
1137 fHIDE(unsigned int prod0; unsigned int prod1;)
1138
1139 x0 = fGETUHALF(0, RsV);
1140 x1 = fGETUHALF(1, RsV);
1141 y0 = fGETUHALF(0, RtV);
1142 y1 = fGETUHALF(1, RtV);
1143
1144 prod0 = prod1 = 0;
1145 for(i=0; i < 16; i++) {
1146 if((y0 >> i) & 1) prod0 ^= (x0 << i);
1147 if((y1 >> i) & 1) prod1 ^= (x1 << i);
1148 }
1149 fSETHALF(0,RxxV,fGETUHALF(0,RxxV) ^ fGETUHALF(0,prod0));
1150 fSETHALF(1,RxxV,fGETUHALF(1,RxxV) ^ fGETUHALF(0,prod1));
1151 fSETHALF(2,RxxV,fGETUHALF(2,RxxV) ^ fGETUHALF(1,prod0));
1152 fSETHALF(3,RxxV,fGETUHALF(3,RxxV) ^ fGETUHALF(1,prod1));
1153 })
1154
1155
1156 /* V70: TINY CORE */
1157
/*
 * CMPY64: emit a 64-bit-result complex multiply (M7_<TAG>, "Rdd32=") and its
 * accumulating twin (M7_<TAG>_acc, "Rxx32+=").
 *   TAG      - tag stem
 *   NAME     - mnemonic used in the syntax string
 *   DESC     - text appended to the description ("Real" / "Imag")
 *   OPERAND1 - second source operand as spelled in the syntax
 *              ("Rtt32" or "Rtt32*")
 *   OP       - operator combining the two products (+ or -)
 *   W0..W3   - word selectors: the result is
 *              Rss.w[W0]*Rtt.w[W1] OP Rss.w[W2]*Rtt.w[W3]
 * Both instructions carry A_RESTRICT_SLOT3ONLY.
 *
 * The four instantiations below therefore compute:
 *   dcmpyrw  : s.w0*t.w0 - s.w1*t.w1
 *   dcmpyrwc : s.w0*t.w0 + s.w1*t.w1
 *   dcmpyiw  : s.w0*t.w1 + s.w1*t.w0
 *   dcmpyiwc : s.w1*t.w0 - s.w0*t.w1
 */
1158 #define CMPY64(TAG,NAME,DESC,OPERAND1,OP,W0,W1,W2,W3) \
1159 Q6INSN(M7_##TAG,"Rdd32=" NAME "(Rss32," OPERAND1 ")",ATTRIBS(A_RESTRICT_SLOT3ONLY),"Complex Multiply 64-bit " DESC, { RddV = (fMPY32SS(fGETWORD(W0, RssV), fGETWORD(W1, RttV)) OP fMPY32SS(fGETWORD(W2, RssV), fGETWORD(W3, RttV)));})\
1160 Q6INSN(M7_##TAG##_acc,"Rxx32+=" NAME "(Rss32,"OPERAND1")",ATTRIBS(A_RESTRICT_SLOT3ONLY),"Complex Multiply-Accumulate 64-bit " DESC, { RxxV += (fMPY32SS(fGETWORD(W0, RssV), fGETWORD(W1, RttV)) OP fMPY32SS(fGETWORD(W2, RssV), fGETWORD(W3, RttV)));})
1161
1162 CMPY64(dcmpyrw, "cmpyrw","Real","Rtt32" ,-,0,0,1,1)
1163 CMPY64(dcmpyrwc,"cmpyrw","Real","Rtt32*",+,0,0,1,1)
1164 CMPY64(dcmpyiw, "cmpyiw","Imag","Rtt32" ,+,0,1,1,0)
1165 CMPY64(dcmpyiwc,"cmpyiw","Imag","Rtt32*",-,1,0,0,1)
1166
/*
 * CMPY128: emit a 32-bit-result complex multiply (M7_<TAG>,
 * "Rd32=...:<<1:sat") that combines two 32x32 products in a 128-bit
 * intermediate.
 *   TAG          - tag stem
 *   NAME         - mnemonic used in the syntax string
 *   OPERAND1     - second source operand as spelled in the syntax
 *   WORD0..WORD3 - word selectors: tmp128 = Rss.w[WORD0]*Rtt.w[WORD1],
 *                  acc128 = Rss.w[WORD2]*Rtt.w[WORD3]
 *   OP           - 128-bit combiner called as OP(tmp128, acc128)
 *                  (fADD128 or fSUB128 in the users below)
 * Steps: widen both products to 128 bits, combine them with OP, shift right
 * by 31 (fSHIFTR128), narrow to 64 bits, then apply fSATW to produce Rd.
 *
 * NOTE(review): the description string is hard-coded to "... result real"
 * but the macro is also instantiated for the cmpyiw (imaginary) variants
 * below - consider parameterizing it the way CMPY64 does with DESC.
 */
1167 #define CMPY128(TAG, NAME, OPERAND1, WORD0, WORD1, WORD2, WORD3, OP) \
1168 Q6INSN(M7_##TAG,"Rd32=" NAME "(Rss32,"OPERAND1"):<<1:sat",ATTRIBS(A_RESTRICT_SLOT3ONLY),"Complex Multiply 32-bit result real", \
1169 { \
1170 fHIDE(size16s_t acc128;)\
1171 fHIDE(size16s_t tmp128;)\
1172 fHIDE(size8s_t acc64;)\
1173 tmp128 = fCAST8S_16S(fMPY32SS(fGETWORD(WORD0, RssV), fGETWORD(WORD1, RttV)));\
1174 acc128 = fCAST8S_16S(fMPY32SS(fGETWORD(WORD2, RssV), fGETWORD(WORD3, RttV)));\
1175 acc128 = OP(tmp128,acc128);\
1176 acc128 = fSHIFTR128(acc128, 31);\
1177 acc64 = fCAST16S_8S(acc128);\
1178 RdV = fSATW(acc64);\
1179 })
1180
1181
1182 CMPY128(wcmpyrw, "cmpyrw", "Rtt32", 0, 0, 1, 1, fSUB128)
1183 CMPY128(wcmpyrwc, "cmpyrw", "Rtt32*", 0, 0, 1, 1, fADD128)
1184 CMPY128(wcmpyiw, "cmpyiw", "Rtt32", 0, 1, 1, 0, fADD128)
1185 CMPY128(wcmpyiwc, "cmpyiw", "Rtt32*", 1, 0, 0, 1, fSUB128)
1186
1187
/*
 * CMPY128RND: rounding variant of the 32-bit-result complex multiply; emits
 * M7_<TAG>_rnd with syntax "Rd32=...:<<1:rnd:sat".
 * Parameters are the same as for the non-rounding macro: TAG, NAME,
 * OPERAND1, the four word selectors (tmp128 = Rss.w[WORD0]*Rtt.w[WORD1],
 * acc128 = Rss.w[WORD2]*Rtt.w[WORD3]) and the 128-bit combiner OP.
 * Steps: widen both products to 128 bits, combine with OP, add the constant
 * 0x40000000 (half of 1 << 31) with fADD128, shift right by 31, narrow to
 * 64 bits, then apply fSATW to produce Rd.
 *
 * NOTE(review): the description string is hard-coded to "... result real"
 * but the macro is also instantiated for the cmpyiw (imaginary) variants
 * below - consider parameterizing it the way CMPY64 does with DESC.
 */
1188 #define CMPY128RND(TAG, NAME, OPERAND1, WORD0, WORD1, WORD2, WORD3, OP) \
1189 Q6INSN(M7_##TAG##_rnd,"Rd32=" NAME "(Rss32,"OPERAND1"):<<1:rnd:sat",ATTRIBS(A_RESTRICT_SLOT3ONLY),"Complex Multiply 32-bit result real", \
1190 { \
1191 fHIDE(size16s_t acc128;)\
1192 fHIDE(size16s_t tmp128;)\
1193 fHIDE(size16s_t const128;)\
1194 fHIDE(size8s_t acc64;)\
1195 tmp128 = fCAST8S_16S(fMPY32SS(fGETWORD(WORD0, RssV), fGETWORD(WORD1, RttV)));\
1196 acc128 = fCAST8S_16S(fMPY32SS(fGETWORD(WORD2, RssV), fGETWORD(WORD3, RttV)));\
1197 const128 = fCAST8S_16S(fCONSTLL(0x40000000));\
1198 acc128 = OP(tmp128,acc128);\
1199 acc128 = fADD128(acc128,const128);\
1200 acc128 = fSHIFTR128(acc128, 31);\
1201 acc64 = fCAST16S_8S(acc128);\
1202 RdV = fSATW(acc64);\
1203 })
1204
1205 CMPY128RND(wcmpyrw, "cmpyrw", "Rtt32", 0, 0, 1, 1, fSUB128)
1206 CMPY128RND(wcmpyrwc, "cmpyrw", "Rtt32*", 0, 0, 1, 1, fADD128)
1207 CMPY128RND(wcmpyiw, "cmpyiw", "Rtt32", 0, 1, 1, 0, fADD128)
1208 CMPY128RND(wcmpyiwc, "cmpyiw", "Rtt32*", 1, 0, 0, 1, fSUB128)