]>
Commit | Line | Data |
---|---|---|
10602db8 DR |
1 | /* |
2 | * SpanDSP - a series of DSP components for telephony | |
3 | * | |
4 | * fir.h - General telephony FIR routines | |
5 | * | |
6 | * Written by Steve Underwood <steveu@coppice.org> | |
7 | * | |
8 | * Copyright (C) 2002 Steve Underwood | |
9 | * | |
10 | * All rights reserved. | |
11 | * | |
12 | * This program is free software; you can redistribute it and/or modify | |
13 | * it under the terms of the GNU General Public License version 2, as | |
14 | * published by the Free Software Foundation. | |
15 | * | |
16 | * This program is distributed in the hope that it will be useful, | |
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 | * GNU General Public License for more details. | |
20 | * | |
21 | * You should have received a copy of the GNU General Public License | |
22 | * along with this program; if not, write to the Free Software | |
23 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
10602db8 DR |
24 | */ |
25 | ||
10602db8 DR |
26 | #if !defined(_FIR_H_) |
27 | #define _FIR_H_ | |
28 | ||
29 | /* | |
30 | Blackfin NOTES & IDEAS: | |
31 | ||
32 | A simple dot product function is used to implement the filter. This performs | |
33 | just one MAC/cycle which is inefficient but was easy to implement as a first | |
34 | pass. The current Blackfin code also uses an unrolled form of the filter | |
35 | history to avoid 0 length hardware loop issues. This is wasteful of | |
36 | memory. | |
37 | ||
38 | Ideas for improvement: | |
39 | ||
40 | 1/ Rewrite filter for dual MAC inner loop. The issue here is handling | |
41 | history sample offsets that are 16 bit aligned - the dual MAC needs | |
42 | 32 bit alignment. There are some good examples in libbfdsp. | |
43 | ||
44 | 2/ Use the hardware circular buffer facility to halve memory usage. | |
45 | ||
46 | 3/ Consider using internal memory. | |
47 | ||
48 | Using less memory might also improve speed as cache misses will be | |
49 | reduced. A drop in MIPs and memory approaching 50% should be | |
50 | possible. | |
51 | ||
52 | The foreground and background filters currently use a total of | |
53 | about 10 MIPs/ch as measured with speedtest.c on a 256 TAP echo | |
54 | can. | |
55 | */ | |
56 | ||
56791f0a GKH |
57 | /* |
58 | * 16 bit integer FIR descriptor. This defines the working state for a single | |
59 | * instance of an FIR filter using 16 bit integer coefficients. | |
60 | */ | |
struct fir16_state_t {
	/* Number of filter taps; also the length of the coefficient array. */
	int taps;
	/* Index into history where the next input sample will be stored. */
	int curr_pos;
	/* Caller-supplied 16 bit coefficients; never written by the filter. */
	const int16_t *coeffs;
	/* Circular buffer of past input samples, allocated by fir16_create(). */
	int16_t *history;
};
10602db8 | 67 | |
56791f0a GKH |
68 | /* |
69 | * 32 bit integer FIR descriptor. This defines the working state for a single | |
70 | * instance of an FIR filter using 32 bit integer coefficients, and filtering | |
71 | * 16 bit integer data. | |
72 | */ | |
struct fir32_state_t {
	/* Number of filter taps; also the length of the coefficient array. */
	int taps;
	/* Index into history where the next input sample will be stored. */
	int curr_pos;
	/* Caller-supplied 32 bit coefficients; never written by the filter. */
	const int32_t *coeffs;
	/* Circular buffer of past 16 bit input samples (see fir32_create()). */
	int16_t *history;
};
10602db8 | 79 | |
56791f0a GKH |
80 | /* |
81 | * Floating point FIR descriptor. This defines the working state for a single | |
82 | * instance of an FIR filter using floating point coefficients and data. | |
83 | */ | |
struct fir_float_state_t {
	/* Number of filter taps. */
	int taps;
	/* Current position within the history buffer. */
	int curr_pos;
	/* Floating point coefficients; read-only for the filter. */
	const float *coeffs;
	/* Buffer of past floating point input samples. */
	float *history;
};
10602db8 | 90 | |
dc57a3ea AB |
91 | static inline const int16_t *fir16_create(struct fir16_state_t *fir, |
92 | const int16_t *coeffs, int taps) | |
10602db8 DR |
93 | { |
94 | fir->taps = taps; | |
95 | fir->curr_pos = taps - 1; | |
96 | fir->coeffs = coeffs; | |
c8b3953c | 97 | #if defined(__bfin__) |
4460a860 | 98 | fir->history = kcalloc(2 * taps, sizeof(int16_t), GFP_KERNEL); |
10602db8 | 99 | #else |
db2af149 | 100 | fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL); |
10602db8 DR |
101 | #endif |
102 | return fir->history; | |
103 | } | |
10602db8 | 104 | |
dc57a3ea | 105 | static inline void fir16_flush(struct fir16_state_t *fir) |
10602db8 | 106 | { |
c8b3953c | 107 | #if defined(__bfin__) |
4460a860 | 108 | memset(fir->history, 0, 2 * fir->taps * sizeof(int16_t)); |
10602db8 | 109 | #else |
4460a860 | 110 | memset(fir->history, 0, fir->taps * sizeof(int16_t)); |
10602db8 DR |
111 | #endif |
112 | } | |
10602db8 | 113 | |
dc57a3ea | 114 | static inline void fir16_free(struct fir16_state_t *fir) |
10602db8 | 115 | { |
db2af149 | 116 | kfree(fir->history); |
10602db8 | 117 | } |
10602db8 | 118 | |
#ifdef __bfin__
/*
 * Blackfin dot product of two vectors of 16 bit samples.
 *
 * x, y: the two input vectors, each holding len 16 bit elements.
 * len:  number of elements to multiply and accumulate.
 *
 * Returns the sum of x[i] * y[i] as accumulated in A0.
 *
 * The first pair of operands is loaded before the hardware loop. Each
 * loop pass accumulates the pair already loaded and fetches the next one
 * in parallel, and the final pair is accumulated after the loop. The loop
 * therefore runs len - 1 times.
 *
 * NOTE(review): len == 1 yields a loop count of zero; the file header
 * mentions "0 length hardware loop issues" - confirm callers never pass
 * a single tap.
 * NOTE(review): the hardware loop registers used by LOOP ... LC0 are not
 * in the clobber list - confirm whether the toolchain requires them.
 */
static inline int32_t dot_asm(short *x, short *y, int len)
{
	int dot;

	/* One product is accumulated after the loop. */
	len--;

	__asm__("I0 = %1;\n\t"
		"I1 = %2;\n\t"
		"A0 = 0;\n\t"
		"R0.L = W[I0++] || R1.L = W[I1++];\n\t"
		"LOOP dot%= LC0 = %3;\n\t"
		"LOOP_BEGIN dot%=;\n\t"
		"A0 += R0.L * R1.L (IS) || R0.L = W[I0++] || R1.L = W[I1++];\n\t"
		"LOOP_END dot%=;\n\t"
		"A0 += R0.L*R1.L (IS);\n\t"
		"R0 = A0;\n\t"
		"%0 = R0;\n\t"
		: "=&d"(dot)
		: "a"(x), "a"(y), "a"(len)
		/*
		 * "memory": the asm reads the arrays behind x and y, which
		 * are not listed as operands. Without this clobber the
		 * compiler may move or drop stores to those arrays that
		 * precede the call.
		 */
		: "I0", "I1", "A1", "A0", "R0", "R1", "memory"
	);

	return dot;
}
#endif
10602db8 | 145 | |
dc57a3ea | 146 | static inline int16_t fir16(struct fir16_state_t *fir, int16_t sample) |
10602db8 | 147 | { |
4460a860 | 148 | int32_t y; |
c8b3953c | 149 | #if defined(__bfin__) |
4460a860 M |
150 | fir->history[fir->curr_pos] = sample; |
151 | fir->history[fir->curr_pos + fir->taps] = sample; | |
152 | y = dot_asm((int16_t *) fir->coeffs, &fir->history[fir->curr_pos], | |
153 | fir->taps); | |
10602db8 | 154 | #else |
4460a860 M |
155 | int i; |
156 | int offset1; | |
157 | int offset2; | |
158 | ||
159 | fir->history[fir->curr_pos] = sample; | |
160 | ||
161 | offset2 = fir->curr_pos; | |
162 | offset1 = fir->taps - offset2; | |
163 | y = 0; | |
164 | for (i = fir->taps - 1; i >= offset1; i--) | |
165 | y += fir->coeffs[i] * fir->history[i - offset1]; | |
166 | for (; i >= 0; i--) | |
167 | y += fir->coeffs[i] * fir->history[i + offset2]; | |
10602db8 | 168 | #endif |
4460a860 M |
169 | if (fir->curr_pos <= 0) |
170 | fir->curr_pos = fir->taps; | |
171 | fir->curr_pos--; | |
172 | return (int16_t) (y >> 15); | |
10602db8 | 173 | } |
10602db8 | 174 | |
dc57a3ea AB |
175 | static inline const int16_t *fir32_create(struct fir32_state_t *fir, |
176 | const int32_t *coeffs, int taps) | |
10602db8 | 177 | { |
4460a860 M |
178 | fir->taps = taps; |
179 | fir->curr_pos = taps - 1; | |
180 | fir->coeffs = coeffs; | |
181 | fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL); | |
182 | return fir->history; | |
10602db8 | 183 | } |
10602db8 | 184 | |
dc57a3ea | 185 | static inline void fir32_flush(struct fir32_state_t *fir) |
10602db8 | 186 | { |
4460a860 | 187 | memset(fir->history, 0, fir->taps * sizeof(int16_t)); |
10602db8 | 188 | } |
10602db8 | 189 | |
dc57a3ea | 190 | static inline void fir32_free(struct fir32_state_t *fir) |
10602db8 | 191 | { |
4460a860 | 192 | kfree(fir->history); |
10602db8 | 193 | } |
10602db8 | 194 | |
dc57a3ea | 195 | static inline int16_t fir32(struct fir32_state_t *fir, int16_t sample) |
10602db8 | 196 | { |
4460a860 M |
197 | int i; |
198 | int32_t y; | |
199 | int offset1; | |
200 | int offset2; | |
201 | ||
202 | fir->history[fir->curr_pos] = sample; | |
203 | offset2 = fir->curr_pos; | |
204 | offset1 = fir->taps - offset2; | |
205 | y = 0; | |
206 | for (i = fir->taps - 1; i >= offset1; i--) | |
207 | y += fir->coeffs[i] * fir->history[i - offset1]; | |
208 | for (; i >= 0; i--) | |
209 | y += fir->coeffs[i] * fir->history[i + offset2]; | |
210 | if (fir->curr_pos <= 0) | |
211 | fir->curr_pos = fir->taps; | |
212 | fir->curr_pos--; | |
213 | return (int16_t) (y >> 15); | |
10602db8 | 214 | } |
10602db8 | 215 | |
10602db8 | 216 | #endif |