]>
Commit | Line | Data |
---|---|---|
10602db8 DR |
1 | /* |
2 | * SpanDSP - a series of DSP components for telephony | |
3 | * | |
4 | * echo.c - A line echo canceller. This code is being developed | |
5 | * against and partially complies with G168. | |
6 | * | |
7 | * Written by Steve Underwood <steveu@coppice.org> | |
8 | * and David Rowe <david_at_rowetel_dot_com> | |
9 | * | |
10 | * Copyright (C) 2001, 2003 Steve Underwood, 2007 David Rowe | |
11 | * | |
12 | * Based on a bit from here, a bit from there, eye of toad, ear of | |
13 | * bat, 15 years of failed attempts by David and a few fried brain | |
14 | * cells. | |
15 | * | |
16 | * All rights reserved. | |
17 | * | |
18 | * This program is free software; you can redistribute it and/or modify | |
19 | * it under the terms of the GNU General Public License version 2, as | |
20 | * published by the Free Software Foundation. | |
21 | * | |
22 | * This program is distributed in the hope that it will be useful, | |
23 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
24 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
25 | * GNU General Public License for more details. | |
26 | * | |
27 | * You should have received a copy of the GNU General Public License | |
28 | * along with this program; if not, write to the Free Software | |
29 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
10602db8 DR |
30 | */ |
31 | ||
32 | /*! \file */ | |
33 | ||
34 | /* Implementation Notes | |
35 | David Rowe | |
36 | April 2007 | |
37 | ||
38 | This code started life as Steve's NLMS algorithm with a tap | |
39 | rotation algorithm to handle divergence during double talk. I | |
40 | added a Geigel Double Talk Detector (DTD) [2] and performed some | |
41 | G168 tests. However I had trouble meeting the G168 requirements, | |
42 | especially for double talk - there were always cases where my DTD | |
43 | failed, for example where near end speech was under the 6dB | |
44 | threshold required for declaring double talk. | |
45 | ||
46 | So I tried a two path algorithm [1], which has so far given better | |
47 | results. The original tap rotation/Geigel algorithm is available | |
48 | in SVN http://svn.rowetel.com/software/oslec/tags/before_16bit. | |
49 | It's probably possible to make it work if someone wants to put some | |
50 | serious work into it. | |
51 | ||
52 | At present no special treatment is provided for tones, which | |
53 | generally cause NLMS algorithms to diverge. Initial runs of a | |
54 | subset of the G168 tests for tones (e.g ./echo_test 6) show the | |
55 | current algorithm is passing OK, which is kind of surprising. The | |
56 | full set of tests needs to be performed to confirm this result. | |
57 | ||
58 | One other interesting change is that I have managed to get the NLMS | |
59 | code to work with 16 bit coefficients, rather than the original 32 | |
60 | bit coefficients. This reduces the MIPs and storage required. | |
61 | I evaluated the 16 bit port using g168_tests.sh and listening tests | |
62 | on 4 real-world samples. | |
63 | ||
64 | I also attempted the implementation of a block based NLMS update | |
65 | [2] but although this passes g168_tests.sh it didn't converge well | |
66 | on the real-world samples. I have no idea why, perhaps a scaling | |
67 | problem. The block based code is also available in SVN | |
68 | http://svn.rowetel.com/software/oslec/tags/before_16bit. If this | |
69 | code can be debugged, it will lead to further reduction in MIPS, as | |
70 | the block update code maps nicely onto DSP instruction sets (it's a | |
71 | dot product) compared to the current sample-by-sample update. | |
72 | ||
73 | Steve also has some nice notes on echo cancellers in echo.h | |
74 | ||
10602db8 DR |
75 | References: |
76 | ||
77 | [1] Ochiai, Areseki, and Ogihara, "Echo Canceller with Two Echo | |
78 | Path Models", IEEE Transactions on communications, COM-25, | |
79 | No. 6, June | |
80 | 1977. | |
81 | http://www.rowetel.com/images/echo/dual_path_paper.pdf | |
82 | ||
83 | [2] The classic, very useful paper that tells you how to | |
84 | actually build a real world echo canceller: | |
49bb9e6d GKH |
85 | Messerschmitt, Hedberg, Cole, Haoui, Winship, "Digital Voice |
86 | Echo Canceller with a TMS32020", | |
87 | http://www.rowetel.com/images/echo/spra129.pdf | |
10602db8 DR |
88 | |
89 | [3] I have written a series of blog posts on this work, here is | |
90 | Part 1: http://www.rowetel.com/blog/?p=18 | |
91 | ||
92 | [4] The source code http://svn.rowetel.com/software/oslec/ | |
93 | ||
94 | [5] A nice reference on LMS filters: | |
49bb9e6d | 95 | http://en.wikipedia.org/wiki/Least_mean_squares_filter |
10602db8 DR |
96 | |
97 | Credits: | |
98 | ||
99 | Thanks to Steve Underwood, Jean-Marc Valin, and Ramakrishnan | |
100 | Muthukrishnan for their suggestions and email discussions. Thanks | |
101 | also to those people who collected echo samples for me such as | |
102 | Mark, Pawel, and Pavel. | |
103 | */ | |
104 | ||
49bb9e6d | 105 | #include <linux/kernel.h> |
10602db8 | 106 | #include <linux/module.h> |
10602db8 | 107 | #include <linux/slab.h> |
10602db8 | 108 | |
10602db8 DR |
109 | #include "echo.h" |
110 | ||
49bb9e6d GKH |
111 | #define MIN_TX_POWER_FOR_ADAPTION 64 |
112 | #define MIN_RX_POWER_FOR_ADAPTION 64 | |
113 | #define DTD_HANGOVER 600 /* 600 samples, or 75ms */ | |
114 | #define DC_LOG2BETA 3 /* log2() of DC filter Beta */ | |
10602db8 | 115 | |
10602db8 DR |
116 | /* adapting coeffs using the traditional stochastic descent (N)LMS algorithm */ |
117 | ||
f55ccbf6 | 118 | #ifdef __bfin__ |
7a9aea51 | 119 | static inline void lms_adapt_bg(struct oslec_state *ec, int clean, int shift) |
10602db8 | 120 | { |
3ec50be5 | 121 | int i; |
4460a860 M |
122 | int offset1; |
123 | int offset2; | |
124 | int factor; | |
125 | int exp; | |
126 | int16_t *phist; | |
127 | int n; | |
128 | ||
129 | if (shift > 0) | |
130 | factor = clean << shift; | |
131 | else | |
132 | factor = clean >> -shift; | |
133 | ||
134 | /* Update the FIR taps */ | |
135 | ||
136 | offset2 = ec->curr_pos; | |
137 | offset1 = ec->taps - offset2; | |
138 | phist = &ec->fir_state_bg.history[offset2]; | |
139 | ||
140 | /* st: and en: help us locate the assembler in echo.s */ | |
141 | ||
dc57a3ea | 142 | /* asm("st:"); */ |
4460a860 | 143 | n = ec->taps; |
c020a7a4 | 144 | for (i = 0; i < n; i++) { |
4460a860 M |
145 | exp = *phist++ * factor; |
146 | ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15); | |
147 | } | |
dc57a3ea | 148 | /* asm("en:"); */ |
4460a860 M |
149 | |
150 | /* Note the asm for the inner loop above generated by Blackfin gcc | |
151 | 4.1.1 is pretty good (note even parallel instructions used): | |
152 | ||
153 | R0 = W [P0++] (X); | |
154 | R0 *= R2; | |
155 | R0 = R0 + R3 (NS) || | |
156 | R1 = W [P1] (X) || | |
157 | nop; | |
158 | R0 >>>= 15; | |
159 | R0 = R0 + R1; | |
160 | W [P1++] = R0; | |
161 | ||
162 | A block based update algorithm would be much faster but the | |
163 | above can't be improved on much. Every instruction saved in | |
164 | the loop above is 2 MIPs/ch! The for loop above is where the | |
165 | Blackfin spends most of it's time - about 17 MIPs/ch measured | |
166 | with speedtest.c with 256 taps (32ms). Write-back and | |
167 | Write-through cache gave about the same performance. | |
168 | */ | |
10602db8 DR |
169 | } |
170 | ||
171 | /* | |
172 | IDEAS for further optimisation of lms_adapt_bg(): | |
173 | ||
174 | 1/ The rounding is quite costly. Could we keep as 32 bit coeffs | |
175 | then make filter pluck the MS 16-bits of the coeffs when filtering? | |
176 | However this would lower potential optimisation of filter, as I | |
177 | think the dual-MAC architecture requires packed 16 bit coeffs. | |
178 | ||
179 | 2/ Block based update would be more efficient, as per comments above, | |
180 | could use dual MAC architecture. | |
181 | ||
182 | 3/ Look for same sample Blackfin LMS code, see if we can get dual-MAC | |
183 | packing. | |
184 | ||
185 | 4/ Execute the whole e/c in a block of say 20ms rather than sample | |
186 | by sample. Processing a few samples every ms is inefficient. | |
187 | */ | |
188 | ||
189 | #else | |
7a9aea51 | 190 | static inline void lms_adapt_bg(struct oslec_state *ec, int clean, int shift) |
10602db8 | 191 | { |
4460a860 M |
192 | int i; |
193 | ||
194 | int offset1; | |
195 | int offset2; | |
196 | int factor; | |
197 | int exp; | |
198 | ||
199 | if (shift > 0) | |
200 | factor = clean << shift; | |
201 | else | |
202 | factor = clean >> -shift; | |
203 | ||
204 | /* Update the FIR taps */ | |
205 | ||
206 | offset2 = ec->curr_pos; | |
207 | offset1 = ec->taps - offset2; | |
208 | ||
209 | for (i = ec->taps - 1; i >= offset1; i--) { | |
210 | exp = (ec->fir_state_bg.history[i - offset1] * factor); | |
211 | ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15); | |
212 | } | |
213 | for (; i >= 0; i--) { | |
214 | exp = (ec->fir_state_bg.history[i + offset2] * factor); | |
215 | ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15); | |
216 | } | |
10602db8 DR |
217 | } |
218 | #endif | |
219 | ||
/*
 * Position of the highest non-zero bit in @bits, obtained as fls() - 1.
 * Returns -1 when @bits is zero.
 */
static inline int top_bit(unsigned int bits)
{
	return bits ? (int)fls((int32_t) bits) - 1 : -1;
}
227 | ||
9d8f2d5d | 228 | struct oslec_state *oslec_create(int len, int adaption_mode) |
10602db8 | 229 | { |
4460a860 M |
230 | struct oslec_state *ec; |
231 | int i; | |
09024688 | 232 | const int16_t *history; |
4460a860 M |
233 | |
234 | ec = kzalloc(sizeof(*ec), GFP_KERNEL); | |
235 | if (!ec) | |
236 | return NULL; | |
237 | ||
238 | ec->taps = len; | |
239 | ec->log2taps = top_bit(len); | |
240 | ec->curr_pos = ec->taps - 1; | |
241 | ||
09024688 CD |
242 | ec->fir_taps16[0] = |
243 | kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL); | |
244 | if (!ec->fir_taps16[0]) | |
245 | goto error_oom_0; | |
246 | ||
247 | ec->fir_taps16[1] = | |
248 | kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL); | |
249 | if (!ec->fir_taps16[1]) | |
250 | goto error_oom_1; | |
4460a860 | 251 | |
09024688 CD |
252 | history = fir16_create(&ec->fir_state, ec->fir_taps16[0], ec->taps); |
253 | if (!history) | |
254 | goto error_state; | |
255 | history = fir16_create(&ec->fir_state_bg, ec->fir_taps16[1], ec->taps); | |
256 | if (!history) | |
257 | goto error_state_bg; | |
4460a860 | 258 | |
dc57a3ea | 259 | for (i = 0; i < 5; i++) |
4460a860 | 260 | ec->xvtx[i] = ec->yvtx[i] = ec->xvrx[i] = ec->yvrx[i] = 0; |
4460a860 M |
261 | |
262 | ec->cng_level = 1000; | |
263 | oslec_adaption_mode(ec, adaption_mode); | |
264 | ||
265 | ec->snapshot = kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL); | |
266 | if (!ec->snapshot) | |
09024688 | 267 | goto error_snap; |
4460a860 M |
268 | |
269 | ec->cond_met = 0; | |
0c474826 LN |
270 | ec->pstates = 0; |
271 | ec->ltxacc = ec->lrxacc = ec->lcleanacc = ec->lclean_bgacc = 0; | |
272 | ec->ltx = ec->lrx = ec->lclean = ec->lclean_bg = 0; | |
4460a860 | 273 | ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0; |
0c474826 LN |
274 | ec->lbgn = ec->lbgn_acc = 0; |
275 | ec->lbgn_upper = 200; | |
276 | ec->lbgn_upper_acc = ec->lbgn_upper << 13; | |
4460a860 M |
277 | |
278 | return ec; | |
279 | ||
09024688 CD |
280 | error_snap: |
281 | fir16_free(&ec->fir_state_bg); | |
282 | error_state_bg: | |
283 | fir16_free(&ec->fir_state); | |
284 | error_state: | |
285 | kfree(ec->fir_taps16[1]); | |
286 | error_oom_1: | |
287 | kfree(ec->fir_taps16[0]); | |
288 | error_oom_0: | |
4460a860 M |
289 | kfree(ec); |
290 | return NULL; | |
10602db8 | 291 | } |
9d8f2d5d | 292 | EXPORT_SYMBOL_GPL(oslec_create); |
10602db8 | 293 | |
9d8f2d5d | 294 | void oslec_free(struct oslec_state *ec) |
10602db8 DR |
295 | { |
296 | int i; | |
297 | ||
298 | fir16_free(&ec->fir_state); | |
299 | fir16_free(&ec->fir_state_bg); | |
4460a860 | 300 | for (i = 0; i < 2; i++) |
10602db8 DR |
301 | kfree(ec->fir_taps16[i]); |
302 | kfree(ec->snapshot); | |
303 | kfree(ec); | |
304 | } | |
9d8f2d5d | 305 | EXPORT_SYMBOL_GPL(oslec_free); |
10602db8 | 306 | |
9d8f2d5d | 307 | void oslec_adaption_mode(struct oslec_state *ec, int adaption_mode) |
10602db8 | 308 | { |
4460a860 | 309 | ec->adaption_mode = adaption_mode; |
10602db8 | 310 | } |
9d8f2d5d | 311 | EXPORT_SYMBOL_GPL(oslec_adaption_mode); |
10602db8 | 312 | |
9d8f2d5d | 313 | void oslec_flush(struct oslec_state *ec) |
10602db8 | 314 | { |
4460a860 | 315 | int i; |
10602db8 | 316 | |
0c474826 LN |
317 | ec->ltxacc = ec->lrxacc = ec->lcleanacc = ec->lclean_bgacc = 0; |
318 | ec->ltx = ec->lrx = ec->lclean = ec->lclean_bg = 0; | |
4460a860 | 319 | ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0; |
10602db8 | 320 | |
0c474826 LN |
321 | ec->lbgn = ec->lbgn_acc = 0; |
322 | ec->lbgn_upper = 200; | |
323 | ec->lbgn_upper_acc = ec->lbgn_upper << 13; | |
10602db8 | 324 | |
4460a860 | 325 | ec->nonupdate_dwell = 0; |
10602db8 | 326 | |
4460a860 M |
327 | fir16_flush(&ec->fir_state); |
328 | fir16_flush(&ec->fir_state_bg); | |
329 | ec->fir_state.curr_pos = ec->taps - 1; | |
330 | ec->fir_state_bg.curr_pos = ec->taps - 1; | |
331 | for (i = 0; i < 2; i++) | |
332 | memset(ec->fir_taps16[i], 0, ec->taps * sizeof(int16_t)); | |
10602db8 | 333 | |
4460a860 | 334 | ec->curr_pos = ec->taps - 1; |
0c474826 | 335 | ec->pstates = 0; |
10602db8 | 336 | } |
9d8f2d5d | 337 | EXPORT_SYMBOL_GPL(oslec_flush); |
10602db8 | 338 | |
4460a860 M |
339 | void oslec_snapshot(struct oslec_state *ec) |
340 | { | |
341 | memcpy(ec->snapshot, ec->fir_taps16[0], ec->taps * sizeof(int16_t)); | |
10602db8 | 342 | } |
9d8f2d5d | 343 | EXPORT_SYMBOL_GPL(oslec_snapshot); |
10602db8 | 344 | |
49bb9e6d | 345 | /* Dual Path Echo Canceller */ |
10602db8 | 346 | |
9d8f2d5d | 347 | int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx) |
10602db8 | 348 | { |
4460a860 M |
349 | int32_t echo_value; |
350 | int clean_bg; | |
3ec50be5 JJ |
351 | int tmp; |
352 | int tmp1; | |
4460a860 | 353 | |
49bb9e6d GKH |
354 | /* |
355 | * Input scaling was found be required to prevent problems when tx | |
356 | * starts clipping. Another possible way to handle this would be the | |
357 | * filter coefficent scaling. | |
358 | */ | |
4460a860 M |
359 | |
360 | ec->tx = tx; | |
361 | ec->rx = rx; | |
362 | tx >>= 1; | |
363 | rx >>= 1; | |
364 | ||
365 | /* | |
49bb9e6d GKH |
366 | * Filter DC, 3dB point is 160Hz (I think), note 32 bit precision |
367 | * required otherwise values do not track down to 0. Zero at DC, Pole | |
196e76e8 | 368 | * at (1-Beta) on real axis. Some chip sets (like Si labs) don't |
49bb9e6d GKH |
369 | * need this, but something like a $10 X100P card does. Any DC really |
370 | * slows down convergence. | |
371 | * | |
372 | * Note: removes some low frequency from the signal, this reduces the | |
373 | * speech quality when listening to samples through headphones but may | |
374 | * not be obvious through a telephone handset. | |
375 | * | |
376 | * Note that the 3dB frequency in radians is approx Beta, e.g. for Beta | |
377 | * = 2^(-3) = 0.125, 3dB freq is 0.125 rads = 159Hz. | |
4460a860 M |
378 | */ |
379 | ||
380 | if (ec->adaption_mode & ECHO_CAN_USE_RX_HPF) { | |
381 | tmp = rx << 15; | |
196e76e8 | 382 | |
49bb9e6d GKH |
383 | /* |
384 | * Make sure the gain of the HPF is 1.0. This can still | |
385 | * saturate a little under impulse conditions, and it might | |
386 | * roll to 32768 and need clipping on sustained peak level | |
387 | * signals. However, the scale of such clipping is small, and | |
388 | * the error due to any saturation should not markedly affect | |
389 | * the downstream processing. | |
390 | */ | |
4460a860 | 391 | tmp -= (tmp >> 4); |
196e76e8 | 392 | |
4460a860 M |
393 | ec->rx_1 += -(ec->rx_1 >> DC_LOG2BETA) + tmp - ec->rx_2; |
394 | ||
49bb9e6d GKH |
395 | /* |
396 | * hard limit filter to prevent clipping. Note that at this | |
397 | * stage rx should be limited to +/- 16383 due to right shift | |
398 | * above | |
399 | */ | |
4460a860 M |
400 | tmp1 = ec->rx_1 >> 15; |
401 | if (tmp1 > 16383) | |
402 | tmp1 = 16383; | |
403 | if (tmp1 < -16383) | |
404 | tmp1 = -16383; | |
405 | rx = tmp1; | |
406 | ec->rx_2 = tmp; | |
407 | } | |
10602db8 | 408 | |
4460a860 M |
409 | /* Block average of power in the filter states. Used for |
410 | adaption power calculation. */ | |
10602db8 | 411 | |
4460a860 M |
412 | { |
413 | int new, old; | |
414 | ||
415 | /* efficient "out with the old and in with the new" algorithm so | |
416 | we don't have to recalculate over the whole block of | |
417 | samples. */ | |
30c5007e | 418 | new = (int)tx * (int)tx; |
4460a860 M |
419 | old = (int)ec->fir_state.history[ec->fir_state.curr_pos] * |
420 | (int)ec->fir_state.history[ec->fir_state.curr_pos]; | |
0c474826 | 421 | ec->pstates += |
7a9aea51 | 422 | ((new - old) + (1 << (ec->log2taps - 1))) >> ec->log2taps; |
0c474826 LN |
423 | if (ec->pstates < 0) |
424 | ec->pstates = 0; | |
4460a860 | 425 | } |
10602db8 | 426 | |
4460a860 | 427 | /* Calculate short term average levels using simple single pole IIRs */ |
10602db8 | 428 | |
0c474826 LN |
429 | ec->ltxacc += abs(tx) - ec->ltx; |
430 | ec->ltx = (ec->ltxacc + (1 << 4)) >> 5; | |
431 | ec->lrxacc += abs(rx) - ec->lrx; | |
432 | ec->lrx = (ec->lrxacc + (1 << 4)) >> 5; | |
10602db8 | 433 | |
49bb9e6d | 434 | /* Foreground filter */ |
10602db8 | 435 | |
4460a860 M |
436 | ec->fir_state.coeffs = ec->fir_taps16[0]; |
437 | echo_value = fir16(&ec->fir_state, tx); | |
438 | ec->clean = rx - echo_value; | |
0c474826 LN |
439 | ec->lcleanacc += abs(ec->clean) - ec->lclean; |
440 | ec->lclean = (ec->lcleanacc + (1 << 4)) >> 5; | |
10602db8 | 441 | |
49bb9e6d | 442 | /* Background filter */ |
10602db8 | 443 | |
4460a860 M |
444 | echo_value = fir16(&ec->fir_state_bg, tx); |
445 | clean_bg = rx - echo_value; | |
0c474826 LN |
446 | ec->lclean_bgacc += abs(clean_bg) - ec->lclean_bg; |
447 | ec->lclean_bg = (ec->lclean_bgacc + (1 << 4)) >> 5; | |
10602db8 | 448 | |
49bb9e6d | 449 | /* Background Filter adaption */ |
10602db8 | 450 | |
4460a860 M |
451 | /* Almost always adap bg filter, just simple DT and energy |
452 | detection to minimise adaption in cases of strong double talk. | |
453 | However this is not critical for the dual path algorithm. | |
454 | */ | |
455 | ec->factor = 0; | |
456 | ec->shift = 0; | |
457 | if ((ec->nonupdate_dwell == 0)) { | |
0c474826 | 458 | int p, logp, shift; |
4460a860 M |
459 | |
460 | /* Determine: | |
461 | ||
462 | f = Beta * clean_bg_rx/P ------ (1) | |
463 | ||
464 | where P is the total power in the filter states. | |
465 | ||
466 | The Boffins have shown that if we obey (1) we converge | |
467 | quickly and avoid instability. | |
468 | ||
469 | The correct factor f must be in Q30, as this is the fixed | |
470 | point format required by the lms_adapt_bg() function, | |
471 | therefore the scaled version of (1) is: | |
472 | ||
473 | (2^30) * f = (2^30) * Beta * clean_bg_rx/P | |
196e76e8 | 474 | factor = (2^30) * Beta * clean_bg_rx/P ----- (2) |
4460a860 M |
475 | |
476 | We have chosen Beta = 0.25 by experiment, so: | |
477 | ||
196e76e8 | 478 | factor = (2^30) * (2^-2) * clean_bg_rx/P |
4460a860 | 479 | |
7a9aea51 | 480 | (30 - 2 - log2(P)) |
196e76e8 | 481 | factor = clean_bg_rx 2 ----- (3) |
4460a860 M |
482 | |
483 | To avoid a divide we approximate log2(P) as top_bit(P), | |
484 | which returns the position of the highest non-zero bit in | |
485 | P. This approximation introduces an error as large as a | |
486 | factor of 2, but the algorithm seems to handle it OK. | |
487 | ||
488 | Come to think of it a divide may not be a big deal on a | |
489 | modern DSP, so its probably worth checking out the cycles | |
490 | for a divide versus a top_bit() implementation. | |
491 | */ | |
492 | ||
0c474826 LN |
493 | p = MIN_TX_POWER_FOR_ADAPTION + ec->pstates; |
494 | logp = top_bit(p) + ec->log2taps; | |
495 | shift = 30 - 2 - logp; | |
4460a860 M |
496 | ec->shift = shift; |
497 | ||
498 | lms_adapt_bg(ec, clean_bg, shift); | |
10602db8 | 499 | } |
4460a860 M |
500 | |
501 | /* very simple DTD to make sure we dont try and adapt with strong | |
502 | near end speech */ | |
503 | ||
504 | ec->adapt = 0; | |
0c474826 | 505 | if ((ec->lrx > MIN_RX_POWER_FOR_ADAPTION) && (ec->lrx > ec->ltx)) |
4460a860 M |
506 | ec->nonupdate_dwell = DTD_HANGOVER; |
507 | if (ec->nonupdate_dwell) | |
508 | ec->nonupdate_dwell--; | |
509 | ||
49bb9e6d | 510 | /* Transfer logic */ |
4460a860 M |
511 | |
512 | /* These conditions are from the dual path paper [1], I messed with | |
513 | them a bit to improve performance. */ | |
514 | ||
515 | if ((ec->adaption_mode & ECHO_CAN_USE_ADAPTION) && | |
516 | (ec->nonupdate_dwell == 0) && | |
dc57a3ea | 517 | /* (ec->Lclean_bg < 0.875*ec->Lclean) */ |
0c474826 | 518 | (8 * ec->lclean_bg < 7 * ec->lclean) && |
dc57a3ea | 519 | /* (ec->Lclean_bg < 0.125*ec->Ltx) */ |
0c474826 | 520 | (8 * ec->lclean_bg < ec->ltx)) { |
4460a860 | 521 | if (ec->cond_met == 6) { |
49bb9e6d GKH |
522 | /* |
523 | * BG filter has had better results for 6 consecutive | |
524 | * samples | |
525 | */ | |
4460a860 M |
526 | ec->adapt = 1; |
527 | memcpy(ec->fir_taps16[0], ec->fir_taps16[1], | |
7a9aea51 | 528 | ec->taps * sizeof(int16_t)); |
4460a860 M |
529 | } else |
530 | ec->cond_met++; | |
531 | } else | |
532 | ec->cond_met = 0; | |
533 | ||
49bb9e6d | 534 | /* Non-Linear Processing */ |
4460a860 M |
535 | |
536 | ec->clean_nlp = ec->clean; | |
537 | if (ec->adaption_mode & ECHO_CAN_USE_NLP) { | |
49bb9e6d GKH |
538 | /* |
539 | * Non-linear processor - a fancy way to say "zap small | |
540 | * signals, to avoid residual echo due to (uLaw/ALaw) | |
541 | * non-linearity in the channel.". | |
542 | */ | |
4460a860 | 543 | |
0c474826 | 544 | if ((16 * ec->lclean < ec->ltx)) { |
49bb9e6d GKH |
545 | /* |
546 | * Our e/c has improved echo by at least 24 dB (each | |
547 | * factor of 2 is 6dB, so 2*2*2*2=16 is the same as | |
548 | * 6+6+6+6=24dB) | |
549 | */ | |
4460a860 | 550 | if (ec->adaption_mode & ECHO_CAN_USE_CNG) { |
0c474826 | 551 | ec->cng_level = ec->lbgn; |
4460a860 | 552 | |
49bb9e6d GKH |
553 | /* |
554 | * Very elementary comfort noise generation. | |
555 | * Just random numbers rolled off very vaguely | |
556 | * Hoth-like. DR: This noise doesn't sound | |
557 | * quite right to me - I suspect there are some | |
83aa3c7b | 558 | * overflow issues in the filtering as it's too |
49bb9e6d GKH |
559 | * "crackly". |
560 | * TODO: debug this, maybe just play noise at | |
561 | * high level or look at spectrum. | |
4460a860 M |
562 | */ |
563 | ||
564 | ec->cng_rndnum = | |
565 | 1664525U * ec->cng_rndnum + 1013904223U; | |
566 | ec->cng_filter = | |
567 | ((ec->cng_rndnum & 0xFFFF) - 32768 + | |
568 | 5 * ec->cng_filter) >> 3; | |
569 | ec->clean_nlp = | |
570 | (ec->cng_filter * ec->cng_level * 8) >> 14; | |
571 | ||
572 | } else if (ec->adaption_mode & ECHO_CAN_USE_CLIP) { | |
573 | /* This sounds much better than CNG */ | |
0c474826 LN |
574 | if (ec->clean_nlp > ec->lbgn) |
575 | ec->clean_nlp = ec->lbgn; | |
576 | if (ec->clean_nlp < -ec->lbgn) | |
577 | ec->clean_nlp = -ec->lbgn; | |
4460a860 | 578 | } else { |
49bb9e6d GKH |
579 | /* |
580 | * just mute the residual, doesn't sound very | |
581 | * good, used mainly in G168 tests | |
582 | */ | |
4460a860 M |
583 | ec->clean_nlp = 0; |
584 | } | |
585 | } else { | |
49bb9e6d GKH |
586 | /* |
587 | * Background noise estimator. I tried a few | |
588 | * algorithms here without much luck. This very simple | |
589 | * one seems to work best, we just average the level | |
590 | * using a slow (1 sec time const) filter if the | |
591 | * current level is less than a (experimentally | |
592 | * derived) constant. This means we dont include high | |
593 | * level signals like near end speech. When combined | |
594 | * with CNG or especially CLIP seems to work OK. | |
4460a860 | 595 | */ |
0c474826 LN |
596 | if (ec->lclean < 40) { |
597 | ec->lbgn_acc += abs(ec->clean) - ec->lbgn; | |
598 | ec->lbgn = (ec->lbgn_acc + (1 << 11)) >> 12; | |
4460a860 M |
599 | } |
600 | } | |
601 | } | |
602 | ||
603 | /* Roll around the taps buffer */ | |
604 | if (ec->curr_pos <= 0) | |
605 | ec->curr_pos = ec->taps; | |
606 | ec->curr_pos--; | |
607 | ||
608 | if (ec->adaption_mode & ECHO_CAN_DISABLE) | |
609 | ec->clean_nlp = rx; | |
610 | ||
611 | /* Output scaled back up again to match input scaling */ | |
612 | ||
613 | return (int16_t) ec->clean_nlp << 1; | |
10602db8 | 614 | } |
9d8f2d5d | 615 | EXPORT_SYMBOL_GPL(oslec_update); |
10602db8 | 616 | |
935e99fb | 617 | /* This function is separated from the echo canceller as it is usually called |
10602db8 DR |
618 | as part of the tx process. See rx HP (DC blocking) filter above, it's |
619 | the same design. | |
620 | ||
621 | Some soft phones send speech signals with a lot of low frequency | |
622 | energy, e.g. down to 20Hz. This can make the hybrid non-linear | |
623 | which causes the echo canceller to fall over. This filter can help | |
624 | by removing any low frequency before it gets to the tx port of the | |
625 | hybrid. | |
626 | ||
627 | It can also help by removing any DC in the tx signal. DC is bad | |
628 | for LMS algorithms. | |
629 | ||
49bb9e6d GKH |
630 | This is one of the classic DC removal filters, adjusted to provide |
631 | sufficient bass rolloff to meet the above requirement to protect hybrids | |
632 | from things that upset them. The difference between successive samples | |
633 | produces a lousy HPF, and then a suitably placed pole flattens things out. | |
634 | The final result is a nicely rolled off bass end. The filtering is | |
635 | implemented with extended fractional precision, which noise shapes things, | |
636 | giving very clean DC removal. | |
10602db8 DR |
637 | */ |
638 | ||
30c5007e | 639 | int16_t oslec_hpf_tx(struct oslec_state *ec, int16_t tx) |
4460a860 | 640 | { |
3ec50be5 JJ |
641 | int tmp; |
642 | int tmp1; | |
10602db8 | 643 | |
4460a860 M |
644 | if (ec->adaption_mode & ECHO_CAN_USE_TX_HPF) { |
645 | tmp = tx << 15; | |
196e76e8 | 646 | |
49bb9e6d GKH |
647 | /* |
648 | * Make sure the gain of the HPF is 1.0. The first can still | |
649 | * saturate a little under impulse conditions, and it might | |
650 | * roll to 32768 and need clipping on sustained peak level | |
651 | * signals. However, the scale of such clipping is small, and | |
652 | * the error due to any saturation should not markedly affect | |
653 | * the downstream processing. | |
654 | */ | |
4460a860 | 655 | tmp -= (tmp >> 4); |
196e76e8 | 656 | |
4460a860 M |
657 | ec->tx_1 += -(ec->tx_1 >> DC_LOG2BETA) + tmp - ec->tx_2; |
658 | tmp1 = ec->tx_1 >> 15; | |
659 | if (tmp1 > 32767) | |
660 | tmp1 = 32767; | |
661 | if (tmp1 < -32767) | |
662 | tmp1 = -32767; | |
663 | tx = tmp1; | |
664 | ec->tx_2 = tmp; | |
665 | } | |
666 | ||
667 | return tx; | |
10602db8 | 668 | } |
9d8f2d5d | 669 | EXPORT_SYMBOL_GPL(oslec_hpf_tx); |
68b8d9f6 TC |
670 | |
671 | MODULE_LICENSE("GPL"); | |
672 | MODULE_AUTHOR("David Rowe"); | |
673 | MODULE_DESCRIPTION("Open Source Line Echo Canceller"); | |
674 | MODULE_VERSION("0.3.0"); |