Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : /*====================================================================================
34 : EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
35 : ====================================================================================*/
36 :
37 : #include <stdint.h>
38 : #include "options.h"
39 : #ifdef DEBUGGING
40 : #include "debug.h"
41 : #endif
42 : #include <math.h>
43 : #include "cnst.h"
44 : #include "rom_enc.h"
45 : #include "rom_com.h"
46 : #include "prot.h"
47 : #include "wmc_auto.h"
48 :
/*-----------------------------------------------------------------*
 * Local constants
 *-----------------------------------------------------------------*/

/* Thresholds used regardless of the switching mode */
#define MDCT_SW_SIG_LINE_THR   2.85f /* Offset above Etot (scaled by LOG_10 at the point of use) for a hi band line to count as significant */
#define MDCT_SW_SIG_PEAK_THR   36.0f /* Offset below Etot used as the peak threshold of the sparseness measure                         */
#define MDCT_SW_HI_SPARSE_THR  0.25f /* Largest fraction of significant hi band lines for the hi band to be considered sparse            */
#define MDCT_SW_HI_ENER_LO_THR 7.5f  /* Hi band energy at or below which the hi band is treated as close to silent                      */

/* Thresholds applied when mdct_sw_enable == MODE1 */
#define MDCT_SW_1_VOICING_THR      0.9f  /* Voicing threshold (also used by the hi band sparseness hysteresis) */
#define MDCT_SW_1_HI_ENER_LO_THR   12.5f /* Hi band low energy threshold                                       */
#define MDCT_SW_1_SIG_HI_LEVEL_THR 28.0f /* High signal level threshold above the noise floor                  */
#define MDCT_SW_1_SIG_LO_LEVEL_THR 22.5f /* Low signal level threshold above the noise floor                   */
#define MDCT_SW_1_COR_THR          80.0f /* cor_map_sum value indicating a strongly tonal signal               */
#define MDCT_SW_1_SPARSENESS_THR   0.65f /* Threshold on the spectrum sparseness                               */

/* Thresholds applied when mdct_sw_enable == MODE2 */
#define MDCT_SW_2_VOICING_THR      0.6f  /* Voicing threshold                                   */
#define MDCT_SW_2_HI_ENER_LO_THR   9.5f  /* Hi band low energy threshold                        */
#define MDCT_SW_2_SIG_HI_LEVEL_THR 19.0f /* High signal level threshold above the noise floor   */
#define MDCT_SW_2_SIG_LO_LEVEL_THR 23.5f /* Low signal level threshold above the noise floor    */
#define MDCT_SW_2_COR_THR          62.5f /* cor_map_sum value indicating a strongly tonal signal */
#define MDCT_SW_2_SPARSENESS_THR   0.4f  /* Threshold on the spectrum sparseness                */

/* Hysteresis tolerance factor: relaxes the hi band sparseness limit (division) and the
   tonality thresholds of the HQ_CORE preference (multiplication) */
#define MDCT_SW_HYST_FAC 0.8f
72 :
73 :
/*--------------------------------------------------------------------------*
 * get_sparseness()
 *
 * Spectral sparseness measure based on the number of significant local
 * maxima among the interior bins 1 .. n-2 of Bin_E[].
 *
 * A bin is counted when it is strictly greater than both of its neighbours
 * and than the peak threshold. The count is normalised by ( n - 2 ) / 2 and
 * subtracted from 1, so a spectrum without any counted peak yields 1.0.
 *
 * When fewer than 4 bins are given the normalisation term is not positive;
 * 1.0 is returned in that case instead of dividing by zero.
 *--------------------------------------------------------------------------*/

static float get_sparseness(
    const float Bin_E[], /* i  : per-bin energies, at least n entries                  */
    int16_t n,           /* i  : number of bins to analyse                             */
    float thr            /* i  : peak threshold, given in the 10*log10() domain        */
)
{
    int16_t i, num_max, max_peaks;
    float peak_floor;

    /* Normalisation term; zero for n = 1..3, which would make the result NaN/Inf */
    max_peaks = (int16_t) ( ( n - 2 ) / 2 );
    if ( max_peaks <= 0 )
    {
        return 1.0f;
    }

    thr = (float) ( thr * log( 10.0 ) ); /* Convert to 10*log() domain from 10*log10() domain */

    thr = ( thr > 3.0f ) ? thr : 3.0f; /* Set an absolute minimum for close to silent signals */

    num_max = 0;
    for ( i = 1; i < n - 1; ++i )
    {
        /* Largest of the two neighbours and the threshold */
        peak_floor = ( Bin_E[i - 1] > Bin_E[i + 1] ) ? Bin_E[i - 1] : Bin_E[i + 1];
        peak_floor = ( peak_floor > thr ) ? peak_floor : thr;

        if ( Bin_E[i] > peak_floor )
        {
            ++num_max;
        }
    }

    return 1.0f - num_max / (float) max_peaks;
}
102 :
103 : /*--------------------------------------------------------------------------*
104 : * MDCT_selector()
105 : *
106 : *
107 : *--------------------------------------------------------------------------*/
108 :
109 309 : void MDCT_selector(
110 : Encoder_State *st, /* i/o: Encoder State */
111 : const float sp_floor, /* i : Noise floor estimate */
112 : const float Etot, /* i : Total energy */
113 : const float cor_map_sum, /* i : sum of correlation map */
114 : const float enerBuffer[] /* i : energy buffer */
115 : )
116 : {
117 309 : TCX_ENC_HANDLE hTcxEnc = st->hTcxEnc;
118 :
119 309 : if ( st->mdct_sw_enable == MODE1 || st->mdct_sw_enable == MODE2 )
120 : {
121 : float hi_ener, frame_voicing, sparseness;
122 : int16_t peak_count;
123 : int16_t prefer_tcx, prefer_hq_core, switching_point, hi_sparse, sparse;
124 : int16_t lob_cldfb, hib_cldfb, lob_fft, hib_fft;
125 : int16_t i;
126 : float sig_lo_level_thr, sig_hi_level_thr, cor_thr, voicing_thr, sparseness_thr, hi_ener_lo_thr;
127 : int16_t last_core;
128 :
129 309 : if ( st->bwidth == NB )
130 : {
131 0 : lob_cldfb = 3200 / 400;
132 0 : hib_cldfb = 4000 / 400;
133 0 : lob_fft = ( L_FFT / 2 ) / 2; /* 3.2 KHz */
134 0 : hib_fft = ( 40 * ( L_FFT / 2 ) ) / 64; /* 4.0 KHz */
135 : }
136 309 : else if ( st->bwidth == WB )
137 : {
138 0 : lob_cldfb = 4800 / 400;
139 0 : hib_cldfb = 8000 / 400;
140 0 : lob_fft = 3 * L_FFT / 2 / 4; /* 4.8 KHz */
141 0 : hib_fft = L_FFT / 2; /* 6.4 KHz (should be 8 KHz) */
142 : }
143 : else
144 : {
145 309 : lob_cldfb = 6400 / 400;
146 309 : hib_cldfb = 16000 / 400;
147 309 : if ( st->bwidth == FB )
148 : {
149 0 : hib_cldfb = 24000 / 400;
150 : }
151 309 : lob_fft = L_FFT / 2; /* 6.4 KHz */
152 309 : hib_fft = L_FFT / 2; /* 6.4 KHz (should be 8 KHz) */
153 : }
154 :
155 : /* st->last_core is reset to TCX_20_CORE in init_acelp() => fix it here */
156 309 : last_core = st->last_core;
157 309 : if ( st->last_codec_mode == MODE1 && last_core == TCX_20_CORE )
158 : {
159 0 : last_core = HQ_CORE;
160 : }
161 :
162 : /* Voicing */
163 309 : frame_voicing = ( st->voicing[0] + st->voicing[1] ) * 0.5f;
164 :
165 : /* Spectral sparseness */
166 309 : sparseness = get_sparseness( st->Bin_E, lob_fft, Etot - MDCT_SW_SIG_PEAK_THR );
167 :
168 : /* Hi band energy */
169 309 : hi_ener = (float) log10( mean( &enerBuffer[lob_cldfb], hib_cldfb - lob_cldfb ) + 0.0001f );
170 :
171 : /* Hi band sparseness */
172 309 : if ( st->bwidth >= SWB )
173 : {
174 : /* For SWB, assume hi band sparseness based on 4.8 KHz-6.4 KHz band */
175 309 : lob_fft = 3 * L_FFT / 2 / 4; /* 4.8 KHz */
176 : }
177 :
178 309 : peak_count = 0;
179 10197 : for ( i = lob_fft; i < hib_fft; ++i )
180 : {
181 9888 : if ( st->Bin_E[i] >= Etot + MDCT_SW_SIG_LINE_THR * LOG_10 )
182 : {
183 1564 : ++peak_count;
184 : }
185 : }
186 309 : hi_sparse = peak_count <= anint( ( hib_fft - lob_fft ) * MDCT_SW_HI_SPARSE_THR );
187 309 : sparse = peak_count <= anint( ( hib_fft - lob_fft ) * MDCT_SW_HI_SPARSE_THR / MDCT_SW_HYST_FAC );
188 :
189 : /* Hysteresis */
190 309 : if ( hTcxEnc->prev_hi_sparse > 0 && sparse > 0 && min( min( st->voicing[0], st->voicing[1] ), st->voicing[2] ) >= MDCT_SW_1_VOICING_THR )
191 : {
192 98 : hi_sparse = 1;
193 : }
194 :
195 : /* Allowed switching point? */
196 278 : switching_point = ( last_core != HQ_CORE && last_core != TCX_20_CORE ) || /* previous core was non-MDCT */
197 288 : ( hTcxEnc->prev_hi_ener <= MDCT_SW_HI_ENER_LO_THR || hi_ener <= MDCT_SW_HI_ENER_LO_THR ) || /* hi band is close to silent */
198 652 : ( last_core == HQ_CORE && ( st->mdct_sw_enable == MODE1 || ( hi_sparse > 0 && hTcxEnc->prev_hi_sparse >= 0 && hTcxEnc->prev_hi_sparse <= 1 ) ) ) || /* HQ_CORE and hi band became sparse */
199 34 : ( last_core == TCX_20_CORE && ( hi_sparse == 0 && hTcxEnc->prev_hi_sparse > 0 ) ); /* TCX and hi band became dense */
200 :
201 309 : if ( st->mdct_sw_enable == MODE1 )
202 : {
203 309 : sig_lo_level_thr = MDCT_SW_1_SIG_LO_LEVEL_THR;
204 309 : sig_hi_level_thr = MDCT_SW_1_SIG_HI_LEVEL_THR;
205 309 : cor_thr = MDCT_SW_1_COR_THR;
206 309 : voicing_thr = MDCT_SW_1_VOICING_THR;
207 309 : sparseness_thr = MDCT_SW_1_SPARSENESS_THR;
208 309 : hi_ener_lo_thr = MDCT_SW_1_HI_ENER_LO_THR;
209 : }
210 : else
211 : {
212 : /* st->mdct_sw_enable == MODE2 */
213 0 : sig_lo_level_thr = MDCT_SW_2_SIG_LO_LEVEL_THR;
214 0 : sig_hi_level_thr = MDCT_SW_2_SIG_HI_LEVEL_THR;
215 0 : cor_thr = MDCT_SW_2_COR_THR;
216 0 : voicing_thr = MDCT_SW_2_VOICING_THR;
217 0 : sparseness_thr = MDCT_SW_2_SPARSENESS_THR;
218 0 : hi_ener_lo_thr = MDCT_SW_2_HI_ENER_LO_THR;
219 : }
220 :
221 258 : prefer_tcx = ( Etot - sp_floor >= sig_hi_level_thr ) && /* noise floor is low */
222 567 : ( cor_map_sum >= cor_thr || frame_voicing >= voicing_thr || sparseness >= sparseness_thr ) && /* strong tonal components */
223 0 : ( hi_ener <= hi_ener_lo_thr || hi_sparse > 0 ); /* high freqs have low energy or are sparse */
224 :
225 913 : prefer_hq_core = ( Etot - sp_floor < sig_lo_level_thr ) || /* noise floor is very high */
226 599 : ( cor_map_sum < cor_thr * MDCT_SW_HYST_FAC && frame_voicing < voicing_thr * MDCT_SW_HYST_FAC && sparseness < sparseness_thr * MDCT_SW_HYST_FAC ) || /* too weak tonal components */
227 290 : ( st->mdct_sw_enable == MODE1 && !prefer_tcx && st->hTranDet->transientDetector.bIsAttackPresent );
228 :
229 : /* Prefer HQ_CORE on transients */
230 309 : if ( st->mdct_sw_enable == MODE2 && st->hTranDet->transientDetector.bIsAttackPresent )
231 : {
232 0 : prefer_tcx = 0;
233 0 : prefer_hq_core = 1;
234 : }
235 :
236 309 : if ( switching_point && ( prefer_tcx || prefer_hq_core ) )
237 : {
238 243 : if ( prefer_tcx )
239 : {
240 229 : st->core = TCX_20_CORE;
241 : }
242 : else /* prefer_hq_core */
243 : {
244 14 : st->core = HQ_CORE;
245 : }
246 : }
247 66 : else if ( last_core == HQ_CORE || last_core == TCX_20_CORE )
248 : {
249 62 : st->core = last_core;
250 : }
251 :
252 : /* Prevent the usage of HQ_CORE on noisy-speech or inactive */
253 309 : if ( st->mdct_sw_enable == MODE2 && st->core == HQ_CORE && ( st->flag_noisy_speech_snr == 1 || st->vad_flag == 0 ) )
254 : {
255 0 : st->core = TCX_20_CORE;
256 : }
257 :
258 :
259 : /* Update memories */
260 309 : if ( hi_sparse <= 0 )
261 : {
262 66 : hTcxEnc->prev_hi_sparse = hi_sparse;
263 : }
264 : else
265 : {
266 243 : hTcxEnc->prev_hi_sparse += hi_sparse;
267 243 : if ( hTcxEnc->prev_hi_sparse >= 2 )
268 : {
269 196 : hTcxEnc->prev_hi_sparse = 2;
270 : }
271 : }
272 309 : hTcxEnc->prev_hi_ener = hi_ener;
273 : }
274 :
275 309 : return;
276 : }
277 :
278 : /*--------------------------------------------------------------------------*
279 : * MDCT_selector_reset()
280 : *
281 : * reset MDCT selector memories
282 : *--------------------------------------------------------------------------*/
283 :
284 10682 : void MDCT_selector_reset(
285 : TCX_ENC_HANDLE hTcxEnc /* i/o: TCX Encoder Handle */
286 : )
287 : {
288 10682 : hTcxEnc->prev_hi_ener = 0;
289 10682 : hTcxEnc->prev_hi_sparse = -1;
290 :
291 10682 : return;
292 : }
|