Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : /*====================================================================================
34 : EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
35 : ====================================================================================*/
36 :
37 : #include <assert.h>
38 : #include <stdint.h>
39 : #include "options.h"
40 : #include "prot.h"
41 : #include "rom_enc.h"
42 : #include "wmc_auto.h"
43 :
44 :
45 : /*-------------------------------------------------------------------*
46 : * vad_init()
47 : *
48 : *
49 : *-------------------------------------------------------------------*/
50 :
51 3 : int16_t vad_init(
52 : VAD_CLDFB_HANDLE hVAD_CLDFB /* i/o: CLDFB VAD state */
53 : )
54 : {
55 3 : float sSFM[SFM_NUM] = { 0.88f, 0.92f, 0.92f };
56 : int16_t i;
57 :
58 3 : if ( hVAD_CLDFB == NULL )
59 : {
60 0 : return -1;
61 : }
62 :
63 3 : hVAD_CLDFB->frameloop = 0;
64 3 : hVAD_CLDFB->lt_snr_org = 1.0f;
65 3 : hVAD_CLDFB->lf_snr_smooth = 5.0f;
66 3 : hVAD_CLDFB->l_silence_snr = 0.5f;
67 3 : hVAD_CLDFB->l_speech_snr = 5.0f;
68 3 : hVAD_CLDFB->l_silence_snr_count = 1;
69 3 : hVAD_CLDFB->l_speech_snr_count = 1;
70 3 : hVAD_CLDFB->fg_energy = 16 * ( 3.0518e-5f );
71 3 : hVAD_CLDFB->bg_energy = 16 * ( 4.6566e-10f );
72 3 : hVAD_CLDFB->fg_energy_count = 16;
73 3 : hVAD_CLDFB->bg_energy_count = 16;
74 3 : hVAD_CLDFB->tonality_rate3 = 0.46f;
75 3 : hVAD_CLDFB->music_background_rate = 0.46f;
76 3 : hVAD_CLDFB->lt_noise_sp_center_diff_sum = 0.4f;
77 3 : hVAD_CLDFB->lt_noise_sp_center_diff_counter = 4;
78 3 : hVAD_CLDFB->lt_noise_sp_center0 = 1.8f;
79 3 : hVAD_CLDFB->lt_noise_sp_center3 = 2.0f;
80 3 : hVAD_CLDFB->lt_bg_highf_eng = 2.0f;
81 3 : hVAD_CLDFB->t_bg_energy = 0.01f;
82 3 : hVAD_CLDFB->t_bg_energy_sum = 0.01f;
83 3 : hVAD_CLDFB->tbg_energy_count = 1;
84 3 : hVAD_CLDFB->bg_update_count = 0;
85 3 : hVAD_CLDFB->frame_energy_smooth = 1.0f;
86 3 : hVAD_CLDFB->fg_energy_est_start = 0;
87 3 : hVAD_CLDFB->speech_flag = 0;
88 3 : hVAD_CLDFB->continuous_noise_num = 0;
89 3 : hVAD_CLDFB->continuous_speech_num = 0;
90 3 : hVAD_CLDFB->continuous_speech_num2 = 0;
91 3 : hVAD_CLDFB->update_num_with_snr = 0; /* the number of the background update with SNR*/
92 3 : hVAD_CLDFB->update_count = 0;
93 3 : hVAD_CLDFB->warm_hang_num = 0;
94 :
95 99 : for ( i = 0; i < PRE_SNR_NUM; i++ )
96 : {
97 96 : hVAD_CLDFB->pre_snr[i] = 0.0f;
98 : }
99 :
100 171 : for ( i = 0; i < POWER_NUM; i++ )
101 : {
102 168 : hVAD_CLDFB->frames_power[i] = 0;
103 : }
104 :
105 183 : for ( i = 0; i < SPEC_AMP_NUM; i++ )
106 : {
107 180 : hVAD_CLDFB->smooth_spec_amp[i] = 0;
108 : }
109 :
110 12 : for ( i = 0; i < SFM_NUM; i++ )
111 : {
112 9 : hVAD_CLDFB->sfm[i] = sSFM[i];
113 : }
114 :
115 15 : for ( i = 0; i < SP_CENTER_NUM; i++ )
116 : {
117 12 : hVAD_CLDFB->sp_center[i] = 1.2f;
118 : }
119 :
120 15 : for ( i = 0; i < STABLE_NUM; i++ )
121 : {
122 12 : hVAD_CLDFB->ltd_stable_rate[i] = 0.07f;
123 : }
124 :
125 39 : for ( i = 0; i < BG_ENG_NUM; i++ )
126 : {
127 36 : hVAD_CLDFB->sb_bg_energy[i] = 0.01f;
128 36 : hVAD_CLDFB->frame_sb_energy[i] = 0.001f;
129 : }
130 :
131 12 : for ( i = 0; i < TONA_NUM; i++ )
132 : {
133 9 : hVAD_CLDFB->f_tonality_rate[i] = 0.48f;
134 : }
135 :
136 171 : for ( i = 0; i < PRE_SPEC_DIF_NUM; i++ )
137 : {
138 168 : hVAD_CLDFB->pre_spec_low_dif[i] = 1.0f;
139 : }
140 :
141 3 : return 0;
142 : }
143 :
144 :
145 : /*-------------------------------------------------------------------*
146 : * UpdateState()
147 : *
148 : *
149 : *-------------------------------------------------------------------*/
150 :
151 3100 : static void UpdateState(
152 : VAD_CLDFB_HANDLE hVAD_CLDFB, /* i/o: CLDFB VAD state */
153 : const float frame_energy, /* i : current frame energy */
154 : const float high_eng, /* i : current frame high frequency energy */
155 : const int16_t update_flag, /* i : current frame update flag */
156 : const int16_t music_backgound_f, /* i : background music flag */
157 : const int16_t vad_flag, /* i : VAD flag */
158 : const float snr )
159 : {
160 3100 : hVAD_CLDFB->frame_energy_smooth = hVAD_CLDFB->frame_energy_smooth * 0.95f + frame_energy * 0.05f;
161 :
162 3100 : if ( vad_flag == 0 )
163 : {
164 88 : hVAD_CLDFB->lt_bg_highf_eng = hVAD_CLDFB->lt_bg_highf_eng * 0.95f + high_eng * 0.05f;
165 : }
166 :
167 3100 : if ( hVAD_CLDFB->frameloop < 1000 )
168 : {
169 3000 : hVAD_CLDFB->frameloop++;
170 : }
171 :
172 3100 : background_update( hVAD_CLDFB, frame_energy, update_flag, music_backgound_f, snr );
173 :
174 3100 : if ( vad_flag == 0 )
175 : {
176 88 : hVAD_CLDFB->continuous_speech_num2 = 0;
177 :
178 88 : if ( hVAD_CLDFB->continuous_noise_num > 10 )
179 : {
180 6 : hVAD_CLDFB->continuous_speech_num = 0;
181 : }
182 82 : else if ( hVAD_CLDFB->continuous_speech_num > 9 )
183 : {
184 8 : hVAD_CLDFB->continuous_speech_num = 9;
185 : }
186 :
187 88 : hVAD_CLDFB->continuous_noise_num++;
188 :
189 88 : if ( hVAD_CLDFB->continuous_noise_num > 2048 )
190 : {
191 0 : hVAD_CLDFB->continuous_noise_num = 2048;
192 : }
193 : }
194 : else
195 : {
196 3012 : hVAD_CLDFB->continuous_noise_num = 0;
197 3012 : hVAD_CLDFB->continuous_speech_num2++;
198 3012 : hVAD_CLDFB->continuous_speech_num++;
199 :
200 3012 : if ( hVAD_CLDFB->continuous_speech_num > 2048 )
201 : {
202 0 : hVAD_CLDFB->continuous_speech_num = 2048;
203 : }
204 :
205 3012 : if ( hVAD_CLDFB->continuous_speech_num2 > 2048 )
206 : {
207 0 : hVAD_CLDFB->continuous_speech_num2 = 2048;
208 : }
209 : }
210 :
211 3100 : return;
212 : }
213 :
214 :
215 : /*-------------------------------------------------------------------*
216 : * vad_proc()
217 : *
218 : *
219 : *-------------------------------------------------------------------*/
220 :
221 3100 : int16_t vad_proc(
222 : float realValues[16][60], /* CLDFB real values */
223 : float imagValues[16][60], /* CLDFB imag values */
224 : float *sb_power, /* Energy of CLDFB data */
225 : const int16_t numBands, /* number of input bands */
226 : VAD_CLDFB_HANDLE hVAD_CLDFB, /* i/o: CLDFB VAD state */
227 : int16_t *cldfb_addition,
228 : const int16_t vada_flag /* i : VAD flag */
229 : )
230 : {
231 : float frame_energy, frame_energy2;
232 : float spec_amp[8 * 10]; /* 120 */
233 : float snr, tsnr;
234 : int16_t update_flag;
235 : int16_t vad_flag;
236 3100 : int16_t music_backgound_f = 0;
237 3100 : float HB_Power = 0;
238 : float snr_flux;
239 : float lt_snr;
240 : float lt_snr_org;
241 : float lf_snr;
242 : int16_t bw_index;
243 :
244 3100 : if ( numBands < 20 )
245 : {
246 0 : bw_index = 1;
247 : }
248 3100 : else if ( numBands < 40 )
249 : {
250 0 : bw_index = 2;
251 : }
252 : else
253 : {
254 3100 : bw_index = 3;
255 : }
256 :
257 3100 : hVAD_CLDFB->bw_index = bw_index;
258 :
259 3100 : assert( numBands >= 10 );
260 :
261 : /* new optimized structure */
262 3100 : est_energy( sb_power, hVAD_CLDFB->frame_sb_energy, &frame_energy, &frame_energy2, &HB_Power, bw_index );
263 :
264 3100 : subband_FFT( realValues, imagValues, spec_amp );
265 :
266 3100 : spec_center( sb_power, hVAD_CLDFB->sp_center, bw_index );
267 :
268 3100 : ltd_stable( hVAD_CLDFB->frames_power, hVAD_CLDFB->ltd_stable_rate, frame_energy, hVAD_CLDFB->frameloop );
269 :
270 3100 : spec_flatness( spec_amp, hVAD_CLDFB->smooth_spec_amp, hVAD_CLDFB->sfm );
271 :
272 3100 : frame_spec_dif_cor_rate( spec_amp, hVAD_CLDFB->pre_spec_low_dif, hVAD_CLDFB->f_tonality_rate );
273 :
274 3100 : bg_music_decision( hVAD_CLDFB, &music_backgound_f, frame_energy );
275 :
276 3100 : SNR_calc( hVAD_CLDFB->frame_sb_energy, hVAD_CLDFB->sb_bg_energy, hVAD_CLDFB->t_bg_energy, &snr, &tsnr, frame_energy2, bw_index );
277 :
278 3100 : calc_snr_flux( tsnr, hVAD_CLDFB->pre_snr, &snr_flux );
279 :
280 3100 : calc_lt_snr( <_snr_org, <_snr, hVAD_CLDFB->fg_energy, hVAD_CLDFB->fg_energy_count, hVAD_CLDFB->bg_energy, hVAD_CLDFB->bg_energy_count, bw_index, hVAD_CLDFB->lt_noise_sp_center0 );
281 :
282 3100 : calc_lf_snr( &hVAD_CLDFB->lf_snr_smooth, &lf_snr, hVAD_CLDFB->l_speech_snr, hVAD_CLDFB->l_speech_snr_count, hVAD_CLDFB->l_silence_snr, hVAD_CLDFB->l_silence_snr_count, hVAD_CLDFB->fg_energy_count, hVAD_CLDFB->bg_energy_count, bw_index );
283 :
284 3100 : vad_flag = comvad_decision( hVAD_CLDFB, snr, tsnr, snr_flux, lt_snr, lt_snr_org, lf_snr, frame_energy2, music_backgound_f, cldfb_addition, vada_flag );
285 :
286 3100 : update_flag = update_decision( hVAD_CLDFB, snr, tsnr, frame_energy, HB_Power, hVAD_CLDFB->vad_flag_for_bk_update, music_backgound_f );
287 :
288 3100 : UpdateState( hVAD_CLDFB, frame_energy2, HB_Power, update_flag, music_backgound_f, hVAD_CLDFB->vad_flag_for_bk_update, snr );
289 :
290 3100 : return vad_flag;
291 : }
|