Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : /*====================================================================================
34 : EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
35 : ====================================================================================*/
36 :
37 : #include <assert.h>
38 : #include <stdint.h>
39 : #include "options.h"
40 : #include "prot.h"
41 : #include "rom_enc.h"
42 : #include "wmc_auto.h"
43 :
44 :
45 : /*-------------------------------------------------------------------*
46 : * vad_init()
47 : *
48 : *
49 : *-------------------------------------------------------------------*/
50 :
51 301 : int16_t vad_init(
52 : VAD_CLDFB_HANDLE hVAD_CLDFB /* i/o: CLDFB VAD state */
53 : )
54 : {
55 301 : float sSFM[SFM_NUM] = { 0.88f, 0.92f, 0.92f };
56 : int16_t i;
57 :
58 301 : if ( hVAD_CLDFB == NULL )
59 : {
60 0 : return -1;
61 : }
62 :
63 301 : hVAD_CLDFB->frameloop = 0;
64 301 : hVAD_CLDFB->lt_snr_org = 1.0f;
65 301 : hVAD_CLDFB->lf_snr_smooth = 5.0f;
66 301 : hVAD_CLDFB->l_silence_snr = 0.5f;
67 301 : hVAD_CLDFB->l_speech_snr = 5.0f;
68 301 : hVAD_CLDFB->l_silence_snr_count = 1;
69 301 : hVAD_CLDFB->l_speech_snr_count = 1;
70 301 : hVAD_CLDFB->fg_energy = 16 * ( 3.0518e-5f );
71 301 : hVAD_CLDFB->bg_energy = 16 * ( 4.6566e-10f );
72 301 : hVAD_CLDFB->fg_energy_count = 16;
73 301 : hVAD_CLDFB->bg_energy_count = 16;
74 301 : hVAD_CLDFB->tonality_rate3 = 0.46f;
75 301 : hVAD_CLDFB->music_background_rate = 0.46f;
76 301 : hVAD_CLDFB->lt_noise_sp_center_diff_sum = 0.4f;
77 301 : hVAD_CLDFB->lt_noise_sp_center_diff_counter = 4;
78 301 : hVAD_CLDFB->lt_noise_sp_center0 = 1.8f;
79 301 : hVAD_CLDFB->lt_noise_sp_center3 = 2.0f;
80 301 : hVAD_CLDFB->lt_bg_highf_eng = 2.0f;
81 301 : hVAD_CLDFB->t_bg_energy = 0.01f;
82 301 : hVAD_CLDFB->t_bg_energy_sum = 0.01f;
83 301 : hVAD_CLDFB->tbg_energy_count = 1;
84 301 : hVAD_CLDFB->bg_update_count = 0;
85 301 : hVAD_CLDFB->frame_energy_smooth = 1.0f;
86 301 : hVAD_CLDFB->fg_energy_est_start = 0;
87 301 : hVAD_CLDFB->speech_flag = 0;
88 301 : hVAD_CLDFB->continuous_noise_num = 0;
89 301 : hVAD_CLDFB->continuous_speech_num = 0;
90 301 : hVAD_CLDFB->continuous_speech_num2 = 0;
91 301 : hVAD_CLDFB->update_num_with_snr = 0; /* the number of the background update with SNR*/
92 301 : hVAD_CLDFB->update_count = 0;
93 301 : hVAD_CLDFB->warm_hang_num = 0;
94 :
95 9933 : for ( i = 0; i < PRE_SNR_NUM; i++ )
96 : {
97 9632 : hVAD_CLDFB->pre_snr[i] = 0.0f;
98 : }
99 :
100 17157 : for ( i = 0; i < POWER_NUM; i++ )
101 : {
102 16856 : hVAD_CLDFB->frames_power[i] = 0;
103 : }
104 :
105 18361 : for ( i = 0; i < SPEC_AMP_NUM; i++ )
106 : {
107 18060 : hVAD_CLDFB->smooth_spec_amp[i] = 0;
108 : }
109 :
110 1204 : for ( i = 0; i < SFM_NUM; i++ )
111 : {
112 903 : hVAD_CLDFB->sfm[i] = sSFM[i];
113 : }
114 :
115 1505 : for ( i = 0; i < SP_CENTER_NUM; i++ )
116 : {
117 1204 : hVAD_CLDFB->sp_center[i] = 1.2f;
118 : }
119 :
120 1505 : for ( i = 0; i < STABLE_NUM; i++ )
121 : {
122 1204 : hVAD_CLDFB->ltd_stable_rate[i] = 0.07f;
123 : }
124 :
125 3913 : for ( i = 0; i < BG_ENG_NUM; i++ )
126 : {
127 3612 : hVAD_CLDFB->sb_bg_energy[i] = 0.01f;
128 3612 : hVAD_CLDFB->frame_sb_energy[i] = 0.001f;
129 : }
130 :
131 1204 : for ( i = 0; i < TONA_NUM; i++ )
132 : {
133 903 : hVAD_CLDFB->f_tonality_rate[i] = 0.48f;
134 : }
135 :
136 17157 : for ( i = 0; i < PRE_SPEC_DIF_NUM; i++ )
137 : {
138 16856 : hVAD_CLDFB->pre_spec_low_dif[i] = 1.0f;
139 : }
140 :
141 301 : return 0;
142 : }
143 :
144 :
145 : /*-------------------------------------------------------------------*
146 : * UpdateState()
147 : *
148 : *
149 : *-------------------------------------------------------------------*/
150 :
151 96918 : static void UpdateState(
152 : VAD_CLDFB_HANDLE hVAD_CLDFB, /* i/o: CLDFB VAD state */
153 : const float frame_energy, /* i : current frame energy */
154 : const float high_eng, /* i : current frame high frequency energy */
155 : const int16_t update_flag, /* i : current frame update flag */
156 : const int16_t music_backgound_f, /* i : background music flag */
157 : const int16_t vad_flag, /* i : VAD flag */
158 : const float snr )
159 : {
160 96918 : hVAD_CLDFB->frame_energy_smooth = hVAD_CLDFB->frame_energy_smooth * 0.95f + frame_energy * 0.05f;
161 :
162 96918 : if ( vad_flag == 0 )
163 : {
164 8370 : hVAD_CLDFB->lt_bg_highf_eng = hVAD_CLDFB->lt_bg_highf_eng * 0.95f + high_eng * 0.05f;
165 : }
166 :
167 96918 : if ( hVAD_CLDFB->frameloop < 1000 )
168 : {
169 67370 : hVAD_CLDFB->frameloop++;
170 : }
171 :
172 96918 : background_update( hVAD_CLDFB, frame_energy, update_flag, music_backgound_f, snr );
173 :
174 96918 : if ( vad_flag == 0 )
175 : {
176 8370 : hVAD_CLDFB->continuous_speech_num2 = 0;
177 :
178 8370 : if ( hVAD_CLDFB->continuous_noise_num > 10 )
179 : {
180 5900 : hVAD_CLDFB->continuous_speech_num = 0;
181 : }
182 2470 : else if ( hVAD_CLDFB->continuous_speech_num > 9 )
183 : {
184 224 : hVAD_CLDFB->continuous_speech_num = 9;
185 : }
186 :
187 8370 : hVAD_CLDFB->continuous_noise_num++;
188 :
189 8370 : if ( hVAD_CLDFB->continuous_noise_num > 2048 )
190 : {
191 0 : hVAD_CLDFB->continuous_noise_num = 2048;
192 : }
193 : }
194 : else
195 : {
196 88548 : hVAD_CLDFB->continuous_noise_num = 0;
197 88548 : hVAD_CLDFB->continuous_speech_num2++;
198 88548 : hVAD_CLDFB->continuous_speech_num++;
199 :
200 88548 : if ( hVAD_CLDFB->continuous_speech_num > 2048 )
201 : {
202 0 : hVAD_CLDFB->continuous_speech_num = 2048;
203 : }
204 :
205 88548 : if ( hVAD_CLDFB->continuous_speech_num2 > 2048 )
206 : {
207 0 : hVAD_CLDFB->continuous_speech_num2 = 2048;
208 : }
209 : }
210 :
211 96918 : return;
212 : }
213 :
214 :
215 : /*-------------------------------------------------------------------*
216 : * vad_proc()
217 : *
218 : *
219 : *-------------------------------------------------------------------*/
220 :
221 96918 : int16_t vad_proc(
222 : float realValues[16][60], /* CLDFB real values */
223 : float imagValues[16][60], /* CLDFB imag values */
224 : float *sb_power, /* Energy of CLDFB data */
225 : const int16_t numBands, /* number of input bands */
226 : VAD_CLDFB_HANDLE hVAD_CLDFB, /* i/o: CLDFB VAD state */
227 : int16_t *cldfb_addition,
228 : const int16_t vada_flag /* i : VAD flag */
229 : )
230 : {
231 : float frame_energy, frame_energy2;
232 : float spec_amp[8 * 10]; /* 120 */
233 : float snr, tsnr;
234 : int16_t update_flag;
235 : int16_t vad_flag;
236 96918 : int16_t music_backgound_f = 0;
237 96918 : float HB_Power = 0;
238 : float snr_flux;
239 : float lt_snr;
240 : float lt_snr_org;
241 : float lf_snr;
242 : int16_t bw_index;
243 :
244 96918 : if ( numBands < 20 )
245 : {
246 7710 : bw_index = 1;
247 : }
248 89208 : else if ( numBands < 40 )
249 : {
250 21790 : bw_index = 2;
251 : }
252 : else
253 : {
254 67418 : bw_index = 3;
255 : }
256 :
257 96918 : hVAD_CLDFB->bw_index = bw_index;
258 :
259 96918 : assert( numBands >= 10 );
260 :
261 : /* new optimized structure */
262 96918 : est_energy( sb_power, hVAD_CLDFB->frame_sb_energy, &frame_energy, &frame_energy2, &HB_Power, bw_index );
263 :
264 96918 : subband_FFT( realValues, imagValues, spec_amp );
265 :
266 96918 : spec_center( sb_power, hVAD_CLDFB->sp_center, bw_index );
267 :
268 96918 : ltd_stable( hVAD_CLDFB->frames_power, hVAD_CLDFB->ltd_stable_rate, frame_energy, hVAD_CLDFB->frameloop );
269 :
270 96918 : spec_flatness( spec_amp, hVAD_CLDFB->smooth_spec_amp, hVAD_CLDFB->sfm );
271 :
272 96918 : frame_spec_dif_cor_rate( spec_amp, hVAD_CLDFB->pre_spec_low_dif, hVAD_CLDFB->f_tonality_rate );
273 :
274 96918 : bg_music_decision( hVAD_CLDFB, &music_backgound_f, frame_energy );
275 :
276 96918 : SNR_calc( hVAD_CLDFB->frame_sb_energy, hVAD_CLDFB->sb_bg_energy, hVAD_CLDFB->t_bg_energy, &snr, &tsnr, frame_energy2, bw_index );
277 :
278 96918 : calc_snr_flux( tsnr, hVAD_CLDFB->pre_snr, &snr_flux );
279 :
280 96918 : calc_lt_snr( <_snr_org, <_snr, hVAD_CLDFB->fg_energy, hVAD_CLDFB->fg_energy_count, hVAD_CLDFB->bg_energy, hVAD_CLDFB->bg_energy_count, bw_index, hVAD_CLDFB->lt_noise_sp_center0 );
281 :
282 96918 : calc_lf_snr( &hVAD_CLDFB->lf_snr_smooth, &lf_snr, hVAD_CLDFB->l_speech_snr, hVAD_CLDFB->l_speech_snr_count, hVAD_CLDFB->l_silence_snr, hVAD_CLDFB->l_silence_snr_count, hVAD_CLDFB->fg_energy_count, hVAD_CLDFB->bg_energy_count, bw_index );
283 :
284 96918 : vad_flag = comvad_decision( hVAD_CLDFB, snr, tsnr, snr_flux, lt_snr, lt_snr_org, lf_snr, frame_energy2, music_backgound_f, cldfb_addition, vada_flag );
285 :
286 96918 : update_flag = update_decision( hVAD_CLDFB, snr, tsnr, frame_energy, HB_Power, hVAD_CLDFB->vad_flag_for_bk_update, music_backgound_f );
287 :
288 96918 : UpdateState( hVAD_CLDFB, frame_energy2, HB_Power, update_flag, music_backgound_f, hVAD_CLDFB->vad_flag_for_bk_update, snr );
289 :
290 96918 : return vad_flag;
291 : }
|