Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : /*====================================================================================
34 : EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
35 : ====================================================================================*/
36 :
37 : #include <stdint.h>
38 : #include "options.h"
39 : #ifdef DEBUGGING
40 : #include "debug.h"
41 : #endif
42 : #include <math.h>
43 : #include "cnst.h"
44 : #include "prot.h"
45 : #include "rom_com.h"
46 : #include "wmc_auto.h"
47 :
48 : /*--------------------------------------------------------------------------
49 : * hq_core_enc()
50 : *
51 : * HQ core encoder
52 : *--------------------------------------------------------------------------*/
53 :
54 84723 : void hq_core_enc(
55 : Encoder_State *st, /* i/o: encoder state structure */
56 : const float *audio, /* i : input audio signal */
57 : const int16_t input_frame_orig, /* i : frame length */
58 : const int16_t hq_core_type, /* i : HQ core type */
59 : const int16_t Voicing_flag, /* i : Voicing flag for FER method selection */
60 : const int16_t vad_hover_flag /* i : VAD hangover flag */
61 : )
62 : {
63 : int16_t i, is_transient, num_bits, extra_unused;
64 : float wtda_audio[2 * L_FRAME48k];
65 : float t_audio[L_FRAME48k_EXT];
66 : int16_t inner_frame, input_frame, L_frame;
67 : float ener_match;
68 : int16_t left_overlap, right_overlap;
69 : int16_t L_spec, overlap, nz, tcx_offset;
70 : float Aq_old[M + 1];
71 : float output[L_FRAME16k];
72 :
73 84723 : BSTR_ENC_HANDLE hBstr = st->hBstr;
74 :
75 84723 : push_wmops( "hq_core_enc" );
76 :
77 84723 : set_f( t_audio, 0, L_FRAME48k );
78 84723 : st->Nb_ACELP_frames = 0;
79 :
80 : /* set input_frame length */
81 84723 : input_frame = input_frame_orig;
82 :
83 : /* Sanity check, it should never happen at the encoder side (no BFI) */
84 84723 : if ( st->hTcxCfg->tcx_curr_overlap_mode == FULL_OVERLAP )
85 : {
86 0 : st->hTcxCfg->tcx_last_overlap_mode = ALDO_WINDOW;
87 : }
88 : else
89 : {
90 84723 : st->hTcxCfg->tcx_last_overlap_mode = st->hTcxCfg->tcx_curr_overlap_mode;
91 : }
92 84723 : st->hTcxCfg->tcx_curr_overlap_mode = ALDO_WINDOW;
93 :
94 : /*--------------------------------------------------------------------------
95 : * Preprocessing in the first HQ frame after ACELP frame
96 : * Find the number of bits for PVQ coding
97 : * Write signaling information
98 : *--------------------------------------------------------------------------*/
99 :
100 84723 : num_bits = (int16_t) ( st->total_brate / FRAMES_PER_SEC );
101 84723 : extra_unused = 0;
102 :
103 : /*--------------------------------------------------------------------------
104 : * Detect signal transition
105 : *--------------------------------------------------------------------------*/
106 :
107 84723 : is_transient = detect_transient( st, audio, input_frame );
108 :
109 84723 : if ( st->element_mode > EVS_MONO && ( st->last_core == ACELP_CORE || st->last_core == AMR_WB_CORE ) )
110 : {
111 : /*--------------------------------------------------------------------------
112 : * IVAS switching frame
113 : *--------------------------------------------------------------------------*/
114 :
115 1662 : L_spec = input_frame;
116 1662 : left_overlap = -1;
117 1662 : right_overlap = -1;
118 :
119 1662 : WindowSignal( st->hTcxCfg, st->hTcxCfg->tcx_offsetFB, TRANSITION_OVERLAP, FULL_OVERLAP, &left_overlap, &right_overlap, st->hTcxEnc->speech_TCX, &L_spec, wtda_audio, 1, 1 );
120 :
121 1662 : TCX_MDCT( wtda_audio, t_audio, left_overlap, L_spec - ( left_overlap + right_overlap ) / 2, right_overlap, st->element_mode );
122 :
123 1662 : inner_frame = inner_frame_tbl[st->bwidth];
124 1662 : L_spec = l_spec_ext_tbl[st->bwidth];
125 1662 : is_transient = 0;
126 : }
127 : else
128 : {
129 : /*--------------------------------------------------------------------------
130 : * Windowing and time-domain aliasing
131 : * DCT transform
132 : *--------------------------------------------------------------------------*/
133 :
134 83061 : wtda( audio, wtda_audio, NULL, st->hTcxCfg->tcx_last_overlap_mode, st->hTcxCfg->tcx_curr_overlap_mode, input_frame );
135 :
136 83061 : if ( st->last_core == ACELP_CORE || st->last_core == AMR_WB_CORE )
137 : {
138 : /* Preprocessing in the first HQ frame after ACELP frame */
139 329 : core_switching_hq_prepare_enc( st, &num_bits, input_frame, wtda_audio, audio );
140 :
141 : /* During ACELP->HQ core switching, limit the HQ core bitrate to 48kbps */
142 329 : if ( num_bits > HQ_48k / FRAMES_PER_SEC )
143 : {
144 146 : extra_unused = num_bits - (int16_t) ( HQ_48k / FRAMES_PER_SEC );
145 146 : num_bits = (int16_t) ( HQ_48k / FRAMES_PER_SEC );
146 : }
147 : }
148 :
149 83061 : direct_transform( wtda_audio, t_audio, is_transient, input_frame, st->element_mode );
150 :
151 : /* scale coefficients to their nominal level (8kHz) */
152 83061 : if ( input_frame != NORM_MDCT_FACTOR )
153 : {
154 82702 : ener_match = (float) sqrt( (float) NORM_MDCT_FACTOR / (float) input_frame );
155 :
156 66752782 : for ( i = 0; i < input_frame; i++ )
157 : {
158 66670080 : t_audio[i] *= ener_match;
159 : }
160 : }
161 :
162 : /* limit encoded band-width according to the command-line OR BWD limitation */
163 83061 : inner_frame = inner_frame_tbl[st->bwidth];
164 83061 : L_spec = l_spec_tbl[st->bwidth];
165 :
166 83061 : if ( input_frame > inner_frame )
167 : {
168 15545 : if ( is_transient )
169 : {
170 3100 : for ( i = 1; i < NUM_TIME_SWITCHING_BLOCKS; i++ )
171 : {
172 2325 : mvr2r( t_audio + i * input_frame / NUM_TIME_SWITCHING_BLOCKS, t_audio + i * inner_frame / NUM_TIME_SWITCHING_BLOCKS, inner_frame / NUM_TIME_SWITCHING_BLOCKS );
173 : }
174 : }
175 :
176 15545 : set_f( t_audio + inner_frame, 0.0f, input_frame - inner_frame );
177 : }
178 : }
179 :
180 : /* subtract signaling bits */
181 84723 : num_bits -= hBstr->nb_bits_tot;
182 :
183 : /*--------------------------------------------------------------------------
184 : * High-band gain control in case of BWS
185 : *--------------------------------------------------------------------------*/
186 :
187 84723 : if ( st->bwidth_sw_cnt > 0 )
188 : {
189 403 : if ( is_transient )
190 : {
191 85 : for ( i = 0; i < NUM_TIME_SWITCHING_BLOCKS; i++ )
192 : {
193 68 : v_multc( t_audio + i * inner_frame / NUM_TIME_SWITCHING_BLOCKS + L_FRAME16k / NUM_TIME_SWITCHING_BLOCKS, (float) ( st->bwidth_sw_cnt ) / (float) BWS_TRAN_PERIOD, t_audio + i * inner_frame / NUM_TIME_SWITCHING_BLOCKS + L_FRAME16k / NUM_TIME_SWITCHING_BLOCKS, inner_frame / NUM_TIME_SWITCHING_BLOCKS - L_FRAME16k / NUM_TIME_SWITCHING_BLOCKS );
194 : }
195 : }
196 : else
197 : {
198 386 : v_multc( t_audio + L_FRAME16k, (float) ( st->bwidth_sw_cnt ) / (float) BWS_TRAN_PERIOD, t_audio + L_FRAME16k, L_spec - L_FRAME16k );
199 : }
200 : }
201 :
202 : /*--------------------------------------------------------------------------
203 : * Classify whether to put extra bits for FER mitigation
204 : *--------------------------------------------------------------------------*/
205 :
206 84723 : if ( ( st->last_core == TCX_20_CORE || st->last_core == TCX_10_CORE || st->last_core == HQ_CORE ) && st->core_brate > MINIMUM_RATE_TO_ENCODE_VOICING_FLAG )
207 : {
208 13984 : if ( Voicing_flag > 0 )
209 : {
210 0 : push_indice( hBstr, IND_HQ_VOICING_FLAG, 1, 1 );
211 0 : num_bits -= 1;
212 : }
213 : else
214 : {
215 13984 : push_indice( hBstr, IND_HQ_VOICING_FLAG, 0, 1 );
216 13984 : num_bits -= 1;
217 : }
218 : }
219 :
220 : /*--------------------------------------------------------------------------
221 : * Transform-domain encoding
222 : *--------------------------------------------------------------------------*/
223 :
224 84723 : if ( hq_core_type == LOW_RATE_HQ_CORE )
225 : {
226 : /* HQ low rate encoder */
227 1976 : hq_lr_enc( st, t_audio, inner_frame, &num_bits, is_transient );
228 : }
229 : else
230 : {
231 : /* HQ high rate encoder */
232 82747 : hq_hr_enc( st, t_audio, L_spec, &num_bits, is_transient, vad_hover_flag );
233 : }
234 :
235 : /* write all unused bits to the bitstream */
236 84723 : num_bits += extra_unused;
237 :
238 86912 : while ( num_bits >= 16 )
239 : {
240 2189 : push_indice( hBstr, IND_UNUSED, 0, 16 );
241 2189 : num_bits -= 16;
242 : }
243 :
244 84723 : if ( num_bits != 0 )
245 : {
246 41468 : push_indice( hBstr, IND_UNUSED, 0, num_bits );
247 : }
248 :
249 84723 : if ( st->element_mode > EVS_MONO && ( st->last_core == ACELP_CORE || st->last_core == AMR_WB_CORE ) )
250 : {
251 1662 : overlap = st->hTcxCfg->tcx_mdct_window_length;
252 1662 : nz = NS2SA( st->sr_core, N_ZERO_MDCT_NS );
253 1662 : L_frame = (int16_t) ( st->L_frame + st->hTcxCfg->tcx_offset - st->hTcxCfg->lfacNext );
254 1662 : tcx_offset = st->hTcxCfg->lfacNext;
255 1662 : set_f( Aq_old, 0, M + 1 ); /* Dummy filter */
256 1662 : Aq_old[0] = 1;
257 :
258 : /* Code taken from InternalTCXDecoder() */
259 1662 : TCX_MDCT_Inverse( t_audio, wtda_audio, overlap, L_frame - overlap, overlap, st->element_mode );
260 :
261 : /* Window current frame */
262 1662 : tcx_windowing_synthesis_current_frame( wtda_audio, st->hTcxCfg->tcx_aldo_window_2, st->hTcxCfg->tcx_mdct_window_half, st->hTcxCfg->tcx_mdct_window_minimum, overlap, /*st->hTcxCfg->tcx_mdct_window_length*/ st->hTcxCfg->tcx_mdct_window_half_length, st->hTcxCfg->tcx_mdct_window_min_length, st->last_core == ACELP_CORE, st->hTcxCfg->tcx_last_overlap_mode, /*left mode*/ st->hTcxEnc->acelp_zir, st->hTcxEnc->Txnq, NULL, Aq_old, st->hTcxCfg->tcx_mdct_window_trans, st->L_frame >> 1, tcx_offset < 0 ? -tcx_offset : 0, st->last_core, 0, 0 );
263 :
264 : /*Compute windowed synthesis in case of switching to ALDO windows in next frame*/
265 1662 : mvr2r( wtda_audio + L_frame - nz, st->hTcxEnc->old_out, nz + overlap );
266 1662 : set_zero( st->hTcxEnc->old_out + nz + overlap, nz );
267 :
268 1662 : tcx_windowing_synthesis_past_frame( st->hTcxEnc->old_out + nz, st->hTcxCfg->tcx_aldo_window_1_trunc, st->hTcxCfg->tcx_mdct_window_half, st->hTcxCfg->tcx_mdct_window_minimum, overlap, st->hTcxCfg->tcx_mdct_window_half_length, st->hTcxCfg->tcx_mdct_window_min_length, FULL_OVERLAP );
269 :
270 151242 : for ( i = 0; i < nz; i++ )
271 : {
272 149580 : st->hTcxEnc->old_out[nz + overlap + i] = wtda_audio[L_frame - 1 - i] * st->hTcxCfg->tcx_aldo_window_1_trunc[-1 - i];
273 : }
274 1662 : mvr2r( wtda_audio + ( overlap >> 1 ) - tcx_offset, output, st->L_frame );
275 : }
276 : else
277 : {
278 83061 : ener_match = (float) sqrt( (float) L_FRAME16k / (float) NORM_MDCT_FACTOR );
279 83061 : v_multc( t_audio, ener_match, t_audio, inner_frame );
280 :
281 83061 : inverse_transform( t_audio, wtda_audio, is_transient, L_FRAME16k, inner_frame, st->element_mode );
282 :
283 83061 : window_ola( wtda_audio, output, st->hTcxEnc->old_out, L_FRAME16k, st->hTcxCfg->tcx_last_overlap_mode, st->hTcxCfg->tcx_curr_overlap_mode, 0, 0, NULL );
284 : }
285 :
286 84723 : if ( st->element_mode > EVS_MONO )
287 : {
288 : /* Store LB synthesis in case of switch to ACELP */
289 74800 : mvr2r( output, st->hLPDmem->old_exc, L_FRAME16k );
290 : }
291 :
292 84723 : pop_wmops();
293 :
294 84723 : return;
295 : }
296 :
297 : /*-------------------------------------------------------------------*
298 : * hq_core_enc_init()
299 : *
300 : * Initialize HQ core state structure
301 : *-------------------------------------------------------------------*/
302 :
303 57653 : void HQ_core_enc_init(
304 : HQ_ENC_HANDLE hHQ_core /* i/o: HQ core data handle */
305 : )
306 : {
307 57653 : hHQ_core->mode_count = 0;
308 57653 : hHQ_core->mode_count1 = 0;
309 :
310 57653 : hHQ_core->hq_generic_speech_class = 0;
311 :
312 57653 : hHQ_core->prev_Npeaks = 0;
313 57653 : set_s( hHQ_core->prev_peaks, 0, HVQ_MAX_PEAKS );
314 57653 : hHQ_core->hvq_hangover = 0;
315 57653 : hHQ_core->prev_hqswb_clas = HQ_NORMAL;
316 57653 : set_s( hHQ_core->prev_SWB_peak_pos, 0, SPT_SHORTEN_SBNUM );
317 :
318 57653 : set_s( hHQ_core->prev_frm_index, -1, NB_SWB_SUBBANDS_HAR_SEARCH_SB );
319 57653 : hHQ_core->prev_frm_hfe2 = 0;
320 57653 : hHQ_core->prev_stab_hfe2 = 0;
321 57653 : hHQ_core->prev_ni_ratio = 0.5f;
322 57653 : set_f( hHQ_core->prev_En_sb, 0.0f, NB_SWB_SUBBANDS );
323 57653 : set_s( hHQ_core->last_bitalloc_max_band, 0, 2 );
324 57653 : set_f( hHQ_core->last_ni_gain, 0, BANDS_MAX );
325 57653 : set_f( hHQ_core->last_env, 0, BANDS_MAX );
326 57653 : hHQ_core->last_max_pos_pulse = 0;
327 :
328 57653 : hHQ_core->crest_lp = HQ_CREST_THRESHOLD;
329 57653 : hHQ_core->crest_mod_lp = HQ_CREST_MOD_THRESHOLD;
330 :
331 57653 : return;
332 : }
|