Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : /*====================================================================================
34 : EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
35 : ====================================================================================*/
36 :
37 : #include <stdint.h>
38 : #include "options.h"
39 : #ifdef DEBUGGING
40 : #include "debug.h"
41 : #endif
42 : #include <math.h>
43 : #include "cnst.h"
44 : #include "prot.h"
45 : #include "rom_com.h"
46 : #include "wmc_auto.h"
47 :
48 : /*--------------------------------------------------------------------------
49 : * hq_core_enc()
50 : *
51 : * HQ core encoder
52 : *--------------------------------------------------------------------------*/
53 :
54 8457 : void hq_core_enc(
55 : Encoder_State *st, /* i/o: encoder state structure */
56 : const float *audio, /* i : input audio signal */
57 : const int16_t input_frame_orig, /* i : frame length */
58 : const int16_t hq_core_type, /* i : HQ core type */
59 : const int16_t Voicing_flag, /* i : Voicing flag for FER method selection */
60 : const int16_t vad_hover_flag /* i : VAD hangover flag */
61 : )
62 : {
63 : int16_t i, is_transient, num_bits, extra_unused;
64 : float wtda_audio[2 * L_FRAME48k];
65 : float t_audio[L_FRAME48k_EXT];
66 : int16_t inner_frame, input_frame, L_frame;
67 : float ener_match;
68 : int16_t left_overlap, right_overlap;
69 : int16_t L_spec, overlap, nz, tcx_offset;
70 : float Aq_old[M + 1];
71 : float output[L_FRAME16k];
72 :
73 8457 : BSTR_ENC_HANDLE hBstr = st->hBstr;
74 :
75 8457 : push_wmops( "hq_core_enc" );
76 :
77 8457 : set_f( t_audio, 0, L_FRAME48k );
78 8457 : st->Nb_ACELP_frames = 0;
79 :
80 : /* set input_frame length */
81 8457 : input_frame = input_frame_orig;
82 :
83 : /* Sanity check, it should never happen at the encoder side (no BFI) */
84 8457 : if ( st->hTcxCfg->tcx_curr_overlap_mode == FULL_OVERLAP )
85 : {
86 0 : st->hTcxCfg->tcx_last_overlap_mode = ALDO_WINDOW;
87 : }
88 : else
89 : {
90 8457 : st->hTcxCfg->tcx_last_overlap_mode = st->hTcxCfg->tcx_curr_overlap_mode;
91 : }
92 8457 : st->hTcxCfg->tcx_curr_overlap_mode = ALDO_WINDOW;
93 :
94 : /*--------------------------------------------------------------------------
95 : * Preprocessing in the first HQ frame after ACELP frame
96 : * Find the number of bits for PVQ coding
97 : * Write signaling information
98 : *--------------------------------------------------------------------------*/
99 :
100 8457 : num_bits = (int16_t) ( st->total_brate / FRAMES_PER_SEC );
101 8457 : extra_unused = 0;
102 :
103 : /*--------------------------------------------------------------------------
104 : * Detect signal transition
105 : *--------------------------------------------------------------------------*/
106 :
107 8457 : is_transient = detect_transient( st, audio, input_frame );
108 :
109 8457 : if ( st->element_mode > EVS_MONO && ( st->last_core == ACELP_CORE || st->last_core == AMR_WB_CORE ) )
110 : {
111 : /*--------------------------------------------------------------------------
112 : * IVAS switching frame
113 : *--------------------------------------------------------------------------*/
114 :
115 198 : L_spec = input_frame;
116 198 : left_overlap = -1;
117 198 : right_overlap = -1;
118 :
119 198 : WindowSignal( st->hTcxCfg, st->hTcxCfg->tcx_offsetFB, TRANSITION_OVERLAP, FULL_OVERLAP, &left_overlap, &right_overlap, st->hTcxEnc->speech_TCX, &L_spec, wtda_audio, 1, 1 );
120 :
121 198 : TCX_MDCT( wtda_audio, t_audio, left_overlap, L_spec - ( left_overlap + right_overlap ) / 2, right_overlap, st->element_mode );
122 :
123 198 : inner_frame = inner_frame_tbl[st->bwidth];
124 198 : L_spec = l_spec_ext_tbl[st->bwidth];
125 198 : is_transient = 0;
126 : }
127 : else
128 : {
129 : /*--------------------------------------------------------------------------
130 : * Windowing and time-domain aliasing
131 : * DCT transform
132 : *--------------------------------------------------------------------------*/
133 :
134 8259 : wtda( audio, wtda_audio, NULL, st->hTcxCfg->tcx_last_overlap_mode, st->hTcxCfg->tcx_curr_overlap_mode, input_frame );
135 :
136 8259 : if ( st->last_core == ACELP_CORE || st->last_core == AMR_WB_CORE )
137 : {
138 : /* Preprocessing in the first HQ frame after ACELP frame */
139 25 : core_switching_hq_prepare_enc( st, &num_bits, input_frame, wtda_audio, audio );
140 :
141 : /* During ACELP->HQ core switching, limit the HQ core bitrate to 48kbps */
142 25 : if ( num_bits > HQ_48k / FRAMES_PER_SEC )
143 : {
144 12 : extra_unused = num_bits - (int16_t) ( HQ_48k / FRAMES_PER_SEC );
145 12 : num_bits = (int16_t) ( HQ_48k / FRAMES_PER_SEC );
146 : }
147 : }
148 :
149 8259 : direct_transform( wtda_audio, t_audio, is_transient, input_frame, st->element_mode );
150 :
151 : /* scale coefficients to their nominal level (8kHz) */
152 8259 : if ( input_frame != NORM_MDCT_FACTOR )
153 : {
154 8259 : ener_match = (float) sqrt( (float) NORM_MDCT_FACTOR / (float) input_frame );
155 :
156 7494019 : for ( i = 0; i < input_frame; i++ )
157 : {
158 7485760 : t_audio[i] *= ener_match;
159 : }
160 : }
161 :
162 : /* limit encoded band-width according to the command-line OR BWD limitation */
163 8259 : inner_frame = inner_frame_tbl[st->bwidth];
164 8259 : L_spec = l_spec_tbl[st->bwidth];
165 :
166 8259 : if ( input_frame > inner_frame )
167 : {
168 1180 : if ( is_transient )
169 : {
170 472 : for ( i = 1; i < NUM_TIME_SWITCHING_BLOCKS; i++ )
171 : {
172 354 : mvr2r( t_audio + i * input_frame / NUM_TIME_SWITCHING_BLOCKS, t_audio + i * inner_frame / NUM_TIME_SWITCHING_BLOCKS, inner_frame / NUM_TIME_SWITCHING_BLOCKS );
173 : }
174 : }
175 :
176 1180 : set_f( t_audio + inner_frame, 0.0f, input_frame - inner_frame );
177 : }
178 : }
179 :
180 : /* subtract signaling bits */
181 8457 : num_bits -= hBstr->nb_bits_tot;
182 :
183 : /*--------------------------------------------------------------------------
184 : * High-band gain control in case of BWS
185 : *--------------------------------------------------------------------------*/
186 :
187 8457 : if ( st->bwidth_sw_cnt > 0 )
188 : {
189 31 : if ( is_transient )
190 : {
191 10 : for ( i = 0; i < NUM_TIME_SWITCHING_BLOCKS; i++ )
192 : {
193 8 : v_multc( t_audio + i * inner_frame / NUM_TIME_SWITCHING_BLOCKS + L_FRAME16k / NUM_TIME_SWITCHING_BLOCKS, (float) ( st->bwidth_sw_cnt ) / (float) BWS_TRAN_PERIOD, t_audio + i * inner_frame / NUM_TIME_SWITCHING_BLOCKS + L_FRAME16k / NUM_TIME_SWITCHING_BLOCKS, inner_frame / NUM_TIME_SWITCHING_BLOCKS - L_FRAME16k / NUM_TIME_SWITCHING_BLOCKS );
194 : }
195 : }
196 : else
197 : {
198 29 : v_multc( t_audio + L_FRAME16k, (float) ( st->bwidth_sw_cnt ) / (float) BWS_TRAN_PERIOD, t_audio + L_FRAME16k, L_spec - L_FRAME16k );
199 : }
200 : }
201 :
202 : /*--------------------------------------------------------------------------
203 : * Classify whether to put extra bits for FER mitigation
204 : *--------------------------------------------------------------------------*/
205 :
206 8457 : if ( ( st->last_core == TCX_20_CORE || st->last_core == TCX_10_CORE || st->last_core == HQ_CORE ) && st->core_brate > MINIMUM_RATE_TO_ENCODE_VOICING_FLAG )
207 : {
208 507 : if ( Voicing_flag > 0 )
209 : {
210 0 : push_indice( hBstr, IND_HQ_VOICING_FLAG, 1, 1 );
211 0 : num_bits -= 1;
212 : }
213 : else
214 : {
215 507 : push_indice( hBstr, IND_HQ_VOICING_FLAG, 0, 1 );
216 507 : num_bits -= 1;
217 : }
218 : }
219 :
220 : /*--------------------------------------------------------------------------
221 : * Transform-domain encoding
222 : *--------------------------------------------------------------------------*/
223 :
224 8457 : if ( hq_core_type == LOW_RATE_HQ_CORE )
225 : {
226 : /* HQ low rate encoder */
227 34 : hq_lr_enc( st, t_audio, inner_frame, &num_bits, is_transient );
228 : }
229 : else
230 : {
231 : /* HQ high rate encoder */
232 8423 : hq_hr_enc( st, t_audio, L_spec, &num_bits, is_transient, vad_hover_flag );
233 : }
234 :
235 : /* write all unused bits to the bitstream */
236 8457 : num_bits += extra_unused;
237 :
238 8639 : while ( num_bits >= 16 )
239 : {
240 182 : push_indice( hBstr, IND_UNUSED, 0, 16 );
241 182 : num_bits -= 16;
242 : }
243 :
244 8457 : if ( num_bits != 0 )
245 : {
246 4941 : push_indice( hBstr, IND_UNUSED, 0, num_bits );
247 : }
248 :
249 8457 : if ( st->element_mode > EVS_MONO && ( st->last_core == ACELP_CORE || st->last_core == AMR_WB_CORE ) )
250 : {
251 198 : overlap = st->hTcxCfg->tcx_mdct_window_length;
252 198 : nz = NS2SA( st->sr_core, N_ZERO_MDCT_NS );
253 198 : L_frame = (int16_t) ( st->L_frame + st->hTcxCfg->tcx_offset - st->hTcxCfg->lfacNext );
254 198 : tcx_offset = st->hTcxCfg->lfacNext;
255 198 : set_f( Aq_old, 0, M + 1 ); /* Dummy filter */
256 198 : Aq_old[0] = 1;
257 :
258 : /* Code taken from InternalTCXDecoder() */
259 198 : TCX_MDCT_Inverse( t_audio, wtda_audio, overlap, L_frame - overlap, overlap, st->element_mode );
260 :
261 : /* Window current frame */
262 198 : tcx_windowing_synthesis_current_frame( wtda_audio, st->hTcxCfg->tcx_aldo_window_2, st->hTcxCfg->tcx_mdct_window_half, st->hTcxCfg->tcx_mdct_window_minimum, overlap, /*st->hTcxCfg->tcx_mdct_window_length*/ st->hTcxCfg->tcx_mdct_window_half_length, st->hTcxCfg->tcx_mdct_window_min_length, st->last_core == ACELP_CORE, st->hTcxCfg->tcx_last_overlap_mode, /*left mode*/ st->hTcxEnc->acelp_zir, st->hTcxEnc->Txnq, NULL, Aq_old, st->hTcxCfg->tcx_mdct_window_trans, st->L_frame >> 1, tcx_offset < 0 ? -tcx_offset : 0, st->last_core, 0, 0 );
263 :
264 : /*Compute windowed synthesis in case of switching to ALDO windows in next frame*/
265 198 : mvr2r( wtda_audio + L_frame - nz, st->hTcxEnc->old_out, nz + overlap );
266 198 : set_zero( st->hTcxEnc->old_out + nz + overlap, nz );
267 :
268 198 : tcx_windowing_synthesis_past_frame( st->hTcxEnc->old_out + nz, st->hTcxCfg->tcx_aldo_window_1_trunc, st->hTcxCfg->tcx_mdct_window_half, st->hTcxCfg->tcx_mdct_window_minimum, overlap, st->hTcxCfg->tcx_mdct_window_half_length, st->hTcxCfg->tcx_mdct_window_min_length, FULL_OVERLAP );
269 :
270 18018 : for ( i = 0; i < nz; i++ )
271 : {
272 17820 : st->hTcxEnc->old_out[nz + overlap + i] = wtda_audio[L_frame - 1 - i] * st->hTcxCfg->tcx_aldo_window_1_trunc[-1 - i];
273 : }
274 198 : mvr2r( wtda_audio + ( overlap >> 1 ) - tcx_offset, output, st->L_frame );
275 : }
276 : else
277 : {
278 8259 : ener_match = (float) sqrt( (float) L_FRAME16k / (float) NORM_MDCT_FACTOR );
279 8259 : v_multc( t_audio, ener_match, t_audio, inner_frame );
280 :
281 8259 : inverse_transform( t_audio, wtda_audio, is_transient, L_FRAME16k, inner_frame, st->element_mode );
282 :
283 8259 : window_ola( wtda_audio, output, st->hTcxEnc->old_out, L_FRAME16k, st->hTcxCfg->tcx_last_overlap_mode, st->hTcxCfg->tcx_curr_overlap_mode, 0, 0, NULL );
284 : }
285 :
286 8457 : if ( st->element_mode > EVS_MONO )
287 : {
288 : /* Store LB synthesis in case of switch to ACELP */
289 8017 : mvr2r( output, st->hLPDmem->old_exc, L_FRAME16k );
290 : }
291 :
292 8457 : pop_wmops();
293 :
294 8457 : return;
295 : }
296 :
297 : /*-------------------------------------------------------------------*
298 : * hq_core_enc_init()
299 : *
300 : * Initialize HQ core state structure
301 : *-------------------------------------------------------------------*/
302 :
303 3109 : void HQ_core_enc_init(
304 : HQ_ENC_HANDLE hHQ_core /* i/o: HQ core data handle */
305 : )
306 : {
307 3109 : hHQ_core->mode_count = 0;
308 3109 : hHQ_core->mode_count1 = 0;
309 :
310 3109 : hHQ_core->hq_generic_speech_class = 0;
311 :
312 3109 : hHQ_core->prev_Npeaks = 0;
313 3109 : set_s( hHQ_core->prev_peaks, 0, HVQ_MAX_PEAKS );
314 3109 : hHQ_core->hvq_hangover = 0;
315 3109 : hHQ_core->prev_hqswb_clas = HQ_NORMAL;
316 3109 : set_s( hHQ_core->prev_SWB_peak_pos, 0, SPT_SHORTEN_SBNUM );
317 :
318 3109 : set_s( hHQ_core->prev_frm_index, -1, NB_SWB_SUBBANDS_HAR_SEARCH_SB );
319 3109 : hHQ_core->prev_frm_hfe2 = 0;
320 3109 : hHQ_core->prev_stab_hfe2 = 0;
321 3109 : hHQ_core->prev_ni_ratio = 0.5f;
322 3109 : set_f( hHQ_core->prev_En_sb, 0.0f, NB_SWB_SUBBANDS );
323 3109 : set_s( hHQ_core->last_bitalloc_max_band, 0, 2 );
324 3109 : set_f( hHQ_core->last_ni_gain, 0, BANDS_MAX );
325 3109 : set_f( hHQ_core->last_env, 0, BANDS_MAX );
326 3109 : hHQ_core->last_max_pos_pulse = 0;
327 :
328 3109 : hHQ_core->crest_lp = HQ_CREST_THRESHOLD;
329 3109 : hHQ_core->crest_mod_lp = HQ_CREST_MOD_THRESHOLD;
330 :
331 3109 : return;
332 : }
|