FFmpeg  2.8.17
wmavoice.c
1 /*
2  * Windows Media Audio Voice decoder.
3  * Copyright (c) 2009 Ronald S. Bultje
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * @brief Windows Media Audio Voice compatible decoder
25  * @author Ronald S. Bultje <rsbultje@gmail.com>
26  */
27 
28 #include <math.h>
29 
30 #include "libavutil/channel_layout.h"
31 #include "libavutil/float_dsp.h"
32 #include "libavutil/mem.h"
33 #include "avcodec.h"
34 #include "internal.h"
35 #include "get_bits.h"
36 #include "put_bits.h"
37 #include "wmavoice_data.h"
38 #include "celp_filters.h"
39 #include "acelp_vectors.h"
40 #include "acelp_filters.h"
41 #include "lsp.h"
42 #include "dct.h"
43 #include "rdft.h"
44 #include "sinewin.h"
45 
46 #define MAX_BLOCKS 8 ///< maximum number of blocks per frame
47 #define MAX_LSPS 16 ///< maximum filter order
48 #define MAX_LSPS_ALIGN16 16 ///< same as #MAX_LSPS; needs to be multiple
49  ///< of 16 for ASM input buffer alignment
50 #define MAX_FRAMES 3 ///< maximum number of frames per superframe
51 #define MAX_FRAMESIZE 160 ///< maximum number of samples per frame
52 #define MAX_SIGNAL_HISTORY 416 ///< maximum excitation signal history
53 #define MAX_SFRAMESIZE (MAX_FRAMESIZE * MAX_FRAMES)
54  ///< maximum number of samples per superframe
55 #define SFRAME_CACHE_MAXSIZE 256 ///< maximum cache size for frame data that
56  ///< was split over two packets
57 #define VLC_NBITS 6 ///< number of bits to read per VLC iteration
58 
59 /**
60  * Frame type VLC coding.
61  */
62 static VLC frame_type_vlc;
63 
64 /**
65  * Adaptive codebook types.
66  */
67 enum {
68  ACB_TYPE_NONE = 0, ///< no adaptive codebook (only hardcoded fixed)
69  ACB_TYPE_ASYMMETRIC = 1, ///< adaptive codebook with per-frame pitch, which
70  ///< we interpolate to get a per-sample pitch.
71  ///< Signal is generated using an asymmetric sinc
72  ///< window function
73  ///< @note see #wmavoice_ipol1_coeffs
74  ACB_TYPE_HAMMING = 2 ///< Per-block pitch with signal generation using
75  ///< a Hamming sinc window function
76  ///< @note see #wmavoice_ipol2_coeffs
77 };
78 
79 /**
80  * Fixed codebook types.
81  */
82 enum {
83  FCB_TYPE_SILENCE = 0, ///< comfort noise during silence
84  ///< generated from a hardcoded (fixed) codebook
85  ///< with per-frame (low) gain values
86  FCB_TYPE_HARDCODED = 1, ///< hardcoded (fixed) codebook with per-block
87  ///< gain values
88  FCB_TYPE_AW_PULSES = 2, ///< Pitch-adaptive window (AW) pulse signals,
89  ///< used in particular for low-bitrate streams
90  FCB_TYPE_EXC_PULSES = 3, ///< Innovation (fixed) codebook pulse sets in
91  ///< combinations of either single pulses or
92  ///< pulse pairs
93 };
94 
95 /**
96  * Description of frame types.
97  */
98 static const struct frame_type_desc {
99  uint8_t n_blocks; ///< amount of blocks per frame (each block
100  ///< contains 160/#n_blocks samples)
101  uint8_t log_n_blocks; ///< log2(#n_blocks)
102  uint8_t acb_type; ///< Adaptive codebook type (ACB_TYPE_*)
103  uint8_t fcb_type; ///< Fixed codebook type (FCB_TYPE_*)
104  uint8_t dbl_pulses; ///< how many pulse vectors have pulse pairs
105  ///< (rather than just one single pulse)
106  ///< only if #fcb_type == #FCB_TYPE_EXC_PULSES
107  uint16_t frame_size; ///< the amount of bits that make up the block
108  ///< data (per frame)
109 } frame_descs[17] = {
110  { 1, 0, ACB_TYPE_NONE, FCB_TYPE_SILENCE, 0, 0 },
111  { 2, 1, ACB_TYPE_NONE, FCB_TYPE_HARDCODED, 0, 28 },
112  { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_AW_PULSES, 0, 46 },
113  { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 80 },
114  { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 104 },
115  { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 0, 108 },
116  { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 132 },
117  { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 168 },
118  { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 64 },
119  { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 80 },
120  { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 104 },
121  { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 108 },
122  { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 132 },
123  { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 168 },
124  { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 176 },
125  { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 208 },
126  { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 256 }
127 };
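As a quick illustration of how these descriptors are used (a stand-alone sketch, not decoder code; the numbers are those of the last row above): the frame-type index decoded from the bitstream selects a row, and n_blocks then fixes the per-block sample count used throughout synth_frame().

    #include <stdio.h>

    int main(void)
    {
        /* last row of frame_descs: 8 blocks, Hamming ACB, 5 pulse pairs */
        int n_blocks       = 8;
        int frame_size     = 256;            /* bits of block data per frame */
        int block_nsamples = 160 / n_blocks; /* MAX_FRAMESIZE / n_blocks = 20 */

        printf("%d samples per block, %d bits of block data\n",
               block_nsamples, frame_size);
        return 0;
    }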
128 
129 /**
130  * WMA Voice decoding context.
131  */
132 typedef struct WMAVoiceContext {
133  /**
134  * @name Global values specified in the stream header / extradata or used all over.
135  * @{
136  */
137  GetBitContext gb; ///< packet bitreader. During decoder init,
138  ///< it contains the extradata from the
139  ///< demuxer. During decoding, it contains
140  ///< packet data.
141  int8_t vbm_tree[25]; ///< converts VLC codes to frame type
142 
143  int spillover_bitsize; ///< number of bits used to specify
144  ///< #spillover_nbits in the packet header
145  ///< = ceil(log2(ctx->block_align << 3))
146  int history_nsamples; ///< number of samples in history for signal
147  ///< prediction (through ACB)
148 
149  /* postfilter specific values */
150  int do_apf; ///< whether to apply the averaged
151  ///< projection filter (APF)
152  int denoise_strength; ///< strength of denoising in Wiener filter
153  ///< [0-11]
154  int denoise_tilt_corr; ///< Whether to apply tilt correction to the
155  ///< Wiener filter coefficients (postfilter)
156  int dc_level; ///< Predicted amount of DC noise, based
157  ///< on which a DC removal filter is used
158 
159  int lsps; ///< number of LSPs per frame [10 or 16]
160  int lsp_q_mode; ///< defines quantizer defaults [0, 1]
161  int lsp_def_mode; ///< defines different sets of LSP defaults
162  ///< [0, 1]
163  int frame_lsp_bitsize; ///< size (in bits) of LSPs, when encoded
164  ///< per-frame (independent coding)
165  int sframe_lsp_bitsize; ///< size (in bits) of LSPs, when encoded
166  ///< per superframe (residual coding)
167 
168  int min_pitch_val; ///< base value for pitch parsing code
169  int max_pitch_val; ///< max value + 1 for pitch parsing
170  int pitch_nbits; ///< number of bits used to specify the
171  ///< pitch value in the frame header
172  int block_pitch_nbits; ///< number of bits used to specify the
173  ///< first block's pitch value
174  int block_pitch_range; ///< range of the block pitch
175  int block_delta_pitch_nbits; ///< number of bits used to specify the
176  ///< delta pitch between this and the last
177  ///< block's pitch value, used in all but
178  ///< first block
179  int block_delta_pitch_hrange; ///< 1/2 range of the delta (full range is
180  ///< from -this to +this-1)
181  uint16_t block_conv_table[4]; ///< boundaries for block pitch unit/scale
182  ///< conversion
183 
184  /**
185  * @}
186  *
187  * @name Packet values specified in the packet header or related to a packet.
188  *
189  * A packet is considered to be a single unit of data provided to this
190  * decoder by the demuxer.
191  * @{
192  */
193  int spillover_nbits; ///< number of bits of the previous packet's
194  ///< last superframe preceding this
195  ///< packet's first full superframe (useful
196  ///< for re-synchronization also)
197  int has_residual_lsps; ///< if set, superframes contain one set of
198  ///< LSPs that cover all frames, encoded as
199  ///< independent and residual LSPs; if not
200  ///< set, each frame contains its own, fully
201  ///< independent, LSPs
202  int skip_bits_next; ///< number of bits to skip at the next call
203  ///< to #wmavoice_decode_packet() (since
204  ///< they're part of the previous superframe)
205 
206  uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE + FF_INPUT_BUFFER_PADDING_SIZE];
207  ///< cache for superframe data split over
208  ///< multiple packets
209  int sframe_cache_size; ///< set to >0 if we have data from an
210  ///< (incomplete) superframe from a previous
211  ///< packet that spilled over in the current
212  ///< packet; specifies the amount of bits in
213  ///< #sframe_cache
214  PutBitContext pb; ///< bitstream writer for #sframe_cache
215 
216  /**
217  * @}
218  *
219  * @name Frame and superframe values
220  * Superframe and frame data - these can change from frame to frame,
221  * although some of them do in that case serve as a cache / history for
222  * the next frame or superframe.
223  * @{
224  */
225  double prev_lsps[MAX_LSPS]; ///< LSPs of the last frame of the previous
226  ///< superframe
227  int last_pitch_val; ///< pitch value of the previous frame
228  int last_acb_type; ///< frame type [0-2] of the previous frame
229  int pitch_diff_sh16; ///< ((cur_pitch_val - #last_pitch_val)
230  ///< << 16) / #MAX_FRAMESIZE
231  float silence_gain; ///< set for use in blocks if #ACB_TYPE_NONE
232 
233  int aw_idx_is_ext; ///< whether the AW index was encoded in
234  ///< 8 bits (instead of 6)
235  int aw_pulse_range; ///< the range over which #aw_pulse_set1()
236  ///< can apply the pulse, relative to the
237  ///< value in aw_first_pulse_off. The exact
238  ///< position of the first AW-pulse is within
239  ///< [pulse_off, pulse_off + this], and
240  ///< depends on bitstream values; [16 or 24]
241  int aw_n_pulses[2]; ///< number of AW-pulses in each block; note
242  ///< that this number can be negative (in
243  ///< which case it basically means "zero")
244  int aw_first_pulse_off[2]; ///< index of first sample to which to
245  ///< apply AW-pulses, or -0xff if unset
246  int aw_next_pulse_off_cache; ///< the position (relative to start of the
247  ///< second block) at which pulses should
248  ///< start to be positioned, serves as a
249  ///< cache for pitch-adaptive window pulses
250  ///< between blocks
251 
252  int frame_cntr; ///< current frame index [0 - 0xFFFE]; is
253  ///< only used for comfort noise in #pRNG()
254  float gain_pred_err[6]; ///< cache for gain prediction
255  float excitation_history[MAX_SIGNAL_HISTORY];
256  ///< cache of the signal of previous
257  ///< superframes, used as a history for
258  ///< signal generation
259  float synth_history[MAX_LSPS]; ///< see #excitation_history
260  /**
261  * @}
262  *
263  * @name Postfilter values
264  *
265  * Variables used for postfilter implementation, mostly history for
266  * smoothing and so on, and context variables for FFT/iFFT.
267  * @{
268  */
269  RDFTContext rdft, irdft; ///< contexts for FFT-calculation in the
270  ///< postfilter (for denoise filter)
271  DCTContext dct, dst; ///< contexts for phase shift (in Hilbert
272  ///< transform, part of postfilter)
273  float sin[511], cos[511]; ///< 8-bit cosine/sine windows over [-pi,pi]
274  ///< range
275  float postfilter_agc; ///< gain control memory, used in
276  ///< #adaptive_gain_control()
277  float dcf_mem[2]; ///< DC filter history
278  float zero_exc_pf[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE];
279  ///< zero filter output (i.e. excitation)
280  ///< by postfilter
281  float denoise_filter_cache[MAX_FRAMESIZE];
282  int denoise_filter_cache_size; ///< samples in #denoise_filter_cache
283  DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x80];
284  ///< aligned buffer for LPC tilting
285  DECLARE_ALIGNED(32, float, denoise_coeffs_pf)[0x80];
286  ///< aligned buffer for denoise coefficients
287  DECLARE_ALIGNED(32, float, synth_filter_out_buf)[0x80 + MAX_LSPS_ALIGN16];
288  ///< aligned buffer for postfilter speech
289  ///< synthesis
290  /**
291  * @}
292  */
293 } WMAVoiceContext;
294 
295 /**
296  * Set up the variable bit mode (VBM) tree from container extradata.
297  * @param gb bit I/O context.
298  * The bit context (s->gb) should be loaded with byte 23-46 of the
299  * container extradata (i.e. the ones containing the VBM tree).
300  * @param vbm_tree pointer to array to which the decoded VBM tree will be
301  * written.
302  * @return 0 on success, <0 on error.
303  */
304 static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
305 {
306  int cntr[8] = { 0 }, n, res;
307 
308  memset(vbm_tree, 0xff, sizeof(vbm_tree[0]) * 25);
309  for (n = 0; n < 17; n++) {
310  res = get_bits(gb, 3);
311  if (cntr[res] > 3) // should be >= 3 + (res == 7)
312  return -1;
313  vbm_tree[res * 3 + cntr[res]++] = n;
314  }
315  return 0;
316 }
317 
318 static av_cold void wmavoice_init_static_data(AVCodec *codec)
319 {
320  static const uint8_t bits[] = {
321  2, 2, 2, 4, 4, 4,
322  6, 6, 6, 8, 8, 8,
323  10, 10, 10, 12, 12, 12,
324  14, 14, 14, 14
325  };
326  static const uint16_t codes[] = {
327  0x0000, 0x0001, 0x0002, // 00/01/10
328  0x000c, 0x000d, 0x000e, // 11+00/01/10
329  0x003c, 0x003d, 0x003e, // 1111+00/01/10
330  0x00fc, 0x00fd, 0x00fe, // 111111+00/01/10
331  0x03fc, 0x03fd, 0x03fe, // 11111111+00/01/10
332  0x0ffc, 0x0ffd, 0x0ffe, // 1111111111+00/01/10
333  0x3ffc, 0x3ffd, 0x3ffe, 0x3fff // 111111111111+xx
334  };
335 
336  INIT_VLC_STATIC(&frame_type_vlc, VLC_NBITS, sizeof(bits),
337  bits, 1, 1, codes, 2, 2, 132);
338 }
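The bits[]/codes[] tables above form a simple prefix code: each leading "11" pair steps to the next group of three symbols and the closing two-bit suffix (00/01/10, plus 11 in the final group) selects the symbol, which synth_frame() then maps through s->vbm_tree[] to a frame_descs index. The following stand-alone sketch (illustrative only; it bypasses the VLC/get_bits machinery) decodes that structure from a raw MSB-first buffer:

    #include <stdint.h>
    #include <stdio.h>

    /* Read one bit, MSB-first, from buf at bit position *pos. */
    static int read_bit(const uint8_t *buf, int *pos)
    {
        int bit = (buf[*pos >> 3] >> (7 - (*pos & 7))) & 1;
        (*pos)++;
        return bit;
    }

    /* Each "11" pair adds 3 to the symbol base; the terminating two-bit
     * suffix picks the symbol within the group. In the 7th group (base 18)
     * the suffix 11 is also a valid symbol, mirroring the 0x3fff entry. */
    static int decode_frame_type_symbol(const uint8_t *buf, int *pos)
    {
        int base = 0;
        for (;;) {
            int b0 = read_bit(buf, pos);
            int b1 = read_bit(buf, pos);
            int suffix = (b0 << 1) | b1;
            if (suffix != 3)
                return base + suffix;
            if (base == 18)
                return base + 3;
            base += 3;
        }
    }

    int main(void)
    {
        /* "10" = 2, "1100" = 3, "111110" = 8, zero-padded: 1011 0011 1110 0000 */
        const uint8_t buf[2] = { 0xB3, 0xE0 };
        int pos = 0, a, b, c;

        a = decode_frame_type_symbol(buf, &pos);
        b = decode_frame_type_symbol(buf, &pos);
        c = decode_frame_type_symbol(buf, &pos);
        printf("%d %d %d\n", a, b, c); /* prints "2 3 8" */
        return 0;
    }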
339 
340 /**
341  * Set up decoder with parameters from demuxer (extradata etc.).
342  */
343 static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
344 {
345  int n, flags, pitch_range, lsp16_flag;
346  WMAVoiceContext *s = ctx->priv_data;
347 
348  /**
349  * Extradata layout:
350  * - byte 0-18: WMAPro-in-WMAVoice extradata (see wmaprodec.c),
351  * - byte 19-22: flags field (annoyingly in LE; see below for known
352  * values),
353  * - byte 23-46: variable bitmode tree (really just 17 * 3 bits,
354  * rest is 0).
355  */
356  if (ctx->extradata_size != 46) {
357  av_log(ctx, AV_LOG_ERROR,
358  "Invalid extradata size %d (should be 46)\n",
359  ctx->extradata_size);
360  return AVERROR_INVALIDDATA;
361  }
362  flags = AV_RL32(ctx->extradata + 18);
363  s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
364  s->do_apf = flags & 0x1;
365  if (s->do_apf) {
366  ff_rdft_init(&s->rdft, 7, DFT_R2C);
367  ff_rdft_init(&s->irdft, 7, IDFT_C2R);
368  ff_dct_init(&s->dct, 6, DCT_I);
369  ff_dct_init(&s->dst, 6, DST_I);
370 
371  ff_sine_window_init(s->cos, 256);
372  memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
373  for (n = 0; n < 255; n++) {
374  s->sin[n] = -s->sin[510 - n];
375  s->cos[510 - n] = s->cos[n];
376  }
377  }
378  s->denoise_strength = (flags >> 2) & 0xF;
379  if (s->denoise_strength >= 12) {
380  av_log(ctx, AV_LOG_ERROR,
381  "Invalid denoise filter strength %d (max=11)\n",
382  s->denoise_strength);
383  return AVERROR_INVALIDDATA;
384  }
385  s->denoise_tilt_corr = !!(flags & 0x40);
386  s->dc_level = (flags >> 7) & 0xF;
387  s->lsp_q_mode = !!(flags & 0x2000);
388  s->lsp_def_mode = !!(flags & 0x4000);
389  lsp16_flag = flags & 0x1000;
390  if (lsp16_flag) {
391  s->lsps = 16;
392  s->frame_lsp_bitsize = 34;
393  s->sframe_lsp_bitsize = 60;
394  } else {
395  s->lsps = 10;
396  s->frame_lsp_bitsize = 24;
397  s->sframe_lsp_bitsize = 48;
398  }
399  for (n = 0; n < s->lsps; n++)
400  s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
401 
402  init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);
403  if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {
404  av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");
405  return AVERROR_INVALIDDATA;
406  }
407 
408  if (ctx->sample_rate >= INT_MAX / (256 * 37))
409  return AVERROR_INVALIDDATA;
410 
411  s->min_pitch_val = ((ctx->sample_rate << 8) / 400 + 50) >> 8;
412  s->max_pitch_val = ((ctx->sample_rate << 8) * 37 / 2000 + 50) >> 8;
413  pitch_range = s->max_pitch_val - s->min_pitch_val;
414  if (pitch_range <= 0) {
415  av_log(ctx, AV_LOG_ERROR, "Invalid pitch range; broken extradata?\n");
416  return AVERROR_INVALIDDATA;
417  }
418  s->pitch_nbits = av_ceil_log2(pitch_range);
419  s->last_pitch_val = 40;
420  s->last_acb_type = ACB_TYPE_NONE;
421  s->history_nsamples = s->max_pitch_val + 8;
422 
423  if (s->history_nsamples > MAX_SIGNAL_HISTORY) {
424  int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,
425  max_sr = ((((MAX_SIGNAL_HISTORY - 8) << 8) + 205) * 2000 / 37) >> 8;
426 
427  av_log(ctx, AV_LOG_ERROR,
428  "Unsupported samplerate %d (min=%d, max=%d)\n",
429  ctx->sample_rate, min_sr, max_sr); // 322-22097 Hz
430 
431  return AVERROR(ENOSYS);
432  }
433 
434  s->block_conv_table[0] = s->min_pitch_val;
435  s->block_conv_table[1] = (pitch_range * 25) >> 6;
436  s->block_conv_table[2] = (pitch_range * 44) >> 6;
437  s->block_conv_table[3] = s->max_pitch_val - 1;
438  s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;
439  if (s->block_delta_pitch_hrange <= 0) {
440  av_log(ctx, AV_LOG_ERROR, "Invalid delta pitch hrange; broken extradata?\n");
441  return AVERROR_INVALIDDATA;
442  }
443  s->block_delta_pitch_nbits = 1 + av_ceil_log2(s->block_delta_pitch_hrange);
444  s->block_pitch_range = s->block_conv_table[2] +
445  s->block_conv_table[3] + 1 +
446  2 * (s->block_conv_table[1] - 2 * s->min_pitch_val);
447  s->block_pitch_nbits = av_ceil_log2(s->block_pitch_range);
448 
449  ctx->channels = 1;
450  ctx->channel_layout = AV_CH_LAYOUT_MONO;
451  ctx->sample_fmt = AV_SAMPLE_FMT_FLT;
452 
453  return 0;
454 }
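A worked example of the fixed-point pitch bounds computed above, assuming a hypothetical 8000 Hz stream (illustrative only): the minimum pitch lag is roughly sample_rate/400 samples and the maximum roughly sample_rate*37/2000 samples, both rounded in 8-bit fixed point.

    #include <stdio.h>

    int main(void)
    {
        int sample_rate   = 8000; /* assumed value */
        int min_pitch_val = ((sample_rate << 8) / 400 + 50) >> 8;       /* 20  */
        int max_pitch_val = ((sample_rate << 8) * 37 / 2000 + 50) >> 8; /* 148 */
        int pitch_range   = max_pitch_val - min_pitch_val;              /* 128 */

        /* av_ceil_log2(128) = 7, so the per-frame pitch value occupies
         * 7 bits in the frame header for this sample rate. */
        printf("min=%d max=%d range=%d\n",
               min_pitch_val, max_pitch_val, pitch_range);
        return 0;
    }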
455 
456 /**
457  * @name Postfilter functions
458  * Postfilter functions (gain control, wiener denoise filter, DC filter,
459  * kalman smoothening, plus surrounding code to wrap it)
460  * @{
461  */
462 /**
463  * Adaptive gain control (as used in postfilter).
464  *
465  * Identical to #ff_adaptive_gain_control() in acelp_vectors.c, except
466  * that the energy here is calculated using sum(abs(...)), whereas the
467  * other codecs (e.g. AMR-NB, SIPRO) use sqrt(dotproduct(...)).
468  *
469  * @param out output buffer for filtered samples
470  * @param in input buffer containing the samples as they are after the
471  * postfilter steps so far
472  * @param speech_synth input buffer containing speech synth before postfilter
473  * @param size input buffer size
474  * @param alpha exponential filter factor
475  * @param gain_mem pointer to filter memory (single float)
476  */
477 static void adaptive_gain_control(float *out, const float *in,
478  const float *speech_synth,
479  int size, float alpha, float *gain_mem)
480 {
481  int i;
482  float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;
483  float mem = *gain_mem;
484 
485  for (i = 0; i < size; i++) {
486  speech_energy += fabsf(speech_synth[i]);
487  postfilter_energy += fabsf(in[i]);
488  }
489  gain_scale_factor = (1.0 - alpha) * speech_energy / postfilter_energy;
490 
491  for (i = 0; i < size; i++) {
492  mem = alpha * mem + gain_scale_factor;
493  out[i] = in[i] * mem;
494  }
495 
496  *gain_mem = mem;
497 }
498 
499 /**
500  * Kalman smoothing function.
501  *
502  * This function looks pitch +/- 3 samples back into history to find
503  * the best fitting curve (that one giving the optimal gain of the two
504  * signals, i.e. the highest dot product between the two), and then
505  * uses that signal history to smoothen the output of the speech synthesis
506  * filter.
507  *
508  * @param s WMA Voice decoding context
509  * @param pitch pitch of the speech signal
510  * @param in input speech signal
511  * @param out output pointer for smoothened signal
512  * @param size input/output buffer size
513  *
514  * @returns -1 if no smoothening took place, e.g. because no optimal
515  * fit could be found, or 0 on success.
516  */
517 static int kalman_smoothen(WMAVoiceContext *s, int pitch,
518  const float *in, float *out, int size)
519 {
520  int n;
521  float optimal_gain = 0, dot;
522  const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],
523  *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],
524  *best_hist_ptr = NULL;
525 
526  /* find best fitting point in history */
527  do {
528  dot = avpriv_scalarproduct_float_c(in, ptr, size);
529  if (dot > optimal_gain) {
530  optimal_gain = dot;
531  best_hist_ptr = ptr;
532  }
533  } while (--ptr >= end);
534 
535  if (optimal_gain <= 0)
536  return -1;
537  dot = avpriv_scalarproduct_float_c(best_hist_ptr, best_hist_ptr, size);
538  if (dot <= 0) // would be 1.0
539  return -1;
540 
541  if (optimal_gain <= dot) {
542  dot = dot / (dot + 0.6 * optimal_gain); // 0.625-1.000
543  } else
544  dot = 0.625;
545 
546  /* actual smoothing */
547  for (n = 0; n < size; n++)
548  out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
549 
550  return 0;
551 }
552 
553 /**
554  * Get the tilt factor of a formant filter from its transfer function
555  * @see #tilt_factor() in amrnbdec.c, which does essentially the same,
556  * but somehow (??) it does a speech synthesis filter in the
557  * middle, which is missing here
558  *
559  * @param lpcs LPC coefficients
560  * @param n_lpcs Size of LPC buffer
561  * @returns the tilt factor
562  */
563 static float tilt_factor(const float *lpcs, int n_lpcs)
564 {
565  float rh0, rh1;
566 
567  rh0 = 1.0 + avpriv_scalarproduct_float_c(lpcs, lpcs, n_lpcs);
568  rh1 = lpcs[0] + avpriv_scalarproduct_float_c(lpcs, &lpcs[1], n_lpcs - 1);
569 
570  return rh1 / rh0;
571 }
572 
573 /**
574  * Derive denoise filter coefficients (in real domain) from the LPCs.
575  */
576 static void calc_input_response(WMAVoiceContext *s, float *lpcs,
577  int fcb_type, float *coeffs, int remainder)
578 {
579  float last_coeff, min = 15.0, max = -15.0;
580  float irange, angle_mul, gain_mul, range, sq;
581  int n, idx;
582 
583  /* Create frequency power spectrum of speech input (i.e. RDFT of LPCs) */
584  s->rdft.rdft_calc(&s->rdft, lpcs);
585 #define log_range(var, assign) do { \
586  float tmp = log10f(assign); var = tmp; \
587  max = FFMAX(max, tmp); min = FFMIN(min, tmp); \
588  } while (0)
589  log_range(last_coeff, lpcs[1] * lpcs[1]);
590  for (n = 1; n < 64; n++)
591  log_range(lpcs[n], lpcs[n * 2] * lpcs[n * 2] +
592  lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
593  log_range(lpcs[0], lpcs[0] * lpcs[0]);
594 #undef log_range
595  range = max - min;
596  lpcs[64] = last_coeff;
597 
598  /* Now, use this spectrum to pick out these frequencies with higher
599  * (relative) power/energy (which we then take to be "not noise"),
600  * and set up a table (still in lpc[]) of (relative) gains per frequency.
601  * These frequencies will be maintained, while others ("noise") will be
602  * decreased in the filter output. */
603  irange = 64.0 / range; // so irange*(max-value) is in the range [0, 63]
604  gain_mul = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) :
605  (5.0 / 14.7));
606  angle_mul = gain_mul * (8.0 * M_LN10 / M_PI);
607  for (n = 0; n <= 64; n++) {
608  float pwr;
609 
610  idx = lrint((max - lpcs[n]) * irange - 1);
611  idx = FFMAX(0, idx);
612  pwr = wmavoice_denoise_power_table[s->denoise_strength][idx];
613  lpcs[n] = angle_mul * pwr;
614 
615  /* 70.57 =~ 1/log10(1.0331663) */
616  idx = av_clipf((pwr * gain_mul - 0.0295) * 70.570526123, 0, INT_MAX / 2);
617 
618  if (idx > 127) { // fall back if index falls outside table range
619  coeffs[n] = wmavoice_energy_table[127] *
620  powf(1.0331663, idx - 127);
621  } else
622  coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)];
623  }
624 
625  /* calculate the Hilbert transform of the gains, which we do (since this
626  * is a sine input) by doing a phase shift (in theory, H(sin())=cos()).
627  * Hilbert_Transform(RDFT(x)) = Laplace_Transform(x), which calculates the
628  * "moment" of the LPCs in this filter. */
629  s->dct.dct_calc(&s->dct, lpcs);
630  s->dst.dct_calc(&s->dst, lpcs);
631 
632  /* Split out the coefficient indexes into phase/magnitude pairs */
633  idx = 255 + av_clip(lpcs[64], -255, 255);
634  coeffs[0] = coeffs[0] * s->cos[idx];
635  idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
636  last_coeff = coeffs[64] * s->cos[idx];
637  for (n = 63;; n--) {
638  idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
639  coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
640  coeffs[n * 2] = coeffs[n] * s->cos[idx];
641 
642  if (!--n) break;
643 
644  idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
645  coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
646  coeffs[n * 2] = coeffs[n] * s->cos[idx];
647  }
648  coeffs[1] = last_coeff;
649 
650  /* move into real domain */
651  s->irdft.rdft_calc(&s->irdft, coeffs);
652 
653  /* tilt correction and normalize scale */
654  memset(&coeffs[remainder], 0, sizeof(coeffs[0]) * (128 - remainder));
655  if (s->denoise_tilt_corr) {
656  float tilt_mem = 0;
657 
658  coeffs[remainder - 1] = 0;
659  ff_tilt_compensation(&tilt_mem,
660  -1.8 * tilt_factor(coeffs, remainder - 1),
661  coeffs, remainder);
662  }
663  sq = (1.0 / 64.0) * sqrtf(1 / avpriv_scalarproduct_float_c(coeffs, coeffs,
664  remainder));
665  for (n = 0; n < remainder; n++)
666  coeffs[n] *= sq;
667 }
668 
669 /**
670  * This function applies a Wiener filter on the (noisy) speech signal as
671  * a means to denoise it.
672  *
673  * - take RDFT of LPCs to get the power spectrum of the noise + speech;
674  * - using this power spectrum, calculate (for each frequency) the Wiener
675  * filter gain, which depends on the frequency power and desired level
676  * of noise subtraction (when set too high, this leads to artifacts)
677  * We can do this symmetrically over the X-axis (so 0-4kHz is the inverse
678  * of 4-8kHz);
679  * - by doing a phase shift, calculate the Hilbert transform of this array
680  * of per-frequency filter-gains to get the filtering coefficients;
681  * - smoothen/normalize/de-tilt these filter coefficients as desired;
682  * - take RDFT of noisy sound, apply the coefficients and take its IRDFT
683  * to get the denoised speech signal;
684  * - the leftover (i.e. output of the IRDFT on denoised speech data beyond
685  * the frame boundary) is saved and applied to subsequent frames by an
686  * overlap-add method (otherwise you get clicking artifacts).
687  *
688  * @param s WMA Voice decoding context
689  * @param fcb_type Frame (codebook) type
690  * @param synth_pf input: the noisy speech signal, output: denoised speech
691  * data; should be 16-byte aligned (for ASM purposes)
692  * @param size size of the speech data
693  * @param lpcs LPCs used to synthesize this frame's speech data
694  */
695 static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
696  float *synth_pf, int size,
697  const float *lpcs)
698 {
699  int remainder, lim, n;
700 
701  if (fcb_type != FCB_TYPE_SILENCE) {
702  float *tilted_lpcs = s->tilted_lpcs_pf,
703  *coeffs = s->denoise_coeffs_pf, tilt_mem = 0;
704 
705  tilted_lpcs[0] = 1.0;
706  memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s->lsps);
707  memset(&tilted_lpcs[s->lsps + 1], 0,
708  sizeof(tilted_lpcs[0]) * (128 - s->lsps - 1));
709  ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s->lsps),
710  tilted_lpcs, s->lsps + 2);
711 
712  /* The IRDFT output (127 samples for 7-bit filter) beyond the frame
713  * size is applied to the next frame. All input beyond this is zero,
714  * and thus all output beyond this will go towards zero, hence we can
715  * limit to min(size-1, 127-size) as a performance consideration. */
716  remainder = FFMIN(127 - size, size - 1);
717  calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder);
718 
719  /* apply coefficients (in frequency spectrum domain), i.e. complex
720  * number multiplication */
721  memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
722  s->rdft.rdft_calc(&s->rdft, synth_pf);
723  s->rdft.rdft_calc(&s->rdft, coeffs);
724  synth_pf[0] *= coeffs[0];
725  synth_pf[1] *= coeffs[1];
726  for (n = 1; n < 64; n++) {
727  float v1 = synth_pf[n * 2], v2 = synth_pf[n * 2 + 1];
728  synth_pf[n * 2] = v1 * coeffs[n * 2] - v2 * coeffs[n * 2 + 1];
729  synth_pf[n * 2 + 1] = v2 * coeffs[n * 2] + v1 * coeffs[n * 2 + 1];
730  }
731  s->irdft.rdft_calc(&s->irdft, synth_pf);
732  }
733 
734  /* merge filter output with the history of previous runs */
735  if (s->denoise_filter_cache_size) {
736  lim = FFMIN(s->denoise_filter_cache_size, size);
737  for (n = 0; n < lim; n++)
738  synth_pf[n] += s->denoise_filter_cache[n];
739  s->denoise_filter_cache_size -= lim;
740  memmove(s->denoise_filter_cache, &s->denoise_filter_cache[size],
741  sizeof(s->denoise_filter_cache[0]) * s->denoise_filter_cache_size);
742  }
743 
744  /* move remainder of filter output into a cache for future runs */
745  if (fcb_type != FCB_TYPE_SILENCE) {
746  lim = FFMIN(remainder, s->denoise_filter_cache_size);
747  for (n = 0; n < lim; n++)
748  s->denoise_filter_cache[n] += synth_pf[size + n];
749  if (lim < remainder) {
750  memcpy(&s->denoise_filter_cache[lim], &synth_pf[size + lim],
751  sizeof(s->denoise_filter_cache[0]) * (remainder - lim));
752  s->denoise_filter_cache_size = remainder;
753  }
754  }
755 }
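The denoise_filter_cache handling above is ordinary overlap-add: the filter tail produced beyond the current frame is stored and summed into the start of the next frame's output. A generic stand-alone sketch of that bookkeeping with a toy time-domain FIR (not the codec's RDFT-domain filter):

    #include <stdio.h>
    #include <string.h>

    #define FRAME 8
    #define TAPS  4

    /* Filtering a FRAME-sample block with a TAPS-tap FIR yields
     * FRAME + TAPS - 1 samples: the first FRAME go out now, the remaining
     * TAPS - 1 are cached and added to the head of the next block. */
    static void filter_block(const float *in, float *out, const float *h,
                             float *cache)
    {
        float full[FRAME + TAPS - 1] = { 0 };
        int n, k;

        for (n = 0; n < FRAME; n++)
            for (k = 0; k < TAPS; k++)
                full[n + k] += in[n] * h[k];

        for (n = 0; n < FRAME; n++)
            out[n] = full[n] + (n < TAPS - 1 ? cache[n] : 0.0f);

        memcpy(cache, &full[FRAME], sizeof(float) * (TAPS - 1));
    }

    int main(void)
    {
        const float h[TAPS]  = { 0.4f, 0.3f, 0.2f, 0.1f };
        float in1[FRAME]     = { 1, 0, 0, 0, 0, 0, 0, 1 };
        float in2[FRAME]     = { 0 };
        float out[FRAME], cache[TAPS - 1] = { 0 };
        int n;

        filter_block(in1, out, h, cache);
        filter_block(in2, out, h, cache); /* tail of in1 leaks in: 0.3 0.2 0.1 */
        for (n = 0; n < FRAME; n++)
            printf("%g ", out[n]);
        printf("\n");
        return 0;
    }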
756 
757 /**
758  * Averaging projection filter, the postfilter used in WMAVoice.
759  *
760  * This uses the following steps:
761  * - A zero-synthesis filter (generate excitation from synth signal)
762  * - Kalman smoothing on excitation, based on pitch
763  * - Re-synthesized smoothened output
764  * - Iterative Wiener denoise filter
765  * - Adaptive gain filter
766  * - DC filter
767  *
768  * @param s WMAVoice decoding context
769  * @param synth Speech synthesis output (before postfilter)
770  * @param samples Output buffer for filtered samples
771  * @param size Buffer size of synth & samples
772  * @param lpcs Generated LPCs used for speech synthesis
773  * @param zero_exc_pf destination for zero synthesis filter (16-byte aligned)
774  * @param fcb_type Frame type (silence, hardcoded, AW-pulses or FCB-pulses)
775  * @param pitch Pitch of the input signal
776  */
777 static void postfilter(WMAVoiceContext *s, const float *synth,
778  float *samples, int size,
779  const float *lpcs, float *zero_exc_pf,
780  int fcb_type, int pitch)
781 {
782  float synth_filter_in_buf[MAX_FRAMESIZE / 2],
783  *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],
784  *synth_filter_in = zero_exc_pf;
785 
786  av_assert0(size <= MAX_FRAMESIZE / 2);
787 
788  /* generate excitation from input signal */
789  ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);
790 
791  if (fcb_type >= FCB_TYPE_AW_PULSES &&
792  !kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size))
793  synth_filter_in = synth_filter_in_buf;
794 
795  /* re-synthesize speech after smoothening, and keep history */
796  ff_celp_lp_synthesis_filterf(synth_pf, lpcs,
797  synth_filter_in, size, s->lsps);
798  memcpy(&synth_pf[-s->lsps], &synth_pf[size - s->lsps],
799  sizeof(synth_pf[0]) * s->lsps);
800 
801  wiener_denoise(s, fcb_type, synth_pf, size, lpcs);
802 
803  adaptive_gain_control(samples, synth_pf, synth, size, 0.99,
804  &s->postfilter_agc);
805 
806  if (s->dc_level > 8) {
807  /* remove ultra-low frequency DC noise / highpass filter;
808  * coefficients are identical to those used in SIPR decoding,
809  * and very closely resemble those used in AMR-NB decoding. */
810  ff_acelp_apply_order_2_transfer_function(samples, samples,
811  (const float[2]) { -1.99997, 1.0 },
812  (const float[2]) { -1.9330735188, 0.93589198496 },
813  0.93980580475, s->dcf_mem, size);
814  }
815 }
816 /**
817  * @}
818  */
819 
820 /**
821  * Dequantize LSPs
822  * @param lsps output pointer to the array that will hold the LSPs
823  * @param num number of LSPs to be dequantized
824  * @param values quantized values, contains n_stages values
825  * @param sizes range (i.e. max value) of each quantized value
826  * @param n_stages number of dequantization runs
827  * @param table dequantization table to be used
828  * @param mul_q LSF multiplier
829  * @param base_q base (lowest) LSF values
830  */
831 static void dequant_lsps(double *lsps, int num,
832  const uint16_t *values,
833  const uint16_t *sizes,
834  int n_stages, const uint8_t *table,
835  const double *mul_q,
836  const double *base_q)
837 {
838  int n, m;
839 
840  memset(lsps, 0, num * sizeof(*lsps));
841  for (n = 0; n < n_stages; n++) {
842  const uint8_t *t_off = &table[values[n] * num];
843  double base = base_q[n], mul = mul_q[n];
844 
845  for (m = 0; m < num; m++)
846  lsps[m] += base + mul * t_off[m];
847 
848  table += sizes[n] * num;
849  }
850 }
851 
852 /**
853  * @name LSP dequantization routines
854  * LSP dequantization routines, for 10/16LSPs and independent/residual coding.
855  * @note we assume enough bits are available, caller should check.
856  * lsp10i() consumes 24 bits; lsp10r() consumes an additional 24 bits;
857  * lsp16i() consumes 34 bits; lsp16r() consumes an additional 26 bits.
858  * @{
859  */
860 /**
861  * Parse 10 independently-coded LSPs.
862  */
863 static void dequant_lsp10i(GetBitContext *gb, double *lsps)
864 {
865  static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
866  static const double mul_lsf[4] = {
867  5.2187144800e-3, 1.4626986422e-3,
868  9.6179549166e-4, 1.1325736225e-3
869  };
870  static const double base_lsf[4] = {
871  M_PI * -2.15522e-1, M_PI * -6.1646e-2,
872  M_PI * -3.3486e-2, M_PI * -5.7408e-2
873  };
874  uint16_t v[4];
875 
876  v[0] = get_bits(gb, 8);
877  v[1] = get_bits(gb, 6);
878  v[2] = get_bits(gb, 5);
879  v[3] = get_bits(gb, 5);
880 
881  dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,
882  mul_lsf, base_lsf);
883 }
884 
885 /**
886  * Parse 10 independently-coded LSPs, and then derive the tables to
887  * generate LSPs for the other frames from them (residual coding).
888  */
889 static void dequant_lsp10r(GetBitContext *gb,
890  double *i_lsps, const double *old,
891  double *a1, double *a2, int q_mode)
892 {
893  static const uint16_t vec_sizes[3] = { 128, 64, 64 };
894  static const double mul_lsf[3] = {
895  2.5807601174e-3, 1.2354460219e-3, 1.1763821673e-3
896  };
897  static const double base_lsf[3] = {
898  M_PI * -1.07448e-1, M_PI * -5.2706e-2, M_PI * -5.1634e-2
899  };
900  const float (*ipol_tab)[2][10] = q_mode ?
901  wmavoice_lsp10_intercoeff_b : wmavoice_lsp10_intercoeff_a;
902  uint16_t interpol, v[3];
903  int n;
904 
905  dequant_lsp10i(gb, i_lsps);
906 
907  interpol = get_bits(gb, 5);
908  v[0] = get_bits(gb, 7);
909  v[1] = get_bits(gb, 6);
910  v[2] = get_bits(gb, 6);
911 
912  for (n = 0; n < 10; n++) {
913  double delta = old[n] - i_lsps[n];
914  a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
915  a1[10 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
916  }
917 
918  dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r,
919  mul_lsf, base_lsf);
920 }
921 
922 /**
923  * Parse 16 independently-coded LSPs.
924  */
925 static void dequant_lsp16i(GetBitContext *gb, double *lsps)
926 {
927  static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
928  static const double mul_lsf[5] = {
929  3.3439586280e-3, 6.9908173703e-4,
930  3.3216608306e-3, 1.0334960326e-3,
931  3.1899104283e-3
932  };
933  static const double base_lsf[5] = {
934  M_PI * -1.27576e-1, M_PI * -2.4292e-2,
935  M_PI * -1.28094e-1, M_PI * -3.2128e-2,
936  M_PI * -1.29816e-1
937  };
938  uint16_t v[5];
939 
940  v[0] = get_bits(gb, 8);
941  v[1] = get_bits(gb, 6);
942  v[2] = get_bits(gb, 7);
943  v[3] = get_bits(gb, 6);
944  v[4] = get_bits(gb, 7);
945 
946  dequant_lsps( lsps, 5, v, vec_sizes, 2,
947  wmavoice_dq_lsp16i1, mul_lsf, base_lsf);
948  dequant_lsps(&lsps[5], 5, &v[2], &vec_sizes[2], 2,
949  wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]);
950  dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1,
951  wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]);
952 }
953 
954 /**
955  * Parse 16 independently-coded LSPs, and then derive the tables to
956  * generate LSPs for the other frames from them (residual coding).
957  */
958 static void dequant_lsp16r(GetBitContext *gb,
959  double *i_lsps, const double *old,
960  double *a1, double *a2, int q_mode)
961 {
962  static const uint16_t vec_sizes[3] = { 128, 128, 128 };
963  static const double mul_lsf[3] = {
964  1.2232979501e-3, 1.4062241527e-3, 1.6114744851e-3
965  };
966  static const double base_lsf[3] = {
967  M_PI * -5.5830e-2, M_PI * -5.2908e-2, M_PI * -5.4776e-2
968  };
969  const float (*ipol_tab)[2][16] = q_mode ?
970  wmavoice_lsp16_intercoeff_b : wmavoice_lsp16_intercoeff_a;
971  uint16_t interpol, v[3];
972  int n;
973 
974  dequant_lsp16i(gb, i_lsps);
975 
976  interpol = get_bits(gb, 5);
977  v[0] = get_bits(gb, 7);
978  v[1] = get_bits(gb, 7);
979  v[2] = get_bits(gb, 7);
980 
981  for (n = 0; n < 16; n++) {
982  double delta = old[n] - i_lsps[n];
983  a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
984  a1[16 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
985  }
986 
987  dequant_lsps( a2, 10, v, vec_sizes, 1,
988  wmavoice_dq_lsp16r1, mul_lsf, base_lsf);
989  dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1,
990  wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]);
991  dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1,
992  wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]);
993 }
994 
995 /**
996  * @}
997  * @name Pitch-adaptive window coding functions
998  * The next few functions are for pitch-adaptive window coding.
999  * @{
1000  */
1001 /**
1002  * Parse the offset of the first pitch-adaptive window pulses, and
1003  * the distribution of pulses between the two blocks in this frame.
1004  * @param s WMA Voice decoding context private data
1005  * @param gb bit I/O context
1006  * @param pitch pitch for each block in this frame
1007  */
1008 static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb,
1009  const int *pitch)
1010 {
1011  static const int16_t start_offset[94] = {
1012  -11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9, 11,
1013  13, 15, 18, 17, 19, 20, 21, 22, 23, 24, 25, 26,
1014  27, 28, 29, 30, 31, 32, 33, 35, 37, 39, 41, 43,
1015  45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67,
1016  69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91,
1017  93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113, 115,
1018  117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
1019  141, 143, 145, 147, 149, 151, 153, 155, 157, 159
1020  };
1021  int bits, offset;
1022 
1023  /* position of pulse */
1024  s->aw_idx_is_ext = 0;
1025  if ((bits = get_bits(gb, 6)) >= 54) {
1026  s->aw_idx_is_ext = 1;
1027  bits += (bits - 54) * 3 + get_bits(gb, 2);
1028  }
1029 
1030  /* for a repeated pulse at pulse_off with a pitch_lag of pitch[], count
1031  * the distribution of the pulses in each block contained in this frame. */
1032  s->aw_pulse_range = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;
1033  for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;
1034  s->aw_n_pulses[0] = (pitch[0] - 1 + MAX_FRAMESIZE / 2 - offset) / pitch[0];
1035  s->aw_first_pulse_off[0] = offset - s->aw_pulse_range / 2;
1036  offset += s->aw_n_pulses[0] * pitch[0];
1037  s->aw_n_pulses[1] = (pitch[1] - 1 + MAX_FRAMESIZE - offset) / pitch[1];
1038  s->aw_first_pulse_off[1] = offset - (MAX_FRAMESIZE + s->aw_pulse_range) / 2;
1039 
1040  /* if continuing from a position before the block, reset position to
1041  * start of block (when corrected for the range over which it can be
1042  * spread in aw_pulse_set1()). */
1043  if (start_offset[bits] < MAX_FRAMESIZE / 2) {
1044  while (s->aw_first_pulse_off[1] - pitch[1] + s->aw_pulse_range > 0)
1045  s->aw_first_pulse_off[1] -= pitch[1];
1046  if (start_offset[bits] < 0)
1047  while (s->aw_first_pulse_off[0] - pitch[0] + s->aw_pulse_range > 0)
1048  s->aw_first_pulse_off[0] -= pitch[0];
1049  }
1050 }
1051 
1052 /**
1053  * Apply second set of pitch-adaptive window pulses.
1054  * @param s WMA Voice decoding context private data
1055  * @param gb bit I/O context
1056  * @param block_idx block index in frame [0, 1]
1057  * @param fcb structure containing fixed codebook vector info
1058  * @return -1 on error, 0 otherwise
1059  */
1060 static int aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb,
1061  int block_idx, AMRFixed *fcb)
1062 {
1063  uint16_t use_mask_mem[9]; // only 5 are used, rest is padding
1064  uint16_t *use_mask = use_mask_mem + 2;
1065  /* in this function, idx is the index in the 80-bit (+ padding) use_mask
1066  * bit-array. Since use_mask consists of 16-bit values, the lower 4 bits
1067  * of idx are the position of the bit within a particular item in the
1068  * array (0 being the most significant bit, and 15 being the least
1069  * significant bit), and the remainder (>> 4) is the index in the
1070  * use_mask[]-array. This is faster and uses less memory than using an
1071  * 80-byte/80-int array. */
1072  int pulse_off = s->aw_first_pulse_off[block_idx],
1073  pulse_start, n, idx, range, aidx, start_off = 0;
1074 
1075  /* set offset of first pulse to within this block */
1076  if (s->aw_n_pulses[block_idx] > 0)
1077  while (pulse_off + s->aw_pulse_range < 1)
1078  pulse_off += fcb->pitch_lag;
1079 
1080  /* find range per pulse */
1081  if (s->aw_n_pulses[0] > 0) {
1082  if (block_idx == 0) {
1083  range = 32;
1084  } else /* block_idx = 1 */ {
1085  range = 8;
1086  if (s->aw_n_pulses[block_idx] > 0)
1087  pulse_off = s->aw_next_pulse_off_cache;
1088  }
1089  } else
1090  range = 16;
1091  pulse_start = s->aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;
1092 
1093  /* aw_pulse_set1() already applies pulses around pulse_off (to be exact,
1094  * in the range of [pulse_off, pulse_off + s->aw_pulse_range]), and thus
1095  * we exclude that range from being pulsed again in this function. */
1096  memset(&use_mask[-2], 0, 2 * sizeof(use_mask[0]));
1097  memset( use_mask, -1, 5 * sizeof(use_mask[0]));
1098  memset(&use_mask[5], 0, 2 * sizeof(use_mask[0]));
1099  if (s->aw_n_pulses[block_idx] > 0)
1100  for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb->pitch_lag) {
1101  int excl_range = s->aw_pulse_range; // always 16 or 24
1102  uint16_t *use_mask_ptr = &use_mask[idx >> 4];
1103  int first_sh = 16 - (idx & 15);
1104  *use_mask_ptr++ &= 0xFFFFu << first_sh;
1105  excl_range -= first_sh;
1106  if (excl_range >= 16) {
1107  *use_mask_ptr++ = 0;
1108  *use_mask_ptr &= 0xFFFF >> (excl_range - 16);
1109  } else
1110  *use_mask_ptr &= 0xFFFF >> excl_range;
1111  }
1112 
1113  /* find the 'aidx'th offset that is not excluded */
1114  aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);
1115  for (n = 0; n <= aidx; pulse_start++) {
1116  for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;
1117  if (idx >= MAX_FRAMESIZE / 2) { // find from zero
1118  if (use_mask[0]) idx = 0x0F;
1119  else if (use_mask[1]) idx = 0x1F;
1120  else if (use_mask[2]) idx = 0x2F;
1121  else if (use_mask[3]) idx = 0x3F;
1122  else if (use_mask[4]) idx = 0x4F;
1123  else return -1;
1124  idx -= av_log2_16bit(use_mask[idx >> 4]);
1125  }
1126  if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
1127  use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));
1128  n++;
1129  start_off = idx;
1130  }
1131  }
1132 
1133  fcb->x[fcb->n] = start_off;
1134  fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0;
1135  fcb->n++;
1136 
1137  /* set offset for next block, relative to start of that block */
1138  n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;
1139  s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;
1140  return 0;
1141 }
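The use_mask addressing above packs 80 sample positions into five uint16_t words: position idx lives in word idx >> 4 at bit 0x8000 >> (idx & 15). A stand-alone sketch of just that test-and-clear primitive (illustrative, with a hypothetical position):

    #include <stdint.h>
    #include <stdio.h>

    /* Test whether position idx is still usable and, if so, exclude it,
     * using the same word/bit mapping as aw_pulse_set2(). */
    static int test_and_clear(uint16_t *mask, int idx)
    {
        uint16_t bit = 0x8000 >> (idx & 15);
        if (mask[idx >> 4] & bit) {
            mask[idx >> 4] &= ~bit;
            return 1;
        }
        return 0;
    }

    int main(void)
    {
        uint16_t use_mask[5] = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF };
        int a = test_and_clear(use_mask, 37); /* first hit: 1 */
        int b = test_and_clear(use_mask, 37); /* already excluded: 0 */
        printf("%d %d\n", a, b);
        return 0;
    }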
1142 
1143 /**
1144  * Apply first set of pitch-adaptive window pulses.
1145  * @param s WMA Voice decoding context private data
1146  * @param gb bit I/O context
1147  * @param block_idx block index in frame [0, 1]
1148  * @param fcb storage location for fixed codebook pulse info
1149  */
1150 static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb,
1151  int block_idx, AMRFixed *fcb)
1152 {
1153  int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));
1154  float v;
1155 
1156  if (s->aw_n_pulses[block_idx] > 0) {
1157  int n, v_mask, i_mask, sh, n_pulses;
1158 
1159  if (s->aw_pulse_range == 24) { // 3 pulses, 1:sign + 3:index each
1160  n_pulses = 3;
1161  v_mask = 8;
1162  i_mask = 7;
1163  sh = 4;
1164  } else { // 4 pulses, 1:sign + 2:index each
1165  n_pulses = 4;
1166  v_mask = 4;
1167  i_mask = 3;
1168  sh = 3;
1169  }
1170 
1171  for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {
1172  fcb->y[fcb->n] = (val & v_mask) ? -1.0 : 1.0;
1173  fcb->x[fcb->n] = (val & i_mask) * n_pulses + n +
1174  s->aw_first_pulse_off[block_idx];
1175  while (fcb->x[fcb->n] < 0)
1176  fcb->x[fcb->n] += fcb->pitch_lag;
1177  if (fcb->x[fcb->n] < MAX_FRAMESIZE / 2)
1178  fcb->n++;
1179  }
1180  } else {
1181  int num2 = (val & 0x1FF) >> 1, delta, idx;
1182 
1183  if (num2 < 1 * 79) { delta = 1; idx = num2 + 1; }
1184  else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1 - 1 * 77; }
1185  else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1 - 2 * 76; }
1186  else { delta = 7; idx = num2 + 1 - 3 * 75; }
1187  v = (val & 0x200) ? -1.0 : 1.0;
1188 
1189  fcb->no_repeat_mask |= 3 << fcb->n;
1190  fcb->x[fcb->n] = idx - delta;
1191  fcb->y[fcb->n] = v;
1192  fcb->x[fcb->n + 1] = idx;
1193  fcb->y[fcb->n + 1] = (val & 1) ? -v : v;
1194  fcb->n += 2;
1195  }
1196 }
1197 
1198 /**
1199  * @}
1200  *
1201  * Generate a random number from frame_cntr and block_idx, which will lie
1202  * in the range [0, 1000 - block_size] (so it can be used as an index in a
1203  * table of size 1000 of which you want to read block_size entries).
1204  *
1205  * @param frame_cntr current frame number
1206  * @param block_num current block index
1207  * @param block_size amount of entries we want to read from a table
1208  * that has 1000 entries
1209  * @return a (non-)random number in the [0, 1000 - block_size] range.
1210  */
1211 static int pRNG(int frame_cntr, int block_num, int block_size)
1212 {
1213  /* array to simplify the calculation of z:
1214  * y = (x % 9) * 5 + 6;
1215  * z = (49995 * x) / y;
1216  * Since y only has 9 values, we can remove the division by using a
1217  * LUT and using FASTDIV-style divisions. For each of the 9 values
1218  * of y, we can rewrite z as:
1219  * z = x * (49995 / y) + x * ((49995 % y) / y)
1220  * In this table, each col represents one possible value of y, the
1221  * first number is 49995 / y, and the second is the FASTDIV variant
1222  * of 49995 % y / y. */
1223  static const unsigned int div_tbl[9][2] = {
1224  { 8332, 3 * 715827883U }, // y = 6
1225  { 4545, 0 * 390451573U }, // y = 11
1226  { 3124, 11 * 268435456U }, // y = 16
1227  { 2380, 15 * 204522253U }, // y = 21
1228  { 1922, 23 * 165191050U }, // y = 26
1229  { 1612, 23 * 138547333U }, // y = 31
1230  { 1388, 27 * 119304648U }, // y = 36
1231  { 1219, 16 * 104755300U }, // y = 41
1232  { 1086, 39 * 93368855U } // y = 46
1233  };
1234  unsigned int z, y, x = MUL16(block_num, 1877) + frame_cntr;
1235  if (x >= 0xFFFF) x -= 0xFFFF; // max value of x is 8*1877+0xFFFE=0x13AA6,
1236  // so this is effectively a modulo (%)
1237  y = x - 9 * MULH(477218589, x); // x % 9
1238  z = (uint16_t) (x * div_tbl[y][0] + UMULH(x, div_tbl[y][1]));
1239  // z = x * 49995 / (y * 5 + 6)
1240  return z % (1000 - block_size);
1241 }
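For reference, the LUT/FASTDIV arithmetic above implements the calculation spelled out in the comment. A naive stand-alone equivalent (illustrative only; the table-driven version may differ by a unit in rare rounding cases):

    #include <stdint.h>
    #include <stdio.h>

    static int prng_reference(int frame_cntr, int block_num, int block_size)
    {
        unsigned int x = block_num * 1877 + frame_cntr;
        unsigned int y, z;

        if (x >= 0xFFFF)
            x -= 0xFFFF;                         /* cheap modulo, as in pRNG() */
        y = (x % 9) * 5 + 6;
        z = (uint16_t)((uint64_t)49995 * x / y); /* same 16-bit truncation */
        return z % (1000 - block_size);
    }

    int main(void)
    {
        printf("%d\n", prng_reference(1234, 3, 80));
        return 0;
    }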
1242 
1243 /**
1244  * Parse hardcoded signal for a single block.
1245  * @note see #synth_block().
1246  */
1247 static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb,
1248  int block_idx, int size,
1249  const struct frame_type_desc *frame_desc,
1250  float *excitation)
1251 {
1252  float gain;
1253  int n, r_idx;
1254 
1255  av_assert0(size <= MAX_FRAMESIZE);
1256 
1257  /* Set the offset from which we start reading wmavoice_std_codebook */
1258  if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
1259  r_idx = pRNG(s->frame_cntr, block_idx, size);
1260  gain = s->silence_gain;
1261  } else /* FCB_TYPE_HARDCODED */ {
1262  r_idx = get_bits(gb, 8);
1263  gain = wmavoice_gain_universal[get_bits(gb, 6)];
1264  }
1265 
1266  /* Clear gain prediction parameters */
1267  memset(s->gain_pred_err, 0, sizeof(s->gain_pred_err));
1268 
1269  /* Apply gain to hardcoded codebook and use that as excitation signal */
1270  for (n = 0; n < size; n++)
1271  excitation[n] = wmavoice_std_codebook[r_idx + n] * gain;
1272 }
1273 
1274 /**
1275  * Parse FCB/ACB signal for a single block.
1276  * @note see #synth_block().
1277  */
1278 static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb,
1279  int block_idx, int size,
1280  int block_pitch_sh2,
1281  const struct frame_type_desc *frame_desc,
1282  float *excitation)
1283 {
1284  static const float gain_coeff[6] = {
1285  0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
1286  };
1287  float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;
1288  int n, idx, gain_weight;
1289  AMRFixed fcb;
1290 
1291  av_assert0(size <= MAX_FRAMESIZE / 2);
1292  memset(pulses, 0, sizeof(*pulses) * size);
1293 
1294  fcb.pitch_lag = block_pitch_sh2 >> 2;
1295  fcb.pitch_fac = 1.0;
1296  fcb.no_repeat_mask = 0;
1297  fcb.n = 0;
1298 
1299  /* For the other frame types, this is where we apply the innovation
1300  * (fixed) codebook pulses of the speech signal. */
1301  if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
1302  aw_pulse_set1(s, gb, block_idx, &fcb);
1303  if (aw_pulse_set2(s, gb, block_idx, &fcb)) {
1304  /* Conceal the block with silence and return.
1305  * Skip the correct amount of bits to read the next
1306  * block from the correct offset. */
1307  int r_idx = pRNG(s->frame_cntr, block_idx, size);
1308 
1309  for (n = 0; n < size; n++)
1310  excitation[n] =
1311  wmavoice_std_codebook[r_idx + n] * s->silence_gain;
1312  skip_bits(gb, 7 + 1);
1313  return;
1314  }
1315  } else /* FCB_TYPE_EXC_PULSES */ {
1316  int offset_nbits = 5 - frame_desc->log_n_blocks;
1317 
1318  fcb.no_repeat_mask = -1;
1319  /* similar to ff_decode_10_pulses_35bits(), but with single pulses
1320  * (instead of double) for a subset of pulses */
1321  for (n = 0; n < 5; n++) {
1322  float sign;
1323  int pos1, pos2;
1324 
1325  sign = get_bits1(gb) ? 1.0 : -1.0;
1326  pos1 = get_bits(gb, offset_nbits);
1327  fcb.x[fcb.n] = n + 5 * pos1;
1328  fcb.y[fcb.n++] = sign;
1329  if (n < frame_desc->dbl_pulses) {
1330  pos2 = get_bits(gb, offset_nbits);
1331  fcb.x[fcb.n] = n + 5 * pos2;
1332  fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;
1333  }
1334  }
1335  }
1336  ff_set_fixed_vector(pulses, &fcb, 1.0, size);
1337 
1338  /* Calculate gain for adaptive & fixed codebook signal.
1339  * see ff_amr_set_fixed_gain(). */
1340  idx = get_bits(gb, 7);
1341  fcb_gain = expf(avpriv_scalarproduct_float_c(s->gain_pred_err,
1342  gain_coeff, 6) -
1343  5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
1344  acb_gain = wmavoice_gain_codebook_acb[idx];
1345  pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
1346  -2.9957322736 /* log(0.05) */,
1347  1.6094379124 /* log(5.0) */);
1348 
1349  gain_weight = 8 >> frame_desc->log_n_blocks;
1350  memmove(&s->gain_pred_err[gain_weight], s->gain_pred_err,
1351  sizeof(*s->gain_pred_err) * (6 - gain_weight));
1352  for (n = 0; n < gain_weight; n++)
1353  s->gain_pred_err[n] = pred_err;
1354 
1355  /* Calculation of adaptive codebook */
1356  if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
1357  int len;
1358  for (n = 0; n < size; n += len) {
1359  int next_idx_sh16;
1360  int abs_idx = block_idx * size + n;
1361  int pitch_sh16 = (s->last_pitch_val << 16) +
1362  s->pitch_diff_sh16 * abs_idx;
1363  int pitch = (pitch_sh16 + 0x6FFF) >> 16;
1364  int idx_sh16 = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;
1365  idx = idx_sh16 >> 16;
1366  if (s->pitch_diff_sh16) {
1367  if (s->pitch_diff_sh16 > 0) {
1368  next_idx_sh16 = (idx_sh16) &~ 0xFFFF;
1369  } else
1370  next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;
1371  len = av_clip((idx_sh16 - next_idx_sh16) / s->pitch_diff_sh16 / 8,
1372  1, size - n);
1373  } else
1374  len = size;
1375 
1376  ff_acelp_interpolatef(&excitation[n], &excitation[n - pitch],
1377  wmavoice_ipol1_coeffs, 17,
1378  idx, 9, len);
1379  }
1380  } else /* ACB_TYPE_HAMMING */ {
1381  int block_pitch = block_pitch_sh2 >> 2;
1382  idx = block_pitch_sh2 & 3;
1383  if (idx) {
1384  ff_acelp_interpolatef(excitation, &excitation[-block_pitch],
1385  wmavoice_ipol2_coeffs, 4,
1386  idx, 8, size);
1387  } else
1388  av_memcpy_backptr((uint8_t *) excitation, sizeof(float) * block_pitch,
1389  sizeof(float) * size);
1390  }
1391 
1392  /* Interpolate ACB/FCB and use as excitation signal */
1393  ff_weighted_vector_sumf(excitation, excitation, pulses,
1394  acb_gain, fcb_gain, size);
1395 }
1396 
1397 /**
1398  * Parse data in a single block.
1399  * @note we assume enough bits are available, caller should check.
1400  *
1401  * @param s WMA Voice decoding context private data
1402  * @param gb bit I/O context
1403  * @param block_idx index of the to-be-read block
1404  * @param size amount of samples to be read in this block
1405  * @param block_pitch_sh2 pitch for this block << 2
1406  * @param lsps LSPs for (the end of) this frame
1407  * @param prev_lsps LSPs for the last frame
1408  * @param frame_desc frame type descriptor
1409  * @param excitation target memory for the ACB+FCB interpolated signal
1410  * @param synth target memory for the speech synthesis filter output
1411  * @return 0 on success, <0 on error.
1412  */
1413 static void synth_block(WMAVoiceContext *s, GetBitContext *gb,
1414  int block_idx, int size,
1415  int block_pitch_sh2,
1416  const double *lsps, const double *prev_lsps,
1417  const struct frame_type_desc *frame_desc,
1418  float *excitation, float *synth)
1419 {
1420  double i_lsps[MAX_LSPS];
1421  float lpcs[MAX_LSPS];
1422  float fac;
1423  int n;
1424 
1425  if (frame_desc->acb_type == ACB_TYPE_NONE)
1426  synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);
1427  else
1428  synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,
1429  frame_desc, excitation);
1430 
1431  /* convert interpolated LSPs to LPCs */
1432  fac = (block_idx + 0.5) / frame_desc->n_blocks;
1433  for (n = 0; n < s->lsps; n++) // LSF -> LSP
1434  i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));
1435  ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
1436 
1437  /* Speech synthesis */
1438  ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s->lsps);
1439 }
1440 
1441 /**
1442  * Synthesize output samples for a single frame.
1443  * @note we assume enough bits are available, caller should check.
1444  *
1445  * @param ctx WMA Voice decoder context
1446  * @param gb bit I/O context (s->gb or one for cross-packet superframes)
1447  * @param frame_idx Frame number within superframe [0-2]
1448  * @param samples pointer to output sample buffer, has space for at least 160
1449  * samples
1450  * @param lsps LSP array
1451  * @param prev_lsps array of previous frame's LSPs
1452  * @param excitation target buffer for excitation signal
1453  * @param synth target buffer for synthesized speech data
1454  * @return 0 on success, <0 on error.
1455  */
1456 static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
1457  float *samples,
1458  const double *lsps, const double *prev_lsps,
1459  float *excitation, float *synth)
1460 {
1461  WMAVoiceContext *s = ctx->priv_data;
1462  int n, n_blocks_x2, log_n_blocks_x2, av_uninit(cur_pitch_val);
1463  int pitch[MAX_BLOCKS], av_uninit(last_block_pitch);
1464 
1465  /* Parse frame type ("frame header"), see frame_descs */
1466  int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)], block_nsamples;
1467 
1468  if (bd_idx < 0) {
1469  av_log(ctx, AV_LOG_ERROR,
1470  "Invalid frame type VLC code, skipping\n");
1471  return AVERROR_INVALIDDATA;
1472  }
1473 
1474  block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;
1475 
1476  /* Pitch calculation for ACB_TYPE_ASYMMETRIC ("pitch-per-frame") */
1477  if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {
1478  /* Pitch is provided per frame, which is interpreted as the pitch of
1479  * the last sample of the last block of this frame. We can interpolate
1480  * the pitch of other blocks (and even pitch-per-sample) by gradually
1481  * incrementing/decrementing prev_frame_pitch to cur_pitch_val. */
1482  n_blocks_x2 = frame_descs[bd_idx].n_blocks << 1;
1483  log_n_blocks_x2 = frame_descs[bd_idx].log_n_blocks + 1;
1484  cur_pitch_val = s->min_pitch_val + get_bits(gb, s->pitch_nbits);
1485  cur_pitch_val = FFMIN(cur_pitch_val, s->max_pitch_val - 1);
1486  if (s->last_acb_type == ACB_TYPE_NONE ||
1487  20 * abs(cur_pitch_val - s->last_pitch_val) >
1488  (cur_pitch_val + s->last_pitch_val))
1489  s->last_pitch_val = cur_pitch_val;
1490 
1491  /* pitch per block */
1492  for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
1493  int fac = n * 2 + 1;
1494 
1495  pitch[n] = (MUL16(fac, cur_pitch_val) +
1496  MUL16((n_blocks_x2 - fac), s->last_pitch_val) +
1497  frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2;
1498  }
1499 
1500  /* "pitch-diff-per-sample" for calculation of pitch per sample */
1501  s->pitch_diff_sh16 =
1502  (cur_pitch_val - s->last_pitch_val) * (1 << 16) / MAX_FRAMESIZE;
1503  }
1504 
1505  /* Global gain (if silence) and pitch-adaptive window coordinates */
1506  switch (frame_descs[bd_idx].fcb_type) {
1507  case FCB_TYPE_SILENCE:
1508  s->silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];
1509  break;
1510  case FCB_TYPE_AW_PULSES:
1511  aw_parse_coords(s, gb, pitch);
1512  break;
1513  }
1514 
1515  for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
1516  int bl_pitch_sh2;
1517 
1518  /* Pitch calculation for ACB_TYPE_HAMMING ("pitch-per-block") */
1519  switch (frame_descs[bd_idx].acb_type) {
1520  case ACB_TYPE_HAMMING: {
1521  /* Pitch is given per block. Per-block pitches are encoded as an
1522  * absolute value for the first block, and then delta values
1523  * (relative to this value) for all subsequent blocks. The scale of
1524  * this pitch value is semi-logarithmic compared to its use in the
1525  * decoder, so we convert it to normal scale also. */
1526  int block_pitch,
1527  t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,
1528  t2 = (s->block_conv_table[2] - s->block_conv_table[1]) << 1,
1529  t3 = s->block_conv_table[3] - s->block_conv_table[2] + 1;
1530 
1531  if (n == 0) {
1532  block_pitch = get_bits(gb, s->block_pitch_nbits);
1533  } else
1534  block_pitch = last_block_pitch - s->block_delta_pitch_hrange +
1535  get_bits(gb, s->block_delta_pitch_nbits);
1536  /* Clamp last_block_pitch so that any next delta stays within range */
1537  last_block_pitch = av_clip(block_pitch,
1538  s->block_delta_pitch_hrange,
1539  s->block_pitch_range -
1540  s->block_delta_pitch_hrange);
1541 
1542  /* Convert semi-log-style scale back to normal scale */
1543  if (block_pitch < t1) {
1544  bl_pitch_sh2 = (s->block_conv_table[0] << 2) + block_pitch;
1545  } else {
1546  block_pitch -= t1;
1547  if (block_pitch < t2) {
1548  bl_pitch_sh2 =
1549  (s->block_conv_table[1] << 2) + (block_pitch << 1);
1550  } else {
1551  block_pitch -= t2;
1552  if (block_pitch < t3) {
1553  bl_pitch_sh2 =
1554  (s->block_conv_table[2] + block_pitch) << 2;
1555  } else
1556  bl_pitch_sh2 = s->block_conv_table[3] << 2;
1557  }
1558  }
1559  pitch[n] = bl_pitch_sh2 >> 2;
1560  break;
1561  }
1562 
1563  case ACB_TYPE_ASYMMETRIC: {
1564  bl_pitch_sh2 = pitch[n] << 2;
1565  break;
1566  }
1567 
1568  default: // ACB_TYPE_NONE has no pitch
1569  bl_pitch_sh2 = 0;
1570  break;
1571  }
1572 
1573  synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
1574  lsps, prev_lsps, &frame_descs[bd_idx],
1575  &excitation[n * block_nsamples],
1576  &synth[n * block_nsamples]);
1577  }
1578 
1579  /* Averaging projection filter, if applicable. Else, just copy samples
1580  * from synthesis buffer */
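 /* Editor's note: when the APF is enabled, the 160-sample frame is
  * postfiltered in two 80-sample halves below. The first half uses LPCs
  * derived from LSPs interpolated halfway between the previous and the
  * current frame; the second half uses the current frame's LSPs only. */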
1581  if (s->do_apf) {
1582  double i_lsps[MAX_LSPS];
1583  float lpcs[MAX_LSPS];
1584 
1585  for (n = 0; n < s->lsps; n++) // LSF -> LSP
1586  i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
1587  ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
1588  postfilter(s, synth, samples, 80, lpcs,
1589  &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx],
1590  frame_descs[bd_idx].fcb_type, pitch[0]);
1591 
1592  for (n = 0; n < s->lsps; n++) // LSF -> LSP
1593  i_lsps[n] = cos(lsps[n]);
1594  ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
1595  postfilter(s, &synth[80], &samples[80], 80, lpcs,
1596  &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx + 80],
1597  frame_descs[bd_idx].fcb_type, pitch[0]);
1598  } else
1599  memcpy(samples, synth, 160 * sizeof(synth[0]));
1600 
1601  /* Cache values for next frame */
1602  s->frame_cntr++;
1603  if (s->frame_cntr >= 0xFFFF) s->frame_cntr -= 0xFFFF; // i.e. modulo (%)
1604  s->last_acb_type = frame_descs[bd_idx].acb_type;
1605  switch (frame_descs[bd_idx].acb_type) {
1606  case ACB_TYPE_NONE:
1607  s->last_pitch_val = 0;
1608  break;
1609  case ACB_TYPE_ASYMMETRIC:
1610  s->last_pitch_val = cur_pitch_val;
1611  break;
1612  case ACB_TYPE_HAMMING:
1613  s->last_pitch_val = pitch[frame_descs[bd_idx].n_blocks - 1];
1614  break;
1615  }
1616 
1617  return 0;
1618 }
1619 
1620 /**
1621  * Ensure a minimum value for the first item, a maximum value for the last
1622  * item, proper spacing between values and proper ordering.
1623  *
1624  * @param lsps array of LSPs
1625  * @param num size of LSP array
1626  *
1627  * @note basically a double version of #ff_acelp_reorder_lsf(), might be
1628  * useful to put in a generic location later on. Parts are also
1629  * present in #ff_set_min_dist_lsf() + #ff_sort_nearly_sorted_floats(),
1630  * which operate on floats.
1631  */
1632 static void stabilize_lsps(double *lsps, int num)
1633 {
1634  int n, m, l;
1635 
1636  /* set minimum value for first, maximum value for last and minimum
1637  * spacing between LSF values.
1638  * Very similar to ff_set_min_dist_lsf(), but in double. */
1639  lsps[0] = FFMAX(lsps[0], 0.0015 * M_PI);
1640  for (n = 1; n < num; n++)
1641  lsps[n] = FFMAX(lsps[n], lsps[n - 1] + 0.0125 * M_PI);
1642  lsps[num - 1] = FFMIN(lsps[num - 1], 0.9985 * M_PI);
1643 
1644  /* reorder (looks like one-time / non-recursed bubblesort).
1645  * Very similar to ff_sort_nearly_sorted_floats(), but in double. */
1646  for (n = 1; n < num; n++) {
1647  if (lsps[n] < lsps[n - 1]) {
1648  for (m = 1; m < num; m++) {
1649  double tmp = lsps[m];
1650  for (l = m - 1; l >= 0; l--) {
1651  if (lsps[l] <= tmp) break;
1652  lsps[l + 1] = lsps[l];
1653  }
1654  lsps[l + 1] = tmp;
1655  }
1656  break;
1657  }
1658  }
1659 }
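/* Editor's note (not in the original source): after the forward
 * minimum-spacing pass in the function above, the array is already
 * non-decreasing; ordering can only be broken again by the final clamp of
 * lsps[num - 1], so the insertion pass effectively just re-sinks that last
 * element (e.g. { ..., 0.999*M_PI, 0.9985*M_PI } after clamping). */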
1660 
1661 /**
1662  * Test if there are enough bits to read one superframe.
1663  *
1664  * @param orig_gb bit I/O context used for reading. This function
1665  * does not modify the state of the bitreader; it
1666  * only uses it to copy the current stream position
1667  * @param s WMA Voice decoding context private data
1668  * @return < 0 on error, 1 on not enough bits or 0 if OK.
1669  */
1670 static int check_bits_for_superframe(GetBitContext *orig_gb,
1671  WMAVoiceContext *s)
1672 {
1673  GetBitContext s_gb, *gb = &s_gb;
1674  int n, need_bits, bd_idx;
1675  const struct frame_type_desc *frame_desc;
1676 
1677  /* initialize a copy */
1678  init_get_bits(gb, orig_gb->buffer, orig_gb->size_in_bits);
1679  skip_bits_long(gb, get_bits_count(orig_gb));
1680  av_assert1(get_bits_left(gb) == get_bits_left(orig_gb));
1681 
1682  /* superframe header */
1683  if (get_bits_left(gb) < 14)
1684  return 1;
1685  if (!get_bits1(gb))
1686  return AVERROR(ENOSYS); // WMAPro-in-WMAVoice superframe
1687  if (get_bits1(gb)) skip_bits(gb, 12); // number of samples in superframe
1688  if (s->has_residual_lsps) { // residual LSPs (for all frames)
1689  if (get_bits_left(gb) < s->sframe_lsp_bitsize)
1690  return 1;
1691  skip_bits_long(gb, s->sframe_lsp_bitsize);
1692  }
1693 
1694  /* frames */
1695  for (n = 0; n < MAX_FRAMES; n++) {
1696  int aw_idx_is_ext = 0;
1697 
1698  if (!s->has_residual_lsps) { // independent LSPs (per-frame)
1699  if (get_bits_left(gb) < s->frame_lsp_bitsize) return 1;
1700  skip_bits_long(gb, s->frame_lsp_bitsize);
1701  }
1702  bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)];
1703  if (bd_idx < 0)
1704  return AVERROR_INVALIDDATA; // invalid frame type VLC code
1705  frame_desc = &frame_descs[bd_idx];
1706  if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
1707  if (get_bits_left(gb) < s->pitch_nbits)
1708  return 1;
1709  skip_bits_long(gb, s->pitch_nbits);
1710  }
1711  if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
1712  skip_bits(gb, 8);
1713  } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
1714  int tmp = get_bits(gb, 6);
1715  if (tmp >= 0x36) {
1716  skip_bits(gb, 2);
1717  aw_idx_is_ext = 1;
1718  }
1719  }
1720 
1721  /* blocks */
1722  if (frame_desc->acb_type == ACB_TYPE_HAMMING) {
1723  need_bits = s->block_pitch_nbits +
1724  (frame_desc->n_blocks - 1) * s->block_delta_pitch_nbits;
1725  } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
1726  need_bits = 2 * !aw_idx_is_ext;
1727  } else
1728  need_bits = 0;
1729  need_bits += frame_desc->frame_size;
1730  if (get_bits_left(gb) < need_bits)
1731  return 1;
1732  skip_bits_long(gb, need_bits);
1733  }
1734 
1735  return 0;
1736 }
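/* Editor's note: the bit counts skipped above are a "dry run" of the same
 * bitstream layout that synth_superframe()/synth_frame() parse for real
 * below; keeping the two in sync is what guarantees that a return value of
 * 0 really means a complete superframe is available. */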
1737 
1738 /**
1739  * Synthesize output samples for a single superframe. If we have any data
1740  * cached in s->sframe_cache, that will be used instead of whatever is loaded
1741  * in s->gb.
1742  *
1743  * WMA Voice superframes contain 3 frames, each containing 160 audio samples,
1744  * to give a total of 480 samples per superframe. See #synth_frame() for frame
1745  * parsing. In addition to 3 frames, superframes can also contain the LSPs
1746  * (if these are specified globally for all frames, i.e. residually coded; they
1747  * can also be specified individually per-frame, see the s->has_residual_lsps
1748  * option), and can specify the number of samples encoded in this superframe
1749  * (if less than 480), usually used to prevent blanks at track boundaries.
1750  *
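 * (Editor's summary of the parsing below: 1 bit speech/music flag, 1 bit
 * sample-count flag plus 12 bits of sample count if set, the residually
 * coded LSPs if s->has_residual_lsps is set, then the 3 frames, optionally
 * followed by a trailing block of 4 + 10 * (n + 1) bits that this decoder
 * skips.)
 *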
1751  * @param ctx WMA Voice decoder context
1752  * @return 0 on success, <0 on error or 1 if there was not enough data to
1753  * fully parse the superframe
1754  */
1755 static int synth_superframe(AVCodecContext *ctx, AVFrame *frame,
1756  int *got_frame_ptr)
1757 {
1758  WMAVoiceContext *s = ctx->priv_data;
1759  GetBitContext *gb = &s->gb, s_gb;
1760  int n, res, n_samples = 480;
1761  double lsps[MAX_FRAMES][MAX_LSPS];
1762  const double *mean_lsf = s->lsps == 16 ?
1763  wmavoice_mean_lsf16[s->lsp_def_mode] : wmavoice_mean_lsf10[s->lsp_def_mode];
1764  float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12];
1765  float synth[MAX_LSPS + MAX_SFRAMESIZE];
1766  float *samples;
1767 
1768  memcpy(synth, s->synth_history,
1769  s->lsps * sizeof(*synth));
1770  memcpy(excitation, s->excitation_history,
1771  s->history_nsamples * sizeof(*excitation));
1772 
1773  if (s->sframe_cache_size > 0) {
1774  gb = &s_gb;
1775  init_get_bits(gb, s->sframe_cache, s->sframe_cache_size);
1776  s->sframe_cache_size = 0;
1777  }
1778 
1779  if ((res = check_bits_for_superframe(gb, s)) == 1) {
1780  *got_frame_ptr = 0;
1781  return 1;
1782  } else if (res < 0)
1783  return res;
1784 
1785  /* First bit is speech/music bit, it differentiates between WMAVoice
1786  * speech samples (the actual codec) and WMAVoice music samples, which
1787  * are really WMAPro-in-WMAVoice-superframes. I've never seen those in
1788  * the wild yet. */
1789  if (!get_bits1(gb)) {
1790  avpriv_request_sample(ctx, "WMAPro-in-WMAVoice");
1791  return AVERROR_PATCHWELCOME;
1792  }
1793 
1794  /* (optional) nr. of samples in superframe; always <= 480 and >= 0 */
1795  if (get_bits1(gb)) {
1796  if ((n_samples = get_bits(gb, 12)) > 480) {
1797  av_log(ctx, AV_LOG_ERROR,
1798  "Superframe encodes >480 samples (%d), not allowed\n",
1799  n_samples);
1800  return AVERROR_INVALIDDATA;
1801  }
1802  }
1803  /* Parse LSPs, if global for the superframe (can also be per-frame). */
1804  if (s->has_residual_lsps) {
1805  double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2];
1806 
1807  for (n = 0; n < s->lsps; n++)
1808  prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];
1809 
1810  if (s->lsps == 10) {
1811  dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
1812  } else /* s->lsps == 16 */
1813  dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
1814 
1815  for (n = 0; n < s->lsps; n++) {
1816  lsps[0][n] = mean_lsf[n] + (a1[n] - a2[n * 2]);
1817  lsps[1][n] = mean_lsf[n] + (a1[s->lsps + n] - a2[n * 2 + 1]);
1818  lsps[2][n] += mean_lsf[n];
1819  }
1820  for (n = 0; n < 3; n++)
1821  stabilize_lsps(lsps[n], s->lsps);
1822  }
1823 
1824  /* get output buffer */
1825  frame->nb_samples = 480;
1826  if ((res = ff_get_buffer(ctx, frame, 0)) < 0)
1827  return res;
1828  frame->nb_samples = n_samples;
1829  samples = (float *)frame->data[0];
1830 
1831  /* Parse frames, optionally preceded by per-frame (independent) LSPs. */
1832  for (n = 0; n < 3; n++) {
1833  if (!s->has_residual_lsps) {
1834  int m;
1835 
1836  if (s->lsps == 10) {
1837  dequant_lsp10i(gb, lsps[n]);
1838  } else /* s->lsps == 16 */
1839  dequant_lsp16i(gb, lsps[n]);
1840 
1841  for (m = 0; m < s->lsps; m++)
1842  lsps[n][m] += mean_lsf[m];
1843  stabilize_lsps(lsps[n], s->lsps);
1844  }
1845 
1846  if ((res = synth_frame(ctx, gb, n,
1847  &samples[n * MAX_FRAMESIZE],
1848  lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],
1849  &excitation[s->history_nsamples + n * MAX_FRAMESIZE],
1850  &synth[s->lsps + n * MAX_FRAMESIZE]))) {
1851  *got_frame_ptr = 0;
1852  return res;
1853  }
1854  }
1855 
1856  /* Statistics? FIXME - we don't check for length, a slight overrun
1857  * will be caught by internal buffer padding, and anything else
1858  * will be skipped, not read. */
1859  if (get_bits1(gb)) {
1860  res = get_bits(gb, 4);
1861  skip_bits(gb, 10 * (res + 1));
1862  }
1863 
1864  *got_frame_ptr = 1;
1865 
1866  /* Update history */
1867  memcpy(s->prev_lsps, lsps[2],
1868  s->lsps * sizeof(*s->prev_lsps));
1869  memcpy(s->synth_history, &synth[MAX_SFRAMESIZE],
1870  s->lsps * sizeof(*synth));
1871  memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],
1872  s->history_nsamples * sizeof(*excitation));
1873  if (s->do_apf)
1874  memmove(s->zero_exc_pf, &s->zero_exc_pf[MAX_SFRAMESIZE],
1875  s->history_nsamples * sizeof(*s->zero_exc_pf));
1876 
1877  return 0;
1878 }
1879 
1880 /**
1881  * Parse the packet header at the start of each packet (input data to this
1882  * decoder).
1883  *
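 * (Editor's summary of the layout parsed below: 4 bits packet sequence
 * number, 1 bit has_residual_lsps flag, one or more 6-bit superframe counts
 * where the value 0x3F means "another count follows", and finally
 * s->spillover_bitsize bits giving the number of spillover bits that belong
 * to the previous packet's last superframe.)
 *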
1884  * @param s WMA Voice decoding context private data
1885  * @return 1 if not enough bits were available, 0 on success, or <0 on error.
1886  */
1887 static int parse_packet_header(WMAVoiceContext *s)
1888 {
1889  GetBitContext *gb = &s->gb;
1890  unsigned int res;
1891 
1892  if (get_bits_left(gb) < 11)
1893  return 1;
1894  skip_bits(gb, 4); // packet sequence number
1895  s->has_residual_lsps = get_bits1(gb);
1896  do {
1897  if (get_bits_left(gb) < 6 + s->spillover_bitsize)
1898  return AVERROR_INVALIDDATA;
1899 
1900  res = get_bits(gb, 6); // number of superframes per packet
1901  // (minus first one if there is spillover)
1902  if (get_bits_left(gb) < 6 * (res == 0x3F) + s->spillover_bitsize)
1903  return 1;
1904  } while (res == 0x3F);
1905  s->spillover_nbits = get_bits(gb, s->spillover_bitsize);
1906 
1907  return 0;
1908 }
1909 
1910 /**
1911  * Copy (unaligned) bits from gb/data/size to pb.
1912  *
1913  * @param pb target buffer to copy bits into
1914  * @param data source buffer to copy bits from
1915  * @param size size of the source data, in bytes
1916  * @param gb bit I/O context specifying the current position in the source
1917  * data. This function might use this to align the bit position to
1918  * a whole-byte boundary before calling #avpriv_copy_bits() on aligned
1919  * source data
1920  * @param nbits the amount of bits to copy from source to target
1921  *
1922  * @note after calling this function, the current position in the input bit
1923  * I/O context is undefined.
1924  */
1925 static void copy_bits(PutBitContext *pb,
1926  const uint8_t *data, int size,
1927  GetBitContext *gb, int nbits)
1928 {
1929  int rmn_bytes, rmn_bits;
1930 
1931  rmn_bits = rmn_bytes = get_bits_left(gb);
1932  if (rmn_bits < nbits)
1933  return;
1934  if (nbits > pb->size_in_bits - put_bits_count(pb))
1935  return;
1936  rmn_bits &= 7; rmn_bytes >>= 3;
1937  if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)
1938  put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));
1939  avpriv_copy_bits(pb, data + size - rmn_bytes,
1940  FFMIN(nbits - rmn_bits, rmn_bytes << 3));
1941 }
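/* Editor's note: the split above first drains the 1-7 bits needed to reach a
 * byte boundary in the source through get_bits()/put_bits(), then hands the
 * remaining whole bytes to avpriv_copy_bits(), which expects byte-aligned
 * source data. */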
1942 
1943 /**
1944  * Packet decoding: a packet is anything that the (ASF) demuxer contains,
1945  * and we expect that the demuxer / application provides it to us as such
1946  * (else you'll probably get garbage as output). Every packet has a size of
1947  * ctx->block_align bytes, starts with a packet header (see
1948  * #parse_packet_header()), and then a series of superframes. Superframe
1949  * #parse_packet_header()), and then a series of superframes. Superframes may
1950  * cross packet boundaries, i.e. a superframe's data can be split over
1951  * multiple (at most two) packets.
1952  * For more information about frames, see #synth_superframe().
1953  */
1954 static int wmavoice_decode_packet(AVCodecContext *ctx, void *data,
1955  int *got_frame_ptr, AVPacket *avpkt)
1956 {
1957  WMAVoiceContext *s = ctx->priv_data;
1958  GetBitContext *gb = &s->gb;
1959  int size, res, pos;
1960 
1961  /* Packets are sometimes a multiple of ctx->block_align, with a packet
1962  * header at each ctx->block_align bytes. However, FFmpeg's ASF demuxer
1963  * feeds us ASF packets, which may concatenate multiple "codec" packets
1964  * in a single "muxer" packet, so we artificially emulate that by
1965  * capping the packet size at ctx->block_align. */
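 /* Editor's note (worked arithmetic): the loop below subtracts whole
  * multiples of ctx->block_align, so with block_align == 480 an avpkt->size
  * of 960 ends up as 480 and a size of 500 ends up as 20. */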
1966  for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);
1967  if (!size) {
1968  *got_frame_ptr = 0;
1969  return 0;
1970  }
1971  init_get_bits(&s->gb, avpkt->data, size << 3);
1972 
1973  /* size == ctx->block_align is used to indicate whether we are dealing with
1974  * a new packet or a packet of which we already read the packet header
1975  * previously. */
1976  if (size == ctx->block_align) { // new packet header
1977  if ((res = parse_packet_header(s)) < 0)
1978  return res;
1979 
1980  /* If the packet header specifies a s->spillover_nbits, then we want
1981  * to push out all data of the previous packet (+ spillover) before
1982  * continuing to parse new superframes in the current packet. */
1983  if (s->spillover_nbits > 0) {
1984  if (s->sframe_cache_size > 0) {
1985  int cnt = get_bits_count(gb);
1986  copy_bits(&s->pb, avpkt->data, size, gb, s->spillover_nbits);
1987  flush_put_bits(&s->pb);
1988  s->sframe_cache_size += s->spillover_nbits;
1989  if ((res = synth_superframe(ctx, data, got_frame_ptr)) == 0 &&
1990  *got_frame_ptr) {
1991  cnt += s->spillover_nbits;
1992  s->skip_bits_next = cnt & 7;
1993  res = cnt >> 3;
1994  if (res > avpkt->size) {
1995  av_log(ctx, AV_LOG_ERROR,
1996  "Trying to skip %d bytes in packet of size %d\n",
1997  res, avpkt->size);
1998  return AVERROR_INVALIDDATA;
1999  }
2000  return res;
2001  } else
2002  skip_bits_long (gb, s->spillover_nbits - cnt +
2003  get_bits_count(gb)); // resync
2004  } else
2005  skip_bits_long(gb, s->spillover_nbits); // resync
2006  }
2007  } else if (s->skip_bits_next)
2008  skip_bits(gb, s->skip_bits_next);
2009 
2010  /* Try parsing superframes in current packet */
2011  s->sframe_cache_size = 0;
2012  s->skip_bits_next = 0;
2013  pos = get_bits_left(gb);
2014  if ((res = synth_superframe(ctx, data, got_frame_ptr)) < 0) {
2015  return res;
2016  } else if (*got_frame_ptr) {
2017  int cnt = get_bits_count(gb);
2018  s->skip_bits_next = cnt & 7;
2019  res = cnt >> 3;
2020  if (res > avpkt->size) {
2021  av_log(ctx, AV_LOG_ERROR,
2022  "Trying to skip %d bytes in packet of size %d\n",
2023  res, avpkt->size);
2024  return AVERROR_INVALIDDATA;
2025  }
2026  return res;
2027  } else if ((s->sframe_cache_size = pos) > 0) {
2028  /* rewind bit reader to start of last (incomplete) superframe... */
2029  init_get_bits(gb, avpkt->data, size << 3);
2030  skip_bits_long(gb, (size << 3) - pos);
2031  av_assert1(get_bits_left(gb) == pos);
2032 
2033  /* ...and cache it for spillover in next packet */
2034  init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);
2035  copy_bits(&s->pb, avpkt->data, size, gb, s->sframe_cache_size);
2036  // FIXME bad - just copy the bytes as a whole and use the
2037  // skip_bits_next field
2038  }
2039 
2040  return size;
2041 }
2042 
2043 static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
2044 {
2045  WMAVoiceContext *s = ctx->priv_data;
2046 
2047  if (s->do_apf) {
2048  ff_rdft_end(&s->rdft);
2049  ff_rdft_end(&s->irdft);
2050  ff_dct_end(&s->dct);
2051  ff_dct_end(&s->dst);
2052  }
2053 
2054  return 0;
2055 }
2056 
2057 static av_cold void wmavoice_flush(AVCodecContext *ctx)
2058 {
2059  WMAVoiceContext *s = ctx->priv_data;
2060  int n;
2061 
2062  s->postfilter_agc = 0;
2063  s->sframe_cache_size = 0;
2064  s->skip_bits_next = 0;
2065  for (n = 0; n < s->lsps; n++)
2066  s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
2067  memset(s->excitation_history, 0,
2068  sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);
2069  memset(s->synth_history, 0,
2070  sizeof(*s->synth_history) * MAX_LSPS);
2071  memset(s->gain_pred_err, 0,
2072  sizeof(s->gain_pred_err));
2073 
2074  if (s->do_apf) {
2075  memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
2076  sizeof(*s->synth_filter_out_buf) * s->lsps);
2077  memset(s->dcf_mem, 0,
2078  sizeof(*s->dcf_mem) * 2);
2079  memset(s->zero_exc_pf, 0,
2080  sizeof(*s->zero_exc_pf) * s->history_nsamples);
2081  memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
2082  }
2083 }
2084 
2085 AVCodec ff_wmavoice_decoder = {
2086  .name = "wmavoice",
2087  .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
2088  .type = AVMEDIA_TYPE_AUDIO,
2089  .id = AV_CODEC_ID_WMAVOICE,
2090  .priv_data_size = sizeof(WMAVoiceContext),
2091  .init = wmavoice_decode_init,
2092  .init_static_data = wmavoice_init_static_data,
2093  .close = wmavoice_decode_end,
2094  .decode = wmavoice_decode_packet,
2095  .capabilities = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1,
2096  .flush = wmavoice_flush,
2097 };
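
/* Editor's addition, not part of the original file: a minimal sketch of how
 * an application could drive this decoder through the public libavcodec API
 * of this FFmpeg release. It assumes the caller (typically the ASF demuxer
 * via libavformat) has already filled in extradata, block_align, etc. on the
 * AVCodecContext before avcodec_open2(), and that "pkt" holds one demuxed
 * packet; the function name is hypothetical and error handling is reduced to
 * a minimum. */
static int wmavoice_decode_example(AVCodecContext *dec, AVPacket *pkt)
{
    AVFrame *frm = av_frame_alloc();   // will hold the decoded float samples
    int got = 0, ret;

    if (!frm)
        return AVERROR(ENOMEM);
    /* One call consumes (up to) one codec packet of block_align bytes and
     * reports how many bytes of pkt it actually used. */
    ret = avcodec_decode_audio4(dec, frm, &got, pkt);
    if (ret >= 0 && got) {
        /* frm->nb_samples mono float samples are now in frm->data[0]. */
    }
    av_frame_free(&frm);
    return ret;
}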