//-----------------------------------------------------------------------------
// F0 estimation based on DIO (Distributed Inline-filter Operation)
// Please see styleguide.txt for the special rules on the names of variables
// and functions.
//-----------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "world.h"

namespace stand
{
namespace math
{
namespace dsp
{

// Minimum / maximum of two values.
// Caution: the selected argument is evaluated twice, so never pass an
// expression with side effects (e.g. MIN(i++, j)).
#define MIN(a,b) (((a)<(b))?(a):(b))
#define MAX(a,b) (((a)<(b))?(b):(a))

//-----------------------------------------------------------------------------
// Struct for RawEventByDio().
// The arrays are allocated by GetFourZeroCrossingIntervals() and released by
// DestroyZeroCrossings().
// "negative" means "zero-crossing point going from positive to negative"
// "positive" means "zero-crossing point going from negative to positive"
// "peak" and "dip" are the same two kinds of zero-crossing measured on the
// differential of the waveform.
// For each kind:
//   *_interval_locations : Midpoint of two successive crossings divided by fs
//   *_intervals          : fs divided by the distance between the crossings
//   number_of_*          : Value returned by ZeroCrossingEngine()
//-----------------------------------------------------------------------------
typedef struct{
  // Crossings going from positive to negative.
  double *negative_interval_locations;
  double *negative_intervals;
  int number_of_negatives;
  // Crossings going from negative to positive.
  double *positive_interval_locations; 
  double *positive_intervals;
  int number_of_positives;
  // Peaks of the waveform.
  double *peak_interval_locations;
  double *peak_intervals;
  int number_of_peaks; 
  // Dips of the waveform.
  double *dip_interval_locations;
  double *dip_intervals;
  int number_of_dips;
} ZeroCrossings;

//-----------------------------------------------------------------------------
// InitializeDioOption() writes the default parameters into the struct.
// Nothing is allocated here; the struct itself is provided by the caller.
// Output:
//   option   : Struct for the optional parameters.
//-----------------------------------------------------------------------------
void InitializeDioOption(DioOption *option) {
  // Every value below is a default and may be changed by the caller.

  // Search range of f0 and the frame shift.
  option->f0_floor = 80;
  option->f0_ceil = 640;
  option->frame_period = 5;

  // Number of filters per octave.
  option->channels_in_octave = 2;

  // Downsampling factor. Values from 1 to 12 can be used;
  // 11 is the default value for 44.1 kHz of fs.
  option->speed = 11;
}

//-----------------------------------------------------------------------------
// GetDownsampledSignal() calculates the spectrum for estimation.
// This function carries out downsampling to speed up the estimation process
// and calculates the spectrum of the downsampled signal.
// This function is only used in the OrigianlDio().
// Input:
//   x                  : Input signal
//   x_length           : Length of x
//   fs                 : Sampling frequency
//   y_length           : Length of the downsampled signal
//   fft_size           : FFT size
//   decimation_ratio   : Coefficient used for down sampling 
//                        (fs after down sampling is fs/decimation_ratio)
// Output:
//   y_spectrum         : Spectrum of the downsampled signal.
//-----------------------------------------------------------------------------
void GetSpectrumForEstimation(const double *x, int x_length, int fs, int y_length,
  int fft_size, int decimation_ratio, 
  fft_complex *y_spectrum)
{
  double *y = (double *)malloc(sizeof(double) * fft_size);

  // Downsampling
  if (decimation_ratio != 1) decimate(x, x_length, decimation_ratio, y);
  else for(int i = 0;i < x_length;i++) y[i] = x[i];

  // Removal of the DC component (y = y - mean(y))
  double meanY = 0.0;
  for(int i = 0;i < y_length;i++)  meanY += y[i];
  meanY /= (double)y_length;
  for(int i = 0;i < y_length;i++)  y[i] -= meanY;
  for(int i = y_length; i < fft_size;i++)  y[i] = 0.0;

  fft_plan forwardFFT = fft_plan_dft_r2c_1d(fft_size, y, y_spectrum, 
    FFT_ESTIMATE);
  fft_execute(forwardFFT);

  fft_destroy_plan(forwardFFT);
  free(y);
}

//-----------------------------------------------------------------------------
// GetBestF0Contour() picks, for every temporal position, the f0 candidate
// whose stability value is the smallest among all bands. When several bands
// share the smallest value, the band with the lowest index is used.
// This function is only used in the OriginalDio().
// Input: 
//   f0_length          : Length of best_f0_contour
//   f0_candidate_map   : All candidates of all temporal positions
//                        (indexed as [band][position])
//   f0_stability_map   : Stabilities of all f0 candidates
//                        (indexed as [band][position])
//   number_of_bands    : Number of rows in both maps
// Output:
//   best_f0_contour    : Estimated best f0 contour
//-----------------------------------------------------------------------------
void GetBestF0Contour(int f0_length, double **f0_candidate_map, 
  double **f0_stability_map, int number_of_bands,
  double *best_f0_contour) {
  for (int frame = 0; frame < f0_length; ++frame) {
    // Index of the band holding the smallest stability value seen so far.
    int best_band = 0;
    for (int band = 1; band < number_of_bands; ++band) {
      if (f0_stability_map[band][frame] < f0_stability_map[best_band][frame])
        best_band = band;
    }
    best_f0_contour[frame] = f0_candidate_map[best_band][frame];
  }
}

//-----------------------------------------------------------------------------
// EliminateUnnaturalChange() is the 1st step of the postprocessing.
// This function eliminates the unnatural change of f0 based on allowed_range.
// This function is only used in GetFinalF0Contour().
// Input: 
//   f0_length            : Length of f0_before, best_f0_contour and f0_after
//   voice_range_minimum  : Unvoiced range. The first and the last
//                          voice_range_minimum samples are dealt with as
//                          the unvoiced area. The range is clamped to the
//                          contour, so short contours are handled safely.
//   allowed_range        : Allowable value to estimate and eliminate the 
//                          f0-change.
//   best_f0_contour      : Current best f0 contour
// Output: 
//   f0_before            : Work buffer. It receives best_f0_contour with
//                          both margins set to zero.
//   f0_after             : Modified f0 contour
//-----------------------------------------------------------------------------
void EliminateUnnaturalChange(double *f0_before, int f0_length, 
  int voice_range_minimum, double allowed_range, double *best_f0_contour, 
  double *f0_after)
{
  // Boundaries of the two unvoiced margins, clamped to [0, f0_length] so
  // that no index leaves the arrays even if the contour is shorter than the
  // margins.
  int head = voice_range_minimum;
  if(head < 0) head = 0;
  if(head > f0_length) head = f0_length;
  int tail = f0_length - voice_range_minimum;
  if(tail < head) tail = head;
  if(tail > f0_length) tail = f0_length;

  // Initialization
  for(int i = 0;i < head;i++) f0_before[i] = 0.0;
  for(int i = head;i < tail;i++) f0_before[i] = best_f0_contour[i];
  for(int i = tail;i < f0_length;i++) f0_before[i] = 0.0;

  // Then, the processing to prevent jumps: a frame is kept only if the
  // relative change from the previous frame is below allowed_range.
  for(int i = 0;i < head;i++) f0_after[i] = 0.0;
  for(int i = head;i < f0_length;i++)
  {
    // The frame before the first one is dealt with as unvoiced.
    double previous = i > 0 ? f0_before[i-1] : 0.0;
    double change = fabs((f0_before[i]-previous)/(0.00001+f0_before[i]));
    f0_after[i] = change < allowed_range ? f0_before[i] : 0.0;
  }
}

//-----------------------------------------------------------------------------
// EliminateSuspectedF0() is the 2nd step of the postprocessing.
// This function eliminates the suspected f0 in the anlaut and auslaut:
// a frame at index voice_range_minimum or later is set to zero if any frame
// closer than voice_range_minimum to it is zero. Frames beyond the end of
// the contour are dealt with as zero.
// Furthermore, the number of voiced section is counted in this function.
// This function is only used in GetFinalF0Contour().
// Input:
//   f0_before            : Input f0 contour
//   f0_length            : Length of f0_before and f0_after
//   voice_range_minimum  : Unvoiced range. First voice_range_minimum sample
//                          is dealt with the unvoiced area.
// Output: (These parameters are used in the 3rd and 4th step)
//   positive_index       : List of the starts of the voiced section
//   negative_index       : List of the ends of the voiced section
//   positive_count       : Length of positive_index
//   negative_count       : Length of negative_index
//   f0_after             : Modified f0 contour
//-----------------------------------------------------------------------------
void EliminateSuspectedF0(double *f0_before, int f0_length, 
  int voice_range_minimum, 
  int *positive_index, int *negative_index, int *positive_count, 
  int *negative_count, double *f0_after)
{
  for(int i = 0;i < f0_length;i++) f0_after[i] = f0_before[i];
  for(int i = voice_range_minimum;i < f0_length;i++)
  {
    for(int j = 0;j < voice_range_minimum;j++)
    {
      // i-j never goes below 1 because j < voice_range_minimum <= i.
      // i+j can pass the end of the contour; such a frame is dealt with as
      // unvoiced instead of being read.
      if(i+j >= f0_length || f0_before[i-j] == 0 || f0_before[i+j] == 0)
      {
        f0_after[i] = 0.0;
        break;
      }
    }
  }

  // A voiced section starts where zero turns into non-zero and ends at the
  // last non-zero frame before a zero.
  *positive_count = *negative_count = 0;
  for(int i = 1;i < f0_length;i++)
  {
    if(f0_after[i] == 0 && f0_after[i-1] != 0)
      negative_index[(*negative_count)++] = i-1;
    else if (f0_after[i-1] == 0 && f0_after[i] != 0)
      positive_index[(*positive_count)++] = i;
  }
}

//-----------------------------------------------------------------------------
// ForwardCorrection() is the 3rd step of the postprocessing.
// This function corrects the f0 candidates from backward to forward:
// starting at the end of each voiced section, the following frame is filled
// with the candidate closest to either the current f0 or its linear
// extrapolation, until the relative error exceeds allowed_range or the next
// voiced section is reached.
// This function is only used in GetFinalF0Contour().
// Input:
//   f0_before              : Input f0 contour
//   f0_length              : Length of f0_before and f0_after
//   f0_map                 : All f0 candidates in each temporal position
//                            (indexed as [candidate][position])
//   number_of_candidates   : Number of f0 candidates in each temporal position
//   allowed_range          : Allowable value to estimate and eliminate the 
//                            f0-change.
//   positive_index         : List of the starts of the voiced section
//   negative_index         : List of the ends of the voiced section.
//                            The end of a section that is joined to the next
//                            section is updated in place.
//   negative_count         : Length of negative_index
// Output:
//   f0_after               : Corrected f0 contour
// Caution:
//   positive_index[i+1] is dealt with as the start of the section following
//   negative_index[i]. NOTE(review): this assumes that the contour begins
//   with an unvoiced frame - confirm against the caller.
//-----------------------------------------------------------------------------
void ForwardCorrection(double *f0_before, int f0_length, double **f0_map, 
  int number_of_candidates, double allowed_range, int *positive_index, 
  int *negative_index, int negative_count, 
  double *f0_after)
{
  for(int i = 0;i < f0_length;i++) f0_after[i] = f0_before[i];
  if(number_of_candidates < 1) return;

  for(int i = 0;i < negative_count;i++)
  {
    for(int j = negative_index[i];j < f0_length-1;j++)
    {
      // Linear extrapolation and the current value are both accepted as a
      // reference. There is no previous frame at j == 0.
      double previous_f0 = j > 0 ? f0_after[j-1] : f0_after[j];
      double reference_value1 = f0_after[j]*2 - previous_f0;
      double reference_value2 = f0_after[j];

      // Search of the closest candidate. Candidate 0 takes part in the
      // search like every other candidate.
      double best_f0 = 0.0;
      double minimum_error = 0.0;
      for(int k = 0;k < number_of_candidates;k++)
      {
        double error1 = fabs(reference_value1 - f0_map[k][j+1]);
        double error2 = fabs(reference_value2 - f0_map[k][j+1]);
        double error_value = error1 < error2 ? error1 : error2;
        if(k == 0 || error_value < minimum_error)
        {
          minimum_error = error_value;
          best_f0 = f0_map[k][j+1];
        }
      }

      // The extension stops when even the best candidate is too far.
      double relative_error1 = minimum_error / (reference_value1+0.0001);
      double relative_error2 = minimum_error / (reference_value2+0.0001);
      if((relative_error1 < relative_error2 ? 
        relative_error1 : relative_error2) > allowed_range)
      {
        f0_after[j+1] = 0.0;
        break;
      }
      f0_after[j+1] = best_f0;

      // The section reached the start of the next voiced section.
      // The last section has no following section to look up.
      if(i != negative_count-1 && j == positive_index[i+1]-1)
      {
        negative_index[i] = j;
        break;
      }
    }
  }
}

//-----------------------------------------------------------------------------
// BackwardCorrection() is the 4th step of the postprocessing.
// This function corrects the f0 candidates from forward to backward:
// starting at the start of each voiced section, the preceding frame is filled
// with the candidate closest to either the current f0 or its linear
// extrapolation from the following frame, until the relative error exceeds
// allowed_range or the end of the previous voiced section is rewritten.
// Frame 0 is never modified.
// This function is only used in GetFinalF0Contour().
// Input:
//   f0_before              : Input f0 contour
//   f0_length              : Length of f0_before and f0_after
//   f0_map                 : All f0 candidates in each temporal position
//                            (indexed as [candidate][position])
//   number_of_candidates   : Number of f0 candidates in each temporal position
//   allowed_range          : Allowable value to estimate and eliminate the 
//                            f0-change.
//   positive_index         : List of the starts of the voiced section
//   negative_index         : List of the ends of the voiced section
//   positive_count         : Length of positive_index
// Output:
//   f0_after               : Corrected f0 contour
// Caution:
//   negative_index[i-1] is dealt with as the end of the section preceding
//   positive_index[i]. NOTE(review): this assumes that the contour begins
//   with an unvoiced frame - confirm against the caller.
//-----------------------------------------------------------------------------
void BackwardCorrection(double *f0_before, int f0_length, double **f0_map, 
  int number_of_candidates, double allowed_range, int *positive_index, 
  int *negative_index, int positive_count, 
  double *f0_after)
{
  for(int i = 0;i < f0_length;i++) f0_after[i] = f0_before[i];
  if(number_of_candidates < 1) return;

  for(int i = positive_count-1;i >= 0;i--)
  {
    // j is the earliest frame that is already known; j-1 is estimated.
    for(int j = positive_index[i];j > 1;j--)
    {
      // The extrapolation toward the past uses the following frame j+1,
      // which is already fixed. There is no such frame at the last index.
      double next_f0 = j+1 < f0_length ? f0_after[j+1] : f0_after[j];
      double reference_value1 = f0_after[j]*2 - next_f0;
      double reference_value2 = f0_after[j];

      // Search of the closest candidate at the target frame j-1.
      // Candidate 0 takes part in the search like every other candidate.
      double best_f0 = 0.0;
      double minimum_error = 0.0;
      for(int k = 0;k < number_of_candidates;k++)
      {
        double error1 = fabs(reference_value1 - f0_map[k][j-1]);
        double error2 = fabs(reference_value2 - f0_map[k][j-1]);
        double error_value = error1 < error2 ? error1 : error2;
        if(k == 0 || error_value < minimum_error)
        {
          minimum_error = error_value;
          best_f0 = f0_map[k][j-1];
        }
      }

      // The extension stops when even the best candidate is too far.
      double relative_error1 = minimum_error / (reference_value1+0.0001);
      double relative_error2 = minimum_error / (reference_value2+0.0001);
      if((relative_error1 < relative_error2 ? 
        relative_error1 : relative_error2) > allowed_range)
      {
        f0_after[j-1] = 0.0;
        break;
      }
      f0_after[j-1] = best_f0;

      // The end of the previous voiced section has just been rewritten.
      if(i != 0 && j == negative_index[i-1]+1) break;
    }
  }
}

//-----------------------------------------------------------------------------
// EliminateInvalidVoicedSection() is the final step of the postprocessing.
// This function eliminates every voiced section (run of non-zero frames)
// whose length is voice_range_minimum frames or shorter.
// This function is only used in GetFinalF0Contour().
// Input:
//   f0_before            : Input f0 contour
//   f0_length            : Length of f0_before and f0_after
//   voice_range_minimum  : Longest length (in frames) of a voiced section
//                          that is still eliminated.
// Output: 
//   f0_after             : Corrected f0 contour
//-----------------------------------------------------------------------------
void EliminateInvalidVoicedSection(double *f0_before, int f0_length, 
  int voice_range_minimum, 
  double *f0_after)
{
  for(int i = 0;i < f0_length;i++) f0_after[i] = f0_before[i];

  int i = 0;
  while(i < f0_length)
  {
    if(f0_before[i] == 0.0)
    {
      i++;
      continue;
    }
    // [section_start, section_end) is the voiced section found at i.
    // section_end equals f0_length when the section reaches the last frame.
    int section_start = i;
    int section_end = i;
    while(section_end < f0_length && f0_before[section_end] != 0.0) 
      section_end++;

    // Only frames inside the section are cleared, so the write never goes
    // past the end of f0_after.
    if((section_end-section_start) <= voice_range_minimum)
      for(int k = section_start;k < section_end;k++) f0_after[k] = 0.0;

    i = section_end;
  }
}

//-----------------------------------------------------------------------------
// GetFinalF0Contour() calculates the optimal f0 contour based on all f0 
// candidates and the f0 contour calculated by the fundamental-ness.
// The five postprocessing steps are applied in order, using two work buffers
// alternately as input and output.
// Input:
//   frame_period           : Temporal interval of the time axis
//   number_of_candidates   : Number of f0 candidates in each temporal position
//   fs                     : Sampling frequency (not referenced here)
//   f0_map                 : All f0 candidates in each temporal position
//   best_f0_contour        : Current best f0 contour
//   f0_length              : Length of best_f0_contour
// Output:
//   final_f0_contour       : Optimal f0 contour. Length equals f0_length.
//-----------------------------------------------------------------------------
void GetFinalF0Contour(double frame_period, int number_of_candidates, int fs, 
  double **f0_map, double *best_f0_contour, int f0_length, 
  double *final_f0_contour) {
  // The range of +-50 msec is not trusted.
  const int voice_range_minimum = (int)(0.5 + 50 / frame_period);
  // Whether this value should depend on frame_period is debatable.
  const double allowed_range = 0.1 * frame_period/5.0;

  // Work memory shared by all steps.
  double *contour_a = (double *)malloc(sizeof(double) * f0_length);
  double *contour_b = (double *)malloc(sizeof(double) * f0_length);
  int *positive_index = (int *)malloc(sizeof(int) * f0_length);
  int *negative_index = (int *)malloc(sizeof(int) * f0_length);
  int positive_count = 0;
  int negative_count = 0;

  // 1st step: contour_a is used as scratch, the result goes to contour_b.
  EliminateUnnaturalChange(contour_a, f0_length, voice_range_minimum,
    allowed_range, best_f0_contour, contour_b);

  // 2nd step: contour_b -> contour_a, voiced sections are listed.
  EliminateSuspectedF0(contour_b, f0_length, voice_range_minimum,
    positive_index, negative_index, &positive_count, &negative_count,
    contour_a);

  // 3rd step: contour_a -> contour_b.
  ForwardCorrection(contour_a, f0_length, f0_map, number_of_candidates,
    allowed_range, positive_index, negative_index, negative_count,
    contour_b);

  // 4th step: contour_b -> contour_a.
  BackwardCorrection(contour_b, f0_length, f0_map, number_of_candidates,
    allowed_range, positive_index, negative_index, positive_count,
    contour_a);

  // Final step: contour_a -> final_f0_contour.
  EliminateInvalidVoicedSection(contour_a, f0_length, voice_range_minimum,
    final_f0_contour);

  free(negative_index);
  free(positive_index);
  free(contour_b);
  free(contour_a);
}

//-----------------------------------------------------------------------------
// NuttallWindow() calculates the coefficients of Nuttall window whose length 
// is y_length. Sample i is evaluated at the position
// ((i+1) - (y_length+1)/2) / (y_length+1), i.e. relative to the center.
// Input: 
//   y_length   : Length of the window
// Output:
//   y          : Calculated window.
//-----------------------------------------------------------------------------
void NuttallWindow(int y_length, 
  double *y) {
  const double center = (double)(y_length+1)/2.0;
  const double span = (double)(y_length+1);

  for (int n = 0; n < y_length; ++n) {
    const double position = ((double)(n+1) - center) / span;
    y[n] = 0.355768 + 0.487396*cos(2*PI*position) + 
      0.144232*cos(4*PI*position) + 0.012604*cos(6*PI*position);
  }
}

//-----------------------------------------------------------------------------
// GetFilteredSignal() calculates the signal that is the convolution of the 
// input signal and low-pass filter. The convolution is carried out as a
// multiplication of the two spectra followed by the inverse FFT.
// This function is only used in RawEventByDio()
// Input: 
//   half_average_length  : Length of low-pass filter (cutoff frequency).
//                          The filter has half_average_length*4 taps, which
//                          must not exceed fft_size.
//   fft_size             : Length of FFT
//   x_spectrum           : Spectrum of the input waveform x
//   x_length             : Length of x
// Output:
//   filtered_signal      : Filtered signal. The buffer must hold fft_size
//                          samples; the first x_length samples are valid.
//-----------------------------------------------------------------------------
void GetFilteredSignal(int half_average_length, int fft_size, 
  fft_complex *x_spectrum, int x_length, 
  double *filtered_signal)
{
  // Nuttall window is used as a low-pass filter.
  // Cutoff frequency depends on the window length.
  // The rest of the buffer is the zero padding.
  double *low_pass_filter = (double *)malloc(sizeof(double) * fft_size);
  for(int i = 0;i < fft_size;i++) low_pass_filter[i] = 0.0;
  NuttallWindow(half_average_length*4, low_pass_filter);

  fft_complex  *low_pass_filter_spectrum = 
    (fft_complex *)malloc(sizeof(fft_complex) * fft_size);
  fft_plan forwardFFT = fft_plan_dft_r2c_1d(fft_size, low_pass_filter, 
    low_pass_filter_spectrum, FFT_ESTIMATE);
  fft_execute(forwardFFT); 

  // Convolution (complex multiplication of the bins 0 .. fft_size/2)
  int nyquist = fft_size/2;
  for(int i = 0;i <= nyquist;i++)
  {
    double real_part = x_spectrum[i][0]*low_pass_filter_spectrum[i][0] - 
      x_spectrum[i][1]*low_pass_filter_spectrum[i][1];
    double imaginary_part = 
      x_spectrum[i][0]*low_pass_filter_spectrum[i][1] + 
      x_spectrum[i][1]*low_pass_filter_spectrum[i][0];
    low_pass_filter_spectrum[i][0] = real_part;
    low_pass_filter_spectrum[i][1] = imaginary_part;

    // The copy into the upper half is kept as it was. It must never land
    // inside 0 .. fft_size/2: such a write would replace the filter bin at
    // fft_size/2 before it is multiplied and the product at fft_size/2-1
    // after it was calculated.
    // NOTE(review): a c2r transform presumably reads only the bins
    // 0 .. fft_size/2, in which case this copy has no effect - confirm
    // against the FFT implementation.
    int mirror = fft_size-i-1;
    if(i > 0 && mirror > nyquist)
    {
      low_pass_filter_spectrum[mirror][0] = real_part;
      low_pass_filter_spectrum[mirror][1] = imaginary_part;
    }
  }

  fft_plan inverseFFT = fft_plan_dft_c2r_1d(fft_size, 
    low_pass_filter_spectrum, filtered_signal, FFT_ESTIMATE);
  fft_execute(inverseFFT);

  // Compensation of the delay (half of the filter length).
  int index_bias = half_average_length*2;
  for(int i = 0;i < x_length;i++) 
    filtered_signal[i] = filtered_signal[i+index_bias];

  fft_destroy_plan(inverseFFT);  
  fft_destroy_plan(forwardFFT);
  free(low_pass_filter_spectrum);
  free(low_pass_filter);
}

//-----------------------------------------------------------------------------
// CheckEvent() returns 1, provided that the input value is over 0.
// Otherwise it returns 0.
// This function is for RawEventByDio().
//-----------------------------------------------------------------------------
inline int CheckEvent(int x) {
  if (x > 0) return 1;
  return 0;
}

//-----------------------------------------------------------------------------
// ZeroCrossingEngine() calculates the zero crossing points from positive to 
// negative. Thanks to Custom.Maid http://custom-made.seesaa.net/ (2012/8/19)
// The position of each crossing is refined by the linear interpolation
// between the two samples around it.
// This function is only used in RawEventByDio().
// Input: 
//   x                    : Input vector
//   x_length             : Length of x
//   fs                   : Sampling frequency
// Output:
//   interval_locations   : List of interval locations (midpoint of two
//                          successive crossings divided by fs).
//   intervals            : Values of intervals (fs divided by the distance
//                          between two successive crossings).
//   Return value         : Number of values written to interval_locations
//                          and intervals. 0 is returned when fewer than two
//                          crossings exist.
//-----------------------------------------------------------------------------
int ZeroCrossingEngine(double *x, int x_length, double fs,
  double *interval_locations, double *intervals)
{
  // At least two samples are required to observe a crossing.
  if(x_length < 2) return 0;

  // edges[k] is the index of the first non-positive sample that follows a
  // positive sample.
  int *edges = (int *)malloc(sizeof(int) * x_length);
  int count = 0;
  for(int i = 0;i < x_length-1;i++) 
    if((0.0 < x[i]) && (x[i+1] <= 0.0)) edges[count++] = i+1;

  if(count < 2) 
  {
    free(edges);
    return 0;
  }

  // Sub-sample position of each crossing. The denominator is never zero
  // because x[edges[i]-1] is positive and x[edges[i]] is not.
  double *fine_edges = (double *)malloc(sizeof(double) * count);
  for(int i = 0;i < count;i++)
    fine_edges[i] = (double)edges[i] - x[edges[i]-1] / 
    (x[edges[i]]-x[edges[i]-1]);

  for(int i = 0;i < count-1;i++)
  {
    intervals[i] = fs / (fine_edges[i+1] - fine_edges[i]);
    interval_locations[i] = (fine_edges[i]+fine_edges[i+1])/2.0/fs;
  }

  free(fine_edges);
  free(edges);
  // count crossings give count-1 intervals.
  return count-1;
}

//-----------------------------------------------------------------------------
// GetFourZeroCrossingIntervals() calculates four zero-crossing intervals.
// (1) Zero-crossing going from negative to positive.
// (2) Zero-crossing going from positive to negative.
// (3) Peak, and (4) dip. (3) and (4) are calculated from the zero-crossings of 
// the differential of waveform.
// Input:
//   filtered_signal  : Signal filtered by the low-pass filter.
//                      It is used as a work buffer and overwritten.
//   x_length         : Length of filtered_signal
//   fs               : Sampling frequency
// Output:
//   zero_crossings   : Struct to store the four zero-crossing intervals.
//                      The eight arrays are allocated here and must be
//                      released with DestroyZeroCrossings().
//-----------------------------------------------------------------------------
void GetFourZeroCrossingIntervals(double *filtered_signal, 
  int x_length, double fs, 
  ZeroCrossings *zero_crossings)
{
  // Two crossings of the same direction are at least two samples apart, so
  // x_length/2 + 1 values are enough for any input signal.
  int capacity = x_length/2 + 1;
  if(capacity < 1) capacity = 1;

  zero_crossings->negative_interval_locations = 
    (double *)malloc(sizeof(double) * capacity);
  zero_crossings->positive_interval_locations = 
    (double *)malloc(sizeof(double) * capacity);
  zero_crossings->peak_interval_locations = 
    (double *)malloc(sizeof(double) * capacity);
  zero_crossings->dip_interval_locations = 
    (double *)malloc(sizeof(double) * capacity);
  zero_crossings->negative_intervals = 
    (double *)malloc(sizeof(double) * capacity);
  zero_crossings->positive_intervals = 
    (double *)malloc(sizeof(double) * capacity);
  zero_crossings->peak_intervals = 
    (double *)malloc(sizeof(double) * capacity);
  zero_crossings->dip_intervals = 
    (double *)malloc(sizeof(double) * capacity);

  // (2) Crossings going from positive to negative.
  zero_crossings->number_of_negatives = ZeroCrossingEngine(filtered_signal, 
    x_length, fs, zero_crossings->negative_interval_locations, 
    zero_crossings->negative_intervals);

  // (1) The sign is inverted, so the crossings going from negative to
  // positive are detected by the same engine.
  for(int i = 0;i < x_length;i++) filtered_signal[i] = -filtered_signal[i];
  zero_crossings->number_of_positives = ZeroCrossingEngine(filtered_signal, 
    x_length, fs, zero_crossings->positive_interval_locations, 
    zero_crossings->positive_intervals);

  // (3) Differential of the waveform (one sample shorter).
  for(int i = 0;i < x_length-1;i++) filtered_signal[i] = 
    filtered_signal[i]-filtered_signal[i+1];
  zero_crossings->number_of_peaks = ZeroCrossingEngine(filtered_signal, 
    x_length-1, fs, zero_crossings->peak_interval_locations, 
    zero_crossings->peak_intervals);

  // (4) Inverted differential.
  for(int i = 0;i < x_length-1;i++) filtered_signal[i] = -filtered_signal[i];
  zero_crossings->number_of_dips = ZeroCrossingEngine(filtered_signal, 
    x_length-1, fs, zero_crossings->dip_interval_locations, 
    zero_crossings->dip_intervals);

  return;
}

//-----------------------------------------------------------------------------
// GetF0Candidates() calculates the F0 candidates based on the zero-crossings.
// The four interval sequences are interpolated on time_axis; their mean is
// the candidate and their standard deviation is its fundamental-ness.
// A candidate is replaced by 0.0 (with the deviation 100000.0) when it is
// outside [boundary_f0/2, boundary_f0] or outside [f0_floor, f0_ceil].
// All positions are set this way when any of the four sequences has two
// values or fewer.
// Input:
//   zero_crossings   : Struct to store four zero-crossing points
//   boundary_f0      : Cutoff frequency of the low-pass filter
//   f0_floor         : Lower limit of the estimated f0 candidate
//   f0_ceil          : Upper limit of the estimated f0 candidate
//   time_axis        : Temporal positions used for the f0 estimation
//   time_axis_length : Length of time_axis
// Output:
//   f0_candidates    : Time sequence of the estimated f0
//   f0_deviations    : Fundamental-ness of each f0 candidate
//-----------------------------------------------------------------------------
void GetF0Candidates(ZeroCrossings *zero_crossings, double boundary_f0, 
  double f0_floor, double f0_ceil, double *time_axis, int time_axis_length, 
  double *f0_candidates, double *f0_deviations) {
  // Not enough events in one of the four sequences: no candidate at all.
  if (zero_crossings->number_of_negatives <= 2 ||
      zero_crossings->number_of_positives <= 2 ||
      zero_crossings->number_of_peaks <= 2 ||
      zero_crossings->number_of_dips <= 2) {
    for (int t = 0; t < time_axis_length; ++t) {
      f0_deviations[t] = 100000.0;
      f0_candidates[t] = 0.0;
    }
    return;
  }

  // The four sequences in the order negative, positive, peak, dip.
  double *locations[4] = {
    zero_crossings->negative_interval_locations,
    zero_crossings->positive_interval_locations,
    zero_crossings->peak_interval_locations,
    zero_crossings->dip_interval_locations
  };
  double *values[4] = {
    zero_crossings->negative_intervals,
    zero_crossings->positive_intervals,
    zero_crossings->peak_intervals,
    zero_crossings->dip_intervals
  };
  int counts[4] = {
    zero_crossings->number_of_negatives,
    zero_crossings->number_of_positives,
    zero_crossings->number_of_peaks,
    zero_crossings->number_of_dips
  };

  double *interpolated[4];
  for (int s = 0; s < 4; ++s) {
    interpolated[s] = (double *)malloc(sizeof(double) * time_axis_length);
    interp1(locations[s], values[s], counts[s],
      time_axis, time_axis_length, interpolated[s]);
  }

  for (int t = 0; t < time_axis_length; ++t) {
    const double mean = (interpolated[0][t] + interpolated[1][t] +
      interpolated[2][t] + interpolated[3][t]) / 4.0;
    f0_candidates[t] = mean;

    const double d0 = interpolated[0][t] - mean;
    const double d1 = interpolated[1][t] - mean;
    const double d2 = interpolated[2][t] - mean;
    const double d3 = interpolated[3][t] - mean;
    f0_deviations[t] = sqrt((d0*d0 + d1*d1 + d2*d2 + d3*d3) / 3.0);

    const int rejected = mean > boundary_f0 || mean < boundary_f0/2.0 ||
      mean > f0_ceil || mean < f0_floor;
    if (rejected) {
      f0_candidates[t] = 0.0;
      f0_deviations[t] = 100000.0;
    }
  }

  for (int s = 0; s < 4; ++s) free(interpolated[s]);
}

//-----------------------------------------------------------------------------
// DestroyZeroCrossings() frees the eight arrays held by the struct.
// The struct itself is not freed and its pointers are left unchanged.
// Input:
//   zero_crossings : Struct to keep the memory.
// Caution:
//   This function must be called after GetFourZeroCrossingIntervals().
//-----------------------------------------------------------------------------
void DestroyZeroCrossings(ZeroCrossings *zero_crossings) {
  // Negative-going crossings
  free(zero_crossings->negative_interval_locations);
  free(zero_crossings->negative_intervals);
  // Positive-going crossings
  free(zero_crossings->positive_interval_locations);
  free(zero_crossings->positive_intervals);
  // Peaks
  free(zero_crossings->peak_interval_locations);
  free(zero_crossings->peak_intervals);
  // Dips
  free(zero_crossings->dip_interval_locations);
  free(zero_crossings->dip_intervals);
}

//-----------------------------------------------------------------------------
// RawEventByDio() calculates the zero-crossing 
// This function is only used in OriginalDio().
// Input: 
//   boundary_f0      : Cutoff frequency of the low-pass filter.
//   fs               : Sampling frequency
//   x_spectrum       : Spectrum of the input waveform x
//   x_length         : Length of x
//   fft_size         : Length of FFT
//   f0_floor         : Fower limit of the estimated f0
//   f0_ceil          : Upper limit of the estimated f0
//   time_axis        : Temporal positions used for estimating f0
//   time_axis_length : Length of time_axis
// Output:
//   f0_deviations    : Standard deviation of four intervals
//   interpolated_f0  : Estimated F0 contour
//-----------------------------------------------------------------------------
void RawEventByDio(double boundary_f0, double fs, fft_complex *x_spectrum, 
  int x_length, int fft_size, double f0_floor, double f0_ceil, 
  double *time_axis, int time_axis_length, 
  double *f0_deviations, double *f0_candidates)
{
  double *filtered_signal = (double *)malloc(sizeof(double) * fft_size);
  GetFilteredSignal((int)(fs / boundary_f0 / 2 + 0.5), fft_size, x_spectrum, 
    x_length, filtered_signal);

  ZeroCrossings zero_crossings;
  memset(&zero_crossings, 0, sizeof(zero_crossings));
  GetFourZeroCrossingIntervals(filtered_signal, x_length, fs, 
    &zero_crossings);

  GetF0Candidates(&zero_crossings, boundary_f0, f0_floor, f0_ceil, 
    time_axis, time_axis_length, f0_candidates, f0_deviations);

  DestroyZeroCrossings(&zero_crossings);  
  free(filtered_signal);
}

//-----------------------------------------------------------------------------
// GetF0CandidateAndStabilityMap() calculates all f0 candidates and 
// their stabilities. The stability is the deviation of a candidate divided
// by the candidate itself (plus a small constant to avoid division by zero).
// This function is only used in the OriginalDio().
// Input:
//   boundary_f0_list       : Cutoff frequencies of low-pass filter
//   number_of_bands        : Number of variables of boundary_f0_list
//   fs_after_downsampling  : Sampling frequency after downsampling
//   y_length               : Length of downsampled signal
//   time_axis              : Temporal positions used for the f0 estimation
//   f0_length              : Length of time_axis
//   y_spectrum             : Spectrum of the downsampled signal
//   fft_size               : FFT size used to calculate y_spectrum
//   f0_floor               : Lower limit of the f0 candidate
//   f0_ceil                : Upper limit of the f0 candidate
// Output: 
//   f0_candidate_map       : f0 candidates of all temporal positions
//                            (indexed as [band][position])
//   f0_stability_map       : Stabilities of all f0 candidates
//                            (indexed as [band][position])
//-----------------------------------------------------------------------------
void GetF0CandidateAndStabilityMap(double *boundary_f0_list, 
  int number_of_bands, double fs_after_downsampling, int y_length, 
  double *time_axis, int f0_length, fft_complex *y_spectrum, int fft_size, 
  double f0_floor, double f0_ceil,
  double **f0_candidate_map, double **f0_stability_map) {
  // Work memory reused for every band.
  double *band_deviations = (double *)malloc(sizeof(double) * f0_length);
  double *band_candidates = (double *)malloc(sizeof(double) * f0_length);

  // Calculation of the acoustics events (zero-crossing)
  for (int band = 0; band < number_of_bands; ++band) {
    RawEventByDio(boundary_f0_list[band], fs_after_downsampling, y_spectrum,
      y_length, fft_size, f0_floor, f0_ceil, time_axis, f0_length,
      band_deviations, band_candidates);

    double *candidate_row = f0_candidate_map[band];
    double *stability_row = f0_stability_map[band];
    for (int t = 0; t < f0_length; ++t) {
      stability_row[t] = band_deviations[t] /
        (band_candidates[t]+0.00000001);
      candidate_row[t] = band_candidates[t];
    }
  }

  free(band_deviations);
  free(band_candidates);
}

//-----------------------------------------------------------------------------
// GetSamplesForDIO() calculates the number of samples required for Dio().
// Input:
//   fs             : Sampling frequency [Hz]
//   x_length       : Length of the input signal [Sample].
//   frame_period   : Frame shift [msec]
// Output:
//   The number of samples required to store the results of Dio()
//-----------------------------------------------------------------------------
int GetSamplesForDIO(int fs, int x_length, double frame_period)
{
  // Signal duration and frame shift, both expressed in seconds.
  const double signal_duration = (double)x_length / (double)fs;
  const double frame_shift = frame_period / 1000.0;

  // Whole frame shifts that fit into the signal, plus one for the frame
  // located at time zero.
  const int whole_frames = (int)(signal_duration / frame_shift);
  return whole_frames + 1;
}

//-----------------------------------------------------------------------------
// OriginalDio() estimates the F0 based on Distributed Inline-filter Operation.
// Input:
//   x                  : Input signal
//   x_length           : Length of x
//   fs                 : Sampling frequency
//   frame_period       : Frame shift
//   f0_floor           : Lower limit of f0 candidates
//   f0_ceil            : Upper limit of f0 candidates
//   channels_in_octave : Number of filters used to extract fundamental 
//                        component.
//   speed              : Downsampling factor. 1 is the slowest, 
//                        12 is the fastest
// Output
//   time_axis          : Temporal positions used for estimation
//   f0                 : Estimated f0 contour.
//-----------------------------------------------------------------------------
void OriginalDio(const double *x, int x_length, int fs, double frame_period,
  double f0_floor, double f0_ceil, double channels_in_octave, int speed,
  double *time_axis, double *f0)
{
  // Number of bands: octaves between f0_floor and f0_ceil multiplied by
  // channels_in_octave, truncated, plus one.
  const double octave_span = log(f0_ceil / f0_floor) / log(2.0);
  const int number_of_bands = 1 + (int)(octave_span * channels_in_octave);

  // Boundary frequencies grow geometrically, starting at f0_floor.
  double *band_boundaries =
    (double *)malloc(sizeof(double) * number_of_bands);
  for(int band = 0; band < number_of_bands; band++)
  {
    band_boundaries[band] = f0_floor * pow(2.0, band / channels_in_octave);
  }

  // The downsampling factor is "speed" limited to the range [1, 12].
  int decimation_ratio = speed;
  if(decimation_ratio > 12)
  {
    decimation_ratio = 12;
  }
  if(decimation_ratio < 1)
  {
    decimation_ratio = 1;
  }
  const int y_length = 1 + x_length / decimation_ratio;

  // FFT size: the power of two above the downsampled length plus a margin
  // derived from the lowest boundary frequency.
  const int margin_unit = (int)(1.0 + (double)fs / band_boundaries[0] / 2.0);
  const double padded_length = (double)y_length + (double)(4 * margin_unit);
  const int fft_exponent = (int)(log(padded_length) / log(2.0));
  const int fft_size = (int)pow(2.0, 1.0 + fft_exponent);

  // Spectrum used for the f0 estimation.
  fft_complex *y_spectrum =
    (fft_complex *)malloc(sizeof(fft_complex) * fft_size);
  GetSpectrumForEstimation(x, x_length, fs, y_length, fft_size,
    decimation_ratio, y_spectrum);

  // Temporal positions of the analysis frames, in seconds.
  const int f0_length = GetSamplesForDIO(fs, x_length, frame_period);
  for(int frame = 0; frame < f0_length; frame++)
  {
    time_axis[frame] = (double)frame * frame_period / 1000.0;
  }

  // The maps hold one row per band and one column per frame.
  double **f0_candidate_map =
    (double **)malloc(sizeof(double *) * number_of_bands);
  double **f0_stability_map =
    (double **)malloc(sizeof(double *) * number_of_bands);
  for(int band = 0; band < number_of_bands; band++)
  {
    f0_candidate_map[band] = (double *)malloc(sizeof(double) * f0_length);
    f0_stability_map[band] = (double *)malloc(sizeof(double) * f0_length);
  }

  const double fs_after_downsampling = (double)fs / (double)decimation_ratio;
  GetF0CandidateAndStabilityMap(band_boundaries, number_of_bands,
    fs_after_downsampling, y_length, time_axis, f0_length, y_spectrum,
    fft_size, f0_floor, f0_ceil, f0_candidate_map, f0_stability_map);

  // Selection of the best candidate of each frame.
  double *best_f0_contour = (double *)malloc(sizeof(double) * f0_length);
  GetBestF0Contour(f0_length, f0_candidate_map, f0_stability_map,
    number_of_bands, best_f0_contour);

  // Postprocessing that writes the final contour to f0.
  GetFinalF0Contour(frame_period, number_of_bands, fs, f0_candidate_map,
    best_f0_contour, f0_length, f0);

  // Clean up (release the memory allocated above).
  for(int band = 0; band < number_of_bands; band++)
  {
    free(f0_candidate_map[band]);
    free(f0_stability_map[band]);
  }
  free(f0_candidate_map);
  free(f0_stability_map);
  free(best_f0_contour);
  free(y_spectrum);
  free(band_boundaries);
}

//-----------------------------------------------------------------------------
// DIO (version 0.1.1)
// Input:
//   x          : Input signal
//   x_length   : Length of x
//   fs         : Sampling frequency
//   option     : Struct to order the parameter for DIO
// Output:
//   time_axis  : Temporal positions.
//   f0         : F0 contour.
//-----------------------------------------------------------------------------
void Dio(const double *x, int x_length, int fs, DioOption option,
  double *time_axis, double *f0)
{
  // Unpack the option struct into the individual arguments expected by
  // OriginalDio(); the local types match OriginalDio()'s parameter types.
  const double frame_period = option.frame_period;
  const double f0_floor = option.f0_floor;
  const double f0_ceil = option.f0_ceil;
  const double channels_in_octave = option.channels_in_octave;
  const int speed = option.speed;

  OriginalDio(x, x_length, fs, frame_period, f0_floor, f0_ceil,
    channels_in_octave, speed, time_axis, f0);
}

//-----------------------------------------------------------------------------
// DIO (version 0.1.0)
// Only the parameter "frame_period" can be changed here. To change the
// other parameters, use the latest Dio() instead.
// This version will be removed in the future.
// Input:
//   x              : Input signal
//   x_length       : Length of x
//   fs             : Sampling frequency
//   frame_period   : Frame shift [msec]
// Output:
//   time_axis      : Temporal positions.
//   f0             : F0 contour.
//-----------------------------------------------------------------------------
void Dio(const double *x, int x_length, int fs, double frame_period,
  double *time_axis, double *f0)
{
  // Fixed settings of this legacy entry point.
  const double kTargetFs = 4000;
  const double kF0Floor = 80;
  const double kF0Ceil = 640;
  const double kChannelsInOctave = 2;

  // Ratio of fs to the target rate, truncated; OriginalDio() limits the
  // value to its supported range.
  const int speed = (int)(fs / kTargetFs);

  OriginalDio(x, x_length, fs, frame_period, kF0Floor, kF0Ceil,
    kChannelsInOctave, speed, time_axis, f0);
}

}
}
}
