/*
 * Author: Andrei Zavada <johnhommer@gmail.com>
 *
 * License: GPL-2+
 *
 * Initial version: 2008-11-11
 *
 */



#include <unistd.h>
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <fstream>
#include <sstream>
#include <limits>
#include <stdexcept>
#include <vector>
#include <valarray>
#include <numeric>

using namespace std;


// Iterator shorthands for vector<double> and vector<unsigned>.
// NOTE(review): neither alias is referenced in the code visible in this file.
typedef vector<double>::iterator vd_i;
typedef vector<unsigned>::iterator vu_i;


// Method used by convolute_matrix_against_target() to reduce a profile and
// its target to a single cost-function (CF) value; selected with -V.
enum TConvType {
	SDFF_CMP_NONE   = 0,  // no comparison requested; no .CF files are written
	SDFF_CMP_SQDIFF = 1,  // square root of the sum of squared differences
	SDFF_CMP_WEIGHT = 2   // sum of profile values multiplied by target values
};

// Operation used in main() to combine the per-unit matrices into the grand
// total; selected with -z.
enum TCFOpType {
	SDFF_CFOP_AVG  = 0,  // element-wise sum divided by the count of finite values
	SDFF_CFOP_PROD = 1,  // element-wise product
	SDFF_CFOP_SUM  = 2   // element-wise sum
};


// Run-time settings of the program.  The constructor installs the defaults;
// parse_cmdline() overrides them from the command line.
struct SOptions {
	// --- directories and file names ---
	const char *working_dir;          // directory to cd into before working (-C)
	const char *target_profiles_dir;  // directory searched for target profiles (-G)
	const char *grand_target_fname;   // target profile for the grand total (-T)
	const char *grand_result_fname;   // base name of the grand total (-U); null = derive from -z

	// --- what to process and how to combine it ---
	vector<string> units;             // unit labels / file names to read
	TCFOpType cf_op_type;             // how per-unit matrices are combined (-z)
	vector<unsigned> dims;            // matrix dimensions, one entry per -x

	// --- switches ---
	bool go_sdf:1;                    // read .sxf/.sdf data; false (-R) reads .var data
	bool use_shf:1;                   // multiply the profile by the shf column (-H)
	bool do_normalise:1;              // divide the profile by its largest value (-N)
	bool do_matrix_output:1;          // write ".mx" files (-o m)
	bool do_column_output:1;          // write ".col" files (-o c)
	bool assume_no_shf_value:1;       // records carry no shf field (-H-)
	bool assume_generic_data:1;       // records carry a single data value only
	bool assume_no_timepoint:1;       // records carry no leading time field (-t-)
	bool octave_compat:1;             // spell non-finite values "NaN"/"Inf" (-O)
	bool verbosely:1;                 // print progress messages; cleared by -q

	// --- sampling of .var data (-d from:period[:window]) ---
	double sample_from;
	double sample_period;
	double sample_window;

	// --- field selection in .var records (-f field:fields) ---
	unsigned field_n;
	unsigned of_fields;

	unsigned skipped_first_lines;     // leading data rows to discard (-F)
	TConvType conv_type;              // comparison method against the target (-V)

	SOptions()
	      : working_dir         ("."),
		target_profiles_dir ("."),
		grand_target_fname  ("overall.target"),
		grand_result_fname  (nullptr),
		cf_op_type          (SDFF_CFOP_AVG),
		go_sdf              (true),
		use_shf             (false),
		do_normalise        (false),
		do_matrix_output    (true),
		do_column_output    (false),
		assume_no_shf_value (false),
		assume_generic_data (true),
		assume_no_timepoint (false),
		octave_compat       (false),
		verbosely           (true),
		sample_from         (0),
		sample_period       (0),
		sample_window       (0),
		field_n             (1),
		of_fields           (1),
		skipped_first_lines (0),
		conv_type           (SDFF_CMP_NONE)
		{}
};

// Program-wide settings: filled in by parse_cmdline(), read by every other
// function in this file, and adjusted in places by get_unit_cf().
static SOptions Options;

//static size_t dim_prod;

// Loads the profile of one unit into Mi and, when a comparison method is
// selected, stores the resulting cost-function value in *result.
static int get_unit_cf( const char *unit_label, valarray<double> &Mi, double *result);

// Command-line handling.
static int parse_cmdline( int argc, char *argv[]);
static void usage( const char *argv0);

// Status codes.  parse_cmdline() returns EARGS for bad arguments and
// EHELPREQUEST for -h; main() also returns EARGS when run without arguments.
// NOTE(review): EFILES and ERANGES are not used in the code visible here.
#define SDFCAT_EARGS		-1
#define SDFCAT_EHELPREQUEST	-2
#define SDFCAT_EFILES		-3
#define SDFCAT_ERANGES		-4


// Matrix input/output and comparison helpers, defined further down.
static int read_matrices_from_sxf( const char* fname, valarray<double> &M, valarray<double> &H, double *sdf_max_p = nullptr);
static int construct_matrix_from_var( const char* fname, valarray<double> &M);
static int read_matrix( const char*, valarray<double>&);
static int write_matrix( const char*, const valarray<double>&);
static double convolute_matrix_against_target( const valarray<double>&, const valarray<double>&);



// Reads every requested unit, folds the per-unit matrices into a grand total
// (sum, average or product, per Options.cf_op_type) and writes the results.
//
// Returns 0 on success, or -4 when a unit or the grand target profile could
// not be read.
static int
fold_units()
{
	size_t dim_prod = 1;
	for ( unsigned d : Options.dims )
		dim_prod *= d;

	const bool as_product = (Options.cf_op_type == SDFF_CFOP_PROD);

	// The accumulator has to start at the identity element of the folding
	// operation: a zero-filled G would make every product come out as 0.
	valarray<double>
		Mi (dim_prod),
		G  (as_product ? 1. : 0., dim_prod),
		G_valid_cases (0., dim_prod);

	for ( const string &unit : Options.units ) {
		// NaN rather than garbage ends up in the .CF file should
		// get_unit_cf() succeed without computing a value.
		double CFi = NAN;
		if ( get_unit_cf( unit.c_str(), Mi, &CFi) )  // does its own convolution
			return -4;

		// Non-finite cells are replaced by the identity element so that
		// they do not poison the total, and are left out of the average.
		for ( size_t i = 0; i < dim_prod; i++ )
			if ( !isfinite( Mi[i]) )
				Mi[i] = as_product ? 1. : 0.;
			else
				G_valid_cases[i] += 1.;

		switch ( Options.cf_op_type ) {
		case SDFF_CFOP_SUM:
		case SDFF_CFOP_AVG:
			G += Mi;
			break;
		case SDFF_CFOP_PROD:
			G *= Mi;
			break;
		}

		if ( Options.conv_type != SDFF_CMP_NONE ) {
			ofstream o( unit + ".CF");
			o << CFi << endl;
		}
	}

	// Cells with no finite contribution at all are divided by zero here and
	// therefore come out non-finite in the average.
	if ( Options.cf_op_type == SDFF_CFOP_AVG )
		G /= G_valid_cases;

	if ( Options.units.size() > 1 || Options.grand_result_fname ) {

		string grand_total_bname;
		if ( Options.grand_result_fname )
			grand_total_bname = Options.grand_result_fname;
		else if ( Options.cf_op_type == SDFF_CFOP_AVG )
			grand_total_bname = "AVERAGE";
		else if ( Options.cf_op_type == SDFF_CFOP_SUM )
			grand_total_bname = "SUM";
		else
			grand_total_bname = "PRODUCT";

		if ( Options.do_matrix_output || Options.do_column_output ) {
			// write_matrix() indexes dims[0] and dims[1]
			if ( Options.dims.size() >= 2 ) {
				if ( write_matrix( grand_total_bname.c_str(), G) )
					cerr << "Failed to write \"" << grand_total_bname << "\"\n";
			} else
				cerr << "Not writing \"" << grand_total_bname
				     << "\": matrix output needs two dimensions (-x)\n";
		}

		if ( Options.conv_type != SDFF_CMP_NONE ) {
			valarray<double> T (dim_prod);
			if ( read_matrix( (string(Options.target_profiles_dir) + '/' + Options.grand_target_fname).c_str(), T) )
				return -4;
			double grandCF = convolute_matrix_against_target( G, T);

			ofstream grand_CF_strm ((grand_total_bname + ".CF").c_str());
			grand_CF_strm << grandCF << endl;
		}
	}

	return 0;
}


// Program entry point.
//
// Exit status: 0 on success; SDFCAT_EARGS when run without arguments; -1 on
// a command-line error or help request; -2 when the working directory cannot
// be entered; -4 when input data or a target profile cannot be read.
int
main( int argc, char *argv[])
{
	if ( argc == 1 ) {
		usage( argv[0]);
		return SDFCAT_EARGS;
	}

	{
		int parse_retval = parse_cmdline( argc, argv);
		if ( parse_retval ) {
			if ( parse_retval == SDFCAT_EHELPREQUEST )
				usage( argv[0]);
			return -1;
		}

		if ( Options.assume_no_shf_value && Options.use_shf ) {
			cerr << "Conflicting options (-H and -H-)\n";
			return -1;
		}
	}

      // cd as requested
	char *pwd = nullptr;
	if ( Options.working_dir ) {
		pwd = getcwd( nullptr, 0);  // buffer is malloc'ed for us
		if ( chdir( Options.working_dir) ) {
			fprintf( stderr, "Failed to cd to \"%s\"\n", Options.working_dir);
			free( pwd);
			return -2;
		}
	}

	int retval = fold_units();

      // go back where we started; best effort only
	if ( pwd ) {
		if ( chdir( pwd) )
			;
		free( pwd);
	}

	return retval;
}














// Loads the profile of one unit into M, optionally writes it out, and
// optionally compares it against the unit's target profile.
//
//   ulabel    unit label or file name, as given on the command line
//   M         receives the profile; its size fixes how many values are read
//   result_p  if non-null, receives the cost-function value; it is set to
//             NaN whenever no value is computed
//
// With Options.go_sdf the data are looked for in "<ulabel>" (as generic
// single-value records), then "<ulabel>.sxf", then "<ulabel>.sdf" (without
// the shf field); otherwise in "<ulabel>" and then "<ulabel>.var".
//
// Returns 0 on success, -2 when no data could be read, or when the target
// profile is missing and no matrix output was requested either.
//
// NOTE(review): the format probing below modifies Options.assume_generic_data,
// Options.assume_no_shf_value and Options.use_shf, and those changes remain
// in effect after this function returns.
static int
get_unit_cf( const char* ulabel, valarray<double> &M, double *result_p)
{
	// Never leave the caller with an indeterminate value: the target
	// profile may turn out to be missing while the call still succeeds.
	if ( result_p )
		*result_p = NAN;

	valarray<double> H (M.size()), T (M.size());

	string eventual_fname;
	if ( Options.go_sdf ) {
		// 1. the label taken as a file name, generic data
		Options.assume_generic_data = true;
		eventual_fname = ulabel;
		int failed = read_matrices_from_sxf( eventual_fname.c_str(), M, H);

		// 2. <label>.sxf
		if ( failed ) {
			Options.assume_generic_data = false;
			eventual_fname = string(ulabel) + ".sxf";
			failed = read_matrices_from_sxf( eventual_fname.c_str(), M, H);
		}

		// 3. <label>.sdf, which has no shf field
		if ( failed ) {
			Options.assume_no_shf_value = true;
			Options.use_shf = false;
			eventual_fname = string(ulabel) + ".sdf";
			failed = read_matrices_from_sxf( eventual_fname.c_str(), M, H);
		}

		if ( failed ) {
			fprintf( stderr, "Failed to read data from \"%s\" or \"%s.s{x,d}f\"\n", ulabel, ulabel);
			return -2;
		}
	} else {  // go var
		eventual_fname = ulabel;
		int failed = construct_matrix_from_var( eventual_fname.c_str(), M);
		if ( failed ) {
			eventual_fname = string(ulabel) + ".var";
			failed = construct_matrix_from_var( eventual_fname.c_str(), M);
		}
		if ( failed ) {
			fprintf( stderr, "Failed to read \"%s.var\"\n", ulabel);
			return -2;
		}
	}

	if ( (Options.do_matrix_output || Options.do_column_output)
	     && Options.dims.size() == 2 ) {  // only applicable to 2-dim matrices

		write_matrix( eventual_fname.c_str(), M);
		if ( Options.use_shf )
			write_matrix( (string(ulabel) + "(shf)").c_str(), H);
	}

	if ( Options.conv_type != SDFF_CMP_NONE ) {
		const string target_fname =
			string(Options.target_profiles_dir) + '/' + eventual_fname + ".target";
		if ( read_matrix( target_fname.c_str(), T) ) {
			// A missing target is tolerated as long as the run still
			// produces matrix output.
			if ( !Options.do_matrix_output && !Options.do_column_output ) {
				fprintf( stderr, "Failed to read target profile for \"%s\", and no matrix folding output specified\n",
					 eventual_fname.c_str());
				return -2;
			}
		} else if ( result_p )
			*result_p = convolute_matrix_against_target( M, T);
	}

	return 0;
}



// Reads one whitespace-delimited token from ifs and converts it to a double.
//
//   ifs  stream to read from
//   v    receives the value; left untouched unless 0 is returned
//
// Returns 0 on success, -1 when no token could be extracted (end of file or
// stream error).
// Throws invalid_argument when the token is not a number, or is a number
// that does not fit into a double.
int
read_datum( ifstream &ifs, double& v)
{
	static string token;  // reused between calls

	// Test the extraction itself rather than good(): a final token that
	// ends right at end-of-file sets eofbit yet is a perfectly valid datum.
	if ( !(ifs >> token) )
		return -1;

	try {
		v = stod( token);
	} catch ( const invalid_argument&) {
		// Fallback for not-a-number / infinity spellings that stod()
		// did not accept.
		if ( strcasecmp( token.c_str(), "NaN") == 0 )
			v = NAN;
		else if ( strcasecmp( token.c_str(), "inf") == 0 || strcasecmp( token.c_str(), "infinity") == 0 )
			v = INFINITY;
		else
			throw;  // rethrow
	} catch ( const out_of_range&) {
		// Callers handle invalid_argument only; report it that way
		// instead of letting an unexpected exception type escape.
		throw invalid_argument( "value out of range: " + token);
	}
	return 0;
}



// ------------------------- matrix io ------

// Reads M.size() records from an sxf/sdf-style text file.
//
//   fname      file to read
//   M          receives the first data value of every record
//   H          receives the shf value of every record, when the records
//              have one (neither Options.assume_generic_data nor
//              Options.assume_no_shf_value set)
//   sdf_max_p  if non-null, receives the largest value read into M
//
// Lines starting with '#' at the current read position are skipped.  Each
// record is: an optional time field (unless Options.assume_no_timepoint),
// the data value and, unless Options.assume_generic_data, an optional shf
// value followed by a spike count.  The first Options.skipped_first_lines
// records are read and then discarded.
//
// After reading, M is multiplied by H if Options.use_shf is set, and divided
// by the largest value read if Options.do_normalise is set.
//
// Returns 0 on success, -1 if the file cannot be opened, -2 on a short read
// or an unparseable value.
static int
read_matrices_from_sxf( const char *fname, valarray<double> &M, valarray<double> &H, double *sdf_max_p)
{
	if ( Options.verbosely )
		printf( "Trying \"%s\" ... ", fname);

	ifstream ins( fname);
	if ( !ins.good() ) {
		if ( Options.verbosely )
			printf( "not found\n");
		return -1;
	} else
		if ( Options.verbosely )
			printf( "found\n");

	double	sdf_max = -INFINITY,
		discarded;
	size_t	idx, row;
	// idx only starts advancing once `row' has passed the skipped records;
	// until then every record is read into M[0] and overwritten.
	for ( idx = row = 0; idx < M.size(); idx += (++row > Options.skipped_first_lines)) {
		while ( ins.peek() == '#' )
			ins.ignore( numeric_limits<streamsize>::max(), '\n');

		if ( ins.eof() ) {
			fprintf( stderr, "Short read from \"%s\" at element %zu\n", fname, idx);
			return -2;
		}
		if ( !Options.assume_no_timepoint )
			ins >> discarded;       // time

		try {
			read_datum( ins, M[idx]);
			if ( !Options.assume_generic_data ) {
				if ( !Options.assume_no_shf_value )
					read_datum( ins, H[idx]);  // shf
				read_datum( ins, discarded);       // nspikes
			}
		} catch ( const invalid_argument&) {
			fprintf( stderr, "Bad value read from \"%s\" at element %zu\n", fname, idx);
			return -2;
		}

		if ( M[idx] > sdf_max )
			sdf_max = M[idx];
	}

	if ( Options.use_shf )
		M *= H;

	// Scale the whole matrix.  (Indexing M with idx here would address one
	// element past the end, as idx == M.size() once the loop is done.)
	if ( Options.do_normalise ) {
		M /= sdf_max;
		//H /= sdf_max;
	}

	if ( sdf_max_p )
		*sdf_max_p = sdf_max;

	return 0;
}





// Builds M from a .var-style text file by averaging one field over
// consecutive sampling windows.
//
// Element idx of M is the mean of the selected field over the records
// collected for the window centred at
//   Options.sample_from + Options.sample_period * idx
// and Options.sample_window wide.  Every record starts with a time value,
// followed by Options.of_fields fields of which number Options.field_n
// (1-based) is the one sampled.
//
// Returns 0 on success, -1 if the file cannot be opened, -2 if the data run
// out before the last element of M is reached (or on any exception).
static int
construct_matrix_from_var( const char *fname, valarray<double> &M)
{
	ifstream ins( fname);
	if ( !ins.good() ) {
//		cerr << "No results in " << fname << endl;
		return -1;
	}

	double	at, _, var;   // record time, scratch for unwanted fields, sampled field
	vector<double> sample;
	size_t	idx;

	string line;
	try {
		for ( idx = 0; idx < M.size(); ++idx ) {
			M[idx] = 0.;

			// skip comment lines at the current position
			while ( ins.peek() == '#' )
				ins.ignore( numeric_limits<streamsize>::max(), '\n');

			sample.clear();
			do {
				getline( ins, line, '\n');
				// Running out of data is only acceptable while
				// filling the last element; the line obtained by
				// the getline() that hit end-of-file is not used.
				if ( ins.eof() ) {
					if ( idx == M.size()-1 )
						break;
					else
						throw "bork";  // caught by catch (...) below
				}
				stringstream fields (line);
				fields >> at;
				for ( size_t f = 1; f <= Options.of_fields; ++f )
					if ( f == Options.field_n )
						fields >> var;
					else
						fields >> _;

				// Records before the window opens are dropped;
				// `continue' goes to the loop condition, which holds
				// for such records, so reading goes on.
				if ( at < Options.sample_from + Options.sample_period * idx - Options.sample_window/2 )
					continue;

				// Note that the first record past the end of the
				// window is added as well, before the condition
				// below ends the loop.
				sample.push_back( var);

			} while ( at <= Options.sample_from + Options.sample_period * idx + Options.sample_window/2 );

			// An empty sample makes this 0/0, i.e. NaN.
			M[idx] = accumulate( sample.begin(), sample.end(), 0.) / sample.size();
		}
	} catch (...) {
		// NOTE(review): idx is a size_t, for which the matching
		// conversion is %zu rather than %zd.
		fprintf( stderr, "Short read, bad data or some other IO error in %s at record %zd\n", fname, idx);
		return -2;
	}

	// if ( Options.do_normalise ) {
	// 	for ( idx = 0; idx < dim_prod; idx++ )
	// 		M[idx] /= sdf_max;
	// 	// if ( H )
	// 	// 	for ( idx = 0; idx < dim_prod; idx++ )
	// 	// 		H[idx] /= sdf_max;
	// }

	return 0;
}





// Reads M.size() whitespace-separated numbers from a text file into M.
//
//   fname  file to read; leading lines starting with '#' are skipped
//   M      receives the values, in file order
//
// Values are converted with stod(), so the not-a-number and infinity
// spellings produced by write_matrix() are read back as such.
//
// Returns 0 on success, -1 if the file cannot be opened, holds fewer than
// M.size() values, or contains something that is not a number.
static int
read_matrix( const char *fname, valarray<double> &M)
{
	ifstream ins( fname);
	if ( !ins.good() ) {
		cerr << "No results in " << fname << endl;
		return -1;
	}

	while ( ins.peek() == '#' )
		ins.ignore( numeric_limits<streamsize>::max(), '\n');  // skip header

	string token;
	for ( size_t idx = 0; idx < M.size(); idx++ ) {
		// Check the extraction itself: eof() alone misses a short read
		// when the file ends in a newline, and misses bad data entirely.
		if ( !(ins >> token) ) {
			fprintf( stderr, "Short read from \"%s\" at element %zu\n", fname, idx);
			return -1;
		}
		try {
			M[idx] = stod( token);
		} catch ( const logic_error&) {  // invalid_argument or out_of_range
			fprintf( stderr, "Bad value \"%s\" in \"%s\" at element %zu\n", token.c_str(), fname, idx);
			return -1;
		}
	}
	return 0;
}






static int
write_matrix( const char *fname, const valarray<double> &X)
{
	if ( Options.do_matrix_output ) {
		ofstream outs( (string(fname) + ".mx").c_str());
		if ( Options.verbosely )
			printf( "Writing \"%s.mx\"\n", fname);
		for ( size_t k = 0; k < Options.dims[0]; k++ )
			for ( size_t l = 0; l < Options.dims[1]; l++ ) {
				if ( l > 0 )  outs << "\t";
				const double &datum = X[k*Options.dims[0] + l];
				if ( Options.octave_compat && !std::isfinite(datum) )
					outs << (std::isinf(datum) ? "Inf" : "NaN");
				else
					outs << datum;
				if ( l == Options.dims[1]-1 ) outs << endl;
			}
		if ( !outs.good() )
			return -1;
	}

	if ( Options.do_column_output ) {
		ofstream outs( (string(fname) + ".col").c_str());
		if ( Options.verbosely )
			printf( "Writing \"%s.mx\"\n", fname);
		for ( size_t k = 0; k < Options.dims[0]; k++ )
			for ( size_t l = 0; l < Options.dims[1]; l++ )
				outs << l << "\t" << k << "\t" << X[k*Options.dims[0] + l] << endl;
		if ( !outs.good() )
			return -1;
	}

	return 0;
}






// Reduces profile M and target T to a single cost-function value, using the
// method selected in Options.conv_type:
//   SDFF_CMP_WEIGHT  sum over all elements of M[i] * T[i]
//   SDFF_CMP_SQDIFF  square root of the sum of (M[i] - T[i])^2
//   SDFF_CMP_NONE    NaN
// T is indexed over the full length of M.
static double
convolute_matrix_against_target( const valarray<double> &M, const valarray<double> &T)
{
	if ( Options.conv_type == SDFF_CMP_NONE )
		return NAN;

	const size_t n_elems = M.size();
	double total = 0.;

	if ( Options.conv_type == SDFF_CMP_WEIGHT ) {
		for ( size_t i = 0; i != n_elems; ++i )
			total += M[i] * T[i];
		return total;
	}

	if ( Options.conv_type == SDFF_CMP_SQDIFF ) {
		for ( size_t i = 0; i != n_elems; ++i )
			total += pow( M[i] - T[i], 2);
		return sqrt( total);
	}

	// a value outside the enumeration: nothing accumulated
	return total;
}










// Parses the command line into the global Options.
//
// Options are handled with getopt(); every remaining (non-option) argument
// is appended to Options.units, as are the arguments of any -u options.
//
// Returns 0 on success, SDFCAT_EHELPREQUEST when -h was given, and
// SDFCAT_EARGS on an unknown option, a malformed option argument, or when
// no unit or no dimension was specified.
static int
parse_cmdline( int argc, char *argv[])
{
	// getopt() returns an int; keeping it in a char would make the
	// comparison against -1 always fail where plain char is unsigned.
	int c;
	while ( (c = getopt( argc, argv, "OC:Rd:f:G:H::-t:Nx:T:U:u:V:z:o:F:qh")) != -1 ) {
		switch ( c ) {
		case 'C':
			Options.working_dir = optarg;
			break;

		case 'R':
			Options.go_sdf = false;
			break;

		case 'T':
			Options.grand_target_fname = optarg;
			break;

		case 'U':
			Options.grand_result_fname = optarg;
			break;

		case 'd':
			if ( sscanf( optarg, "%lg:%lg:%lg",
				     &Options.sample_from, &Options.sample_period, &Options.sample_window) < 2 ) {
				cerr << "Expecting three parameter with -d (from:period[:window])\n";
				return SDFCAT_EARGS;
			}
			// the window defaults to the sampling period
			if ( Options.sample_window == 0. )
				Options.sample_window = Options.sample_period;
			break;

		case 'f':
			if ( sscanf( optarg, "%u:%u",
				     &Options.field_n, &Options.of_fields) < 1 ) {
				cerr << "Expecting two parameters with -f (field:fields)\n";
				return SDFCAT_EARGS;
			}
			break;

		case 'G':
			Options.target_profiles_dir = optarg;
			break;

		case 'u':
			Options.units.push_back( string(optarg));
			break;

		case 'H':
			// -H takes an optional argument: bare -H turns shf
			// scaling on, -H- declares that there is no shf field
			if ( !optarg )
				Options.use_shf = true;
			else if ( strcmp( optarg, "-") == 0 ) {
				Options.assume_no_shf_value = true;
				Options.use_shf = false;
			} else {
				cerr << "Unrecognised option to -H: `" << optarg << "\n";
				return SDFCAT_EARGS;
			}
			break;

		case 't':
			if ( optarg ) {
				if ( strcmp( optarg, "-") == 0 ) {
					Options.assume_no_timepoint = Options.assume_generic_data = true;
					Options.use_shf = false;
				} else {
					cerr << "Option -t can only be -t-\n";
					return SDFCAT_EARGS;
				}
			}
			break;

		case 'N':
			Options.do_normalise = true;
			break;

		case 'V':
			if ( strcmp( optarg, "sqdiff") == 0 )
				Options.conv_type = SDFF_CMP_SQDIFF;
			else if ( strcmp( optarg, "weight") == 0 )
				Options.conv_type = SDFF_CMP_WEIGHT;
			else {
				cerr << "-V takes `sqdiff' or `weight'\n";
				return SDFCAT_EARGS;
			}
			break;

		case 'z':
			if ( strcmp( optarg, "sum") == 0 )
				Options.cf_op_type = SDFF_CFOP_SUM;
			else if ( strcmp( optarg, "avg") == 0 )
				Options.cf_op_type = SDFF_CFOP_AVG;
			else if ( strcmp( optarg, "prod") == 0 )
				Options.cf_op_type = SDFF_CFOP_PROD;
			else {
				cerr << "-z can be `sum', `avg' or `prod'\n";
				return SDFCAT_EARGS;
			}
			break;

		case 'o':
			// either letter, in any order, switches its format on;
			// a missing letter switches it off
			Options.do_matrix_output = (strchr( optarg, 'm') != nullptr);
			Options.do_column_output = (strchr( optarg, 'c') != nullptr);
			break;

		case 'x':
		{
			unsigned d;
			if ( sscanf( optarg, "%u", &d) < 1 ) {
				cerr << "-x takes an unsigned\n";
				return SDFCAT_EARGS;
			}
			Options.dims.push_back( d);
			break;
		}

		case 'F':
			if ( sscanf( optarg, "%u", &Options.skipped_first_lines) < 1 ) {
				cerr << "-F takes an unsigned\n";
				return SDFCAT_EARGS;
			}
			break;

		case 'O':
			Options.octave_compat = true;
			break;

		case 'q':
			Options.verbosely = false;
			break;

		case 'h':
			return SDFCAT_EHELPREQUEST;

		default:
			return SDFCAT_EARGS;
		}
	}

	// whatever is left on the command line is a unit
	for ( int i = optind; i < argc; i++ )
		Options.units.push_back( string(argv[i]));

	if ( Options.units.empty() ) {
		cerr << "No units (-u) specified\n";
		return SDFCAT_EARGS;
	}
	if ( Options.dims.empty() ) {
		cerr << "No dimensions (-x) specified\n";
		return SDFCAT_EARGS;
	}

	return 0;
}




// Prints the command-line synopsis to standard output.
//
//   argv0  program name to show in the "Usage:" line
//
// The defaults quoted in the text are taken from the current contents of
// Options.
static void
usage( const char *argv0)
{
	cout << "Usage: " << argv0 << " [options] [unitname_or_filename] ...\n"
		"Options are\n"
		" -C <dir>\t\tcd into dir before working\n"
		" -G <dir>\t\tSearch for target profiles in dir (default " << Options.target_profiles_dir << ")\n"
		" -x <dim>\t\tDimensions for the target and data matrices (repeat as necessary)\n"
		" -V[sqdiff|weight]\tObtain resulting profile by this convolution method:\n"
		"\t\t\t  sum of squared differences between source and target profiles,\n"
		"\t\t\t  sum of source profile values weighted by those in the target profile\n"
		" -z[sum|avg|prod]\tOperation applied to individual CFs, to produce a grand total\n"
		" -T <fname>\tRead reference profile from this file (default \"" << Options.grand_target_fname << "\")\n"
		" -U <fname>\tWrite the total result to this file (default is {SUM,AVERAGE,PRODUCT}.mx, per option -z)\n"
		"\n"
		" -R\t\t\tCollect .var data rather than .sxf\n"
		"With -R, use\n"
		" -f <unsigned n1>:<unsigned n2>\n"
		"\t\t\tExtract n1th field of n2 consec. fields per record\n"
		"\t\t\t  (default " << Options.field_n << " of " << Options.of_fields << ")\n"
		" -d <double f>:<double p>:<double ws>\tSample from time f at period p with window size ws\n"
		"otherwise:\n"
		" -F <unsigned>\t\tRead sxf data from that position, not from 0\n"
		" -H \t\t\tMultiply sdf by shf\n"
		" -H-\t\t\tAssume there is no shf field in .sxf file\n"
		" -t-\t\t\tAssume no timestamp in data file; implies -H-\n"
		"\n"
		" -o[mc]\t\t\tWrite <unit>.mx ([m]atrix) and/or <unit>.col ([c]olumn) profiles\n"
		" -O\t\t\tWrite nan and inf as \"NaN\" and \"Inf\" to please octave\n"
		" -q\t\t\tSuppress normal messages\n"
		" -h\t\t\tDisplay this help\n"
		"\n"
		" unitname_or_filename\tData vector (e.g., PN.0; multiple entries as necessary;\n"
		"\t\t\t  will try label.sxf then label.sdf)\n";
}

// EOF
