//
// speed optimized scolorq by berupon at gmail dot com
//
// see original copyright notice.
//      
//      |
//      |
//      V
//
/* Copyright (c) 2006 Derrick Coetzee

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

#include "stdafx.h"

#include "scolorq.h"

#include <algorithm>
#include <math.h>
#include <stdio.h>
#include <time.h>
#include <vector>
#include <deque>
#include <limits>
#include <iostream>
#include <random>
#include "dxor.h"

using namespace std;

// Returns how many times the image can be halved (each dimension shifted
// right by one) until it holds at most MAX_PIXELS pixels.
//
// Halving stops early once either dimension has reached 1: shifting further
// would produce a zero-sized coarse level (e.g. for a 1 x 10000 image), which
// leaves nothing to optimise at that level.
size_t compute_max_coarse_level(size_t width, size_t height)
{
	// We want the coarsest layer to have at most MAX_PIXELS pixels
	const size_t MAX_PIXELS = 4000;
	size_t result = 0;
	while (width * height > MAX_PIXELS && width > 1 && height > 1) {
		width  >>= 1;
		height >>= 1;
		++result;
	}
	return result;
}

// Overwrites every element of `a` with a fresh value drawn from dxor156().
// Cells are visited row by row, then column by column, then along depth;
// that order decides which random draw ends up in which cell.
void fill_random(Array3D<double>& a)
{
	const size_t rows = a.height_;
	const size_t cols = a.width_;
	const size_t layers = a.depth_;
	for (size_t row = 0; row < rows; ++row) {
		for (size_t col = 0; col < cols; ++col) {
			size_t layer = 0;
			while (layer < layers) {
				a(col, row, layer) = dxor156();
				++layer;
			}
		}
	}
}

// Fills `result` with a random permutation of the integers 0 .. count-1.
//
// count  - number of elements to permute.
// result - resized to `count` and overwritten.
//
// std::random_shuffle was deprecated in C++14 and removed in C++17, so the
// shuffle is done with std::shuffle. The engine is a function-local static
// with the default seed: successive calls yield different permutations, and
// the sequence is repeatable from run to run.
void random_permutation(
	size_t count,
	std::vector<int>& result
	)
{
	static std::mt19937 engine;
	result.resize(count);
	for (size_t i=0; i<count; ++i) {
		result[i] = static_cast<int>(i);
	}
	std::shuffle(result.begin(), result.end(), engine);
}

// Fills `result` with every (x, y) coordinate of a width x height grid in
// random order. A 1-D permutation of the linear indices is generated and
// consumed back to front; each index is split into column (index % width)
// and row (index / width).
void random_permutation_2d(
	size_t width,
	size_t height,
	deque< pair<int, int> >& result
	)
{
	vector<int> linear_order;
	random_permutation(width*height, linear_order);

	const size_t total = linear_order.size();
	result.resize(total);

	size_t out = 0;
	for (vector<int>::const_reverse_iterator it = linear_order.rbegin();
		 it != linear_order.rend();
		 ++it, ++out)
	{
		const int linear = *it;
		result[out] = pair<int,int>(linear % width, linear / width);
	}
}

// Sets every pixel of `image` to a Color that has been zero()-ed.
void init_image(Image& image)
{
	Color blank;
	blank.zero();

	Color* const first = image.pBuff_;
	Color* const last = first + image.width_*image.height_;
	for (Color* pixel = first; pixel != last; ++pixel) {
		*pixel = blank;
	}
}

// Builds the b array from the filter kernel: for every offset (j_x, j_y)
// inside `b`, sums the component-wise products of each pair of filter taps
// that overlap when one copy of the filter is centred on the middle of `b`
// and the other is centred on (j_x, j_y).
//
// filter_weights - filter kernel.
// b              - output; every element is overwritten.
//
// The vertical overlap test uses radius_height. It previously used
// radius_width, which is only equivalent for square filters and could index
// filter_weights out of range otherwise.
void compute_b_array(
	const Image& filter_weights,
	Image& b
	)
{
	// Assume that the pixel i is always located at the center of b,
	// and vary pixel j's location through each location in b.
	int radius_width = (filter_weights.width_ - 1)/2;
	int radius_height = (filter_weights.height_ - 1)/2;
	int offset_x = (b.width_ - 1)/2 - radius_width;
	int offset_y = (b.height_ - 1)/2 - radius_height;
	for (int j_y=0; j_y<b.height_; ++j_y) {
		for (int j_x=0; j_x<b.width_; ++j_x) {
			Color sum;
			sum.zero();
			for (int k_y=0; k_y < filter_weights.height_; ++k_y) {
				for (int k_x = 0; k_x < filter_weights.width_; ++k_x) {
					// Only taps that also fall under the filter centred on j contribute.
					if (k_x+offset_x >= j_x - radius_width &&
						k_x+offset_x <= j_x + radius_width &&
						k_y+offset_y >= j_y - radius_height &&
						k_y+offset_y <= j_y + radius_height)
					{
						sum += 
							filter_weights[k_y][k_x].direct_product(
								filter_weights[k_y+offset_y-j_y+radius_height][k_x+offset_x-j_x+radius_width]
							);
					}
				}
			}
			b[j_y][j_x] = sum;
		}
	}
}

// Looks up the b coefficient relating pixel i to pixel j. The offset (j - i)
// is shifted so that a zero offset lands on the centre element of `b`;
// offsets that fall outside `b` yield a zero()-ed Color.
__forceinline
Color b_value(const Image& b, int i_x, int i_y, int j_x, int j_y)
{
	const int half_w = (b.width_ - 1)/2;
	const int half_h = (b.height_ - 1)/2;
	const int col = j_x - i_x + half_w;
	const int row = j_y - i_y + half_h;

	const bool inside = col >= 0 && row >= 0 && col < b.width_ && row < b.height_;
	if (!inside) {
		Color none;
		none.zero();
		return none;
	}
	return b[row][col];
}

void compute_a_image(const Image& image, const Image& b, Image& a)
{
	int radius_width = (b.width_ - 1)/2;
	int radius_height = (b.height_ - 1)/2;
	for (int i_y = 0; i_y<a.height_; ++i_y) {
		for (int i_x = 0; i_x<a.width_; ++i_x) {
			Color sum;
			sum.zero();
			for (int j_y = i_y - radius_height; j_y <= i_y + radius_height; ++j_y) {
				if (j_y < 0) j_y = 0;
				if (j_y >= a.height_) break;

				for (int j_x = i_x - radius_width; j_x <= i_x + radius_width; ++j_x) {
					if (j_x < 0) j_x = 0;
					if (j_x >= a.width_) break;

					sum += b_value(b, i_x, i_y, j_x, j_y).direct_product(image[j_y][j_x]);
				}
			}
			a[i_y][i_x] = sum * -2.0;
		}
	}
}

void sum_coarsen(
	const Image& fine,
	Image& coarse
	)
{
	for (size_t y=0; y<coarse.height_; ++y) {
		for (size_t x=0; x<coarse.width_; ++x) {
			double divisor = 1.0;
			Color val;
			val.zero();
			val += fine[y*2][x*2];
			if (x*2 + 1 < fine.width_)	 {
				divisor += 1; val += fine[y*2][x*2 + 1];
			}
			if (y*2 + 1 < fine.height_) {
				divisor += 1; val += fine[y*2 + 1][x*2];
			}
			if (x*2 + 1 < fine.width_ &&
				y*2 + 1 < fine.height_) {
				divisor += 1; val += fine[y*2 + 1][x*2 + 1];
			}
			coarse[y][x] = /*(1/divisor)**/val;
		}
	}
}

// Returns a 2-D array with the same dimensions as `s` holding component `k`
// of every Color in `s`.
Array2D<double> extract_vector_layer_2d(const Image& s, size_t k)
{
	Array2D<double> layer(s.width_, s.height_);
	for (size_t row=0; row<s.height_; ++row) {
		size_t col = 0;
		while (col < s.width_) {
			layer[row][col] = s[row][col][k];
			++col;
		}
	}
	return layer;
}

vector<double> extract_vector_layer_1d(const Color* s, size_t sz, size_t k)
{
	vector<double> result(sz);
	for (size_t i=0; i<sz; ++i) {
		result[i] = s[i][k];
	}
	return result;
}

// Returns the index v in [0, num_colors) whose weight vars(i_x, i_y, v) is
// largest; on ties the lowest index wins. `palette` is not consulted.
size_t best_match_color(
	const Array3D<double>& vars,
	size_t i_x,
	size_t i_y,
	const Color* palette,
	size_t num_colors
	)
{
	size_t winner = 0;
	double winning_weight = vars(i_x,i_y,0);
	for (size_t candidate=1; candidate<num_colors; ++candidate) {
		if (!(vars(i_x,i_y,candidate) > winning_weight)) {
			continue;
		}
		winner = candidate;
		winning_weight = vars(i_x,i_y,candidate);
	}
	return winner;
}

// Upsamples the per-pixel weight array `small` into `big`.
//
// small - coarse weights (read only).
// big   - fine weights; written for x < big.width_/2*2 and
//         y < big.height_/2*2 and every depth index. Because those bounds
//         round down to an even number, a trailing odd row or column of
//         `big` is not written here.
void zoom(const Array3D<double>& small, Array3D<double>& big)
{
	// Simple scaling of the weights array based on mixing the four
	// pixels falling under each fine pixel, weighted by area.
	// To mix the pixels a little, we assume each fine pixel
	// is 1.2 fine pixels wide and high.
	for (int y=0; y<big.height_/2*2; y++) {
		for (int x=0; x<big.width_/2*2; x++) {
			// Footprint of the enlarged fine pixel in coarse coordinates,
			// clamped so it stays inside `small`.
			double left = max(0.0, (x-0.1)/2.0), right	= min(small.width_-0.001, (x+1.1)/2.0);
			double top	= max(0.0, (y-0.1)/2.0), bottom = min(small.height_-0.001, (y+1.1)/2.0);
			// Coarse cells containing the footprint's edges.
			int x_left = (int)floor(left), x_right	= (int)floor(right);
			int y_top  = (int)floor(top),  y_bottom = (int)floor(bottom);
			double area = (right-left)*(bottom-top);
			// Weights for a footprint that spans two cells on both axes.
			double top_left_weight	= (ceil(left) - left)*(ceil(top) - top)/area;
			double top_right_weight = (right - floor(right))*(ceil(top) - top)/area;
			double bottom_left_weight  = (ceil(left) - left)*(bottom - floor(bottom))/area;
			double bottom_right_weight = (right - floor(right))*(bottom - floor(bottom))/area;
			// Weights for a footprint that spans two cells on one axis only.
			double top_weight	  = (right-left)*(ceil(top) - top)/area;
			double bottom_weight  = (right-left)*(bottom - floor(bottom))/area;
			double left_weight	  = (bottom-top)*(ceil(left) - left)/area;
			double right_weight	  = (bottom-top)*(right - floor(right))/area;
			for (int z=0; z<big.depth_; z++) {
				double val;
				if (x_left == x_right && y_top == y_bottom) {
					// Footprint lies inside a single coarse cell: copy it.
					val =	small(x_left,y_top,z);
				} else if (x_left == x_right) {
					// One column, two rows.
					val =	top_weight * small(x_left,y_top,z) +
								   bottom_weight * small(x_left,y_bottom,z);
				} else if (y_top == y_bottom) {
					// One row, two columns.
					val =	left_weight * small(x_left,y_top,z) +
								   right_weight * small(x_right,y_top,z);
				} else {
					// Two rows and two columns.
					val =	top_left_weight * small(x_left,y_top,z) +
							top_right_weight * small(x_right,y_top,z) +
							bottom_left_weight * small(x_left,y_bottom,z) +
							bottom_right_weight * small(x_right,y_bottom,z);
				}
				big(x,y,z) = val;
			}
		}
	}
}

// Recomputes the matrix S from scratch.
//
// s                - palette_size x palette_size matrix of Colors. Only the
//                    entries s[alpha][v] with alpha >= v are reset and
//                    accumulated here.
// coarse_variables - per-pixel weights, one value per palette entry.
// b                - b array for the current level.
//
// For every ordered pair of distinct pixels (i, j) within the extent of `b`
// this adds cv_i[v] * cv_j[alpha] * b_ij to s[alpha][v]; for every pixel i it
// adds cv_i[v] * b_center to s[v][v].
//
// The inner loop indexes the weights of pixel j directly. It previously
// fetched the next weight one iteration ahead, which read one element past
// the weights of j (and past the end of the array for the last pixel).
void compute_initial_s(
	Image& s,
	const Array3D<double>& coarse_variables,
	const Image& b
	)
{
	size_t palette_size  = s.width_;
	Color zero_vector;
	zero_vector.zero();
	for (size_t v=0; v<palette_size; ++v) {
		for (size_t v2=0; v2<v+1; ++v2) {
			s[v][v2] = zero_vector;
		}
	}
	const int coarse_width = coarse_variables.width_;
	const int coarse_height = coarse_variables.height_;
	const int center_x = (b.width_-1)/2, center_y = (b.height_-1)/2;
	const Color center_b = b_value(b,0,0,0,0);
	for (int i_y=0; i_y<coarse_height; ++i_y) {
		for (int i_x=0; i_x<coarse_width; ++i_x) {
			// Weights of pixel i, one per palette entry.
			const double* p_icv = &coarse_variables(i_x, i_y, 0);
			// Neighbourhood of i covered by b, clipped to the image.
			const int max_j_x = min<int>(coarse_width, i_x - center_x + b.width_);
			const int max_j_y = min<int>(coarse_height, i_y - center_y + b.height_);
			for (int j_y=max<int>(0, i_y - center_y); j_y<max_j_y; ++j_y) {
				for (int j_x=max<int>(0, i_x - center_x); j_x<max_j_x; ++j_x) {
					if (i_x == j_x && i_y == j_y) continue;
					const Color b_ij = b_value(b,i_x,i_y,j_x,j_y);
					// Weights of pixel j, one per palette entry.
					const double* p_jcv = &coarse_variables(j_x, j_y, 0);
					for (size_t v=0; v<palette_size; ++v) {
						const Color b_ij2 = b_ij * p_icv[v];
						// Column v of s, starting at the diagonal element s[v][v].
						Color* ps = s.pBuff_ + v * palette_size + v;
						// TODO (translated from a mis-encoded comment, wording
						// uncertain): this walks s column-wise; consider walking it
						// row-wise and transposing afterwards.
						for (size_t alpha=v; alpha<palette_size; ++alpha) {
							ps[(alpha - v) * palette_size] += p_jcv[alpha] * b_ij2;
						}
					}
				}
			}
			for (size_t v=0; v<palette_size; ++v) {
				s[v][v] += p_icv[v] * center_b;
			}
		}
	}
}

// Incrementally updates S after the weight of palette entry `alpha` at pixel
// (j_x, j_y) has changed by `delta`.
//
// s                - palette_size x palette_size matrix of Colors; row alpha
//                    up to the diagonal and column alpha from the diagonal
//                    down are modified.
// coarse_variables - per-pixel weights, one value per palette entry.
// b                - b array for the current level.
// j_x, j_y         - pixel whose weight changed.
// alpha            - palette entry whose weight changed.
// delta            - amount of the change.
//
// The weights of pixel i are indexed directly. The loops previously fetched
// the next weight one iteration ahead, which read one element past the
// weights of i, and advanced the destination pointer past the end of s.
void update_s(
	Image& s,
	const Array3D<double>& coarse_variables,
	const Image& b,
	const int j_x,
	const int j_y,
	const size_t alpha,
	const double delta
	)
{
	const size_t palette_size = s.width_;
	const int center_x = (b.width_-1) / 2;
	const int center_y = (b.height_-1) / 2;
	// Pixels i whose b window contains j, clipped to the image.
	const size_t max_i_x = min<int>(coarse_variables.width_, j_x + center_x + 1);
	const size_t max_i_y = min<int>(coarse_variables.height_, j_y + center_y + 1);
	for (size_t i_y=max(0, j_y - center_y); i_y<max_i_y; ++i_y) {
		for (size_t i_x=max(0, j_x - center_x); i_x<max_i_x; ++i_x) {
			if (i_x == j_x && i_y == j_y) continue;
			const Color delta_b_ij = delta * b_value(b,i_x,i_y,j_x,j_y);
			Color* const row = s[alpha];
			const double* const p_cv = &coarse_variables(i_x, i_y, 0);
			// Row alpha, columns 0..alpha.
			for (size_t v=0; v<=alpha; ++v) {
				row[v] += p_cv[v] * delta_b_ij;
			}
			// Column alpha, rows alpha..palette_size-1. The diagonal element is
			// deliberately touched by both loops, as before.
			Color* const diag = row + alpha;
			for (size_t v=alpha; v<palette_size; ++v) {
				diag[(v - alpha) * palette_size] += p_cv[v] * delta_b_ij;
			}
		}
	}
	s[alpha][alpha] += delta*b_value(b,0,0,0,0);
}

// Re-estimates the palette from the current weights.
//
// s                - matrix S; its entries s[v][alpha] with alpha < v are
//                    first copied into s[alpha][v] so the matrix is symmetric.
// coarse_variables - per-pixel weights, one value per palette entry.
// a                - a image for the current level.
// palette          - output; the first three components of each of the first
//                    num_colors entries are overwritten, clamped to [0, 1].
// num_colors       - number of palette entries; the local buffer below holds
//                    at most 256.
void refine_palette(
	Image& s,
	const Array3D<double>& coarse_variables,
	const Image& a,
	Color* palette,
	size_t num_colors
	)
{
	// Only one triangle of S was accumulated - mirror it across the diagonal.
	for (size_t row=0; row<s.width_; ++row) {
		for (size_t col=0; col<row; ++col) {
			s[col][row] = s[row][col];
		}
	}

	// r[v] = sum over all pixels of (weight of v at the pixel) * a(pixel).
	Color r[256];
	for (size_t v=0; v<num_colors; ++v) {
		Color acc;
		acc.zero();
		for (size_t y=0; y<coarse_variables.height_; ++y) {
			for (size_t x=0; x<coarse_variables.width_; ++x) {
				acc += coarse_variables(x,y,v) * a[y][x];
			}
		}
		r[v] = acc;
	}

	// Solve one linear system per colour channel.
	for (size_t channel=0; channel<3; ++channel) {
		Array2D<double> S_k = extract_vector_layer_2d(s, channel);
		vector<double> R_k = extract_vector_layer_1d(&r[0], num_colors, channel);
		vector<double> palette_channel = -1.0 * ((2.0*S_k).matrix_inverse()) * R_k;
		for (size_t v=0; v<num_colors; ++v) {
			double component = palette_channel[v];
			if (component < 0) {
				component = 0;
			} else if (component > 1) {
				component = 1;
			}
			palette[v][channel] = component;
		}
	}

#if TRACE
	for (size_t v=0; v<num_colors; ++v) {
		cout << palette[v] << endl;
	}
#endif
}

// For every pixel, stores in j_palette_sum the palette colours blended by
// that pixel's weights: sum over alpha of weight(alpha) * palette[alpha].
void compute_initial_j_palette_sum(
	Image& j_palette_sum,
	const Array3D<double>& coarse_variables,
	const Color* palette,
	size_t num_colors
	)
{
	for (size_t row=0; row<coarse_variables.height_; ++row) {
		for (size_t col=0; col<coarse_variables.width_; ++col) {
			Color blend;
			blend.zero();
			size_t entry = 0;
			while (entry < num_colors) {
				blend += coarse_variables(col,row,entry)*palette[entry];
				++entry;
			}
			j_palette_sum[row][col] = blend;
		}
	}
}

// Runs the multiscale annealing that assigns a palette index to every pixel
// and refines the palette.
//
// image               - input image; supplies the dimensions and the a image.
// filter_weights      - filter kernel used to build the b array.
// quantized_image     - output: palette index chosen for each pixel.
// palette, num_colors - palette; read as the starting point and overwritten
//                       with the refined colours (clamped to [0, 1]).
// p_coarse_variables  - output: set to a heap-allocated weight array. It is
//                       not freed here, so the caller must delete it.
// initial_temperature, final_temperature - annealing temperature range.
// temps_per_level     - outer iterations performed at each coarse level.
// repeats_per_temp    - sweeps over the image per outer iteration.
//
// The coarse-level a/b images and the j_palette_sum buffer allocated here are
// released before returning (they were previously leaked).
void spatial_color_quant(
	Image& image,
	Image& filter_weights,
	Array2D<uint8_t>& quantized_image,
	Color* palette, size_t num_colors,
	Array3D<double>*& p_coarse_variables,
	double initial_temperature,
	double final_temperature,
	int temps_per_level,
	int repeats_per_temp
	)
{
	size_t max_coarse_level = //1;
		compute_max_coarse_level(image.width_, image.height_);
	size_t width2 = image.width_  >> max_coarse_level;
	size_t height2 = image.height_ >> max_coarse_level;
	p_coarse_variables = new Array3D<double>(width2, height2, num_colors);
	// For syntactic convenience
	Array3D<double>& coarse_variables = *p_coarse_variables;
	fill_random(coarse_variables);
	
	double temperature = initial_temperature;
	
	// Compute a_i, b_{ij} according to (11)
	size_t extended_neighborhood_width = filter_weights.width_*2 - 1;
	size_t extended_neighborhood_height = filter_weights.height_*2 - 1;
	Image b0(extended_neighborhood_width, extended_neighborhood_height);
	compute_b_array(filter_weights, b0);
	Image a0(image.width_, image.height_);
	compute_a_image(image, b0, a0);
	
	// Compute a_I^l, b_{IJ}^l according to (18)
	// Element 0 of each vector points at the stack objects a0/b0; every
	// further element is allocated with new below.
	vector<Image*> a_vec, b_vec;
	a_vec.push_back(&a0);
	b_vec.push_back(&b0);
	
	int coarse_level;
	for (coarse_level=1; coarse_level <= max_coarse_level; ++coarse_level) {
		size_t radius_width  = (filter_weights.width_ - 1)/2;
		size_t radius_height = (filter_weights.height_ - 1)/2;
		Image* p_bi = new Image(max<size_t>(3, b_vec.back()->width_-2), max<size_t>(3, b_vec.back()->height_-2));
		Image& bi = *p_bi;
		for (size_t J_y=0; J_y<bi.height_; ++J_y) {
			for (size_t J_x=0; J_x<bi.width_; ++J_x) {
				Color sum;
				sum.zero();
				// Sum the finer level's b over a 2x2 block of i and a 2x2 block of j.
				for (size_t i_y=radius_height*2; i_y<radius_height*2+2; ++i_y) {
					for (size_t i_x=radius_width*2; i_x<radius_width*2+2; ++i_x) {
						for (size_t j_y=J_y*2; j_y<J_y*2+2; j_y++) {
							for (size_t j_x=J_x*2; j_x<J_x*2+2; j_x++) {
								sum += b_value(*b_vec.back(), i_x, i_y, j_x, j_y);
							}
						}
					}
				}
				bi[J_y][J_x] = sum;
			}
		}
		b_vec.push_back(p_bi);

		Image* p_ai = new Image(image.width_ >> coarse_level, image.height_ >> coarse_level);
		Image& ai = *p_ai;
		sum_coarsen(*a_vec.back(), ai);
		a_vec.push_back(p_ai);
	}
	
	// Multiscale annealing
	coarse_level = max_coarse_level;
	const size_t iters_per_level = temps_per_level;
	double temperature_multiplier = pow(final_temperature/initial_temperature, 1.0/(max<size_t>(3, max_coarse_level*iters_per_level)));
#if TRACE
	cout << "Temperature multiplier: " << temperature_multiplier << endl;
#endif
	size_t iters_at_current_level = 0;
	bool skip_palette_maintenance = false;
	Image s(num_colors, num_colors);
	init_image(s);
	compute_initial_s(s, coarse_variables, *b_vec[coarse_level]);
	Image* j_palette_sum =
		new Image(coarse_variables.width_, coarse_variables.height_);
	compute_initial_j_palette_sum(*j_palette_sum, coarse_variables, palette, num_colors);
	vector<double> meanfield_logs(num_colors), meanfields(num_colors);
	while (coarse_level >= 0 || temperature > final_temperature) {
		// Need to reseat this reference in case we changed p_coarse_variables
		Array3D<double>& coarse_variables = *p_coarse_variables;
		Image& a = *a_vec[coarse_level];
		Image& b = *b_vec[coarse_level];
		Color middle_b = b_value(b,0,0,0,0);
#if TRACE
		cout << "Temperature: " << temperature << endl;
#endif
		int center_x = (b.width_-1)/2;
		int center_y = (b.height_-1)/2;
		size_t step_counter = 0;
		for (size_t repeat=0; repeat<repeats_per_temp; ++repeat) {
			size_t pixels_changed = 0, pixels_visited = 0;
			deque< pair<int, int> > visit_queue;
			random_permutation_2d(coarse_variables.width_, coarse_variables.height_, visit_queue);

			// Compute 2*sum(j in extended neighborhood of i, j != i) b_ij

			const int radius_width = (b.width_ - 1)/2;
			const int radius_height = (b.height_ - 1)/2;
			while (!visit_queue.empty()) {
				// If we get to 10% above initial size, just revisit them all
				if (visit_queue.size() > coarse_variables.width_*coarse_variables.height_*11.0/10) {
					random_permutation_2d(coarse_variables.width_, coarse_variables.height_, visit_queue);
				}

				int i_x = visit_queue.front().first;
				int i_y = visit_queue.front().second;
				visit_queue.pop_front();

				// Compute (25)
				Color p_i;
				p_i.zero();
				// Clip the b window around (i_x, i_y) to the image and to b itself:
				// rows [sy, ey) and columns [sx, ex).
				int sy = i_y - center_y;
				int ey = sy + b.height_;
				sy = max(sy, 0);
				ey = min<int>(ey, coarse_variables.height_);
				int ky = sy - i_y + radius_height;
				if (ky < 0) {
					sy += -ky;
				}
				ky = ey - 1 - i_y + radius_height;
				if (ky >= b.height_) {
					ey -= (ky - b.height_) + 1;
				}
				int sx = i_x - center_x;
				int ex = sx + b.width_;
				sx = max(sx, 0);
				ex = min<int>(ex, coarse_variables.width_);
				int kx = sx - i_x + radius_width;
				if (kx < 0) {
					sx += -kx;
				}
				kx = ex - 1 - i_x + radius_width;
				if (kx >= b.width_) {
					ex -= (kx - b.width_) + 1;
				}
				for (int y=sy; y<ey; ++y) {
					int k_y = y - i_y + radius_height;
					Color* cb = b[k_y] + sx - i_x + radius_width;
					Color* cp = (*j_palette_sum)[y]+sx;
					if (i_y == y) {
						// On the pixel's own row, skip the pixel itself (j != i).
						for (int x=sx; x<ex; ++x) {
							Color c = (*cb++) * (*cp++);
							if (i_x != x) {
								p_i += c;
							}
						}
					}else {
						for (int x=sx; x<ex; ++x) {
							p_i += (*cb++) * (*cp++);
						}
					}
				}
				p_i *= 2.0;
				p_i += a[i_y][i_x];

				double max_meanfield_log = -numeric_limits<double>::infinity();
				double meanfield_sum = 0.0;
				double minus_inv_temperature = -1.0 / temperature;
				for (size_t v=0; v<num_colors; ++v) {
					// Update m_{pi(i)v}^I according to (23)
					// We can subtract an arbitrary factor to prevent overflow,
					// since only the weight relative to the sum matters, so we
					// will choose a value that makes the maximum e^100.
					double m = palette[v].dot_product(p_i + middle_b.direct_product(palette[v])) * minus_inv_temperature;
					meanfield_logs[v] = m;
					if (m > max_meanfield_log) {
						max_meanfield_log = m;
					}
				}
				for (size_t v=0; v<num_colors; ++v) {
					double d = exp(meanfield_logs[v]-max_meanfield_log+100);
					meanfields[v] = d;
					meanfield_sum += d;
				}
				if (meanfield_sum == 0) {
					cout << "Fatal error: Meanfield sum underflowed. Please contact developer." << endl;
					exit(-1);
				}
				size_t old_max_v = best_match_color(coarse_variables, i_x, i_y, palette, num_colors);
				Color& j_pal = (*j_palette_sum)[i_y][i_x];
				for (size_t v=0; v<num_colors; ++v) {
					double new_val = meanfields[v]/meanfield_sum;
					// Prevent the matrix S from becoming singular
					if (new_val <= 0) new_val = 1e-10;
					if (new_val >= 1) new_val = 1 - 1e-10;
					double delta_m_iv = new_val - coarse_variables(i_x,i_y,v);
					coarse_variables(i_x,i_y,v) = new_val;
					j_pal += delta_m_iv * palette[v];
					if (abs(delta_m_iv) > 0.001 && !skip_palette_maintenance) {
						update_s(s, coarse_variables, b, i_x, i_y, v, delta_m_iv);
					}
				}
				size_t max_v = best_match_color(coarse_variables, i_x, i_y, palette, num_colors);
				// Only consider it a change if the colors are different enough
				if ((palette[max_v]-palette[old_max_v]).norm_squared() >= 1.0/(255.0*255.0)) {
					++pixels_changed;
					// We don't add the outer layer of pixels , because
					// there isn't much weight there, and if it does need
					// to be visited, it'll probably be added when we visit
					// neighboring pixels.
					// The commented out loops are faster but cause a little bit of distortion
					//for (int y=center_y-1; y<center_y+1; y++) {
					//	 for (int x=center_x-1; x<center_x+1; x++) {
					for (int y=min(1,center_y-1); y<max<int>(b.height_-1,center_y+1); ++y) {
						for (int x=min(1,center_x-1); x<max<int>(b.width_-1,center_x+1); ++x) {
							int j_x = x - center_x + i_x;
							int j_y = y - center_y + i_y;
							if (j_x < 0 || j_y < 0 || j_x >= coarse_variables.width_ || j_y >= coarse_variables.height_) continue;
							visit_queue.push_back(pair<size_t,size_t>(j_x,j_y));
						}
					}
				}
				++pixels_visited;

				// Show progress with dots - in a graphical interface,
				// we'd show progressive refinements of the image instead,
				// and maybe a palette preview.
				++step_counter;
				if ((step_counter % 10000) == 0) {
					cout << ".";
					cout.flush();
#if TRACE
					cout << visit_queue.size();
#endif
				}
			}
#if TRACE
			cout << "Pixels changed: " << pixels_changed << endl;
#endif
			// S was not maintained incrementally during this sweep, so rebuild it.
			if (skip_palette_maintenance) {
				compute_initial_s(s, *p_coarse_variables, *b_vec[coarse_level]);
			}
			refine_palette(s, coarse_variables, a, palette, num_colors);
			compute_initial_j_palette_sum(*j_palette_sum, coarse_variables, palette, num_colors);
		}

		++iters_at_current_level;
		skip_palette_maintenance = false;
		if ((temperature <= final_temperature || coarse_level > 0) &&
			iters_at_current_level >= iters_per_level)
		{
			// Move to the next finer level: upsample the weights and rebuild
			// the per-pixel palette sums at the new resolution.
			--coarse_level;
			if (coarse_level < 0) break;
			Array3D<double>* p_new_coarse_variables = new Array3D<double>(
				image.width_  >> coarse_level,
				image.height_ >> coarse_level,
				num_colors);
			zoom(coarse_variables, *p_new_coarse_variables);
			delete p_coarse_variables;
			p_coarse_variables = p_new_coarse_variables;
			iters_at_current_level = 0;
			delete j_palette_sum;
			j_palette_sum = new Image((*p_coarse_variables).width_, (*p_coarse_variables).height_);
			compute_initial_j_palette_sum(*j_palette_sum, *p_coarse_variables, palette, num_colors);
			skip_palette_maintenance = true;
#ifdef TRACE
			cout << "Image size: " << p_coarse_variables->width_ << " " << p_coarse_variables->height_ << endl;
#endif
		}
		if (temperature > final_temperature) {
			temperature *= temperature_multiplier;
		}
	}
	
	// This is normally not used, but is handy sometimes for debugging
	while (coarse_level > 0) {
		coarse_level--;
		Array3D<double>* p_new_coarse_variables = new Array3D<double>(
			image.width_  >> coarse_level,
			image.height_ >> coarse_level,
			num_colors
			);
		zoom(*p_coarse_variables, *p_new_coarse_variables);
		delete p_coarse_variables;
		p_coarse_variables = p_new_coarse_variables;
	}
	
	{
		// Need to reseat this reference in case we changed p_coarse_variables
		Array3D<double>& coarse_variables = *p_coarse_variables;

		for (size_t i_x = 0; i_x < image.width_; ++i_x) {
			for (size_t i_y = 0; i_y < image.height_; ++i_y) {
				quantized_image[i_y][i_x] =
					best_match_color(coarse_variables, i_x, i_y, palette, num_colors);
			}
		}
		for (size_t v=0; v<num_colors; ++v) {
			for (size_t k=0; k<3; ++k) {
				if (palette[v][k] > 1.0) palette[v][k] = 1.0;
				if (palette[v][k] < 0.0) palette[v][k] = 0.0;
			}
	#ifdef TRACE
			cout << palette[v] << endl;
	#endif
		}
	}

	// Release everything allocated with new in this function. Index 0 of
	// a_vec/b_vec refers to the stack objects a0/b0 and must not be deleted.
	for (size_t level=1; level<a_vec.size(); ++level) {
		delete a_vec[level];
	}
	for (size_t level=1; level<b_vec.size(); ++level) {
		delete b_vec[level];
	}
	delete j_palette_sum;

}

// Quantizes `image` to `num_colors` colours.
//
// image           - input image.
// quantized_image - output: palette index for each pixel.
// palette         - output: receives num_colors colours. It is first filled
//                   with random values and then refined.
// num_colors      - number of palette entries.
//
// Returns false if the dithering level or filter size is invalid (both are
// fixed constants below), true otherwise.
//
// The weight array allocated by spatial_color_quant is deleted before
// returning; it was previously leaked and its pointer left uninitialised.
bool scolorq(
	Image& image,
	Array2D<uint8_t>& quantized_image,
	Color* palette,
	size_t num_colors
	)
{
	sdxor156(time(NULL));
	
	// Random starting palette.
	for (size_t i=0; i<num_colors; ++i) {
		palette[i] = Color(
			dxor156(),
			dxor156(),
			dxor156(),
			0.0
			);
	}
	
	Array3D<double>* coarse_variables = NULL;
	double dithering_level = // 0.09*log((double)image.width_*image.height_) - 0.04*log((double)num_colors) + 0.001;
		 1.2;
	if (dithering_level <= 0.0) {
		puts("Dithering level must be more than zero.\n");
		return false;
	}
	int filter_size = 3;
	if (filter_size != 1 && filter_size != 3 && filter_size != 5) {
		puts("Filter size must be one of 1, 3, or 5.\n");
		return false;
	}
	
	// The filter images wrap these stack buffers.
	Color buff_filter1_weights[1*1];
	Color buff_filter3_weights[3*3];
	Color buff_filter5_weights[5*5];
	Image filter1_weights(1, 1, buff_filter1_weights);
	Image filter3_weights(3, 3, buff_filter3_weights);
	Image filter5_weights(5, 5, buff_filter5_weights);
	filter1_weights[0][0] = Color(1.0, 1.0, 1.0, 0.0);
	double stddev = dithering_level;
	// 3x3 filter: weight exp(-distance / stddev^2), normalised to sum to 1.
	double sum = 0.0;
	for (int i=0; i<3; i++) {
		for (int j=0; j<3; j++) {
			double w = exp(-sqrt((double)((i-1)*(i-1) + (j-1)*(j-1)))/(stddev*stddev));
			filter3_weights[i][j] = Color(w,w,w,0);
			sum += w;
		}
	}
	double invSum = 1.0 / sum;
	for (int i=0; i<3; i++) {
		for (int j=0; j<3; j++) {
			filter3_weights[i][j] *= invSum;
		}
	}
	// 5x5 filter: same weighting, normalised to sum to 1.
	sum = 0.0;
	for (int i=0; i<5; i++) {
		for (int j=0; j<5; j++) {
			double w = exp(-sqrt((double)((i-2)*(i-2) + (j-2)*(j-2)))/(stddev*stddev));
			filter5_weights[i][j] = Color(w,w,w,0);
			sum += w;
		}
	}
	invSum = 1.0 / sum;
	for (int i=0; i<5; i++) {
		for (int j=0; j<5; j++) {
			filter5_weights[i][j] *= invSum;
		}
	}

	// Indexed by filter size; only sizes 1, 3 and 5 have an entry.
	Image* filters[] = {
		NULL,
		&filter1_weights,
		NULL,
		&filter3_weights,
		NULL,
		&filter5_weights
	};

	spatial_color_quant(image, *filters[filter_size], quantized_image, palette, num_colors, coarse_variables, 1.0, 0.001, 3, 1);
	// spatial_color_quant leaves its final weight array in coarse_variables
	// for the caller to release.
	delete coarse_variables;
	return true;
}
