From 55c5f0b3a0004a6591888e60a53cb92e53b6e18f Mon Sep 17 00:00:00 2001
From: Jorge Rodriguez <jorge@lunarworkshop.com>
Date: Wed, 23 Jul 2014 22:17:56 -0700
Subject: [PATCH] Beginning of a more sophisticated resample algorithm,
 starting with calculating filter contributions per scan line.

---
 stb_resample.h                               | 249 ++++++++++++++-----
 tests/{resample_test.c => resample_test.cpp} |  25 +-
 tests/resample_test_c.c                      |  11 +
 3 files changed, 220 insertions(+), 65 deletions(-)
 rename tests/{resample_test.c => resample_test.cpp} (55%)
 create mode 100644 tests/resample_test_c.c

diff --git a/stb_resample.h b/stb_resample.h
index 3477678..b187a2e 100644
--- a/stb_resample.h
+++ b/stb_resample.h
@@ -18,7 +18,7 @@ Initial implementation by Jorge L Rodriguez
 #define STBR_INCLUDE_STB_RESAMPLE_H
 
 // Basic usage:
-//    result = stbr_resize(input_data, input_w, input_h, input_components, 0, output_data, output_w, output_h, 0, STBR_FILTER_NEAREST, STBR_EDGE_CLAMP, STBR_COLORSPACE_SRGB);
+//    result = stbr_resize_arbitrary(input_data, input_w, input_h, 0, output_data, output_w, output_h, 0, channels, STBR_TYPE_UINT8, STBR_FILTER_NEAREST, tempmem, tempmem_size_in_bytes);
 //
 //    input_data is your supplied texels.
 //    output_data will be the resized texels. It should be of size output_w * output_h * input_components (or output_h * output_stride if you provided a stride.)
@@ -38,11 +38,16 @@ typedef enum
 typedef enum
 {
 	STBR_COLORSPACE_LINEAR = 1,
-	STBR_COLORSPACE_SRGB = 1,
+	STBR_COLORSPACE_SRGB = 2,
 } stbr_colorspace;
 
+typedef enum
+{
+	STBR_TYPE_UINT8 = 1, // The only type stbr_resize_arbitrary accepts so far; any other value trips STBR_UNIMPLEMENTED.
+} stbr_type;
 
 typedef unsigned char stbr_uc;
+typedef unsigned int stbr_size_t; // to avoid including a header for size_t
 
 #ifdef __cplusplus
 extern "C" {
@@ -59,7 +64,15 @@ extern "C" {
 	// PRIMARY API - resize an image
 	//
 
-	STBRDEF int stbr_resize(const stbr_uc* input_data, int input_w, int input_h, int input_components, int input_stride, stbr_uc* output_data, int output_w, int output_h, int output_stride, stbr_filter filter, stbr_edge edge, stbr_colorspace colorspace);
+	STBRDEF stbr_size_t stbr_calculate_memory(int input_w, int input_h, int input_stride_in_bytes,
+		int output_w, int output_h, int output_stride_in_bytes,
+		int channels, stbr_filter filter); // Returns the size in bytes of the tempmem block to hand to stbr_resize_arbitrary.
+
+	STBRDEF int stbr_resize_arbitrary(const void* input_data, int input_w, int input_h, int input_stride_in_bytes,
+		void* output_data, int output_w, int output_h, int output_stride_in_bytes,
+		//int channels, int alpha_channel, stbr_type type, stbr_filter filter, stbr_edge edge, stbr_colorspace colorspace,
+		int channels, stbr_type type, stbr_filter filter,
+		void* tempmem, stbr_size_t tempmem_size_in_bytes); // Returns 1 on success, 0 if tempmem is NULL or too small.
 
 
 #ifdef __cplusplus
@@ -78,13 +91,21 @@ extern "C" {
 #define STBR_ASSERT(x) assert(x)
 #endif
 
+#ifdef STBR_DEBUG
+#define STBR_DEBUG_ASSERT(x) STBR_ASSERT(x)
+#else
+#define STBR_DEBUG_ASSERT(x) ((void)0) // Swallow the argument so non-debug builds do not evaluate the check at all.
+#endif
+
+// If you hit this it means I haven't done it yet.
+#define STBR_UNIMPLEMENTED(x) STBR_ASSERT(!(x))
+
 #ifdef STBR_DEBUG_OVERWRITE_TEST
 #include <string.h>
 #endif
 
 
-// For size_t
-#include <stdlib.h>
+#include <math.h>
 
 
 #ifndef _MSC_VER
@@ -120,6 +141,64 @@ typedef unsigned char stbr__validate_uint32[sizeof(stbr__uint32) == 4 ? 1 : -1];
 #define STBR_NOTUSED(v)  (void)sizeof(v)
 #endif
 
+#define STBR_ARRAY_SIZE(a) (sizeof((a))/sizeof((a)[0]))
+
+// Kernel function centered at 0
+typedef float (stbr__kernel_fn)(float x);
+
+typedef struct
+{
+	stbr__kernel_fn* kernel; // Weight function; called with (filter center - source texel center)
+	float support; // Filter radius in source texels; doubled and rounded up to get the texel width
+} stbr__filter_info;
+
+typedef struct
+{
+	int n0; // First contributing source texel
+	int n1; // Last contributing source texel
+} stbr__contributors;
+
+typedef struct
+{
+	int total_contributors; // output_w * kernel_texel_width
+	int kernel_texel_width; // Coefficient slots reserved per output texel
+
+	float* decode_buffer; // input_w * channels floats, placed right after this struct in tempmem
+	stbr__contributors* horizontal_contributors; // Indexed by output texel column
+	float* horizontal_coefficients; // kernel_texel_width weights per output texel column
+} stbr__info;
+
+
+// Box kernel for nearest sampling: 1 within half a texel of the center, 0 elsewhere. Internal, hence static.
+static float stbr__filter_nearest(float x)
+{
+	if (fabs(x) < 0.5)
+		return 1;
+	return 0;
+}
+
+static const stbr__filter_info stbr__filter_info_table[] = {
+		{ 0,                    0.0f }, // Placeholder: callers assert filter != 0. Plain 0 rather than NULL since this patch drops <stdlib.h>.
+		{ stbr__filter_nearest, 0.5f }, // Presumably the slot for STBR_FILTER_NEAREST (== 1) -- confirm against the stbr_filter enum.
+};
+
+// This is the maximum number of input samples that can affect an output sample
+// with the given filter. Internal helper, so it gets internal linkage.
+static int stbr__get_filter_texel_width(stbr_filter filter, int upsample)
+{
+	STBR_UNIMPLEMENTED(!upsample); // Only the upsampling case is handled so far.
+
+	STBR_ASSERT(filter != 0);
+	STBR_ASSERT(filter < STBR_ARRAY_SIZE(stbr__filter_info_table));
+
+	return (int)ceil(stbr__filter_info_table[filter].support * 2); // Full width = twice the support radius.
+}
+
+static int stbr__get_total_contributors(stbr_filter filter, int input_w, int output_w)
+{
+	return output_w * stbr__get_filter_texel_width(filter, output_w > input_w ? 1 : 0); // One filter-width run of slots per output texel.
+}
+
 // i0 is a texel in [0, n0-1]
 // What's the nearest texel center to i0's center in [0, n1-1] ?
 // Remapping [0, n0-1] to [0, n1-1] gives (i0 + 0.5)*n1/n0 but we want to avoid
@@ -129,95 +208,143 @@ stbr_inline static int stbr__nearest_texel(int i0, int n0, int n1)
 	return (n1*i0 + n1/2) / n0;
 }
 
-stbr_inline static size_t stbr__texel_index(int x, int y, int c, int width_stride, int num_c, int w, int h)
+stbr_inline static stbr_size_t stbr__texel_index(int x, int y, int c, int width_stride, int num_c, int w, int h)
 {
-	STBR_ASSERT(x >= 0 && x < w);
-	STBR_ASSERT(y >= 0 && y < h);
+	STBR_DEBUG_ASSERT(x >= 0 && x < w);
+	STBR_DEBUG_ASSERT(y >= 0 && y < h);
 
 	return y*width_stride + x*num_c + c;
 }
 
-static void stbr__filter_nearest_1(const stbr_uc* input_data, stbr_uc* output_data, size_t input_texel_index, size_t output_texel_index, size_t n)
+stbr_inline static stbr__contributors* stbr__get_contributor(stbr__info* stbr_info, int n)
 {
-	output_data[output_texel_index] = input_data[input_texel_index];
+	return &stbr_info->horizontal_contributors[n];
 }
 
-static void stbr__filter_nearest_3(const stbr_uc* input_data, stbr_uc* output_data, size_t input_texel_index, size_t output_texel_index, size_t n)
+stbr_inline static float* stbr__get_coefficient(stbr__info* stbr_info, int n, int c)
 {
-	output_data[output_texel_index] = input_data[input_texel_index];
-	output_data[output_texel_index + 1] = input_data[input_texel_index + 1];
-	output_data[output_texel_index + 2] = input_data[input_texel_index + 2];
+	return &stbr_info->horizontal_coefficients[stbr_info->kernel_texel_width*n + c];
 }
 
-static void stbr__filter_nearest_4(const stbr_uc* input_data, stbr_uc* output_data, size_t input_texel_index, size_t output_texel_index, size_t n)
+// Every scan line uses the same kernel values, so compute them once per resize: for each output column n,
+// store the contributing source range [n0, n1] and its kernel weights, normalized so they sum to 1.
+static void stbr__calculate_horizontal_filters(stbr__info* stbr_info, stbr_filter filter, int input_w, int output_w)
 {
-	output_data[output_texel_index] = input_data[input_texel_index];
-	output_data[output_texel_index + 1] = input_data[input_texel_index + 1];
-	output_data[output_texel_index + 2] = input_data[input_texel_index + 2];
-	output_data[output_texel_index + 3] = input_data[input_texel_index + 3];
+	int n, i;
+	float scale_ratio = (float)output_w / input_w;
+
+	float out_pixels_radius = stbr__filter_info_table[filter].support * scale_ratio;
+
+	STBR_UNIMPLEMENTED(output_w < input_w);
+
+	for (n = 0; n < output_w; n++)
+	{
+		// What input texels contribute to this output texel?
+		float out_texel_center = (float)n + 0.5f;
+		float out_texel_influence_lowerbound = out_texel_center - out_pixels_radius;
+		float out_texel_influence_upperbound = out_texel_center + out_pixels_radius;
+
+		float in_center_of_out = out_texel_center / scale_ratio;
+		float in_texel_influence_lowerbound = out_texel_influence_lowerbound / scale_ratio;
+		float in_texel_influence_upperbound = out_texel_influence_upperbound / scale_ratio;
+
+		int in_first_texel = (int)(floor(in_texel_influence_lowerbound + 0.5));
+		int in_last_texel = (int)(floor(in_texel_influence_upperbound - 0.5));
+
+		float total_filter = 0;
+		float filter_scale;
+
+		STBR_DEBUG_ASSERT(in_last_texel - in_first_texel < stbr_info->kernel_texel_width); // n1 - n0 + 1 weights must fit in kernel_texel_width slots
+		STBR_DEBUG_ASSERT(in_first_texel >= 0);
+		STBR_DEBUG_ASSERT(in_last_texel < input_w);
+
+		stbr__get_contributor(stbr_info, n)->n0 = in_first_texel;
+		stbr__get_contributor(stbr_info, n)->n1 = in_last_texel;
+
+		for (i = 0; i <= in_last_texel - in_first_texel; i++)
+		{
+			float in_texel_center = (float)(i + in_first_texel) + 0.5f;
+			total_filter += *stbr__get_coefficient(stbr_info, n, i) = stbr__filter_info_table[filter].kernel(in_center_of_out - in_texel_center);
+		}
+
+		STBR_DEBUG_ASSERT(total_filter > 0);
+		STBR_DEBUG_ASSERT(fabs(1-total_filter) < 0.1f); // Make sure it's not way off.
+
+		// Make sure the sum of all coefficients is 1.
+		filter_scale = 1 / total_filter;
+
+		for (i = 0; i <= in_last_texel - in_first_texel; i++)
+			*stbr__get_coefficient(stbr_info, n, i) *= filter_scale;
+	}
 }
 
-static void stbr__filter_nearest_n(const stbr_uc* input_data, stbr_uc* output_data, size_t input_texel_index, size_t output_texel_index, size_t n)
+STBRDEF int stbr_resize_arbitrary(const void* input_data, int input_w, int input_h, int input_stride_in_bytes,
+	void* output_data, int output_w, int output_h, int output_stride_in_bytes,
+	int channels, stbr_type type, stbr_filter filter,
+	void* tempmem, stbr_size_t tempmem_size_in_bytes) // Returns 1 on success, 0 if tempmem is NULL or too small.
 {
-	size_t c;
-	for (c = 0; c < n; c++)
-		output_data[output_texel_index + c] = input_data[input_texel_index + c];
-}
-
-typedef void (stbr__filter_fn)(const stbr_uc* input_data, stbr_uc* output_data, size_t input_texel_index, size_t output_texel_index, size_t n);
-
-STBRDEF int stbr_resize(const stbr_uc* input_data, int input_w, int input_h, int input_components, int input_stride, stbr_uc* output_data, int output_w, int output_h, int output_stride, stbr_filter filter, stbr_edge edge, stbr_colorspace colorspace)
-{
-	int x, y;
-	int width_stride_input = input_stride ? input_stride : input_components * input_w;
-	int width_stride_output = output_stride ? output_stride : input_components * output_w;
+	int width_stride_input = input_stride_in_bytes ? input_stride_in_bytes : channels * input_w;
+	int width_stride_output = output_stride_in_bytes ? output_stride_in_bytes : channels * output_w;
+	stbr__info* stbr_info = (stbr__info*)tempmem; // Declared up here (not after statements) so this also builds as C89.
 
 #ifdef STBR_DEBUG_OVERWRITE_TEST
 #define OVERWRITE_ARRAY_SIZE 64
 	unsigned char overwrite_contents_pre[OVERWRITE_ARRAY_SIZE];
 
-	size_t begin_forbidden = width_stride_output * (output_h - 1) + output_w * input_components;
-	memcpy(overwrite_contents_pre, &output_data[begin_forbidden], OVERWRITE_ARRAY_SIZE);
+	stbr_size_t begin_forbidden = width_stride_output * (output_h - 1) + output_w * channels;
+	memcpy(overwrite_contents_pre, &((unsigned char*)output_data)[begin_forbidden], OVERWRITE_ARRAY_SIZE);
 #endif
 
-	if (filter == STBR_FILTER_NEAREST)
-	{
-		stbr__filter_fn* filter_fn;
+	STBR_UNIMPLEMENTED(type != STBR_TYPE_UINT8);
 
-		filter_fn = &stbr__filter_nearest_n;
+	STBR_ASSERT(filter != 0);
+	STBR_ASSERT(filter < STBR_ARRAY_SIZE(stbr__filter_info_table));
 
-		if (input_components == 1)
-			filter_fn = &stbr__filter_nearest_1;
-		else if (input_components == 3)
-			filter_fn = &stbr__filter_nearest_3;
-		else if (input_components == 4)
-			filter_fn = &stbr__filter_nearest_4;
-
-		for (y = 0; y < output_h; y++)
-		{
-			int nearest_y = stbr__nearest_texel(y, output_h, input_h);
-
-			for (x = 0; x < output_w; x++)
-			{
-				int nearest_x = stbr__nearest_texel(x, output_w, input_w);
-				size_t input_texel_index = stbr__texel_index(nearest_x, nearest_y, 0, width_stride_input, input_components, input_w, input_h);
-				size_t output_texel_index = stbr__texel_index(x, y, 0, width_stride_output, input_components, output_w, output_h);
-
-				filter_fn(input_data, output_data, input_texel_index, output_texel_index, input_components);
-			}
-		}
-	}
-	else
+	if (!tempmem)
 		return 0;
 
+	if (tempmem_size_in_bytes < stbr_calculate_memory(input_w, input_h, input_stride_in_bytes, output_w, output_h, output_stride_in_bytes, channels, filter))
+		return 0; // Too small for the filter actually requested.
+
+	// Lay tempmem out as: stbr__info header, decode buffer, contributors, coefficients.
+#define STBR__NEXT_MEMPTR(current, old, newtype) (newtype*)(((unsigned char*)current) + old)
+
+	memset(tempmem, 0, tempmem_size_in_bytes); // NOTE(review): needs <string.h>, only included above under STBR_DEBUG_OVERWRITE_TEST -- verify it is in scope.
+
+	stbr_info->total_contributors = stbr__get_total_contributors(filter, input_w, output_w);
+	stbr_info->kernel_texel_width = stbr__get_filter_texel_width(filter, output_w > input_w ? 1 : 0);
+
+	stbr_info->decode_buffer = STBR__NEXT_MEMPTR(stbr_info, sizeof(stbr__info), float);
+	stbr_info->horizontal_contributors = STBR__NEXT_MEMPTR(stbr_info->decode_buffer, input_w * channels * sizeof(float), stbr__contributors);
+	stbr_info->horizontal_coefficients = STBR__NEXT_MEMPTR(stbr_info->horizontal_contributors, stbr_info->total_contributors * sizeof(stbr__contributors), float);
+
+#undef STBR__NEXT_MEMPTR
+
+	stbr__calculate_horizontal_filters(stbr_info, filter, input_w, output_w); // Only the filter setup exists so far; output_data is not written yet.
+
 #ifdef STBR_DEBUG_OVERWRITE_TEST
-	STBR_ASSERT(memcmp(overwrite_contents_pre, &output_data[begin_forbidden], OVERWRITE_ARRAY_SIZE) == 0);
+	STBR_DEBUG_ASSERT(memcmp(overwrite_contents_pre, &((unsigned char*)output_data)[begin_forbidden], OVERWRITE_ARRAY_SIZE) == 0);
 #endif
 
 	return 1;
 }
 
 
+STBRDEF stbr_size_t stbr_calculate_memory(int input_w, int input_h, int input_stride_in_bytes,
+	int output_w, int output_h, int output_stride_in_bytes,
+	int channels, stbr_filter filter)
+{
+	stbr_size_t total_contributors; // Declared before any statement so this also builds as C89.
+
+	STBR_ASSERT(filter != 0);
+	STBR_ASSERT(filter < STBR_ARRAY_SIZE(stbr__filter_info_table));
+	total_contributors = stbr__get_total_contributors(filter, input_w, output_w);
+
+	// Sum of: stbr__info header, decode buffer, contributors array, coefficients array.
+	return (stbr_size_t)(sizeof(stbr__info) + input_w * channels * sizeof(float)
+		+ total_contributors * (sizeof(stbr__contributors) + sizeof(float)));
+}
+
 #endif // STB_RESAMPLE_IMPLEMENTATION
 
 /*
diff --git a/tests/resample_test.c b/tests/resample_test.cpp
similarity index 55%
rename from tests/resample_test.c
rename to tests/resample_test.cpp
index af20af4..2ee2ffb 100644
--- a/tests/resample_test.c
+++ b/tests/resample_test.cpp
@@ -5,6 +5,7 @@
 #endif
 
 #define STB_RESAMPLE_IMPLEMENTATION
+#define STB_RESAMPLE_STATIC
 #include "stb_resample.h"
 
 #define STB_IMAGE_WRITE_IMPLEMENTATION
@@ -13,6 +14,10 @@
 #define STB_IMAGE_IMPLEMENTATION
 #include "stb_image.h"
 
+#ifdef _WIN32
+#include <sys/timeb.h>
+#endif
+
 int main(int argc, char** argv)
 {
 	unsigned char* input_data;
@@ -34,14 +39,26 @@ int main(int argc, char** argv)
 		return 1;
 	}
 
-	out_w = 512;
-	out_h = 512;
+	out_w = 1024;
+	out_h = 1024;
 	out_stride = (out_w + 10) * n;
 
-	output_data = malloc(out_stride * out_h);
+	output_data = (unsigned char*)malloc(out_stride * out_h);
+
+	int in_w = 512;
+	int in_h = 512;
+
+	size_t memory_required = stbr_calculate_memory(in_w, in_h, w*n, out_w, out_h, out_stride, n, STBR_FILTER_NEAREST);
+	void* extra_memory = malloc(memory_required);
 
 	// Cut out the outside 64 pixels all around to test the stride.
-	stbr_resize(input_data + w*64*n + 64*n, w - 128, h - 128, n, w*n, output_data, out_w, out_h, out_stride, STBR_FILTER_NEAREST, STBR_EDGE_CLAMP, STBR_COLORSPACE_SRGB);
+	int border = 64;
+	STBR_ASSERT(in_w + border <= w);
+	STBR_ASSERT(in_h + border <= h);
+
+	stbr_resize_arbitrary(input_data + w * border * n + border * n, in_w, in_h, w*n, output_data, out_w, out_h, out_stride, n, STBR_TYPE_UINT8, STBR_FILTER_NEAREST, extra_memory, memory_required);
+
+	free(extra_memory);
 
 	stbi_write_png("output.png", out_w, out_h, n, output_data, out_stride);
 
diff --git a/tests/resample_test_c.c b/tests/resample_test_c.c
new file mode 100644
index 0000000..dcc3572
--- /dev/null
+++ b/tests/resample_test_c.c
@@ -0,0 +1,11 @@
+#ifdef _WIN32
+#define STBR_ASSERT(x) \
+	do { if (!(x)) __debugbreak(); \
+	} while (0) /* single statement: safe before 'else', no stray ';' */
+#endif
+
+#define STB_RESAMPLE_IMPLEMENTATION
+#define STB_RESAMPLE_STATIC
+#include "stb_resample.h"
+
+// Just to make sure it will build properly with a C compiler.