diff --git a/stb_image_resize.h b/stb_image_resize.h index 203f13b..ea03fc6 100644 --- a/stb_image_resize.h +++ b/stb_image_resize.h @@ -1903,6 +1903,9 @@ static void stbir__resample_vertical_upsample(stbir__info* stbir_info, int n, in memset(encode_buffer, 0, output_w * sizeof(float) * channels); + // I tried reblocking this for better cache usage of encode_buffer + // (using x_outer, k, x_inner), but it lost speed. -- stb + coefficient_counter = 0; switch (channels) { case 1: @@ -1911,9 +1914,9 @@ static void stbir__resample_vertical_upsample(stbir__info* stbir_info, int n, in int coefficient_index = coefficient_counter++; float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, kernel_pixel_width, ring_buffer_length); float coefficient = vertical_coefficients[coefficient_group + coefficient_index]; - for (x = 0; x < output_w; x++) + for (x = 0; x < output_w; ++x) { - int in_pixel_index = x * channels; + int in_pixel_index = x * 1; encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient; } } @@ -1924,9 +1927,9 @@ static void stbir__resample_vertical_upsample(stbir__info* stbir_info, int n, in int coefficient_index = coefficient_counter++; float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, kernel_pixel_width, ring_buffer_length); float coefficient = vertical_coefficients[coefficient_group + coefficient_index]; - for (x = 0; x < output_w; x++) + for (x = 0; x < output_w; ++x) { - int in_pixel_index = x * channels; + int in_pixel_index = x * 2; encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient; encode_buffer[in_pixel_index + 1] += ring_buffer_entry[in_pixel_index + 1] * coefficient; } @@ -1938,9 +1941,9 @@ static void stbir__resample_vertical_upsample(stbir__info* stbir_info, int n, in int coefficient_index = coefficient_counter++; float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, kernel_pixel_width, ring_buffer_length); float coefficient = vertical_coefficients[coefficient_group + coefficient_index]; - for (x = 0; x < output_w; x++) + for (x = 0; x < output_w; ++x) { - int in_pixel_index = x * channels; + int in_pixel_index = x * 3; encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient; encode_buffer[in_pixel_index + 1] += ring_buffer_entry[in_pixel_index + 1] * coefficient; encode_buffer[in_pixel_index + 2] += ring_buffer_entry[in_pixel_index + 2] * coefficient; @@ -1953,9 +1956,9 @@ static void stbir__resample_vertical_upsample(stbir__info* stbir_info, int n, in int coefficient_index = coefficient_counter++; float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, kernel_pixel_width, ring_buffer_length); float coefficient = vertical_coefficients[coefficient_group + coefficient_index]; - for (x = 0; x < output_w; x++) + for (x = 0; x < output_w; ++x) { - int in_pixel_index = x * channels; + int in_pixel_index = x * 4; encode_buffer[in_pixel_index + 0] += ring_buffer_entry[in_pixel_index + 0] * coefficient; encode_buffer[in_pixel_index + 1] += ring_buffer_entry[in_pixel_index + 1] * coefficient; encode_buffer[in_pixel_index + 2] += ring_buffer_entry[in_pixel_index + 2] * coefficient; @@ -1969,7 +1972,7 @@ static void stbir__resample_vertical_upsample(stbir__info* stbir_info, int n, in int coefficient_index = coefficient_counter++; float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, kernel_pixel_width, ring_buffer_length); float coefficient = vertical_coefficients[coefficient_group + coefficient_index]; - for (x = 0; x < output_w; x++) + for (x = 0; x < output_w; ++x) { int in_pixel_index = x * channels; int c; diff --git a/tests/resample_test.cpp b/tests/resample_test.cpp index 0d47530..c42df7b 100644 --- a/tests/resample_test.cpp +++ b/tests/resample_test.cpp @@ -132,8 +132,8 @@ static void resizer(int argc, char **argv) int n; int out_w, out_h; input_pixels = stbi_load(argv[1], &w, &h, &n, 0); - out_w = w/4; - out_h = h/4; + out_w = w*3; + out_h = h*3; output_pixels = (unsigned char*) malloc(out_w*out_h*n); //stbir_resize_uint8_srgb(input_pixels, w, h, 0, output_pixels, out_w, out_h, 0, n, -1,0); stbir_resize_uint8(input_pixels, w, h, 0, output_pixels, out_w, out_h, 0, n); @@ -148,8 +148,7 @@ static void performance(int argc, char **argv) int w, h, count; int n, i; int out_w, out_h, srgb=1; - input_pixels = stbi_load(argv[1], &w, &h, &n, 4); - n=4; + input_pixels = stbi_load(argv[1], &w, &h, &n, 0); #if 0 out_w = w/4; out_h = h/4; count=100; // 1 #elif 0 @@ -159,15 +158,15 @@ static void performance(int argc, char **argv) #elif 0 out_w = w*3; out_h = h*3; count=2; srgb=0; // 4 #else - out_w = w*3; out_h = h*3; count=1; // 5 // this is dominated by linear->sRGB conversion + out_w = w*3; out_h = h*3; count=2; // 5 // this is dominated by linear->sRGB conversion #endif output_pixels = (unsigned char*) malloc(out_w*out_h*n); for (i=0; i < count; ++i) if (srgb) - stbir_resize_uint8_srgb(input_pixels, w, h, 0, output_pixels, out_w, out_h, 0, n, 3,0); + stbir_resize_uint8_srgb(input_pixels, w, h, 0, output_pixels, out_w, out_h, 0, n,-1,0); else - stbir_resize(input_pixels, w, h, 0, output_pixels, out_w, out_h, 0, STBIR_TYPE_UINT8, n, 3, 0, STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT, STBIR_COLORSPACE_LINEAR, NULL); + stbir_resize(input_pixels, w, h, 0, output_pixels, out_w, out_h, 0, STBIR_TYPE_UINT8, n,-1, 0, STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, STBIR_FILTER_DEFAULT, STBIR_FILTER_DEFAULT, STBIR_COLORSPACE_LINEAR, NULL); exit(0); }