unroll channel loops in downsampler
This commit is contained in:
parent
3ee97c221f
commit
c9600c012f
@ -1429,16 +1429,64 @@ static void stbir__resample_horizontal_downsample(stbir__info* stbir_info, int n
|
||||
int max_n = n1;
|
||||
int coefficient_group = coefficient_width * x;
|
||||
|
||||
for (k = n0; k <= max_n; k++)
|
||||
{
|
||||
int out_pixel_index = k * channels;
|
||||
float coefficient = horizontal_coefficients[coefficient_group + k - n0];
|
||||
int c;
|
||||
switch (channels) {
|
||||
case 1:
|
||||
for (k = n0; k <= max_n; k++)
|
||||
{
|
||||
int out_pixel_index = k * 1;
|
||||
float coefficient = horizontal_coefficients[coefficient_group + k - n0];
|
||||
STBIR__DEBUG_ASSERT(coefficient != 0);
|
||||
output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
|
||||
}
|
||||
break;
|
||||
|
||||
STBIR__DEBUG_ASSERT(coefficient != 0);
|
||||
case 2:
|
||||
for (k = n0; k <= max_n; k++)
|
||||
{
|
||||
int out_pixel_index = k * 2;
|
||||
float coefficient = horizontal_coefficients[coefficient_group + k - n0];
|
||||
STBIR__DEBUG_ASSERT(coefficient != 0);
|
||||
output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
|
||||
output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
|
||||
}
|
||||
break;
|
||||
|
||||
for (c = 0; c < channels; c++)
|
||||
output_buffer[out_pixel_index + c] += decode_buffer[in_pixel_index + c] * coefficient;
|
||||
case 3:
|
||||
for (k = n0; k <= max_n; k++)
|
||||
{
|
||||
int out_pixel_index = k * 3;
|
||||
float coefficient = horizontal_coefficients[coefficient_group + k - n0];
|
||||
STBIR__DEBUG_ASSERT(coefficient != 0);
|
||||
output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
|
||||
output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
|
||||
output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient;
|
||||
}
|
||||
break;
|
||||
|
||||
case 4:
|
||||
for (k = n0; k <= max_n; k++)
|
||||
{
|
||||
int out_pixel_index = k * 4;
|
||||
float coefficient = horizontal_coefficients[coefficient_group + k - n0];
|
||||
STBIR__DEBUG_ASSERT(coefficient != 0);
|
||||
output_buffer[out_pixel_index + 0] += decode_buffer[in_pixel_index + 0] * coefficient;
|
||||
output_buffer[out_pixel_index + 1] += decode_buffer[in_pixel_index + 1] * coefficient;
|
||||
output_buffer[out_pixel_index + 2] += decode_buffer[in_pixel_index + 2] * coefficient;
|
||||
output_buffer[out_pixel_index + 3] += decode_buffer[in_pixel_index + 3] * coefficient;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
for (k = n0; k <= max_n; k++)
|
||||
{
|
||||
int c;
|
||||
int out_pixel_index = k * channels;
|
||||
float coefficient = horizontal_coefficients[coefficient_group + k - n0];
|
||||
STBIR__DEBUG_ASSERT(coefficient != 0);
|
||||
for (c = 0; c < channels; c++)
|
||||
output_buffer[out_pixel_index + c] += decode_buffer[in_pixel_index + c] * coefficient;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1717,13 +1765,51 @@ static void stbir__resample_vertical_downsample(stbir__info* stbir_info, int n,
|
||||
|
||||
float* ring_buffer_entry = stbir__get_ring_buffer_scanline(k, ring_buffer, ring_buffer_begin_index, ring_buffer_first_scanline, kernel_pixel_width, ring_buffer_length);
|
||||
|
||||
for (x = 0; x < output_w; x++)
|
||||
{
|
||||
int in_pixel_index = x * channels;
|
||||
switch (channels) {
|
||||
case 1:
|
||||
for (x = 0; x < output_w; x++)
|
||||
{
|
||||
int in_pixel_index = x * 1;
|
||||
ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
for (x = 0; x < output_w; x++)
|
||||
{
|
||||
int in_pixel_index = x * 2;
|
||||
ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient;
|
||||
ring_buffer_entry[in_pixel_index + 1] += horizontal_buffer[in_pixel_index + 1] * coefficient;
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
for (x = 0; x < output_w; x++)
|
||||
{
|
||||
int in_pixel_index = x * 3;
|
||||
ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient;
|
||||
ring_buffer_entry[in_pixel_index + 1] += horizontal_buffer[in_pixel_index + 1] * coefficient;
|
||||
ring_buffer_entry[in_pixel_index + 2] += horizontal_buffer[in_pixel_index + 2] * coefficient;
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
for (x = 0; x < output_w; x++)
|
||||
{
|
||||
int in_pixel_index = x * 4;
|
||||
ring_buffer_entry[in_pixel_index + 0] += horizontal_buffer[in_pixel_index + 0] * coefficient;
|
||||
ring_buffer_entry[in_pixel_index + 1] += horizontal_buffer[in_pixel_index + 1] * coefficient;
|
||||
ring_buffer_entry[in_pixel_index + 2] += horizontal_buffer[in_pixel_index + 2] * coefficient;
|
||||
ring_buffer_entry[in_pixel_index + 3] += horizontal_buffer[in_pixel_index + 3] * coefficient;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
for (x = 0; x < output_w; x++)
|
||||
{
|
||||
int in_pixel_index = x * channels;
|
||||
|
||||
int c;
|
||||
for (c = 0; c < channels; c++)
|
||||
ring_buffer_entry[in_pixel_index + c] += horizontal_buffer[in_pixel_index + c] * coefficient;
|
||||
int c;
|
||||
for (c = 0; c < channels; c++)
|
||||
ring_buffer_entry[in_pixel_index + c] += horizontal_buffer[in_pixel_index + c] * coefficient;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -141,6 +141,21 @@ static void resizer(int argc, char **argv)
|
||||
exit(0);
|
||||
}
|
||||
|
||||
/* Benchmark helper: loads the image named by argv[1], resizes it to one
 * quarter of its width and height 100 times with stbir_resize_uint8_srgb,
 * and then terminates the process.
 *
 * argc/argv are the program's command-line arguments; argv[1] must be the
 * path of the image to load.
 *
 * Never returns. Exits with status 0 after the timing loop, or with status 1
 * when no file name was supplied, the image failed to load, the image is too
 * small to be quartered, or the output buffer could not be allocated.
 * Buffers are deliberately not released because the process ends here.
 */
static void performance(int argc, char **argv)
{
	unsigned char* input_pixels;
	unsigned char* output_pixels;
	int w, h;
	int n, i;
	int out_w, out_h;
	size_t out_size;

	/* argv[1] is only valid when a file name was actually passed. */
	if (argc < 2)
		exit(1);

	input_pixels = stbi_load(argv[1], &w, &h, &n, 0);
	if (input_pixels == NULL)
		exit(1);

	out_w = w/4;
	out_h = h/4;

	/* Images narrower or shorter than 4 pixels would give an empty output. */
	if (out_w <= 0 || out_h <= 0 || n <= 0)
		exit(1);

	/* Compute the byte count in size_t so large images cannot overflow int. */
	out_size = (size_t) out_w * (size_t) out_h * (size_t) n;
	output_pixels = (unsigned char*) malloc(out_size);
	if (output_pixels == NULL)
		exit(1);

	/* Repeat the same resize so the run is long enough to time or profile. */
	for (i=0; i < 100; ++i)
		stbir_resize_uint8_srgb(input_pixels, w, h, 0, output_pixels, out_w, out_h, 0, n, -1,0);

	exit(0);
}
|
||||
|
||||
void test_suite(int argc, char **argv);
|
||||
|
||||
@ -153,6 +168,7 @@ int main(int argc, char** argv)
|
||||
int out_w, out_h, out_stride;
|
||||
|
||||
//resizer(argc, argv);
|
||||
performance(argc, argv);
|
||||
|
||||
#if 1
|
||||
test_suite(argc, argv);
|
||||
|
Loading…
Reference in New Issue
Block a user