remove most per-pixel switches, beginnings of removing encode_pixel switch
This commit is contained in:
parent
5eb0236d9d
commit
32b626859d
@ -563,7 +563,7 @@ stbir__inline static float* stbir__get_coefficient(stbir__info* stbir_info, int
|
||||
return &stbir_info->horizontal_coefficients[stbir__get_filter_pixel_width(stbir_info->filter, stbir_info->input_w, stbir_info->output_w, stbir_info->horizontal_scale)*n + c];
|
||||
}
|
||||
|
||||
stbir__inline static int stbir__edge_wrap(stbir_edge edge, int n, int max)
|
||||
static int stbir__edge_wrap_slow(stbir_edge edge, int n, int max)
|
||||
{
|
||||
switch (edge)
|
||||
{
|
||||
@ -617,6 +617,14 @@ stbir__inline static int stbir__edge_wrap(stbir_edge edge, int n, int max)
|
||||
}
|
||||
}
|
||||
|
||||
stbir__inline static int stbir__edge_wrap(stbir_edge edge, int n, int max)
|
||||
{
|
||||
// avoid per-pixel switch
|
||||
if (n >= 0 && n < max)
|
||||
return n;
|
||||
return stbir__edge_wrap_slow(edge, n, max);
|
||||
}
|
||||
|
||||
// What input pixels contribute to this output pixel?
|
||||
static void stbir__calculate_sample_range_upsample(int n, float out_filter_radius, float scale_ratio, float out_shift, int* in_first_pixel, int* in_last_pixel, float* in_center_of_out)
|
||||
{
|
||||
@ -778,7 +786,7 @@ static float* stbir__get_decode_buffer(stbir__info* stbir_info)
|
||||
|
||||
static void stbir__decode_scanline(stbir__info* stbir_info, int n)
|
||||
{
|
||||
int x, c;
|
||||
int c;
|
||||
int channels = stbir_info->channels;
|
||||
int alpha_channel = stbir_info->alpha_channel;
|
||||
int type = stbir_info->type;
|
||||
@ -793,77 +801,114 @@ static void stbir__decode_scanline(stbir__info* stbir_info, int n)
|
||||
int max_x = input_w + stbir__get_filter_pixel_margin_horizontal(stbir_info);
|
||||
int decode = STBIR__DECODE(type, colorspace);
|
||||
|
||||
for (x = -stbir__get_filter_pixel_margin_horizontal(stbir_info); x < max_x; x++)
|
||||
{
|
||||
int decode_pixel_index = x * channels;
|
||||
int input_pixel_index = in_buffer_row_index + stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
|
||||
int x = -stbir__get_filter_pixel_margin_horizontal(stbir_info);
|
||||
|
||||
switch (decode)
|
||||
switch (decode)
|
||||
{
|
||||
case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_LINEAR):
|
||||
for (; x < max_x; x++)
|
||||
{
|
||||
case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_LINEAR):
|
||||
int decode_pixel_index = x * channels;
|
||||
int input_pixel_index = in_buffer_row_index + stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
|
||||
for (c = 0; c < channels; c++)
|
||||
decode_buffer[decode_pixel_index + c] = ((float)((const unsigned char*)input_data)[input_pixel_index + c]) / 255;
|
||||
}
|
||||
break;
|
||||
|
||||
break;
|
||||
|
||||
case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_SRGB):
|
||||
case STBIR__DECODE(STBIR_TYPE_UINT8, STBIR_COLORSPACE_SRGB):
|
||||
for (; x < max_x; x++)
|
||||
{
|
||||
int decode_pixel_index = x * channels;
|
||||
int input_pixel_index = in_buffer_row_index + stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
|
||||
for (c = 0; c < channels; c++)
|
||||
decode_buffer[decode_pixel_index + c] = stbir__srgb_uchar_to_linear_float[((const unsigned char*)input_data)[input_pixel_index + c]];
|
||||
|
||||
if (!(stbir_info->flags&STBIR_FLAG_GAMMA_CORRECT_ALPHA))
|
||||
decode_buffer[decode_pixel_index + alpha_channel] = ((float)((const unsigned char*)input_data)[input_pixel_index + alpha_channel]) / 255;
|
||||
}
|
||||
break;
|
||||
|
||||
break;
|
||||
|
||||
case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_LINEAR):
|
||||
case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_LINEAR):
|
||||
for (; x < max_x; x++)
|
||||
{
|
||||
int decode_pixel_index = x * channels;
|
||||
int input_pixel_index = in_buffer_row_index + stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
|
||||
for (c = 0; c < channels; c++)
|
||||
decode_buffer[decode_pixel_index + c] = ((float)((const unsigned short*)input_data)[input_pixel_index + c]) / 65535;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_SRGB):
|
||||
case STBIR__DECODE(STBIR_TYPE_UINT16, STBIR_COLORSPACE_SRGB):
|
||||
for (; x < max_x; x++)
|
||||
{
|
||||
int decode_pixel_index = x * channels;
|
||||
int input_pixel_index = in_buffer_row_index + stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
|
||||
for (c = 0; c < channels; c++)
|
||||
decode_buffer[decode_pixel_index + c] = stbir__srgb_to_linear(((float)((const unsigned short*)input_data)[input_pixel_index + c]) / 65535);
|
||||
|
||||
if (!(stbir_info->flags&STBIR_FLAG_GAMMA_CORRECT_ALPHA))
|
||||
decode_buffer[decode_pixel_index + alpha_channel] = ((float)((const unsigned short*)input_data)[input_pixel_index + alpha_channel]) / 65535;
|
||||
}
|
||||
break;
|
||||
|
||||
break;
|
||||
|
||||
case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_LINEAR):
|
||||
case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_LINEAR):
|
||||
for (; x < max_x; x++)
|
||||
{
|
||||
int decode_pixel_index = x * channels;
|
||||
int input_pixel_index = in_buffer_row_index + stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
|
||||
for (c = 0; c < channels; c++)
|
||||
decode_buffer[decode_pixel_index + c] = (float)(((double)((const unsigned int*)input_data)[input_pixel_index + c]) / 4294967295);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_SRGB):
|
||||
case STBIR__DECODE(STBIR_TYPE_UINT32, STBIR_COLORSPACE_SRGB):
|
||||
for (; x < max_x; x++)
|
||||
{
|
||||
int decode_pixel_index = x * channels;
|
||||
int input_pixel_index = in_buffer_row_index + stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
|
||||
for (c = 0; c < channels; c++)
|
||||
decode_buffer[decode_pixel_index + c] = stbir__srgb_to_linear((float)(((double)((const unsigned int*)input_data)[input_pixel_index + c]) / 4294967295));
|
||||
|
||||
if (!(stbir_info->flags&STBIR_FLAG_GAMMA_CORRECT_ALPHA))
|
||||
decode_buffer[decode_pixel_index + alpha_channel] = (float)(((double)((const unsigned int*)input_data)[input_pixel_index + alpha_channel]) / 4294967295);
|
||||
}
|
||||
break;
|
||||
|
||||
break;
|
||||
|
||||
case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_LINEAR):
|
||||
case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_LINEAR):
|
||||
for (; x < max_x; x++)
|
||||
{
|
||||
int decode_pixel_index = x * channels;
|
||||
int input_pixel_index = in_buffer_row_index + stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
|
||||
for (c = 0; c < channels; c++)
|
||||
decode_buffer[decode_pixel_index + c] = ((const float*)input_data)[input_pixel_index + c];
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_SRGB):
|
||||
case STBIR__DECODE(STBIR_TYPE_FLOAT, STBIR_COLORSPACE_SRGB):
|
||||
for (; x < max_x; x++)
|
||||
{
|
||||
int decode_pixel_index = x * channels;
|
||||
int input_pixel_index = in_buffer_row_index + stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
|
||||
for (c = 0; c < channels; c++)
|
||||
decode_buffer[decode_pixel_index + c] = stbir__srgb_to_linear(((const float*)input_data)[input_pixel_index + c]);
|
||||
|
||||
if (!(stbir_info->flags&STBIR_FLAG_GAMMA_CORRECT_ALPHA))
|
||||
decode_buffer[decode_pixel_index + alpha_channel] = ((const float*)input_data)[input_pixel_index + alpha_channel];
|
||||
|
||||
break;
|
||||
|
||||
default:
|
||||
STBIR__UNIMPLEMENTED("Unknown type/colorspace/channels combination.");
|
||||
break;
|
||||
}
|
||||
|
||||
if (stbir_info->flags&STBIR_FLAG_NONPREMUL_ALPHA)
|
||||
break;
|
||||
|
||||
default:
|
||||
STBIR__UNIMPLEMENTED("Unknown type/colorspace/channels combination.");
|
||||
break;
|
||||
}
|
||||
|
||||
if (stbir_info->flags & STBIR_FLAG_NONPREMUL_ALPHA)
|
||||
{
|
||||
for (x = -stbir__get_filter_pixel_margin_horizontal(stbir_info); x < max_x; x++)
|
||||
{
|
||||
int decode_pixel_index = x * channels;
|
||||
int input_pixel_index = in_buffer_row_index + stbir__edge_wrap(edge_horizontal, x, input_w) * channels;
|
||||
float alpha = decode_buffer[decode_pixel_index + alpha_channel];
|
||||
for (c = 0; c < channels; c++)
|
||||
{
|
||||
@ -1047,7 +1092,7 @@ static stbir__inline void stbir__encode_pixel(stbir__info* stbir_info, void* out
|
||||
for (n = 0; n < channels; n++)
|
||||
((unsigned char*)output_buffer)[output_pixel_index + n] = stbir__linear_uchar_to_srgb_uchar[(unsigned char)(stbir__saturate(encode_buffer[encode_pixel_index + n]) * 255)];
|
||||
|
||||
if (!(stbir_info->flags&STBIR_FLAG_FORCE_GAMMA_CORRECT_ALPHA))
|
||||
if (!(stbir_info->flags&STBIR_FLAG_GAMMA_CORRECT_ALPHA))
|
||||
((unsigned char*)output_buffer)[output_pixel_index + alpha_channel] = (unsigned char)(stbir__saturate(encode_buffer[encode_pixel_index + alpha_channel]) * 255);
|
||||
|
||||
break;
|
||||
@ -1100,6 +1145,16 @@ static stbir__inline void stbir__encode_pixel(stbir__info* stbir_info, void* out
|
||||
}
|
||||
}
|
||||
|
||||
// @OPTIMIZE: embed stbir__encode_pixel and move switch out of per-pixel loop
|
||||
static void stbir__encode_scanline(stbir__info* stbir_info, int num_pixels, void *output_buffer, int output_offset, float *encode_buffer, int channels, int alpha_channel, int decode)
|
||||
{
|
||||
int x;
|
||||
for (x=0; x < num_pixels; ++x)
|
||||
{
|
||||
stbir__encode_pixel(stbir_info, output_buffer, output_offset+x*channels, encode_buffer, x*channels, channels, alpha_channel, decode);
|
||||
}
|
||||
}
|
||||
|
||||
static void stbir__resample_vertical_upsample(stbir__info* stbir_info, int n, int in_first_scanline, int in_last_scanline, float in_center_of_out)
|
||||
{
|
||||
int x, k;
|
||||
@ -1134,6 +1189,8 @@ static void stbir__resample_vertical_upsample(stbir__info* stbir_info, int n, in
|
||||
STBIR__DEBUG_ASSERT(n0 >= in_first_scanline);
|
||||
STBIR__DEBUG_ASSERT(n1 <= in_last_scanline);
|
||||
|
||||
memset(encode_buffer, 0, output_w * sizeof(float) * channels);
|
||||
|
||||
for (x = 0; x < output_w; x++)
|
||||
{
|
||||
int in_pixel_index = x * channels;
|
||||
@ -1142,8 +1199,6 @@ static void stbir__resample_vertical_upsample(stbir__info* stbir_info, int n, in
|
||||
|
||||
STBIR__DEBUG_ASSERT(n1 >= n0);
|
||||
|
||||
memset(encode_buffer, 0, sizeof(float) * channels);
|
||||
|
||||
for (k = n0; k <= n1; k++)
|
||||
{
|
||||
int coefficient_index = coefficient_counter++;
|
||||
@ -1152,11 +1207,10 @@ static void stbir__resample_vertical_upsample(stbir__info* stbir_info, int n, in
|
||||
|
||||
int c;
|
||||
for (c = 0; c < channels; c++)
|
||||
encode_buffer[c] += ring_buffer_entry[in_pixel_index + c] * coefficient;
|
||||
encode_buffer[x*channels + c] += ring_buffer_entry[in_pixel_index + c] * coefficient;
|
||||
}
|
||||
|
||||
stbir__encode_pixel(stbir_info, output_data, out_pixel_index, encode_buffer, 0, channels, alpha_channel, decode);
|
||||
}
|
||||
stbir__encode_scanline(stbir_info, output_w, output_data, output_row_index, encode_buffer, channels, alpha_channel, decode);
|
||||
}
|
||||
|
||||
static void stbir__resample_vertical_downsample(stbir__info* stbir_info, int n, int in_first_scanline, int in_last_scanline, float in_center_of_out)
|
||||
@ -1280,18 +1334,9 @@ static void stbir__empty_ring_buffer(stbir__info* stbir_info, int first_necessar
|
||||
{
|
||||
if (stbir_info->ring_buffer_first_scanline >= 0 && stbir_info->ring_buffer_first_scanline < stbir_info->output_h)
|
||||
{
|
||||
int x;
|
||||
int output_row = stbir_info->ring_buffer_first_scanline * output_stride;
|
||||
float* ring_buffer_entry = stbir__get_ring_buffer_entry(ring_buffer, stbir_info->ring_buffer_begin_index, ring_buffer_length);
|
||||
|
||||
for (x = 0; x < output_w; x++)
|
||||
{
|
||||
int pixel_index = x * channels;
|
||||
int ring_pixel_index = pixel_index;
|
||||
int output_pixel_index = output_row + pixel_index;
|
||||
|
||||
stbir__encode_pixel(stbir_info, output_data, output_pixel_index, ring_buffer_entry, ring_pixel_index, channels, alpha_channel, decode);
|
||||
}
|
||||
stbir__encode_scanline(stbir_info, output_w, output_data, output_row, ring_buffer_entry, channels, alpha_channel, decode);
|
||||
}
|
||||
|
||||
if (stbir_info->ring_buffer_first_scanline == stbir_info->ring_buffer_last_scanline)
|
||||
@ -1367,7 +1412,7 @@ static stbir__inline stbir_size_t stbir__calculate_memory(int input_w, int input
|
||||
int decode_buffer_size = (input_w + pixel_margin*2) * channels * sizeof(float);
|
||||
int horizontal_buffer_size = output_w * channels * sizeof(float);
|
||||
int ring_buffer_size = output_w * channels * filter_height * sizeof(float);
|
||||
int encode_buffer_size = channels * sizeof(float);
|
||||
int encode_buffer_size = output_w * channels * sizeof(float);
|
||||
|
||||
STBIR_ASSERT(filter != 0);
|
||||
STBIR_ASSERT(filter < STBIR__ARRAY_SIZE(stbir__filter_info_table)); // this now happens too late
|
||||
|
Loading…
Reference in New Issue
Block a user