From 9d924f8a47649ce650fb6943d2394e563eae39c8 Mon Sep 17 00:00:00 2001 From: Sean Barrett Date: Thu, 14 Dec 2023 03:09:00 -0800 Subject: [PATCH] stb_image_resize: 2.04 --- stb_image_resize2.h | 72 +++++++++++++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 25 deletions(-) diff --git a/stb_image_resize2.h b/stb_image_resize2.h index e0c4282..4b246f3 100644 --- a/stb_image_resize2.h +++ b/stb_image_resize2.h @@ -1,4 +1,4 @@ -/* stb_image_resize2 - v2.01 - public domain image resizing +/* stb_image_resize2 - v2.04 - public domain image resizing by Jeff Roberts (v2) and Jorge L Rodriguez http://github.com/nothings/stb @@ -328,9 +328,11 @@ Nathan Reed: warning fixes for 1.0 REVISIONS - 2.00 (2022-02-20) mostly new source: new api, optimizations, simd, vertical-first, etc - (2x-5x faster without simd, 4x-12x faster with simd) - (in some cases, 20x to 40x faster - resizing to very small for example) + 2.04 (2023-11-17) Fix for rare AVX bug, shadowed symbol (thanks Nikola Smiljanic). + 2.03 (2023-11-01) ASAN and TSAN warnings fixed, minor tweaks. + 2.00 (2023-10-10) mostly new source: new api, optimizations, simd, vertical-first, etc + (2x-5x faster without simd, 4x-12x faster with simd) + (in some cases, 20x to 40x faster - resizing to very small for example) 0.96 (2019-03-04) fixed warnings 0.95 (2017-07-23) fixed warnings 0.94 (2017-03-18) fixed warnings @@ -450,25 +452,33 @@ typedef uint64_t stbir_uint64; // for back compatibility, you can cast the old channel count to an stbir_pixel_layout typedef enum { - STBIR_BGR = 0, // 3-chan, with order specified (for channel flipping) STBIR_1CHANNEL = 1, STBIR_2CHANNEL = 2, STBIR_RGB = 3, // 3-chan, with order specified (for channel flipping) - STBIR_RGBA = 4, // alpha formats, alpha is NOT premultiplied into color channels - + STBIR_BGR = 0, // 3-chan, with order specified (for channel flipping) STBIR_4CHANNEL = 5, + + STBIR_RGBA = 4, // alpha formats, where alpha is NOT premultiplied into color channels STBIR_BGRA = 6, STBIR_ARGB = 7, STBIR_ABGR = 8, STBIR_RA = 9, STBIR_AR = 10, - STBIR_RGBA_PM = 11, // alpha formats, alpha is premultiplied into color channels + STBIR_RGBA_PM = 11, // alpha formats, where alpha is premultiplied into color channels STBIR_BGRA_PM = 12, STBIR_ARGB_PM = 13, STBIR_ABGR_PM = 14, STBIR_RA_PM = 15, STBIR_AR_PM = 16, + + STBIR_RGBA_NO_AW = 11, // alpha formats, where NO alpha weighting is applied at all! + STBIR_BGRA_NO_AW = 12, // these are just synonyms for the _PM flags (which also do + STBIR_ARGB_NO_AW = 13, // no alpha weighting). These names just make it more clear + STBIR_ABGR_NO_AW = 14, // for some folks). + STBIR_RA_NO_AW = 15, + STBIR_AR_NO_AW = 16, + } stbir_pixel_layout; //=============================================================== @@ -1172,6 +1182,10 @@ static stbir__inline stbir_uint8 stbir__linear_to_srgb_uchar(float in) #define STBIR_FORCE_GATHER_FILTER_SCANLINES_AMOUNT 32 // when downsampling and <= 32 scanlines of buffering, use gather. gather used down to 1/8th scaling for 25% win. #endif +#ifndef STBIR_FORCE_MINIMUM_SCANLINES_FOR_SPLITS +#define STBIR_FORCE_MINIMUM_SCANLINES_FOR_SPLITS 4 // when threading, what is the minimum number of scanlines for a split? +#endif + // restrict pointers for the output pointers #if defined( _MSC_VER ) && !defined(__clang__) #define STBIR_STREAMOUT_PTR( star ) star __restrict @@ -1549,7 +1563,6 @@ static stbir__inline stbir_uint8 stbir__linear_to_srgb_uchar(float in) #define stbir__simdf8_0123to2222( out, in ) (out) = stbir__simdf_swiz(_mm256_castps256_ps128(in), 2,2,2,2 ) - #define stbir__simdf8_load2( out, ptr ) (out) = _mm256_castsi256_ps(_mm256_castsi128_si256( _mm_loadl_epi64( (__m128i*)(ptr)) )) // top values can be random (not denormal or nan for perf) #define stbir__simdf8_load4b( out, ptr ) (out) = _mm256_broadcast_ps( (__m128 const *)(ptr) ) static __m256i stbir_00112233 = { STBIR__CONST_4d_32i( 0, 0, 1, 1 ), STBIR__CONST_4d_32i( 2, 2, 3, 3 ) }; @@ -1582,11 +1595,11 @@ static stbir__inline stbir_uint8 stbir__linear_to_srgb_uchar(float in) #ifdef STBIR_USE_FMA // not on by default to maintain bit identical simd to non-simd #define stbir__simdf8_madd( out, add, mul1, mul2 ) (out) = _mm256_fmadd_ps( mul1, mul2, add ) #define stbir__simdf8_madd_mem( out, add, mul, ptr ) (out) = _mm256_fmadd_ps( mul, _mm256_loadu_ps( (float const*)(ptr) ), add ) - #define stbir__simdf8_madd_mem4( out, add, mul, ptr ) (out) = _mm256_fmadd_ps( _mm256_castps128_ps256( mul ), _mm256_castps128_ps256( _mm_loadu_ps( (float const*)(ptr) ) ), add ) + #define stbir__simdf8_madd_mem4( out, add, mul, ptr )(out) = _mm256_fmadd_ps( _mm256_setr_m128( mul, _mm_setzero_ps() ), _mm256_setr_m128( _mm_loadu_ps( (float const*)(ptr) ), _mm_setzero_ps() ), add ) #else #define stbir__simdf8_madd( out, add, mul1, mul2 ) (out) = _mm256_add_ps( add, _mm256_mul_ps( mul1, mul2 ) ) #define stbir__simdf8_madd_mem( out, add, mul, ptr ) (out) = _mm256_add_ps( add, _mm256_mul_ps( mul, _mm256_loadu_ps( (float const*)(ptr) ) ) ) - #define stbir__simdf8_madd_mem4( out, add, mul, ptr ) (out) = _mm256_add_ps( add, _mm256_castps128_ps256( _mm_mul_ps( mul, _mm_loadu_ps( (float const*)(ptr) ) ) ) ) + #define stbir__simdf8_madd_mem4( out, add, mul, ptr ) (out) = _mm256_add_ps( add, _mm256_setr_m128( _mm_mul_ps( mul, _mm_loadu_ps( (float const*)(ptr) ) ), _mm_setzero_ps() ) ) #endif #define stbir__if_simdf8_cast_to_simdf4( val ) _mm256_castps256_ps128( val ) @@ -3697,7 +3710,7 @@ static int stbir__pack_coefficients( int num_contributors, stbir__contributors* float * coeffs = coefficents + widest * ( num_contributors - 1 ); // go until no chance of clipping (this is usually less than 8 lops) - while ( ( ( contribs->n0 + widest*2 ) >= row_width ) && ( contribs >= contributors ) ) + while ( ( contribs >= contributors ) && ( ( contribs->n0 + widest*2 ) >= row_width ) ) { // might we clip?? if ( ( contribs->n0 + widest ) > row_width ) @@ -4652,10 +4665,10 @@ static void stbir__decode_scanline(stbir__info const * stbir_info, int n, float stbir__simdf8_madd( tot0, tot0, c, d ); } #define stbir__store_output() \ - { stbir__simdf t,c; \ + { stbir__simdf t,d; \ stbir__simdf8_add4halves( t, stbir__if_simdf8_cast_to_simdf4(tot0), tot0 ); \ - stbir__simdf_0123to2301( c, t ); \ - stbir__simdf_add( t, t, c ); \ + stbir__simdf_0123to2301( d, t ); \ + stbir__simdf_add( t, t, d ); \ stbir__simdf_store2( output, t ); \ horizontal_coefficients += coefficient_width; \ ++horizontal_contributors; \ @@ -7389,7 +7402,6 @@ static void stbir__init_and_set_layout( STBIR_RESIZE * resize, stbir_pixel_layou resize->output_cb = 0; resize->user_data = resize; resize->samplers = 0; - resize->needs_rebuild = 1; resize->called_alloc = 0; resize->horizontal_filter = STBIR_FILTER_DEFAULT; resize->horizontal_filter_kernel = 0; resize->horizontal_filter_support = 0; @@ -7403,6 +7415,7 @@ static void stbir__init_and_set_layout( STBIR_RESIZE * resize, stbir_pixel_layou resize->output_data_type = data_type; resize->input_pixel_layout_public = pixel_layout; resize->output_pixel_layout_public = pixel_layout; + resize->needs_rebuild = 1; } STBIRDEF void stbir_resize_init( STBIR_RESIZE * resize, @@ -7428,17 +7441,27 @@ STBIRDEF void stbir_set_datatypes( STBIR_RESIZE * resize, stbir_datatype input_t { resize->input_data_type = input_type; resize->output_data_type = output_type; + if ( ( resize->samplers ) && ( !resize->needs_rebuild ) ) + stbir__update_info_from_resize( resize->samplers, resize ); } STBIRDEF void stbir_set_pixel_callbacks( STBIR_RESIZE * resize, stbir_input_callback * input_cb, stbir_output_callback * output_cb ) // no callbacks by default { resize->input_cb = input_cb; resize->output_cb = output_cb; + + if ( ( resize->samplers ) && ( !resize->needs_rebuild ) ) + { + resize->samplers->in_pixels_cb = input_cb; + resize->samplers->out_pixels_cb = output_cb; + } } STBIRDEF void stbir_set_user_data( STBIR_RESIZE * resize, void * user_data ) // pass back STBIR_RESIZE* by default { resize->user_data = user_data; + if ( ( resize->samplers ) && ( !resize->needs_rebuild ) ) + resize->samplers->user_data = user_data; } STBIRDEF void stbir_set_buffer_ptrs( STBIR_RESIZE * resize, const void * input_pixels, int input_stride_in_bytes, void * output_pixels, int output_stride_in_bytes ) @@ -7447,6 +7470,8 @@ STBIRDEF void stbir_set_buffer_ptrs( STBIR_RESIZE * resize, const void * input_p resize->input_stride_in_bytes = input_stride_in_bytes; resize->output_pixels = output_pixels; resize->output_stride_in_bytes = output_stride_in_bytes; + if ( ( resize->samplers ) && ( !resize->needs_rebuild ) ) + stbir__update_info_from_resize( resize->samplers, resize ); } @@ -7593,9 +7618,9 @@ static int stbir__perform_build( STBIR_RESIZE * resize, int splits ) stbir__get_conservative_extents( &horizontal, &conservative, resize->user_data ); stbir__set_sampler(&vertical, resize->vertical_filter, resize->horizontal_filter_kernel, resize->vertical_filter_support, resize->vertical_edge, &vertical.scale_info, 0, resize->user_data ); - if ( ( vertical.scale_info.output_sub_size / splits ) < 4 ) // each split should be a minimum of 4 scanlines (handwavey choice) + if ( ( vertical.scale_info.output_sub_size / splits ) < STBIR_FORCE_MINIMUM_SCANLINES_FOR_SPLITS ) // each split should be a minimum of 4 scanlines (handwavey choice) { - splits = vertical.scale_info.output_sub_size / 4; + splits = vertical.scale_info.output_sub_size / STBIR_FORCE_MINIMUM_SCANLINES_FOR_SPLITS; if ( splits == 0 ) splits = 1; } @@ -7612,6 +7637,10 @@ static int stbir__perform_build( STBIR_RESIZE * resize, int splits ) #ifdef STBIR_PROFILE STBIR_MEMCPY( &out_info->profile, &profile_infod.profile, sizeof( out_info->profile ) ); #endif + + // update anything that can be changed without recalcing samplers + stbir__update_info_from_resize( out_info, resize ); + return splits; } @@ -7680,10 +7709,6 @@ STBIRDEF int stbir_resize_extended( STBIR_RESIZE * resize ) STBIR_PROFILE_BUILD_CLEAR( resize->samplers ); } - - // update anything that can be changed without recalcing samplers - stbir__update_info_from_resize( resize->samplers, resize ); - // do resize result = stbir__perform_resize( resize->samplers, 0, resize->splits ); @@ -7712,9 +7737,6 @@ STBIRDEF int stbir_resize_extended_split( STBIR_RESIZE * resize, int split_start if ( ( split_start >= resize->splits ) || ( split_start < 0 ) || ( ( split_start + split_count ) > resize->splits ) || ( split_count <= 0 ) ) return 0; - // update anything that can be changed without recalcing samplers - stbir__update_info_from_resize( resize->samplers, resize ); - // do resize return stbir__perform_resize( resize->samplers, split_start, split_count ); }