From 70136cd5f1f0f40fc5df529689797f67c3a79c07 Mon Sep 17 00:00:00 2001 From: Fabian Giesen Date: Sun, 4 Jul 2021 20:54:18 -0700 Subject: [PATCH] stb_vorbis: Change imdct_step3_inner_s_loop_ld654 Released Clang 12 generates bad code for the original loop in here. While this is a compiler bug plain and simple, we still have to deal with it. This is related to the SLP vectorizer, and in particular the two reverse subtracts in the butterflies for the second half to avoid unary negates. Use the more regular dataflow that has the unary negates in it (we can at least fold one of them into a constant, namely for A2) and introduce a few temporaries that also make alias analysis (and possible block-level vectorization) a whole lot easier while I'm at it. This fixes the codegen issues on Clang 12, which now produces a working decoder, and I expect the single unary negate that we actually gain per iteration of this loop is not a significant perf concern. (There are bigger fish to fry here regardless.) Fixes issue #1152. 
--- stb_vorbis.c | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/stb_vorbis.c b/stb_vorbis.c index 51e8eec..534e689 100644 --- a/stb_vorbis.c +++ b/stb_vorbis.c @@ -2592,34 +2592,33 @@ static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, while (z > base) { float k00,k11; + float l00,l11; - k00 = z[-0] - z[-8]; - k11 = z[-1] - z[-9]; - z[-0] = z[-0] + z[-8]; - z[-1] = z[-1] + z[-9]; - z[-8] = k00; - z[-9] = k11 ; + k00 = z[-0] - z[ -8]; + k11 = z[-1] - z[ -9]; + l00 = z[-2] - z[-10]; + l11 = z[-3] - z[-11]; + z[ -0] = z[-0] + z[ -8]; + z[ -1] = z[-1] + z[ -9]; + z[ -2] = z[-2] + z[-10]; + z[ -3] = z[-3] + z[-11]; + z[ -8] = k00; + z[ -9] = k11; + z[-10] = (l00+l11) * A2; + z[-11] = (l11-l00) * A2; - k00 = z[ -2] - z[-10]; - k11 = z[ -3] - z[-11]; - z[ -2] = z[ -2] + z[-10]; - z[ -3] = z[ -3] + z[-11]; - z[-10] = (k00+k11) * A2; - z[-11] = (k11-k00) * A2; - - k00 = z[-12] - z[ -4]; // reverse to avoid a unary negation + k00 = z[ -4] - z[-12]; k11 = z[ -5] - z[-13]; + l00 = z[ -6] - z[-14]; + l11 = z[ -7] - z[-15]; z[ -4] = z[ -4] + z[-12]; z[ -5] = z[ -5] + z[-13]; - z[-12] = k11; - z[-13] = k00; - - k00 = z[-14] - z[ -6]; // reverse to avoid a unary negation - k11 = z[ -7] - z[-15]; z[ -6] = z[ -6] + z[-14]; z[ -7] = z[ -7] + z[-15]; - z[-14] = (k00+k11) * A2; - z[-15] = (k00-k11) * A2; + z[-12] = k11; + z[-13] = -k00; + z[-14] = (l11-l00) * A2; + z[-15] = (l00+l11) * -A2; iter_54(z); iter_54(z-8);