From 254e1c99759a018bc0932a088b00d4c2740f1399 Mon Sep 17 00:00:00 2001
From: Ignacio Castano <castano@gmail.com>
Date: Thu, 19 Mar 2020 23:23:36 -0700
Subject: [PATCH] Perfect quantization of DXT endpoints

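A small change that quantizes floating-point endpoints to RGB565 by picking the 5/6-bit value whose 8-bit expansion (as defined by the DXT spec) is nearest to the input, instead of rounding in 5/6-bit space. For more info see: https://gist.github.com/castano/c92c7626f288f9e99e158520b14a61cf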
---
 stb_dxt.h | 47 +++++++++++++++++++++++++++++++----------------
 1 file changed, 31 insertions(+), 16 deletions(-)

diff --git a/stb_dxt.h b/stb_dxt.h
index 4a5b15c..f84e710 100644
--- a/stb_dxt.h
+++ b/stb_dxt.h
@@ -415,12 +415,29 @@ static void stb__OptimizeColorsBlock(unsigned char *block, unsigned short *pmax1
    *pmin16 = stb__As16Bit(minp[0],minp[1],minp[2]);
 }
 
-static int stb__sclamp(float y, int p0, int p1)
-{
-   int x = (int) y;
-   if (x < p0) return p0;
-   if (x > p1) return p1;
-   return x;
+static const float midpoints5[32] = {  // entry i: halfway point, scaled to [0,1], between 5-bit levels i and i+1 after each is expanded to 8 bits as (v << 3) | (v >> 2)
+    0.015686f, 0.047059f, 0.078431f, 0.111765f, 0.145098f, 0.176471f, 0.207843f, 0.241176f, 0.274510f, 0.305882f, 0.337255f, 0.370588f, 0.403922f, 0.435294f, 0.466667f, 0.5f,
+    0.533333f, 0.564706f, 0.596078f, 0.629412f, 0.662745f, 0.694118f, 0.725490f, 0.758824f, 0.792157f, 0.823529f, 0.854902f, 0.888235f, 0.921569f, 0.952941f, 0.984314f, 1.0f  // final 1.0f has no level above it: a value saturated to [0,1] never compares greater, so level 31 is never stepped past
+};
+
+static const float midpoints6[64] = {  // entry i: halfway point, scaled to [0,1], between 6-bit levels i and i+1 after each is expanded to 8 bits as (v << 2) | (v >> 4)
+    0.007843f, 0.023529f, 0.039216f, 0.054902f, 0.070588f, 0.086275f, 0.101961f, 0.117647f, 0.133333f, 0.149020f, 0.164706f, 0.180392f, 0.196078f, 0.211765f, 0.227451f, 0.245098f,
+    0.262745f, 0.278431f, 0.294118f, 0.309804f, 0.325490f, 0.341176f, 0.356863f, 0.372549f, 0.388235f, 0.403922f, 0.419608f, 0.435294f, 0.450980f, 0.466667f, 0.482353f, 0.500000f,
+    0.517647f, 0.533333f, 0.549020f, 0.564706f, 0.580392f, 0.596078f, 0.611765f, 0.627451f, 0.643137f, 0.658824f, 0.674510f, 0.690196f, 0.705882f, 0.721569f, 0.737255f, 0.754902f,
+    0.772549f, 0.788235f, 0.803922f, 0.819608f, 0.835294f, 0.850980f, 0.866667f, 0.882353f, 0.898039f, 0.913725f, 0.929412f, 0.945098f, 0.960784f, 0.976471f, 0.992157f, 1.0f  // final 1.0f has no level above it: a value saturated to [0,1] never compares greater, so level 63 is never stepped past
+};
+
+static unsigned short stb__Quantize5(float x) {  // returns the 5-bit level (0..31) for x, clamped to [0,1], choosing between neighbours by their 8-bit-expanded midpoint
+   unsigned short q;  // declared ahead of any statement, like the rest of the file, so C89 compilers accept it
+   x = x < 0 ? 0 : x > 1 ? 1 : x;  // saturate
+   q = (unsigned short)(x * 31);  // floor in 5-bit space; the true nearest level is q or q+1
+   return (unsigned short)(q + (x > midpoints5[q]));  // step up once x is past the midpoint toward level q+1
+}
+static unsigned short stb__Quantize6(float x) {  // returns the 6-bit level (0..63) for x, clamped to [0,1], choosing between neighbours by their 8-bit-expanded midpoint
+   unsigned short q;  // declared ahead of any statement, like the rest of the file, so C89 compilers accept it
+   x = x < 0 ? 0 : x > 1 ? 1 : x;  // saturate
+   q = (unsigned short)(x * 63);  // floor in 6-bit space; the true nearest level is q or q+1
+   return (unsigned short)(q + (x > midpoints6[q]));  // step up once x is past the midpoint toward level q+1
 }
 
 // The refinement function. (Clever code, part 2)
@@ -433,7 +450,7 @@ static int stb__RefineBlock(unsigned char *block, unsigned short *pmax16, unsign
    // ^some magic to save a lot of multiplies in the accumulating loop...
    // (precomputed products of weights for least squares system, accumulated inside one 32-bit register)
 
-   float frb,fg;
+   float f;
    unsigned short oldMin, oldMax, min16, max16;
    int i, akku = 0, xx,xy,yy;
    int At1_r,At1_g,At1_b;
@@ -486,17 +503,15 @@ static int stb__RefineBlock(unsigned char *block, unsigned short *pmax16, unsign
       yy = (akku >> 8) & 0xff;
       xy = (akku >> 0) & 0xff;
 
-      frb = 3.0f * 31.0f / 255.0f / (xx*yy - xy*xy);
-      fg = frb * 63.0f / 31.0f;
+      f = 3.0f / 255.0f / (xx*yy - xy*xy);
 
-      // solve.
-      max16 =  (unsigned short)(stb__sclamp((At1_r*yy - At2_r*xy)*frb+0.5f,0,31) << 11);
-      max16 |= (unsigned short)(stb__sclamp((At1_g*yy - At2_g*xy)*fg +0.5f,0,63) << 5);
-      max16 |= (unsigned short)(stb__sclamp((At1_b*yy - At2_b*xy)*frb+0.5f,0,31) << 0);
+      max16 =  stb__Quantize5((At1_r*yy - At2_r * xy) * f) << 11;
+      max16 |= stb__Quantize6((At1_g*yy - At2_g * xy) * f) << 5;
+      max16 |= stb__Quantize5((At1_b*yy - At2_b * xy) * f) << 0;
 
-      min16 =  (unsigned short)(stb__sclamp((At2_r*xx - At1_r*xy)*frb+0.5f,0,31) << 11);
-      min16 |= (unsigned short)(stb__sclamp((At2_g*xx - At1_g*xy)*fg +0.5f,0,63) << 5);
-      min16 |= (unsigned short)(stb__sclamp((At2_b*xx - At1_b*xy)*frb+0.5f,0,31) << 0);
+      min16 =  stb__Quantize5((At2_r*xx - At1_r * xy) * f) << 11;
+      min16 |= stb__Quantize6((At2_g*xx - At1_g * xy) * f) << 5;
+      min16 |= stb__Quantize5((At2_b*xx - At1_b * xy) * f) << 0;
    }
 
    *pmin16 = min16;