diff --git a/stb_image_resize.h b/stb_image_resize.h
index 215a0ab..b8fbd16 100644
--- a/stb_image_resize.h
+++ b/stb_image_resize.h
@@ -795,12 +795,13 @@ static void stbir__calculate_coefficients_downsample(stbir__info* stbir_info, st
 
 static void stbir__normalize_downsample_coefficients(stbir__info* stbir_info)
 {
+	int num_contributors = stbir__get_horizontal_contributors(stbir_info);
 	int i;
 	for (i = 0; i < stbir_info->output_w; i++)
 	{
 		float total = 0;
 		int j;
-		for (j = 0; j < stbir__get_horizontal_contributors(stbir_info); j++)
+		for (j = 0; j < num_contributors; j++)
 		{
 			if (i >= stbir_info->horizontal_contributors[j].n0 && i <= stbir_info->horizontal_contributors[j].n1)
 			{
@@ -816,7 +817,7 @@ static void stbir__normalize_downsample_coefficients(stbir__info* stbir_info)
 
 		float scale = 1 / total;
 
-		for (j = 0; j < stbir__get_horizontal_contributors(stbir_info); j++)
+		for (j = 0; j < num_contributors; j++)
 		{
 			if (i >= stbir_info->horizontal_contributors[j].n0 && i <= stbir_info->horizontal_contributors[j].n1)
 				*stbir__get_coefficient(stbir_info, j, i - stbir_info->horizontal_contributors[j].n0) *= scale;
@@ -824,6 +825,15 @@ static void stbir__normalize_downsample_coefficients(stbir__info* stbir_info)
 				break;
 		}
 	}
+
+	// Clamp each contributor's n1 to the last output column (output_w - 1) to avoid writing into invalid pixels.
+	for (i = 0; i < num_contributors; i++)
+	{
+		stbir__contributors* contributors = &stbir_info->horizontal_contributors[i];
+		STBIR__DEBUG_ASSERT(contributors->n1 >= contributors->n0);
+
+		contributors->n1 = stbir__min(contributors->n1, stbir_info->output_w - 1);
+	}
 }
 
 // Each scan line uses the same kernel values so we should calculate the kernel
@@ -1014,6 +1024,10 @@ static void stbir__decode_scanline(stbir__info* stbir_info, int n)
 		{
 			int decode_pixel_index = x * channels;
 			float alpha = decode_buffer[decode_pixel_index + alpha_channel];
+
+			if (alpha == 0)
+				alpha = decode_buffer[decode_pixel_index + alpha_channel] = (float)1 / 17179869184; // 1/2^34 should be small enough that it won't affect anything.
+
 			for (c = 0; c < channels; c++)
 			{
 				if (c == alpha_channel)
@@ -1129,12 +1143,10 @@ static void stbir__resample_horizontal_downsample(stbir__info* stbir_info, int n
 
 		int in_x = x - filter_pixel_margin;
 		int in_pixel_index = in_x * channels;
-		int max_n = stbir__min(n1, output_w-1);
+		int max_n = n1;
 		int coefficient_group = x*kernel_pixel_width;
 
-		STBIR__DEBUG_ASSERT(n1 >= n0);
-
-		// Using min and max to avoid writing into invalid pixels.
+		// Clamp the starting index to 0 (via stbir__max) to avoid writing into invalid pixels.
 		for (k = stbir__max(n0, 0); k <= max_n; k++)
 		{
 			int coefficient_index = (k - n0) + coefficient_group;
@@ -1199,8 +1211,8 @@ static void stbir__encode_scanline(stbir__info* stbir_info, int num_pixels, void
 			int output_pixel_index = x*channels;
 			int encode_pixel_index = x*channels;
 			float alpha = encode_buffer[encode_pixel_index + alpha_channel];
+			STBIR__DEBUG_ASSERT(alpha > 0);
 			float reciprocal_alpha = alpha ? 1.0f / alpha : 0;
-			// @TODO: if final alpha=0, we actually want to have ignored alpha... set alpha to sRGB_to_linear(1/255)/(2^24) so floats will discard it?
 			for (n = 0; n < channels; n++)
 				if (n != alpha_channel)
 					encode_buffer[encode_pixel_index + n] *= reciprocal_alpha;
diff --git a/tests/resample_test.cpp b/tests/resample_test.cpp
index 0300f6f..1b23397 100644
--- a/tests/resample_test.cpp
+++ b/tests/resample_test.cpp
@@ -384,18 +384,21 @@ void test_subpixel(const char* file, float width_percent, float height_percent,
 	free(output_data);
 }
 
-unsigned char* pixel(unsigned char* buffer, int x, int y, int c, int w, int n)
+unsigned int* pixel(unsigned int* buffer, int x, int y, int c, int w, int n)
 {
 	return &buffer[y*w*n + x*n + c];
 }
 
 void test_premul()
 {
-	unsigned char input[2 * 2 * 4];
-	unsigned char output[1 * 1 * 4];
+	unsigned int input[2 * 2 * 4];
+	unsigned int output[1 * 1 * 4];
+	unsigned int output2[2 * 2 * 4];
 
 	memset(input, 0, sizeof(input));
 
+	// First a test to make sure premul is working properly.
+
 	// Top left - solid red
 	*pixel(input, 0, 0, 0, 2, 4) = 255;
 	*pixel(input, 0, 0, 3, 2, 4) = 255;
@@ -412,18 +415,50 @@ void test_premul()
 	*pixel(input, 1, 1, 1, 2, 4) = 255;
 	*pixel(input, 1, 1, 3, 2, 4) = 25;
 
-	stbir_resize_uint8_generic(input, 2, 2, 0, output, 1, 1, 0, 4, 3, 0, STBIR_EDGE_CLAMP, STBIR_FILTER_BOX, STBIR_COLORSPACE_LINEAR, &g_context);
+	stbir_resize(input, 2, 2, 0, output, 1, 1, 0, STBIR_TYPE_UINT32, 4, 3, 0, STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, STBIR_FILTER_BOX, STBIR_FILTER_BOX, STBIR_COLORSPACE_LINEAR, &g_context);
 
-	float r = 1.0f;
-	float g = 1.0f;
-	float ra = 1.0;
-	float ga = (float)25 / 255;
+	float r = (float)255 / 4294967296;
+	float g = (float)255 / 4294967296;
+	float ra = (float)255 / 4294967296;
+	float ga = (float)25 / 4294967296;
 	float a = (ra + ga) / 2;
 
-	STBIR_ASSERT(output[0] == (int)(r * ra / 2 / a * 255 + 0.5f)); // 232
-	STBIR_ASSERT(output[1] == (int)(g * ga / 2 / a * 255 + 0.5f)); // 23
+	STBIR_ASSERT(output[0] == (int)(r * ra / 2 / a * 4294967296 + 0.5f)); // 232
+	STBIR_ASSERT(output[1] == (int)(g * ga / 2 / a * 4294967296 + 0.5f)); // 23
 	STBIR_ASSERT(output[2] == 0);
-	STBIR_ASSERT(output[3] == (int)(a * 255 + 0.5f)); // 140
+	STBIR_ASSERT(output[3] == (int)(a * 4294967296 + 0.5f)); // 140
+
+	// Now a test to make sure it doesn't clobber existing values.
+
+	// Top right - completely transparent green
+	*pixel(input, 1, 0, 1, 2, 4) = 255;
+	*pixel(input, 1, 0, 3, 2, 4) = 0;
+
+	// Bottom right - completely transparent green
+	*pixel(input, 1, 1, 1, 2, 4) = 255;
+	*pixel(input, 1, 1, 3, 2, 4) = 0;
+
+	stbir_resize(input, 2, 2, 0, output2, 2, 2, 0, STBIR_TYPE_UINT32, 4, 3, 0, STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, STBIR_FILTER_BOX, STBIR_FILTER_BOX, STBIR_COLORSPACE_LINEAR, &g_context);
+
+	STBIR_ASSERT(*pixel(output2, 0, 0, 0, 2, 4) == 255);
+	STBIR_ASSERT(*pixel(output2, 0, 0, 1, 2, 4) == 0);
+	STBIR_ASSERT(*pixel(output2, 0, 0, 2, 2, 4) == 0);
+	STBIR_ASSERT(*pixel(output2, 0, 0, 3, 2, 4) == 255);
+
+	STBIR_ASSERT(*pixel(output2, 0, 1, 0, 2, 4) == 255);
+	STBIR_ASSERT(*pixel(output2, 0, 1, 1, 2, 4) == 0);
+	STBIR_ASSERT(*pixel(output2, 0, 1, 2, 2, 4) == 0);
+	STBIR_ASSERT(*pixel(output2, 0, 1, 3, 2, 4) == 255);
+
+	STBIR_ASSERT(*pixel(output2, 1, 0, 0, 2, 4) == 0);
+	STBIR_ASSERT(*pixel(output2, 1, 0, 1, 2, 4) == 255);
+	STBIR_ASSERT(*pixel(output2, 1, 0, 2, 2, 4) == 0);
+	STBIR_ASSERT(*pixel(output2, 1, 0, 3, 2, 4) == 0);
+
+	STBIR_ASSERT(*pixel(output2, 1, 1, 0, 2, 4) == 0);
+	STBIR_ASSERT(*pixel(output2, 1, 1, 1, 2, 4) == 255);
+	STBIR_ASSERT(*pixel(output2, 1, 1, 2, 2, 4) == 0);
+	STBIR_ASSERT(*pixel(output2, 1, 1, 3, 2, 4) == 0);
 }
 
 // test that splitting a pow-2 image into tiles produces identical results