// Tencent is pleased to support the open source community by making ncnn available.
//
// author:BUG1989 (https://github.com/BUG1989/) Long-term support.
// author:JansonZhu (https://github.com/JansonZhu) Implemented the function of entropy calibration.
//
// Copyright (C) 2019 BUG1989. All rights reserved.
// Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#ifdef _MSC_VER
#define _CRT_SECURE_NO_DEPRECATE
#endif

#include <algorithm>
#include <cctype>
#include <cfloat>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>

#if defined(USE_NCNN_SIMPLEOCV)
#include "simpleocv.h"
#elif defined(USE_LOCAL_IMREADWRITE)
#include "imreadwrite.h"
#else
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#endif

#include <string>
#include <vector>

// ncnn public header
#include "benchmark.h"
#include "cpu.h"
#include "net.h"

// ncnn private header
#include "layer/convolution.h"
#include "layer/convolutiondepthwise.h"
#include "layer/innerproduct.h"

class QuantBlobStat
{
public:
    QuantBlobStat()
    {
        threshold = 0.f;
        absmax = 0.f;
        total = 0;
    }

public:
    float threshold;
    float absmax;

    // ACIQ
    int total;

    // KL
    std::vector<uint64_t> histogram;
    std::vector<float> histogram_normed;
};

class QuantNet : public ncnn::Net
{
public:
    QuantNet();

    std::vector<ncnn::Blob>& blobs;
    std::vector<ncnn::Layer*>& layers;

public:
    std::vector<std::vector<std::string> > listspaths;
    std::vector<std::vector<float> > means;
    std::vector<std::vector<float> > norms;
    std::vector<std::vector<int> > shapes;
    std::vector<int> type_to_pixels;
    int quantize_num_threads;

public:
    int init();
    void print_quant_info() const;
    int save_table(const char* tablepath);
    int quantize_KL();
    int quantize_ACIQ();
    int quantize_EQ();

public:
    std::vector<int> input_blobs;
    std::vector<int> conv_layers;
    std::vector<int> conv_bottom_blobs;
    std::vector<int> conv_top_blobs;

    // result
    std::vector<QuantBlobStat> quant_blob_stats;
    std::vector<ncnn::Mat> weight_scales;
    std::vector<ncnn::Mat> bottom_blob_scales;
};

QuantNet::QuantNet()
    : blobs(mutable_blobs()), layers(mutable_layers())
{
    quantize_num_threads = ncnn::get_cpu_count();
}

int QuantNet::init()
{
    // find all input layers
    for (int i = 0; i < (int)layers.size(); i++)
    {
        const ncnn::Layer* layer = layers[i];
        if (layer->type == "Input")
        {
            input_blobs.push_back(layer->tops[0]);
        }
    }

    // find all conv layers
    for (int i = 0; i < (int)layers.size(); i++)
    {
        const ncnn::Layer* layer = layers[i];
        if (layer->type == "Convolution" || layer->type == "ConvolutionDepthWise" || layer->type == "InnerProduct")
        {
            conv_layers.push_back(i);
            conv_bottom_blobs.push_back(layer->bottoms[0]);
            conv_top_blobs.push_back(layer->tops[0]);
        }
    }

    const int conv_layer_count = (int)conv_layers.size();
    const int conv_bottom_blob_count = (int)conv_bottom_blobs.size();

    quant_blob_stats.resize(conv_bottom_blob_count);
    weight_scales.resize(conv_layer_count);
    bottom_blob_scales.resize(conv_bottom_blob_count);

    return 0;
}
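// The calibration table is plain text, one line per scale set.
// For each quantized layer two kinds of lines are emitted:
//   <layer_name>_param_0 <scale> <scale> ...   per-output-channel weight scales
//   <layer_name> <scale>                       input (bottom blob) activation scale
// A hypothetical example for a layer named conv1 with two output channels:
//   conv1_param_0 156.639840 97.423119
//   conv1 0.841264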
int QuantNet::save_table(const char* tablepath)
{
    FILE* fp = fopen(tablepath, "wb");
    if (!fp)
    {
        fprintf(stderr, "fopen %s failed\n", tablepath);
        return -1;
    }

    const int conv_layer_count = (int)conv_layers.size();
    const int conv_bottom_blob_count = (int)conv_bottom_blobs.size();

    for (int i = 0; i < conv_layer_count; i++)
    {
        const ncnn::Mat& weight_scale = weight_scales[i];

        fprintf(fp, "%s_param_0 ", layers[conv_layers[i]]->name.c_str());
        for (int j = 0; j < weight_scale.w; j++)
        {
            fprintf(fp, "%f ", weight_scale[j]);
        }
        fprintf(fp, "\n");
    }

    for (int i = 0; i < conv_bottom_blob_count; i++)
    {
        const ncnn::Mat& bottom_blob_scale = bottom_blob_scales[i];

        fprintf(fp, "%s ", layers[conv_layers[i]]->name.c_str());
        for (int j = 0; j < bottom_blob_scale.w; j++)
        {
            fprintf(fp, "%f ", bottom_blob_scale[j]);
        }
        fprintf(fp, "\n");
    }

    fclose(fp);

    fprintf(stderr, "ncnn int8 calibration table created successfully, best wishes for low accuracy loss in your int8 inference...\\(^0^)/...233...\n");

    return 0;
}

void QuantNet::print_quant_info() const
{
    for (int i = 0; i < (int)conv_bottom_blobs.size(); i++)
    {
        const QuantBlobStat& stat = quant_blob_stats[i];

        float scale = 127 / stat.threshold;

        fprintf(stderr, "%-40s : max = %-15f threshold = %-15f scale = %-15f\n", layers[conv_layers[i]]->name.c_str(), stat.absmax, stat.threshold, scale);
    }
}

/**
 * Read and resize an image.
 * shape is given as [w,h,...]
 * If both w and h are positive, the image is resized to exactly that size.
 * If both w and h are zero or negative, the image is not resized.
 * If only h is zero or negative, the image is resized to width w, keeping the aspect ratio.
 * If only w is zero or negative, the image is resized to height h, keeping the aspect ratio.
 * @return ncnn::Mat
 */
inline ncnn::Mat read_and_resize_image(const std::vector<int>& shape, const std::string& imagepath, int pixel_convert_type)
{
    int target_w = shape[0];
    int target_h = shape[1];
    cv::Mat bgr = cv::imread(imagepath, 1);
    if (target_h <= 0 && target_w <= 0)
    {
        return ncnn::Mat::from_pixels(bgr.data, pixel_convert_type, bgr.cols, bgr.rows);
    }
    if (target_h <= 0 || target_w <= 0)
    {
        float scale = 1.0;
        if (target_h <= 0)
        {
            scale = 1.0 * bgr.cols / target_w;
            target_h = int(1.0 * bgr.rows / scale);
        }
        if (target_w <= 0)
        {
            scale = 1.0 * bgr.rows / target_h;
            target_w = int(1.0 * bgr.cols / scale);
        }
    }
    return ncnn::Mat::from_pixels_resize(bgr.data, pixel_convert_type, bgr.cols, bgr.rows, target_w, target_h);
}

static float compute_kl_divergence(const std::vector<float>& a, const std::vector<float>& b)
{
    const size_t length = a.size();

    float result = 0;
    for (size_t i = 0; i < length; i++)
    {
        result += a[i] * log(a[i] / b[i]);
    }

    return result;
}
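// KL (entropy) calibration:
// pass 1 records the per-blob absolute maximum over the calibration set,
// pass 2 accumulates a 2048-bin histogram of |activation| over [0, absmax),
// then a threshold is searched that minimizes the KL divergence between the
// clipped fp32 distribution and its int8 (128-bin) quantized counterpart.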
int QuantNet::quantize_KL()
{
    const int input_blob_count = (int)input_blobs.size();
    const int conv_layer_count = (int)conv_layers.size();
    const int conv_bottom_blob_count = (int)conv_bottom_blobs.size();
    const int image_count = (int)listspaths[0].size();

    const int num_histogram_bins = 2048;

    std::vector<ncnn::UnlockedPoolAllocator> blob_allocators(quantize_num_threads);
    std::vector<ncnn::UnlockedPoolAllocator> workspace_allocators(quantize_num_threads);

    // initialize conv weight scales
    #pragma omp parallel for num_threads(quantize_num_threads)
    for (int i = 0; i < conv_layer_count; i++)
    {
        const ncnn::Layer* layer = layers[conv_layers[i]];

        if (layer->type == "Convolution")
        {
            const ncnn::Convolution* convolution = (const ncnn::Convolution*)layer;

            const int num_output = convolution->num_output;
            const int kernel_w = convolution->kernel_w;
            const int kernel_h = convolution->kernel_h;
            const int dilation_w = convolution->dilation_w;
            const int dilation_h = convolution->dilation_h;
            const int stride_w = convolution->stride_w;
            const int stride_h = convolution->stride_h;

            const int weight_data_size_output = convolution->weight_data_size / num_output;

            // int8 winograd F43 needs weight data to use 6bit quantization
            // TODO proper condition for winograd 3x3 int8
            bool quant_6bit = false;
            if (kernel_w == 3 && kernel_h == 3 && dilation_w == 1 && dilation_h == 1 && stride_w == 1 && stride_h == 1)
                quant_6bit = true;

            weight_scales[i].create(num_output);

            for (int n = 0; n < num_output; n++)
            {
                const ncnn::Mat weight_data_n = convolution->weight_data.range(weight_data_size_output * n, weight_data_size_output);

                float absmax = 0.f;
                for (int k = 0; k < weight_data_size_output; k++)
                {
                    absmax = std::max(absmax, (float)fabs(weight_data_n[k]));
                }

                if (quant_6bit)
                {
                    weight_scales[i][n] = 31 / absmax;
                }
                else
                {
                    weight_scales[i][n] = 127 / absmax;
                }
            }
        }

        if (layer->type == "ConvolutionDepthWise")
        {
            const ncnn::ConvolutionDepthWise* convolutiondepthwise = (const ncnn::ConvolutionDepthWise*)layer;

            const int group = convolutiondepthwise->group;
            const int weight_data_size_output = convolutiondepthwise->weight_data_size / group;

            weight_scales[i].create(group);

            for (int n = 0; n < group; n++)
            {
                const ncnn::Mat weight_data_n = convolutiondepthwise->weight_data.range(weight_data_size_output * n, weight_data_size_output);

                float absmax = 0.f;
                for (int k = 0; k < weight_data_size_output; k++)
                {
                    absmax = std::max(absmax, (float)fabs(weight_data_n[k]));
                }

                weight_scales[i][n] = 127 / absmax;
            }
        }

        if (layer->type == "InnerProduct")
        {
            const ncnn::InnerProduct* innerproduct = (const ncnn::InnerProduct*)layer;

            const int num_output = innerproduct->num_output;
            const int weight_data_size_output = innerproduct->weight_data_size / num_output;

            weight_scales[i].create(num_output);

            for (int n = 0; n < num_output; n++)
            {
                const ncnn::Mat weight_data_n = innerproduct->weight_data.range(weight_data_size_output * n, weight_data_size_output);

                float absmax = 0.f;
                for (int k = 0; k < weight_data_size_output; k++)
                {
                    absmax = std::max(absmax, (float)fabs(weight_data_n[k]));
                }

                weight_scales[i][n] = 127 / absmax;
            }
        }
    }

    // count the absmax
    #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
    for (int i = 0; i < image_count; i++)
    {
        if (i % 100 == 0)
        {
            fprintf(stderr, "count the absmax %.2f%% [ %d / %d ]\n", i * 100.f / image_count, i, image_count);
        }

        ncnn::Extractor ex = create_extractor();

        const int thread_num = ncnn::get_omp_thread_num();
        ex.set_blob_allocator(&blob_allocators[thread_num]);
        ex.set_workspace_allocator(&workspace_allocators[thread_num]);

        for (int j = 0; j < input_blob_count; j++)
        {
            const int type_to_pixel = type_to_pixels[j];
            const std::vector<float>& mean_vals = means[j];
            const std::vector<float>& norm_vals = norms[j];

            int pixel_convert_type = ncnn::Mat::PIXEL_BGR;
            if (type_to_pixel != pixel_convert_type)
            {
                pixel_convert_type = pixel_convert_type | (type_to_pixel << ncnn::Mat::PIXEL_CONVERT_SHIFT);
            }

            ncnn::Mat in = read_and_resize_image(shapes[j], listspaths[j][i], pixel_convert_type);
            in.substract_mean_normalize(mean_vals.data(), norm_vals.data());

            ex.input(input_blobs[j], in);
        }

        for (int j = 0; j < conv_bottom_blob_count; j++)
        {
            ncnn::Mat out;
            ex.extract(conv_bottom_blobs[j], out);

            // count absmax
            {
                float absmax = 0.f;

                const int outc = out.c;
                const int outsize = out.w * out.h;
                for (int p = 0; p < outc; p++)
                {
                    const float* ptr = out.channel(p);
                    for (int k = 0; k < outsize; k++)
                    {
                        absmax = std::max(absmax, (float)fabs(ptr[k]));
                    }
                }

                #pragma omp critical
                {
                    QuantBlobStat& stat = quant_blob_stats[j];
                    stat.absmax = std::max(stat.absmax, absmax);
                }
            }
        }
    }

    // initialize histogram
    #pragma omp parallel for num_threads(quantize_num_threads)
    for (int i = 0; i < conv_bottom_blob_count; i++)
    {
        QuantBlobStat& stat = quant_blob_stats[i];

        stat.histogram.resize(num_histogram_bins, 0);
        stat.histogram_normed.resize(num_histogram_bins, 0);
    }
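    // The histogram maps |v| in [0, absmax) linearly to bin indices 0..2047,
    // which is why the absmax pass above must complete first.
    // Exact zeros are skipped so they do not dominate the distribution.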
    // build histogram
    #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
    for (int i = 0; i < image_count; i++)
    {
        if (i % 100 == 0)
        {
            fprintf(stderr, "build histogram %.2f%% [ %d / %d ]\n", i * 100.f / image_count, i, image_count);
        }

        ncnn::Extractor ex = create_extractor();

        const int thread_num = ncnn::get_omp_thread_num();
        ex.set_blob_allocator(&blob_allocators[thread_num]);
        ex.set_workspace_allocator(&workspace_allocators[thread_num]);

        for (int j = 0; j < input_blob_count; j++)
        {
            const int type_to_pixel = type_to_pixels[j];
            const std::vector<float>& mean_vals = means[j];
            const std::vector<float>& norm_vals = norms[j];

            int pixel_convert_type = ncnn::Mat::PIXEL_BGR;
            if (type_to_pixel != pixel_convert_type)
            {
                pixel_convert_type = pixel_convert_type | (type_to_pixel << ncnn::Mat::PIXEL_CONVERT_SHIFT);
            }

            ncnn::Mat in = read_and_resize_image(shapes[j], listspaths[j][i], pixel_convert_type);
            in.substract_mean_normalize(mean_vals.data(), norm_vals.data());

            ex.input(input_blobs[j], in);
        }

        for (int j = 0; j < conv_bottom_blob_count; j++)
        {
            ncnn::Mat out;
            ex.extract(conv_bottom_blobs[j], out);

            // count histogram bin
            {
                const float absmax = quant_blob_stats[j].absmax;

                std::vector<uint64_t> histogram(num_histogram_bins, 0);

                const int outc = out.c;
                const int outsize = out.w * out.h;
                for (int p = 0; p < outc; p++)
                {
                    const float* ptr = out.channel(p);
                    for (int k = 0; k < outsize; k++)
                    {
                        if (ptr[k] == 0.f)
                            continue;

                        const int index = std::min((int)(fabs(ptr[k]) / absmax * num_histogram_bins), (num_histogram_bins - 1));

                        histogram[index] += 1;
                    }
                }

                #pragma omp critical
                {
                    QuantBlobStat& stat = quant_blob_stats[j];

                    for (int k = 0; k < num_histogram_bins; k++)
                    {
                        stat.histogram[k] += histogram[k];
                    }
                }
            }
        }
    }

    // using kld to find the best threshold value
    #pragma omp parallel for num_threads(quantize_num_threads)
    for (int i = 0; i < conv_bottom_blob_count; i++)
    {
        QuantBlobStat& stat = quant_blob_stats[i];

        // normalize histogram bin
        {
            uint64_t sum = 0;
            for (int j = 0; j < num_histogram_bins; j++)
            {
                sum += stat.histogram[j];
            }

            for (int j = 0; j < num_histogram_bins; j++)
            {
                stat.histogram_normed[j] = (float)(stat.histogram[j] / (double)sum);
            }
        }

        const int target_bin = 128;

        int target_threshold = target_bin;
        float min_kl_divergence = FLT_MAX;
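        // For each candidate threshold t in [128, 2048):
        //   clip_distribution     P: the first t bins, with all mass beyond t folded into bin t-1
        //   quantize_distribution Q: P downsampled to 128 bins, fractional bin edges split proportionally
        //   expand_distribution      Q upsampled back to t bins the same way
        // The chosen threshold minimizes KL(P || Q_expanded); kl_eps keeps log() away from empty bins.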
        for (int threshold = target_bin; threshold < num_histogram_bins; threshold++)
        {
            const float kl_eps = 0.0001f;

            std::vector<float> clip_distribution(threshold, kl_eps);
            {
                for (int j = 0; j < threshold; j++)
                {
                    clip_distribution[j] += stat.histogram_normed[j];
                }
                for (int j = threshold; j < num_histogram_bins; j++)
                {
                    clip_distribution[threshold - 1] += stat.histogram_normed[j];
                }
            }

            const float num_per_bin = (float)threshold / target_bin;

            std::vector<float> quantize_distribution(target_bin, 0.f);
            {
                {
                    const float end = num_per_bin;

                    const int right_lower = (int)floor(end);
                    const float right_scale = end - right_lower;

                    if (right_scale > 0)
                    {
                        quantize_distribution[0] += right_scale * stat.histogram_normed[right_lower];
                    }

                    for (int k = 0; k < right_lower; k++)
                    {
                        quantize_distribution[0] += stat.histogram_normed[k];
                    }

                    quantize_distribution[0] /= right_lower + right_scale;
                }
                for (int j = 1; j < target_bin - 1; j++)
                {
                    const float start = j * num_per_bin;
                    const float end = (j + 1) * num_per_bin;

                    const int left_upper = (int)ceil(start);
                    const float left_scale = left_upper - start;

                    const int right_lower = (int)floor(end);
                    const float right_scale = end - right_lower;

                    if (left_scale > 0)
                    {
                        quantize_distribution[j] += left_scale * stat.histogram_normed[left_upper - 1];
                    }

                    if (right_scale > 0)
                    {
                        quantize_distribution[j] += right_scale * stat.histogram_normed[right_lower];
                    }

                    for (int k = left_upper; k < right_lower; k++)
                    {
                        quantize_distribution[j] += stat.histogram_normed[k];
                    }

                    quantize_distribution[j] /= right_lower - left_upper + left_scale + right_scale;
                }
                {
                    const float start = threshold - num_per_bin;

                    const int left_upper = (int)ceil(start);
                    const float left_scale = left_upper - start;

                    if (left_scale > 0)
                    {
                        quantize_distribution[target_bin - 1] += left_scale * stat.histogram_normed[left_upper - 1];
                    }

                    for (int k = left_upper; k < threshold; k++)
                    {
                        quantize_distribution[target_bin - 1] += stat.histogram_normed[k];
                    }

                    quantize_distribution[target_bin - 1] /= threshold - left_upper + left_scale;
                }
            }

            std::vector<float> expand_distribution(threshold, kl_eps);
            {
                {
                    const float end = num_per_bin;

                    const int right_lower = (int)floor(end);
                    const float right_scale = end - right_lower;

                    if (right_scale > 0)
                    {
                        expand_distribution[right_lower] += right_scale * quantize_distribution[0];
                    }

                    for (int k = 0; k < right_lower; k++)
                    {
                        expand_distribution[k] += quantize_distribution[0];
                    }
                }
                for (int j = 1; j < target_bin - 1; j++)
                {
                    const float start = j * num_per_bin;
                    const float end = (j + 1) * num_per_bin;

                    const int left_upper = (int)ceil(start);
                    const float left_scale = left_upper - start;

                    const int right_lower = (int)floor(end);
                    const float right_scale = end - right_lower;

                    if (left_scale > 0)
                    {
                        expand_distribution[left_upper - 1] += left_scale * quantize_distribution[j];
                    }

                    if (right_scale > 0)
                    {
                        expand_distribution[right_lower] += right_scale * quantize_distribution[j];
                    }

                    for (int k = left_upper; k < right_lower; k++)
                    {
                        expand_distribution[k] += quantize_distribution[j];
                    }
                }
                {
                    const float start = threshold - num_per_bin;

                    const int left_upper = (int)ceil(start);
                    const float left_scale = left_upper - start;

                    if (left_scale > 0)
                    {
                        expand_distribution[left_upper - 1] += left_scale * quantize_distribution[target_bin - 1];
                    }

                    for (int k = left_upper; k < threshold; k++)
                    {
                        expand_distribution[k] += quantize_distribution[target_bin - 1];
                    }
                }
            }

            // kl
            const float kl_divergence = compute_kl_divergence(clip_distribution, expand_distribution);

            // keep the threshold with the smallest kl divergence
            if (kl_divergence < min_kl_divergence)
            {
                min_kl_divergence = kl_divergence;
                target_threshold = threshold;
            }
        }

        stat.threshold = (target_threshold + 0.5f) * stat.absmax / num_histogram_bins;
        float scale = 127 / stat.threshold;

        bottom_blob_scales[i].create(1);
        bottom_blob_scales[i][0] = scale;
    }

    return 0;
}
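// ACIQ-style analytical clipping.
// The tensor is assumed to be roughly Gaussian; its standard deviation is
// estimated from the observed absmax over N samples, and the clip value is
// alpha * std with alpha taken from a per-bitwidth table
// (index num_bits - 1, so 8-bit uses 3.92403714).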
static float compute_aciq_gaussian_clip(float absmax, int N, int num_bits = 8)
{
    const float alpha_gaussian[8] = {0, 1.71063519, 2.15159277, 2.55913646, 2.93620062, 3.28691474, 3.6151146, 3.92403714};

    const double gaussian_const = (0.5 * 0.35) * (1 + sqrt(3.14159265358979323846 * log(4)));

    double std = (absmax * 2 * gaussian_const) / sqrt(2 * log(N));

    return (float)(alpha_gaussian[num_bits - 1] * std);
}

int QuantNet::quantize_ACIQ()
{
    const int input_blob_count = (int)input_blobs.size();
    const int conv_layer_count = (int)conv_layers.size();
    const int conv_bottom_blob_count = (int)conv_bottom_blobs.size();
    const int image_count = (int)listspaths[0].size();

    std::vector<ncnn::UnlockedPoolAllocator> blob_allocators(quantize_num_threads);
    std::vector<ncnn::UnlockedPoolAllocator> workspace_allocators(quantize_num_threads);

    // initialize conv weight scales
    #pragma omp parallel for num_threads(quantize_num_threads)
    for (int i = 0; i < conv_layer_count; i++)
    {
        const ncnn::Layer* layer = layers[conv_layers[i]];

        if (layer->type == "Convolution")
        {
            const ncnn::Convolution* convolution = (const ncnn::Convolution*)layer;

            const int num_output = convolution->num_output;
            const int kernel_w = convolution->kernel_w;
            const int kernel_h = convolution->kernel_h;
            const int dilation_w = convolution->dilation_w;
            const int dilation_h = convolution->dilation_h;
            const int stride_w = convolution->stride_w;
            const int stride_h = convolution->stride_h;

            const int weight_data_size_output = convolution->weight_data_size / num_output;

            // int8 winograd F43 needs weight data to use 6bit quantization
            // TODO proper condition for winograd 3x3 int8
            bool quant_6bit = false;
            if (kernel_w == 3 && kernel_h == 3 && dilation_w == 1 && dilation_h == 1 && stride_w == 1 && stride_h == 1)
                quant_6bit = true;

            weight_scales[i].create(num_output);

            for (int n = 0; n < num_output; n++)
            {
                const ncnn::Mat weight_data_n = convolution->weight_data.range(weight_data_size_output * n, weight_data_size_output);

                float absmax = 0.f;
                for (int k = 0; k < weight_data_size_output; k++)
                {
                    absmax = std::max(absmax, (float)fabs(weight_data_n[k]));
                }

                if (quant_6bit)
                {
                    const float threshold = compute_aciq_gaussian_clip(absmax, weight_data_size_output, 6);
                    weight_scales[i][n] = 31 / threshold;
                }
                else
                {
                    const float threshold = compute_aciq_gaussian_clip(absmax, weight_data_size_output);
                    weight_scales[i][n] = 127 / threshold;
                }
            }
        }

        if (layer->type == "ConvolutionDepthWise")
        {
            const ncnn::ConvolutionDepthWise* convolutiondepthwise = (const ncnn::ConvolutionDepthWise*)layer;

            const int group = convolutiondepthwise->group;
            const int weight_data_size_output = convolutiondepthwise->weight_data_size / group;

            weight_scales[i].create(group);

            for (int n = 0; n < group; n++)
            {
                const ncnn::Mat weight_data_n = convolutiondepthwise->weight_data.range(weight_data_size_output * n, weight_data_size_output);

                float absmax = 0.f;
                for (int k = 0; k < weight_data_size_output; k++)
                {
                    absmax = std::max(absmax, (float)fabs(weight_data_n[k]));
                }

                const float threshold = compute_aciq_gaussian_clip(absmax, weight_data_size_output);
                weight_scales[i][n] = 127 / threshold;
            }
        }

        if (layer->type == "InnerProduct")
        {
            const ncnn::InnerProduct* innerproduct = (const ncnn::InnerProduct*)layer;

            const int num_output = innerproduct->num_output;
            const int weight_data_size_output = innerproduct->weight_data_size / num_output;

            weight_scales[i].create(num_output);

            for (int n = 0; n < num_output; n++)
            {
                const ncnn::Mat weight_data_n = innerproduct->weight_data.range(weight_data_size_output * n, weight_data_size_output);

                float absmax = 0.f;
                for (int k = 0; k < weight_data_size_output; k++)
                {
                    absmax = std::max(absmax, (float)fabs(weight_data_n[k]));
                }

                const float threshold = compute_aciq_gaussian_clip(absmax, weight_data_size_output);
                weight_scales[i][n] = 127 / threshold;
            }
        }
    }
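    // Unlike KL, ACIQ also needs the element count N of each blob, since the
    // clip value depends on how many samples the absmax was taken over;
    // stat.total is recorded alongside absmax below.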
    // count the absmax
    #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
    for (int i = 0; i < image_count; i++)
    {
        if (i % 100 == 0)
        {
            fprintf(stderr, "count the absmax %.2f%% [ %d / %d ]\n", i * 100.f / image_count, i, image_count);
        }

        ncnn::Extractor ex = create_extractor();

        const int thread_num = ncnn::get_omp_thread_num();
        ex.set_blob_allocator(&blob_allocators[thread_num]);
        ex.set_workspace_allocator(&workspace_allocators[thread_num]);

        for (int j = 0; j < input_blob_count; j++)
        {
            const int type_to_pixel = type_to_pixels[j];
            const std::vector<float>& mean_vals = means[j];
            const std::vector<float>& norm_vals = norms[j];

            int pixel_convert_type = ncnn::Mat::PIXEL_BGR;
            if (type_to_pixel != pixel_convert_type)
            {
                pixel_convert_type = pixel_convert_type | (type_to_pixel << ncnn::Mat::PIXEL_CONVERT_SHIFT);
            }

            ncnn::Mat in = read_and_resize_image(shapes[j], listspaths[j][i], pixel_convert_type);
            in.substract_mean_normalize(mean_vals.data(), norm_vals.data());

            ex.input(input_blobs[j], in);
        }

        for (int j = 0; j < conv_bottom_blob_count; j++)
        {
            ncnn::Mat out;
            ex.extract(conv_bottom_blobs[j], out);

            // count absmax
            {
                float absmax = 0.f;

                const int outc = out.c;
                const int outsize = out.w * out.h;
                for (int p = 0; p < outc; p++)
                {
                    const float* ptr = out.channel(p);
                    for (int k = 0; k < outsize; k++)
                    {
                        absmax = std::max(absmax, (float)fabs(ptr[k]));
                    }
                }

                #pragma omp critical
                {
                    QuantBlobStat& stat = quant_blob_stats[j];
                    stat.absmax = std::max(stat.absmax, absmax);
                    stat.total = outc * outsize;
                }
            }
        }
    }

    // alpha gaussian
    #pragma omp parallel for num_threads(quantize_num_threads)
    for (int i = 0; i < conv_bottom_blob_count; i++)
    {
        QuantBlobStat& stat = quant_blob_stats[i];

        stat.threshold = compute_aciq_gaussian_clip(stat.absmax, stat.total);
        float scale = 127 / stat.threshold;

        bottom_blob_scales[i].create(1);
        bottom_blob_scales[i][0] = scale;
    }

    return 0;
}

static float cosine_similarity(const ncnn::Mat& a, const ncnn::Mat& b)
{
    const int channels = a.c;
    const int size = a.w * a.h;

    float sa = 0;
    float sb = 0;
    float sum = 0;

    for (int p = 0; p < channels; p++)
    {
        const float* pa = a.channel(p);
        const float* pb = b.channel(p);
        for (int i = 0; i < size; i++)
        {
            sa += pa[i] * pa[i];
            sb += pb[i] * pb[i];
            sum += pa[i] * pb[i];
        }
    }

    float sim = (float)(sum / sqrt(sa) / sqrt(sb));
    return sim;
}
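// Copy a layer's hyper-parameters into a ParamDict using the same ids as the
// ncnn .param format (0 = num_output, 1 = kernel_w, 8 = int8_scale_term, ...),
// so a standalone int8 clone of the layer can be constructed and loaded.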
static int get_layer_param(const ncnn::Layer* layer, ncnn::ParamDict& pd)
{
    if (layer->type == "Convolution")
    {
        ncnn::Convolution* convolution = (ncnn::Convolution*)layer;

        pd.set(0, convolution->num_output);
        pd.set(1, convolution->kernel_w);
        pd.set(11, convolution->kernel_h);
        pd.set(2, convolution->dilation_w);
        pd.set(12, convolution->dilation_h);
        pd.set(3, convolution->stride_w);
        pd.set(13, convolution->stride_h);
        pd.set(4, convolution->pad_left);
        pd.set(15, convolution->pad_right);
        pd.set(14, convolution->pad_top);
        pd.set(16, convolution->pad_bottom);
        pd.set(18, convolution->pad_value);
        pd.set(5, convolution->bias_term);
        pd.set(6, convolution->weight_data_size);
        pd.set(8, convolution->int8_scale_term);
        pd.set(9, convolution->activation_type);
        pd.set(10, convolution->activation_params);
    }
    else if (layer->type == "ConvolutionDepthWise")
    {
        ncnn::ConvolutionDepthWise* convolutiondepthwise = (ncnn::ConvolutionDepthWise*)layer;

        pd.set(0, convolutiondepthwise->num_output);
        pd.set(1, convolutiondepthwise->kernel_w);
        pd.set(11, convolutiondepthwise->kernel_h);
        pd.set(2, convolutiondepthwise->dilation_w);
        pd.set(12, convolutiondepthwise->dilation_h);
        pd.set(3, convolutiondepthwise->stride_w);
        pd.set(13, convolutiondepthwise->stride_h);
        pd.set(4, convolutiondepthwise->pad_left);
        pd.set(15, convolutiondepthwise->pad_right);
        pd.set(14, convolutiondepthwise->pad_top);
        pd.set(16, convolutiondepthwise->pad_bottom);
        pd.set(18, convolutiondepthwise->pad_value);
        pd.set(5, convolutiondepthwise->bias_term);
        pd.set(6, convolutiondepthwise->weight_data_size);
        pd.set(7, convolutiondepthwise->group);
        pd.set(8, convolutiondepthwise->int8_scale_term);
        pd.set(9, convolutiondepthwise->activation_type);
        pd.set(10, convolutiondepthwise->activation_params);
    }
    else if (layer->type == "InnerProduct")
    {
        ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layer;

        pd.set(0, innerproduct->num_output);
        pd.set(1, innerproduct->bias_term);
        pd.set(2, innerproduct->weight_data_size);
        pd.set(8, innerproduct->int8_scale_term);
        pd.set(9, innerproduct->activation_type);
        pd.set(10, innerproduct->activation_params);
    }
    else
    {
        fprintf(stderr, "unexpected layer type %s in get_layer_param\n", layer->type.c_str());
        return -1;
    }

    return 0;
}

static int get_layer_weights(const ncnn::Layer* layer, std::vector<ncnn::Mat>& weights)
{
    if (layer->type == "Convolution")
    {
        ncnn::Convolution* convolution = (ncnn::Convolution*)layer;
        weights.push_back(convolution->weight_data);
        if (convolution->bias_term)
            weights.push_back(convolution->bias_data);
    }
    else if (layer->type == "ConvolutionDepthWise")
    {
        ncnn::ConvolutionDepthWise* convolutiondepthwise = (ncnn::ConvolutionDepthWise*)layer;
        weights.push_back(convolutiondepthwise->weight_data);
        if (convolutiondepthwise->bias_term)
            weights.push_back(convolutiondepthwise->bias_data);
    }
    else if (layer->type == "InnerProduct")
    {
        ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layer;
        weights.push_back(innerproduct->weight_data);
        if (innerproduct->bias_term)
            weights.push_back(innerproduct->bias_data);
    }
    else
    {
        fprintf(stderr, "unexpected layer type %s in get_layer_weights\n", layer->type.c_str());
        return -1;
    }

    return 0;
}

int QuantNet::quantize_EQ()
{
    // find the initial scale via KL
    quantize_KL();
    print_quant_info();

    const int input_blob_count = (int)input_blobs.size();
    const int conv_layer_count = (int)conv_layers.size();
    const int conv_bottom_blob_count = (int)conv_bottom_blobs.size();

    std::vector<ncnn::UnlockedPoolAllocator> blob_allocators(quantize_num_threads);
    std::vector<ncnn::UnlockedPoolAllocator> workspace_allocators(quantize_num_threads);

    // max 50 images for EQ
    const int image_count = std::min((int)listspaths[0].size(), 50);

    const float scale_range_lower = 0.5f;
    const float scale_range_upper = 2.0f;
    const int search_steps = 100;

    for (int i = 0; i < conv_layer_count; i++)
    {
        ncnn::Mat& weight_scale = weight_scales[i];
        ncnn::Mat& bottom_blob_scale = bottom_blob_scales[i];

        const ncnn::Layer* layer = layers[conv_layers[i]];

        // search weight scale
        for (int j = 0; j < weight_scale.w; j++)
        {
            const float scale = weight_scale[j];
            const float scale_lower = scale * scale_range_lower;
            const float scale_upper = scale * scale_range_upper;
            const float scale_step = (scale_upper - scale_lower) / search_steps;

            std::vector<double> avgsims(search_steps, 0.0);

            #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
            for (int ii = 0; ii < image_count; ii++)
            {
                if (ii % 100 == 0)
                {
                    fprintf(stderr, "search weight scale %.2f%% [ %d / %d ] for %d / %d of %d / %d\n", ii * 100.f / image_count, ii, image_count, j, weight_scale.w, i, conv_layer_count);
                }

                ncnn::Extractor ex = create_extractor();

                const int thread_num = ncnn::get_omp_thread_num();
                ex.set_blob_allocator(&blob_allocators[thread_num]);
                ex.set_workspace_allocator(&workspace_allocators[thread_num]);

                for (int jj = 0; jj < input_blob_count; jj++)
                {
                    const int type_to_pixel = type_to_pixels[jj];
                    const std::vector<float>& mean_vals = means[jj];
                    const std::vector<float>& norm_vals = norms[jj];

                    int pixel_convert_type = ncnn::Mat::PIXEL_BGR;
                    if (type_to_pixel != pixel_convert_type)
                    {
                        pixel_convert_type = pixel_convert_type | (type_to_pixel << ncnn::Mat::PIXEL_CONVERT_SHIFT);
                    }

                    ncnn::Mat in = read_and_resize_image(shapes[jj], listspaths[jj][ii], pixel_convert_type);
                    in.substract_mean_normalize(mean_vals.data(), norm_vals.data());

                    ex.input(input_blobs[jj], in);
                }

                ncnn::Mat in;
                ex.extract(conv_bottom_blobs[i], in);

                ncnn::Mat out;
                ex.extract(conv_top_blobs[i], out);

                ncnn::Layer* layer_int8 = ncnn::create_layer(layer->typeindex);

                ncnn::ParamDict pd;
                get_layer_param(layer, pd);
                pd.set(8, 1); // int8_scale_term
                layer_int8->load_param(pd);

                std::vector<float> sims(search_steps);
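                // For each candidate scale, clone the fp32 weights, swap in the
                // candidate, load them into the standalone int8 layer and forward
                // the cached fp32 input; the candidate is judged by the cosine
                // similarity of its int8 output against the fp32 reference output.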
                for (int k = 0; k < search_steps; k++)
                {
                    ncnn::Mat new_weight_scale = weight_scale.clone();
                    new_weight_scale[j] = scale_lower + k * scale_step;

                    std::vector<ncnn::Mat> weights;
                    get_layer_weights(layer, weights);
                    weights.push_back(new_weight_scale);
                    weights.push_back(bottom_blob_scale);

                    layer_int8->load_model(ncnn::ModelBinFromMatArray(weights.data()));

                    ncnn::Option opt_int8;
                    opt_int8.use_packing_layout = false;

                    layer_int8->create_pipeline(opt_int8);

                    ncnn::Mat out_int8;
                    layer_int8->forward(in, out_int8, opt_int8);

                    layer_int8->destroy_pipeline(opt_int8);

                    sims[k] = cosine_similarity(out, out_int8);
                }

                delete layer_int8;

                #pragma omp critical
                {
                    for (int k = 0; k < search_steps; k++)
                    {
                        avgsims[k] += sims[k];
                    }
                }
            }

            double max_avgsim = 0.0;
            float new_scale = scale;

            // find the scale with the max average cosine similarity
            for (int k = 0; k < search_steps; k++)
            {
                if (max_avgsim < avgsims[k])
                {
                    max_avgsim = avgsims[k];
                    new_scale = scale_lower + k * scale_step;
                }
            }

            fprintf(stderr, "%s w %d = %f -> %f\n", layer->name.c_str(), j, scale, new_scale);
            weight_scale[j] = new_scale;
        }
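        // The bottom blob scale is searched the same way, except the candidate
        // replaces the activation scale while the already tuned weight scales
        // are kept fixed.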
        // search bottom blob scale
        for (int j = 0; j < bottom_blob_scale.w; j++)
        {
            const float scale = bottom_blob_scale[j];
            const float scale_lower = scale * scale_range_lower;
            const float scale_upper = scale * scale_range_upper;
            const float scale_step = (scale_upper - scale_lower) / search_steps;

            std::vector<double> avgsims(search_steps, 0.0);

            #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
            for (int ii = 0; ii < image_count; ii++)
            {
                if (ii % 100 == 0)
                {
                    fprintf(stderr, "search bottom blob scale %.2f%% [ %d / %d ] for %d / %d of %d / %d\n", ii * 100.f / image_count, ii, image_count, j, bottom_blob_scale.w, i, conv_layer_count);
                }

                ncnn::Extractor ex = create_extractor();

                const int thread_num = ncnn::get_omp_thread_num();
                ex.set_blob_allocator(&blob_allocators[thread_num]);
                ex.set_workspace_allocator(&workspace_allocators[thread_num]);

                for (int jj = 0; jj < input_blob_count; jj++)
                {
                    const int type_to_pixel = type_to_pixels[jj];
                    const std::vector<float>& mean_vals = means[jj];
                    const std::vector<float>& norm_vals = norms[jj];

                    int pixel_convert_type = ncnn::Mat::PIXEL_BGR;
                    if (type_to_pixel != pixel_convert_type)
                    {
                        pixel_convert_type = pixel_convert_type | (type_to_pixel << ncnn::Mat::PIXEL_CONVERT_SHIFT);
                    }

                    ncnn::Mat in = read_and_resize_image(shapes[jj], listspaths[jj][ii], pixel_convert_type);
                    in.substract_mean_normalize(mean_vals.data(), norm_vals.data());

                    ex.input(input_blobs[jj], in);
                }

                ncnn::Mat in;
                ex.extract(conv_bottom_blobs[i], in);

                ncnn::Mat out;
                ex.extract(conv_top_blobs[i], out);

                ncnn::Layer* layer_int8 = ncnn::create_layer(layer->typeindex);

                ncnn::ParamDict pd;
                get_layer_param(layer, pd);
                pd.set(8, 1); // int8_scale_term
                layer_int8->load_param(pd);

                std::vector<float> sims(search_steps);
                for (int k = 0; k < search_steps; k++)
                {
                    ncnn::Mat new_bottom_blob_scale = bottom_blob_scale.clone();
                    new_bottom_blob_scale[j] = scale_lower + k * scale_step;

                    std::vector<ncnn::Mat> weights;
                    get_layer_weights(layer, weights);
                    weights.push_back(weight_scale);
                    weights.push_back(new_bottom_blob_scale);

                    layer_int8->load_model(ncnn::ModelBinFromMatArray(weights.data()));

                    ncnn::Option opt_int8;
                    opt_int8.use_packing_layout = false;

                    layer_int8->create_pipeline(opt_int8);

                    ncnn::Mat out_int8;
                    layer_int8->forward(in, out_int8, opt_int8);

                    layer_int8->destroy_pipeline(opt_int8);

                    sims[k] = cosine_similarity(out, out_int8);
                }

                delete layer_int8;

                #pragma omp critical
                {
                    for (int k = 0; k < search_steps; k++)
                    {
                        avgsims[k] += sims[k];
                    }
                }
            }

            double max_avgsim = 0.0;
            float new_scale = scale;

            // find the scale with the max average cosine similarity
            for (int k = 0; k < search_steps; k++)
            {
                if (max_avgsim < avgsims[k])
                {
                    max_avgsim = avgsims[k];
                    new_scale = scale_lower + k * scale_step;
                }
            }

            fprintf(stderr, "%s b %d = %f -> %f\n", layer->name.c_str(), j, scale, new_scale);
            bottom_blob_scale[j] = new_scale;
        }

        // update quant info
        QuantBlobStat& stat = quant_blob_stats[i];
        stat.threshold = 127 / bottom_blob_scale[0];
    }

    return 0;
}

static std::vector<std::vector<std::string> > parse_comma_path_list(char* s)
{
    std::vector<std::vector<std::string> > aps;

    char* pch = strtok(s, ",");
    while (pch != NULL)
    {
        FILE* fp = fopen(pch, "rb");
        if (!fp)
        {
            fprintf(stderr, "fopen %s failed\n", pch);
            break;
        }

        std::vector<std::string> paths;

        // one filepath per line
        char line[1024];
        while (!feof(fp))
        {
            char* ss = fgets(line, 1024, fp);
            if (!ss)
                break;

            char filepath[256];
            int nscan = sscanf(line, "%255s", filepath);
            if (nscan != 1)
                continue;

            paths.push_back(std::string(filepath));
        }

        fclose(fp);

        aps.push_back(paths);

        pch = strtok(NULL, ",");
    }

    return aps;
}

static float vstr_to_float(const char vstr[20])
{
    double v = 0.0;

    const char* p = vstr;

    // sign
    bool sign = *p != '-';
    if (*p == '+' || *p == '-')
    {
        p++;
    }

    // digits before decimal point or exponent
    uint64_t v1 = 0;
    while (isdigit(*p))
    {
        v1 = v1 * 10 + (*p - '0');
        p++;
    }

    v = (double)v1;

    // digits after decimal point
    if (*p == '.')
    {
        p++;

        uint64_t pow10 = 1;
        uint64_t v2 = 0;

        while (isdigit(*p))
        {
            v2 = v2 * 10 + (*p - '0');
            pow10 *= 10;
            p++;
        }

        v += v2 / (double)pow10;
    }

    // exponent
    if (*p == 'e' || *p == 'E')
    {
        p++;

        // sign of exponent
        bool fact = *p != '-';
        if (*p == '+' || *p == '-')
        {
            p++;
        }

        // digits of exponent
        uint64_t expon = 0;
        while (isdigit(*p))
        {
            expon = expon * 10 + (*p - '0');
            p++;
        }

        double scale = 1.0;
        while (expon >= 8)
        {
            scale *= 1e8;
            expon -= 8;
        }
        while (expon > 0)
        {
            scale *= 10.0;
            expon -= 1;
        }

        v = fact ? v * scale : v / scale;
    }

    // fprintf(stderr, "v = %f\n", v);

    return sign ? (float)v : (float)-v;
}
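// Parse a comma-separated list of bracketed float arrays.
// strtok splits on '[' and ']', so for "[104.0,117.0,123.0],[127.5]" the
// tokens are "104.0,117.0,123.0", "," and "127.5"; the lone "," token fails
// the first sscanf and is skipped, leaving two arrays.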
static std::vector<std::vector<float> > parse_comma_float_array_list(char* s)
{
    std::vector<std::vector<float> > aaf;

    char* pch = strtok(s, "[]");
    while (pch != NULL)
    {
        // parse a,b,c
        char vstr[20];
        int nconsumed = 0;
        int nscan = sscanf(pch, "%19[^,]%n", vstr, &nconsumed);
        if (nscan == 1)
        {
            // ok we get array
            pch += nconsumed;

            std::vector<float> af;
            float v = vstr_to_float(vstr);
            af.push_back(v);

            nscan = sscanf(pch, ",%19[^,]%n", vstr, &nconsumed);
            while (nscan == 1)
            {
                pch += nconsumed;

                float v = vstr_to_float(vstr);
                af.push_back(v);

                nscan = sscanf(pch, ",%19[^,]%n", vstr, &nconsumed);
            }

            // array end
            aaf.push_back(af);
        }

        pch = strtok(NULL, "[]");
    }

    return aaf;
}

static std::vector<std::vector<int> > parse_comma_int_array_list(char* s)
{
    std::vector<std::vector<int> > aai;

    char* pch = strtok(s, "[]");
    while (pch != NULL)
    {
        // parse a,b,c
        int v;
        int nconsumed = 0;
        int nscan = sscanf(pch, "%d%n", &v, &nconsumed);
        if (nscan == 1)
        {
            // ok we get array
            pch += nconsumed;

            std::vector<int> ai;
            ai.push_back(v);

            nscan = sscanf(pch, ",%d%n", &v, &nconsumed);
            while (nscan == 1)
            {
                pch += nconsumed;

                ai.push_back(v);

                nscan = sscanf(pch, ",%d%n", &v, &nconsumed);
            }

            // array end
            aai.push_back(ai);
        }

        pch = strtok(NULL, "[]");
    }

    return aai;
}

static std::vector<int> parse_comma_pixel_type_list(char* s)
{
    std::vector<int> aps;

    char* pch = strtok(s, ",");
    while (pch != NULL)
    {
        // RAW/RGB/BGR/GRAY/RGBA/BGRA
        if (strcmp(pch, "RAW") == 0)
            aps.push_back(-233);
        if (strcmp(pch, "RGB") == 0)
            aps.push_back(ncnn::Mat::PIXEL_RGB);
        if (strcmp(pch, "BGR") == 0)
            aps.push_back(ncnn::Mat::PIXEL_BGR);
        if (strcmp(pch, "GRAY") == 0)
            aps.push_back(ncnn::Mat::PIXEL_GRAY);
        if (strcmp(pch, "RGBA") == 0)
            aps.push_back(ncnn::Mat::PIXEL_RGBA);
        if (strcmp(pch, "BGRA") == 0)
            aps.push_back(ncnn::Mat::PIXEL_BGRA);

        pch = strtok(NULL, ",");
    }

    return aps;
}

static void print_float_array_list(const std::vector<std::vector<float> >& list)
{
    for (size_t i = 0; i < list.size(); i++)
    {
        const std::vector<float>& array = list[i];

        fprintf(stderr, "[");
        for (size_t j = 0; j < array.size(); j++)
        {
            fprintf(stderr, "%f", array[j]);
            if (j != array.size() - 1)
                fprintf(stderr, ",");
        }
        fprintf(stderr, "]");
        if (i != list.size() - 1)
            fprintf(stderr, ",");
    }
}

static void print_int_array_list(const std::vector<std::vector<int> >& list)
{
    for (size_t i = 0; i < list.size(); i++)
    {
        const std::vector<int>& array = list[i];

        fprintf(stderr, "[");
        for (size_t j = 0; j < array.size(); j++)
        {
            fprintf(stderr, "%d", array[j]);
            if (j != array.size() - 1)
                fprintf(stderr, ",");
        }
        fprintf(stderr, "]");
        if (i != list.size() - 1)
            fprintf(stderr, ",");
    }
}

static void print_pixel_type_list(const std::vector<int>& list)
{
    for (size_t i = 0; i < list.size(); i++)
    {
        const int type = list[i];
        if (type == -233)
            fprintf(stderr, "RAW");
        if (type == ncnn::Mat::PIXEL_RGB)
            fprintf(stderr, "RGB");
        if (type == ncnn::Mat::PIXEL_BGR)
            fprintf(stderr, "BGR");
        if (type == ncnn::Mat::PIXEL_GRAY)
            fprintf(stderr, "GRAY");
        if (type == ncnn::Mat::PIXEL_RGBA)
            fprintf(stderr, "RGBA");
        if (type == ncnn::Mat::PIXEL_BGRA)
            fprintf(stderr, "BGRA");

        if (i != list.size() - 1)
            fprintf(stderr, ",");
    }
}
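// Each list-valued option (mean / norm / shape / pixel) takes one entry per
// input blob, comma separated; the sanity checks in main() enforce that the
// counts match the number of Input layers in the model.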
static void show_usage()
{
    fprintf(stderr, "Usage: ncnn2table [ncnnparam] [ncnnbin] [list,...] [ncnntable] [(key=value)...]\n");
    fprintf(stderr, "  mean=[104.0,117.0,123.0],...\n");
    fprintf(stderr, "  norm=[1.0,1.0,1.0],...\n");
    fprintf(stderr, "  shape=[224,224,3],... [w,h,c] or [w,h] **[0,0] will not resize\n");
    fprintf(stderr, "  pixel=RAW/RGB/BGR/GRAY/RGBA/BGRA,...\n");
    fprintf(stderr, "  thread=8\n");
    fprintf(stderr, "  method=kl/aciq/eq\n");
    fprintf(stderr, "Sample usage: ncnn2table squeezenet.param squeezenet.bin imagelist.txt squeezenet.table mean=[104.0,117.0,123.0] norm=[1.0,1.0,1.0] shape=[227,227,3] pixel=BGR method=kl\n");
}

int main(int argc, char** argv)
{
    if (argc < 5)
    {
        show_usage();
        return -1;
    }

    for (int i = 1; i < argc; i++)
    {
        if (argv[i][0] == '-')
        {
            show_usage();
            return -1;
        }
    }

    const char* inparam = argv[1];
    const char* inbin = argv[2];
    char* lists = argv[3];
    const char* outtable = argv[4];

    ncnn::Option opt;
    opt.num_threads = 1;
    opt.use_fp16_packed = false;
    opt.use_fp16_storage = false;
    opt.use_fp16_arithmetic = false;

    QuantNet net;
    net.opt = opt;
    net.load_param(inparam);
    net.load_model(inbin);

    net.init();

    // load lists
    net.listspaths = parse_comma_path_list(lists);

    std::string method = "kl";

    for (int i = 5; i < argc; i++)
    {
        // key=value
        char* kv = argv[i];

        char* eqs = strchr(kv, '=');
        if (eqs == NULL)
        {
            fprintf(stderr, "unrecognized arg %s\n", kv);
            continue;
        }

        // split k v
        eqs[0] = '\0';
        const char* key = kv;
        char* value = eqs + 1;

        // load mean norm shape
        if (memcmp(key, "mean", 4) == 0)
            net.means = parse_comma_float_array_list(value);
        if (memcmp(key, "norm", 4) == 0)
            net.norms = parse_comma_float_array_list(value);
        if (memcmp(key, "shape", 5) == 0)
            net.shapes = parse_comma_int_array_list(value);
        if (memcmp(key, "pixel", 5) == 0)
            net.type_to_pixels = parse_comma_pixel_type_list(value);
        if (memcmp(key, "thread", 6) == 0)
            net.quantize_num_threads = atoi(value);
        if (memcmp(key, "method", 6) == 0)
            method = std::string(value);
    }

    // sanity check
    const size_t input_blob_count = net.input_blobs.size();
    if (net.listspaths.size() != input_blob_count)
    {
        fprintf(stderr, "expect %d lists, but got %d\n", (int)input_blob_count, (int)net.listspaths.size());
        return -1;
    }
    if (net.means.size() != input_blob_count)
    {
        fprintf(stderr, "expect %d means, but got %d\n", (int)input_blob_count, (int)net.means.size());
        return -1;
    }
    if (net.norms.size() != input_blob_count)
    {
        fprintf(stderr, "expect %d norms, but got %d\n", (int)input_blob_count, (int)net.norms.size());
        return -1;
    }
    if (net.shapes.size() != input_blob_count)
    {
        fprintf(stderr, "expect %d shapes, but got %d\n", (int)input_blob_count, (int)net.shapes.size());
        return -1;
    }
    if (net.type_to_pixels.size() != input_blob_count)
    {
        fprintf(stderr, "expect %d pixels, but got %d\n", (int)input_blob_count, (int)net.type_to_pixels.size());
        return -1;
    }
    if (net.quantize_num_threads <= 0)
    {
        fprintf(stderr, "malformed thread %d\n", net.quantize_num_threads);
        return -1;
    }

    // print quantnet config
    {
        fprintf(stderr, "mean = ");
        print_float_array_list(net.means);
        fprintf(stderr, "\n");
        fprintf(stderr, "norm = ");
        print_float_array_list(net.norms);
        fprintf(stderr, "\n");
        fprintf(stderr, "shape = ");
        print_int_array_list(net.shapes);
        fprintf(stderr, "\n");
        fprintf(stderr, "pixel = ");
        print_pixel_type_list(net.type_to_pixels);
        fprintf(stderr, "\n");
        fprintf(stderr, "thread = %d\n", net.quantize_num_threads);
        fprintf(stderr, "method = %s\n", method.c_str());
        fprintf(stderr, "---------------------------------------\n");
    }
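    // dispatch to the selected calibration method
    // note that eq internally runs a full KL pass first to obtain its initial scales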
    if (method == "kl")
    {
        net.quantize_KL();
    }
    else if (method == "aciq")
    {
        net.quantize_ACIQ();
    }
    else if (method == "eq")
    {
        net.quantize_EQ();
    }
    else
    {
        fprintf(stderr, "unknown method %s, expect kl / aciq / eq\n", method.c_str());
        return -1;
    }

    net.print_quant_info();

    net.save_table(outtable);

    return 0;
}