// deepin-ocr/3rdparty/ncnn/tools/quantize/ncnn2table.cpp
// Tencent is pleased to support the open source community by making ncnn available.
//
// author:BUG1989 (https://github.com/BUG1989/) Long-term support.
// author:JansonZhu (https://github.com/JansonZhu) Implemented the function of entropy calibration.
//
// Copyright (C) 2019 BUG1989. All rights reserved.
// Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#ifdef _MSC_VER
#define _CRT_SECURE_NO_DEPRECATE
#endif
#include <ctype.h>
#include <float.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#if defined(USE_NCNN_SIMPLEOCV)
#include "simpleocv.h"
#elif defined(USE_LOCAL_IMREADWRITE)
#include "imreadwrite.h"
#else
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#endif
#include <algorithm>
#include <string>
#include <vector>
// ncnn public header
#include "benchmark.h"
#include "cpu.h"
#include "net.h"
// ncnn private header
#include "layer/convolution.h"
#include "layer/convolutiondepthwise.h"
#include "layer/innerproduct.h"
class QuantBlobStat
{
public:
QuantBlobStat()
{
threshold = 0.f;
absmax = 0.f;
total = 0;
}
public:
float threshold;
float absmax;
// ACIQ
int total;
// KL
std::vector<uint64_t> histogram;
std::vector<float> histogram_normed;
};
class QuantNet : public ncnn::Net
{
public:
QuantNet();
std::vector<ncnn::Blob>& blobs;
std::vector<ncnn::Layer*>& layers;
public:
std::vector<std::vector<std::string> > listspaths;
std::vector<std::vector<float> > means;
std::vector<std::vector<float> > norms;
std::vector<std::vector<int> > shapes;
std::vector<int> type_to_pixels;
int quantize_num_threads;
public:
int init();
void print_quant_info() const;
int save_table(const char* tablepath);
int quantize_KL();
int quantize_ACIQ();
int quantize_EQ();
public:
std::vector<int> input_blobs;
std::vector<int> conv_layers;
std::vector<int> conv_bottom_blobs;
std::vector<int> conv_top_blobs;
// result
std::vector<QuantBlobStat> quant_blob_stats;
std::vector<ncnn::Mat> weight_scales;
std::vector<ncnn::Mat> bottom_blob_scales;
};
QuantNet::QuantNet()
: blobs(mutable_blobs()), layers(mutable_layers())
{
quantize_num_threads = ncnn::get_cpu_count();
}
int QuantNet::init()
{
// find all input layers
for (int i = 0; i < (int)layers.size(); i++)
{
const ncnn::Layer* layer = layers[i];
if (layer->type == "Input")
{
input_blobs.push_back(layer->tops[0]);
}
}
// find all conv layers
for (int i = 0; i < (int)layers.size(); i++)
{
const ncnn::Layer* layer = layers[i];
if (layer->type == "Convolution" || layer->type == "ConvolutionDepthWise" || layer->type == "InnerProduct")
{
conv_layers.push_back(i);
conv_bottom_blobs.push_back(layer->bottoms[0]);
conv_top_blobs.push_back(layer->tops[0]);
}
}
const int conv_layer_count = (int)conv_layers.size();
const int conv_bottom_blob_count = (int)conv_bottom_blobs.size();
quant_blob_stats.resize(conv_bottom_blob_count);
weight_scales.resize(conv_layer_count);
bottom_blob_scales.resize(conv_bottom_blob_count);
return 0;
}
int QuantNet::save_table(const char* tablepath)
{
FILE* fp = fopen(tablepath, "wb");
if (!fp)
{
fprintf(stderr, "fopen %s failed\n", tablepath);
return -1;
}
const int conv_layer_count = (int)conv_layers.size();
const int conv_bottom_blob_count = (int)conv_bottom_blobs.size();
for (int i = 0; i < conv_layer_count; i++)
{
const ncnn::Mat& weight_scale = weight_scales[i];
fprintf(fp, "%s_param_0 ", layers[conv_layers[i]]->name.c_str());
for (int j = 0; j < weight_scale.w; j++)
{
fprintf(fp, "%f ", weight_scale[j]);
}
fprintf(fp, "\n");
}
for (int i = 0; i < conv_bottom_blob_count; i++)
{
const ncnn::Mat& bottom_blob_scale = bottom_blob_scales[i];
fprintf(fp, "%s ", layers[conv_layers[i]]->name.c_str());
for (int j = 0; j < bottom_blob_scale.w; j++)
{
fprintf(fp, "%f ", bottom_blob_scale[j]);
}
fprintf(fp, "\n");
}
fclose(fp);
fprintf(stderr, "ncnn int8 calibration table create success, best wish for your int8 inference has a low accuracy loss...\\(^0^)/...233...\n");
return 0;
}
void QuantNet::print_quant_info() const
{
for (int i = 0; i < (int)conv_bottom_blobs.size(); i++)
{
const QuantBlobStat& stat = quant_blob_stats[i];
float scale = 127 / stat.threshold;
fprintf(stderr, "%-40s : max = %-15f threshold = %-15f scale = %-15f\n", layers[conv_layers[i]]->name.c_str(), stat.absmax, stat.threshold, scale);
}
}
/**
 * Read and resize an image.
 * shape is given as [w,h,...]
 * If both w and h are positive, the image is resized to exactly w x h.
 * If both w and h are zero or negative, the image is not resized.
 * If only h is zero or negative, the image is resized to width w, keeping the aspect ratio.
 * If only w is zero or negative, the image is resized to height h, keeping the aspect ratio.
 * @return ncnn::Mat
 */
inline ncnn::Mat read_and_resize_image(const std::vector<int>& shape, const std::string& imagepath, int pixel_convert_type)
{
int target_w = shape[0];
int target_h = shape[1];
cv::Mat bgr = cv::imread(imagepath, 1);
if (target_h <= 0 && target_w <= 0)
{
return ncnn::Mat::from_pixels(bgr.data, pixel_convert_type, bgr.cols, bgr.rows);
}
if (target_h <= 0 || target_w <= 0)
{
float scale = 1.0;
if (target_h <= 0)
{
scale = 1.0 * bgr.cols / target_w;
target_h = int(1.0 * bgr.rows / scale);
}
if (target_w <= 0)
{
scale = 1.0 * bgr.rows / target_h;
target_w = int(1.0 * bgr.cols / scale);
}
}
return ncnn::Mat::from_pixels_resize(bgr.data, pixel_convert_type, bgr.cols, bgr.rows, target_w, target_h);
}
static float compute_kl_divergence(const std::vector<float>& a, const std::vector<float>& b)
{
const size_t length = a.size();
float result = 0;
for (size_t i = 0; i < length; i++)
{
result += a[i] * log(a[i] / b[i]);
}
return result;
}
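// KL (TensorRT-style) calibration:
// pass 1: run every image and record the per-blob absolute maximum
// pass 2: run every image again and histogram |activation| into 2048 bins
//         over [0, absmax] for each conv input blob
// pass 3: for each candidate threshold in [128, 2048) bins, pick the one
//         whose clipped/quantized distribution diverges least from the
//         reference (see the sweep below)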
int QuantNet::quantize_KL()
{
const int input_blob_count = (int)input_blobs.size();
const int conv_layer_count = (int)conv_layers.size();
const int conv_bottom_blob_count = (int)conv_bottom_blobs.size();
const int image_count = (int)listspaths[0].size();
const int num_histogram_bins = 2048;
std::vector<ncnn::UnlockedPoolAllocator> blob_allocators(quantize_num_threads);
std::vector<ncnn::UnlockedPoolAllocator> workspace_allocators(quantize_num_threads);
// initialize conv weight scales
#pragma omp parallel for num_threads(quantize_num_threads)
for (int i = 0; i < conv_layer_count; i++)
{
const ncnn::Layer* layer = layers[conv_layers[i]];
if (layer->type == "Convolution")
{
const ncnn::Convolution* convolution = (const ncnn::Convolution*)layer;
const int num_output = convolution->num_output;
const int kernel_w = convolution->kernel_w;
const int kernel_h = convolution->kernel_h;
const int dilation_w = convolution->dilation_w;
const int dilation_h = convolution->dilation_h;
const int stride_w = convolution->stride_w;
const int stride_h = convolution->stride_h;
const int weight_data_size_output = convolution->weight_data_size / num_output;
// int8 winograd F43 needs weight data to use 6bit quantization
// TODO proper condition for winograd 3x3 int8
bool quant_6bit = false;
if (kernel_w == 3 && kernel_h == 3 && dilation_w == 1 && dilation_h == 1 && stride_w == 1 && stride_h == 1)
quant_6bit = true;
weight_scales[i].create(num_output);
for (int n = 0; n < num_output; n++)
{
const ncnn::Mat weight_data_n = convolution->weight_data.range(weight_data_size_output * n, weight_data_size_output);
float absmax = 0.f;
for (int k = 0; k < weight_data_size_output; k++)
{
absmax = std::max(absmax, (float)fabs(weight_data_n[k]));
}
if (quant_6bit)
{
weight_scales[i][n] = 31 / absmax;
}
else
{
weight_scales[i][n] = 127 / absmax;
}
}
}
if (layer->type == "ConvolutionDepthWise")
{
const ncnn::ConvolutionDepthWise* convolutiondepthwise = (const ncnn::ConvolutionDepthWise*)layer;
const int group = convolutiondepthwise->group;
const int weight_data_size_output = convolutiondepthwise->weight_data_size / group;
weight_scales[i].create(group);
for (int n = 0; n < group; n++)
{
const ncnn::Mat weight_data_n = convolutiondepthwise->weight_data.range(weight_data_size_output * n, weight_data_size_output);
float absmax = 0.f;
for (int k = 0; k < weight_data_size_output; k++)
{
absmax = std::max(absmax, (float)fabs(weight_data_n[k]));
}
weight_scales[i][n] = 127 / absmax;
}
}
if (layer->type == "InnerProduct")
{
const ncnn::InnerProduct* innerproduct = (const ncnn::InnerProduct*)layer;
const int num_output = innerproduct->num_output;
const int weight_data_size_output = innerproduct->weight_data_size / num_output;
weight_scales[i].create(num_output);
for (int n = 0; n < num_output; n++)
{
const ncnn::Mat weight_data_n = innerproduct->weight_data.range(weight_data_size_output * n, weight_data_size_output);
float absmax = 0.f;
for (int k = 0; k < weight_data_size_output; k++)
{
absmax = std::max(absmax, (float)fabs(weight_data_n[k]));
}
weight_scales[i][n] = 127 / absmax;
}
}
}
// count the absmax
#pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
for (int i = 0; i < image_count; i++)
{
if (i % 100 == 0)
{
fprintf(stderr, "count the absmax %.2f%% [ %d / %d ]\n", i * 100.f / image_count, i, image_count);
}
ncnn::Extractor ex = create_extractor();
const int thread_num = ncnn::get_omp_thread_num();
ex.set_blob_allocator(&blob_allocators[thread_num]);
ex.set_workspace_allocator(&workspace_allocators[thread_num]);
for (int j = 0; j < input_blob_count; j++)
{
const int type_to_pixel = type_to_pixels[j];
const std::vector<float>& mean_vals = means[j];
const std::vector<float>& norm_vals = norms[j];
int pixel_convert_type = ncnn::Mat::PIXEL_BGR;
if (type_to_pixel != pixel_convert_type)
{
pixel_convert_type = pixel_convert_type | (type_to_pixel << ncnn::Mat::PIXEL_CONVERT_SHIFT);
}
ncnn::Mat in = read_and_resize_image(shapes[j], listspaths[j][i], pixel_convert_type);
in.substract_mean_normalize(mean_vals.data(), norm_vals.data());
ex.input(input_blobs[j], in);
}
for (int j = 0; j < conv_bottom_blob_count; j++)
{
ncnn::Mat out;
ex.extract(conv_bottom_blobs[j], out);
// count absmax
{
float absmax = 0.f;
const int outc = out.c;
const int outsize = out.w * out.h;
for (int p = 0; p < outc; p++)
{
const float* ptr = out.channel(p);
for (int k = 0; k < outsize; k++)
{
absmax = std::max(absmax, (float)fabs(ptr[k]));
}
}
#pragma omp critical
{
QuantBlobStat& stat = quant_blob_stats[j];
stat.absmax = std::max(stat.absmax, absmax);
}
}
}
}
// initialize histogram
#pragma omp parallel for num_threads(quantize_num_threads)
for (int i = 0; i < conv_bottom_blob_count; i++)
{
QuantBlobStat& stat = quant_blob_stats[i];
stat.histogram.resize(num_histogram_bins, 0);
stat.histogram_normed.resize(num_histogram_bins, 0);
}
// build histogram
#pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
for (int i = 0; i < image_count; i++)
{
if (i % 100 == 0)
{
fprintf(stderr, "build histogram %.2f%% [ %d / %d ]\n", i * 100.f / image_count, i, image_count);
}
ncnn::Extractor ex = create_extractor();
const int thread_num = ncnn::get_omp_thread_num();
ex.set_blob_allocator(&blob_allocators[thread_num]);
ex.set_workspace_allocator(&workspace_allocators[thread_num]);
for (int j = 0; j < input_blob_count; j++)
{
const int type_to_pixel = type_to_pixels[j];
const std::vector<float>& mean_vals = means[j];
const std::vector<float>& norm_vals = norms[j];
int pixel_convert_type = ncnn::Mat::PIXEL_BGR;
if (type_to_pixel != pixel_convert_type)
{
pixel_convert_type = pixel_convert_type | (type_to_pixel << ncnn::Mat::PIXEL_CONVERT_SHIFT);
}
ncnn::Mat in = read_and_resize_image(shapes[j], listspaths[j][i], pixel_convert_type);
in.substract_mean_normalize(mean_vals.data(), norm_vals.data());
ex.input(input_blobs[j], in);
}
for (int j = 0; j < conv_bottom_blob_count; j++)
{
ncnn::Mat out;
ex.extract(conv_bottom_blobs[j], out);
// count histogram bin
{
const float absmax = quant_blob_stats[j].absmax;
std::vector<uint64_t> histogram(num_histogram_bins, 0);
const int outc = out.c;
const int outsize = out.w * out.h;
for (int p = 0; p < outc; p++)
{
const float* ptr = out.channel(p);
for (int k = 0; k < outsize; k++)
{
if (ptr[k] == 0.f)
continue;
const int index = std::min((int)(fabs(ptr[k]) / absmax * num_histogram_bins), (num_histogram_bins - 1));
histogram[index] += 1;
}
}
#pragma omp critical
{
QuantBlobStat& stat = quant_blob_stats[j];
for (int k = 0; k < num_histogram_bins; k++)
{
stat.histogram[k] += histogram[k];
}
}
}
}
}
// using kld to find the best threshold value
#pragma omp parallel for num_threads(quantize_num_threads)
for (int i = 0; i < conv_bottom_blob_count; i++)
{
QuantBlobStat& stat = quant_blob_stats[i];
// normalize histogram bin
{
uint64_t sum = 0;
for (int j = 0; j < num_histogram_bins; j++)
{
sum += stat.histogram[j];
}
for (int j = 0; j < num_histogram_bins; j++)
{
stat.histogram_normed[j] = (float)(stat.histogram[j] / (double)sum);
}
}
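// sweep all candidate thresholds: P is the normalized histogram clipped at
// the threshold (with the tail mass folded into the last bin), Q is P
// quantized down to 128 levels and expanded back to threshold bins; the
// best threshold minimizes KL(P || Q)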
const int target_bin = 128;
int target_threshold = target_bin;
float min_kl_divergence = FLT_MAX;
for (int threshold = target_bin; threshold < num_histogram_bins; threshold++)
{
const float kl_eps = 0.0001f;
std::vector<float> clip_distribution(threshold, kl_eps);
{
for (int j = 0; j < threshold; j++)
{
clip_distribution[j] += stat.histogram_normed[j];
}
for (int j = threshold; j < num_histogram_bins; j++)
{
clip_distribution[threshold - 1] += stat.histogram_normed[j];
}
}
const float num_per_bin = (float)threshold / target_bin;
std::vector<float> quantize_distribution(target_bin, 0.f);
{
{
const float end = num_per_bin;
const int right_lower = (int)floor(end);
const float right_scale = end - right_lower;
if (right_scale > 0)
{
quantize_distribution[0] += right_scale * stat.histogram_normed[right_lower];
}
for (int k = 0; k < right_lower; k++)
{
quantize_distribution[0] += stat.histogram_normed[k];
}
quantize_distribution[0] /= right_lower + right_scale;
}
for (int j = 1; j < target_bin - 1; j++)
{
const float start = j * num_per_bin;
const float end = (j + 1) * num_per_bin;
const int left_upper = (int)ceil(start);
const float left_scale = left_upper - start;
const int right_lower = (int)floor(end);
const float right_scale = end - right_lower;
if (left_scale > 0)
{
quantize_distribution[j] += left_scale * stat.histogram_normed[left_upper - 1];
}
if (right_scale > 0)
{
quantize_distribution[j] += right_scale * stat.histogram_normed[right_lower];
}
for (int k = left_upper; k < right_lower; k++)
{
quantize_distribution[j] += stat.histogram_normed[k];
}
quantize_distribution[j] /= right_lower - left_upper + left_scale + right_scale;
}
{
const float start = threshold - num_per_bin;
const int left_upper = (int)ceil(start);
const float left_scale = left_upper - start;
if (left_scale > 0)
{
quantize_distribution[target_bin - 1] += left_scale * stat.histogram_normed[left_upper - 1];
}
for (int k = left_upper; k < threshold; k++)
{
quantize_distribution[target_bin - 1] += stat.histogram_normed[k];
}
quantize_distribution[target_bin - 1] /= threshold - left_upper + left_scale;
}
}
std::vector<float> expand_distribution(threshold, kl_eps);
{
{
const float end = num_per_bin;
const int right_lower = (int)floor(end);
const float right_scale = end - right_lower;
if (right_scale > 0)
{
expand_distribution[right_lower] += right_scale * quantize_distribution[0];
}
for (int k = 0; k < right_lower; k++)
{
expand_distribution[k] += quantize_distribution[0];
}
}
for (int j = 1; j < target_bin - 1; j++)
{
const float start = j * num_per_bin;
const float end = (j + 1) * num_per_bin;
const int left_upper = (int)ceil(start);
const float left_scale = left_upper - start;
const int right_lower = (int)floor(end);
const float right_scale = end - right_lower;
if (left_scale > 0)
{
expand_distribution[left_upper - 1] += left_scale * quantize_distribution[j];
}
if (right_scale > 0)
{
expand_distribution[right_lower] += right_scale * quantize_distribution[j];
}
for (int k = left_upper; k < right_lower; k++)
{
expand_distribution[k] += quantize_distribution[j];
}
}
{
const float start = threshold - num_per_bin;
const int left_upper = (int)ceil(start);
const float left_scale = left_upper - start;
if (left_scale > 0)
{
expand_distribution[left_upper - 1] += left_scale * quantize_distribution[target_bin - 1];
}
for (int k = left_upper; k < threshold; k++)
{
expand_distribution[k] += quantize_distribution[target_bin - 1];
}
}
}
// kl
const float kl_divergence = compute_kl_divergence(clip_distribution, expand_distribution);
// keep the threshold with the smallest KL divergence
if (kl_divergence < min_kl_divergence)
{
min_kl_divergence = kl_divergence;
target_threshold = threshold;
}
}
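// map the winning bin index back to an activation magnitude (bin center)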
stat.threshold = (target_threshold + 0.5f) * stat.absmax / num_histogram_bins;
float scale = 127 / stat.threshold;
bottom_blob_scales[i].create(1);
bottom_blob_scales[i][0] = scale;
}
return 0;
}
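// ACIQ analytic clipping (after Banner et al., "Post training 4-bit
// quantization of convolutional networks for rapid-deployment"): assuming
// the values are roughly Gaussian, estimate sigma from the expected range
// of N samples, then clip at a precomputed optimal multiple of sigma for
// the given bit width (alpha_gaussian[num_bits - 1]).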
static float compute_aciq_gaussian_clip(float absmax, int N, int num_bits = 8)
{
const float alpha_gaussian[8] = {0, 1.71063519, 2.15159277, 2.55913646, 2.93620062, 3.28691474, 3.6151146, 3.92403714};
const double gaussian_const = (0.5 * 0.35) * (1 + sqrt(3.14159265358979323846 * log(4)));
double std = (absmax * 2 * gaussian_const) / sqrt(2 * log(N));
return (float)(alpha_gaussian[num_bits - 1] * std);
}
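// ACIQ calibration: weight scales use the analytic clip instead of the raw
// absmax, and activations need only a single pass that records absmax plus
// the per-blob element count, from which the clip threshold follows in
// closed form.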
int QuantNet::quantize_ACIQ()
{
const int input_blob_count = (int)input_blobs.size();
const int conv_layer_count = (int)conv_layers.size();
const int conv_bottom_blob_count = (int)conv_bottom_blobs.size();
const int image_count = (int)listspaths[0].size();
std::vector<ncnn::UnlockedPoolAllocator> blob_allocators(quantize_num_threads);
std::vector<ncnn::UnlockedPoolAllocator> workspace_allocators(quantize_num_threads);
// initialize conv weight scales
#pragma omp parallel for num_threads(quantize_num_threads)
for (int i = 0; i < conv_layer_count; i++)
{
const ncnn::Layer* layer = layers[conv_layers[i]];
if (layer->type == "Convolution")
{
const ncnn::Convolution* convolution = (const ncnn::Convolution*)layer;
const int num_output = convolution->num_output;
const int kernel_w = convolution->kernel_w;
const int kernel_h = convolution->kernel_h;
const int dilation_w = convolution->dilation_w;
const int dilation_h = convolution->dilation_h;
const int stride_w = convolution->stride_w;
const int stride_h = convolution->stride_h;
const int weight_data_size_output = convolution->weight_data_size / num_output;
// int8 winograd F43 needs weight data to use 6bit quantization
// TODO proper condition for winograd 3x3 int8
bool quant_6bit = false;
if (kernel_w == 3 && kernel_h == 3 && dilation_w == 1 && dilation_h == 1 && stride_w == 1 && stride_h == 1)
quant_6bit = true;
weight_scales[i].create(num_output);
for (int n = 0; n < num_output; n++)
{
const ncnn::Mat weight_data_n = convolution->weight_data.range(weight_data_size_output * n, weight_data_size_output);
float absmax = 0.f;
for (int k = 0; k < weight_data_size_output; k++)
{
absmax = std::max(absmax, (float)fabs(weight_data_n[k]));
}
if (quant_6bit)
{
const float threshold = compute_aciq_gaussian_clip(absmax, weight_data_size_output, 6);
weight_scales[i][n] = 31 / threshold;
}
else
{
const float threshold = compute_aciq_gaussian_clip(absmax, weight_data_size_output);
weight_scales[i][n] = 127 / threshold;
}
}
}
if (layer->type == "ConvolutionDepthWise")
{
const ncnn::ConvolutionDepthWise* convolutiondepthwise = (const ncnn::ConvolutionDepthWise*)layer;
const int group = convolutiondepthwise->group;
const int weight_data_size_output = convolutiondepthwise->weight_data_size / group;
weight_scales[i].create(group);
for (int n = 0; n < group; n++)
{
const ncnn::Mat weight_data_n = convolutiondepthwise->weight_data.range(weight_data_size_output * n, weight_data_size_output);
float absmax = 0.f;
for (int k = 0; k < weight_data_size_output; k++)
{
absmax = std::max(absmax, (float)fabs(weight_data_n[k]));
}
const float threshold = compute_aciq_gaussian_clip(absmax, weight_data_size_output);
weight_scales[i][n] = 127 / threshold;
}
}
if (layer->type == "InnerProduct")
{
const ncnn::InnerProduct* innerproduct = (const ncnn::InnerProduct*)layer;
const int num_output = innerproduct->num_output;
const int weight_data_size_output = innerproduct->weight_data_size / num_output;
weight_scales[i].create(num_output);
for (int n = 0; n < num_output; n++)
{
const ncnn::Mat weight_data_n = innerproduct->weight_data.range(weight_data_size_output * n, weight_data_size_output);
float absmax = 0.f;
for (int k = 0; k < weight_data_size_output; k++)
{
absmax = std::max(absmax, (float)fabs(weight_data_n[k]));
}
const float threshold = compute_aciq_gaussian_clip(absmax, weight_data_size_output);
weight_scales[i][n] = 127 / threshold;
}
}
}
// count the absmax
#pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
for (int i = 0; i < image_count; i++)
{
if (i % 100 == 0)
{
fprintf(stderr, "count the absmax %.2f%% [ %d / %d ]\n", i * 100.f / image_count, i, image_count);
}
ncnn::Extractor ex = create_extractor();
const int thread_num = ncnn::get_omp_thread_num();
ex.set_blob_allocator(&blob_allocators[thread_num]);
ex.set_workspace_allocator(&workspace_allocators[thread_num]);
for (int j = 0; j < input_blob_count; j++)
{
const int type_to_pixel = type_to_pixels[j];
const std::vector<float>& mean_vals = means[j];
const std::vector<float>& norm_vals = norms[j];
int pixel_convert_type = ncnn::Mat::PIXEL_BGR;
if (type_to_pixel != pixel_convert_type)
{
pixel_convert_type = pixel_convert_type | (type_to_pixel << ncnn::Mat::PIXEL_CONVERT_SHIFT);
}
ncnn::Mat in = read_and_resize_image(shapes[j], listspaths[j][i], pixel_convert_type);
in.substract_mean_normalize(mean_vals.data(), norm_vals.data());
ex.input(input_blobs[j], in);
}
for (int j = 0; j < conv_bottom_blob_count; j++)
{
ncnn::Mat out;
ex.extract(conv_bottom_blobs[j], out);
// count absmax
{
float absmax = 0.f;
const int outc = out.c;
const int outsize = out.w * out.h;
for (int p = 0; p < outc; p++)
{
const float* ptr = out.channel(p);
for (int k = 0; k < outsize; k++)
{
absmax = std::max(absmax, (float)fabs(ptr[k]));
}
}
#pragma omp critical
{
QuantBlobStat& stat = quant_blob_stats[j];
stat.absmax = std::max(stat.absmax, absmax);
stat.total = outc * outsize;
}
}
}
}
// compute the analytic gaussian clip threshold for each blob
#pragma omp parallel for num_threads(quantize_num_threads)
for (int i = 0; i < conv_bottom_blob_count; i++)
{
QuantBlobStat& stat = quant_blob_stats[i];
stat.threshold = compute_aciq_gaussian_clip(stat.absmax, stat.total);
float scale = 127 / stat.threshold;
bottom_blob_scales[i].create(1);
bottom_blob_scales[i][0] = scale;
}
return 0;
}
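// cosine similarity between two fp32 feature maps of identical shape; EQ
// uses it to score how closely an int8 layer reproduces the fp32 output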
static float cosine_similarity(const ncnn::Mat& a, const ncnn::Mat& b)
{
const int channels = a.c;
const int size = a.w * a.h;
float sa = 0;
float sb = 0;
float sum = 0;
for (int p = 0; p < channels; p++)
{
const float* pa = a.channel(p);
const float* pb = b.channel(p);
for (int i = 0; i < size; i++)
{
sa += pa[i] * pa[i];
sb += pb[i] * pb[i];
sum += pa[i] * pb[i];
}
}
float sim = (float)sum / sqrt(sa) / sqrt(sb);
return sim;
}
static int get_layer_param(const ncnn::Layer* layer, ncnn::ParamDict& pd)
{
if (layer->type == "Convolution")
{
ncnn::Convolution* convolution = (ncnn::Convolution*)layer;
pd.set(0, convolution->num_output);
pd.set(1, convolution->kernel_w);
pd.set(11, convolution->kernel_h);
pd.set(2, convolution->dilation_w);
pd.set(12, convolution->dilation_h);
pd.set(3, convolution->stride_w);
pd.set(13, convolution->stride_h);
pd.set(4, convolution->pad_left);
pd.set(15, convolution->pad_right);
pd.set(14, convolution->pad_top);
pd.set(16, convolution->pad_bottom);
pd.set(18, convolution->pad_value);
pd.set(5, convolution->bias_term);
pd.set(6, convolution->weight_data_size);
pd.set(8, convolution->int8_scale_term);
pd.set(9, convolution->activation_type);
pd.set(10, convolution->activation_params);
}
else if (layer->type == "ConvolutionDepthWise")
{
ncnn::ConvolutionDepthWise* convolutiondepthwise = (ncnn::ConvolutionDepthWise*)layer;
pd.set(0, convolutiondepthwise->num_output);
pd.set(1, convolutiondepthwise->kernel_w);
pd.set(11, convolutiondepthwise->kernel_h);
pd.set(2, convolutiondepthwise->dilation_w);
pd.set(12, convolutiondepthwise->dilation_h);
pd.set(3, convolutiondepthwise->stride_w);
pd.set(13, convolutiondepthwise->stride_h);
pd.set(4, convolutiondepthwise->pad_left);
pd.set(15, convolutiondepthwise->pad_right);
pd.set(14, convolutiondepthwise->pad_top);
pd.set(16, convolutiondepthwise->pad_bottom);
pd.set(18, convolutiondepthwise->pad_value);
pd.set(5, convolutiondepthwise->bias_term);
pd.set(6, convolutiondepthwise->weight_data_size);
pd.set(7, convolutiondepthwise->group);
pd.set(8, convolutiondepthwise->int8_scale_term);
pd.set(9, convolutiondepthwise->activation_type);
pd.set(10, convolutiondepthwise->activation_params);
}
else if (layer->type == "InnerProduct")
{
ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layer;
pd.set(0, innerproduct->num_output);
pd.set(1, innerproduct->bias_term);
pd.set(2, innerproduct->weight_data_size);
pd.set(8, innerproduct->int8_scale_term);
pd.set(9, innerproduct->activation_type);
pd.set(10, innerproduct->activation_params);
}
else
{
fprintf(stderr, "unexpected layer type %s in get_layer_param\n", layer->type.c_str());
return -1;
}
return 0;
}
static int get_layer_weights(const ncnn::Layer* layer, std::vector<ncnn::Mat>& weights)
{
if (layer->type == "Convolution")
{
ncnn::Convolution* convolution = (ncnn::Convolution*)layer;
weights.push_back(convolution->weight_data);
if (convolution->bias_term)
weights.push_back(convolution->bias_data);
}
else if (layer->type == "ConvolutionDepthWise")
{
ncnn::ConvolutionDepthWise* convolutiondepthwise = (ncnn::ConvolutionDepthWise*)layer;
weights.push_back(convolutiondepthwise->weight_data);
if (convolutiondepthwise->bias_term)
weights.push_back(convolutiondepthwise->bias_data);
}
else if (layer->type == "InnerProduct")
{
ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layer;
weights.push_back(innerproduct->weight_data);
if (innerproduct->bias_term)
weights.push_back(innerproduct->bias_data);
}
else
{
fprintf(stderr, "unexpected layer type %s in get_layer_weights\n", layer->type.c_str());
return -1;
}
return 0;
}
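// EQ (EasyQuant-style) calibration: start from the KL scales, then for each
// conv layer greedily re-search every weight scale and every input blob
// scale within [0.5x, 2.0x] of its current value in 100 steps, keeping the
// candidate that maximizes the cosine similarity between the fp32 layer
// output and the int8-requantized output, averaged over up to 50 images.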
int QuantNet::quantize_EQ()
{
// find the initial scale via KL
quantize_KL();
print_quant_info();
const int input_blob_count = (int)input_blobs.size();
const int conv_layer_count = (int)conv_layers.size();
const int conv_bottom_blob_count = (int)conv_bottom_blobs.size();
std::vector<ncnn::UnlockedPoolAllocator> blob_allocators(quantize_num_threads);
std::vector<ncnn::UnlockedPoolAllocator> workspace_allocators(quantize_num_threads);
// max 50 images for EQ
const int image_count = std::min((int)listspaths[0].size(), 50);
const float scale_range_lower = 0.5f;
const float scale_range_upper = 2.0f;
const int search_steps = 100;
for (int i = 0; i < conv_layer_count; i++)
{
ncnn::Mat& weight_scale = weight_scales[i];
ncnn::Mat& bottom_blob_scale = bottom_blob_scales[i];
const ncnn::Layer* layer = layers[conv_layers[i]];
// search weight scale
for (int j = 0; j < weight_scale.w; j++)
{
const float scale = weight_scale[j];
const float scale_lower = scale * scale_range_lower;
const float scale_upper = scale * scale_range_upper;
const float scale_step = (scale_upper - scale_lower) / search_steps;
std::vector<double> avgsims(search_steps, 0.0);
#pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
for (int ii = 0; ii < image_count; ii++)
{
if (ii % 100 == 0)
{
fprintf(stderr, "search weight scale %.2f%% [ %d / %d ] for %d / %d of %d / %d\n", ii * 100.f / image_count, ii, image_count, j, weight_scale.w, i, conv_layer_count);
}
ncnn::Extractor ex = create_extractor();
const int thread_num = ncnn::get_omp_thread_num();
ex.set_blob_allocator(&blob_allocators[thread_num]);
ex.set_workspace_allocator(&workspace_allocators[thread_num]);
for (int jj = 0; jj < input_blob_count; jj++)
{
const int type_to_pixel = type_to_pixels[jj];
const std::vector<float>& mean_vals = means[jj];
const std::vector<float>& norm_vals = norms[jj];
int pixel_convert_type = ncnn::Mat::PIXEL_BGR;
if (type_to_pixel != pixel_convert_type)
{
pixel_convert_type = pixel_convert_type | (type_to_pixel << ncnn::Mat::PIXEL_CONVERT_SHIFT);
}
ncnn::Mat in = read_and_resize_image(shapes[jj], listspaths[jj][ii], pixel_convert_type);
in.substract_mean_normalize(mean_vals.data(), norm_vals.data());
ex.input(input_blobs[jj], in);
}
ncnn::Mat in;
ex.extract(conv_bottom_blobs[i], in);
ncnn::Mat out;
ex.extract(conv_top_blobs[i], out);
ncnn::Layer* layer_int8 = ncnn::create_layer(layer->typeindex);
ncnn::ParamDict pd;
get_layer_param(layer, pd);
pd.set(8, 1); //int8_scale_term
layer_int8->load_param(pd);
std::vector<float> sims(search_steps);
for (int k = 0; k < search_steps; k++)
{
ncnn::Mat new_weight_scale = weight_scale.clone();
new_weight_scale[j] = scale_lower + k * scale_step;
std::vector<ncnn::Mat> weights;
get_layer_weights(layer, weights);
weights.push_back(new_weight_scale);
weights.push_back(bottom_blob_scale);
layer_int8->load_model(ncnn::ModelBinFromMatArray(weights.data()));
ncnn::Option opt_int8;
opt_int8.use_packing_layout = false;
layer_int8->create_pipeline(opt_int8);
ncnn::Mat out_int8;
layer_int8->forward(in, out_int8, opt_int8);
layer_int8->destroy_pipeline(opt_int8);
sims[k] = cosine_similarity(out, out_int8);
}
delete layer_int8;
#pragma omp critical
{
for (int k = 0; k < search_steps; k++)
{
avgsims[k] += sims[k];
}
}
}
double max_avgsim = 0.0;
float new_scale = scale;
// find the scale with the max cosine similarity
for (int k = 0; k < search_steps; k++)
{
if (max_avgsim < avgsims[k])
{
max_avgsim = avgsims[k];
new_scale = scale_lower + k * scale_step;
}
}
fprintf(stderr, "%s w %d = %f -> %f\n", layer->name.c_str(), j, scale, new_scale);
weight_scale[j] = new_scale;
}
// search bottom blob scale
for (int j = 0; j < bottom_blob_scale.w; j++)
{
const float scale = bottom_blob_scale[j];
const float scale_lower = scale * scale_range_lower;
const float scale_upper = scale * scale_range_upper;
const float scale_step = (scale_upper - scale_lower) / search_steps;
std::vector<double> avgsims(search_steps, 0.0);
#pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
for (int ii = 0; ii < image_count; ii++)
{
if (ii % 100 == 0)
{
fprintf(stderr, "search bottom blob scale %.2f%% [ %d / %d ] for %d / %d of %d / %d\n", ii * 100.f / image_count, ii, image_count, j, bottom_blob_scale.w, i, conv_layer_count);
}
ncnn::Extractor ex = create_extractor();
const int thread_num = ncnn::get_omp_thread_num();
ex.set_blob_allocator(&blob_allocators[thread_num]);
ex.set_workspace_allocator(&workspace_allocators[thread_num]);
for (int jj = 0; jj < input_blob_count; jj++)
{
const int type_to_pixel = type_to_pixels[jj];
const std::vector<float>& mean_vals = means[jj];
const std::vector<float>& norm_vals = norms[jj];
int pixel_convert_type = ncnn::Mat::PIXEL_BGR;
if (type_to_pixel != pixel_convert_type)
{
pixel_convert_type = pixel_convert_type | (type_to_pixel << ncnn::Mat::PIXEL_CONVERT_SHIFT);
}
ncnn::Mat in = read_and_resize_image(shapes[jj], listspaths[jj][ii], pixel_convert_type);
in.substract_mean_normalize(mean_vals.data(), norm_vals.data());
ex.input(input_blobs[jj], in);
}
ncnn::Mat in;
ex.extract(conv_bottom_blobs[i], in);
ncnn::Mat out;
ex.extract(conv_top_blobs[i], out);
ncnn::Layer* layer_int8 = ncnn::create_layer(layer->typeindex);
ncnn::ParamDict pd;
get_layer_param(layer, pd);
pd.set(8, 1); //int8_scale_term
layer_int8->load_param(pd);
std::vector<float> sims(search_steps);
for (int k = 0; k < search_steps; k++)
{
ncnn::Mat new_bottom_blob_scale = bottom_blob_scale.clone();
new_bottom_blob_scale[j] = scale_lower + k * scale_step;
std::vector<ncnn::Mat> weights;
get_layer_weights(layer, weights);
weights.push_back(weight_scale);
weights.push_back(new_bottom_blob_scale);
layer_int8->load_model(ncnn::ModelBinFromMatArray(weights.data()));
ncnn::Option opt_int8;
opt_int8.use_packing_layout = false;
layer_int8->create_pipeline(opt_int8);
ncnn::Mat out_int8;
layer_int8->forward(in, out_int8, opt_int8);
layer_int8->destroy_pipeline(opt_int8);
sims[k] = cosine_similarity(out, out_int8);
}
delete layer_int8;
#pragma omp critical
{
for (int k = 0; k < search_steps; k++)
{
avgsims[k] += sims[k];
}
}
}
double max_avgsim = 0.0;
float new_scale = scale;
// find the scale with the max cosine similarity
for (int k = 0; k < search_steps; k++)
{
if (max_avgsim < avgsims[k])
{
max_avgsim = avgsims[k];
new_scale = scale_lower + k * scale_step;
}
}
fprintf(stderr, "%s b %d = %f -> %f\n", layer->name.c_str(), j, scale, new_scale);
bottom_blob_scale[j] = new_scale;
}
// update quant info
QuantBlobStat& stat = quant_blob_stats[i];
stat.threshold = 127 / bottom_blob_scale[0];
}
return 0;
}
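// parse "list1.txt,list2.txt" into one vector of image paths per list file;
// each list file holds one image path per line (paths up to 255 chars)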
static std::vector<std::vector<std::string> > parse_comma_path_list(char* s)
{
std::vector<std::vector<std::string> > aps;
char* pch = strtok(s, ",");
while (pch != NULL)
{
FILE* fp = fopen(pch, "rb");
if (!fp)
{
fprintf(stderr, "fopen %s failed\n", pch);
break;
}
std::vector<std::string> paths;
// one filepath per line
char line[1024];
while (fgets(line, 1024, fp))
{
char filepath[256];
int nscan = sscanf(line, "%255s", filepath);
if (nscan != 1)
continue;
paths.push_back(std::string(filepath));
}
fclose(fp);
aps.push_back(paths);
pch = strtok(NULL, ",");
}
return aps;
}
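// hand-rolled float parser for tokens like "104.0", "-1.5" or "2e-3";
// presumably written to stay independent of the current locale's decimal
// separator, which strtod/atof would honor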
static float vstr_to_float(const char vstr[20])
{
double v = 0.0;
const char* p = vstr;
// sign
bool sign = *p != '-';
if (*p == '+' || *p == '-')
{
p++;
}
// digits before decimal point or exponent
uint64_t v1 = 0;
while (isdigit(*p))
{
v1 = v1 * 10 + (*p - '0');
p++;
}
v = (double)v1;
// digits after decimal point
if (*p == '.')
{
p++;
uint64_t pow10 = 1;
uint64_t v2 = 0;
while (isdigit(*p))
{
v2 = v2 * 10 + (*p - '0');
pow10 *= 10;
p++;
}
v += v2 / (double)pow10;
}
// exponent
if (*p == 'e' || *p == 'E')
{
p++;
// sign of exponent
bool fact = *p != '-';
if (*p == '+' || *p == '-')
{
p++;
}
// digits of exponent
uint64_t expon = 0;
while (isdigit(*p))
{
expon = expon * 10 + (*p - '0');
p++;
}
double scale = 1.0;
while (expon >= 8)
{
scale *= 1e8;
expon -= 8;
}
while (expon > 0)
{
scale *= 10.0;
expon -= 1;
}
v = fact ? v * scale : v / scale;
}
// fprintf(stderr, "v = %f\n", v);
return sign ? (float)v : (float)-v;
}
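// parse "[a,b,c],[d,e,f]" into one float vector per bracketed group, e.g.
// mean=[104.0,117.0,123.0] yields the single array {104.0, 117.0, 123.0}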
static std::vector<std::vector<float> > parse_comma_float_array_list(char* s)
{
std::vector<std::vector<float> > aaf;
char* pch = strtok(s, "[]");
while (pch != NULL)
{
// parse a,b,c
char vstr[20];
int nconsumed = 0;
int nscan = sscanf(pch, "%19[^,]%n", vstr, &nconsumed);
if (nscan == 1)
{
// ok we get array
pch += nconsumed;
std::vector<float> af;
float v = vstr_to_float(vstr);
af.push_back(v);
nscan = sscanf(pch, ",%19[^,]%n", vstr, &nconsumed);
while (nscan == 1)
{
pch += nconsumed;
float v = vstr_to_float(vstr);
af.push_back(v);
nscan = sscanf(pch, ",%19[^,]%n", vstr, &nconsumed);
}
// array end
aaf.push_back(af);
}
pch = strtok(NULL, "[]");
}
return aaf;
}
static std::vector<std::vector<int> > parse_comma_int_array_list(char* s)
{
std::vector<std::vector<int> > aai;
char* pch = strtok(s, "[]");
while (pch != NULL)
{
// parse a,b,c
int v;
int nconsumed = 0;
int nscan = sscanf(pch, "%d%n", &v, &nconsumed);
if (nscan == 1)
{
// ok we get array
pch += nconsumed;
std::vector<int> ai;
ai.push_back(v);
nscan = sscanf(pch, ",%d%n", &v, &nconsumed);
while (nscan == 1)
{
pch += nconsumed;
ai.push_back(v);
nscan = sscanf(pch, ",%d%n", &v, &nconsumed);
}
// array end
aai.push_back(ai);
}
pch = strtok(NULL, "[]");
}
return aai;
}
static std::vector<int> parse_comma_pixel_type_list(char* s)
{
std::vector<int> aps;
char* pch = strtok(s, ",");
while (pch != NULL)
{
// RAW/RGB/BGR/GRAY/RGBA/BGRA
if (strcmp(pch, "RAW") == 0)
aps.push_back(-233);
if (strcmp(pch, "RGB") == 0)
aps.push_back(ncnn::Mat::PIXEL_RGB);
if (strcmp(pch, "BGR") == 0)
aps.push_back(ncnn::Mat::PIXEL_BGR);
if (strcmp(pch, "GRAY") == 0)
aps.push_back(ncnn::Mat::PIXEL_GRAY);
if (strcmp(pch, "RGBA") == 0)
aps.push_back(ncnn::Mat::PIXEL_RGBA);
if (strcmp(pch, "BGRA") == 0)
aps.push_back(ncnn::Mat::PIXEL_BGRA);
pch = strtok(NULL, ",");
}
return aps;
}
static void print_float_array_list(const std::vector<std::vector<float> >& list)
{
for (size_t i = 0; i < list.size(); i++)
{
const std::vector<float>& array = list[i];
fprintf(stderr, "[");
for (size_t j = 0; j < array.size(); j++)
{
fprintf(stderr, "%f", array[j]);
if (j != array.size() - 1)
fprintf(stderr, ",");
}
fprintf(stderr, "]");
if (i != list.size() - 1)
fprintf(stderr, ",");
}
}
static void print_int_array_list(const std::vector<std::vector<int> >& list)
{
for (size_t i = 0; i < list.size(); i++)
{
const std::vector<int>& array = list[i];
fprintf(stderr, "[");
for (size_t j = 0; j < array.size(); j++)
{
fprintf(stderr, "%d", array[j]);
if (j != array.size() - 1)
fprintf(stderr, ",");
}
fprintf(stderr, "]");
if (i != list.size() - 1)
fprintf(stderr, ",");
}
}
static void print_pixel_type_list(const std::vector<int>& list)
{
for (size_t i = 0; i < list.size(); i++)
{
const int type = list[i];
if (type == -233)
fprintf(stderr, "RAW");
if (type == ncnn::Mat::PIXEL_RGB)
fprintf(stderr, "RGB");
if (type == ncnn::Mat::PIXEL_BGR)
fprintf(stderr, "BGR");
if (type == ncnn::Mat::PIXEL_GRAY)
fprintf(stderr, "GRAY");
if (type == ncnn::Mat::PIXEL_RGBA)
fprintf(stderr, "RGBA");
if (type == ncnn::Mat::PIXEL_BGRA)
fprintf(stderr, "BGRA");
if (i != list.size() - 1)
fprintf(stderr, ",");
}
}
static void show_usage()
{
fprintf(stderr, "Usage: ncnn2table [ncnnparam] [ncnnbin] [list,...] [ncnntable] [(key=value)...]\n");
fprintf(stderr, " mean=[104.0,117.0,123.0],...\n");
fprintf(stderr, " norm=[1.0,1.0,1.0],...\n");
fprintf(stderr, " shape=[224,224,3],...[w,h,c] or [w,h] **[0,0] will not resize\n");
fprintf(stderr, " pixel=RAW/RGB/BGR/GRAY/RGBA/BGRA,...\n");
fprintf(stderr, " thread=8\n");
fprintf(stderr, " method=kl/aciq/eq\n");
fprintf(stderr, "Sample usage: ncnn2table squeezenet.param squeezenet.bin imagelist.txt squeezenet.table mean=[104.0,117.0,123.0] norm=[1.0,1.0,1.0] shape=[227,227,3] pixel=BGR method=kl\n");
}
int main(int argc, char** argv)
{
if (argc < 5)
{
show_usage();
return -1;
}
for (int i = 1; i < argc; i++)
{
if (argv[i][0] == '-')
{
show_usage();
return -1;
}
}
const char* inparam = argv[1];
const char* inbin = argv[2];
char* lists = argv[3];
const char* outtable = argv[4];
ncnn::Option opt;
opt.num_threads = 1;
opt.use_fp16_packed = false;
opt.use_fp16_storage = false;
opt.use_fp16_arithmetic = false;
QuantNet net;
net.opt = opt;
net.load_param(inparam);
net.load_model(inbin);
net.init();
// load lists
net.listspaths = parse_comma_path_list(lists);
std::string method = "kl";
for (int i = 5; i < argc; i++)
{
// key=value
char* kv = argv[i];
char* eqs = strchr(kv, '=');
if (eqs == NULL)
{
fprintf(stderr, "unrecognized arg %s\n", kv);
continue;
}
// split k v
eqs[0] = '\0';
const char* key = kv;
char* value = eqs + 1;
// load mean norm shape
if (memcmp(key, "mean", 4) == 0)
net.means = parse_comma_float_array_list(value);
if (memcmp(key, "norm", 4) == 0)
net.norms = parse_comma_float_array_list(value);
if (memcmp(key, "shape", 5) == 0)
net.shapes = parse_comma_int_array_list(value);
if (memcmp(key, "pixel", 5) == 0)
net.type_to_pixels = parse_comma_pixel_type_list(value);
if (memcmp(key, "thread", 6) == 0)
net.quantize_num_threads = atoi(value);
if (memcmp(key, "method", 6) == 0)
method = std::string(value);
}
// sanity check
const size_t input_blob_count = net.input_blobs.size();
if (net.listspaths.size() != input_blob_count)
{
fprintf(stderr, "expect %d lists, but got %d\n", (int)input_blob_count, (int)net.listspaths.size());
return -1;
}
if (net.means.size() != input_blob_count)
{
fprintf(stderr, "expect %d means, but got %d\n", (int)input_blob_count, (int)net.means.size());
return -1;
}
if (net.norms.size() != input_blob_count)
{
fprintf(stderr, "expect %d norms, but got %d\n", (int)input_blob_count, (int)net.norms.size());
return -1;
}
if (net.shapes.size() != input_blob_count)
{
fprintf(stderr, "expect %d shapes, but got %d\n", (int)input_blob_count, (int)net.shapes.size());
return -1;
}
if (net.type_to_pixels.size() != input_blob_count)
{
fprintf(stderr, "expect %d pixels, but got %d\n", (int)input_blob_count, (int)net.type_to_pixels.size());
return -1;
}
if (net.quantize_num_threads <= 0)
{
fprintf(stderr, "malformed thread %d\n", net.quantize_num_threads);
return -1;
}
// print quantnet config
{
fprintf(stderr, "mean = ");
print_float_array_list(net.means);
fprintf(stderr, "\n");
fprintf(stderr, "norm = ");
print_float_array_list(net.norms);
fprintf(stderr, "\n");
fprintf(stderr, "shape = ");
print_int_array_list(net.shapes);
fprintf(stderr, "\n");
fprintf(stderr, "pixel = ");
print_pixel_type_list(net.type_to_pixels);
fprintf(stderr, "\n");
fprintf(stderr, "thread = %d\n", net.quantize_num_threads);
fprintf(stderr, "method = %s\n", method.c_str());
fprintf(stderr, "---------------------------------------\n");
}
if (method == "kl")
{
net.quantize_KL();
}
else if (method == "aciq")
{
net.quantize_ACIQ();
}
else if (method == "eq")
{
net.quantize_EQ();
}
else
{
fprintf(stderr, "not implemented yet !\n");
fprintf(stderr, "unknown method %s, expect kl / aciq / eq\n", method.c_str());
return -1;
}
net.print_quant_info();
net.save_table(outtable);
return 0;
}