feat: 切换后端至PaddleOCR-NCNN,切换工程为CMake
1.项目后端整体迁移至PaddleOCR-NCNN算法,已通过基本的兼容性测试 2.工程改为使用CMake组织,后续为了更好地兼容第三方库,不再提供QMake工程 3.重整权利声明文件,重整代码工程,确保最小化侵权风险 Log: 切换后端至PaddleOCR-NCNN,切换工程为CMake Change-Id: I4d5d2c5d37505a4a24b389b1a4c5d12f17bfa38c
This commit is contained in:
parent
ecdd171c6f
commit
718c41634f
4
.gitignore
vendored
4
.gitignore
vendored
@ -13,7 +13,7 @@
|
||||
*.a
|
||||
|
||||
build*/
|
||||
include*/
|
||||
#include*/
|
||||
|
||||
*.txt.user*
|
||||
*.pro.user*
|
||||
@ -21,7 +21,7 @@ include*/
|
||||
*.qm
|
||||
*.autosave
|
||||
|
||||
vendor/include/*
|
||||
#vendor/include/*
|
||||
vendor/lib/*
|
||||
|
||||
.vscode/*
|
||||
|
4631
3rdparty/clipper/clipper.cpp
vendored
Normal file
4631
3rdparty/clipper/clipper.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
406
3rdparty/clipper/clipper.hpp
vendored
Normal file
406
3rdparty/clipper/clipper.hpp
vendored
Normal file
@ -0,0 +1,406 @@
|
||||
/*******************************************************************************
|
||||
* *
|
||||
* Author : Angus Johnson *
|
||||
* Version : 6.4.2 *
|
||||
* Date : 27 February 2017 *
|
||||
* Website : http://www.angusj.com *
|
||||
* Copyright : Angus Johnson 2010-2017 *
|
||||
* *
|
||||
* License: *
|
||||
* Use, modification & distribution is subject to Boost Software License Ver 1. *
|
||||
* http://www.boost.org/LICENSE_1_0.txt *
|
||||
* *
|
||||
* Attributions: *
|
||||
* The code in this library is an extension of Bala Vatti's clipping algorithm: *
|
||||
* "A generic solution to polygon clipping" *
|
||||
* Communications of the ACM, Vol 35, Issue 7 (July 1992) pp 56-63. *
|
||||
* http://portal.acm.org/citation.cfm?id=129906 *
|
||||
* *
|
||||
* Computer graphics and geometric modeling: implementation and algorithms *
|
||||
* By Max K. Agoston *
|
||||
* Springer; 1 edition (January 4, 2005) *
|
||||
* http://books.google.com/books?q=vatti+clipping+agoston *
|
||||
* *
|
||||
* See also: *
|
||||
* "Polygon Offsetting by Computing Winding Numbers" *
|
||||
* Paper no. DETC2005-85513 pp. 565-575 *
|
||||
* ASME 2005 International Design Engineering Technical Conferences *
|
||||
* and Computers and Information in Engineering Conference (IDETC/CIE2005) *
|
||||
* September 24-28, 2005 , Long Beach, California, USA *
|
||||
* http://www.me.berkeley.edu/~mcmains/pubs/DAC05OffsetPolygon.pdf *
|
||||
* *
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef clipper_hpp
|
||||
#define clipper_hpp
|
||||
|
||||
#define CLIPPER_VERSION "6.4.2"
|
||||
|
||||
//use_int32: When enabled 32bit ints are used instead of 64bit ints. This
|
||||
//improve performance but coordinate values are limited to the range +/- 46340
|
||||
//#define use_int32
|
||||
|
||||
//use_xyz: adds a Z member to IntPoint. Adds a minor cost to perfomance.
|
||||
//#define use_xyz
|
||||
|
||||
//use_lines: Enables line clipping. Adds a very minor cost to performance.
|
||||
#define use_lines
|
||||
|
||||
//use_deprecated: Enables temporary support for the obsolete functions
|
||||
//#define use_deprecated
|
||||
|
||||
#include <vector>
|
||||
#include <list>
|
||||
#include <set>
|
||||
#include <stdexcept>
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include <ostream>
|
||||
#include <functional>
|
||||
#include <queue>
|
||||
|
||||
namespace ClipperLib {
|
||||
|
||||
enum ClipType { ctIntersection, ctUnion, ctDifference, ctXor };
|
||||
enum PolyType { ptSubject, ptClip };
|
||||
//By far the most widely used winding rules for polygon filling are
|
||||
//EvenOdd & NonZero (GDI, GDI+, XLib, OpenGL, Cairo, AGG, Quartz, SVG, Gr32)
|
||||
//Others rules include Positive, Negative and ABS_GTR_EQ_TWO (only in OpenGL)
|
||||
//see http://glprogramming.com/red/chapter11.html
|
||||
enum PolyFillType { pftEvenOdd, pftNonZero, pftPositive, pftNegative };
|
||||
|
||||
#ifdef use_int32
|
||||
typedef int cInt;
|
||||
static cInt const loRange = 0x7FFF;
|
||||
static cInt const hiRange = 0x7FFF;
|
||||
#else
|
||||
typedef signed long long cInt;
|
||||
static cInt const loRange = 0x3FFFFFFF;
|
||||
static cInt const hiRange = 0x3FFFFFFFFFFFFFFFLL;
|
||||
typedef signed long long long64; //used by Int128 class
|
||||
typedef unsigned long long ulong64;
|
||||
|
||||
#endif
|
||||
|
||||
struct IntPoint {
|
||||
cInt X;
|
||||
cInt Y;
|
||||
#ifdef use_xyz
|
||||
cInt Z;
|
||||
IntPoint(cInt x = 0, cInt y = 0, cInt z = 0): X(x), Y(y), Z(z) {};
|
||||
#else
|
||||
IntPoint(cInt x = 0, cInt y = 0): X(x), Y(y) {};
|
||||
#endif
|
||||
|
||||
friend inline bool operator== (const IntPoint& a, const IntPoint& b)
|
||||
{
|
||||
return a.X == b.X && a.Y == b.Y;
|
||||
}
|
||||
friend inline bool operator!= (const IntPoint& a, const IntPoint& b)
|
||||
{
|
||||
return a.X != b.X || a.Y != b.Y;
|
||||
}
|
||||
};
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
typedef std::vector< IntPoint > Path;
|
||||
typedef std::vector< Path > Paths;
|
||||
|
||||
inline Path& operator <<(Path& poly, const IntPoint& p) {poly.push_back(p); return poly;}
|
||||
inline Paths& operator <<(Paths& polys, const Path& p) {polys.push_back(p); return polys;}
|
||||
|
||||
std::ostream& operator <<(std::ostream &s, const IntPoint &p);
|
||||
std::ostream& operator <<(std::ostream &s, const Path &p);
|
||||
std::ostream& operator <<(std::ostream &s, const Paths &p);
|
||||
|
||||
struct DoublePoint
|
||||
{
|
||||
double X;
|
||||
double Y;
|
||||
DoublePoint(double x = 0, double y = 0) : X(x), Y(y) {}
|
||||
DoublePoint(IntPoint ip) : X((double)ip.X), Y((double)ip.Y) {}
|
||||
};
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#ifdef use_xyz
|
||||
typedef void (*ZFillCallback)(IntPoint& e1bot, IntPoint& e1top, IntPoint& e2bot, IntPoint& e2top, IntPoint& pt);
|
||||
#endif
|
||||
|
||||
enum InitOptions {ioReverseSolution = 1, ioStrictlySimple = 2, ioPreserveCollinear = 4};
|
||||
enum JoinType {jtSquare, jtRound, jtMiter};
|
||||
enum EndType {etClosedPolygon, etClosedLine, etOpenButt, etOpenSquare, etOpenRound};
|
||||
|
||||
class PolyNode;
|
||||
typedef std::vector< PolyNode* > PolyNodes;
|
||||
|
||||
class PolyNode
|
||||
{
|
||||
public:
|
||||
PolyNode();
|
||||
virtual ~PolyNode(){};
|
||||
Path Contour;
|
||||
PolyNodes Childs;
|
||||
PolyNode* Parent;
|
||||
PolyNode* GetNext() const;
|
||||
bool IsHole() const;
|
||||
bool IsOpen() const;
|
||||
int ChildCount() const;
|
||||
private:
|
||||
//PolyNode& operator =(PolyNode& other);
|
||||
unsigned Index; //node index in Parent.Childs
|
||||
bool m_IsOpen;
|
||||
JoinType m_jointype;
|
||||
EndType m_endtype;
|
||||
PolyNode* GetNextSiblingUp() const;
|
||||
void AddChild(PolyNode& child);
|
||||
friend class Clipper; //to access Index
|
||||
friend class ClipperOffset;
|
||||
};
|
||||
|
||||
class PolyTree: public PolyNode
|
||||
{
|
||||
public:
|
||||
~PolyTree(){ Clear(); };
|
||||
PolyNode* GetFirst() const;
|
||||
void Clear();
|
||||
int Total() const;
|
||||
private:
|
||||
//PolyTree& operator =(PolyTree& other);
|
||||
PolyNodes AllNodes;
|
||||
friend class Clipper; //to access AllNodes
|
||||
};
|
||||
|
||||
bool Orientation(const Path &poly);
|
||||
double Area(const Path &poly);
|
||||
int PointInPolygon(const IntPoint &pt, const Path &path);
|
||||
|
||||
void SimplifyPolygon(const Path &in_poly, Paths &out_polys, PolyFillType fillType = pftEvenOdd);
|
||||
void SimplifyPolygons(const Paths &in_polys, Paths &out_polys, PolyFillType fillType = pftEvenOdd);
|
||||
void SimplifyPolygons(Paths &polys, PolyFillType fillType = pftEvenOdd);
|
||||
|
||||
void CleanPolygon(const Path& in_poly, Path& out_poly, double distance = 1.415);
|
||||
void CleanPolygon(Path& poly, double distance = 1.415);
|
||||
void CleanPolygons(const Paths& in_polys, Paths& out_polys, double distance = 1.415);
|
||||
void CleanPolygons(Paths& polys, double distance = 1.415);
|
||||
|
||||
void MinkowskiSum(const Path& pattern, const Path& path, Paths& solution, bool pathIsClosed);
|
||||
void MinkowskiSum(const Path& pattern, const Paths& paths, Paths& solution, bool pathIsClosed);
|
||||
void MinkowskiDiff(const Path& poly1, const Path& poly2, Paths& solution);
|
||||
|
||||
void PolyTreeToPaths(const PolyTree& polytree, Paths& paths);
|
||||
void ClosedPathsFromPolyTree(const PolyTree& polytree, Paths& paths);
|
||||
void OpenPathsFromPolyTree(PolyTree& polytree, Paths& paths);
|
||||
|
||||
void ReversePath(Path& p);
|
||||
void ReversePaths(Paths& p);
|
||||
|
||||
struct IntRect { cInt left; cInt top; cInt right; cInt bottom; };
|
||||
|
||||
//enums that are used internally ...
|
||||
enum EdgeSide { esLeft = 1, esRight = 2};
|
||||
|
||||
//forward declarations (for stuff used internally) ...
|
||||
struct TEdge;
|
||||
struct IntersectNode;
|
||||
struct LocalMinimum;
|
||||
struct OutPt;
|
||||
struct OutRec;
|
||||
struct Join;
|
||||
|
||||
typedef std::vector < OutRec* > PolyOutList;
|
||||
typedef std::vector < TEdge* > EdgeList;
|
||||
typedef std::vector < Join* > JoinList;
|
||||
typedef std::vector < IntersectNode* > IntersectList;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
//ClipperBase is the ancestor to the Clipper class. It should not be
|
||||
//instantiated directly. This class simply abstracts the conversion of sets of
|
||||
//polygon coordinates into edge objects that are stored in a LocalMinima list.
|
||||
class ClipperBase
|
||||
{
|
||||
public:
|
||||
ClipperBase();
|
||||
virtual ~ClipperBase();
|
||||
virtual bool AddPath(const Path &pg, PolyType PolyTyp, bool Closed);
|
||||
bool AddPaths(const Paths &ppg, PolyType PolyTyp, bool Closed);
|
||||
virtual void Clear();
|
||||
IntRect GetBounds();
|
||||
bool PreserveCollinear() {return m_PreserveCollinear;};
|
||||
void PreserveCollinear(bool value) {m_PreserveCollinear = value;};
|
||||
protected:
|
||||
void DisposeLocalMinimaList();
|
||||
TEdge* AddBoundsToLML(TEdge *e, bool IsClosed);
|
||||
virtual void Reset();
|
||||
TEdge* ProcessBound(TEdge* E, bool IsClockwise);
|
||||
void InsertScanbeam(const cInt Y);
|
||||
bool PopScanbeam(cInt &Y);
|
||||
bool LocalMinimaPending();
|
||||
bool PopLocalMinima(cInt Y, const LocalMinimum *&locMin);
|
||||
OutRec* CreateOutRec();
|
||||
void DisposeAllOutRecs();
|
||||
void DisposeOutRec(PolyOutList::size_type index);
|
||||
void SwapPositionsInAEL(TEdge *edge1, TEdge *edge2);
|
||||
void DeleteFromAEL(TEdge *e);
|
||||
void UpdateEdgeIntoAEL(TEdge *&e);
|
||||
|
||||
typedef std::vector<LocalMinimum> MinimaList;
|
||||
MinimaList::iterator m_CurrentLM;
|
||||
MinimaList m_MinimaList;
|
||||
|
||||
bool m_UseFullRange;
|
||||
EdgeList m_edges;
|
||||
bool m_PreserveCollinear;
|
||||
bool m_HasOpenPaths;
|
||||
PolyOutList m_PolyOuts;
|
||||
TEdge *m_ActiveEdges;
|
||||
|
||||
typedef std::priority_queue<cInt> ScanbeamList;
|
||||
ScanbeamList m_Scanbeam;
|
||||
};
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
class Clipper : public virtual ClipperBase
|
||||
{
|
||||
public:
|
||||
Clipper(int initOptions = 0);
|
||||
bool Execute(ClipType clipType,
|
||||
Paths &solution,
|
||||
PolyFillType fillType = pftEvenOdd);
|
||||
bool Execute(ClipType clipType,
|
||||
Paths &solution,
|
||||
PolyFillType subjFillType,
|
||||
PolyFillType clipFillType);
|
||||
bool Execute(ClipType clipType,
|
||||
PolyTree &polytree,
|
||||
PolyFillType fillType = pftEvenOdd);
|
||||
bool Execute(ClipType clipType,
|
||||
PolyTree &polytree,
|
||||
PolyFillType subjFillType,
|
||||
PolyFillType clipFillType);
|
||||
bool ReverseSolution() { return m_ReverseOutput; };
|
||||
void ReverseSolution(bool value) {m_ReverseOutput = value;};
|
||||
bool StrictlySimple() {return m_StrictSimple;};
|
||||
void StrictlySimple(bool value) {m_StrictSimple = value;};
|
||||
//set the callback function for z value filling on intersections (otherwise Z is 0)
|
||||
#ifdef use_xyz
|
||||
void ZFillFunction(ZFillCallback zFillFunc);
|
||||
#endif
|
||||
protected:
|
||||
virtual bool ExecuteInternal();
|
||||
private:
|
||||
JoinList m_Joins;
|
||||
JoinList m_GhostJoins;
|
||||
IntersectList m_IntersectList;
|
||||
ClipType m_ClipType;
|
||||
typedef std::list<cInt> MaximaList;
|
||||
MaximaList m_Maxima;
|
||||
TEdge *m_SortedEdges;
|
||||
bool m_ExecuteLocked;
|
||||
PolyFillType m_ClipFillType;
|
||||
PolyFillType m_SubjFillType;
|
||||
bool m_ReverseOutput;
|
||||
bool m_UsingPolyTree;
|
||||
bool m_StrictSimple;
|
||||
#ifdef use_xyz
|
||||
ZFillCallback m_ZFill; //custom callback
|
||||
#endif
|
||||
void SetWindingCount(TEdge& edge);
|
||||
bool IsEvenOddFillType(const TEdge& edge) const;
|
||||
bool IsEvenOddAltFillType(const TEdge& edge) const;
|
||||
void InsertLocalMinimaIntoAEL(const cInt botY);
|
||||
void InsertEdgeIntoAEL(TEdge *edge, TEdge* startEdge);
|
||||
void AddEdgeToSEL(TEdge *edge);
|
||||
bool PopEdgeFromSEL(TEdge *&edge);
|
||||
void CopyAELToSEL();
|
||||
void DeleteFromSEL(TEdge *e);
|
||||
void SwapPositionsInSEL(TEdge *edge1, TEdge *edge2);
|
||||
bool IsContributing(const TEdge& edge) const;
|
||||
bool IsTopHorz(const cInt XPos);
|
||||
void DoMaxima(TEdge *e);
|
||||
void ProcessHorizontals();
|
||||
void ProcessHorizontal(TEdge *horzEdge);
|
||||
void AddLocalMaxPoly(TEdge *e1, TEdge *e2, const IntPoint &pt);
|
||||
OutPt* AddLocalMinPoly(TEdge *e1, TEdge *e2, const IntPoint &pt);
|
||||
OutRec* GetOutRec(int idx);
|
||||
void AppendPolygon(TEdge *e1, TEdge *e2);
|
||||
void IntersectEdges(TEdge *e1, TEdge *e2, IntPoint &pt);
|
||||
OutPt* AddOutPt(TEdge *e, const IntPoint &pt);
|
||||
OutPt* GetLastOutPt(TEdge *e);
|
||||
bool ProcessIntersections(const cInt topY);
|
||||
void BuildIntersectList(const cInt topY);
|
||||
void ProcessIntersectList();
|
||||
void ProcessEdgesAtTopOfScanbeam(const cInt topY);
|
||||
void BuildResult(Paths& polys);
|
||||
void BuildResult2(PolyTree& polytree);
|
||||
void SetHoleState(TEdge *e, OutRec *outrec);
|
||||
void DisposeIntersectNodes();
|
||||
bool FixupIntersectionOrder();
|
||||
void FixupOutPolygon(OutRec &outrec);
|
||||
void FixupOutPolyline(OutRec &outrec);
|
||||
bool IsHole(TEdge *e);
|
||||
bool FindOwnerFromSplitRecs(OutRec &outRec, OutRec *&currOrfl);
|
||||
void FixHoleLinkage(OutRec &outrec);
|
||||
void AddJoin(OutPt *op1, OutPt *op2, const IntPoint offPt);
|
||||
void ClearJoins();
|
||||
void ClearGhostJoins();
|
||||
void AddGhostJoin(OutPt *op, const IntPoint offPt);
|
||||
bool JoinPoints(Join *j, OutRec* outRec1, OutRec* outRec2);
|
||||
void JoinCommonEdges();
|
||||
void DoSimplePolygons();
|
||||
void FixupFirstLefts1(OutRec* OldOutRec, OutRec* NewOutRec);
|
||||
void FixupFirstLefts2(OutRec* InnerOutRec, OutRec* OuterOutRec);
|
||||
void FixupFirstLefts3(OutRec* OldOutRec, OutRec* NewOutRec);
|
||||
#ifdef use_xyz
|
||||
void SetZ(IntPoint& pt, TEdge& e1, TEdge& e2);
|
||||
#endif
|
||||
};
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
class ClipperOffset
|
||||
{
|
||||
public:
|
||||
ClipperOffset(double miterLimit = 2.0, double roundPrecision = 0.25);
|
||||
~ClipperOffset();
|
||||
void AddPath(const Path& path, JoinType joinType, EndType endType);
|
||||
void AddPaths(const Paths& paths, JoinType joinType, EndType endType);
|
||||
void Execute(Paths& solution, double delta);
|
||||
void Execute(PolyTree& solution, double delta);
|
||||
void Clear();
|
||||
double MiterLimit;
|
||||
double ArcTolerance;
|
||||
private:
|
||||
Paths m_destPolys;
|
||||
Path m_srcPoly;
|
||||
Path m_destPoly;
|
||||
std::vector<DoublePoint> m_normals;
|
||||
double m_delta, m_sinA, m_sin, m_cos;
|
||||
double m_miterLim, m_StepsPerRad;
|
||||
IntPoint m_lowest;
|
||||
PolyNode m_polyNodes;
|
||||
|
||||
void FixOrientations();
|
||||
void DoOffset(double delta);
|
||||
void OffsetPoint(int j, int& k, JoinType jointype);
|
||||
void DoSquare(int j, int k);
|
||||
void DoMiter(int j, int k, double r);
|
||||
void DoRound(int j, int k);
|
||||
};
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
class clipperException : public std::exception
|
||||
{
|
||||
public:
|
||||
clipperException(const char* description): m_descr(description) {}
|
||||
virtual ~clipperException() throw() {}
|
||||
virtual const char* what() const throw() {return m_descr.c_str();}
|
||||
private:
|
||||
std::string m_descr;
|
||||
};
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
} //ClipperLib namespace
|
||||
|
||||
#endif //clipper_hpp
|
||||
|
||||
|
414
3rdparty/ncnn/CMakeLists.txt
vendored
Normal file
414
3rdparty/ncnn/CMakeLists.txt
vendored
Normal file
@ -0,0 +1,414 @@
|
||||
if(CMAKE_TOOLCHAIN_FILE)
|
||||
set(LIBRARY_OUTPUT_PATH_ROOT ${CMAKE_BINARY_DIR} CACHE PATH "root for library output, set this to change where android libs are compiled to")
|
||||
# get absolute path, but get_filename_component ABSOLUTE only refer with source dir, so find_file here :(
|
||||
get_filename_component(CMAKE_TOOLCHAIN_FILE_NAME ${CMAKE_TOOLCHAIN_FILE} NAME)
|
||||
find_file(CMAKE_TOOLCHAIN_FILE ${CMAKE_TOOLCHAIN_FILE_NAME} PATHS ${CMAKE_SOURCE_DIR} NO_DEFAULT_PATH)
|
||||
message(STATUS "CMAKE_TOOLCHAIN_FILE = ${CMAKE_TOOLCHAIN_FILE}")
|
||||
endif()
|
||||
|
||||
if(NOT DEFINED CMAKE_INSTALL_PREFIX)
|
||||
set(CMAKE_INSTALL_PREFIX "${CMAKE_BINARY_DIR}/install" CACHE PATH "Installation Directory")
|
||||
endif()
|
||||
message(STATUS "CMAKE_INSTALL_PREFIX = ${CMAKE_INSTALL_PREFIX}")
|
||||
|
||||
if(NOT DEFINED NCNN_VERSION)
|
||||
string(TIMESTAMP NCNN_VERSION "%Y%m%d")
|
||||
endif()
|
||||
|
||||
set(NCNN_VERSION_MAJOR 1)
|
||||
set(NCNN_VERSION_MINOR 0)
|
||||
set(NCNN_VERSION_PATCH ${NCNN_VERSION})
|
||||
set(NCNN_VERSION_STRING ${NCNN_VERSION_MAJOR}.${NCNN_VERSION_MINOR}.${NCNN_VERSION_PATCH})
|
||||
if(APPLE OR IOS)
|
||||
# macos / ios only accepts a.b.c.d.e where a=24bit b/c/d/e=10bit
|
||||
# 20201228 to 20.12.28
|
||||
string(SUBSTRING ${NCNN_VERSION} 2 2 NCNN_VERSION_YEAR)
|
||||
string(SUBSTRING ${NCNN_VERSION} 4 2 NCNN_VERSION_MONTH)
|
||||
string(SUBSTRING ${NCNN_VERSION} 6 2 NCNN_VERSION_DAY)
|
||||
set(NCNN_VERSION_STRING ${NCNN_VERSION_MAJOR}.${NCNN_VERSION_MINOR}.${NCNN_VERSION_YEAR}.${NCNN_VERSION_MONTH}.${NCNN_VERSION_DAY})
|
||||
endif()
|
||||
message(STATUS "NCNN_VERSION_STRING = ${NCNN_VERSION_STRING}")
|
||||
|
||||
cmake_minimum_required(VERSION 2.8.12)
|
||||
|
||||
if(NOT CMAKE_BUILD_TYPE)
|
||||
set(CMAKE_BUILD_TYPE release CACHE STRING "Choose the type of build" FORCE)
|
||||
endif()
|
||||
|
||||
if(NOT CMAKE_VERSION VERSION_LESS "3.15")
|
||||
# enable CMAKE_MSVC_RUNTIME_LIBRARY
|
||||
cmake_policy(SET CMP0091 NEW)
|
||||
endif()
|
||||
|
||||
if(POLICY CMP0025)
|
||||
# reference from https://cmake.org/cmake/help/latest/policy/CMP0025.html
|
||||
cmake_policy(SET CMP0025 NEW)
|
||||
endif()
|
||||
|
||||
project(ncnn)
|
||||
|
||||
if(MSVC AND NOT CMAKE_VERSION VERSION_LESS "3.15")
|
||||
option(NCNN_BUILD_WITH_STATIC_CRT "Enables use of statically linked CRT for statically linked ncnn" OFF)
|
||||
if(NCNN_BUILD_WITH_STATIC_CRT)
|
||||
# cmake before version 3.15 not work
|
||||
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
option(NCNN_SHARED_LIB "shared library support" OFF)
|
||||
option(NCNN_ENABLE_LTO "enable link-time optimization" OFF)
|
||||
option(NCNN_OPENMP "openmp support" ON)
|
||||
option(NCNN_STDIO "load model from external file" ON)
|
||||
option(NCNN_STRING "plain and verbose string" ON)
|
||||
option(NCNN_INSTALL_SDK "install ncnn library and headers" ON)
|
||||
option(NCNN_SIMPLEOCV "minimal opencv structure emulation" OFF)
|
||||
option(NCNN_SIMPLEOMP "minimal openmp runtime emulation" OFF)
|
||||
option(NCNN_SIMPLESTL "minimal cpp stl structure emulation" OFF)
|
||||
option(NCNN_THREADS "build with threads" ON)
|
||||
option(NCNN_BENCHMARK "print benchmark information for every layer" OFF)
|
||||
option(NCNN_C_API "build with C api" ON)
|
||||
option(NCNN_PLATFORM_API "build with platform api candy" ON)
|
||||
option(NCNN_PIXEL "convert and resize from/to image pixel" ON)
|
||||
option(NCNN_PIXEL_ROTATE "rotate image pixel orientation" ON)
|
||||
option(NCNN_PIXEL_AFFINE "warp affine image pixel" ON)
|
||||
option(NCNN_PIXEL_DRAWING "draw basic figure and text" ON)
|
||||
option(NCNN_CMAKE_VERBOSE "print verbose cmake messages" OFF)
|
||||
option(NCNN_VULKAN "vulkan compute support" OFF)
|
||||
option(NCNN_SYSTEM_GLSLANG "use system glslang library" OFF)
|
||||
option(NCNN_RUNTIME_CPU "runtime dispatch cpu routines" ON)
|
||||
option(NCNN_DISABLE_PIC "disable position-independent code" OFF)
|
||||
option(NCNN_BUILD_TESTS "build tests" OFF)
|
||||
option(NCNN_COVERAGE "build for coverage" OFF)
|
||||
option(NCNN_BUILD_BENCHMARK "build benchmark" ON)
|
||||
option(NCNN_PYTHON "build python api" OFF)
|
||||
option(NCNN_INT8 "int8 inference" ON)
|
||||
option(NCNN_BF16 "bf16 inference" ON)
|
||||
option(NCNN_FORCE_INLINE "force inline some function" ON)
|
||||
|
||||
if(ANDROID OR IOS OR NCNN_SIMPLESTL OR CMAKE_CROSSCOMPILING)
|
||||
option(NCNN_DISABLE_RTTI "disable rtti" ON)
|
||||
option(NCNN_BUILD_TOOLS "build tools" OFF)
|
||||
option(NCNN_BUILD_EXAMPLES "build examples" OFF)
|
||||
else()
|
||||
option(NCNN_DISABLE_RTTI "disable rtti" OFF)
|
||||
option(NCNN_BUILD_TOOLS "build tools" ON)
|
||||
option(NCNN_BUILD_EXAMPLES "build examples" ON)
|
||||
endif()
|
||||
|
||||
if(ANDROID OR IOS OR LINUX OR NCNN_SIMPLESTL)
|
||||
option(NCNN_DISABLE_EXCEPTION "disable exception" ON)
|
||||
else()
|
||||
option(NCNN_DISABLE_EXCEPTION "disable exception" OFF)
|
||||
endif()
|
||||
|
||||
if(NCNN_SHARED_LIB)
|
||||
if(NCNN_BUILD_TESTS)
|
||||
message(WARNING "NCNN_SHARED_LIB must be OFF to build tests! NCNN_BUILD_TESTS will be turned off.")
|
||||
set(NCNN_BUILD_TESTS OFF)
|
||||
endif()
|
||||
|
||||
if(NCNN_ENABLE_LTO)
|
||||
# enable global link time optimization
|
||||
cmake_policy(SET CMP0069 NEW)
|
||||
set(CMAKE_POLICY_DEFAULT_CMP0069 NEW)
|
||||
include(CheckIPOSupported)
|
||||
check_ipo_supported(RESULT ipo_supported OUTPUT ipo_supported_output)
|
||||
if(ipo_supported)
|
||||
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
|
||||
else()
|
||||
message(WARNING "IPO is not supported: ${ipo_supported_output}")
|
||||
set(NCNN_ENABLE_LTO OFF)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NOT NCNN_STDIO OR NOT NCNN_STRING)
|
||||
if(NCNN_BUILD_TOOLS)
|
||||
message(WARNING "NCNN_STDIO or NCNN_STRING disabled, NCNN_BUILD_TOOLS will be turned off.")
|
||||
set(NCNN_BUILD_TOOLS OFF)
|
||||
endif()
|
||||
if(NCNN_BUILD_EXAMPLES)
|
||||
message(WARNING "NCNN_STDIO or NCNN_STRING disabled, NCNN_BUILD_EXAMPLES will be turned off.")
|
||||
set(NCNN_BUILD_EXAMPLES OFF)
|
||||
endif()
|
||||
if(NCNN_BUILD_BENCHMARK)
|
||||
message(WARNING "NCNN_STDIO or NCNN_STRING disabled, NCNN_BUILD_BENCHMARK will be turned off.")
|
||||
set(NCNN_BUILD_BENCHMARK OFF)
|
||||
endif()
|
||||
if(NCNN_BUILD_TESTS)
|
||||
message(WARNING "NCNN_STDIO or NCNN_STRING disabled, NCNN_BUILD_TESTS will be turned off.")
|
||||
set(NCNN_BUILD_TESTS OFF)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
##############################################
|
||||
|
||||
if((IOS AND CMAKE_OSX_ARCHITECTURES MATCHES "arm")
|
||||
OR (APPLE AND CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
|
||||
OR (CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|aarch64)")
|
||||
OR ((CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC")) AND (${CMAKE_GENERATOR_PLATFORM} MATCHES "^(arm|arm64)")))
|
||||
set(NCNN_TARGET_ARCH arm)
|
||||
|
||||
if(NOT (CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC")))
|
||||
include(CheckCXXCompilerFlag)
|
||||
check_cxx_compiler_flag("-march=armv8.2-a+fp16" NCNN_COMPILER_SUPPORT_ARM82_FP16)
|
||||
check_cxx_compiler_flag("-march=armv8.2-a+fp16+dotprod" NCNN_COMPILER_SUPPORT_ARM82_FP16_DOTPROD)
|
||||
endif()
|
||||
|
||||
if(NCNN_COMPILER_SUPPORT_ARM82_FP16)
|
||||
option(NCNN_ARM82 "optimize aarch64 platform with armv8.2" ON)
|
||||
if(NCNN_COMPILER_SUPPORT_ARM82_FP16_DOTPROD)
|
||||
if(NCNN_ARM82)
|
||||
option(NCNN_ARM82DOT "optimize aarch64 platform with armv8.2 dotprod" ON)
|
||||
endif()
|
||||
else()
|
||||
message(WARNING "The compiler does not support armv8.2 dotprod. NCNN_ARM82DOT will be OFF.")
|
||||
endif()
|
||||
else()
|
||||
message(WARNING "The compiler does not support armv8.2. NCNN_ARM82 will be OFF.")
|
||||
endif()
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(mips)")
|
||||
set(NCNN_TARGET_ARCH mips)
|
||||
|
||||
include(CheckCXXCompilerFlag)
|
||||
check_cxx_compiler_flag("-mmsa" NCNN_COMPILER_SUPPORT_MIPS_MSA)
|
||||
|
||||
set(CMAKE_REQUIRED_FLAGS "-mloongson-mmi -I${CMAKE_CURRENT_SOURCE_DIR}/src/layer/mips")
|
||||
check_cxx_source_compiles("#include \"loongson_mmi.h\"\nint main() { int16x4_t _a, _b; int32x2_t _s = __mmi_pmaddhw(_a, _b); return 0; }" NCNN_COMPILER_SUPPORT_LOONGSON_MMI)
|
||||
|
||||
if(NCNN_COMPILER_SUPPORT_MIPS_MSA)
|
||||
option(NCNN_MSA "optimize mips platform with msa extension" ON)
|
||||
else()
|
||||
message(WARNING "The compiler does not support msa extension. NCNN_MSA will be OFF.")
|
||||
endif()
|
||||
if(NCNN_COMPILER_SUPPORT_LOONGSON_MMI)
|
||||
option(NCNN_MMI "optimize mips platform with loongson mmi extension" ON)
|
||||
else()
|
||||
message(WARNING "The compiler does not support loongson mmi extension. NCNN_MMI will be OFF.")
|
||||
endif()
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv)")
|
||||
set(NCNN_TARGET_ARCH riscv)
|
||||
|
||||
include(CheckCXXCompilerFlag)
|
||||
check_cxx_compiler_flag("-march=rv64gcv" NCNN_COMPILER_SUPPORT_RVV)
|
||||
check_cxx_compiler_flag("-march=rv64gcv_zfh" NCNN_COMPILER_SUPPORT_RVV_FP16)
|
||||
|
||||
if(NCNN_COMPILER_SUPPORT_RVV)
|
||||
option(NCNN_RVV "optimize risc-v platform with v extension" ON)
|
||||
if(NOT NCNN_COMPILER_SUPPORT_RVV_FP16)
|
||||
message(WARNING "The compiler does not support risc-v zfh extension. Upgrading your toolchain is strongly recommended.")
|
||||
endif()
|
||||
else()
|
||||
message(WARNING "The compiler does not support risc-v v extension. NCNN_RVV will be OFF.")
|
||||
endif()
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)")
|
||||
set(NCNN_TARGET_ARCH powerpc)
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(loongarch)")
|
||||
set(NCNN_TARGET_ARCH loongarch)
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(xtensa)")
|
||||
set(NCNN_TARGET_ARCH xtensa)
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(s390x)")
|
||||
set(NCNN_TARGET_ARCH s390x)
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(sw_64)")
|
||||
set(NCNN_TARGET_ARCH sw_64)
|
||||
#sw_64 is alpha-like platform
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mieee")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mieee")
|
||||
else()
|
||||
set(NCNN_TARGET_ARCH x86)
|
||||
|
||||
option(NCNN_SSE2 "optimize x86 platform with sse2 extension" ON)
|
||||
|
||||
include(CheckCXXCompilerFlag)
|
||||
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
|
||||
check_cxx_compiler_flag("/arch:AVX" NCNN_COMPILER_SUPPORT_X86_AVX)
|
||||
check_cxx_compiler_flag("/arch:AVX" NCNN_COMPILER_SUPPORT_X86_FMA)
|
||||
check_cxx_compiler_flag("/arch:AVX" NCNN_COMPILER_SUPPORT_X86_XOP)
|
||||
check_cxx_compiler_flag("/arch:AVX" NCNN_COMPILER_SUPPORT_X86_F16C)
|
||||
check_cxx_compiler_flag("/arch:AVX2" NCNN_COMPILER_SUPPORT_X86_AVX2)
|
||||
check_cxx_compiler_flag("/arch:AVX2" NCNN_COMPILER_SUPPORT_X86_AVX_VNNI)
|
||||
check_cxx_compiler_flag("/arch:AVX512" NCNN_COMPILER_SUPPORT_X86_AVX512)
|
||||
check_cxx_compiler_flag("/arch:AVX512" NCNN_COMPILER_SUPPORT_X86_AVX512_VNNI)
|
||||
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 19.16)
|
||||
# vs2017+ supports avx512 and vnni
|
||||
set(NCNN_COMPILER_SUPPORT_X86_AVX_VNNI OFF)
|
||||
set(NCNN_COMPILER_SUPPORT_X86_AVX512 OFF)
|
||||
set(NCNN_COMPILER_SUPPORT_X86_AVX512_VNNI OFF)
|
||||
endif()
|
||||
else()
|
||||
check_cxx_compiler_flag("-mavx" NCNN_COMPILER_SUPPORT_X86_AVX)
|
||||
check_cxx_compiler_flag("-mfma" NCNN_COMPILER_SUPPORT_X86_FMA)
|
||||
check_cxx_compiler_flag("-mxop" NCNN_COMPILER_SUPPORT_X86_XOP)
|
||||
check_cxx_compiler_flag("-mf16c" NCNN_COMPILER_SUPPORT_X86_F16C)
|
||||
check_cxx_compiler_flag("-mfma -mf16c -mavx2" NCNN_COMPILER_SUPPORT_X86_AVX2)
|
||||
check_cxx_compiler_flag("-mfma -mf16c -mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl" NCNN_COMPILER_SUPPORT_X86_AVX512)
|
||||
|
||||
set(CMAKE_REQUIRED_FLAGS "-mfma -mf16c -mavx2 -mavxvnni")
|
||||
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256i _s, _a, _b; _s = _mm256_dpwssd_epi32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX_VNNI)
|
||||
|
||||
set(CMAKE_REQUIRED_FLAGS "-mfma -mf16c -mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512vnni")
|
||||
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256i _s, _a, _b; _s = _mm256_dpwssd_epi32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_VNNI)
|
||||
|
||||
unset(CMAKE_REQUIRED_FLAGS)
|
||||
endif()
|
||||
|
||||
if(NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND NCNN_COMPILER_SUPPORT_X86_AVX)
|
||||
option(NCNN_AVX "optimize x86 platform with avx extension" ON)
|
||||
if(NCNN_COMPILER_SUPPORT_X86_FMA)
|
||||
if(NCNN_AVX)
|
||||
option(NCNN_FMA "optimize x86 platform with fma extension" ON)
|
||||
endif()
|
||||
else()
|
||||
message(WARNING "The compiler does not support fma extension. NCNN_FMA will be OFF.")
|
||||
endif()
|
||||
if(NCNN_COMPILER_SUPPORT_X86_XOP)
|
||||
if(NCNN_AVX)
|
||||
option(NCNN_XOP "optimize x86 platform with xop extension" ON)
|
||||
endif()
|
||||
else()
|
||||
message(WARNING "The compiler does not support xop extension. NCNN_XOP will be OFF.")
|
||||
endif()
|
||||
if(NCNN_COMPILER_SUPPORT_X86_F16C)
|
||||
if(NCNN_AVX)
|
||||
option(NCNN_F16C "optimize x86 platform with f16c extension" ON)
|
||||
endif()
|
||||
else()
|
||||
message(WARNING "The compiler does not support f16c extension. NCNN_F16C will be OFF.")
|
||||
endif()
|
||||
if(NCNN_COMPILER_SUPPORT_X86_AVX2)
|
||||
if(NCNN_AVX)
|
||||
option(NCNN_AVX2 "optimize x86 platform with avx2 extension" ON)
|
||||
endif()
|
||||
if(NCNN_COMPILER_SUPPORT_X86_AVX_VNNI)
|
||||
if(NCNN_AVX2)
|
||||
option(NCNN_AVXVNNI "optimize x86 platform with avx vnni extension" ON)
|
||||
endif()
|
||||
else()
|
||||
message(WARNING "The compiler does not support avx vnni extension. NCNN_AVXVNNI will be OFF.")
|
||||
endif()
|
||||
if(NCNN_COMPILER_SUPPORT_X86_AVX512)
|
||||
if(NCNN_AVX2)
|
||||
option(NCNN_AVX512 "optimize x86 platform with avx512 extension" ON)
|
||||
endif()
|
||||
if(NCNN_COMPILER_SUPPORT_X86_AVX512_VNNI)
|
||||
if(NCNN_AVX512)
|
||||
option(NCNN_AVX512VNNI "optimize x86 platform with avx512 vnni extension" ON)
|
||||
endif()
|
||||
else()
|
||||
message(WARNING "The compiler does not support avx512 vnni extension. NCNN_AVX512VNNI will be OFF.")
|
||||
endif()
|
||||
else()
|
||||
message(WARNING "The compiler does not support avx512 extension. NCNN_AVX512 will be OFF.")
|
||||
endif()
|
||||
else()
|
||||
message(WARNING "The compiler does not support avx2 extension. NCNN_AVX2 will be OFF.")
|
||||
endif()
|
||||
else()
|
||||
message(WARNING "The compiler does not support avx extension. NCNN_AVX will be OFF.")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
message(STATUS "Target arch: ${NCNN_TARGET_ARCH}")
|
||||
|
||||
##############################################
|
||||
|
||||
# set cmake default folder name
|
||||
set_property(GLOBAL PROPERTY USE_FOLDERS ON)
|
||||
set_property(GLOBAL PROPERTY PREDEFINED_TARGETS_FOLDER "cmake")
|
||||
|
||||
if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=256MB -s EXIT_RUNTIME=1")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=256MB -s EXIT_RUNTIME=1")
|
||||
set(CMAKE_EXECUTBLE_LINKER_FLAGS "${CMAKE_EXECUTBLE_LINKER_FLAGS} -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=256MB -s EXIT_RUNTIME=1")
|
||||
|
||||
if(NCNN_OPENMP AND NCNN_SIMPLEOMP)
|
||||
# TODO better flags for emscripten
|
||||
# node --experimental-wasm-threads xxx.js
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -s USE_PTHREADS=1 -s PTHREAD_POOL_SIZE=15")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -s USE_PTHREADS=1 -s PTHREAD_POOL_SIZE=15")
|
||||
set(CMAKE_EXECUTBLE_LINKER_FLAGS "${CMAKE_EXECUTBLE_LINKER_FLAGS} -s USE_PTHREADS=1 -s PTHREAD_POOL_SIZE=15")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NCNN_VULKAN)
|
||||
if(NCNN_SYSTEM_GLSLANG)
|
||||
set(GLSLANG_TARGET_DIR "GLSLANG-NOTFOUND" CACHE PATH "Absolute path to glslangTargets.cmake directory")
|
||||
if(NOT GLSLANG_TARGET_DIR AND NOT DEFINED ENV{GLSLANG_TARGET_DIR})
|
||||
message(WARNING "GLSLANG_TARGET_DIR must be defined! NCNN_SYSTEM_GLSLANG will be turned off.")
|
||||
set(NCNN_SYSTEM_GLSLANG OFF)
|
||||
else()
|
||||
message(STATUS "Using glslang install located at ${GLSLANG_TARGET_DIR}")
|
||||
|
||||
find_package(Threads)
|
||||
|
||||
include("${GLSLANG_TARGET_DIR}/OSDependentTargets.cmake")
|
||||
include("${GLSLANG_TARGET_DIR}/OGLCompilerTargets.cmake")
|
||||
if(EXISTS "${GLSLANG_TARGET_DIR}/HLSLTargets.cmake")
|
||||
# hlsl support can be optional
|
||||
include("${GLSLANG_TARGET_DIR}/HLSLTargets.cmake")
|
||||
endif()
|
||||
include("${GLSLANG_TARGET_DIR}/glslangTargets.cmake")
|
||||
include("${GLSLANG_TARGET_DIR}/SPIRVTargets.cmake")
|
||||
|
||||
if (NOT TARGET glslang OR NOT TARGET SPIRV)
|
||||
message(WARNING "glslang or SPIRV target not found! NCNN_SYSTEM_GLSLANG will be turned off.")
|
||||
set(NCNN_SYSTEM_GLSLANG OFF)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NOT NCNN_SYSTEM_GLSLANG)
|
||||
if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/glslang/CMakeLists.txt")
|
||||
message(FATAL_ERROR "The submodules were not downloaded! Please update submodules with \"git submodule update --init\" and try again.")
|
||||
else()
|
||||
# glslang requires c++11
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
|
||||
option(BUILD_EXTERNAL "" OFF)
|
||||
option(ENABLE_SPVREMAPPER "" OFF)
|
||||
option(ENABLE_GLSLANG_BINARIES "" OFF)
|
||||
option(ENABLE_HLSL "" OFF)
|
||||
option(ENABLE_RTTI "" OFF)
|
||||
option(ENABLE_EXCEPTIONS "" OFF)
|
||||
option(ENABLE_OPT "" OFF)
|
||||
option(ENABLE_PCH "" OFF)
|
||||
option(ENABLE_CTEST "" OFF)
|
||||
if(NCNN_SHARED_LIB)
|
||||
option(SKIP_GLSLANG_INSTALL "" ON)
|
||||
endif()
|
||||
add_subdirectory(glslang)
|
||||
if(NCNN_SHARED_LIB)
|
||||
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND NOT CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
|
||||
target_compile_options(glslang PRIVATE -fvisibility=hidden -fvisibility-inlines-hidden)
|
||||
target_compile_options(OGLCompiler PRIVATE -fvisibility=hidden -fvisibility-inlines-hidden)
|
||||
target_compile_options(OSDependent PRIVATE -fvisibility=hidden -fvisibility-inlines-hidden)
|
||||
target_compile_options(SPIRV PRIVATE -fvisibility=hidden -fvisibility-inlines-hidden)
|
||||
endif()
|
||||
if(NCNN_ENABLE_LTO)
|
||||
set_target_properties(glslang PROPERTIES INTERPROCEDURAL_OPTIMIZATION ON)
|
||||
set_target_properties(OGLCompiler PROPERTIES INTERPROCEDURAL_OPTIMIZATION ON)
|
||||
set_target_properties(OSDependent PROPERTIES INTERPROCEDURAL_OPTIMIZATION ON)
|
||||
set_target_properties(SPIRV PROPERTIES INTERPROCEDURAL_OPTIMIZATION ON)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
add_subdirectory(src)
|
||||
if(NCNN_BUILD_BENCHMARK)
|
||||
add_subdirectory(benchmark)
|
||||
endif()
|
||||
if(NCNN_BUILD_EXAMPLES)
|
||||
add_subdirectory(examples)
|
||||
endif()
|
||||
if(NCNN_BUILD_TOOLS)
|
||||
add_subdirectory(tools)
|
||||
endif()
|
||||
if(NCNN_BUILD_TESTS)
|
||||
enable_testing()
|
||||
add_subdirectory(tests)
|
||||
endif()
|
||||
if(NCNN_PYTHON)
|
||||
add_subdirectory(python)
|
||||
endif()
|
5
3rdparty/ncnn/CONTRIBUTING.md
vendored
Normal file
5
3rdparty/ncnn/CONTRIBUTING.md
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
|
||||
## Acknowledgements
|
||||
- Thanks to bug1989 [https://github.com/bug1989] for contributing the initial quantized int8 inference code and a large variety of device benchmark
|
||||
- Thanks to zhiliu6 [https://github.com/zhiliu6] for contributing the darknet conversion tool, operators and YOLO examples
|
||||
- Thanks to Tijmen Verhulsdonck [https://github.com/Timen] for contributing the massive AVX optimization for x86 platform
|
18
3rdparty/ncnn/Info.plist
vendored
Normal file
18
3rdparty/ncnn/Info.plist
vendored
Normal file
@ -0,0 +1,18 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>CFBundleName</key>
|
||||
<string>__NAME__</string>
|
||||
<key>CFBundleIdentifier</key>
|
||||
<string>__IDENTIFIER__</string>
|
||||
<key>CFBundleVersion</key>
|
||||
<string>__VERSION__</string>
|
||||
<key>CFBundleShortVersionString</key>
|
||||
<string>__VERSION__</string>
|
||||
<key>CFBundleSignature</key>
|
||||
<string>????</string>
|
||||
<key>CFBundlePackageType</key>
|
||||
<string>FMWK</string>
|
||||
</dict>
|
||||
</plist>
|
86
3rdparty/ncnn/LICENSE.txt
vendored
Normal file
86
3rdparty/ncnn/LICENSE.txt
vendored
Normal file
@ -0,0 +1,86 @@
|
||||
Tencent is pleased to support the open source community by making ncnn available.
|
||||
Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
|
||||
If you have downloaded a copy of the ncnn binary from Tencent, please note that the ncnn binary is licensed under the BSD 3-Clause License.
|
||||
If you have downloaded a copy of the ncnn source code from Tencent, please note that ncnn source code is licensed under the BSD 3-Clause License, except for the third-party components listed below which are subject to different license terms. Your integration of ncnn into your own projects may require compliance with the BSD 3-Clause License, as well as the other licenses applicable to the third-party components included within ncnn.
|
||||
A copy of the BSD 3-Clause License is included in this file.
|
||||
|
||||
Other dependencies and licenses:
|
||||
|
||||
Open Source Software Licensed Under the zlib License:
|
||||
The below software in this distribution may have been modified by THL A29 Limited (“Tencent Modifications”). All Tencent Modifications are Copyright (C) 2017 THL A29 Limited.
|
||||
----------------------------------------------------------------------------------------
|
||||
1. neon_mathfun.h
|
||||
Copyright (C) 2011 Julien Pommier
|
||||
|
||||
2. sse_mathfun.h
|
||||
Copyright (C) 2007 Julien Pommier
|
||||
|
||||
3. avx_mathfun.h
|
||||
Copyright (C) 2012 Giovanni Garberoglio
|
||||
Interdisciplinary Laboratory for Computational Science (LISC)
|
||||
Fondazione Bruno Kessler and University of Trento
|
||||
via Sommarive, 18
|
||||
I-38123 Trento (Italy)
|
||||
|
||||
|
||||
Terms of the zlib License:
|
||||
---------------------------------------------------
|
||||
Copyright (c) <year> <copyright holders>
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
|
||||
|
||||
Open Source Software Licensed Under the BSD 2-Clause License:
|
||||
The below software in this distribution may have been modified by THL A29 Limited (“Tencent Modifications”). All Tencent Modifications are Copyright (C) 2017 THL A29 Limited.
|
||||
----------------------------------------------------------------------------------------
|
||||
1. squeezenet 1.1
|
||||
Copyright (c) 2016 Forrest N. Iandola and Matthew W. Moskewicz and Khalid Ashraf and Song Han and William J. Dally and Kurt Keutzer
|
||||
All rights reserved.
|
||||
|
||||
2. caffe.proto master
|
||||
All contributions by the University of California:
|
||||
Copyright (c) 2014-2017 The Regents of the University of California (Regents)
|
||||
All rights reserved.
|
||||
|
||||
All other contributions:
|
||||
Copyright (c) 2014-2017, the respective contributors
|
||||
All rights reserved.
|
||||
|
||||
|
||||
Terms of the BSD 2-Clause License:
|
||||
--------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
||||
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
|
||||
Open Source Software Licensed Under the BSD 3-Clause License:
|
||||
The below software in this distribution may have been modified by THL A29 Limited (“Tencent Modifications”). All Tencent Modifications are Copyright (C) 2017 THL A29 Limited.
|
||||
----------------------------------------------------------------------------------------
|
||||
1. android.toolchain.cmake master
|
||||
Copyright (c) 2010-2011, Ethan Rublee
|
||||
Copyright (c) 2011-2014, Andrey Kamaev
|
||||
All rights reserved.
|
||||
|
||||
|
||||
Terms of the BSD 3-Clause License:
|
||||
--------------------------------------------------------------------
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
||||
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
|
||||
Neither the name of [copyright holder] nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
183
3rdparty/ncnn/README.md
vendored
Normal file
183
3rdparty/ncnn/README.md
vendored
Normal file
@ -0,0 +1,183 @@
|
||||

|
||||
# ncnn
|
||||
|
||||
[](https://raw.githubusercontent.com/Tencent/ncnn/master/LICENSE.txt)
|
||||
[](https://github.com/Tencent/ncnn/releases)
|
||||
[](https://codecov.io/gh/Tencent/ncnn)
|
||||
[](https://lgtm.com/projects/g/Tencent/ncnn/context:cpp)
|
||||
|
||||
ncnn is a high-performance neural network inference computing framework optimized for mobile platforms. ncnn is deeply considerate about deployment and uses on mobile phones from the beginning of design. ncnn does not have third party dependencies. It is cross-platform, and runs faster than all known open source frameworks on mobile phone cpu. Developers can easily deploy deep learning algorithm models to the mobile platform by using efficient ncnn implementation, create intelligent APPs, and bring the artificial intelligence to your fingertips. ncnn is currently being used in many Tencent applications, such as QQ, Qzone, WeChat, Pitu and so on.
|
||||
|
||||
ncnn 是一个为手机端极致优化的高性能神经网络前向计算框架。ncnn 从设计之初深刻考虑手机端的部署和使用。无第三方依赖,跨平台,手机端 cpu 的速度快于目前所有已知的开源框架。基于 ncnn,开发者能够将深度学习算法轻松移植到手机端高效执行,开发出人工智能 APP,将 AI 带到你的指尖。ncnn 目前已在腾讯多款应用中使用,如 QQ,Qzone,微信,天天P图等。
|
||||
|
||||
---
|
||||
|
||||
### 技术交流QQ群:637093648(超多大佬) 答案:卷卷卷卷卷
|
||||
### Pocky群(MLIR YES!): 677104663(超多大佬) 答案:multi-level intermediate representation
|
||||
|
||||
### Telegram Group https://t.me/ncnnyes
|
||||
|
||||
### Discord Channel https://discord.gg/YRsxgmF
|
||||
|
||||
---
|
||||
|
||||
### Current building status matrix
|
||||
|
||||
| System | CPU (32bit) | CPU (64bit) | GPU (32bit) | GPU (64bit) |
|
||||
| :---: | :---: | :---: | :--: | :--: |
|
||||
| Linux (GCC) | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Alinux-x86-cpu-gcc) | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Alinux-x64-cpu-gcc) | — | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Alinux-x64-gpu-gcc) |
|
||||
| Linux (Clang) | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Alinux-x86-cpu-clang) | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Alinux-x64-cpu-clang) | — | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Alinux-x64-gpu-clang) |
|
||||
| Linux (ARM) | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Alinux-arm-cpu-gcc) | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Alinux-aarch64-cpu-gcc) | — | — |
|
||||
| Linux (MIPS) | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Alinux-mips-cpu-gcc) | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Alinux-mips64-cpu-gcc) | — | — |
|
||||
| Linux (RISC-V) | — | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Alinux-riscv64-cpu-gcc) | — | — |
|
||||
| Linux (LoongArch) | — | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Alinux-loongarch64-cpu-gcc) | — | — |
|
||||
| Windows | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Awindows-x86-cpu) | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Awindows-x64-cpu) | — | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Awindows-x64-gpu) |
|
||||
| Windows (ARM) | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Awindows-arm-cpu) | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Awindows-arm64-cpu) | — | — |
|
||||
| macOS | — | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Amacos-x64-cpu) | — | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Amacos-x64-gpu) |
|
||||
| macOS (ARM) | — | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Amacos-arm64-cpu) | — | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Amacos-arm64-gpu) |
|
||||
| Android | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Aandroid-armv7-cpu) | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Aandroid-armv8-cpu) | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Aandroid-armv7-gpu) | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Aandroid-armv8-gpu) |
|
||||
| Android-x86 | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Aandroid-x86-cpu) | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Aandroid-x64-cpu) | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Aandroid-x86-gpu) | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Aandroid-x64-gpu) |
|
||||
| iOS | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Aios-cpu) | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Aios-cpu) | — | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Aios-arm64-gpu) |
|
||||
| iOS Simulator | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Aios-simulator) | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Aios-simulator) | — | — |
|
||||
| WebAssembly | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Aweb-assembly) | — | — | — |
|
||||
| RISC-V GCC/Newlib | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Aelf-riscv32-cpu-gcc) | [](https://github.com/Tencent/ncnn/actions?query=workflow%3Aelf-riscv64-cpu-gcc) | — | — |
|
||||
|
||||
---
|
||||
|
||||
### Support most commonly used CNN network
|
||||
### 支持大部分常用的 CNN 网络
|
||||
|
||||
* Classical CNN: [VGG](https://github.com/BVLC/caffe/wiki/Model-Zoo#models-used-by-the-vgg-team-in-ilsvrc-2014) [AlexNet](https://github.com/BVLC/caffe/tree/9b891540183ddc834a02b2bd81b31afae71b2153/models/bvlc_alexnet) [GoogleNet](https://github.com/BVLC/caffe/tree/9b891540183ddc834a02b2bd81b31afae71b2153/models/bvlc_googlenet) Inception ...
|
||||
* Practical CNN: [ResNet](https://github.com/tornadomeet/ResNet) [DenseNet](https://github.com/liuzhuang13/DenseNet) [SENet](https://github.com/hujie-frank/SENet) [FPN](https://github.com/unsky/FPN) ...
|
||||
* Light-weight CNN: [SqueezeNet](https://github.com/forresti/SqueezeNet) [MobileNetV1](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.md)/[V2/V3](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/README.md) [ShuffleNetV1](https://github.com/farmingyard/ShuffleNet)/[V2](https://github.com/opconty/keras-shufflenetV2) [MNasNet](https://github.com/tensorflow/models/tree/master/research/slim/nets/nasnet) ...
|
||||
* Face Detection: [MTCNN](https://github.com/ipazc/mtcnn) [RetinaFace](https://github.com/biubug6/Pytorch_Retinaface) [scrfd](https://github.com/nihui/ncnn-android-scrfd) ...
|
||||
* Detection: [VGG-SSD](https://github.com/lzx1413/CAFFE_SSD) [MobileNet-SSD](https://github.com/chuanqi305/MobileNet-SSD) [SqueezeNet-SSD](https://github.com/chuanqi305/SqueezeNet-SSD) [MobileNetV2-SSDLite](https://github.com/chuanqi305/MobileNetv2-SSDLite) [MobileNetV3-SSDLite](https://github.com/XiaoyuHuang96/MobilenetV3SSDLite-tfkeras) ...
|
||||
* Detection: [Faster-RCNN](https://github.com/rbgirshick/py-faster-rcnn) [R-FCN](https://github.com/daijifeng001/R-FCN) ...
|
||||
* Detection: [YOLOV2](https://github.com/longcw/yolo2-pytorch) [YOLOV3](https://github.com/ultralytics/yolov3) [MobileNet-YOLOV3](https://github.com/eric612/MobileNet-YOLO) [YOLOV4](https://github.com/Tianxiaomo/pytorch-YOLOv4) [YOLOV5](https://github.com/ultralytics/yolov5) [YOLOX](https://github.com/Megvii-BaseDetection/YOLOX) ...
|
||||
* Detection: [NanoDet](https://github.com/RangiLyu/nanodet)
|
||||
* Segmentation: [FCN](https://github.com/unsky/FPN) [PSPNet](https://github.com/hszhao/PSPNet) [UNet](https://github.com/zhixuhao/unet) [YOLACT](https://github.com/dbolya/yolact) ...
|
||||
* Pose Estimation: [SimplePose](https://github.com/dog-qiuqiu/Ultralight-SimplePose) ...
|
||||
|
||||
---
|
||||
|
||||
### HowTo
|
||||
|
||||
**[how to build ncnn library](https://github.com/Tencent/ncnn/wiki/how-to-build) on Linux / Windows / macOS / Raspberry Pi3 / Android / NVIDIA Jetson / iOS / WebAssembly / AllWinner D1 / Loongson 2K1000**
|
||||
|
||||
* [Build for Linux / NVIDIA Jetson / Raspberry Pi3](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-linux)
|
||||
* [Build for Windows x64 using VS2017](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-windows-x64-using-visual-studio-community-2017)
|
||||
* [Build for macOS](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-macos)
|
||||
* [Build for ARM Cortex-A family with cross-compiling](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-arm-cortex-a-family-with-cross-compiling)
|
||||
* [Build for Hisilicon platform with cross-compiling](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-hisilicon-platform-with-cross-compiling)
|
||||
* [Build for Android](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-android)
|
||||
* [Build for iOS on macOS with xcode](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-ios-on-macos-with-xcode)
|
||||
* [Build for WebAssembly](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-webassembly)
|
||||
* [Build for AllWinner D1](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-allwinner-d1)
|
||||
* [Build for Loongson 2K1000](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-loongson-2k1000)
|
||||
* [Build for termux on android](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-termux-on-android)
|
||||
|
||||
|
||||
**[download prebuild binary package for android and ios](https://github.com/Tencent/ncnn/releases)**
|
||||
|
||||
**[use ncnn with alexnet](https://github.com/Tencent/ncnn/wiki/use-ncnn-with-alexnet) with detailed steps, recommended for beginners :)**
|
||||
|
||||
**[ncnn 组件使用指北 alexnet](https://github.com/Tencent/ncnn/wiki/use-ncnn-with-alexnet.zh) 附带详细步骤,新人强烈推荐 :)**
|
||||
|
||||
**[use netron for ncnn model visualization](https://netron.app)**
|
||||
|
||||
**[out-of-the-box web model conversion](https://convertmodel.com/#outputFormat=ncnn)**
|
||||
|
||||
[ncnn low-level operation api](https://github.com/Tencent/ncnn/wiki/low-level-operation-api)
|
||||
|
||||
[ncnn param and model file spec](https://github.com/Tencent/ncnn/wiki/param-and-model-file-structure)
|
||||
|
||||
[ncnn operation param weight table](https://github.com/Tencent/ncnn/wiki/operation-param-weight-table)
|
||||
|
||||
[how to implement custom layer step by step](https://github.com/Tencent/ncnn/wiki/how-to-implement-custom-layer-step-by-step)
|
||||
|
||||
---
|
||||
|
||||
### FAQ
|
||||
|
||||
**[ncnn throw error](https://github.com/Tencent/ncnn/wiki/FAQ-ncnn-throw-error)**
|
||||
|
||||
**[ncnn produce wrong result](https://github.com/Tencent/ncnn/wiki/FAQ-ncnn-produce-wrong-result)**
|
||||
|
||||
**[ncnn vulkan](https://github.com/Tencent/ncnn/wiki/FAQ-ncnn-vulkan)**
|
||||
|
||||
---
|
||||
|
||||
### Features
|
||||
|
||||
* Supports convolutional neural networks, supports multiple input and multi-branch structure, can calculate part of the branch
|
||||
* No third-party library dependencies, does not rely on BLAS / NNPACK or any other computing framework
|
||||
* Pure C++ implementation, cross-platform, supports android, ios and so on
|
||||
* ARM NEON assembly level of careful optimization, calculation speed is extremely high
|
||||
* Sophisticated memory management and data structure design, very low memory footprint
|
||||
* Supports multi-core parallel computing acceleration, ARM big.LITTLE cpu scheduling optimization
|
||||
* Supports GPU acceleration via the next-generation low-overhead vulkan api
|
||||
* Extensible model design, supports 8bit quantization and half-precision floating point storage, can import caffe/pytorch/mxnet/onnx/darknet/keras/tensorflow(mlir) models
|
||||
* Support direct memory zero copy reference load network model
|
||||
* Can be registered with custom layer implementation and extended
|
||||
* Well, it is strong, not afraid of being stuffed with 卷 QvQ
|
||||
|
||||
### 功能概述
|
||||
|
||||
* 支持卷积神经网络,支持多输入和多分支结构,可计算部分分支
|
||||
* 无任何第三方库依赖,不依赖 BLAS/NNPACK 等计算框架
|
||||
* 纯 C++ 实现,跨平台,支持 android ios 等
|
||||
* ARM NEON 汇编级良心优化,计算速度极快
|
||||
* 精细的内存管理和数据结构设计,内存占用极低
|
||||
* 支持多核并行计算加速,ARM big.LITTLE cpu 调度优化
|
||||
* 支持基于全新低消耗的 vulkan api GPU 加速
|
||||
* 可扩展的模型设计,支持 8bit [量化](tools/quantize) 和半精度浮点存储,可导入 caffe/pytorch/mxnet/onnx/darknet/keras/tensorflow(mlir) 模型
|
||||
* 支持直接内存零拷贝引用加载网络模型
|
||||
* 可注册自定义层实现并扩展
|
||||
* 恩,很强就是了,不怕被塞卷 QvQ
|
||||
|
||||
---
|
||||
|
||||
### supported platform matrix
|
||||
|
||||
* ✅ = known work and runs fast with good optimization
|
||||
* ✔️ = known work, but speed may not be fast enough
|
||||
* ❔ = shall work, not confirmed
|
||||
* / = not applied
|
||||
|
||||
| |Windows|Linux|Android|macOS|iOS|
|
||||
|---|---|---|---|---|---|
|
||||
|intel-cpu|✔️|✔️|❔|✔️|/|
|
||||
|intel-gpu|✔️|✔️|❔|❔|/|
|
||||
|amd-cpu|✔️|✔️|❔|✔️|/|
|
||||
|amd-gpu|✔️|✔️|❔|❔|/|
|
||||
|nvidia-gpu|✔️|✔️|❔|❔|/|
|
||||
|qcom-cpu|❔|✔️|✅|/|/|
|
||||
|qcom-gpu|❔|✔️|✔️|/|/|
|
||||
|arm-cpu|❔|❔|✅|/|/|
|
||||
|arm-gpu|❔|❔|✔️|/|/|
|
||||
|apple-cpu|/|/|/|✔️|✅|
|
||||
|apple-gpu|/|/|/|✔️|✔️|
|
||||
|
||||
|
||||
---
|
||||
|
||||
### Example project
|
||||
|
||||
* https://github.com/nihui/ncnn-android-squeezenet
|
||||
* https://github.com/nihui/ncnn-android-styletransfer
|
||||
* https://github.com/nihui/ncnn-android-mobilenetssd
|
||||
* https://github.com/moli232777144/mtcnn_ncnn
|
||||
* https://github.com/nihui/ncnn-android-yolov5
|
||||
* https://github.com/nihui/ncnn-android-scrfd 🤩
|
||||
|
||||
<img src="https://github.com/nihui/ncnn-assets/raw/master/20181217/ncnn-2.jpg" width="360" height="640"/> <img src="https://github.com/nihui/ncnn-assets/raw/master/20181217/4.jpg" width="360" height="640"/>
|
||||
<img src="https://github.com/nihui/ncnn-assets/raw/master/20181217/ncnn-33.jpg" width="360" height="640"/> <img src="https://github.com/nihui/ncnn-assets/raw/master/20181217/ncnn-m.png" width="360" height="640"/>
|
||||
<img src="https://github.com/nihui/ncnn-android-yolov5/raw/master/screenshot.jpg" width="360" height="800"/> <img src="https://github.com/nihui/ncnn-android-scrfd/raw/master/screenshot.jpg" width="360" height="800"/>
|
||||
|
||||
|
||||
---
|
||||
|
||||
### License
|
||||
|
||||
[BSD 3 Clause](LICENSE.txt)
|
||||
|
15
3rdparty/ncnn/benchmark/CMakeLists.txt
vendored
Normal file
15
3rdparty/ncnn/benchmark/CMakeLists.txt
vendored
Normal file
@ -0,0 +1,15 @@
|
||||
|
||||
if(MSVC)
|
||||
# warning C4996: 'fopen': This function or variable may be unsafe. Consider using fopen_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. See online help for details.
|
||||
add_definitions(/wd4996)
|
||||
endif()
|
||||
|
||||
add_executable(benchncnn benchncnn.cpp)
|
||||
target_link_libraries(benchncnn PRIVATE ncnn)
|
||||
|
||||
if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
|
||||
target_link_libraries(benchncnn PRIVATE nodefs.js)
|
||||
endif()
|
||||
|
||||
# add benchncnn to a virtual project group
|
||||
set_property(TARGET benchncnn PROPERTY FOLDER "benchmark")
|
3431
3rdparty/ncnn/benchmark/README.md
vendored
Normal file
3431
3rdparty/ncnn/benchmark/README.md
vendored
Normal file
File diff suppressed because it is too large
Load Diff
17
3rdparty/ncnn/benchmark/alexnet.param
vendored
Normal file
17
3rdparty/ncnn/benchmark/alexnet.param
vendored
Normal file
@ -0,0 +1,17 @@
|
||||
7767517
|
||||
15 15
|
||||
Input data 0 1 data -23330=4,3,227,227,3 0=227 1=227 2=3
|
||||
Convolution conv1 1 1 data conv1_relu1 -23330=4,3,55,55,96 0=96 1=11 3=4 5=1 6=34848 9=1
|
||||
LRN norm1 1 1 conv1_relu1 norm1 -23330=4,3,55,55,96 2=1.000000e-04
|
||||
Pooling pool1 1 1 norm1 pool1 -23330=4,3,27,27,96 1=3 2=2
|
||||
ConvolutionDepthWise conv2 1 1 pool1 conv2_relu2 -23330=4,3,27,27,256 0=256 1=5 4=2 5=1 6=307200 7=2 9=1
|
||||
LRN norm2 1 1 conv2_relu2 norm2 -23330=4,3,27,27,256 2=1.000000e-04
|
||||
Pooling pool2 1 1 norm2 pool2 -23330=4,3,13,13,256 1=3 2=2
|
||||
Convolution conv3 1 1 pool2 conv3_relu3 -23330=4,3,13,13,384 0=384 1=3 4=1 5=1 6=884736 9=1
|
||||
ConvolutionDepthWise conv4 1 1 conv3_relu3 conv4_relu4 -23330=4,3,13,13,384 0=384 1=3 4=1 5=1 6=663552 7=2 9=1
|
||||
ConvolutionDepthWise conv5 1 1 conv4_relu4 conv5_relu5 -23330=4,3,13,13,256 0=256 1=3 4=1 5=1 6=442368 7=2 9=1
|
||||
Pooling pool5 1 1 conv5_relu5 pool5 -23330=4,3,6,6,256 1=3 2=2
|
||||
InnerProduct fc6 1 1 pool5 fc6_drop6 -23330=4,1,4096,1,1 0=4096 1=1 2=37748736 9=1
|
||||
InnerProduct fc7 1 1 fc6_drop6 fc7_drop7 -23330=4,1,4096,1,1 0=4096 1=1 2=16777216 9=1
|
||||
InnerProduct fc8 1 1 fc7_drop7 fc8 -23330=4,1,1000,1,1 0=1000 1=1 2=4096000
|
||||
Softmax prob 1 1 fc8 output -23330=4,1,1000,1,1
|
327
3rdparty/ncnn/benchmark/benchncnn.cpp
vendored
Normal file
327
3rdparty/ncnn/benchmark/benchncnn.cpp
vendored
Normal file
@ -0,0 +1,327 @@
|
||||
// Tencent is pleased to support the open source community by making ncnn available.
|
||||
//
|
||||
// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
|
||||
//
|
||||
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// https://opensource.org/licenses/BSD-3-Clause
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#include <float.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <algorithm>
|
||||
#include <windows.h> // Sleep()
|
||||
#else
|
||||
#include <unistd.h> // sleep()
|
||||
#endif
|
||||
|
||||
#ifdef __EMSCRIPTEN__
|
||||
#include <emscripten.h>
|
||||
#endif
|
||||
|
||||
#include "benchmark.h"
|
||||
#include "cpu.h"
|
||||
#include "datareader.h"
|
||||
#include "net.h"
|
||||
#include "gpu.h"
|
||||
|
||||
class DataReaderFromEmpty : public ncnn::DataReader
|
||||
{
|
||||
public:
|
||||
virtual int scan(const char* format, void* p) const
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
virtual size_t read(void* buf, size_t size) const
|
||||
{
|
||||
memset(buf, 0, size);
|
||||
return size;
|
||||
}
|
||||
};
|
||||
|
||||
static int g_warmup_loop_count = 8;
|
||||
static int g_loop_count = 4;
|
||||
static bool g_enable_cooling_down = true;
|
||||
|
||||
static ncnn::UnlockedPoolAllocator g_blob_pool_allocator;
|
||||
static ncnn::PoolAllocator g_workspace_pool_allocator;
|
||||
|
||||
#if NCNN_VULKAN
|
||||
static ncnn::VulkanDevice* g_vkdev = 0;
|
||||
static ncnn::VkAllocator* g_blob_vkallocator = 0;
|
||||
static ncnn::VkAllocator* g_staging_vkallocator = 0;
|
||||
#endif // NCNN_VULKAN
|
||||
|
||||
void benchmark(const char* comment, const ncnn::Mat& _in, const ncnn::Option& opt)
|
||||
{
|
||||
ncnn::Mat in = _in;
|
||||
in.fill(0.01f);
|
||||
|
||||
g_blob_pool_allocator.clear();
|
||||
g_workspace_pool_allocator.clear();
|
||||
|
||||
#if NCNN_VULKAN
|
||||
if (opt.use_vulkan_compute)
|
||||
{
|
||||
g_blob_vkallocator->clear();
|
||||
g_staging_vkallocator->clear();
|
||||
}
|
||||
#endif // NCNN_VULKAN
|
||||
|
||||
ncnn::Net net;
|
||||
|
||||
net.opt = opt;
|
||||
|
||||
#if NCNN_VULKAN
|
||||
if (net.opt.use_vulkan_compute)
|
||||
{
|
||||
net.set_vulkan_device(g_vkdev);
|
||||
}
|
||||
#endif // NCNN_VULKAN
|
||||
|
||||
#ifdef __EMSCRIPTEN__
|
||||
#define MODEL_DIR "/working/"
|
||||
#else
|
||||
#define MODEL_DIR ""
|
||||
#endif
|
||||
|
||||
char parampath[256];
|
||||
sprintf(parampath, MODEL_DIR "%s.param", comment);
|
||||
net.load_param(parampath);
|
||||
|
||||
DataReaderFromEmpty dr;
|
||||
net.load_model(dr);
|
||||
|
||||
const std::vector<const char*>& input_names = net.input_names();
|
||||
const std::vector<const char*>& output_names = net.output_names();
|
||||
|
||||
if (g_enable_cooling_down)
|
||||
{
|
||||
// sleep 10 seconds for cooling down SOC :(
|
||||
#ifdef _WIN32
|
||||
Sleep(10 * 1000);
|
||||
#elif defined(__unix__) || defined(__APPLE__)
|
||||
sleep(10);
|
||||
#elif _POSIX_TIMERS
|
||||
struct timespec ts;
|
||||
ts.tv_sec = 10;
|
||||
ts.tv_nsec = 0;
|
||||
nanosleep(&ts, &ts);
|
||||
#else
|
||||
// TODO How to handle it ?
|
||||
#endif
|
||||
}
|
||||
|
||||
ncnn::Mat out;
|
||||
|
||||
// warm up
|
||||
for (int i = 0; i < g_warmup_loop_count; i++)
|
||||
{
|
||||
ncnn::Extractor ex = net.create_extractor();
|
||||
ex.input(input_names[0], in);
|
||||
ex.extract(output_names[0], out);
|
||||
}
|
||||
|
||||
double time_min = DBL_MAX;
|
||||
double time_max = -DBL_MAX;
|
||||
double time_avg = 0;
|
||||
|
||||
for (int i = 0; i < g_loop_count; i++)
|
||||
{
|
||||
double start = ncnn::get_current_time();
|
||||
|
||||
{
|
||||
ncnn::Extractor ex = net.create_extractor();
|
||||
ex.input(input_names[0], in);
|
||||
ex.extract(output_names[0], out);
|
||||
}
|
||||
|
||||
double end = ncnn::get_current_time();
|
||||
|
||||
double time = end - start;
|
||||
|
||||
time_min = std::min(time_min, time);
|
||||
time_max = std::max(time_max, time);
|
||||
time_avg += time;
|
||||
}
|
||||
|
||||
time_avg /= g_loop_count;
|
||||
|
||||
fprintf(stderr, "%20s min = %7.2f max = %7.2f avg = %7.2f\n", comment, time_min, time_max, time_avg);
|
||||
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
int loop_count = 4;
|
||||
int num_threads = ncnn::get_cpu_count();
|
||||
int powersave = 0;
|
||||
int gpu_device = -1;
|
||||
int cooling_down = 1;
|
||||
|
||||
if (argc >= 2)
|
||||
{
|
||||
loop_count = atoi(argv[1]);
|
||||
}
|
||||
if (argc >= 3)
|
||||
{
|
||||
num_threads = atoi(argv[2]);
|
||||
}
|
||||
if (argc >= 4)
|
||||
{
|
||||
powersave = atoi(argv[3]);
|
||||
}
|
||||
if (argc >= 5)
|
||||
{
|
||||
gpu_device = atoi(argv[4]);
|
||||
}
|
||||
if (argc >= 6)
|
||||
{
|
||||
cooling_down = atoi(argv[5]);
|
||||
}
|
||||
|
||||
#ifdef __EMSCRIPTEN__
|
||||
EM_ASM(
|
||||
FS.mkdir('/working');
|
||||
FS.mount(NODEFS, {root: '.'}, '/working'););
|
||||
#endif // __EMSCRIPTEN__
|
||||
|
||||
bool use_vulkan_compute = gpu_device != -1;
|
||||
|
||||
g_enable_cooling_down = cooling_down != 0;
|
||||
|
||||
g_loop_count = loop_count;
|
||||
|
||||
g_blob_pool_allocator.set_size_compare_ratio(0.0f);
|
||||
g_workspace_pool_allocator.set_size_compare_ratio(0.5f);
|
||||
|
||||
#if NCNN_VULKAN
|
||||
if (use_vulkan_compute)
|
||||
{
|
||||
g_warmup_loop_count = 10;
|
||||
|
||||
g_vkdev = ncnn::get_gpu_device(gpu_device);
|
||||
|
||||
g_blob_vkallocator = new ncnn::VkBlobAllocator(g_vkdev);
|
||||
g_staging_vkallocator = new ncnn::VkStagingAllocator(g_vkdev);
|
||||
}
|
||||
#endif // NCNN_VULKAN
|
||||
|
||||
// default option
|
||||
ncnn::Option opt;
|
||||
opt.lightmode = true;
|
||||
opt.num_threads = num_threads;
|
||||
opt.blob_allocator = &g_blob_pool_allocator;
|
||||
opt.workspace_allocator = &g_workspace_pool_allocator;
|
||||
#if NCNN_VULKAN
|
||||
opt.blob_vkallocator = g_blob_vkallocator;
|
||||
opt.workspace_vkallocator = g_blob_vkallocator;
|
||||
opt.staging_vkallocator = g_staging_vkallocator;
|
||||
#endif // NCNN_VULKAN
|
||||
opt.use_winograd_convolution = true;
|
||||
opt.use_sgemm_convolution = true;
|
||||
opt.use_int8_inference = true;
|
||||
opt.use_vulkan_compute = use_vulkan_compute;
|
||||
opt.use_fp16_packed = true;
|
||||
opt.use_fp16_storage = true;
|
||||
opt.use_fp16_arithmetic = true;
|
||||
opt.use_int8_storage = true;
|
||||
opt.use_int8_arithmetic = true;
|
||||
opt.use_packing_layout = true;
|
||||
opt.use_shader_pack8 = false;
|
||||
opt.use_image_storage = false;
|
||||
|
||||
ncnn::set_cpu_powersave(powersave);
|
||||
|
||||
ncnn::set_omp_dynamic(0);
|
||||
ncnn::set_omp_num_threads(num_threads);
|
||||
|
||||
fprintf(stderr, "loop_count = %d\n", g_loop_count);
|
||||
fprintf(stderr, "num_threads = %d\n", num_threads);
|
||||
fprintf(stderr, "powersave = %d\n", ncnn::get_cpu_powersave());
|
||||
fprintf(stderr, "gpu_device = %d\n", gpu_device);
|
||||
fprintf(stderr, "cooling_down = %d\n", (int)g_enable_cooling_down);
|
||||
|
||||
// run
|
||||
benchmark("squeezenet", ncnn::Mat(227, 227, 3), opt);
|
||||
|
||||
benchmark("squeezenet_int8", ncnn::Mat(227, 227, 3), opt);
|
||||
|
||||
benchmark("mobilenet", ncnn::Mat(224, 224, 3), opt);
|
||||
|
||||
benchmark("mobilenet_int8", ncnn::Mat(224, 224, 3), opt);
|
||||
|
||||
benchmark("mobilenet_v2", ncnn::Mat(224, 224, 3), opt);
|
||||
|
||||
// benchmark("mobilenet_v2_int8", ncnn::Mat(224, 224, 3), opt);
|
||||
|
||||
benchmark("mobilenet_v3", ncnn::Mat(224, 224, 3), opt);
|
||||
|
||||
benchmark("shufflenet", ncnn::Mat(224, 224, 3), opt);
|
||||
|
||||
benchmark("shufflenet_v2", ncnn::Mat(224, 224, 3), opt);
|
||||
|
||||
benchmark("mnasnet", ncnn::Mat(224, 224, 3), opt);
|
||||
|
||||
benchmark("proxylessnasnet", ncnn::Mat(224, 224, 3), opt);
|
||||
|
||||
benchmark("efficientnet_b0", ncnn::Mat(224, 224, 3), opt);
|
||||
|
||||
benchmark("efficientnetv2_b0", ncnn::Mat(224, 224, 3), opt);
|
||||
|
||||
benchmark("regnety_400m", ncnn::Mat(224, 224, 3), opt);
|
||||
|
||||
benchmark("blazeface", ncnn::Mat(128, 128, 3), opt);
|
||||
|
||||
benchmark("googlenet", ncnn::Mat(224, 224, 3), opt);
|
||||
|
||||
benchmark("googlenet_int8", ncnn::Mat(224, 224, 3), opt);
|
||||
|
||||
benchmark("resnet18", ncnn::Mat(224, 224, 3), opt);
|
||||
|
||||
benchmark("resnet18_int8", ncnn::Mat(224, 224, 3), opt);
|
||||
|
||||
benchmark("alexnet", ncnn::Mat(227, 227, 3), opt);
|
||||
|
||||
benchmark("vgg16", ncnn::Mat(224, 224, 3), opt);
|
||||
|
||||
benchmark("vgg16_int8", ncnn::Mat(224, 224, 3), opt);
|
||||
|
||||
benchmark("resnet50", ncnn::Mat(224, 224, 3), opt);
|
||||
|
||||
benchmark("resnet50_int8", ncnn::Mat(224, 224, 3), opt);
|
||||
|
||||
benchmark("squeezenet_ssd", ncnn::Mat(300, 300, 3), opt);
|
||||
|
||||
benchmark("squeezenet_ssd_int8", ncnn::Mat(300, 300, 3), opt);
|
||||
|
||||
benchmark("mobilenet_ssd", ncnn::Mat(300, 300, 3), opt);
|
||||
|
||||
benchmark("mobilenet_ssd_int8", ncnn::Mat(300, 300, 3), opt);
|
||||
|
||||
benchmark("mobilenet_yolo", ncnn::Mat(416, 416, 3), opt);
|
||||
|
||||
benchmark("mobilenetv2_yolov3", ncnn::Mat(352, 352, 3), opt);
|
||||
|
||||
benchmark("yolov4-tiny", ncnn::Mat(416, 416, 3), opt);
|
||||
|
||||
benchmark("nanodet_m", ncnn::Mat(320, 320, 3), opt);
|
||||
|
||||
benchmark("yolo-fastest-1.1", ncnn::Mat(320, 320, 3), opt);
|
||||
|
||||
benchmark("yolo-fastestv2", ncnn::Mat(352, 352, 3), opt);
|
||||
|
||||
#if NCNN_VULKAN
|
||||
delete g_blob_vkallocator;
|
||||
delete g_staging_vkallocator;
|
||||
#endif // NCNN_VULKAN
|
||||
|
||||
return 0;
|
||||
}
|
103
3rdparty/ncnn/benchmark/blazeface.param
vendored
Normal file
103
3rdparty/ncnn/benchmark/blazeface.param
vendored
Normal file
@ -0,0 +1,103 @@
|
||||
7767517
|
||||
101 117
|
||||
Input data 0 1 data 0=128 1=128 2=3
|
||||
Padding 75 1 1 data 75 0=1 1=2 2=1 3=2 4=0 5=0.000000e+00 7=0 8=0
|
||||
Convolution 76 1 1 75 76 0=24 1=5 11=5 2=1 12=1 3=2 13=2 4=0 14=0 15=0 16=0 5=1 6=1800
|
||||
ReLU 77 1 1 76 77
|
||||
Split splitncnn_0 1 2 77 77_splitncnn_0 77_splitncnn_1
|
||||
ConvolutionDepthWise 78 1 1 77_splitncnn_1 78 0=24 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=216 7=24
|
||||
Convolution 79 1 1 78 79 0=24 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=576
|
||||
BinaryOp 80 2 1 79 77_splitncnn_0 80 0=0
|
||||
ReLU 81 1 1 80 81
|
||||
Split splitncnn_1 1 2 81 81_splitncnn_0 81_splitncnn_1
|
||||
Padding 82 1 1 81_splitncnn_1 82 0=0 1=0 2=0 3=0 4=0 5=0.000000e+00 7=0 8=4
|
||||
ConvolutionDepthWise 83 1 1 81_splitncnn_0 83 0=24 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=216 7=24
|
||||
Convolution 84 1 1 83 84 0=28 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=672
|
||||
BinaryOp 85 2 1 84 82 85 0=0
|
||||
ReLU 86 1 1 85 86
|
||||
Split splitncnn_2 1 2 86 86_splitncnn_0 86_splitncnn_1
|
||||
Padding 87 1 1 86_splitncnn_1 87 0=0 1=2 2=0 3=2 4=0 5=0.000000e+00 7=0 8=0
|
||||
Pooling 88 1 1 86_splitncnn_0 88 0=0 1=2 11=2 2=2 12=2 3=0 13=0 14=0 15=0 5=1
|
||||
Padding 89 1 1 88 89 0=0 1=0 2=0 3=0 4=0 5=0.000000e+00 7=0 8=4
|
||||
ConvolutionDepthWise 90 1 1 87 90 0=28 1=3 11=3 2=1 12=1 3=2 13=2 4=0 14=0 15=0 16=0 5=1 6=252 7=28
|
||||
Convolution 91 1 1 90 91 0=32 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=896
|
||||
BinaryOp 92 2 1 91 89 92 0=0
|
||||
ReLU 93 1 1 92 93
|
||||
Split splitncnn_3 1 2 93 93_splitncnn_0 93_splitncnn_1
|
||||
Padding 94 1 1 93_splitncnn_1 94 0=0 1=0 2=0 3=0 4=0 5=0.000000e+00 7=0 8=4
|
||||
ConvolutionDepthWise 95 1 1 93_splitncnn_0 95 0=32 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=288 7=32
|
||||
Convolution 96 1 1 95 96 0=36 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=1152
|
||||
BinaryOp 97 2 1 96 94 97 0=0
|
||||
ReLU 98 1 1 97 98
|
||||
Split splitncnn_4 1 2 98 98_splitncnn_0 98_splitncnn_1
|
||||
Padding 99 1 1 98_splitncnn_1 99 0=0 1=0 2=0 3=0 4=0 5=0.000000e+00 7=0 8=6
|
||||
ConvolutionDepthWise 100 1 1 98_splitncnn_0 100 0=36 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=324 7=36
|
||||
Convolution 101 1 1 100 101 0=42 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=1512
|
||||
BinaryOp 102 2 1 101 99 102 0=0
|
||||
ReLU 103 1 1 102 103
|
||||
Split splitncnn_5 1 2 103 103_splitncnn_0 103_splitncnn_1
|
||||
Padding 104 1 1 103_splitncnn_1 104 0=0 1=2 2=0 3=2 4=0 5=0.000000e+00 7=0 8=0
|
||||
Pooling 105 1 1 103_splitncnn_0 105 0=0 1=2 11=2 2=2 12=2 3=0 13=0 14=0 15=0 5=1
|
||||
Padding 106 1 1 105 106 0=0 1=0 2=0 3=0 4=0 5=0.000000e+00 7=0 8=6
|
||||
ConvolutionDepthWise 107 1 1 104 107 0=42 1=3 11=3 2=1 12=1 3=2 13=2 4=0 14=0 15=0 16=0 5=1 6=378 7=42
|
||||
Convolution 108 1 1 107 108 0=48 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=2016
|
||||
BinaryOp 109 2 1 108 106 109 0=0
|
||||
ReLU 110 1 1 109 110
|
||||
Split splitncnn_6 1 2 110 110_splitncnn_0 110_splitncnn_1
|
||||
Padding 111 1 1 110_splitncnn_1 111 0=0 1=0 2=0 3=0 4=0 5=0.000000e+00 7=0 8=8
|
||||
ConvolutionDepthWise 112 1 1 110_splitncnn_0 112 0=48 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=432 7=48
|
||||
Convolution 113 1 1 112 113 0=56 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=2688
|
||||
BinaryOp 114 2 1 113 111 114 0=0
|
||||
ReLU 115 1 1 114 115
|
||||
Split splitncnn_7 1 2 115 115_splitncnn_0 115_splitncnn_1
|
||||
Padding 116 1 1 115_splitncnn_1 116 0=0 1=0 2=0 3=0 4=0 5=0.000000e+00 7=0 8=8
|
||||
ConvolutionDepthWise 117 1 1 115_splitncnn_0 117 0=56 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=504 7=56
|
||||
Convolution 118 1 1 117 118 0=64 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=3584
|
||||
BinaryOp 119 2 1 118 116 119 0=0
|
||||
ReLU 120 1 1 119 120
|
||||
Split splitncnn_8 1 2 120 120_splitncnn_0 120_splitncnn_1
|
||||
Padding 121 1 1 120_splitncnn_1 121 0=0 1=0 2=0 3=0 4=0 5=0.000000e+00 7=0 8=8
|
||||
ConvolutionDepthWise 122 1 1 120_splitncnn_0 122 0=64 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=576 7=64
|
||||
Convolution 123 1 1 122 123 0=72 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=4608
|
||||
BinaryOp 124 2 1 123 121 124 0=0
|
||||
ReLU 125 1 1 124 125
|
||||
Split splitncnn_9 1 2 125 125_splitncnn_0 125_splitncnn_1
|
||||
Padding 126 1 1 125_splitncnn_1 126 0=0 1=0 2=0 3=0 4=0 5=0.000000e+00 7=0 8=8
|
||||
ConvolutionDepthWise 127 1 1 125_splitncnn_0 127 0=72 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=648 7=72
|
||||
Convolution 128 1 1 127 128 0=80 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=5760
|
||||
BinaryOp 129 2 1 128 126 129 0=0
|
||||
ReLU 130 1 1 129 130
|
||||
Split splitncnn_10 1 2 130 130_splitncnn_0 130_splitncnn_1
|
||||
Padding 131 1 1 130_splitncnn_1 131 0=0 1=0 2=0 3=0 4=0 5=0.000000e+00 7=0 8=8
|
||||
ConvolutionDepthWise 132 1 1 130_splitncnn_0 132 0=80 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=720 7=80
|
||||
Convolution 133 1 1 132 133 0=88 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=7040
|
||||
BinaryOp 134 2 1 133 131 134 0=0
|
||||
ReLU 135 1 1 134 135
|
||||
Split splitncnn_11 1 2 135 135_splitncnn_0 135_splitncnn_1
|
||||
Padding 136 1 1 135_splitncnn_1 136 0=0 1=2 2=0 3=2 4=0 5=0.000000e+00 7=0 8=0
|
||||
Pooling 137 1 1 135_splitncnn_0 137 0=0 1=2 11=2 2=2 12=2 3=0 13=0 14=0 15=0 5=1
|
||||
Padding 138 1 1 137 138 0=0 1=0 2=0 3=0 4=0 5=0.000000e+00 7=0 8=8
|
||||
ConvolutionDepthWise 139 1 1 136 139 0=88 1=3 11=3 2=1 12=1 3=2 13=2 4=0 14=0 15=0 16=0 5=1 6=792 7=88
|
||||
Convolution 140 1 1 139 140 0=96 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=8448
|
||||
BinaryOp 141 2 1 140 138 141 0=0
|
||||
ReLU 142 1 1 141 142
|
||||
Split splitncnn_12 1 2 142 142_splitncnn_0 142_splitncnn_1
|
||||
ConvolutionDepthWise 143 1 1 142_splitncnn_1 143 0=96 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=864 7=96
|
||||
Convolution 144 1 1 143 144 0=96 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=9216
|
||||
BinaryOp 145 2 1 144 142_splitncnn_0 145 0=0
|
||||
ReLU 146 1 1 145 146
|
||||
Split splitncnn_13 1 2 146 146_splitncnn_0 146_splitncnn_1
|
||||
ConvolutionDepthWise 147 1 1 146_splitncnn_1 147 0=96 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=864 7=96
|
||||
Convolution 148 1 1 147 148 0=96 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=9216
|
||||
BinaryOp 149 2 1 148 146_splitncnn_0 149 0=0
|
||||
ReLU 150 1 1 149 150
|
||||
Split splitncnn_14 1 2 150 150_splitncnn_0 150_splitncnn_1
|
||||
ConvolutionDepthWise 151 1 1 150_splitncnn_1 151 0=96 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=864 7=96
|
||||
Convolution 152 1 1 151 152 0=96 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=9216
|
||||
BinaryOp 153 2 1 152 150_splitncnn_0 153 0=0
|
||||
ReLU 154 1 1 153 154
|
||||
Split splitncnn_15 1 2 154 154_splitncnn_0 154_splitncnn_1
|
||||
ConvolutionDepthWise 155 1 1 154_splitncnn_1 155 0=96 1=3 11=3 2=1 12=1 3=1 13=1 4=1 14=1 15=1 16=1 5=1 6=864 7=96
|
||||
Convolution 156 1 1 155 156 0=96 1=1 11=1 2=1 12=1 3=1 13=1 4=0 14=0 15=0 16=0 5=1 6=9216
|
||||
BinaryOp 157 2 1 156 154_splitncnn_0 157 0=0
|
||||
ReLU output 1 1 157 output
|
202
3rdparty/ncnn/benchmark/efficientnet_b0.param
vendored
Normal file
202
3rdparty/ncnn/benchmark/efficientnet_b0.param
vendored
Normal file
@ -0,0 +1,202 @@
|
||||
7767517
|
||||
200 225
|
||||
Input input.1 0 1 data -23330=4,3,224,224,3 0=224 1=224 2=3
|
||||
Convolution Conv_0 1 1 data 362 -23330=4,3,112,112,32 0=32 1=3 3=2 4=1 5=1 6=864
|
||||
Swish Mul_3 1 1 362 364 -23330=4,3,112,112,32
|
||||
ConvolutionDepthWise Conv_4 1 1 364 366 -23330=4,3,112,112,32 0=32 1=3 4=1 5=1 6=288 7=32
|
||||
Swish Mul_7 1 1 366 368 -23330=4,3,112,112,32
|
||||
Split splitncnn_0 1 2 368 368_splitncnn_0 368_splitncnn_1 -23330=8,3,112,112,32,3,112,112,32
|
||||
Pooling GlobalAveragePool_8 1 1 368_splitncnn_1 369 -23330=4,1,32,1,1 0=1 4=1
|
||||
InnerProduct Conv_9 1 1 369 370 -23330=4,1,8,1,1 0=8 1=1 2=256
|
||||
Swish Mul_11 1 1 370 372 -23330=4,1,8,1,1
|
||||
Convolution Conv_12 1 1 372 374 -23330=4,1,32,1,1 0=32 1=1 5=1 6=256 9=4
|
||||
BinaryOp Mul_14 2 1 368_splitncnn_0 374 375 -23330=4,3,112,112,32 0=2
|
||||
Convolution Conv_15 1 1 375 377 -23330=4,3,112,112,16 0=16 1=1 5=1 6=512
|
||||
Convolution Conv_17 1 1 377 379 -23330=4,3,112,112,96 0=96 1=1 5=1 6=1536
|
||||
Swish Mul_20 1 1 379 381 -23330=4,3,112,112,96
|
||||
ConvolutionDepthWise Conv_21 1 1 381 383 -23330=4,3,56,56,96 0=96 1=3 3=2 4=1 5=1 6=864 7=96
|
||||
Swish Mul_24 1 1 383 385 -23330=4,3,56,56,96
|
||||
Split splitncnn_1 1 2 385 385_splitncnn_0 385_splitncnn_1 -23330=8,3,56,56,96,3,56,56,96
|
||||
Pooling GlobalAveragePool_25 1 1 385_splitncnn_1 386 -23330=4,1,96,1,1 0=1 4=1
|
||||
InnerProduct Conv_26 1 1 386 387 -23330=4,1,4,1,1 0=4 1=1 2=384
|
||||
Swish Mul_28 1 1 387 389 -23330=4,1,4,1,1
|
||||
Convolution Conv_29 1 1 389 391 -23330=4,1,96,1,1 0=96 1=1 5=1 6=384 9=4
|
||||
BinaryOp Mul_31 2 1 385_splitncnn_0 391 392 -23330=4,3,56,56,96 0=2
|
||||
Convolution Conv_32 1 1 392 394 -23330=4,3,56,56,24 0=24 1=1 5=1 6=2304
|
||||
Split splitncnn_2 1 2 394 394_splitncnn_0 394_splitncnn_1 -23330=8,3,56,56,24,3,56,56,24
|
||||
Convolution Conv_34 1 1 394_splitncnn_1 396 -23330=4,3,56,56,144 0=144 1=1 5=1 6=3456
|
||||
Swish Mul_37 1 1 396 398 -23330=4,3,56,56,144
|
||||
ConvolutionDepthWise Conv_38 1 1 398 400 -23330=4,3,56,56,144 0=144 1=3 4=1 5=1 6=1296 7=144
|
||||
Swish Mul_41 1 1 400 402 -23330=4,3,56,56,144
|
||||
Split splitncnn_3 1 2 402 402_splitncnn_0 402_splitncnn_1 -23330=8,3,56,56,144,3,56,56,144
|
||||
Pooling GlobalAveragePool_42 1 1 402_splitncnn_1 403 -23330=4,1,144,1,1 0=1 4=1
|
||||
InnerProduct Conv_43 1 1 403 404 -23330=4,1,6,1,1 0=6 1=1 2=864
|
||||
Swish Mul_45 1 1 404 406 -23330=4,1,6,1,1
|
||||
Convolution Conv_46 1 1 406 408 -23330=4,1,144,1,1 0=144 1=1 5=1 6=864 9=4
|
||||
BinaryOp Mul_48 2 1 402_splitncnn_0 408 409 -23330=4,3,56,56,144 0=2
|
||||
Convolution Conv_49 1 1 409 411 -23330=4,3,56,56,24 0=24 1=1 5=1 6=3456
|
||||
BinaryOp Add_51 2 1 394_splitncnn_0 411 412 -23330=4,3,56,56,24
|
||||
Convolution Conv_52 1 1 412 414 -23330=4,3,56,56,144 0=144 1=1 5=1 6=3456
|
||||
Swish Mul_55 1 1 414 416 -23330=4,3,56,56,144
|
||||
ConvolutionDepthWise Conv_56 1 1 416 418 -23330=4,3,28,28,144 0=144 1=5 3=2 4=2 5=1 6=3600 7=144
|
||||
Swish Mul_59 1 1 418 420 -23330=4,3,28,28,144
|
||||
Split splitncnn_4 1 2 420 420_splitncnn_0 420_splitncnn_1 -23330=8,3,28,28,144,3,28,28,144
|
||||
Pooling GlobalAveragePool_60 1 1 420_splitncnn_1 421 -23330=4,1,144,1,1 0=1 4=1
|
||||
InnerProduct Conv_61 1 1 421 422 -23330=4,1,6,1,1 0=6 1=1 2=864
|
||||
Swish Mul_63 1 1 422 424 -23330=4,1,6,1,1
|
||||
Convolution Conv_64 1 1 424 426 -23330=4,1,144,1,1 0=144 1=1 5=1 6=864 9=4
|
||||
BinaryOp Mul_66 2 1 420_splitncnn_0 426 427 -23330=4,3,28,28,144 0=2
|
||||
Convolution Conv_67 1 1 427 429 -23330=4,3,28,28,40 0=40 1=1 5=1 6=5760
|
||||
Split splitncnn_5 1 2 429 429_splitncnn_0 429_splitncnn_1 -23330=8,3,28,28,40,3,28,28,40
|
||||
Convolution Conv_69 1 1 429_splitncnn_1 431 -23330=4,3,28,28,240 0=240 1=1 5=1 6=9600
|
||||
Swish Mul_72 1 1 431 433 -23330=4,3,28,28,240
|
||||
ConvolutionDepthWise Conv_73 1 1 433 435 -23330=4,3,28,28,240 0=240 1=5 4=2 5=1 6=6000 7=240
|
||||
Swish Mul_76 1 1 435 437 -23330=4,3,28,28,240
|
||||
Split splitncnn_6 1 2 437 437_splitncnn_0 437_splitncnn_1 -23330=8,3,28,28,240,3,28,28,240
|
||||
Pooling GlobalAveragePool_77 1 1 437_splitncnn_1 438 -23330=4,1,240,1,1 0=1 4=1
|
||||
InnerProduct Conv_78 1 1 438 439 -23330=4,1,10,1,1 0=10 1=1 2=2400
|
||||
Swish Mul_80 1 1 439 441 -23330=4,1,10,1,1
|
||||
Convolution Conv_81 1 1 441 443 -23330=4,1,240,1,1 0=240 1=1 5=1 6=2400 9=4
|
||||
BinaryOp Mul_83 2 1 437_splitncnn_0 443 444 -23330=4,3,28,28,240 0=2
|
||||
Convolution Conv_84 1 1 444 446 -23330=4,3,28,28,40 0=40 1=1 5=1 6=9600
|
||||
BinaryOp Add_86 2 1 429_splitncnn_0 446 447 -23330=4,3,28,28,40
|
||||
Convolution Conv_87 1 1 447 449 -23330=4,3,28,28,240 0=240 1=1 5=1 6=9600
|
||||
Swish Mul_90 1 1 449 451 -23330=4,3,28,28,240
|
||||
ConvolutionDepthWise Conv_91 1 1 451 453 -23330=4,3,14,14,240 0=240 1=3 3=2 4=1 5=1 6=2160 7=240
|
||||
Swish Mul_94 1 1 453 455 -23330=4,3,14,14,240
|
||||
Split splitncnn_7 1 2 455 455_splitncnn_0 455_splitncnn_1 -23330=8,3,14,14,240,3,14,14,240
|
||||
Pooling GlobalAveragePool_95 1 1 455_splitncnn_1 456 -23330=4,1,240,1,1 0=1 4=1
|
||||
InnerProduct Conv_96 1 1 456 457 -23330=4,1,10,1,1 0=10 1=1 2=2400
|
||||
Swish Mul_98 1 1 457 459 -23330=4,1,10,1,1
|
||||
Convolution Conv_99 1 1 459 461 -23330=4,1,240,1,1 0=240 1=1 5=1 6=2400 9=4
|
||||
BinaryOp Mul_101 2 1 455_splitncnn_0 461 462 -23330=4,3,14,14,240 0=2
|
||||
Convolution Conv_102 1 1 462 464 -23330=4,3,14,14,80 0=80 1=1 5=1 6=19200
|
||||
Split splitncnn_8 1 2 464 464_splitncnn_0 464_splitncnn_1 -23330=8,3,14,14,80,3,14,14,80
|
||||
Convolution Conv_104 1 1 464_splitncnn_1 466 -23330=4,3,14,14,480 0=480 1=1 5=1 6=38400
|
||||
Swish Mul_107 1 1 466 468 -23330=4,3,14,14,480
|
||||
ConvolutionDepthWise Conv_108 1 1 468 470 -23330=4,3,14,14,480 0=480 1=3 4=1 5=1 6=4320 7=480
|
||||
Swish Mul_111 1 1 470 472 -23330=4,3,14,14,480
|
||||
Split splitncnn_9 1 2 472 472_splitncnn_0 472_splitncnn_1 -23330=8,3,14,14,480,3,14,14,480
|
||||
Pooling GlobalAveragePool_112 1 1 472_splitncnn_1 473 -23330=4,1,480,1,1 0=1 4=1
|
||||
InnerProduct Conv_113 1 1 473 474 -23330=4,1,20,1,1 0=20 1=1 2=9600
|
||||
Swish Mul_115 1 1 474 476 -23330=4,1,20,1,1
|
||||
Convolution Conv_116 1 1 476 478 -23330=4,1,480,1,1 0=480 1=1 5=1 6=9600 9=4
|
||||
BinaryOp Mul_118 2 1 472_splitncnn_0 478 479 -23330=4,3,14,14,480 0=2
|
||||
Convolution Conv_119 1 1 479 481 -23330=4,3,14,14,80 0=80 1=1 5=1 6=38400
|
||||
BinaryOp Add_121 2 1 464_splitncnn_0 481 482 -23330=4,3,14,14,80
|
||||
Split splitncnn_10 1 2 482 482_splitncnn_0 482_splitncnn_1 -23330=8,3,14,14,80,3,14,14,80
|
||||
Convolution Conv_122 1 1 482_splitncnn_1 484 -23330=4,3,14,14,480 0=480 1=1 5=1 6=38400
|
||||
Swish Mul_125 1 1 484 486 -23330=4,3,14,14,480
|
||||
ConvolutionDepthWise Conv_126 1 1 486 488 -23330=4,3,14,14,480 0=480 1=3 4=1 5=1 6=4320 7=480
|
||||
Swish Mul_129 1 1 488 490 -23330=4,3,14,14,480
|
||||
Split splitncnn_11 1 2 490 490_splitncnn_0 490_splitncnn_1 -23330=8,3,14,14,480,3,14,14,480
|
||||
Pooling GlobalAveragePool_130 1 1 490_splitncnn_1 491 -23330=4,1,480,1,1 0=1 4=1
|
||||
InnerProduct Conv_131 1 1 491 492 -23330=4,1,20,1,1 0=20 1=1 2=9600
|
||||
Swish Mul_133 1 1 492 494 -23330=4,1,20,1,1
|
||||
Convolution Conv_134 1 1 494 496 -23330=4,1,480,1,1 0=480 1=1 5=1 6=9600 9=4
|
||||
BinaryOp Mul_136 2 1 490_splitncnn_0 496 497 -23330=4,3,14,14,480 0=2
|
||||
Convolution Conv_137 1 1 497 499 -23330=4,3,14,14,80 0=80 1=1 5=1 6=38400
|
||||
BinaryOp Add_139 2 1 482_splitncnn_0 499 500 -23330=4,3,14,14,80
|
||||
Convolution Conv_140 1 1 500 502 -23330=4,3,14,14,480 0=480 1=1 5=1 6=38400
|
||||
Swish Mul_143 1 1 502 504 -23330=4,3,14,14,480
|
||||
ConvolutionDepthWise Conv_144 1 1 504 506 -23330=4,3,14,14,480 0=480 1=5 4=2 5=1 6=12000 7=480
|
||||
Swish Mul_147 1 1 506 508 -23330=4,3,14,14,480
|
||||
Split splitncnn_12 1 2 508 508_splitncnn_0 508_splitncnn_1 -23330=8,3,14,14,480,3,14,14,480
|
||||
Pooling GlobalAveragePool_148 1 1 508_splitncnn_1 509 -23330=4,1,480,1,1 0=1 4=1
|
||||
InnerProduct Conv_149 1 1 509 510 -23330=4,1,20,1,1 0=20 1=1 2=9600
|
||||
Swish Mul_151 1 1 510 512 -23330=4,1,20,1,1
|
||||
Convolution Conv_152 1 1 512 514 -23330=4,1,480,1,1 0=480 1=1 5=1 6=9600 9=4
|
||||
BinaryOp Mul_154 2 1 508_splitncnn_0 514 515 -23330=4,3,14,14,480 0=2
|
||||
Convolution Conv_155 1 1 515 517 -23330=4,3,14,14,112 0=112 1=1 5=1 6=53760
|
||||
Split splitncnn_13 1 2 517 517_splitncnn_0 517_splitncnn_1 -23330=8,3,14,14,112,3,14,14,112
|
||||
Convolution Conv_157 1 1 517_splitncnn_1 519 -23330=4,3,14,14,672 0=672 1=1 5=1 6=75264
|
||||
Swish Mul_160 1 1 519 521 -23330=4,3,14,14,672
|
||||
ConvolutionDepthWise Conv_161 1 1 521 523 -23330=4,3,14,14,672 0=672 1=5 4=2 5=1 6=16800 7=672
|
||||
Swish Mul_164 1 1 523 525 -23330=4,3,14,14,672
|
||||
Split splitncnn_14 1 2 525 525_splitncnn_0 525_splitncnn_1 -23330=8,3,14,14,672,3,14,14,672
|
||||
Pooling GlobalAveragePool_165 1 1 525_splitncnn_1 526 -23330=4,1,672,1,1 0=1 4=1
|
||||
InnerProduct Conv_166 1 1 526 527 -23330=4,1,28,1,1 0=28 1=1 2=18816
|
||||
Swish Mul_168 1 1 527 529 -23330=4,1,28,1,1
|
||||
Convolution Conv_169 1 1 529 531 -23330=4,1,672,1,1 0=672 1=1 5=1 6=18816 9=4
|
||||
BinaryOp Mul_171 2 1 525_splitncnn_0 531 532 -23330=4,3,14,14,672 0=2
|
||||
Convolution Conv_172 1 1 532 534 -23330=4,3,14,14,112 0=112 1=1 5=1 6=75264
|
||||
BinaryOp Add_174 2 1 517_splitncnn_0 534 535 -23330=4,3,14,14,112
|
||||
Split splitncnn_15 1 2 535 535_splitncnn_0 535_splitncnn_1 -23330=8,3,14,14,112,3,14,14,112
|
||||
Convolution Conv_175 1 1 535_splitncnn_1 537 -23330=4,3,14,14,672 0=672 1=1 5=1 6=75264
|
||||
Swish Mul_178 1 1 537 539 -23330=4,3,14,14,672
|
||||
ConvolutionDepthWise Conv_179 1 1 539 541 -23330=4,3,14,14,672 0=672 1=5 4=2 5=1 6=16800 7=672
|
||||
Swish Mul_182 1 1 541 543 -23330=4,3,14,14,672
|
||||
Split splitncnn_16 1 2 543 543_splitncnn_0 543_splitncnn_1 -23330=8,3,14,14,672,3,14,14,672
|
||||
Pooling GlobalAveragePool_183 1 1 543_splitncnn_1 544 -23330=4,1,672,1,1 0=1 4=1
|
||||
InnerProduct Conv_184 1 1 544 545 -23330=4,1,28,1,1 0=28 1=1 2=18816
|
||||
Swish Mul_186 1 1 545 547 -23330=4,1,28,1,1
|
||||
Convolution Conv_187 1 1 547 549 -23330=4,1,672,1,1 0=672 1=1 5=1 6=18816 9=4
|
||||
BinaryOp Mul_189 2 1 543_splitncnn_0 549 550 -23330=4,3,14,14,672 0=2
|
||||
Convolution Conv_190 1 1 550 552 -23330=4,3,14,14,112 0=112 1=1 5=1 6=75264
|
||||
BinaryOp Add_192 2 1 535_splitncnn_0 552 553 -23330=4,3,14,14,112
|
||||
Convolution Conv_193 1 1 553 555 -23330=4,3,14,14,672 0=672 1=1 5=1 6=75264
|
||||
Swish Mul_196 1 1 555 557 -23330=4,3,14,14,672
|
||||
ConvolutionDepthWise Conv_197 1 1 557 559 -23330=4,3,7,7,672 0=672 1=5 3=2 4=2 5=1 6=16800 7=672
|
||||
Swish Mul_200 1 1 559 561 -23330=4,3,7,7,672
|
||||
Split splitncnn_17 1 2 561 561_splitncnn_0 561_splitncnn_1 -23330=8,3,7,7,672,3,7,7,672
|
||||
Pooling GlobalAveragePool_201 1 1 561_splitncnn_1 562 -23330=4,1,672,1,1 0=1 4=1
|
||||
InnerProduct Conv_202 1 1 562 563 -23330=4,1,28,1,1 0=28 1=1 2=18816
|
||||
Swish Mul_204 1 1 563 565 -23330=4,1,28,1,1
|
||||
Convolution Conv_205 1 1 565 567 -23330=4,1,672,1,1 0=672 1=1 5=1 6=18816 9=4
|
||||
BinaryOp Mul_207 2 1 561_splitncnn_0 567 568 -23330=4,3,7,7,672 0=2
|
||||
Convolution Conv_208 1 1 568 570 -23330=4,3,7,7,192 0=192 1=1 5=1 6=129024
|
||||
Split splitncnn_18 1 2 570 570_splitncnn_0 570_splitncnn_1 -23330=8,3,7,7,192,3,7,7,192
|
||||
Convolution Conv_210 1 1 570_splitncnn_1 572 -23330=4,3,7,7,1152 0=1152 1=1 5=1 6=221184
|
||||
Swish Mul_213 1 1 572 574 -23330=4,3,7,7,1152
|
||||
ConvolutionDepthWise Conv_214 1 1 574 576 -23330=4,3,7,7,1152 0=1152 1=5 4=2 5=1 6=28800 7=1152
|
||||
Swish Mul_217 1 1 576 578 -23330=4,3,7,7,1152
|
||||
Split splitncnn_19 1 2 578 578_splitncnn_0 578_splitncnn_1 -23330=8,3,7,7,1152,3,7,7,1152
|
||||
Pooling GlobalAveragePool_218 1 1 578_splitncnn_1 579 -23330=4,1,1152,1,1 0=1 4=1
|
||||
InnerProduct Conv_219 1 1 579 580 -23330=4,1,48,1,1 0=48 1=1 2=55296
|
||||
Swish Mul_221 1 1 580 582 -23330=4,1,48,1,1
|
||||
Convolution Conv_222 1 1 582 584 -23330=4,1,1152,1,1 0=1152 1=1 5=1 6=55296 9=4
|
||||
BinaryOp Mul_224 2 1 578_splitncnn_0 584 585 -23330=4,3,7,7,1152 0=2
|
||||
Convolution Conv_225 1 1 585 587 -23330=4,3,7,7,192 0=192 1=1 5=1 6=221184
|
||||
BinaryOp Add_227 2 1 570_splitncnn_0 587 588 -23330=4,3,7,7,192
|
||||
Split splitncnn_20 1 2 588 588_splitncnn_0 588_splitncnn_1 -23330=8,3,7,7,192,3,7,7,192
|
||||
Convolution Conv_228 1 1 588_splitncnn_1 590 -23330=4,3,7,7,1152 0=1152 1=1 5=1 6=221184
|
||||
Swish Mul_231 1 1 590 592 -23330=4,3,7,7,1152
|
||||
ConvolutionDepthWise Conv_232 1 1 592 594 -23330=4,3,7,7,1152 0=1152 1=5 4=2 5=1 6=28800 7=1152
|
||||
Swish Mul_235 1 1 594 596 -23330=4,3,7,7,1152
|
||||
Split splitncnn_21 1 2 596 596_splitncnn_0 596_splitncnn_1 -23330=8,3,7,7,1152,3,7,7,1152
|
||||
Pooling GlobalAveragePool_236 1 1 596_splitncnn_1 597 -23330=4,1,1152,1,1 0=1 4=1
|
||||
InnerProduct Conv_237 1 1 597 598 -23330=4,1,48,1,1 0=48 1=1 2=55296
|
||||
Swish Mul_239 1 1 598 600 -23330=4,1,48,1,1
|
||||
Convolution Conv_240 1 1 600 602 -23330=4,1,1152,1,1 0=1152 1=1 5=1 6=55296 9=4
|
||||
BinaryOp Mul_242 2 1 596_splitncnn_0 602 603 -23330=4,3,7,7,1152 0=2
|
||||
Convolution Conv_243 1 1 603 605 -23330=4,3,7,7,192 0=192 1=1 5=1 6=221184
|
||||
BinaryOp Add_245 2 1 588_splitncnn_0 605 606 -23330=4,3,7,7,192
|
||||
Split splitncnn_22 1 2 606 606_splitncnn_0 606_splitncnn_1 -23330=8,3,7,7,192,3,7,7,192
|
||||
Convolution Conv_246 1 1 606_splitncnn_1 608 -23330=4,3,7,7,1152 0=1152 1=1 5=1 6=221184
|
||||
Swish Mul_249 1 1 608 610 -23330=4,3,7,7,1152
|
||||
ConvolutionDepthWise Conv_250 1 1 610 612 -23330=4,3,7,7,1152 0=1152 1=5 4=2 5=1 6=28800 7=1152
|
||||
Swish Mul_253 1 1 612 614 -23330=4,3,7,7,1152
|
||||
Split splitncnn_23 1 2 614 614_splitncnn_0 614_splitncnn_1 -23330=8,3,7,7,1152,3,7,7,1152
|
||||
Pooling GlobalAveragePool_254 1 1 614_splitncnn_1 615 -23330=4,1,1152,1,1 0=1 4=1
|
||||
InnerProduct Conv_255 1 1 615 616 -23330=4,1,48,1,1 0=48 1=1 2=55296
|
||||
Swish Mul_257 1 1 616 618 -23330=4,1,48,1,1
|
||||
Convolution Conv_258 1 1 618 620 -23330=4,1,1152,1,1 0=1152 1=1 5=1 6=55296 9=4
|
||||
BinaryOp Mul_260 2 1 614_splitncnn_0 620 621 -23330=4,3,7,7,1152 0=2
|
||||
Convolution Conv_261 1 1 621 623 -23330=4,3,7,7,192 0=192 1=1 5=1 6=221184
|
||||
BinaryOp Add_263 2 1 606_splitncnn_0 623 624 -23330=4,3,7,7,192
|
||||
Convolution Conv_264 1 1 624 626 -23330=4,3,7,7,1152 0=1152 1=1 5=1 6=221184
|
||||
Swish Mul_267 1 1 626 628 -23330=4,3,7,7,1152
|
||||
ConvolutionDepthWise Conv_268 1 1 628 630 -23330=4,3,7,7,1152 0=1152 1=3 4=1 5=1 6=10368 7=1152
|
||||
Swish Mul_271 1 1 630 632 -23330=4,3,7,7,1152
|
||||
Split splitncnn_24 1 2 632 632_splitncnn_0 632_splitncnn_1 -23330=8,3,7,7,1152,3,7,7,1152
|
||||
Pooling GlobalAveragePool_272 1 1 632_splitncnn_1 633 -23330=4,1,1152,1,1 0=1 4=1
|
||||
InnerProduct Conv_273 1 1 633 634 -23330=4,1,48,1,1 0=48 1=1 2=55296
|
||||
Swish Mul_275 1 1 634 636 -23330=4,1,48,1,1
|
||||
Convolution Conv_276 1 1 636 638 -23330=4,1,1152,1,1 0=1152 1=1 5=1 6=55296 9=4
|
||||
BinaryOp Mul_278 2 1 632_splitncnn_0 638 639 -23330=4,3,7,7,1152 0=2
|
||||
Convolution Conv_279 1 1 639 641 -23330=4,3,7,7,320 0=320 1=1 5=1 6=368640
|
||||
Convolution Conv_281 1 1 641 643 -23330=4,3,7,7,1280 0=1280 1=1 5=1 6=409600
|
||||
Swish Mul_284 1 1 643 645 -23330=4,3,7,7,1280
|
||||
Pooling GlobalAveragePool_285 1 1 645 654 -23330=4,1,1280,1,1 0=1 4=1
|
||||
InnerProduct Gemm_292 1 1 654 655 -23330=4,1,1000,1,1 0=1000 1=1 2=1280000
|
||||
Softmax prob 1 1 655 output -23330=4,1,1000,1,1
|
259
3rdparty/ncnn/benchmark/efficientnetv2_b0.param
vendored
Normal file
259
3rdparty/ncnn/benchmark/efficientnetv2_b0.param
vendored
Normal file
@ -0,0 +1,259 @@
|
||||
7767517
|
||||
257 288
|
||||
MemoryData 110:12 0 1 110:12 -23330=4,1,112,1,1 0=112
|
||||
MemoryData 133:12 0 1 133:12 -23330=4,1,192,1,1 0=192
|
||||
MemoryData 144:12 0 1 144:12 -23330=4,1,192,1,1 0=192
|
||||
MemoryData 14:11 0 1 14:11 -23330=4,1,32,1,1 0=32
|
||||
MemoryData 155:12 0 1 155:12 -23330=4,1,192,1,1 0=192
|
||||
MemoryData 166:12 0 1 166:12 -23330=4,1,192,1,1 0=192
|
||||
MemoryData 177:12 0 1 177:12 -23330=4,1,192,1,1 0=192
|
||||
MemoryData 188:12 0 1 188:12 -23330=4,1,192,1,1 0=192
|
||||
MemoryData 199:12 0 1 199:12 -23330=4,1,192,1,1 0=192
|
||||
MemoryData 22:11 0 1 22:11 -23330=4,1,48,1,1 0=48
|
||||
MemoryData 33:11 0 1 33:11 -23330=4,1,112,1,1 0=112
|
||||
MemoryData 44:11 0 1 44:11 -23330=4,1,112,1,1 0=112
|
||||
MemoryData 55:11 0 1 55:11 -23330=4,1,112,1,1 0=112
|
||||
MemoryData 77:11 0 1 77:11 -23330=4,1,96,1,1 0=96
|
||||
MemoryData 88:11 0 1 88:11 -23330=4,1,96,1,1 0=96
|
||||
Input op_201 0 1 204:12 -23330=4,3,224,224,3 0=224 1=224 2=3
|
||||
Convolution op_202 1 1 204:12 206:12 -23330=4,3,112,112,32 0=32 1=3 3=2 4=-233 5=1 6=864
|
||||
Swish op_203 1 1 206:12 208:12 -23330=4,3,112,112,32
|
||||
Convolution op_204 1 1 208:12 210:12 -23330=4,3,112,112,16 0=16 1=3 4=-233 5=1 6=4608
|
||||
Swish op_205 1 1 210:12 212:12_splitncnn_0 -23330=4,3,112,112,16
|
||||
Convolution op_207 1 1 212:12_splitncnn_0 215:12 -23330=4,3,56,56,64 0=64 1=3 3=2 4=-233 5=1 6=9216
|
||||
Swish op_208 1 1 215:12 217:12 -23330=4,3,56,56,64
|
||||
Convolution op_209 1 1 217:12 219:12 -23330=4,3,56,56,32 0=32 1=1 4=-233 5=1 6=2048
|
||||
Split splitncnn_1 1 2 219:12 219:12_splitncnn_0 219:12_splitncnn_1 -23330=8,3,56,56,32,3,56,56,32
|
||||
Convolution op_210 1 1 219:12_splitncnn_1 221:12 -23330=4,3,56,56,128 0=128 1=3 4=-233 5=1 6=36864
|
||||
Swish op_211 1 1 221:12 223:12 -23330=4,3,56,56,128
|
||||
Convolution op_212 1 1 223:12 224:12 -23330=4,3,56,56,32 0=32 1=1 4=-233 6=4096
|
||||
Eltwise op_213 2 1 219:12_splitncnn_0 224:12 225:12 -23330=4,3,56,56,32 0=1
|
||||
BinaryOp op_214 2 1 225:12 14:11 226:12_splitncnn_0 -23330=4,3,56,56,32
|
||||
Convolution op_216 1 1 226:12_splitncnn_0 229:12 -23330=4,3,28,28,128 0=128 1=3 3=2 4=-233 5=1 6=36864
|
||||
Swish op_217 1 1 229:12 231:12 -23330=4,3,28,28,128
|
||||
Convolution op_218 1 1 231:12 233:12 -23330=4,3,28,28,48 0=48 1=1 4=-233 5=1 6=6144
|
||||
Split splitncnn_3 1 2 233:12 233:12_splitncnn_0 233:12_splitncnn_1 -23330=8,3,28,28,48,3,28,28,48
|
||||
Convolution op_219 1 1 233:12_splitncnn_1 235:12 -23330=4,3,28,28,192 0=192 1=3 4=-233 5=1 6=82944
|
||||
Swish op_220 1 1 235:12 237:12 -23330=4,3,28,28,192
|
||||
Convolution op_221 1 1 237:12 238:12 -23330=4,3,28,28,48 0=48 1=1 4=-233 6=9216
|
||||
Eltwise op_222 2 1 233:12_splitncnn_0 238:12 239:12 -23330=4,3,28,28,48 0=1
|
||||
BinaryOp op_223 2 1 239:12 22:11 240:12_splitncnn_0 -23330=4,3,28,28,48
|
||||
Convolution op_225 1 1 240:12_splitncnn_0 243:12 -23330=4,3,28,28,192 0=192 1=1 4=-233 5=1 6=9216
|
||||
Swish op_226 1 1 243:12 245:12 -23330=4,3,28,28,192
|
||||
ConvolutionDepthWise op_227 1 1 245:12 248:12 -23330=4,3,14,14,192 0=192 1=3 3=2 4=-233 5=1 6=1728 7=192
|
||||
Swish op_229 1 1 248:12 250:12 -23330=4,3,14,14,192
|
||||
Split splitncnn_5 1 2 250:12 250:12_splitncnn_0 250:12_splitncnn_1 -23330=8,3,14,14,192,3,14,14,192
|
||||
Reduction op_230 1 1 250:12_splitncnn_1 251:12 -23330=4,3,1,1,192 0=3 1=0 -23303=2,1,2 4=1 5=1
|
||||
Convolution op_231 1 1 251:12 253:12 -23330=4,3,1,1,12 0=12 1=1 4=-233 5=1 6=2304
|
||||
Swish op_232 1 1 253:12 255:12 -23330=4,3,1,1,12
|
||||
Convolution op_233 1 1 255:12 258:12 -23330=4,3,1,1,192 0=192 1=1 4=-233 5=1 6=2304 9=4
|
||||
BinaryOp op_235 2 1 250:12_splitncnn_0 258:12 259:12 -23330=4,3,14,14,192 0=2
|
||||
Convolution op_236 1 1 259:12 261:12 -23330=4,3,14,14,96 0=96 1=1 4=-233 5=1 6=18432
|
||||
Split splitncnn_6 1 2 261:12 261:12_splitncnn_0 261:12_splitncnn_1 -23330=8,3,14,14,96,3,14,14,96
|
||||
Convolution op_237 1 1 261:12_splitncnn_1 263:12 -23330=4,3,14,14,384 0=384 1=1 4=-233 5=1 6=36864
|
||||
Swish op_238 1 1 263:12 265:12 -23330=4,3,14,14,384
|
||||
ConvolutionDepthWise op_239 1 1 265:12 268:12 -23330=4,3,14,14,384 0=384 1=3 4=-233 5=1 6=3456 7=384
|
||||
Swish op_241 1 1 268:12 270:12 -23330=4,3,14,14,384
|
||||
Split splitncnn_7 1 2 270:12 270:12_splitncnn_0 270:12_splitncnn_1 -23330=8,3,14,14,384,3,14,14,384
|
||||
Reduction op_242 1 1 270:12_splitncnn_1 271:12 -23330=4,3,1,1,384 0=3 1=0 -23303=2,1,2 4=1 5=1
|
||||
Convolution op_243 1 1 271:12 273:12 -23330=4,3,1,1,24 0=24 1=1 4=-233 5=1 6=9216
|
||||
Swish op_244 1 1 273:12 275:12 -23330=4,3,1,1,24
|
||||
Convolution op_245 1 1 275:12 278:12 -23330=4,3,1,1,384 0=384 1=1 4=-233 5=1 6=9216 9=4
|
||||
BinaryOp op_247 2 1 270:12_splitncnn_0 278:12 279:12 -23330=4,3,14,14,384 0=2
|
||||
Convolution op_248 1 1 279:12 280:12 -23330=4,3,14,14,96 0=96 1=1 4=-233 6=36864
|
||||
Eltwise op_249 2 1 261:12_splitncnn_0 280:12 281:12 -23330=4,3,14,14,96 0=1
|
||||
BinaryOp op_250 2 1 281:12 77:11 282:12 -23330=4,3,14,14,96
|
||||
Split splitncnn_8 1 2 282:12 282:12_splitncnn_0 282:12_splitncnn_1 -23330=8,3,14,14,96,3,14,14,96
|
||||
Convolution op_251 1 1 282:12_splitncnn_1 284:12 -23330=4,3,14,14,384 0=384 1=1 4=-233 5=1 6=36864
|
||||
Swish op_252 1 1 284:12 286:12 -23330=4,3,14,14,384
|
||||
ConvolutionDepthWise op_253 1 1 286:12 289:12 -23330=4,3,14,14,384 0=384 1=3 4=-233 5=1 6=3456 7=384
|
||||
Swish op_255 1 1 289:12 291:12 -23330=4,3,14,14,384
|
||||
Split splitncnn_9 1 2 291:12 291:12_splitncnn_0 291:12_splitncnn_1 -23330=8,3,14,14,384,3,14,14,384
|
||||
Reduction op_256 1 1 291:12_splitncnn_1 292:12 -23330=4,3,1,1,384 0=3 1=0 -23303=2,1,2 4=1 5=1
|
||||
Convolution op_257 1 1 292:12 294:12 -23330=4,3,1,1,24 0=24 1=1 4=-233 5=1 6=9216
|
||||
Swish op_258 1 1 294:12 296:12 -23330=4,3,1,1,24
|
||||
Convolution op_259 1 1 296:12 299:12 -23330=4,3,1,1,384 0=384 1=1 4=-233 5=1 6=9216 9=4
|
||||
BinaryOp op_261 2 1 291:12_splitncnn_0 299:12 300:12 -23330=4,3,14,14,384 0=2
|
||||
Convolution op_262 1 1 300:12 301:12 -23330=4,3,14,14,96 0=96 1=1 4=-233 6=36864
|
||||
Eltwise op_263 2 1 282:12_splitncnn_0 301:12 302:12 -23330=4,3,14,14,96 0=1
|
||||
BinaryOp op_264 2 1 302:12 88:11 303:12 -23330=4,3,14,14,96
|
||||
Convolution op_265 1 1 303:12 305:12 -23330=4,3,14,14,576 0=576 1=1 4=-233 5=1 6=55296
|
||||
Swish op_266 1 1 305:12 307:12 -23330=4,3,14,14,576
|
||||
ConvolutionDepthWise op_267 1 1 307:12 310:12 -23330=4,3,14,14,576 0=576 1=3 4=-233 5=1 6=5184 7=576
|
||||
Swish op_269 1 1 310:12 312:12 -23330=4,3,14,14,576
|
||||
Split splitncnn_10 1 2 312:12 312:12_splitncnn_0 312:12_splitncnn_1 -23330=8,3,14,14,576,3,14,14,576
|
||||
Reduction op_270 1 1 312:12_splitncnn_1 313:12 -23330=4,3,1,1,576 0=3 1=0 -23303=2,1,2 4=1 5=1
|
||||
Convolution op_271 1 1 313:12 315:12 -23330=4,3,1,1,24 0=24 1=1 4=-233 5=1 6=13824
|
||||
Swish op_272 1 1 315:12 317:12 -23330=4,3,1,1,24
|
||||
Convolution op_273 1 1 317:12 320:12 -23330=4,3,1,1,576 0=576 1=1 4=-233 5=1 6=13824 9=4
|
||||
BinaryOp op_275 2 1 312:12_splitncnn_0 320:12 321:12 -23330=4,3,14,14,576 0=2
|
||||
Convolution op_276 1 1 321:12 323:12 -23330=4,3,14,14,112 0=112 1=1 4=-233 5=1 6=64512
|
||||
Split splitncnn_11 1 2 323:12 323:12_splitncnn_0 323:12_splitncnn_1 -23330=8,3,14,14,112,3,14,14,112
|
||||
Convolution op_277 1 1 323:12_splitncnn_1 325:12 -23330=4,3,14,14,672 0=672 1=1 4=-233 5=1 6=75264
|
||||
Swish op_278 1 1 325:12 327:12 -23330=4,3,14,14,672
|
||||
ConvolutionDepthWise op_279 1 1 327:12 330:12 -23330=4,3,14,14,672 0=672 1=3 4=-233 5=1 6=6048 7=672
|
||||
Swish op_281 1 1 330:12 332:12 -23330=4,3,14,14,672
|
||||
Split splitncnn_12 1 2 332:12 332:12_splitncnn_0 332:12_splitncnn_1 -23330=8,3,14,14,672,3,14,14,672
|
||||
Reduction op_282 1 1 332:12_splitncnn_1 333:12 -23330=4,3,1,1,672 0=3 1=0 -23303=2,1,2 4=1 5=1
|
||||
Convolution op_283 1 1 333:12 335:12 -23330=4,3,1,1,28 0=28 1=1 4=-233 5=1 6=18816
|
||||
Swish op_284 1 1 335:12 337:12 -23330=4,3,1,1,28
|
||||
Convolution op_285 1 1 337:12 340:12 -23330=4,3,1,1,672 0=672 1=1 4=-233 5=1 6=18816 9=4
|
||||
BinaryOp op_287 2 1 332:12_splitncnn_0 340:12 341:12 -23330=4,3,14,14,672 0=2
|
||||
Convolution op_288 1 1 341:12 342:12 -23330=4,3,14,14,112 0=112 1=1 4=-233 6=75264
|
||||
Eltwise op_289 2 1 323:12_splitncnn_0 342:12 343:12 -23330=4,3,14,14,112 0=1
|
||||
BinaryOp op_290 2 1 343:12 110:12 344:12 -23330=4,3,14,14,112
|
||||
Split splitncnn_13 1 2 344:12 344:12_splitncnn_0 344:12_splitncnn_1 -23330=8,3,14,14,112,3,14,14,112
|
||||
Convolution op_291 1 1 344:12_splitncnn_1 346:12 -23330=4,3,14,14,672 0=672 1=1 4=-233 5=1 6=75264
|
||||
Swish op_292 1 1 346:12 348:12 -23330=4,3,14,14,672
|
||||
ConvolutionDepthWise op_293 1 1 348:12 351:12 -23330=4,3,14,14,672 0=672 1=3 4=-233 5=1 6=6048 7=672
|
||||
Swish op_295 1 1 351:12 353:12 -23330=4,3,14,14,672
|
||||
Split splitncnn_14 1 2 353:12 353:12_splitncnn_0 353:12_splitncnn_1 -23330=8,3,14,14,672,3,14,14,672
|
||||
Reduction op_296 1 1 353:12_splitncnn_1 354:12 -23330=4,3,1,1,672 0=3 1=0 -23303=2,1,2 4=1 5=1
|
||||
Convolution op_297 1 1 354:12 356:12 -23330=4,3,1,1,28 0=28 1=1 4=-233 5=1 6=18816
|
||||
Swish op_298 1 1 356:12 358:12 -23330=4,3,1,1,28
|
||||
Convolution op_299 1 1 358:12 361:12 -23330=4,3,1,1,672 0=672 1=1 4=-233 5=1 6=18816 9=4
|
||||
BinaryOp op_301 2 1 353:12_splitncnn_0 361:12 362:12 -23330=4,3,14,14,672 0=2
|
||||
Convolution op_302 1 1 362:12 363:12 -23330=4,3,14,14,112 0=112 1=1 4=-233 6=75264
|
||||
Eltwise op_303 2 1 363:12 344:12_splitncnn_0 364:12 -23330=4,3,14,14,112 0=1
|
||||
BinaryOp op_304 2 1 364:12 33:11 365:12 -23330=4,3,14,14,112
|
||||
Split splitncnn_15 1 2 365:12 365:12_splitncnn_0 365:12_splitncnn_1 -23330=8,3,14,14,112,3,14,14,112
|
||||
Convolution op_305 1 1 365:12_splitncnn_1 367:12 -23330=4,3,14,14,672 0=672 1=1 4=-233 5=1 6=75264
|
||||
Swish op_306 1 1 367:12 369:12 -23330=4,3,14,14,672
|
||||
ConvolutionDepthWise op_307 1 1 369:12 372:12 -23330=4,3,14,14,672 0=672 1=3 4=-233 5=1 6=6048 7=672
|
||||
Swish op_309 1 1 372:12 374:12 -23330=4,3,14,14,672
|
||||
Split splitncnn_16 1 2 374:12 374:12_splitncnn_0 374:12_splitncnn_1 -23330=8,3,14,14,672,3,14,14,672
|
||||
Reduction op_310 1 1 374:12_splitncnn_1 375:12 -23330=4,3,1,1,672 0=3 1=0 -23303=2,1,2 4=1 5=1
|
||||
Convolution op_311 1 1 375:12 377:12 -23330=4,3,1,1,28 0=28 1=1 4=-233 5=1 6=18816
|
||||
Swish op_312 1 1 377:12 379:12 -23330=4,3,1,1,28
|
||||
Convolution op_313 1 1 379:12 382:12 -23330=4,3,1,1,672 0=672 1=1 4=-233 5=1 6=18816 9=4
|
||||
BinaryOp op_315 2 1 374:12_splitncnn_0 382:12 383:12 -23330=4,3,14,14,672 0=2
|
||||
Convolution op_316 1 1 383:12 384:12 -23330=4,3,14,14,112 0=112 1=1 4=-233 6=75264
|
||||
Eltwise op_317 2 1 365:12_splitncnn_0 384:12 385:12 -23330=4,3,14,14,112 0=1
|
||||
BinaryOp op_318 2 1 385:12 44:11 386:12 -23330=4,3,14,14,112
|
||||
Split splitncnn_17 1 2 386:12 386:12_splitncnn_0 386:12_splitncnn_1 -23330=8,3,14,14,112,3,14,14,112
|
||||
Convolution op_319 1 1 386:12_splitncnn_1 388:12 -23330=4,3,14,14,672 0=672 1=1 4=-233 5=1 6=75264
|
||||
Swish op_320 1 1 388:12 390:12 -23330=4,3,14,14,672
|
||||
ConvolutionDepthWise op_321 1 1 390:12 393:12 -23330=4,3,14,14,672 0=672 1=3 4=-233 5=1 6=6048 7=672
|
||||
Swish op_323 1 1 393:12 395:12 -23330=4,3,14,14,672
|
||||
Split splitncnn_18 1 2 395:12 395:12_splitncnn_0 395:12_splitncnn_1 -23330=8,3,14,14,672,3,14,14,672
|
||||
Reduction op_324 1 1 395:12_splitncnn_1 396:12 -23330=4,3,1,1,672 0=3 1=0 -23303=2,1,2 4=1 5=1
|
||||
Convolution op_325 1 1 396:12 398:12 -23330=4,3,1,1,28 0=28 1=1 4=-233 5=1 6=18816
|
||||
Swish op_326 1 1 398:12 400:12 -23330=4,3,1,1,28
|
||||
Convolution op_327 1 1 400:12 403:12 -23330=4,3,1,1,672 0=672 1=1 4=-233 5=1 6=18816 9=4
|
||||
BinaryOp op_329 2 1 395:12_splitncnn_0 403:12 404:12 -23330=4,3,14,14,672 0=2
|
||||
Convolution op_330 1 1 404:12 405:12 -23330=4,3,14,14,112 0=112 1=1 4=-233 6=75264
|
||||
Eltwise op_331 2 1 386:12_splitncnn_0 405:12 406:12 -23330=4,3,14,14,112 0=1
|
||||
BinaryOp op_332 2 1 406:12 55:11 407:12_splitncnn_0 -23330=4,3,14,14,112
|
||||
Convolution op_334 1 1 407:12_splitncnn_0 410:12 -23330=4,3,14,14,672 0=672 1=1 4=-233 5=1 6=75264
|
||||
Swish op_335 1 1 410:12 412:12 -23330=4,3,14,14,672
|
||||
ConvolutionDepthWise op_336 1 1 412:12 415:12 -23330=4,3,7,7,672 0=672 1=3 3=2 4=-233 5=1 6=6048 7=672
|
||||
Swish op_338 1 1 415:12 417:12 -23330=4,3,7,7,672
|
||||
Split splitncnn_20 1 2 417:12 417:12_splitncnn_0 417:12_splitncnn_1 -23330=8,3,7,7,672,3,7,7,672
|
||||
Reduction op_339 1 1 417:12_splitncnn_1 418:12 -23330=4,3,1,1,672 0=3 1=0 -23303=2,1,2 4=1 5=1
|
||||
Convolution op_340 1 1 418:12 420:12 -23330=4,3,1,1,28 0=28 1=1 4=-233 5=1 6=18816
|
||||
Swish op_341 1 1 420:12 422:12 -23330=4,3,1,1,28
|
||||
Convolution op_342 1 1 422:12 425:12 -23330=4,3,1,1,672 0=672 1=1 4=-233 5=1 6=18816 9=4
|
||||
BinaryOp op_344 2 1 417:12_splitncnn_0 425:12 426:12 -23330=4,3,7,7,672 0=2
|
||||
Convolution op_345 1 1 426:12 428:12 -23330=4,3,7,7,192 0=192 1=1 4=-233 5=1 6=129024
|
||||
Split splitncnn_21 1 2 428:12 428:12_splitncnn_0 428:12_splitncnn_1 -23330=8,3,7,7,192,3,7,7,192
|
||||
Convolution op_346 1 1 428:12_splitncnn_1 430:12 -23330=4,3,7,7,1152 0=1152 1=1 4=-233 5=1 6=221184
|
||||
Swish op_347 1 1 430:12 432:12 -23330=4,3,7,7,1152
|
||||
ConvolutionDepthWise op_348 1 1 432:12 435:12 -23330=4,3,7,7,1152 0=1152 1=3 4=-233 5=1 6=10368 7=1152
|
||||
Swish op_350 1 1 435:12 437:12 -23330=4,3,7,7,1152
|
||||
Split splitncnn_22 1 2 437:12 437:12_splitncnn_0 437:12_splitncnn_1 -23330=8,3,7,7,1152,3,7,7,1152
|
||||
Reduction op_351 1 1 437:12_splitncnn_1 438:12 -23330=4,3,1,1,1152 0=3 1=0 -23303=2,1,2 4=1 5=1
|
||||
Convolution op_352 1 1 438:12 440:12 -23330=4,3,1,1,48 0=48 1=1 4=-233 5=1 6=55296
|
||||
Swish op_353 1 1 440:12 442:12 -23330=4,3,1,1,48
|
||||
Convolution op_354 1 1 442:12 445:12 -23330=4,3,1,1,1152 0=1152 1=1 4=-233 5=1 6=55296 9=4
|
||||
BinaryOp op_356 2 1 437:12_splitncnn_0 445:12 446:12 -23330=4,3,7,7,1152 0=2
|
||||
Convolution op_357 1 1 446:12 447:12 -23330=4,3,7,7,192 0=192 1=1 4=-233 6=221184
|
||||
Eltwise op_358 2 1 428:12_splitncnn_0 447:12 448:12 -23330=4,3,7,7,192 0=1
|
||||
BinaryOp op_359 2 1 448:12 133:12 449:12 -23330=4,3,7,7,192
|
||||
Split splitncnn_23 1 2 449:12 449:12_splitncnn_0 449:12_splitncnn_1 -23330=8,3,7,7,192,3,7,7,192
|
||||
Convolution op_360 1 1 449:12_splitncnn_1 451:12 -23330=4,3,7,7,1152 0=1152 1=1 4=-233 5=1 6=221184
|
||||
Swish op_361 1 1 451:12 453:12 -23330=4,3,7,7,1152
|
||||
ConvolutionDepthWise op_362 1 1 453:12 456:12 -23330=4,3,7,7,1152 0=1152 1=3 4=-233 5=1 6=10368 7=1152
|
||||
Swish op_364 1 1 456:12 458:12 -23330=4,3,7,7,1152
|
||||
Split splitncnn_24 1 2 458:12 458:12_splitncnn_0 458:12_splitncnn_1 -23330=8,3,7,7,1152,3,7,7,1152
|
||||
Reduction op_365 1 1 458:12_splitncnn_1 459:12 -23330=4,3,1,1,1152 0=3 1=0 -23303=2,1,2 4=1 5=1
|
||||
Convolution op_366 1 1 459:12 461:12 -23330=4,3,1,1,48 0=48 1=1 4=-233 5=1 6=55296
|
||||
Swish op_367 1 1 461:12 463:12 -23330=4,3,1,1,48
|
||||
Convolution op_368 1 1 463:12 466:12 -23330=4,3,1,1,1152 0=1152 1=1 4=-233 5=1 6=55296 9=4
|
||||
BinaryOp op_370 2 1 458:12_splitncnn_0 466:12 467:12 -23330=4,3,7,7,1152 0=2
|
||||
Convolution op_371 1 1 467:12 468:12 -23330=4,3,7,7,192 0=192 1=1 4=-233 6=221184
|
||||
Eltwise op_372 2 1 449:12_splitncnn_0 468:12 469:12 -23330=4,3,7,7,192 0=1
|
||||
BinaryOp op_373 2 1 469:12 144:12 470:12 -23330=4,3,7,7,192
|
||||
Split splitncnn_25 1 2 470:12 470:12_splitncnn_0 470:12_splitncnn_1 -23330=8,3,7,7,192,3,7,7,192
|
||||
Convolution op_374 1 1 470:12_splitncnn_1 472:12 -23330=4,3,7,7,1152 0=1152 1=1 4=-233 5=1 6=221184
|
||||
Swish op_375 1 1 472:12 474:12 -23330=4,3,7,7,1152
|
||||
ConvolutionDepthWise op_376 1 1 474:12 477:12 -23330=4,3,7,7,1152 0=1152 1=3 4=-233 5=1 6=10368 7=1152
|
||||
Swish op_378 1 1 477:12 479:12 -23330=4,3,7,7,1152
|
||||
Split splitncnn_26 1 2 479:12 479:12_splitncnn_0 479:12_splitncnn_1 -23330=8,3,7,7,1152,3,7,7,1152
|
||||
Reduction op_379 1 1 479:12_splitncnn_1 480:12 -23330=4,3,1,1,1152 0=3 1=0 -23303=2,1,2 4=1 5=1
|
||||
Convolution op_380 1 1 480:12 482:12 -23330=4,3,1,1,48 0=48 1=1 4=-233 5=1 6=55296
|
||||
Swish op_381 1 1 482:12 484:12 -23330=4,3,1,1,48
|
||||
Convolution op_382 1 1 484:12 487:12 -23330=4,3,1,1,1152 0=1152 1=1 4=-233 5=1 6=55296 9=4
|
||||
BinaryOp op_384 2 1 479:12_splitncnn_0 487:12 488:12 -23330=4,3,7,7,1152 0=2
|
||||
Convolution op_385 1 1 488:12 489:12 -23330=4,3,7,7,192 0=192 1=1 4=-233 6=221184
|
||||
Eltwise op_386 2 1 470:12_splitncnn_0 489:12 490:12 -23330=4,3,7,7,192 0=1
|
||||
BinaryOp op_387 2 1 490:12 155:12 491:12 -23330=4,3,7,7,192
|
||||
Split splitncnn_27 1 2 491:12 491:12_splitncnn_0 491:12_splitncnn_1 -23330=8,3,7,7,192,3,7,7,192
|
||||
Convolution op_388 1 1 491:12_splitncnn_1 493:12 -23330=4,3,7,7,1152 0=1152 1=1 4=-233 5=1 6=221184
|
||||
Swish op_389 1 1 493:12 495:12 -23330=4,3,7,7,1152
|
||||
ConvolutionDepthWise op_390 1 1 495:12 498:12 -23330=4,3,7,7,1152 0=1152 1=3 4=-233 5=1 6=10368 7=1152
|
||||
Swish op_392 1 1 498:12 500:12 -23330=4,3,7,7,1152
|
||||
Split splitncnn_28 1 2 500:12 500:12_splitncnn_0 500:12_splitncnn_1 -23330=8,3,7,7,1152,3,7,7,1152
|
||||
Reduction op_393 1 1 500:12_splitncnn_1 501:12 -23330=4,3,1,1,1152 0=3 1=0 -23303=2,1,2 4=1 5=1
|
||||
Convolution op_394 1 1 501:12 503:12 -23330=4,3,1,1,48 0=48 1=1 4=-233 5=1 6=55296
|
||||
Swish op_395 1 1 503:12 505:12 -23330=4,3,1,1,48
|
||||
Convolution op_396 1 1 505:12 508:12 -23330=4,3,1,1,1152 0=1152 1=1 4=-233 5=1 6=55296 9=4
|
||||
BinaryOp op_398 2 1 500:12_splitncnn_0 508:12 509:12 -23330=4,3,7,7,1152 0=2
|
||||
Convolution op_399 1 1 509:12 510:12 -23330=4,3,7,7,192 0=192 1=1 4=-233 6=221184
|
||||
Eltwise op_400 2 1 491:12_splitncnn_0 510:12 511:12 -23330=4,3,7,7,192 0=1
|
||||
BinaryOp op_401 2 1 511:12 166:12 512:12 -23330=4,3,7,7,192
|
||||
Split splitncnn_29 1 2 512:12 512:12_splitncnn_0 512:12_splitncnn_1 -23330=8,3,7,7,192,3,7,7,192
|
||||
Convolution op_402 1 1 512:12_splitncnn_1 514:12 -23330=4,3,7,7,1152 0=1152 1=1 4=-233 5=1 6=221184
|
||||
Swish op_403 1 1 514:12 516:12 -23330=4,3,7,7,1152
|
||||
ConvolutionDepthWise op_404 1 1 516:12 519:12 -23330=4,3,7,7,1152 0=1152 1=3 4=-233 5=1 6=10368 7=1152
|
||||
Swish op_406 1 1 519:12 521:12 -23330=4,3,7,7,1152
|
||||
Split splitncnn_30 1 2 521:12 521:12_splitncnn_0 521:12_splitncnn_1 -23330=8,3,7,7,1152,3,7,7,1152
|
||||
Reduction op_407 1 1 521:12_splitncnn_1 522:12 -23330=4,3,1,1,1152 0=3 1=0 -23303=2,1,2 4=1 5=1
|
||||
Convolution op_408 1 1 522:12 524:12 -23330=4,3,1,1,48 0=48 1=1 4=-233 5=1 6=55296
|
||||
Swish op_409 1 1 524:12 526:12 -23330=4,3,1,1,48
|
||||
Convolution op_410 1 1 526:12 529:12 -23330=4,3,1,1,1152 0=1152 1=1 4=-233 5=1 6=55296 9=4
|
||||
BinaryOp op_412 2 1 521:12_splitncnn_0 529:12 530:12 -23330=4,3,7,7,1152 0=2
|
||||
Convolution op_413 1 1 530:12 531:12 -23330=4,3,7,7,192 0=192 1=1 4=-233 6=221184
|
||||
Eltwise op_414 2 1 512:12_splitncnn_0 531:12 532:12 -23330=4,3,7,7,192 0=1
|
||||
BinaryOp op_415 2 1 532:12 177:12 533:12 -23330=4,3,7,7,192
|
||||
Split splitncnn_31 1 2 533:12 533:12_splitncnn_0 533:12_splitncnn_1 -23330=8,3,7,7,192,3,7,7,192
|
||||
Convolution op_416 1 1 533:12_splitncnn_1 535:12 -23330=4,3,7,7,1152 0=1152 1=1 4=-233 5=1 6=221184
|
||||
Swish op_417 1 1 535:12 537:12 -23330=4,3,7,7,1152
|
||||
ConvolutionDepthWise op_418 1 1 537:12 540:12 -23330=4,3,7,7,1152 0=1152 1=3 4=-233 5=1 6=10368 7=1152
|
||||
Swish op_420 1 1 540:12 542:12 -23330=4,3,7,7,1152
|
||||
Split splitncnn_32 1 2 542:12 542:12_splitncnn_0 542:12_splitncnn_1 -23330=8,3,7,7,1152,3,7,7,1152
|
||||
Reduction op_421 1 1 542:12_splitncnn_1 543:12 -23330=4,3,1,1,1152 0=3 1=0 -23303=2,1,2 4=1 5=1
|
||||
Convolution op_422 1 1 543:12 545:12 -23330=4,3,1,1,48 0=48 1=1 4=-233 5=1 6=55296
|
||||
Swish op_423 1 1 545:12 547:12 -23330=4,3,1,1,48
|
||||
Convolution op_424 1 1 547:12 550:12 -23330=4,3,1,1,1152 0=1152 1=1 4=-233 5=1 6=55296 9=4
|
||||
BinaryOp op_426 2 1 542:12_splitncnn_0 550:12 551:12 -23330=4,3,7,7,1152 0=2
|
||||
Convolution op_427 1 1 551:12 552:12 -23330=4,3,7,7,192 0=192 1=1 4=-233 6=221184
|
||||
Eltwise op_428 2 1 533:12_splitncnn_0 552:12 553:12 -23330=4,3,7,7,192 0=1
|
||||
BinaryOp op_429 2 1 553:12 188:12 554:12 -23330=4,3,7,7,192
|
||||
Split splitncnn_33 1 2 554:12 554:12_splitncnn_0 554:12_splitncnn_1 -23330=8,3,7,7,192,3,7,7,192
|
||||
Convolution op_430 1 1 554:12_splitncnn_1 556:12 -23330=4,3,7,7,1152 0=1152 1=1 4=-233 5=1 6=221184
|
||||
Swish op_431 1 1 556:12 558:12 -23330=4,3,7,7,1152
|
||||
ConvolutionDepthWise op_432 1 1 558:12 561:12 -23330=4,3,7,7,1152 0=1152 1=3 4=-233 5=1 6=10368 7=1152
|
||||
Swish op_434 1 1 561:12 563:12 -23330=4,3,7,7,1152
|
||||
Split splitncnn_34 1 2 563:12 563:12_splitncnn_0 563:12_splitncnn_1 -23330=8,3,7,7,1152,3,7,7,1152
|
||||
Reduction op_435 1 1 563:12_splitncnn_1 564:12 -23330=4,3,1,1,1152 0=3 1=0 -23303=2,1,2 4=1 5=1
|
||||
Convolution op_436 1 1 564:12 566:12 -23330=4,3,1,1,48 0=48 1=1 4=-233 5=1 6=55296
|
||||
Swish op_437 1 1 566:12 568:12 -23330=4,3,1,1,48
|
||||
Convolution op_438 1 1 568:12 571:12 -23330=4,3,1,1,1152 0=1152 1=1 4=-233 5=1 6=55296 9=4
|
||||
BinaryOp op_440 2 1 563:12_splitncnn_0 571:12 572:12 -23330=4,3,7,7,1152 0=2
|
||||
Convolution op_441 1 1 572:12 573:12 -23330=4,3,7,7,192 0=192 1=1 4=-233 6=221184
|
||||
Eltwise op_442 2 1 554:12_splitncnn_0 573:12 574:12 -23330=4,3,7,7,192 0=1
|
||||
BinaryOp op_443 2 1 574:12 199:12 575:12_splitncnn_0 -23330=4,3,7,7,192
|
||||
Convolution op_445 1 1 575:12_splitncnn_0 578:12 -23330=4,3,7,7,1280 0=1280 1=1 4=-233 5=1 6=245760
|
||||
Swish op_446 1 1 578:12 580:12 -23330=4,3,7,7,1280
|
||||
Pooling op_447 1 1 580:12 581:12 -23330=4,1,1280,1,1 0=1 4=1
|
||||
InnerProduct op_448 1 1 581:12 584:12 -23330=4,1,1000,1,1 0=1000 1=1 2=1280000
|
96
3rdparty/ncnn/benchmark/googlenet.param
vendored
Normal file
96
3rdparty/ncnn/benchmark/googlenet.param
vendored
Normal file
@ -0,0 +1,96 @@
|
||||
7767517
|
||||
94 121
|
||||
Input data 0 1 data -23330=4,3,224,224,3 0=224 1=224 2=3
|
||||
Convolution conv1/7x7_s2 1 1 data conv1/7x7_s2_conv1/relu_7x7 -23330=4,3,112,112,64 0=64 1=7 3=2 4=3 5=1 6=9408 9=1
|
||||
Pooling pool1/3x3_s2 1 1 conv1/7x7_s2_conv1/relu_7x7 pool1/3x3_s2 -23330=4,3,56,56,64 1=3 2=2
|
||||
LRN pool1/norm1 1 1 pool1/3x3_s2 pool1/norm1 -23330=4,3,56,56,64 2=1.000000e-04
|
||||
Convolution conv2/3x3_reduce 1 1 pool1/norm1 conv2/3x3_reduce_conv2/relu_3x3_reduce -23330=4,3,56,56,64 0=64 1=1 5=1 6=4096 9=1
|
||||
Convolution conv2/3x3 1 1 conv2/3x3_reduce_conv2/relu_3x3_reduce conv2/3x3_conv2/relu_3x3 -23330=4,3,56,56,192 0=192 1=3 4=1 5=1 6=110592 9=1
|
||||
LRN conv2/norm2 1 1 conv2/3x3_conv2/relu_3x3 conv2/norm2 -23330=4,3,56,56,192 2=1.000000e-04
|
||||
Pooling pool2/3x3_s2 1 1 conv2/norm2 pool2/3x3_s2 -23330=4,3,28,28,192 1=3 2=2
|
||||
Split splitncnn_0 1 4 pool2/3x3_s2 pool2/3x3_s2_splitncnn_0 pool2/3x3_s2_splitncnn_1 pool2/3x3_s2_splitncnn_2 pool2/3x3_s2_splitncnn_3 -23330=16,3,28,28,192,3,28,28,192,3,28,28,192,3,28,28,192
|
||||
Convolution inception_3a/1x1 1 1 pool2/3x3_s2_splitncnn_3 inception_3a/1x1_inception_3a/relu_1x1 -23330=4,3,28,28,64 0=64 1=1 5=1 6=12288 9=1
|
||||
Convolution inception_3a/3x3_reduce 1 1 pool2/3x3_s2_splitncnn_2 inception_3a/3x3_reduce_inception_3a/relu_3x3_reduce -23330=4,3,28,28,96 0=96 1=1 5=1 6=18432 9=1
|
||||
Convolution inception_3a/3x3 1 1 inception_3a/3x3_reduce_inception_3a/relu_3x3_reduce inception_3a/3x3_inception_3a/relu_3x3 -23330=4,3,28,28,128 0=128 1=3 4=1 5=1 6=110592 9=1
|
||||
Convolution inception_3a/5x5_reduce 1 1 pool2/3x3_s2_splitncnn_1 inception_3a/5x5_reduce_inception_3a/relu_5x5_reduce -23330=4,3,28,28,16 0=16 1=1 5=1 6=3072 9=1
|
||||
Convolution inception_3a/5x5 1 1 inception_3a/5x5_reduce_inception_3a/relu_5x5_reduce inception_3a/5x5_inception_3a/relu_5x5 -23330=4,3,28,28,32 0=32 1=5 4=2 5=1 6=12800 9=1
|
||||
Pooling inception_3a/pool 1 1 pool2/3x3_s2_splitncnn_0 inception_3a/pool -23330=4,3,28,28,192 1=3 3=1
|
||||
Convolution inception_3a/pool_proj 1 1 inception_3a/pool inception_3a/pool_proj_inception_3a/relu_pool_proj -23330=4,3,28,28,32 0=32 1=1 5=1 6=6144 9=1
|
||||
Concat inception_3a/output 4 1 inception_3a/1x1_inception_3a/relu_1x1 inception_3a/3x3_inception_3a/relu_3x3 inception_3a/5x5_inception_3a/relu_5x5 inception_3a/pool_proj_inception_3a/relu_pool_proj inception_3a/output -23330=4,3,28,28,256
|
||||
Split splitncnn_1 1 4 inception_3a/output inception_3a/output_splitncnn_0 inception_3a/output_splitncnn_1 inception_3a/output_splitncnn_2 inception_3a/output_splitncnn_3 -23330=16,3,28,28,256,3,28,28,256,3,28,28,256,3,28,28,256
|
||||
Convolution inception_3b/1x1 1 1 inception_3a/output_splitncnn_3 inception_3b/1x1_inception_3b/relu_1x1 -23330=4,3,28,28,128 0=128 1=1 5=1 6=32768 9=1
|
||||
Convolution inception_3b/3x3_reduce 1 1 inception_3a/output_splitncnn_2 inception_3b/3x3_reduce_inception_3b/relu_3x3_reduce -23330=4,3,28,28,128 0=128 1=1 5=1 6=32768 9=1
|
||||
Convolution inception_3b/3x3 1 1 inception_3b/3x3_reduce_inception_3b/relu_3x3_reduce inception_3b/3x3_inception_3b/relu_3x3 -23330=4,3,28,28,192 0=192 1=3 4=1 5=1 6=221184 9=1
|
||||
Convolution inception_3b/5x5_reduce 1 1 inception_3a/output_splitncnn_1 inception_3b/5x5_reduce_inception_3b/relu_5x5_reduce -23330=4,3,28,28,32 0=32 1=1 5=1 6=8192 9=1
|
||||
Convolution inception_3b/5x5 1 1 inception_3b/5x5_reduce_inception_3b/relu_5x5_reduce inception_3b/5x5_inception_3b/relu_5x5 -23330=4,3,28,28,96 0=96 1=5 4=2 5=1 6=76800 9=1
|
||||
Pooling inception_3b/pool 1 1 inception_3a/output_splitncnn_0 inception_3b/pool -23330=4,3,28,28,256 1=3 3=1
|
||||
Convolution inception_3b/pool_proj 1 1 inception_3b/pool inception_3b/pool_proj_inception_3b/relu_pool_proj -23330=4,3,28,28,64 0=64 1=1 5=1 6=16384 9=1
|
||||
Concat inception_3b/output 4 1 inception_3b/1x1_inception_3b/relu_1x1 inception_3b/3x3_inception_3b/relu_3x3 inception_3b/5x5_inception_3b/relu_5x5 inception_3b/pool_proj_inception_3b/relu_pool_proj inception_3b/output -23330=4,3,28,28,480
|
||||
Pooling pool3/3x3_s2 1 1 inception_3b/output pool3/3x3_s2 -23330=4,3,14,14,480 1=3 2=2
|
||||
Split splitncnn_2 1 4 pool3/3x3_s2 pool3/3x3_s2_splitncnn_0 pool3/3x3_s2_splitncnn_1 pool3/3x3_s2_splitncnn_2 pool3/3x3_s2_splitncnn_3 -23330=16,3,14,14,480,3,14,14,480,3,14,14,480,3,14,14,480
|
||||
Convolution inception_4a/1x1 1 1 pool3/3x3_s2_splitncnn_3 inception_4a/1x1_inception_4a/relu_1x1 -23330=4,3,14,14,192 0=192 1=1 5=1 6=92160 9=1
|
||||
Convolution inception_4a/3x3_reduce 1 1 pool3/3x3_s2_splitncnn_2 inception_4a/3x3_reduce_inception_4a/relu_3x3_reduce -23330=4,3,14,14,96 0=96 1=1 5=1 6=46080 9=1
|
||||
Convolution inception_4a/3x3 1 1 inception_4a/3x3_reduce_inception_4a/relu_3x3_reduce inception_4a/3x3_inception_4a/relu_3x3 -23330=4,3,14,14,208 0=208 1=3 4=1 5=1 6=179712 9=1
|
||||
Convolution inception_4a/5x5_reduce 1 1 pool3/3x3_s2_splitncnn_1 inception_4a/5x5_reduce_inception_4a/relu_5x5_reduce -23330=4,3,14,14,16 0=16 1=1 5=1 6=7680 9=1
|
||||
Convolution inception_4a/5x5 1 1 inception_4a/5x5_reduce_inception_4a/relu_5x5_reduce inception_4a/5x5_inception_4a/relu_5x5 -23330=4,3,14,14,48 0=48 1=5 4=2 5=1 6=19200 9=1
|
||||
Pooling inception_4a/pool 1 1 pool3/3x3_s2_splitncnn_0 inception_4a/pool -23330=4,3,14,14,480 1=3 3=1
|
||||
Convolution inception_4a/pool_proj 1 1 inception_4a/pool inception_4a/pool_proj_inception_4a/relu_pool_proj -23330=4,3,14,14,64 0=64 1=1 5=1 6=30720 9=1
|
||||
Concat inception_4a/output 4 1 inception_4a/1x1_inception_4a/relu_1x1 inception_4a/3x3_inception_4a/relu_3x3 inception_4a/5x5_inception_4a/relu_5x5 inception_4a/pool_proj_inception_4a/relu_pool_proj inception_4a/output -23330=4,3,14,14,512
|
||||
Split splitncnn_3 1 4 inception_4a/output inception_4a/output_splitncnn_0 inception_4a/output_splitncnn_1 inception_4a/output_splitncnn_2 inception_4a/output_splitncnn_3 -23330=16,3,14,14,512,3,14,14,512,3,14,14,512,3,14,14,512
|
||||
Convolution inception_4b/1x1 1 1 inception_4a/output_splitncnn_3 inception_4b/1x1_inception_4b/relu_1x1 -23330=4,3,14,14,160 0=160 1=1 5=1 6=81920 9=1
|
||||
Convolution inception_4b/3x3_reduce 1 1 inception_4a/output_splitncnn_2 inception_4b/3x3_reduce_inception_4b/relu_3x3_reduce -23330=4,3,14,14,112 0=112 1=1 5=1 6=57344 9=1
|
||||
Convolution inception_4b/3x3 1 1 inception_4b/3x3_reduce_inception_4b/relu_3x3_reduce inception_4b/3x3_inception_4b/relu_3x3 -23330=4,3,14,14,224 0=224 1=3 4=1 5=1 6=225792 9=1
|
||||
Convolution inception_4b/5x5_reduce 1 1 inception_4a/output_splitncnn_1 inception_4b/5x5_reduce_inception_4b/relu_5x5_reduce -23330=4,3,14,14,24 0=24 1=1 5=1 6=12288 9=1
|
||||
Convolution inception_4b/5x5 1 1 inception_4b/5x5_reduce_inception_4b/relu_5x5_reduce inception_4b/5x5_inception_4b/relu_5x5 -23330=4,3,14,14,64 0=64 1=5 4=2 5=1 6=38400 9=1
|
||||
Pooling inception_4b/pool 1 1 inception_4a/output_splitncnn_0 inception_4b/pool -23330=4,3,14,14,512 1=3 3=1
|
||||
Convolution inception_4b/pool_proj 1 1 inception_4b/pool inception_4b/pool_proj_inception_4b/relu_pool_proj -23330=4,3,14,14,64 0=64 1=1 5=1 6=32768 9=1
|
||||
Concat inception_4b/output 4 1 inception_4b/1x1_inception_4b/relu_1x1 inception_4b/3x3_inception_4b/relu_3x3 inception_4b/5x5_inception_4b/relu_5x5 inception_4b/pool_proj_inception_4b/relu_pool_proj inception_4b/output -23330=4,3,14,14,512
|
||||
Split splitncnn_4 1 4 inception_4b/output inception_4b/output_splitncnn_0 inception_4b/output_splitncnn_1 inception_4b/output_splitncnn_2 inception_4b/output_splitncnn_3 -23330=16,3,14,14,512,3,14,14,512,3,14,14,512,3,14,14,512
|
||||
Convolution inception_4c/1x1 1 1 inception_4b/output_splitncnn_3 inception_4c/1x1_inception_4c/relu_1x1 -23330=4,3,14,14,128 0=128 1=1 5=1 6=65536 9=1
|
||||
Convolution inception_4c/3x3_reduce 1 1 inception_4b/output_splitncnn_2 inception_4c/3x3_reduce_inception_4c/relu_3x3_reduce -23330=4,3,14,14,128 0=128 1=1 5=1 6=65536 9=1
|
||||
Convolution inception_4c/3x3 1 1 inception_4c/3x3_reduce_inception_4c/relu_3x3_reduce inception_4c/3x3_inception_4c/relu_3x3 -23330=4,3,14,14,256 0=256 1=3 4=1 5=1 6=294912 9=1
|
||||
Convolution inception_4c/5x5_reduce 1 1 inception_4b/output_splitncnn_1 inception_4c/5x5_reduce_inception_4c/relu_5x5_reduce -23330=4,3,14,14,24 0=24 1=1 5=1 6=12288 9=1
|
||||
Convolution inception_4c/5x5 1 1 inception_4c/5x5_reduce_inception_4c/relu_5x5_reduce inception_4c/5x5_inception_4c/relu_5x5 -23330=4,3,14,14,64 0=64 1=5 4=2 5=1 6=38400 9=1
|
||||
Pooling inception_4c/pool 1 1 inception_4b/output_splitncnn_0 inception_4c/pool -23330=4,3,14,14,512 1=3 3=1
|
||||
Convolution inception_4c/pool_proj 1 1 inception_4c/pool inception_4c/pool_proj_inception_4c/relu_pool_proj -23330=4,3,14,14,64 0=64 1=1 5=1 6=32768 9=1
|
||||
Concat inception_4c/output 4 1 inception_4c/1x1_inception_4c/relu_1x1 inception_4c/3x3_inception_4c/relu_3x3 inception_4c/5x5_inception_4c/relu_5x5 inception_4c/pool_proj_inception_4c/relu_pool_proj inception_4c/output -23330=4,3,14,14,512
|
||||
Split splitncnn_5 1 4 inception_4c/output inception_4c/output_splitncnn_0 inception_4c/output_splitncnn_1 inception_4c/output_splitncnn_2 inception_4c/output_splitncnn_3 -23330=16,3,14,14,512,3,14,14,512,3,14,14,512,3,14,14,512
|
||||
Convolution inception_4d/1x1 1 1 inception_4c/output_splitncnn_3 inception_4d/1x1_inception_4d/relu_1x1 -23330=4,3,14,14,112 0=112 1=1 5=1 6=57344 9=1
|
||||
Convolution inception_4d/3x3_reduce 1 1 inception_4c/output_splitncnn_2 inception_4d/3x3_reduce_inception_4d/relu_3x3_reduce -23330=4,3,14,14,144 0=144 1=1 5=1 6=73728 9=1
|
||||
Convolution inception_4d/3x3 1 1 inception_4d/3x3_reduce_inception_4d/relu_3x3_reduce inception_4d/3x3_inception_4d/relu_3x3 -23330=4,3,14,14,288 0=288 1=3 4=1 5=1 6=373248 9=1
|
||||
Convolution inception_4d/5x5_reduce 1 1 inception_4c/output_splitncnn_1 inception_4d/5x5_reduce_inception_4d/relu_5x5_reduce -23330=4,3,14,14,32 0=32 1=1 5=1 6=16384 9=1
|
||||
Convolution inception_4d/5x5 1 1 inception_4d/5x5_reduce_inception_4d/relu_5x5_reduce inception_4d/5x5_inception_4d/relu_5x5 -23330=4,3,14,14,64 0=64 1=5 4=2 5=1 6=51200 9=1
|
||||
Pooling inception_4d/pool 1 1 inception_4c/output_splitncnn_0 inception_4d/pool -23330=4,3,14,14,512 1=3 3=1
|
||||
Convolution inception_4d/pool_proj 1 1 inception_4d/pool inception_4d/pool_proj_inception_4d/relu_pool_proj -23330=4,3,14,14,64 0=64 1=1 5=1 6=32768 9=1
|
||||
Concat inception_4d/output 4 1 inception_4d/1x1_inception_4d/relu_1x1 inception_4d/3x3_inception_4d/relu_3x3 inception_4d/5x5_inception_4d/relu_5x5 inception_4d/pool_proj_inception_4d/relu_pool_proj inception_4d/output -23330=4,3,14,14,528
|
||||
Split splitncnn_6 1 4 inception_4d/output inception_4d/output_splitncnn_0 inception_4d/output_splitncnn_1 inception_4d/output_splitncnn_2 inception_4d/output_splitncnn_3 -23330=16,3,14,14,528,3,14,14,528,3,14,14,528,3,14,14,528
|
||||
Convolution inception_4e/1x1 1 1 inception_4d/output_splitncnn_3 inception_4e/1x1_inception_4e/relu_1x1 -23330=4,3,14,14,256 0=256 1=1 5=1 6=135168 9=1
|
||||
Convolution inception_4e/3x3_reduce 1 1 inception_4d/output_splitncnn_2 inception_4e/3x3_reduce_inception_4e/relu_3x3_reduce -23330=4,3,14,14,160 0=160 1=1 5=1 6=84480 9=1
|
||||
Convolution inception_4e/3x3 1 1 inception_4e/3x3_reduce_inception_4e/relu_3x3_reduce inception_4e/3x3_inception_4e/relu_3x3 -23330=4,3,14,14,320 0=320 1=3 4=1 5=1 6=460800 9=1
|
||||
Convolution inception_4e/5x5_reduce 1 1 inception_4d/output_splitncnn_1 inception_4e/5x5_reduce_inception_4e/relu_5x5_reduce -23330=4,3,14,14,32 0=32 1=1 5=1 6=16896 9=1
|
||||
Convolution inception_4e/5x5 1 1 inception_4e/5x5_reduce_inception_4e/relu_5x5_reduce inception_4e/5x5_inception_4e/relu_5x5 -23330=4,3,14,14,128 0=128 1=5 4=2 5=1 6=102400 9=1
|
||||
Pooling inception_4e/pool 1 1 inception_4d/output_splitncnn_0 inception_4e/pool -23330=4,3,14,14,528 1=3 3=1
|
||||
Convolution inception_4e/pool_proj 1 1 inception_4e/pool inception_4e/pool_proj_inception_4e/relu_pool_proj -23330=4,3,14,14,128 0=128 1=1 5=1 6=67584 9=1
|
||||
Concat inception_4e/output 4 1 inception_4e/1x1_inception_4e/relu_1x1 inception_4e/3x3_inception_4e/relu_3x3 inception_4e/5x5_inception_4e/relu_5x5 inception_4e/pool_proj_inception_4e/relu_pool_proj inception_4e/output -23330=4,3,14,14,832
|
||||
Pooling pool4/3x3_s2 1 1 inception_4e/output pool4/3x3_s2 -23330=4,3,7,7,832 1=3 2=2
|
||||
Split splitncnn_7 1 4 pool4/3x3_s2 pool4/3x3_s2_splitncnn_0 pool4/3x3_s2_splitncnn_1 pool4/3x3_s2_splitncnn_2 pool4/3x3_s2_splitncnn_3 -23330=16,3,7,7,832,3,7,7,832,3,7,7,832,3,7,7,832
|
||||
Convolution inception_5a/1x1 1 1 pool4/3x3_s2_splitncnn_3 inception_5a/1x1_inception_5a/relu_1x1 -23330=4,3,7,7,256 0=256 1=1 5=1 6=212992 9=1
|
||||
Convolution inception_5a/3x3_reduce 1 1 pool4/3x3_s2_splitncnn_2 inception_5a/3x3_reduce_inception_5a/relu_3x3_reduce -23330=4,3,7,7,160 0=160 1=1 5=1 6=133120 9=1
|
||||
Convolution inception_5a/3x3 1 1 inception_5a/3x3_reduce_inception_5a/relu_3x3_reduce inception_5a/3x3_inception_5a/relu_3x3 -23330=4,3,7,7,320 0=320 1=3 4=1 5=1 6=460800 9=1
|
||||
Convolution inception_5a/5x5_reduce 1 1 pool4/3x3_s2_splitncnn_1 inception_5a/5x5_reduce_inception_5a/relu_5x5_reduce -23330=4,3,7,7,32 0=32 1=1 5=1 6=26624 9=1
|
||||
Convolution inception_5a/5x5 1 1 inception_5a/5x5_reduce_inception_5a/relu_5x5_reduce inception_5a/5x5_inception_5a/relu_5x5 -23330=4,3,7,7,128 0=128 1=5 4=2 5=1 6=102400 9=1
|
||||
Pooling inception_5a/pool 1 1 pool4/3x3_s2_splitncnn_0 inception_5a/pool -23330=4,3,7,7,832 1=3 3=1
|
||||
Convolution inception_5a/pool_proj 1 1 inception_5a/pool inception_5a/pool_proj_inception_5a/relu_pool_proj -23330=4,3,7,7,128 0=128 1=1 5=1 6=106496 9=1
|
||||
Concat inception_5a/output 4 1 inception_5a/1x1_inception_5a/relu_1x1 inception_5a/3x3_inception_5a/relu_3x3 inception_5a/5x5_inception_5a/relu_5x5 inception_5a/pool_proj_inception_5a/relu_pool_proj inception_5a/output -23330=4,3,7,7,832
|
||||
Split splitncnn_8 1 4 inception_5a/output inception_5a/output_splitncnn_0 inception_5a/output_splitncnn_1 inception_5a/output_splitncnn_2 inception_5a/output_splitncnn_3 -23330=16,3,7,7,832,3,7,7,832,3,7,7,832,3,7,7,832
|
||||
Convolution inception_5b/1x1 1 1 inception_5a/output_splitncnn_3 inception_5b/1x1_inception_5b/relu_1x1 -23330=4,3,7,7,384 0=384 1=1 5=1 6=319488 9=1
|
||||
Convolution inception_5b/3x3_reduce 1 1 inception_5a/output_splitncnn_2 inception_5b/3x3_reduce_inception_5b/relu_3x3_reduce -23330=4,3,7,7,192 0=192 1=1 5=1 6=159744 9=1
|
||||
Convolution inception_5b/3x3 1 1 inception_5b/3x3_reduce_inception_5b/relu_3x3_reduce inception_5b/3x3_inception_5b/relu_3x3 -23330=4,3,7,7,384 0=384 1=3 4=1 5=1 6=663552 9=1
|
||||
Convolution inception_5b/5x5_reduce 1 1 inception_5a/output_splitncnn_1 inception_5b/5x5_reduce_inception_5b/relu_5x5_reduce -23330=4,3,7,7,48 0=48 1=1 5=1 6=39936 9=1
|
||||
Convolution inception_5b/5x5 1 1 inception_5b/5x5_reduce_inception_5b/relu_5x5_reduce inception_5b/5x5_inception_5b/relu_5x5 -23330=4,3,7,7,128 0=128 1=5 4=2 5=1 6=153600 9=1
|
||||
Pooling inception_5b/pool 1 1 inception_5a/output_splitncnn_0 inception_5b/pool -23330=4,3,7,7,832 1=3 3=1
|
||||
Convolution inception_5b/pool_proj 1 1 inception_5b/pool inception_5b/pool_proj_inception_5b/relu_pool_proj -23330=4,3,7,7,128 0=128 1=1 5=1 6=106496 9=1
|
||||
Concat inception_5b/output 4 1 inception_5b/1x1_inception_5b/relu_1x1 inception_5b/3x3_inception_5b/relu_3x3 inception_5b/5x5_inception_5b/relu_5x5 inception_5b/pool_proj_inception_5b/relu_pool_proj inception_5b/output -23330=4,3,7,7,1024
|
||||
Pooling pool5/7x7_s1 1 1 inception_5b/output pool5/7x7_s1_pool5/drop_7x7_s1 -23330=4,3,1,1,1024 0=1 1=7
|
||||
InnerProduct loss3/classifier 1 1 pool5/7x7_s1_pool5/drop_7x7_s1 loss3/classifier -23330=4,1,1000,1,1 0=1000 1=1 2=1024000
|
||||
Softmax prob 1 1 loss3/classifier output -23330=4,1,1000,1,1
|
96
3rdparty/ncnn/benchmark/googlenet_int8.param
vendored
Normal file
96
3rdparty/ncnn/benchmark/googlenet_int8.param
vendored
Normal file
@ -0,0 +1,96 @@
|
||||
7767517
|
||||
94 121
|
||||
Input data 0 1 data 0=224 1=224 2=3
|
||||
Convolution conv1/7x7_s2 1 1 data conv1/7x7_s2_conv1/relu_7x7 0=64 1=7 3=2 4=3 5=1 6=9408 8=2 9=1
|
||||
Pooling pool1/3x3_s2 1 1 conv1/7x7_s2_conv1/relu_7x7 pool1/3x3_s2 1=3 2=2
|
||||
LRN pool1/norm1 1 1 pool1/3x3_s2 pool1/norm1 2=0.000100
|
||||
Convolution conv2/3x3_reduce 1 1 pool1/norm1 conv2/3x3_reduce_conv2/relu_3x3_reduce 0=64 1=1 5=1 6=4096 8=102 9=1
|
||||
Convolution conv2/3x3 1 1 conv2/3x3_reduce_conv2/relu_3x3_reduce conv2/3x3_conv2/relu_3x3 0=192 1=3 4=1 5=1 6=110592 8=2 9=1
|
||||
LRN conv2/norm2 1 1 conv2/3x3_conv2/relu_3x3 conv2/norm2 2=0.000100
|
||||
Pooling pool2/3x3_s2 1 1 conv2/norm2 pool2/3x3_s2 1=3 2=2
|
||||
Split splitncnn_0 1 4 pool2/3x3_s2 pool2/3x3_s2_splitncnn_0 pool2/3x3_s2_splitncnn_1 pool2/3x3_s2_splitncnn_2 pool2/3x3_s2_splitncnn_3
|
||||
Convolution inception_3a/1x1 1 1 pool2/3x3_s2_splitncnn_3 inception_3a/1x1_inception_3a/relu_1x1 0=64 1=1 5=1 6=12288 8=2 9=1
|
||||
Convolution inception_3a/3x3_reduce 1 1 pool2/3x3_s2_splitncnn_2 inception_3a/3x3_reduce_inception_3a/relu_3x3_reduce 0=96 1=1 5=1 6=18432 8=102 9=1
|
||||
Convolution inception_3a/3x3 1 1 inception_3a/3x3_reduce_inception_3a/relu_3x3_reduce inception_3a/3x3_inception_3a/relu_3x3 0=128 1=3 4=1 5=1 6=110592 8=2 9=1
|
||||
Convolution inception_3a/5x5_reduce 1 1 pool2/3x3_s2_splitncnn_1 inception_3a/5x5_reduce_inception_3a/relu_5x5_reduce 0=16 1=1 5=1 6=3072 8=102 9=1
|
||||
Convolution inception_3a/5x5 1 1 inception_3a/5x5_reduce_inception_3a/relu_5x5_reduce inception_3a/5x5_inception_3a/relu_5x5 0=32 1=5 4=2 5=1 6=12800 8=2 9=1
|
||||
Pooling inception_3a/pool 1 1 pool2/3x3_s2_splitncnn_0 inception_3a/pool 1=3 3=1
|
||||
Convolution inception_3a/pool_proj 1 1 inception_3a/pool inception_3a/pool_proj_inception_3a/relu_pool_proj 0=32 1=1 5=1 6=6144 8=2 9=1
|
||||
Concat inception_3a/output 4 1 inception_3a/1x1_inception_3a/relu_1x1 inception_3a/3x3_inception_3a/relu_3x3 inception_3a/5x5_inception_3a/relu_5x5 inception_3a/pool_proj_inception_3a/relu_pool_proj inception_3a/output
|
||||
Split splitncnn_1 1 4 inception_3a/output inception_3a/output_splitncnn_0 inception_3a/output_splitncnn_1 inception_3a/output_splitncnn_2 inception_3a/output_splitncnn_3
|
||||
Convolution inception_3b/1x1 1 1 inception_3a/output_splitncnn_3 inception_3b/1x1_inception_3b/relu_1x1 0=128 1=1 5=1 6=32768 8=2 9=1
|
||||
Convolution inception_3b/3x3_reduce 1 1 inception_3a/output_splitncnn_2 inception_3b/3x3_reduce_inception_3b/relu_3x3_reduce 0=128 1=1 5=1 6=32768 8=102 9=1
|
||||
Convolution inception_3b/3x3 1 1 inception_3b/3x3_reduce_inception_3b/relu_3x3_reduce inception_3b/3x3_inception_3b/relu_3x3 0=192 1=3 4=1 5=1 6=221184 8=2 9=1
|
||||
Convolution inception_3b/5x5_reduce 1 1 inception_3a/output_splitncnn_1 inception_3b/5x5_reduce_inception_3b/relu_5x5_reduce 0=32 1=1 5=1 6=8192 8=102 9=1
|
||||
Convolution inception_3b/5x5 1 1 inception_3b/5x5_reduce_inception_3b/relu_5x5_reduce inception_3b/5x5_inception_3b/relu_5x5 0=96 1=5 4=2 5=1 6=76800 8=2 9=1
|
||||
Pooling inception_3b/pool 1 1 inception_3a/output_splitncnn_0 inception_3b/pool 1=3 3=1
|
||||
Convolution inception_3b/pool_proj 1 1 inception_3b/pool inception_3b/pool_proj_inception_3b/relu_pool_proj 0=64 1=1 5=1 6=16384 8=2 9=1
|
||||
Concat inception_3b/output 4 1 inception_3b/1x1_inception_3b/relu_1x1 inception_3b/3x3_inception_3b/relu_3x3 inception_3b/5x5_inception_3b/relu_5x5 inception_3b/pool_proj_inception_3b/relu_pool_proj inception_3b/output
|
||||
Pooling pool3/3x3_s2 1 1 inception_3b/output pool3/3x3_s2 1=3 2=2
|
||||
Split splitncnn_2 1 4 pool3/3x3_s2 pool3/3x3_s2_splitncnn_0 pool3/3x3_s2_splitncnn_1 pool3/3x3_s2_splitncnn_2 pool3/3x3_s2_splitncnn_3
|
||||
Convolution inception_4a/1x1 1 1 pool3/3x3_s2_splitncnn_3 inception_4a/1x1_inception_4a/relu_1x1 0=192 1=1 5=1 6=92160 8=2 9=1
|
||||
Convolution inception_4a/3x3_reduce 1 1 pool3/3x3_s2_splitncnn_2 inception_4a/3x3_reduce_inception_4a/relu_3x3_reduce 0=96 1=1 5=1 6=46080 8=102 9=1
|
||||
Convolution inception_4a/3x3 1 1 inception_4a/3x3_reduce_inception_4a/relu_3x3_reduce inception_4a/3x3_inception_4a/relu_3x3 0=208 1=3 4=1 5=1 6=179712 8=2 9=1
|
||||
Convolution inception_4a/5x5_reduce 1 1 pool3/3x3_s2_splitncnn_1 inception_4a/5x5_reduce_inception_4a/relu_5x5_reduce 0=16 1=1 5=1 6=7680 8=102 9=1
|
||||
Convolution inception_4a/5x5 1 1 inception_4a/5x5_reduce_inception_4a/relu_5x5_reduce inception_4a/5x5_inception_4a/relu_5x5 0=48 1=5 4=2 5=1 6=19200 8=2 9=1
|
||||
Pooling inception_4a/pool 1 1 pool3/3x3_s2_splitncnn_0 inception_4a/pool 1=3 3=1
|
||||
Convolution inception_4a/pool_proj 1 1 inception_4a/pool inception_4a/pool_proj_inception_4a/relu_pool_proj 0=64 1=1 5=1 6=30720 8=2 9=1
|
||||
Concat inception_4a/output 4 1 inception_4a/1x1_inception_4a/relu_1x1 inception_4a/3x3_inception_4a/relu_3x3 inception_4a/5x5_inception_4a/relu_5x5 inception_4a/pool_proj_inception_4a/relu_pool_proj inception_4a/output
|
||||
Split splitncnn_3 1 4 inception_4a/output inception_4a/output_splitncnn_0 inception_4a/output_splitncnn_1 inception_4a/output_splitncnn_2 inception_4a/output_splitncnn_3
|
||||
Convolution inception_4b/1x1 1 1 inception_4a/output_splitncnn_3 inception_4b/1x1_inception_4b/relu_1x1 0=160 1=1 5=1 6=81920 8=2 9=1
|
||||
Convolution inception_4b/3x3_reduce 1 1 inception_4a/output_splitncnn_2 inception_4b/3x3_reduce_inception_4b/relu_3x3_reduce 0=112 1=1 5=1 6=57344 8=102 9=1
|
||||
Convolution inception_4b/3x3 1 1 inception_4b/3x3_reduce_inception_4b/relu_3x3_reduce inception_4b/3x3_inception_4b/relu_3x3 0=224 1=3 4=1 5=1 6=225792 8=2 9=1
|
||||
Convolution inception_4b/5x5_reduce 1 1 inception_4a/output_splitncnn_1 inception_4b/5x5_reduce_inception_4b/relu_5x5_reduce 0=24 1=1 5=1 6=12288 8=102 9=1
|
||||
Convolution inception_4b/5x5 1 1 inception_4b/5x5_reduce_inception_4b/relu_5x5_reduce inception_4b/5x5_inception_4b/relu_5x5 0=64 1=5 4=2 5=1 6=38400 8=2 9=1
|
||||
Pooling inception_4b/pool 1 1 inception_4a/output_splitncnn_0 inception_4b/pool 1=3 3=1
|
||||
Convolution inception_4b/pool_proj 1 1 inception_4b/pool inception_4b/pool_proj_inception_4b/relu_pool_proj 0=64 1=1 5=1 6=32768 8=2 9=1
|
||||
Concat inception_4b/output 4 1 inception_4b/1x1_inception_4b/relu_1x1 inception_4b/3x3_inception_4b/relu_3x3 inception_4b/5x5_inception_4b/relu_5x5 inception_4b/pool_proj_inception_4b/relu_pool_proj inception_4b/output
|
||||
Split splitncnn_4 1 4 inception_4b/output inception_4b/output_splitncnn_0 inception_4b/output_splitncnn_1 inception_4b/output_splitncnn_2 inception_4b/output_splitncnn_3
|
||||
Convolution inception_4c/1x1 1 1 inception_4b/output_splitncnn_3 inception_4c/1x1_inception_4c/relu_1x1 0=128 1=1 5=1 6=65536 8=2 9=1
|
||||
Convolution inception_4c/3x3_reduce 1 1 inception_4b/output_splitncnn_2 inception_4c/3x3_reduce_inception_4c/relu_3x3_reduce 0=128 1=1 5=1 6=65536 8=102 9=1
|
||||
Convolution inception_4c/3x3 1 1 inception_4c/3x3_reduce_inception_4c/relu_3x3_reduce inception_4c/3x3_inception_4c/relu_3x3 0=256 1=3 4=1 5=1 6=294912 8=2 9=1
|
||||
Convolution inception_4c/5x5_reduce 1 1 inception_4b/output_splitncnn_1 inception_4c/5x5_reduce_inception_4c/relu_5x5_reduce 0=24 1=1 5=1 6=12288 8=102 9=1
|
||||
Convolution inception_4c/5x5 1 1 inception_4c/5x5_reduce_inception_4c/relu_5x5_reduce inception_4c/5x5_inception_4c/relu_5x5 0=64 1=5 4=2 5=1 6=38400 8=2 9=1
|
||||
Pooling inception_4c/pool 1 1 inception_4b/output_splitncnn_0 inception_4c/pool 1=3 3=1
|
||||
Convolution inception_4c/pool_proj 1 1 inception_4c/pool inception_4c/pool_proj_inception_4c/relu_pool_proj 0=64 1=1 5=1 6=32768 8=2 9=1
|
||||
Concat inception_4c/output 4 1 inception_4c/1x1_inception_4c/relu_1x1 inception_4c/3x3_inception_4c/relu_3x3 inception_4c/5x5_inception_4c/relu_5x5 inception_4c/pool_proj_inception_4c/relu_pool_proj inception_4c/output
|
||||
Split splitncnn_5 1 4 inception_4c/output inception_4c/output_splitncnn_0 inception_4c/output_splitncnn_1 inception_4c/output_splitncnn_2 inception_4c/output_splitncnn_3
|
||||
Convolution inception_4d/1x1 1 1 inception_4c/output_splitncnn_3 inception_4d/1x1_inception_4d/relu_1x1 0=112 1=1 5=1 6=57344 8=2 9=1
|
||||
Convolution inception_4d/3x3_reduce 1 1 inception_4c/output_splitncnn_2 inception_4d/3x3_reduce_inception_4d/relu_3x3_reduce 0=144 1=1 5=1 6=73728 8=102 9=1
|
||||
Convolution inception_4d/3x3 1 1 inception_4d/3x3_reduce_inception_4d/relu_3x3_reduce inception_4d/3x3_inception_4d/relu_3x3 0=288 1=3 4=1 5=1 6=373248 8=2 9=1
|
||||
Convolution inception_4d/5x5_reduce 1 1 inception_4c/output_splitncnn_1 inception_4d/5x5_reduce_inception_4d/relu_5x5_reduce 0=32 1=1 5=1 6=16384 8=102 9=1
|
||||
Convolution inception_4d/5x5 1 1 inception_4d/5x5_reduce_inception_4d/relu_5x5_reduce inception_4d/5x5_inception_4d/relu_5x5 0=64 1=5 4=2 5=1 6=51200 8=2 9=1
|
||||
Pooling inception_4d/pool 1 1 inception_4c/output_splitncnn_0 inception_4d/pool 1=3 3=1
|
||||
Convolution inception_4d/pool_proj 1 1 inception_4d/pool inception_4d/pool_proj_inception_4d/relu_pool_proj 0=64 1=1 5=1 6=32768 8=2 9=1
|
||||
Concat inception_4d/output 4 1 inception_4d/1x1_inception_4d/relu_1x1 inception_4d/3x3_inception_4d/relu_3x3 inception_4d/5x5_inception_4d/relu_5x5 inception_4d/pool_proj_inception_4d/relu_pool_proj inception_4d/output
|
||||
Split splitncnn_6 1 4 inception_4d/output inception_4d/output_splitncnn_0 inception_4d/output_splitncnn_1 inception_4d/output_splitncnn_2 inception_4d/output_splitncnn_3
|
||||
Convolution inception_4e/1x1 1 1 inception_4d/output_splitncnn_3 inception_4e/1x1_inception_4e/relu_1x1 0=256 1=1 5=1 6=135168 8=2 9=1
|
||||
Convolution inception_4e/3x3_reduce 1 1 inception_4d/output_splitncnn_2 inception_4e/3x3_reduce_inception_4e/relu_3x3_reduce 0=160 1=1 5=1 6=84480 8=102 9=1
|
||||
Convolution inception_4e/3x3 1 1 inception_4e/3x3_reduce_inception_4e/relu_3x3_reduce inception_4e/3x3_inception_4e/relu_3x3 0=320 1=3 4=1 5=1 6=460800 8=2 9=1
|
||||
Convolution inception_4e/5x5_reduce 1 1 inception_4d/output_splitncnn_1 inception_4e/5x5_reduce_inception_4e/relu_5x5_reduce 0=32 1=1 5=1 6=16896 8=102 9=1
|
||||
Convolution inception_4e/5x5 1 1 inception_4e/5x5_reduce_inception_4e/relu_5x5_reduce inception_4e/5x5_inception_4e/relu_5x5 0=128 1=5 4=2 5=1 6=102400 8=2 9=1
|
||||
Pooling inception_4e/pool 1 1 inception_4d/output_splitncnn_0 inception_4e/pool 1=3 3=1
|
||||
Convolution inception_4e/pool_proj 1 1 inception_4e/pool inception_4e/pool_proj_inception_4e/relu_pool_proj 0=128 1=1 5=1 6=67584 8=2 9=1
|
||||
Concat inception_4e/output 4 1 inception_4e/1x1_inception_4e/relu_1x1 inception_4e/3x3_inception_4e/relu_3x3 inception_4e/5x5_inception_4e/relu_5x5 inception_4e/pool_proj_inception_4e/relu_pool_proj inception_4e/output
|
||||
Pooling pool4/3x3_s2 1 1 inception_4e/output pool4/3x3_s2 1=3 2=2
|
||||
Split splitncnn_7 1 4 pool4/3x3_s2 pool4/3x3_s2_splitncnn_0 pool4/3x3_s2_splitncnn_1 pool4/3x3_s2_splitncnn_2 pool4/3x3_s2_splitncnn_3
|
||||
Convolution inception_5a/1x1 1 1 pool4/3x3_s2_splitncnn_3 inception_5a/1x1_inception_5a/relu_1x1 0=256 1=1 5=1 6=212992 8=2 9=1
|
||||
Convolution inception_5a/3x3_reduce 1 1 pool4/3x3_s2_splitncnn_2 inception_5a/3x3_reduce_inception_5a/relu_3x3_reduce 0=160 1=1 5=1 6=133120 8=102 9=1
|
||||
Convolution inception_5a/3x3 1 1 inception_5a/3x3_reduce_inception_5a/relu_3x3_reduce inception_5a/3x3_inception_5a/relu_3x3 0=320 1=3 4=1 5=1 6=460800 8=2 9=1
|
||||
Convolution inception_5a/5x5_reduce 1 1 pool4/3x3_s2_splitncnn_1 inception_5a/5x5_reduce_inception_5a/relu_5x5_reduce 0=32 1=1 5=1 6=26624 8=102 9=1
|
||||
Convolution inception_5a/5x5 1 1 inception_5a/5x5_reduce_inception_5a/relu_5x5_reduce inception_5a/5x5_inception_5a/relu_5x5 0=128 1=5 4=2 5=1 6=102400 8=2 9=1
|
||||
Pooling inception_5a/pool 1 1 pool4/3x3_s2_splitncnn_0 inception_5a/pool 1=3 3=1
|
||||
Convolution inception_5a/pool_proj 1 1 inception_5a/pool inception_5a/pool_proj_inception_5a/relu_pool_proj 0=128 1=1 5=1 6=106496 8=2 9=1
|
||||
Concat inception_5a/output 4 1 inception_5a/1x1_inception_5a/relu_1x1 inception_5a/3x3_inception_5a/relu_3x3 inception_5a/5x5_inception_5a/relu_5x5 inception_5a/pool_proj_inception_5a/relu_pool_proj inception_5a/output
|
||||
Split splitncnn_8 1 4 inception_5a/output inception_5a/output_splitncnn_0 inception_5a/output_splitncnn_1 inception_5a/output_splitncnn_2 inception_5a/output_splitncnn_3
|
||||
Convolution inception_5b/1x1 1 1 inception_5a/output_splitncnn_3 inception_5b/1x1_inception_5b/relu_1x1 0=384 1=1 5=1 6=319488 8=2 9=1
|
||||
Convolution inception_5b/3x3_reduce 1 1 inception_5a/output_splitncnn_2 inception_5b/3x3_reduce_inception_5b/relu_3x3_reduce 0=192 1=1 5=1 6=159744 8=102 9=1
|
||||
Convolution inception_5b/3x3 1 1 inception_5b/3x3_reduce_inception_5b/relu_3x3_reduce inception_5b/3x3_inception_5b/relu_3x3 0=384 1=3 4=1 5=1 6=663552 8=2 9=1
|
||||
Convolution inception_5b/5x5_reduce 1 1 inception_5a/output_splitncnn_1 inception_5b/5x5_reduce_inception_5b/relu_5x5_reduce 0=48 1=1 5=1 6=39936 8=102 9=1
|
||||
Convolution inception_5b/5x5 1 1 inception_5b/5x5_reduce_inception_5b/relu_5x5_reduce inception_5b/5x5_inception_5b/relu_5x5 0=128 1=5 4=2 5=1 6=153600 8=2 9=1
|
||||
Pooling inception_5b/pool 1 1 inception_5a/output_splitncnn_0 inception_5b/pool 1=3 3=1
|
||||
Convolution inception_5b/pool_proj 1 1 inception_5b/pool inception_5b/pool_proj_inception_5b/relu_pool_proj 0=128 1=1 5=1 6=106496 8=2 9=1
|
||||
Concat inception_5b/output 4 1 inception_5b/1x1_inception_5b/relu_1x1 inception_5b/3x3_inception_5b/relu_3x3 inception_5b/5x5_inception_5b/relu_5x5 inception_5b/pool_proj_inception_5b/relu_pool_proj inception_5b/output
|
||||
Pooling pool5/7x7_s1 1 1 inception_5b/output pool5/7x7_s1_pool5/drop_7x7_s1 0=1 1=7
|
||||
InnerProduct loss3/classifier 1 1 pool5/7x7_s1_pool5/drop_7x7_s1 loss3/classifier 0=1000 1=1 2=1024000
|
||||
Softmax prob 1 1 loss3/classifier output
|
78
3rdparty/ncnn/benchmark/mnasnet.param
vendored
Normal file
78
3rdparty/ncnn/benchmark/mnasnet.param
vendored
Normal file
@ -0,0 +1,78 @@
|
||||
7767517
|
||||
76 86
|
||||
Input data 0 1 data -23330=4,3,224,224,3 0=224 1=224 2=3
|
||||
Convolution first-3x3-conv 1 1 data first-3x3-conv_relu -23330=4,3,112,112,32 0=32 1=3 3=2 4=1 5=1 6=864 9=1
|
||||
ConvolutionDepthWise A0_dw 1 1 first-3x3-conv_relu A0_dw_relu -23330=4,3,112,112,32 0=32 1=3 4=1 5=1 6=288 7=32 9=1
|
||||
Convolution A0_linear 1 1 A0_dw_relu A0_linear_bn -23330=4,3,112,112,16 0=16 1=1 5=1 6=512
|
||||
Convolution B0_expand 1 1 A0_linear_bn B0_expand_relu -23330=4,3,112,112,48 0=48 1=1 5=1 6=768 9=1
|
||||
ConvolutionDepthWise B0_dw 1 1 B0_expand_relu B0_dw_relu -23330=4,3,56,56,48 0=48 1=3 3=2 4=1 5=1 6=432 7=48 9=1
|
||||
Convolution B0_linear 1 1 B0_dw_relu B0_linear_bn -23330=4,3,56,56,24 0=24 1=1 5=1 6=1152
|
||||
Split splitncnn_0 1 2 B0_linear_bn B0_linear_bn_splitncnn_0 B0_linear_bn_splitncnn_1 -23330=8,3,56,56,24,3,56,56,24
|
||||
Convolution B1_expand 1 1 B0_linear_bn_splitncnn_1 B1_expand_relu -23330=4,3,56,56,72 0=72 1=1 5=1 6=1728 9=1
|
||||
ConvolutionDepthWise B1_dw 1 1 B1_expand_relu B1_dw_relu -23330=4,3,56,56,72 0=72 1=3 4=1 5=1 6=648 7=72 9=1
|
||||
Convolution B1_linear 1 1 B1_dw_relu B1_linear_bn -23330=4,3,56,56,24 0=24 1=1 5=1 6=1728
|
||||
BinaryOp unknownncnn_0 2 1 B0_linear_bn_splitncnn_0 B1_linear_bn unknownncnn_0 -23330=4,3,56,56,24
|
||||
Split splitncnn_1 1 2 unknownncnn_0 unknownncnn_0_splitncnn_0 unknownncnn_0_splitncnn_1 -23330=8,3,56,56,24,3,56,56,24
|
||||
Convolution B2_expand 1 1 unknownncnn_0_splitncnn_1 B2_expand_relu -23330=4,3,56,56,72 0=72 1=1 5=1 6=1728 9=1
|
||||
ConvolutionDepthWise B2_dw 1 1 B2_expand_relu B2_dw_relu -23330=4,3,56,56,72 0=72 1=3 4=1 5=1 6=648 7=72 9=1
|
||||
Convolution B2_linear 1 1 B2_dw_relu B2_linear_bn -23330=4,3,56,56,24 0=24 1=1 5=1 6=1728
|
||||
BinaryOp unknownncnn_1 2 1 unknownncnn_0_splitncnn_0 B2_linear_bn unknownncnn_1 -23330=4,3,56,56,24
|
||||
Convolution C0_expand 1 1 unknownncnn_1 C0_expand_relu -23330=4,3,56,56,72 0=72 1=1 5=1 6=1728 9=1
|
||||
ConvolutionDepthWise C0_dw 1 1 C0_expand_relu C0_dw_relu -23330=4,3,28,28,72 0=72 1=5 3=2 4=2 5=1 6=1800 7=72 9=1
|
||||
Convolution C0_linear 1 1 C0_dw_relu C0_linear_bn -23330=4,3,28,28,40 0=40 1=1 5=1 6=2880
|
||||
Split splitncnn_2 1 2 C0_linear_bn C0_linear_bn_splitncnn_0 C0_linear_bn_splitncnn_1 -23330=8,3,28,28,40,3,28,28,40
|
||||
Convolution C1_expand 1 1 C0_linear_bn_splitncnn_1 C1_expand_relu -23330=4,3,28,28,120 0=120 1=1 5=1 6=4800 9=1
|
||||
ConvolutionDepthWise C1_dw 1 1 C1_expand_relu C1_dw_relu -23330=4,3,28,28,120 0=120 1=5 4=2 5=1 6=3000 7=120 9=1
|
||||
Convolution C1_linear 1 1 C1_dw_relu C1_linear_bn -23330=4,3,28,28,40 0=40 1=1 5=1 6=4800
|
||||
BinaryOp unknownncnn_2 2 1 C0_linear_bn_splitncnn_0 C1_linear_bn unknownncnn_2 -23330=4,3,28,28,40
|
||||
Split splitncnn_3 1 2 unknownncnn_2 unknownncnn_2_splitncnn_0 unknownncnn_2_splitncnn_1 -23330=8,3,28,28,40,3,28,28,40
|
||||
Convolution C2_expand 1 1 unknownncnn_2_splitncnn_1 C2_expand_relu -23330=4,3,28,28,120 0=120 1=1 5=1 6=4800 9=1
|
||||
ConvolutionDepthWise C2_dw 1 1 C2_expand_relu C2_dw_relu -23330=4,3,28,28,120 0=120 1=5 4=2 5=1 6=3000 7=120 9=1
|
||||
Convolution C2_linear 1 1 C2_dw_relu C2_linear_bn -23330=4,3,28,28,40 0=40 1=1 5=1 6=4800
|
||||
BinaryOp unknownncnn_3 2 1 unknownncnn_2_splitncnn_0 C2_linear_bn unknownncnn_3 -23330=4,3,28,28,40
|
||||
Convolution D0_expand 1 1 unknownncnn_3 D0_expand_relu -23330=4,3,28,28,240 0=240 1=1 5=1 6=9600 9=1
|
||||
ConvolutionDepthWise D0_dw 1 1 D0_expand_relu D0_dw_relu -23330=4,3,14,14,240 0=240 1=5 3=2 4=2 5=1 6=6000 7=240 9=1
|
||||
Convolution D0_linear 1 1 D0_dw_relu D0_linear_bn -23330=4,3,14,14,80 0=80 1=1 5=1 6=19200
|
||||
Split splitncnn_4 1 2 D0_linear_bn D0_linear_bn_splitncnn_0 D0_linear_bn_splitncnn_1 -23330=8,3,14,14,80,3,14,14,80
|
||||
Convolution D1_expand 1 1 D0_linear_bn_splitncnn_1 D1_expand_relu -23330=4,3,14,14,480 0=480 1=1 5=1 6=38400 9=1
|
||||
ConvolutionDepthWise D1_dw 1 1 D1_expand_relu D1_dw_relu -23330=4,3,14,14,480 0=480 1=5 4=2 5=1 6=12000 7=480 9=1
|
||||
Convolution D1_linear 1 1 D1_dw_relu D1_linear_bn -23330=4,3,14,14,80 0=80 1=1 5=1 6=38400
|
||||
BinaryOp unknownncnn_4 2 1 D0_linear_bn_splitncnn_0 D1_linear_bn unknownncnn_4 -23330=4,3,14,14,80
|
||||
Split splitncnn_5 1 2 unknownncnn_4 unknownncnn_4_splitncnn_0 unknownncnn_4_splitncnn_1 -23330=8,3,14,14,80,3,14,14,80
|
||||
Convolution D2_expand 1 1 unknownncnn_4_splitncnn_1 D2_expand_relu -23330=4,3,14,14,480 0=480 1=1 5=1 6=38400 9=1
|
||||
ConvolutionDepthWise D2_dw 1 1 D2_expand_relu D2_dw_relu -23330=4,3,14,14,480 0=480 1=5 4=2 5=1 6=12000 7=480 9=1
|
||||
Convolution D2_linear 1 1 D2_dw_relu D2_linear_bn -23330=4,3,14,14,80 0=80 1=1 5=1 6=38400
|
||||
BinaryOp unknownncnn_5 2 1 unknownncnn_4_splitncnn_0 D2_linear_bn unknownncnn_5 -23330=4,3,14,14,80
|
||||
Convolution E0_expand 1 1 unknownncnn_5 E0_expand_relu -23330=4,3,14,14,480 0=480 1=1 5=1 6=38400 9=1
|
||||
ConvolutionDepthWise E0_dw 1 1 E0_expand_relu E0_dw_relu -23330=4,3,14,14,480 0=480 1=3 4=1 5=1 6=4320 7=480 9=1
|
||||
Convolution E0_linear 1 1 E0_dw_relu E0_linear_bn -23330=4,3,14,14,96 0=96 1=1 5=1 6=46080
|
||||
Split splitncnn_6 1 2 E0_linear_bn E0_linear_bn_splitncnn_0 E0_linear_bn_splitncnn_1 -23330=8,3,14,14,96,3,14,14,96
|
||||
Convolution E1_expand 1 1 E0_linear_bn_splitncnn_1 E1_expand_relu -23330=4,3,14,14,576 0=576 1=1 5=1 6=55296 9=1
|
||||
ConvolutionDepthWise E1_dw 1 1 E1_expand_relu E1_dw_relu -23330=4,3,14,14,576 0=576 1=3 4=1 5=1 6=5184 7=576 9=1
|
||||
Convolution E1_linear 1 1 E1_dw_relu E1_linear_bn -23330=4,3,14,14,96 0=96 1=1 5=1 6=55296
|
||||
BinaryOp unknownncnn_6 2 1 E0_linear_bn_splitncnn_0 E1_linear_bn unknownncnn_6 -23330=4,3,14,14,96
|
||||
Convolution F0_expand 1 1 unknownncnn_6 F0_expand_relu -23330=4,3,14,14,576 0=576 1=1 5=1 6=55296 9=1
|
||||
ConvolutionDepthWise F0_dw 1 1 F0_expand_relu F0_dw_relu -23330=4,3,7,7,576 0=576 1=5 3=2 4=2 5=1 6=14400 7=576 9=1
|
||||
Convolution F0_linear 1 1 F0_dw_relu F0_linear_bn -23330=4,3,7,7,192 0=192 1=1 5=1 6=110592
|
||||
Split splitncnn_7 1 2 F0_linear_bn F0_linear_bn_splitncnn_0 F0_linear_bn_splitncnn_1 -23330=8,3,7,7,192,3,7,7,192
|
||||
Convolution F1_expand 1 1 F0_linear_bn_splitncnn_1 F1_expand_relu -23330=4,3,7,7,1152 0=1152 1=1 5=1 6=221184 9=1
|
||||
ConvolutionDepthWise F1_dw 1 1 F1_expand_relu F1_dw_relu -23330=4,3,7,7,1152 0=1152 1=5 4=2 5=1 6=28800 7=1152 9=1
|
||||
Convolution F1_linear 1 1 F1_dw_relu F1_linear_bn -23330=4,3,7,7,192 0=192 1=1 5=1 6=221184
|
||||
BinaryOp unknownncnn_7 2 1 F0_linear_bn_splitncnn_0 F1_linear_bn unknownncnn_7 -23330=4,3,7,7,192
|
||||
Split splitncnn_8 1 2 unknownncnn_7 unknownncnn_7_splitncnn_0 unknownncnn_7_splitncnn_1 -23330=8,3,7,7,192,3,7,7,192
|
||||
Convolution F2_expand 1 1 unknownncnn_7_splitncnn_1 F2_expand_relu -23330=4,3,7,7,1152 0=1152 1=1 5=1 6=221184 9=1
|
||||
ConvolutionDepthWise F2_dw 1 1 F2_expand_relu F2_dw_relu -23330=4,3,7,7,1152 0=1152 1=5 4=2 5=1 6=28800 7=1152 9=1
|
||||
Convolution F2_linear 1 1 F2_dw_relu F2_linear_bn -23330=4,3,7,7,192 0=192 1=1 5=1 6=221184
|
||||
BinaryOp unknownncnn_8 2 1 unknownncnn_7_splitncnn_0 F2_linear_bn unknownncnn_8 -23330=4,3,7,7,192
|
||||
Split splitncnn_9 1 2 unknownncnn_8 unknownncnn_8_splitncnn_0 unknownncnn_8_splitncnn_1 -23330=8,3,7,7,192,3,7,7,192
|
||||
Convolution F3_expand 1 1 unknownncnn_8_splitncnn_1 F3_expand_relu -23330=4,3,7,7,1152 0=1152 1=1 5=1 6=221184 9=1
|
||||
ConvolutionDepthWise F3_dw 1 1 F3_expand_relu F3_dw_relu -23330=4,3,7,7,1152 0=1152 1=5 4=2 5=1 6=28800 7=1152 9=1
|
||||
Convolution F3_linear 1 1 F3_dw_relu F3_linear_bn -23330=4,3,7,7,192 0=192 1=1 5=1 6=221184
|
||||
BinaryOp unknownncnn_9 2 1 unknownncnn_8_splitncnn_0 F3_linear_bn unknownncnn_9 -23330=4,3,7,7,192
|
||||
Convolution G0_expand 1 1 unknownncnn_9 G0_expand_relu -23330=4,3,7,7,1152 0=1152 1=1 5=1 6=221184 9=1
|
||||
ConvolutionDepthWise G0_dw 1 1 G0_expand_relu G0_dw_relu -23330=4,3,7,7,1152 0=1152 1=3 4=1 5=1 6=10368 7=1152 9=1
|
||||
Convolution G0_linear 1 1 G0_dw_relu G0_linear_bn -23330=4,3,7,7,320 0=320 1=1 5=1 6=368640
|
||||
Convolution last-1x1-conv 1 1 G0_linear_bn last-1x1-conv_relu -23330=4,3,7,7,1280 0=1280 1=1 5=1 6=409600 9=1
|
||||
Pooling avgpool 1 1 last-1x1-conv_relu flatten -23330=4,1,1280,1,1 0=1 1=7 4=1 5=1
|
||||
InnerProduct fc 1 1 flatten fc -23330=4,1,1000,1,1 0=1000 1=1 2=1280000
|
||||
Softmax prob 1 1 fc output -23330=4,1,1000,1,1
|
33
3rdparty/ncnn/benchmark/mobilenet.param
vendored
Normal file
33
3rdparty/ncnn/benchmark/mobilenet.param
vendored
Normal file
@ -0,0 +1,33 @@
|
||||
7767517
|
||||
31 31
|
||||
Input data 0 1 data -23330=4,3,224,224,3 0=224 1=224 2=3
|
||||
Convolution conv1 1 1 data conv1_relu1 -23330=4,3,112,112,32 0=32 1=3 3=2 4=1 5=1 6=864 9=1
|
||||
ConvolutionDepthWise conv2_1/dw 1 1 conv1_relu1 conv2_1/dw_relu2_1/dw -23330=4,3,112,112,32 0=32 1=3 4=1 5=1 6=288 7=32 9=1
|
||||
Convolution conv2_1/sep 1 1 conv2_1/dw_relu2_1/dw conv2_1/sep_relu2_1/sep -23330=4,3,112,112,64 0=64 1=1 5=1 6=2048 9=1
|
||||
ConvolutionDepthWise conv2_2/dw 1 1 conv2_1/sep_relu2_1/sep conv2_2/dw_relu2_2/dw -23330=4,3,56,56,64 0=64 1=3 3=2 4=1 5=1 6=576 7=64 9=1
|
||||
Convolution conv2_2/sep 1 1 conv2_2/dw_relu2_2/dw conv2_2/sep_relu2_2/sep -23330=4,3,56,56,128 0=128 1=1 5=1 6=8192 9=1
|
||||
ConvolutionDepthWise conv3_1/dw 1 1 conv2_2/sep_relu2_2/sep conv3_1/dw_relu3_1/dw -23330=4,3,56,56,128 0=128 1=3 4=1 5=1 6=1152 7=128 9=1
|
||||
Convolution conv3_1/sep 1 1 conv3_1/dw_relu3_1/dw conv3_1/sep_relu3_1/sep -23330=4,3,56,56,128 0=128 1=1 5=1 6=16384 9=1
|
||||
ConvolutionDepthWise conv3_2/dw 1 1 conv3_1/sep_relu3_1/sep conv3_2/dw_relu3_2/dw -23330=4,3,28,28,128 0=128 1=3 3=2 4=1 5=1 6=1152 7=128 9=1
|
||||
Convolution conv3_2/sep 1 1 conv3_2/dw_relu3_2/dw conv3_2/sep_relu3_2/sep -23330=4,3,28,28,256 0=256 1=1 5=1 6=32768 9=1
|
||||
ConvolutionDepthWise conv4_1/dw 1 1 conv3_2/sep_relu3_2/sep conv4_1/dw_relu4_1/dw -23330=4,3,28,28,256 0=256 1=3 4=1 5=1 6=2304 7=256 9=1
|
||||
Convolution conv4_1/sep 1 1 conv4_1/dw_relu4_1/dw conv4_1/sep_relu4_1/sep -23330=4,3,28,28,256 0=256 1=1 5=1 6=65536 9=1
|
||||
ConvolutionDepthWise conv4_2/dw 1 1 conv4_1/sep_relu4_1/sep conv4_2/dw_relu4_2/dw -23330=4,3,14,14,256 0=256 1=3 3=2 4=1 5=1 6=2304 7=256 9=1
|
||||
Convolution conv4_2/sep 1 1 conv4_2/dw_relu4_2/dw conv4_2/sep_relu4_2/sep -23330=4,3,14,14,512 0=512 1=1 5=1 6=131072 9=1
|
||||
ConvolutionDepthWise conv5_1/dw 1 1 conv4_2/sep_relu4_2/sep conv5_1/dw_relu5_1/dw -23330=4,3,14,14,512 0=512 1=3 4=1 5=1 6=4608 7=512 9=1
|
||||
Convolution conv5_1/sep 1 1 conv5_1/dw_relu5_1/dw conv5_1/sep_relu5_1/sep -23330=4,3,14,14,512 0=512 1=1 5=1 6=262144 9=1
|
||||
ConvolutionDepthWise conv5_2/dw 1 1 conv5_1/sep_relu5_1/sep conv5_2/dw_relu5_2/dw -23330=4,3,14,14,512 0=512 1=3 4=1 5=1 6=4608 7=512 9=1
|
||||
Convolution conv5_2/sep 1 1 conv5_2/dw_relu5_2/dw conv5_2/sep_relu5_2/sep -23330=4,3,14,14,512 0=512 1=1 5=1 6=262144 9=1
|
||||
ConvolutionDepthWise conv5_3/dw 1 1 conv5_2/sep_relu5_2/sep conv5_3/dw_relu5_3/dw -23330=4,3,14,14,512 0=512 1=3 4=1 5=1 6=4608 7=512 9=1
|
||||
Convolution conv5_3/sep 1 1 conv5_3/dw_relu5_3/dw conv5_3/sep_relu5_3/sep -23330=4,3,14,14,512 0=512 1=1 5=1 6=262144 9=1
|
||||
ConvolutionDepthWise conv5_4/dw 1 1 conv5_3/sep_relu5_3/sep conv5_4/dw_relu5_4/dw -23330=4,3,14,14,512 0=512 1=3 4=1 5=1 6=4608 7=512 9=1
|
||||
Convolution conv5_4/sep 1 1 conv5_4/dw_relu5_4/dw conv5_4/sep_relu5_4/sep -23330=4,3,14,14,512 0=512 1=1 5=1 6=262144 9=1
|
||||
ConvolutionDepthWise conv5_5/dw 1 1 conv5_4/sep_relu5_4/sep conv5_5/dw_relu5_5/dw -23330=4,3,14,14,512 0=512 1=3 4=1 5=1 6=4608 7=512 9=1
|
||||
Convolution conv5_5/sep 1 1 conv5_5/dw_relu5_5/dw conv5_5/sep_relu5_5/sep -23330=4,3,14,14,512 0=512 1=1 5=1 6=262144 9=1
|
||||
ConvolutionDepthWise conv5_6/dw 1 1 conv5_5/sep_relu5_5/sep conv5_6/dw_relu5_6/dw -23330=4,3,7,7,512 0=512 1=3 3=2 4=1 5=1 6=4608 7=512 9=1
|
||||
Convolution conv5_6/sep 1 1 conv5_6/dw_relu5_6/dw conv5_6/sep_relu5_6/sep -23330=4,3,7,7,1024 0=1024 1=1 5=1 6=524288 9=1
|
||||
ConvolutionDepthWise conv6/dw 1 1 conv5_6/sep_relu5_6/sep conv6/dw_relu6/dw -23330=4,3,7,7,1024 0=1024 1=3 4=1 5=1 6=9216 7=1024 9=1
|
||||
Convolution conv6/sep 1 1 conv6/dw_relu6/dw conv6/sep_relu6/sep -23330=4,3,7,7,1024 0=1024 1=1 5=1 6=1048576 9=1
|
||||
Pooling pool6 1 1 conv6/sep_relu6/sep pool6 -23330=4,1,1024,1,1 0=1 4=1
|
||||
InnerProduct fc7 1 1 pool6 fc7 -23330=4,1,1000,1,1 0=1000 1=1 2=1024000
|
||||
Softmax prob 1 1 fc7 output -23330=4,1,1000,1,1
|
33
3rdparty/ncnn/benchmark/mobilenet_int8.param
vendored
Normal file
33
3rdparty/ncnn/benchmark/mobilenet_int8.param
vendored
Normal file
@ -0,0 +1,33 @@
|
||||
7767517
|
||||
31 31
|
||||
Input data 0 1 data 0=224 1=224 2=3
|
||||
Convolution conv1 1 1 data conv1_relu1 0=32 1=3 3=2 4=1 5=1 6=864 8=102 9=1
|
||||
ConvolutionDepthWise conv2_1/dw 1 1 conv1_relu1 conv2_1/dw_relu2_1/dw 0=32 1=3 4=1 5=1 6=288 7=32 8=101 9=1
|
||||
Convolution conv2_1/sep 1 1 conv2_1/dw_relu2_1/dw conv2_1/sep_relu2_1/sep 0=64 1=1 5=1 6=2048 8=102 9=1
|
||||
ConvolutionDepthWise conv2_2/dw 1 1 conv2_1/sep_relu2_1/sep conv2_2/dw_relu2_2/dw 0=64 1=3 3=2 4=1 5=1 6=576 7=64 8=101 9=1
|
||||
Convolution conv2_2/sep 1 1 conv2_2/dw_relu2_2/dw conv2_2/sep_relu2_2/sep 0=128 1=1 5=1 6=8192 8=102 9=1
|
||||
ConvolutionDepthWise conv3_1/dw 1 1 conv2_2/sep_relu2_2/sep conv3_1/dw_relu3_1/dw 0=128 1=3 4=1 5=1 6=1152 7=128 8=101 9=1
|
||||
Convolution conv3_1/sep 1 1 conv3_1/dw_relu3_1/dw conv3_1/sep_relu3_1/sep 0=128 1=1 5=1 6=16384 8=102 9=1
|
||||
ConvolutionDepthWise conv3_2/dw 1 1 conv3_1/sep_relu3_1/sep conv3_2/dw_relu3_2/dw 0=128 1=3 3=2 4=1 5=1 6=1152 7=128 8=101 9=1
|
||||
Convolution conv3_2/sep 1 1 conv3_2/dw_relu3_2/dw conv3_2/sep_relu3_2/sep 0=256 1=1 5=1 6=32768 8=102 9=1
|
||||
ConvolutionDepthWise conv4_1/dw 1 1 conv3_2/sep_relu3_2/sep conv4_1/dw_relu4_1/dw 0=256 1=3 4=1 5=1 6=2304 7=256 8=101 9=1
|
||||
Convolution conv4_1/sep 1 1 conv4_1/dw_relu4_1/dw conv4_1/sep_relu4_1/sep 0=256 1=1 5=1 6=65536 8=102 9=1
|
||||
ConvolutionDepthWise conv4_2/dw 1 1 conv4_1/sep_relu4_1/sep conv4_2/dw_relu4_2/dw 0=256 1=3 3=2 4=1 5=1 6=2304 7=256 8=101 9=1
|
||||
Convolution conv4_2/sep 1 1 conv4_2/dw_relu4_2/dw conv4_2/sep_relu4_2/sep 0=512 1=1 5=1 6=131072 8=102 9=1
|
||||
ConvolutionDepthWise conv5_1/dw 1 1 conv4_2/sep_relu4_2/sep conv5_1/dw_relu5_1/dw 0=512 1=3 4=1 5=1 6=4608 7=512 8=101 9=1
|
||||
Convolution conv5_1/sep 1 1 conv5_1/dw_relu5_1/dw conv5_1/sep_relu5_1/sep 0=512 1=1 5=1 6=262144 8=102 9=1
|
||||
ConvolutionDepthWise conv5_2/dw 1 1 conv5_1/sep_relu5_1/sep conv5_2/dw_relu5_2/dw 0=512 1=3 4=1 5=1 6=4608 7=512 8=101 9=1
|
||||
Convolution conv5_2/sep 1 1 conv5_2/dw_relu5_2/dw conv5_2/sep_relu5_2/sep 0=512 1=1 5=1 6=262144 8=102 9=1
|
||||
ConvolutionDepthWise conv5_3/dw 1 1 conv5_2/sep_relu5_2/sep conv5_3/dw_relu5_3/dw 0=512 1=3 4=1 5=1 6=4608 7=512 8=101 9=1
|
||||
Convolution conv5_3/sep 1 1 conv5_3/dw_relu5_3/dw conv5_3/sep_relu5_3/sep 0=512 1=1 5=1 6=262144 8=102 9=1
|
||||
ConvolutionDepthWise conv5_4/dw 1 1 conv5_3/sep_relu5_3/sep conv5_4/dw_relu5_4/dw 0=512 1=3 4=1 5=1 6=4608 7=512 8=101 9=1
|
||||
Convolution conv5_4/sep 1 1 conv5_4/dw_relu5_4/dw conv5_4/sep_relu5_4/sep 0=512 1=1 5=1 6=262144 8=102 9=1
|
||||
ConvolutionDepthWise conv5_5/dw 1 1 conv5_4/sep_relu5_4/sep conv5_5/dw_relu5_5/dw 0=512 1=3 4=1 5=1 6=4608 7=512 8=101 9=1
|
||||
Convolution conv5_5/sep 1 1 conv5_5/dw_relu5_5/dw conv5_5/sep_relu5_5/sep 0=512 1=1 5=1 6=262144 8=102 9=1
|
||||
ConvolutionDepthWise conv5_6/dw 1 1 conv5_5/sep_relu5_5/sep conv5_6/dw_relu5_6/dw 0=512 1=3 3=2 4=1 5=1 6=4608 7=512 8=101 9=1
|
||||
Convolution conv5_6/sep 1 1 conv5_6/dw_relu5_6/dw conv5_6/sep_relu5_6/sep 0=1024 1=1 5=1 6=524288 8=102 9=1
|
||||
ConvolutionDepthWise conv6/dw 1 1 conv5_6/sep_relu5_6/sep conv6/dw_relu6/dw 0=1024 1=3 4=1 5=1 6=9216 7=1024 8=101 9=1
|
||||
Convolution conv6/sep 1 1 conv6/dw_relu6/dw conv6/sep_relu6/sep 0=1024 1=1 5=1 6=1048576 8=2 9=1
|
||||
Pooling pool6 1 1 conv6/sep_relu6/sep pool6 0=1 4=1
|
||||
InnerProduct fc7 1 1 pool6 fc7 0=1000 1=1 2=1024000 8=2
|
||||
Softmax prob 1 1 fc7 output
|
94
3rdparty/ncnn/benchmark/mobilenet_ssd.param
vendored
Normal file
94
3rdparty/ncnn/benchmark/mobilenet_ssd.param
vendored
Normal file
@ -0,0 +1,94 @@
|
||||
7767517
|
||||
92 115
|
||||
Input input 0 1 data -23330=4,3,300,300,3 0=300 1=300 2=3
|
||||
Split splitncnn_0 1 7 data data_splitncnn_0 data_splitncnn_1 data_splitncnn_2 data_splitncnn_3 data_splitncnn_4 data_splitncnn_5 data_splitncnn_6 -23330=28,3,300,300,3,3,300,300,3,3,300,300,3,3,300,300,3,3,300,300,3,3,300,300,3,3,300,300,3
|
||||
Convolution conv0 1 1 data_splitncnn_6 conv0_conv0/relu -23330=4,3,150,150,32 0=32 1=3 3=2 4=1 5=1 6=864 9=1
|
||||
ConvolutionDepthWise conv1/dw 1 1 conv0_conv0/relu conv1/dw_conv1/dw/relu -23330=4,3,150,150,32 0=32 1=3 4=1 5=1 6=288 7=32 9=1
|
||||
Convolution conv1 1 1 conv1/dw_conv1/dw/relu conv1_conv1/relu -23330=4,3,150,150,64 0=64 1=1 5=1 6=2048 9=1
|
||||
ConvolutionDepthWise conv2/dw 1 1 conv1_conv1/relu conv2/dw_conv2/dw/relu -23330=4,3,75,75,64 0=64 1=3 3=2 4=1 5=1 6=576 7=64 9=1
|
||||
Convolution conv2 1 1 conv2/dw_conv2/dw/relu conv2_conv2/relu -23330=4,3,75,75,128 0=128 1=1 5=1 6=8192 9=1
|
||||
ConvolutionDepthWise conv3/dw 1 1 conv2_conv2/relu conv3/dw_conv3/dw/relu -23330=4,3,75,75,128 0=128 1=3 4=1 5=1 6=1152 7=128 9=1
|
||||
Convolution conv3 1 1 conv3/dw_conv3/dw/relu conv3_conv3/relu -23330=4,3,75,75,128 0=128 1=1 5=1 6=16384 9=1
|
||||
ConvolutionDepthWise conv4/dw 1 1 conv3_conv3/relu conv4/dw_conv4/dw/relu -23330=4,3,38,38,128 0=128 1=3 3=2 4=1 5=1 6=1152 7=128 9=1
|
||||
Convolution conv4 1 1 conv4/dw_conv4/dw/relu conv4_conv4/relu -23330=4,3,38,38,256 0=256 1=1 5=1 6=32768 9=1
|
||||
ConvolutionDepthWise conv5/dw 1 1 conv4_conv4/relu conv5/dw_conv5/dw/relu -23330=4,3,38,38,256 0=256 1=3 4=1 5=1 6=2304 7=256 9=1
|
||||
Convolution conv5 1 1 conv5/dw_conv5/dw/relu conv5_conv5/relu -23330=4,3,38,38,256 0=256 1=1 5=1 6=65536 9=1
|
||||
ConvolutionDepthWise conv6/dw 1 1 conv5_conv5/relu conv6/dw_conv6/dw/relu -23330=4,3,19,19,256 0=256 1=3 3=2 4=1 5=1 6=2304 7=256 9=1
|
||||
Convolution conv6 1 1 conv6/dw_conv6/dw/relu conv6_conv6/relu -23330=4,3,19,19,512 0=512 1=1 5=1 6=131072 9=1
|
||||
ConvolutionDepthWise conv7/dw 1 1 conv6_conv6/relu conv7/dw_conv7/dw/relu -23330=4,3,19,19,512 0=512 1=3 4=1 5=1 6=4608 7=512 9=1
|
||||
Convolution conv7 1 1 conv7/dw_conv7/dw/relu conv7_conv7/relu -23330=4,3,19,19,512 0=512 1=1 5=1 6=262144 9=1
|
||||
ConvolutionDepthWise conv8/dw 1 1 conv7_conv7/relu conv8/dw_conv8/dw/relu -23330=4,3,19,19,512 0=512 1=3 4=1 5=1 6=4608 7=512 9=1
|
||||
Convolution conv8 1 1 conv8/dw_conv8/dw/relu conv8_conv8/relu -23330=4,3,19,19,512 0=512 1=1 5=1 6=262144 9=1
|
||||
ConvolutionDepthWise conv9/dw 1 1 conv8_conv8/relu conv9/dw_conv9/dw/relu -23330=4,3,19,19,512 0=512 1=3 4=1 5=1 6=4608 7=512 9=1
|
||||
Convolution conv9 1 1 conv9/dw_conv9/dw/relu conv9_conv9/relu -23330=4,3,19,19,512 0=512 1=1 5=1 6=262144 9=1
|
||||
ConvolutionDepthWise conv10/dw 1 1 conv9_conv9/relu conv10/dw_conv10/dw/relu -23330=4,3,19,19,512 0=512 1=3 4=1 5=1 6=4608 7=512 9=1
|
||||
Convolution conv10 1 1 conv10/dw_conv10/dw/relu conv10_conv10/relu -23330=4,3,19,19,512 0=512 1=1 5=1 6=262144 9=1
|
||||
ConvolutionDepthWise conv11/dw 1 1 conv10_conv10/relu conv11/dw_conv11/dw/relu -23330=4,3,19,19,512 0=512 1=3 4=1 5=1 6=4608 7=512 9=1
|
||||
Convolution conv11 1 1 conv11/dw_conv11/dw/relu conv11_conv11/relu -23330=4,3,19,19,512 0=512 1=1 5=1 6=262144 9=1
|
||||
Split splitncnn_1 1 4 conv11_conv11/relu conv11_conv11/relu_splitncnn_0 conv11_conv11/relu_splitncnn_1 conv11_conv11/relu_splitncnn_2 conv11_conv11/relu_splitncnn_3 -23330=16,3,19,19,512,3,19,19,512,3,19,19,512,3,19,19,512
|
||||
ConvolutionDepthWise conv12/dw 1 1 conv11_conv11/relu_splitncnn_3 conv12/dw_conv12/dw/relu -23330=4,3,10,10,512 0=512 1=3 3=2 4=1 5=1 6=4608 7=512 9=1
|
||||
Convolution conv12 1 1 conv12/dw_conv12/dw/relu conv12_conv12/relu -23330=4,3,10,10,1024 0=1024 1=1 5=1 6=524288 9=1
|
||||
ConvolutionDepthWise conv13/dw 1 1 conv12_conv12/relu conv13/dw_conv13/dw/relu -23330=4,3,10,10,1024 0=1024 1=3 4=1 5=1 6=9216 7=1024 9=1
|
||||
Convolution conv13 1 1 conv13/dw_conv13/dw/relu conv13_conv13/relu -23330=4,3,10,10,1024 0=1024 1=1 5=1 6=1048576 9=1
|
||||
Split splitncnn_2 1 4 conv13_conv13/relu conv13_conv13/relu_splitncnn_0 conv13_conv13/relu_splitncnn_1 conv13_conv13/relu_splitncnn_2 conv13_conv13/relu_splitncnn_3 -23330=16,3,10,10,1024,3,10,10,1024,3,10,10,1024,3,10,10,1024
|
||||
Convolution conv14_1 1 1 conv13_conv13/relu_splitncnn_3 conv14_1_conv14_1/relu -23330=4,3,10,10,256 0=256 1=1 5=1 6=262144 9=1
|
||||
Convolution conv14_2 1 1 conv14_1_conv14_1/relu conv14_2_conv14_2/relu -23330=4,3,5,5,512 0=512 1=3 3=2 4=1 5=1 6=1179648 9=1
|
||||
Split splitncnn_3 1 4 conv14_2_conv14_2/relu conv14_2_conv14_2/relu_splitncnn_0 conv14_2_conv14_2/relu_splitncnn_1 conv14_2_conv14_2/relu_splitncnn_2 conv14_2_conv14_2/relu_splitncnn_3 -23330=16,3,5,5,512,3,5,5,512,3,5,5,512,3,5,5,512
|
||||
Convolution conv15_1 1 1 conv14_2_conv14_2/relu_splitncnn_3 conv15_1_conv15_1/relu -23330=4,3,5,5,128 0=128 1=1 5=1 6=65536 9=1
|
||||
Convolution conv15_2 1 1 conv15_1_conv15_1/relu conv15_2_conv15_2/relu -23330=4,3,3,3,256 0=256 1=3 3=2 4=1 5=1 6=294912 9=1
|
||||
Split splitncnn_4 1 4 conv15_2_conv15_2/relu conv15_2_conv15_2/relu_splitncnn_0 conv15_2_conv15_2/relu_splitncnn_1 conv15_2_conv15_2/relu_splitncnn_2 conv15_2_conv15_2/relu_splitncnn_3 -23330=16,3,3,3,256,3,3,3,256,3,3,3,256,3,3,3,256
|
||||
Convolution conv16_1 1 1 conv15_2_conv15_2/relu_splitncnn_3 conv16_1_conv16_1/relu -23330=4,3,3,3,128 0=128 1=1 5=1 6=32768 9=1
|
||||
Convolution conv16_2 1 1 conv16_1_conv16_1/relu conv16_2_conv16_2/relu -23330=4,3,2,2,256 0=256 1=3 3=2 4=1 5=1 6=294912 9=1
|
||||
Split splitncnn_5 1 4 conv16_2_conv16_2/relu conv16_2_conv16_2/relu_splitncnn_0 conv16_2_conv16_2/relu_splitncnn_1 conv16_2_conv16_2/relu_splitncnn_2 conv16_2_conv16_2/relu_splitncnn_3 -23330=16,3,2,2,256,3,2,2,256,3,2,2,256,3,2,2,256
|
||||
Convolution conv17_1 1 1 conv16_2_conv16_2/relu_splitncnn_3 conv17_1_conv17_1/relu -23330=4,3,2,2,64 0=64 1=1 5=1 6=16384 9=1
|
||||
Convolution conv17_2 1 1 conv17_1_conv17_1/relu conv17_2_conv17_2/relu -23330=4,3,1,1,128 0=128 1=3 3=2 4=1 5=1 6=73728 9=1
|
||||
Split splitncnn_6 1 3 conv17_2_conv17_2/relu conv17_2_conv17_2/relu_splitncnn_0 conv17_2_conv17_2/relu_splitncnn_1 conv17_2_conv17_2/relu_splitncnn_2 -23330=12,3,1,1,128,3,1,1,128,3,1,1,128
|
||||
Convolution conv11_mbox_loc 1 1 conv11_conv11/relu_splitncnn_2 conv11_mbox_loc -23330=4,3,19,19,12 0=12 1=1 5=1 6=6144
|
||||
Permute conv11_mbox_loc_perm 1 1 conv11_mbox_loc conv11_mbox_loc_perm -23330=4,3,12,19,19 0=3
|
||||
Flatten conv11_mbox_loc_flat 1 1 conv11_mbox_loc_perm conv11_mbox_loc_flat -23330=4,1,4332,1,1
|
||||
Convolution conv11_mbox_conf 1 1 conv11_conv11/relu_splitncnn_1 conv11_mbox_conf -23330=4,3,19,19,63 0=63 1=1 5=1 6=32256
|
||||
Permute conv11_mbox_conf_perm 1 1 conv11_mbox_conf conv11_mbox_conf_perm -23330=4,3,63,19,19 0=3
|
||||
Flatten conv11_mbox_conf_flat 1 1 conv11_mbox_conf_perm conv11_mbox_conf_flat -23330=4,1,22743,1,1
|
||||
PriorBox conv11_mbox_priorbox 2 1 conv11_conv11/relu_splitncnn_0 data_splitncnn_5 conv11_mbox_priorbox -23330=4,2,4332,2,1 -23300=1,6.000000e+01 -23302=1,2.000000e+00 9=-233 10=-233 13=5.000000e-01
|
||||
Convolution conv13_mbox_loc 1 1 conv13_conv13/relu_splitncnn_2 conv13_mbox_loc -23330=4,3,10,10,24 0=24 1=1 5=1 6=24576
|
||||
Permute conv13_mbox_loc_perm 1 1 conv13_mbox_loc conv13_mbox_loc_perm -23330=4,3,24,10,10 0=3
|
||||
Flatten conv13_mbox_loc_flat 1 1 conv13_mbox_loc_perm conv13_mbox_loc_flat -23330=4,1,2400,1,1
|
||||
Convolution conv13_mbox_conf 1 1 conv13_conv13/relu_splitncnn_1 conv13_mbox_conf -23330=4,3,10,10,126 0=126 1=1 5=1 6=129024
|
||||
Permute conv13_mbox_conf_perm 1 1 conv13_mbox_conf conv13_mbox_conf_perm -23330=4,3,126,10,10 0=3
|
||||
Flatten conv13_mbox_conf_flat 1 1 conv13_mbox_conf_perm conv13_mbox_conf_flat -23330=4,1,12600,1,1
|
||||
PriorBox conv13_mbox_priorbox 2 1 conv13_conv13/relu_splitncnn_0 data_splitncnn_4 conv13_mbox_priorbox -23330=4,2,2400,2,1 -23300=1,1.050000e+02 -23301=1,1.500000e+02 -23302=2,2.000000e+00,3.000000e+00 9=-233 10=-233 13=5.000000e-01
|
||||
Convolution conv14_2_mbox_loc 1 1 conv14_2_conv14_2/relu_splitncnn_2 conv14_2_mbox_loc -23330=4,3,5,5,24 0=24 1=1 5=1 6=12288
|
||||
Permute conv14_2_mbox_loc_perm 1 1 conv14_2_mbox_loc conv14_2_mbox_loc_perm -23330=4,3,24,5,5 0=3
|
||||
Flatten conv14_2_mbox_loc_flat 1 1 conv14_2_mbox_loc_perm conv14_2_mbox_loc_flat -23330=4,1,600,1,1
|
||||
Convolution conv14_2_mbox_conf 1 1 conv14_2_conv14_2/relu_splitncnn_1 conv14_2_mbox_conf -23330=4,3,5,5,126 0=126 1=1 5=1 6=64512
|
||||
Permute conv14_2_mbox_conf_perm 1 1 conv14_2_mbox_conf conv14_2_mbox_conf_perm -23330=4,3,126,5,5 0=3
|
||||
Flatten conv14_2_mbox_conf_flat 1 1 conv14_2_mbox_conf_perm conv14_2_mbox_conf_flat -23330=4,1,3150,1,1
|
||||
PriorBox conv14_2_mbox_priorbox 2 1 conv14_2_conv14_2/relu_splitncnn_0 data_splitncnn_3 conv14_2_mbox_priorbox -23330=4,2,600,2,1 -23300=1,1.500000e+02 -23301=1,1.950000e+02 -23302=2,2.000000e+00,3.000000e+00 9=-233 10=-233 13=5.000000e-01
|
||||
Convolution conv15_2_mbox_loc 1 1 conv15_2_conv15_2/relu_splitncnn_2 conv15_2_mbox_loc -23330=4,3,3,3,24 0=24 1=1 5=1 6=6144
|
||||
Permute conv15_2_mbox_loc_perm 1 1 conv15_2_mbox_loc conv15_2_mbox_loc_perm -23330=4,3,24,3,3 0=3
|
||||
Flatten conv15_2_mbox_loc_flat 1 1 conv15_2_mbox_loc_perm conv15_2_mbox_loc_flat -23330=4,1,216,1,1
|
||||
Convolution conv15_2_mbox_conf 1 1 conv15_2_conv15_2/relu_splitncnn_1 conv15_2_mbox_conf -23330=4,3,3,3,126 0=126 1=1 5=1 6=32256
|
||||
Permute conv15_2_mbox_conf_perm 1 1 conv15_2_mbox_conf conv15_2_mbox_conf_perm -23330=4,3,126,3,3 0=3
|
||||
Flatten conv15_2_mbox_conf_flat 1 1 conv15_2_mbox_conf_perm conv15_2_mbox_conf_flat -23330=4,1,1134,1,1
|
||||
PriorBox conv15_2_mbox_priorbox 2 1 conv15_2_conv15_2/relu_splitncnn_0 data_splitncnn_2 conv15_2_mbox_priorbox -23330=4,2,216,2,1 -23300=1,1.950000e+02 -23301=1,2.400000e+02 -23302=2,2.000000e+00,3.000000e+00 9=-233 10=-233 13=5.000000e-01
|
||||
Convolution conv16_2_mbox_loc 1 1 conv16_2_conv16_2/relu_splitncnn_2 conv16_2_mbox_loc -23330=4,3,2,2,24 0=24 1=1 5=1 6=6144
|
||||
Permute conv16_2_mbox_loc_perm 1 1 conv16_2_mbox_loc conv16_2_mbox_loc_perm -23330=4,3,24,2,2 0=3
|
||||
Flatten conv16_2_mbox_loc_flat 1 1 conv16_2_mbox_loc_perm conv16_2_mbox_loc_flat -23330=4,1,96,1,1
|
||||
Convolution conv16_2_mbox_conf 1 1 conv16_2_conv16_2/relu_splitncnn_1 conv16_2_mbox_conf -23330=4,3,2,2,126 0=126 1=1 5=1 6=32256
|
||||
Permute conv16_2_mbox_conf_perm 1 1 conv16_2_mbox_conf conv16_2_mbox_conf_perm -23330=4,3,126,2,2 0=3
|
||||
Flatten conv16_2_mbox_conf_flat 1 1 conv16_2_mbox_conf_perm conv16_2_mbox_conf_flat -23330=4,1,504,1,1
|
||||
PriorBox conv16_2_mbox_priorbox 2 1 conv16_2_conv16_2/relu_splitncnn_0 data_splitncnn_1 conv16_2_mbox_priorbox -23330=4,2,96,2,1 -23300=1,2.400000e+02 -23301=1,2.850000e+02 -23302=2,2.000000e+00,3.000000e+00 9=-233 10=-233 13=5.000000e-01
|
||||
Convolution conv17_2_mbox_loc 1 1 conv17_2_conv17_2/relu_splitncnn_2 conv17_2_mbox_loc -23330=4,3,1,1,24 0=24 1=1 5=1 6=3072
|
||||
Permute conv17_2_mbox_loc_perm 1 1 conv17_2_mbox_loc conv17_2_mbox_loc_perm -23330=4,3,24,1,1 0=3
|
||||
Flatten conv17_2_mbox_loc_flat 1 1 conv17_2_mbox_loc_perm conv17_2_mbox_loc_flat -23330=4,1,24,1,1
|
||||
Convolution conv17_2_mbox_conf 1 1 conv17_2_conv17_2/relu_splitncnn_1 conv17_2_mbox_conf -23330=4,3,1,1,126 0=126 1=1 5=1 6=16128
|
||||
Permute conv17_2_mbox_conf_perm 1 1 conv17_2_mbox_conf conv17_2_mbox_conf_perm -23330=4,3,126,1,1 0=3
|
||||
Flatten conv17_2_mbox_conf_flat 1 1 conv17_2_mbox_conf_perm conv17_2_mbox_conf_flat -23330=4,1,126,1,1
|
||||
PriorBox conv17_2_mbox_priorbox 2 1 conv17_2_conv17_2/relu_splitncnn_0 data_splitncnn_0 conv17_2_mbox_priorbox -23330=4,2,24,2,1 -23300=1,2.850000e+02 -23301=1,3.000000e+02 -23302=2,2.000000e+00,3.000000e+00 9=-233 10=-233 13=5.000000e-01
|
||||
Concat mbox_loc 6 1 conv11_mbox_loc_flat conv13_mbox_loc_flat conv14_2_mbox_loc_flat conv15_2_mbox_loc_flat conv16_2_mbox_loc_flat conv17_2_mbox_loc_flat mbox_loc -23330=4,1,7668,1,1
|
||||
Concat mbox_conf 6 1 conv11_mbox_conf_flat conv13_mbox_conf_flat conv14_2_mbox_conf_flat conv15_2_mbox_conf_flat conv16_2_mbox_conf_flat conv17_2_mbox_conf_flat mbox_conf -23330=4,1,40257,1,1
|
||||
Concat mbox_priorbox 6 1 conv11_mbox_priorbox conv13_mbox_priorbox conv14_2_mbox_priorbox conv15_2_mbox_priorbox conv16_2_mbox_priorbox conv17_2_mbox_priorbox mbox_priorbox -23330=4,2,7668,2,1 0=1
|
||||
Reshape mbox_conf_reshape 1 1 mbox_conf mbox_conf_reshape -23330=4,2,21,1917,1 0=21 1=-1
|
||||
Softmax mbox_conf_softmax 1 1 mbox_conf_reshape mbox_conf_softmax -23330=4,2,21,1917,1 0=1 1=1
|
||||
Flatten mbox_conf_flatten 1 1 mbox_conf_softmax mbox_conf_flatten -23330=4,1,40257,1,1
|
||||
DetectionOutput detection_out 3 1 mbox_loc mbox_conf_flatten mbox_priorbox output 0=21 1=4.500000e-01 2=100 4=2.500000e-01
|
94
3rdparty/ncnn/benchmark/mobilenet_ssd_int8.param
vendored
Normal file
94
3rdparty/ncnn/benchmark/mobilenet_ssd_int8.param
vendored
Normal file
@ -0,0 +1,94 @@
|
||||
7767517
|
||||
92 115
|
||||
Input input 0 1 data 0=300 1=300 2=3
|
||||
Split splitncnn_0 1 7 data data_splitncnn_0 data_splitncnn_1 data_splitncnn_2 data_splitncnn_3 data_splitncnn_4 data_splitncnn_5 data_splitncnn_6
|
||||
Convolution conv0 1 1 data_splitncnn_6 conv0_conv0/relu 0=32 1=3 3=2 4=1 5=1 6=864 8=102 9=1
|
||||
ConvolutionDepthWise conv1/dw 1 1 conv0_conv0/relu conv1/dw_conv1/dw/relu 0=32 1=3 4=1 5=1 6=288 7=32 8=101 9=1
|
||||
Convolution conv1 1 1 conv1/dw_conv1/dw/relu conv1_conv1/relu 0=64 1=1 5=1 6=2048 8=102 9=1
|
||||
ConvolutionDepthWise conv2/dw 1 1 conv1_conv1/relu conv2/dw_conv2/dw/relu 0=64 1=3 3=2 4=1 5=1 6=576 7=64 8=101 9=1
|
||||
Convolution conv2 1 1 conv2/dw_conv2/dw/relu conv2_conv2/relu 0=128 1=1 5=1 6=8192 8=102 9=1
|
||||
ConvolutionDepthWise conv3/dw 1 1 conv2_conv2/relu conv3/dw_conv3/dw/relu 0=128 1=3 4=1 5=1 6=1152 7=128 8=101 9=1
|
||||
Convolution conv3 1 1 conv3/dw_conv3/dw/relu conv3_conv3/relu 0=128 1=1 5=1 6=16384 8=102 9=1
|
||||
ConvolutionDepthWise conv4/dw 1 1 conv3_conv3/relu conv4/dw_conv4/dw/relu 0=128 1=3 3=2 4=1 5=1 6=1152 7=128 8=101 9=1
|
||||
Convolution conv4 1 1 conv4/dw_conv4/dw/relu conv4_conv4/relu 0=256 1=1 5=1 6=32768 8=102 9=1
|
||||
ConvolutionDepthWise conv5/dw 1 1 conv4_conv4/relu conv5/dw_conv5/dw/relu 0=256 1=3 4=1 5=1 6=2304 7=256 8=101 9=1
|
||||
Convolution conv5 1 1 conv5/dw_conv5/dw/relu conv5_conv5/relu 0=256 1=1 5=1 6=65536 8=102 9=1
|
||||
ConvolutionDepthWise conv6/dw 1 1 conv5_conv5/relu conv6/dw_conv6/dw/relu 0=256 1=3 3=2 4=1 5=1 6=2304 7=256 8=101 9=1
|
||||
Convolution conv6 1 1 conv6/dw_conv6/dw/relu conv6_conv6/relu 0=512 1=1 5=1 6=131072 8=102 9=1
|
||||
ConvolutionDepthWise conv7/dw 1 1 conv6_conv6/relu conv7/dw_conv7/dw/relu 0=512 1=3 4=1 5=1 6=4608 7=512 8=101 9=1
|
||||
Convolution conv7 1 1 conv7/dw_conv7/dw/relu conv7_conv7/relu 0=512 1=1 5=1 6=262144 8=102 9=1
|
||||
ConvolutionDepthWise conv8/dw 1 1 conv7_conv7/relu conv8/dw_conv8/dw/relu 0=512 1=3 4=1 5=1 6=4608 7=512 8=101 9=1
|
||||
Convolution conv8 1 1 conv8/dw_conv8/dw/relu conv8_conv8/relu 0=512 1=1 5=1 6=262144 8=102 9=1
|
||||
ConvolutionDepthWise conv9/dw 1 1 conv8_conv8/relu conv9/dw_conv9/dw/relu 0=512 1=3 4=1 5=1 6=4608 7=512 8=101 9=1
|
||||
Convolution conv9 1 1 conv9/dw_conv9/dw/relu conv9_conv9/relu 0=512 1=1 5=1 6=262144 8=102 9=1
|
||||
ConvolutionDepthWise conv10/dw 1 1 conv9_conv9/relu conv10/dw_conv10/dw/relu 0=512 1=3 4=1 5=1 6=4608 7=512 8=101 9=1
|
||||
Convolution conv10 1 1 conv10/dw_conv10/dw/relu conv10_conv10/relu 0=512 1=1 5=1 6=262144 8=102 9=1
|
||||
ConvolutionDepthWise conv11/dw 1 1 conv10_conv10/relu conv11/dw_conv11/dw/relu 0=512 1=3 4=1 5=1 6=4608 7=512 8=101 9=1
|
||||
Convolution conv11 1 1 conv11/dw_conv11/dw/relu conv11_conv11/relu 0=512 1=1 5=1 6=262144 8=2 9=1
|
||||
Split splitncnn_1 1 4 conv11_conv11/relu conv11_conv11/relu_splitncnn_0 conv11_conv11/relu_splitncnn_1 conv11_conv11/relu_splitncnn_2 conv11_conv11/relu_splitncnn_3
|
||||
ConvolutionDepthWise conv12/dw 1 1 conv11_conv11/relu_splitncnn_3 conv12/dw_conv12/dw/relu 0=512 1=3 3=2 4=1 5=1 6=4608 7=512 8=101 9=1
|
||||
Convolution conv12 1 1 conv12/dw_conv12/dw/relu conv12_conv12/relu 0=1024 1=1 5=1 6=524288 8=102 9=1
|
||||
ConvolutionDepthWise conv13/dw 1 1 conv12_conv12/relu conv13/dw_conv13/dw/relu 0=1024 1=3 4=1 5=1 6=9216 7=1024 8=101 9=1
|
||||
Convolution conv13 1 1 conv13/dw_conv13/dw/relu conv13_conv13/relu 0=1024 1=1 5=1 6=1048576 8=2 9=1
|
||||
Split splitncnn_2 1 4 conv13_conv13/relu conv13_conv13/relu_splitncnn_0 conv13_conv13/relu_splitncnn_1 conv13_conv13/relu_splitncnn_2 conv13_conv13/relu_splitncnn_3
|
||||
Convolution conv14_1 1 1 conv13_conv13/relu_splitncnn_3 conv14_1_conv14_1/relu 0=256 1=1 5=1 6=262144 8=102 9=1
|
||||
Convolution conv14_2 1 1 conv14_1_conv14_1/relu conv14_2_conv14_2/relu 0=512 1=3 3=2 4=1 5=1 6=1179648 8=2 9=1
|
||||
Split splitncnn_3 1 4 conv14_2_conv14_2/relu conv14_2_conv14_2/relu_splitncnn_0 conv14_2_conv14_2/relu_splitncnn_1 conv14_2_conv14_2/relu_splitncnn_2 conv14_2_conv14_2/relu_splitncnn_3
|
||||
Convolution conv15_1 1 1 conv14_2_conv14_2/relu_splitncnn_3 conv15_1_conv15_1/relu 0=128 1=1 5=1 6=65536 8=102 9=1
|
||||
Convolution conv15_2 1 1 conv15_1_conv15_1/relu conv15_2_conv15_2/relu 0=256 1=3 3=2 4=1 5=1 6=294912 8=2 9=1
|
||||
Split splitncnn_4 1 4 conv15_2_conv15_2/relu conv15_2_conv15_2/relu_splitncnn_0 conv15_2_conv15_2/relu_splitncnn_1 conv15_2_conv15_2/relu_splitncnn_2 conv15_2_conv15_2/relu_splitncnn_3
|
||||
Convolution conv16_1 1 1 conv15_2_conv15_2/relu_splitncnn_3 conv16_1_conv16_1/relu 0=128 1=1 5=1 6=32768 8=102 9=1
|
||||
Convolution conv16_2 1 1 conv16_1_conv16_1/relu conv16_2_conv16_2/relu 0=256 1=3 3=2 4=1 5=1 6=294912 8=2 9=1
|
||||
Split splitncnn_5 1 4 conv16_2_conv16_2/relu conv16_2_conv16_2/relu_splitncnn_0 conv16_2_conv16_2/relu_splitncnn_1 conv16_2_conv16_2/relu_splitncnn_2 conv16_2_conv16_2/relu_splitncnn_3
|
||||
Convolution conv17_1 1 1 conv16_2_conv16_2/relu_splitncnn_3 conv17_1_conv17_1/relu 0=64 1=1 5=1 6=16384 8=102 9=1
|
||||
Convolution conv17_2 1 1 conv17_1_conv17_1/relu conv17_2_conv17_2/relu 0=128 1=3 3=2 4=1 5=1 6=73728 8=2 9=1
|
||||
Split splitncnn_6 1 3 conv17_2_conv17_2/relu conv17_2_conv17_2/relu_splitncnn_0 conv17_2_conv17_2/relu_splitncnn_1 conv17_2_conv17_2/relu_splitncnn_2
|
||||
Convolution conv11_mbox_loc 1 1 conv11_conv11/relu_splitncnn_2 conv11_mbox_loc 0=12 1=1 5=1 6=6144 8=2
|
||||
Permute conv11_mbox_loc_perm 1 1 conv11_mbox_loc conv11_mbox_loc_perm 0=3
|
||||
Flatten conv11_mbox_loc_flat 1 1 conv11_mbox_loc_perm conv11_mbox_loc_flat
|
||||
Convolution conv11_mbox_conf 1 1 conv11_conv11/relu_splitncnn_1 conv11_mbox_conf 0=63 1=1 5=1 6=32256 8=2
|
||||
Permute conv11_mbox_conf_perm 1 1 conv11_mbox_conf conv11_mbox_conf_perm 0=3
|
||||
Flatten conv11_mbox_conf_flat 1 1 conv11_mbox_conf_perm conv11_mbox_conf_flat
|
||||
PriorBox conv11_mbox_priorbox 2 1 conv11_conv11/relu_splitncnn_0 data_splitncnn_5 conv11_mbox_priorbox -23300=1,60.000000 -23302=1,2.000000 9=-233 10=-233 13=0.500000
|
||||
Convolution conv13_mbox_loc 1 1 conv13_conv13/relu_splitncnn_2 conv13_mbox_loc 0=24 1=1 5=1 6=24576 8=2
|
||||
Permute conv13_mbox_loc_perm 1 1 conv13_mbox_loc conv13_mbox_loc_perm 0=3
|
||||
Flatten conv13_mbox_loc_flat 1 1 conv13_mbox_loc_perm conv13_mbox_loc_flat
|
||||
Convolution conv13_mbox_conf 1 1 conv13_conv13/relu_splitncnn_1 conv13_mbox_conf 0=126 1=1 5=1 6=129024 8=2
|
||||
Permute conv13_mbox_conf_perm 1 1 conv13_mbox_conf conv13_mbox_conf_perm 0=3
|
||||
Flatten conv13_mbox_conf_flat 1 1 conv13_mbox_conf_perm conv13_mbox_conf_flat
|
||||
PriorBox conv13_mbox_priorbox 2 1 conv13_conv13/relu_splitncnn_0 data_splitncnn_4 conv13_mbox_priorbox -23300=1,105.000000 -23301=1,150.000000 -23302=2,2.000000,3.000000 9=-233 10=-233 13=0.500000
|
||||
Convolution conv14_2_mbox_loc 1 1 conv14_2_conv14_2/relu_splitncnn_2 conv14_2_mbox_loc 0=24 1=1 5=1 6=12288 8=2
|
||||
Permute conv14_2_mbox_loc_perm 1 1 conv14_2_mbox_loc conv14_2_mbox_loc_perm 0=3
|
||||
Flatten conv14_2_mbox_loc_flat 1 1 conv14_2_mbox_loc_perm conv14_2_mbox_loc_flat
|
||||
Convolution conv14_2_mbox_conf 1 1 conv14_2_conv14_2/relu_splitncnn_1 conv14_2_mbox_conf 0=126 1=1 5=1 6=64512 8=2
|
||||
Permute conv14_2_mbox_conf_perm 1 1 conv14_2_mbox_conf conv14_2_mbox_conf_perm 0=3
|
||||
Flatten conv14_2_mbox_conf_flat 1 1 conv14_2_mbox_conf_perm conv14_2_mbox_conf_flat
|
||||
PriorBox conv14_2_mbox_priorbox 2 1 conv14_2_conv14_2/relu_splitncnn_0 data_splitncnn_3 conv14_2_mbox_priorbox -23300=1,150.000000 -23301=1,195.000000 -23302=2,2.000000,3.000000 9=-233 10=-233 13=0.500000
|
||||
Convolution conv15_2_mbox_loc 1 1 conv15_2_conv15_2/relu_splitncnn_2 conv15_2_mbox_loc 0=24 1=1 5=1 6=6144 8=2
|
||||
Permute conv15_2_mbox_loc_perm 1 1 conv15_2_mbox_loc conv15_2_mbox_loc_perm 0=3
|
||||
Flatten conv15_2_mbox_loc_flat 1 1 conv15_2_mbox_loc_perm conv15_2_mbox_loc_flat
|
||||
Convolution conv15_2_mbox_conf 1 1 conv15_2_conv15_2/relu_splitncnn_1 conv15_2_mbox_conf 0=126 1=1 5=1 6=32256 8=2
|
||||
Permute conv15_2_mbox_conf_perm 1 1 conv15_2_mbox_conf conv15_2_mbox_conf_perm 0=3
|
||||
Flatten conv15_2_mbox_conf_flat 1 1 conv15_2_mbox_conf_perm conv15_2_mbox_conf_flat
|
||||
PriorBox conv15_2_mbox_priorbox 2 1 conv15_2_conv15_2/relu_splitncnn_0 data_splitncnn_2 conv15_2_mbox_priorbox -23300=1,195.000000 -23301=1,240.000000 -23302=2,2.000000,3.000000 9=-233 10=-233 13=0.500000
|
||||
Convolution conv16_2_mbox_loc 1 1 conv16_2_conv16_2/relu_splitncnn_2 conv16_2_mbox_loc 0=24 1=1 5=1 6=6144 8=2
|
||||
Permute conv16_2_mbox_loc_perm 1 1 conv16_2_mbox_loc conv16_2_mbox_loc_perm 0=3
|
||||
Flatten conv16_2_mbox_loc_flat 1 1 conv16_2_mbox_loc_perm conv16_2_mbox_loc_flat
|
||||
Convolution conv16_2_mbox_conf 1 1 conv16_2_conv16_2/relu_splitncnn_1 conv16_2_mbox_conf 0=126 1=1 5=1 6=32256 8=2
|
||||
Permute conv16_2_mbox_conf_perm 1 1 conv16_2_mbox_conf conv16_2_mbox_conf_perm 0=3
|
||||
Flatten conv16_2_mbox_conf_flat 1 1 conv16_2_mbox_conf_perm conv16_2_mbox_conf_flat
|
||||
PriorBox conv16_2_mbox_priorbox 2 1 conv16_2_conv16_2/relu_splitncnn_0 data_splitncnn_1 conv16_2_mbox_priorbox -23300=1,240.000000 -23301=1,285.000000 -23302=2,2.000000,3.000000 9=-233 10=-233 13=0.500000
|
||||
Convolution conv17_2_mbox_loc 1 1 conv17_2_conv17_2/relu_splitncnn_2 conv17_2_mbox_loc 0=24 1=1 5=1 6=3072 8=2
|
||||
Permute conv17_2_mbox_loc_perm 1 1 conv17_2_mbox_loc conv17_2_mbox_loc_perm 0=3
|
||||
Flatten conv17_2_mbox_loc_flat 1 1 conv17_2_mbox_loc_perm conv17_2_mbox_loc_flat
|
||||
Convolution conv17_2_mbox_conf 1 1 conv17_2_conv17_2/relu_splitncnn_1 conv17_2_mbox_conf 0=126 1=1 5=1 6=16128 8=2
|
||||
Permute conv17_2_mbox_conf_perm 1 1 conv17_2_mbox_conf conv17_2_mbox_conf_perm 0=3
|
||||
Flatten conv17_2_mbox_conf_flat 1 1 conv17_2_mbox_conf_perm conv17_2_mbox_conf_flat
|
||||
PriorBox conv17_2_mbox_priorbox 2 1 conv17_2_conv17_2/relu_splitncnn_0 data_splitncnn_0 conv17_2_mbox_priorbox -23300=1,285.000000 -23301=1,300.000000 -23302=2,2.000000,3.000000 9=-233 10=-233 13=0.500000
|
||||
Concat mbox_loc 6 1 conv11_mbox_loc_flat conv13_mbox_loc_flat conv14_2_mbox_loc_flat conv15_2_mbox_loc_flat conv16_2_mbox_loc_flat conv17_2_mbox_loc_flat mbox_loc
|
||||
Concat mbox_conf 6 1 conv11_mbox_conf_flat conv13_mbox_conf_flat conv14_2_mbox_conf_flat conv15_2_mbox_conf_flat conv16_2_mbox_conf_flat conv17_2_mbox_conf_flat mbox_conf
|
||||
Concat mbox_priorbox 6 1 conv11_mbox_priorbox conv13_mbox_priorbox conv14_2_mbox_priorbox conv15_2_mbox_priorbox conv16_2_mbox_priorbox conv17_2_mbox_priorbox mbox_priorbox 0=1
|
||||
Reshape mbox_conf_reshape 1 1 mbox_conf mbox_conf_reshape 0=21 1=-1
|
||||
Softmax mbox_conf_softmax 1 1 mbox_conf_reshape mbox_conf_softmax 0=1 1=1
|
||||
Flatten mbox_conf_flatten 1 1 mbox_conf_softmax mbox_conf_flatten
|
||||
DetectionOutput detection_out 3 1 mbox_loc mbox_conf_flatten mbox_priorbox output 0=21 1=0.450000 2=100 4=0.250000
|
79
3rdparty/ncnn/benchmark/mobilenet_v2.param
vendored
Normal file
79
3rdparty/ncnn/benchmark/mobilenet_v2.param
vendored
Normal file
@ -0,0 +1,79 @@
|
||||
7767517
|
||||
77 87
|
||||
Input data 0 1 data -23330=4,3,224,224,3 0=224 1=224 2=3
|
||||
Convolution conv1 1 1 data conv1/bn_relu1 -23330=4,3,112,112,32 0=32 1=3 3=2 4=1 5=1 6=864 9=1
|
||||
Convolution conv2_1/expand 1 1 conv1/bn_relu1 conv2_1/expand/bn_relu2_1/expand -23330=4,3,112,112,32 0=32 1=1 5=1 6=1024 9=1
|
||||
ConvolutionDepthWise conv2_1/dwise 1 1 conv2_1/expand/bn_relu2_1/expand conv2_1/dwise/bn_relu2_1/dwise -23330=4,3,112,112,32 0=32 1=3 4=1 5=1 6=288 7=32 9=1
|
||||
Convolution conv2_1/linear 1 1 conv2_1/dwise/bn_relu2_1/dwise conv2_1/linear/bn_conv2_1/linear/scale -23330=4,3,112,112,16 0=16 1=1 5=1 6=512
|
||||
Convolution conv2_2/expand 1 1 conv2_1/linear/bn_conv2_1/linear/scale conv2_2/expand/bn_relu2_2/expand -23330=4,3,112,112,96 0=96 1=1 5=1 6=1536 9=1
|
||||
ConvolutionDepthWise conv2_2/dwise 1 1 conv2_2/expand/bn_relu2_2/expand conv2_2/dwise/bn_relu2_2/dwise -23330=4,3,56,56,96 0=96 1=3 3=2 4=1 5=1 6=864 7=96 9=1
|
||||
Convolution conv2_2/linear 1 1 conv2_2/dwise/bn_relu2_2/dwise conv2_2/linear/bn_conv2_2/linear/scale -23330=4,3,56,56,24 0=24 1=1 5=1 6=2304
|
||||
Split splitncnn_0 1 2 conv2_2/linear/bn_conv2_2/linear/scale conv2_2/linear/bn_conv2_2/linear/scale_splitncnn_0 conv2_2/linear/bn_conv2_2/linear/scale_splitncnn_1 -23330=8,3,56,56,24,3,56,56,24
|
||||
Convolution conv3_1/expand 1 1 conv2_2/linear/bn_conv2_2/linear/scale_splitncnn_1 conv3_1/expand/bn_relu3_1/expand -23330=4,3,56,56,144 0=144 1=1 5=1 6=3456 9=1
|
||||
ConvolutionDepthWise conv3_1/dwise 1 1 conv3_1/expand/bn_relu3_1/expand conv3_1/dwise/bn_relu3_1/dwise -23330=4,3,56,56,144 0=144 1=3 4=1 5=1 6=1296 7=144 9=1
|
||||
Convolution conv3_1/linear 1 1 conv3_1/dwise/bn_relu3_1/dwise conv3_1/linear/bn_conv3_1/linear/scale -23330=4,3,56,56,24 0=24 1=1 5=1 6=3456
|
||||
Eltwise block_3_1 2 1 conv2_2/linear/bn_conv2_2/linear/scale_splitncnn_0 conv3_1/linear/bn_conv3_1/linear/scale block_3_1 -23330=4,3,56,56,24 0=1
|
||||
Convolution conv3_2/expand 1 1 block_3_1 conv3_2/expand/bn_relu3_2/expand -23330=4,3,56,56,144 0=144 1=1 5=1 6=3456 9=1
|
||||
ConvolutionDepthWise conv3_2/dwise 1 1 conv3_2/expand/bn_relu3_2/expand conv3_2/dwise/bn_relu3_2/dwise -23330=4,3,28,28,144 0=144 1=3 3=2 4=1 5=1 6=1296 7=144 9=1
|
||||
Convolution conv3_2/linear 1 1 conv3_2/dwise/bn_relu3_2/dwise conv3_2/linear/bn_conv3_2/linear/scale -23330=4,3,28,28,32 0=32 1=1 5=1 6=4608
|
||||
Split splitncnn_1 1 2 conv3_2/linear/bn_conv3_2/linear/scale conv3_2/linear/bn_conv3_2/linear/scale_splitncnn_0 conv3_2/linear/bn_conv3_2/linear/scale_splitncnn_1 -23330=8,3,28,28,32,3,28,28,32
|
||||
Convolution conv4_1/expand 1 1 conv3_2/linear/bn_conv3_2/linear/scale_splitncnn_1 conv4_1/expand/bn_relu4_1/expand -23330=4,3,28,28,192 0=192 1=1 5=1 6=6144 9=1
|
||||
ConvolutionDepthWise conv4_1/dwise 1 1 conv4_1/expand/bn_relu4_1/expand conv4_1/dwise/bn_relu4_1/dwise -23330=4,3,28,28,192 0=192 1=3 4=1 5=1 6=1728 7=192 9=1
|
||||
Convolution conv4_1/linear 1 1 conv4_1/dwise/bn_relu4_1/dwise conv4_1/linear/bn_conv4_1/linear/scale -23330=4,3,28,28,32 0=32 1=1 5=1 6=6144
|
||||
Eltwise block_4_1 2 1 conv3_2/linear/bn_conv3_2/linear/scale_splitncnn_0 conv4_1/linear/bn_conv4_1/linear/scale block_4_1 -23330=4,3,28,28,32 0=1
|
||||
Split splitncnn_2 1 2 block_4_1 block_4_1_splitncnn_0 block_4_1_splitncnn_1 -23330=8,3,28,28,32,3,28,28,32
|
||||
Convolution conv4_2/expand 1 1 block_4_1_splitncnn_1 conv4_2/expand/bn_relu4_2/expand -23330=4,3,28,28,192 0=192 1=1 5=1 6=6144 9=1
|
||||
ConvolutionDepthWise conv4_2/dwise 1 1 conv4_2/expand/bn_relu4_2/expand conv4_2/dwise/bn_relu4_2/dwise -23330=4,3,28,28,192 0=192 1=3 4=1 5=1 6=1728 7=192 9=1
|
||||
Convolution conv4_2/linear 1 1 conv4_2/dwise/bn_relu4_2/dwise conv4_2/linear/bn_conv4_2/linear/scale -23330=4,3,28,28,32 0=32 1=1 5=1 6=6144
|
||||
Eltwise block_4_2 2 1 block_4_1_splitncnn_0 conv4_2/linear/bn_conv4_2/linear/scale block_4_2 -23330=4,3,28,28,32 0=1
|
||||
Convolution conv4_3/expand 1 1 block_4_2 conv4_3/expand/bn_relu4_3/expand -23330=4,3,28,28,192 0=192 1=1 5=1 6=6144 9=1
|
||||
ConvolutionDepthWise conv4_3/dwise 1 1 conv4_3/expand/bn_relu4_3/expand conv4_3/dwise/bn_relu4_3/dwise -23330=4,3,14,14,192 0=192 1=3 3=2 4=1 5=1 6=1728 7=192 9=1
|
||||
Convolution conv4_3/linear 1 1 conv4_3/dwise/bn_relu4_3/dwise conv4_3/linear/bn_conv4_3/linear/scale -23330=4,3,14,14,64 0=64 1=1 5=1 6=12288
|
||||
Split splitncnn_3 1 2 conv4_3/linear/bn_conv4_3/linear/scale conv4_3/linear/bn_conv4_3/linear/scale_splitncnn_0 conv4_3/linear/bn_conv4_3/linear/scale_splitncnn_1 -23330=8,3,14,14,64,3,14,14,64
|
||||
Convolution conv4_4/expand 1 1 conv4_3/linear/bn_conv4_3/linear/scale_splitncnn_1 conv4_4/expand/bn_relu4_4/expand -23330=4,3,14,14,384 0=384 1=1 5=1 6=24576 9=1
|
||||
ConvolutionDepthWise conv4_4/dwise 1 1 conv4_4/expand/bn_relu4_4/expand conv4_4/dwise/bn_relu4_4/dwise -23330=4,3,14,14,384 0=384 1=3 4=1 5=1 6=3456 7=384 9=1
|
||||
Convolution conv4_4/linear 1 1 conv4_4/dwise/bn_relu4_4/dwise conv4_4/linear/bn_conv4_4/linear/scale -23330=4,3,14,14,64 0=64 1=1 5=1 6=24576
|
||||
Eltwise block_4_4 2 1 conv4_3/linear/bn_conv4_3/linear/scale_splitncnn_0 conv4_4/linear/bn_conv4_4/linear/scale block_4_4 -23330=4,3,14,14,64 0=1
|
||||
Split splitncnn_4 1 2 block_4_4 block_4_4_splitncnn_0 block_4_4_splitncnn_1 -23330=8,3,14,14,64,3,14,14,64
|
||||
Convolution conv4_5/expand 1 1 block_4_4_splitncnn_1 conv4_5/expand/bn_relu4_5/expand -23330=4,3,14,14,384 0=384 1=1 5=1 6=24576 9=1
|
||||
ConvolutionDepthWise conv4_5/dwise 1 1 conv4_5/expand/bn_relu4_5/expand conv4_5/dwise/bn_relu4_5/dwise -23330=4,3,14,14,384 0=384 1=3 4=1 5=1 6=3456 7=384 9=1
|
||||
Convolution conv4_5/linear 1 1 conv4_5/dwise/bn_relu4_5/dwise conv4_5/linear/bn_conv4_5/linear/scale -23330=4,3,14,14,64 0=64 1=1 5=1 6=24576
|
||||
Eltwise block_4_5 2 1 block_4_4_splitncnn_0 conv4_5/linear/bn_conv4_5/linear/scale block_4_5 -23330=4,3,14,14,64 0=1
|
||||
Split splitncnn_5 1 2 block_4_5 block_4_5_splitncnn_0 block_4_5_splitncnn_1 -23330=8,3,14,14,64,3,14,14,64
|
||||
Convolution conv4_6/expand 1 1 block_4_5_splitncnn_1 conv4_6/expand/bn_relu4_6/expand -23330=4,3,14,14,384 0=384 1=1 5=1 6=24576 9=1
|
||||
ConvolutionDepthWise conv4_6/dwise 1 1 conv4_6/expand/bn_relu4_6/expand conv4_6/dwise/bn_relu4_6/dwise -23330=4,3,14,14,384 0=384 1=3 4=1 5=1 6=3456 7=384 9=1
|
||||
Convolution conv4_6/linear 1 1 conv4_6/dwise/bn_relu4_6/dwise conv4_6/linear/bn_conv4_6/linear/scale -23330=4,3,14,14,64 0=64 1=1 5=1 6=24576
|
||||
Eltwise block_4_6 2 1 block_4_5_splitncnn_0 conv4_6/linear/bn_conv4_6/linear/scale block_4_6 -23330=4,3,14,14,64 0=1
|
||||
Convolution conv4_7/expand 1 1 block_4_6 conv4_7/expand/bn_relu4_7/expand -23330=4,3,14,14,384 0=384 1=1 5=1 6=24576 9=1
|
||||
ConvolutionDepthWise conv4_7/dwise 1 1 conv4_7/expand/bn_relu4_7/expand conv4_7/dwise/bn_relu4_7/dwise -23330=4,3,14,14,384 0=384 1=3 4=1 5=1 6=3456 7=384 9=1
|
||||
Convolution conv4_7/linear 1 1 conv4_7/dwise/bn_relu4_7/dwise conv4_7/linear/bn_conv4_7/linear/scale -23330=4,3,14,14,96 0=96 1=1 5=1 6=36864
|
||||
Split splitncnn_6 1 2 conv4_7/linear/bn_conv4_7/linear/scale conv4_7/linear/bn_conv4_7/linear/scale_splitncnn_0 conv4_7/linear/bn_conv4_7/linear/scale_splitncnn_1 -23330=8,3,14,14,96,3,14,14,96
|
||||
Convolution conv5_1/expand 1 1 conv4_7/linear/bn_conv4_7/linear/scale_splitncnn_1 conv5_1/expand/bn_relu5_1/expand -23330=4,3,14,14,576 0=576 1=1 5=1 6=55296 9=1
|
||||
ConvolutionDepthWise conv5_1/dwise 1 1 conv5_1/expand/bn_relu5_1/expand conv5_1/dwise/bn_relu5_1/dwise -23330=4,3,14,14,576 0=576 1=3 4=1 5=1 6=5184 7=576 9=1
|
||||
Convolution conv5_1/linear 1 1 conv5_1/dwise/bn_relu5_1/dwise conv5_1/linear/bn_conv5_1/linear/scale -23330=4,3,14,14,96 0=96 1=1 5=1 6=55296
|
||||
Eltwise block_5_1 2 1 conv4_7/linear/bn_conv4_7/linear/scale_splitncnn_0 conv5_1/linear/bn_conv5_1/linear/scale block_5_1 -23330=4,3,14,14,96 0=1
|
||||
Split splitncnn_7 1 2 block_5_1 block_5_1_splitncnn_0 block_5_1_splitncnn_1 -23330=8,3,14,14,96,3,14,14,96
|
||||
Convolution conv5_2/expand 1 1 block_5_1_splitncnn_1 conv5_2/expand/bn_relu5_2/expand -23330=4,3,14,14,576 0=576 1=1 5=1 6=55296 9=1
|
||||
ConvolutionDepthWise conv5_2/dwise 1 1 conv5_2/expand/bn_relu5_2/expand conv5_2/dwise/bn_relu5_2/dwise -23330=4,3,14,14,576 0=576 1=3 4=1 5=1 6=5184 7=576 9=1
|
||||
Convolution conv5_2/linear 1 1 conv5_2/dwise/bn_relu5_2/dwise conv5_2/linear/bn_conv5_2/linear/scale -23330=4,3,14,14,96 0=96 1=1 5=1 6=55296
|
||||
Eltwise block_5_2 2 1 block_5_1_splitncnn_0 conv5_2/linear/bn_conv5_2/linear/scale block_5_2 -23330=4,3,14,14,96 0=1
|
||||
Convolution conv5_3/expand 1 1 block_5_2 conv5_3/expand/bn_relu5_3/expand -23330=4,3,14,14,576 0=576 1=1 5=1 6=55296 9=1
|
||||
ConvolutionDepthWise conv5_3/dwise 1 1 conv5_3/expand/bn_relu5_3/expand conv5_3/dwise/bn_relu5_3/dwise -23330=4,3,7,7,576 0=576 1=3 3=2 4=1 5=1 6=5184 7=576 9=1
|
||||
Convolution conv5_3/linear 1 1 conv5_3/dwise/bn_relu5_3/dwise conv5_3/linear/bn_conv5_3/linear/scale -23330=4,3,7,7,160 0=160 1=1 5=1 6=92160
|
||||
Split splitncnn_8 1 2 conv5_3/linear/bn_conv5_3/linear/scale conv5_3/linear/bn_conv5_3/linear/scale_splitncnn_0 conv5_3/linear/bn_conv5_3/linear/scale_splitncnn_1 -23330=8,3,7,7,160,3,7,7,160
|
||||
Convolution conv6_1/expand 1 1 conv5_3/linear/bn_conv5_3/linear/scale_splitncnn_1 conv6_1/expand/bn_relu6_1/expand -23330=4,3,7,7,960 0=960 1=1 5=1 6=153600 9=1
|
||||
ConvolutionDepthWise conv6_1/dwise 1 1 conv6_1/expand/bn_relu6_1/expand conv6_1/dwise/bn_relu6_1/dwise -23330=4,3,7,7,960 0=960 1=3 4=1 5=1 6=8640 7=960 9=1
|
||||
Convolution conv6_1/linear 1 1 conv6_1/dwise/bn_relu6_1/dwise conv6_1/linear/bn_conv6_1/linear/scale -23330=4,3,7,7,160 0=160 1=1 5=1 6=153600
|
||||
Eltwise block_6_1 2 1 conv5_3/linear/bn_conv5_3/linear/scale_splitncnn_0 conv6_1/linear/bn_conv6_1/linear/scale block_6_1 -23330=4,3,7,7,160 0=1
|
||||
Split splitncnn_9 1 2 block_6_1 block_6_1_splitncnn_0 block_6_1_splitncnn_1 -23330=8,3,7,7,160,3,7,7,160
|
||||
Convolution conv6_2/expand 1 1 block_6_1_splitncnn_1 conv6_2/expand/bn_relu6_2/expand -23330=4,3,7,7,960 0=960 1=1 5=1 6=153600 9=1
|
||||
ConvolutionDepthWise conv6_2/dwise 1 1 conv6_2/expand/bn_relu6_2/expand conv6_2/dwise/bn_relu6_2/dwise -23330=4,3,7,7,960 0=960 1=3 4=1 5=1 6=8640 7=960 9=1
|
||||
Convolution conv6_2/linear 1 1 conv6_2/dwise/bn_relu6_2/dwise conv6_2/linear/bn_conv6_2/linear/scale -23330=4,3,7,7,160 0=160 1=1 5=1 6=153600
|
||||
Eltwise block_6_2 2 1 block_6_1_splitncnn_0 conv6_2/linear/bn_conv6_2/linear/scale block_6_2 -23330=4,3,7,7,160 0=1
|
||||
Convolution conv6_3/expand 1 1 block_6_2 conv6_3/expand/bn_relu6_3/expand -23330=4,3,7,7,960 0=960 1=1 5=1 6=153600 9=1
|
||||
ConvolutionDepthWise conv6_3/dwise 1 1 conv6_3/expand/bn_relu6_3/expand conv6_3/dwise/bn_relu6_3/dwise -23330=4,3,7,7,960 0=960 1=3 4=1 5=1 6=8640 7=960 9=1
|
||||
Convolution conv6_3/linear 1 1 conv6_3/dwise/bn_relu6_3/dwise conv6_3/linear/bn_conv6_3/linear/scale -23330=4,3,7,7,320 0=320 1=1 5=1 6=307200
|
||||
Convolution conv6_4 1 1 conv6_3/linear/bn_conv6_3/linear/scale conv6_4/bn_relu6_4 -23330=4,3,7,7,1280 0=1280 1=1 5=1 6=409600 9=1
|
||||
Pooling pool6 1 1 conv6_4/bn_relu6_4 pool6 -23330=4,1,1280,1,1 0=1 4=1
|
||||
InnerProduct fc7 1 1 pool6 fc7 -23330=4,1,1000,1,1 0=1000 1=1 2=1280000
|
||||
Softmax prob 1 1 fc7 output -23330=4,1,1000,1,1
|
147
3rdparty/ncnn/benchmark/mobilenet_v3.param
vendored
Normal file
147
3rdparty/ncnn/benchmark/mobilenet_v3.param
vendored
Normal file
@ -0,0 +1,147 @@
|
||||
7767517
|
||||
145 163
|
||||
Input data 0 1 data -23330=4,3,224,224,3 0=224 1=224 2=3
|
||||
Convolution 313 1 1 data 313 -23330=4,3,112,112,16 0=16 1=3 3=2 4=1 5=1 6=432
|
||||
Split splitncnn_0 1 2 313 313_splitncnn_0 313_splitncnn_1 -23330=8,3,112,112,16,3,112,112,16
|
||||
HardSigmoid 319 1 1 313_splitncnn_1 319 -23330=4,3,112,112,16
|
||||
BinaryOp 320 2 1 313_splitncnn_0 319 320 -23330=4,3,112,112,16 0=2
|
||||
Split splitncnn_1 1 2 320 320_splitncnn_0 320_splitncnn_1 -23330=8,3,112,112,16,3,112,112,16
|
||||
ConvolutionDepthWise 321 1 1 320_splitncnn_1 323 -23330=4,3,112,112,16 0=16 1=3 4=1 5=1 6=144 7=16 9=1
|
||||
Convolution 324 1 1 323 324 -23330=4,3,112,112,16 0=16 1=1 5=1 6=256
|
||||
BinaryOp 326 2 1 320_splitncnn_0 324 326 -23330=4,3,112,112,16
|
||||
Convolution 327 1 1 326 329 -23330=4,3,112,112,64 0=64 1=1 5=1 6=1024 9=1
|
||||
ConvolutionDepthWise 330 1 1 329 332 -23330=4,3,56,56,64 0=64 1=3 3=2 4=1 5=1 6=576 7=64 9=1
|
||||
Convolution 333 1 1 332 333 -23330=4,3,56,56,24 0=24 1=1 5=1 6=1536
|
||||
Split splitncnn_2 1 2 333 333_splitncnn_0 333_splitncnn_1 -23330=8,3,56,56,24,3,56,56,24
|
||||
Convolution 335 1 1 333_splitncnn_1 337 -23330=4,3,56,56,72 0=72 1=1 5=1 6=1728 9=1
|
||||
ConvolutionDepthWise 338 1 1 337 340 -23330=4,3,56,56,72 0=72 1=3 4=1 5=1 6=648 7=72 9=1
|
||||
Convolution 341 1 1 340 341 -23330=4,3,56,56,24 0=24 1=1 5=1 6=1728
|
||||
BinaryOp 343 2 1 333_splitncnn_0 341 343 -23330=4,3,56,56,24
|
||||
Convolution 344 1 1 343 346 -23330=4,3,56,56,72 0=72 1=1 5=1 6=1728 9=1
|
||||
ConvolutionDepthWise 347 1 1 346 347 -23330=4,3,28,28,72 0=72 1=5 3=2 4=2 5=1 6=1800 7=72
|
||||
Split splitncnn_3 1 2 347 347_splitncnn_0 347_splitncnn_1 -23330=8,3,28,28,72,3,28,28,72
|
||||
Pooling 355 1 1 347_splitncnn_1 359 -23330=4,1,72,1,1 0=1 4=1
|
||||
InnerProduct 360 1 1 359 361 -23330=4,1,18,1,1 0=18 1=1 2=1296 9=1
|
||||
InnerProduct 362 1 1 361 362 -23330=4,1,72,1,1 0=72 1=1 2=1296
|
||||
HardSigmoid 367 1 1 362 367 -23330=4,1,72,1,1
|
||||
BinaryOp 376 2 1 347_splitncnn_0 367 376 -23330=4,3,28,28,72 0=2
|
||||
ReLU 377 1 1 376 377 -23330=4,3,28,28,72
|
||||
Convolution 378 1 1 377 378 -23330=4,3,28,28,40 0=40 1=1 5=1 6=2880
|
||||
Split splitncnn_4 1 2 378 378_splitncnn_0 378_splitncnn_1 -23330=8,3,28,28,40,3,28,28,40
|
||||
Convolution 380 1 1 378_splitncnn_1 382 -23330=4,3,28,28,120 0=120 1=1 5=1 6=4800 9=1
|
||||
ConvolutionDepthWise 383 1 1 382 383 -23330=4,3,28,28,120 0=120 1=5 4=2 5=1 6=3000 7=120
|
||||
Split splitncnn_5 1 2 383 383_splitncnn_0 383_splitncnn_1 -23330=8,3,28,28,120,3,28,28,120
|
||||
Pooling 391 1 1 383_splitncnn_1 395 -23330=4,1,120,1,1 0=1 4=1
|
||||
InnerProduct 396 1 1 395 397 -23330=4,1,30,1,1 0=30 1=1 2=3600 9=1
|
||||
InnerProduct 398 1 1 397 398 -23330=4,1,120,1,1 0=120 1=1 2=3600
|
||||
HardSigmoid 403 1 1 398 403 -23330=4,1,120,1,1
|
||||
BinaryOp 412 2 1 383_splitncnn_0 403 412 -23330=4,3,28,28,120 0=2
|
||||
ReLU 413 1 1 412 413 -23330=4,3,28,28,120
|
||||
Convolution 414 1 1 413 414 -23330=4,3,28,28,40 0=40 1=1 5=1 6=4800
|
||||
BinaryOp 416 2 1 378_splitncnn_0 414 416 -23330=4,3,28,28,40
|
||||
Split splitncnn_6 1 2 416 416_splitncnn_0 416_splitncnn_1 -23330=8,3,28,28,40,3,28,28,40
|
||||
Convolution 417 1 1 416_splitncnn_1 419 -23330=4,3,28,28,120 0=120 1=1 5=1 6=4800 9=1
|
||||
ConvolutionDepthWise 420 1 1 419 420 -23330=4,3,28,28,120 0=120 1=5 4=2 5=1 6=3000 7=120
|
||||
Split splitncnn_7 1 2 420 420_splitncnn_0 420_splitncnn_1 -23330=8,3,28,28,120,3,28,28,120
|
||||
Pooling 428 1 1 420_splitncnn_1 432 -23330=4,1,120,1,1 0=1 4=1
|
||||
InnerProduct 433 1 1 432 434 -23330=4,1,30,1,1 0=30 1=1 2=3600 9=1
|
||||
InnerProduct 435 1 1 434 435 -23330=4,1,120,1,1 0=120 1=1 2=3600
|
||||
HardSigmoid 440 1 1 435 440 -23330=4,1,120,1,1
|
||||
BinaryOp 449 2 1 420_splitncnn_0 440 449 -23330=4,3,28,28,120 0=2
|
||||
ReLU 450 1 1 449 450 -23330=4,3,28,28,120
|
||||
Convolution 451 1 1 450 451 -23330=4,3,28,28,40 0=40 1=1 5=1 6=4800
|
||||
BinaryOp 453 2 1 416_splitncnn_0 451 453 -23330=4,3,28,28,40
|
||||
Convolution 454 1 1 453 454 -23330=4,3,28,28,240 0=240 1=1 5=1 6=9600
|
||||
HardSwish 461 1 1 454 461 -23330=4,3,28,28,240
|
||||
ConvolutionDepthWise 462 1 1 461 462 -23330=4,3,14,14,240 0=240 1=3 3=2 4=1 5=1 6=2160 7=240
|
||||
HardSwish 469 1 1 462 469 -23330=4,3,14,14,240
|
||||
Convolution 470 1 1 469 470 -23330=4,3,14,14,80 0=80 1=1 5=1 6=19200
|
||||
Split splitncnn_8 1 2 470 470_splitncnn_0 470_splitncnn_1 -23330=8,3,14,14,80,3,14,14,80
|
||||
Convolution 472 1 1 470_splitncnn_1 472 -23330=4,3,14,14,200 0=200 1=1 5=1 6=16000
|
||||
HardSwish 479 1 1 472 479 -23330=4,3,14,14,200
|
||||
ConvolutionDepthWise 480 1 1 479 480 -23330=4,3,14,14,200 0=200 1=3 4=1 5=1 6=1800 7=200
|
||||
HardSwish 487 1 1 480 487 -23330=4,3,14,14,200
|
||||
Convolution 488 1 1 487 488 -23330=4,3,14,14,80 0=80 1=1 5=1 6=16000
|
||||
BinaryOp 490 2 1 470_splitncnn_0 488 490 -23330=4,3,14,14,80
|
||||
Split splitncnn_9 1 2 490 490_splitncnn_0 490_splitncnn_1 -23330=8,3,14,14,80,3,14,14,80
|
||||
Convolution 491 1 1 490_splitncnn_1 491 -23330=4,3,14,14,184 0=184 1=1 5=1 6=14720
|
||||
HardSwish 498 1 1 491 498 -23330=4,3,14,14,184
|
||||
ConvolutionDepthWise 499 1 1 498 499 -23330=4,3,14,14,184 0=184 1=3 4=1 5=1 6=1656 7=184
|
||||
HardSwish 506 1 1 499 506 -23330=4,3,14,14,184
|
||||
Convolution 507 1 1 506 507 -23330=4,3,14,14,80 0=80 1=1 5=1 6=14720
|
||||
BinaryOp 509 2 1 490_splitncnn_0 507 509 -23330=4,3,14,14,80
|
||||
Split splitncnn_10 1 2 509 509_splitncnn_0 509_splitncnn_1 -23330=8,3,14,14,80,3,14,14,80
|
||||
Convolution 510 1 1 509_splitncnn_1 510 -23330=4,3,14,14,184 0=184 1=1 5=1 6=14720
|
||||
HardSwish 517 1 1 510 517 -23330=4,3,14,14,184
|
||||
ConvolutionDepthWise 518 1 1 517 518 -23330=4,3,14,14,184 0=184 1=3 4=1 5=1 6=1656 7=184
|
||||
HardSwish 525 1 1 518 525 -23330=4,3,14,14,184
|
||||
Convolution 526 1 1 525 526 -23330=4,3,14,14,80 0=80 1=1 5=1 6=14720
|
||||
BinaryOp 528 2 1 509_splitncnn_0 526 528 -23330=4,3,14,14,80
|
||||
Convolution 529 1 1 528 529 -23330=4,3,14,14,480 0=480 1=1 5=1 6=38400
|
||||
HardSwish 536 1 1 529 536 -23330=4,3,14,14,480
|
||||
ConvolutionDepthWise 537 1 1 536 537 -23330=4,3,14,14,480 0=480 1=3 4=1 5=1 6=4320 7=480
|
||||
Split splitncnn_11 1 2 537 537_splitncnn_0 537_splitncnn_1 -23330=8,3,14,14,480,3,14,14,480
|
||||
Pooling 545 1 1 537_splitncnn_1 549 -23330=4,1,480,1,1 0=1 4=1
|
||||
InnerProduct 550 1 1 549 551 -23330=4,1,120,1,1 0=120 1=1 2=57600 9=1
|
||||
InnerProduct 552 1 1 551 552 -23330=4,1,480,1,1 0=480 1=1 2=57600
|
||||
HardSigmoid 557 1 1 552 557 -23330=4,1,480,1,1
|
||||
BinaryOp 566 2 1 537_splitncnn_0 557 566 -23330=4,3,14,14,480 0=2
|
||||
HardSwish 572 1 1 566 572 -23330=4,3,14,14,480
|
||||
Convolution 573 1 1 572 573 -23330=4,3,14,14,112 0=112 1=1 5=1 6=53760
|
||||
Split splitncnn_12 1 2 573 573_splitncnn_0 573_splitncnn_1 -23330=8,3,14,14,112,3,14,14,112
|
||||
Convolution 575 1 1 573_splitncnn_1 575 -23330=4,3,14,14,672 0=672 1=1 5=1 6=75264
|
||||
HardSwish 582 1 1 575 582 -23330=4,3,14,14,672
|
||||
ConvolutionDepthWise 583 1 1 582 583 -23330=4,3,14,14,672 0=672 1=3 4=1 5=1 6=6048 7=672
|
||||
Split splitncnn_13 1 2 583 583_splitncnn_0 583_splitncnn_1 -23330=8,3,14,14,672,3,14,14,672
|
||||
Pooling 591 1 1 583_splitncnn_1 595 -23330=4,1,672,1,1 0=1 4=1
|
||||
InnerProduct 596 1 1 595 597 -23330=4,1,168,1,1 0=168 1=1 2=112896 9=1
|
||||
InnerProduct 598 1 1 597 598 -23330=4,1,672,1,1 0=672 1=1 2=112896
|
||||
HardSigmoid 603 1 1 598 603 -23330=4,1,672,1,1
|
||||
BinaryOp 612 2 1 583_splitncnn_0 603 612 -23330=4,3,14,14,672 0=2
|
||||
HardSwish 618 1 1 612 618 -23330=4,3,14,14,672
|
||||
Convolution 619 1 1 618 619 -23330=4,3,14,14,112 0=112 1=1 5=1 6=75264
|
||||
BinaryOp 621 2 1 573_splitncnn_0 619 621 -23330=4,3,14,14,112
|
||||
Convolution 622 1 1 621 622 -23330=4,3,14,14,672 0=672 1=1 5=1 6=75264
|
||||
HardSwish 629 1 1 622 629 -23330=4,3,14,14,672
|
||||
ConvolutionDepthWise 630 1 1 629 630 -23330=4,3,14,14,672 0=672 1=5 4=2 5=1 6=16800 7=672
|
||||
Split splitncnn_14 1 2 630 630_splitncnn_0 630_splitncnn_1 -23330=8,3,14,14,672,3,14,14,672
|
||||
Pooling 638 1 1 630_splitncnn_1 642 -23330=4,1,672,1,1 0=1 4=1
|
||||
InnerProduct 643 1 1 642 644 -23330=4,1,168,1,1 0=168 1=1 2=112896 9=1
|
||||
InnerProduct 645 1 1 644 645 -23330=4,1,672,1,1 0=672 1=1 2=112896
|
||||
HardSigmoid 650 1 1 645 650 -23330=4,1,672,1,1
|
||||
BinaryOp 659 2 1 630_splitncnn_0 650 659 -23330=4,3,14,14,672 0=2
|
||||
HardSwish 665 1 1 659 665 -23330=4,3,14,14,672
|
||||
Convolution 666 1 1 665 666 -23330=4,3,14,14,160 0=160 1=1 5=1 6=107520
|
||||
Convolution 668 1 1 666 668 -23330=4,3,14,14,672 0=672 1=1 5=1 6=107520
|
||||
HardSwish 675 1 1 668 675 -23330=4,3,14,14,672
|
||||
ConvolutionDepthWise 676 1 1 675 676 -23330=4,3,7,7,672 0=672 1=5 3=2 4=2 5=1 6=16800 7=672
|
||||
Split splitncnn_15 1 2 676 676_splitncnn_0 676_splitncnn_1 -23330=8,3,7,7,672,3,7,7,672
|
||||
Pooling 684 1 1 676_splitncnn_1 688 -23330=4,1,672,1,1 0=1 4=1
|
||||
InnerProduct 689 1 1 688 690 -23330=4,1,168,1,1 0=168 1=1 2=112896 9=1
|
||||
InnerProduct 691 1 1 690 691 -23330=4,1,672,1,1 0=672 1=1 2=112896
|
||||
HardSigmoid 696 1 1 691 696 -23330=4,1,672,1,1
|
||||
BinaryOp 705 2 1 676_splitncnn_0 696 705 -23330=4,3,7,7,672 0=2
|
||||
HardSwish 711 1 1 705 711 -23330=4,3,7,7,672
|
||||
Convolution 712 1 1 711 712 -23330=4,3,7,7,160 0=160 1=1 5=1 6=107520
|
||||
Split splitncnn_16 1 2 712 712_splitncnn_0 712_splitncnn_1 -23330=8,3,7,7,160,3,7,7,160
|
||||
Convolution 714 1 1 712_splitncnn_1 714 -23330=4,3,7,7,960 0=960 1=1 5=1 6=153600
|
||||
HardSwish 721 1 1 714 721 -23330=4,3,7,7,960
|
||||
ConvolutionDepthWise 722 1 1 721 722 -23330=4,3,7,7,960 0=960 1=5 4=2 5=1 6=24000 7=960
|
||||
Split splitncnn_17 1 2 722 722_splitncnn_0 722_splitncnn_1 -23330=8,3,7,7,960,3,7,7,960
|
||||
Pooling 730 1 1 722_splitncnn_1 734 -23330=4,1,960,1,1 0=1 4=1
|
||||
InnerProduct 735 1 1 734 736 -23330=4,1,240,1,1 0=240 1=1 2=230400 9=1
|
||||
InnerProduct 737 1 1 736 737 -23330=4,1,960,1,1 0=960 1=1 2=230400
|
||||
HardSigmoid 742 1 1 737 742 -23330=4,1,960,1,1
|
||||
BinaryOp 751 2 1 722_splitncnn_0 742 751 -23330=4,3,7,7,960 0=2
|
||||
HardSwish 757 1 1 751 757 -23330=4,3,7,7,960
|
||||
Convolution 758 1 1 757 758 -23330=4,3,7,7,160 0=160 1=1 5=1 6=153600
|
||||
BinaryOp 760 2 1 712_splitncnn_0 758 760 -23330=4,3,7,7,160
|
||||
Convolution 761 1 1 760 761 -23330=4,3,7,7,960 0=960 1=1 5=1 6=153600
|
||||
HardSwish 768 1 1 761 768 -23330=4,3,7,7,960
|
||||
Pooling 769 1 1 768 769 -23330=4,1,960,1,1 0=1 4=1
|
||||
HardSwish 775 1 1 769 775 -23330=4,1,960,1,1
|
||||
Reshape 783 1 1 775 783 -23330=4,1,960,1,1 0=-1
|
||||
InnerProduct 784 1 1 783 784 -23330=4,1,1280,1,1 0=1280 1=1 2=1228800
|
||||
HardSwish 790 1 1 784 790 -23330=4,1,1280,1,1
|
||||
InnerProduct 791 1 1 790 791 -23330=4,1,1000,1,1 0=1000 1=1 2=1280000
|
||||
Softmax prob 1 1 791 output -23330=4,1,1000,1,1
|
41
3rdparty/ncnn/benchmark/mobilenet_yolo.param
vendored
Normal file
41
3rdparty/ncnn/benchmark/mobilenet_yolo.param
vendored
Normal file
@ -0,0 +1,41 @@
|
||||
7767517
|
||||
39 41
|
||||
Input data 0 1 data -23330=4,3,416,416,3 0=416 1=416 2=3
|
||||
Convolution conv0 1 1 data conv0_conv0/relu -23330=4,3,208,208,32 0=32 1=3 3=2 4=1 5=1 6=864 9=1
|
||||
ConvolutionDepthWise conv1/dw 1 1 conv0_conv0/relu conv1/dw_conv1/dw/relu -23330=4,3,208,208,32 0=32 1=3 4=1 5=1 6=288 7=32 9=1
|
||||
Convolution conv1 1 1 conv1/dw_conv1/dw/relu conv1_conv1/relu -23330=4,3,208,208,64 0=64 1=1 5=1 6=2048 9=1
|
||||
ConvolutionDepthWise conv2/dw 1 1 conv1_conv1/relu conv2/dw_conv2/dw/relu -23330=4,3,104,104,64 0=64 1=3 3=2 4=1 5=1 6=576 7=64 9=1
|
||||
Convolution conv2 1 1 conv2/dw_conv2/dw/relu conv2_conv2/relu -23330=4,3,104,104,128 0=128 1=1 5=1 6=8192 9=1
|
||||
ConvolutionDepthWise conv3/dw 1 1 conv2_conv2/relu conv3/dw_conv3/dw/relu -23330=4,3,104,104,128 0=128 1=3 4=1 5=1 6=1152 7=128 9=1
|
||||
Convolution conv3 1 1 conv3/dw_conv3/dw/relu conv3_conv3/relu -23330=4,3,104,104,128 0=128 1=1 5=1 6=16384 9=1
|
||||
ConvolutionDepthWise conv4/dw 1 1 conv3_conv3/relu conv4/dw_conv4/dw/relu -23330=4,3,52,52,128 0=128 1=3 3=2 4=1 5=1 6=1152 7=128 9=1
|
||||
Convolution conv4 1 1 conv4/dw_conv4/dw/relu conv4_conv4/relu -23330=4,3,52,52,256 0=256 1=1 5=1 6=32768 9=1
|
||||
ConvolutionDepthWise conv5/dw 1 1 conv4_conv4/relu conv5/dw_conv5/dw/relu -23330=4,3,52,52,256 0=256 1=3 4=1 5=1 6=2304 7=256 9=1
|
||||
Convolution conv5 1 1 conv5/dw_conv5/dw/relu conv5_conv5/relu -23330=4,3,52,52,256 0=256 1=1 5=1 6=65536 9=1
|
||||
ConvolutionDepthWise conv6/dw 1 1 conv5_conv5/relu conv6/dw_conv6/dw/relu -23330=4,3,26,26,256 0=256 1=3 3=2 4=1 5=1 6=2304 7=256 9=1
|
||||
Convolution conv6 1 1 conv6/dw_conv6/dw/relu conv6_conv6/relu -23330=4,3,26,26,512 0=512 1=1 5=1 6=131072 9=1
|
||||
ConvolutionDepthWise conv7/dw 1 1 conv6_conv6/relu conv7/dw_conv7/dw/relu -23330=4,3,26,26,512 0=512 1=3 4=1 5=1 6=4608 7=512 9=1
|
||||
Convolution conv7 1 1 conv7/dw_conv7/dw/relu conv7_conv7/relu -23330=4,3,26,26,512 0=512 1=1 5=1 6=262144 9=1
|
||||
ConvolutionDepthWise conv8/dw 1 1 conv7_conv7/relu conv8/dw_conv8/dw/relu -23330=4,3,26,26,512 0=512 1=3 4=1 5=1 6=4608 7=512 9=1
|
||||
Convolution conv8 1 1 conv8/dw_conv8/dw/relu conv8_conv8/relu -23330=4,3,26,26,512 0=512 1=1 5=1 6=262144 9=1
|
||||
ConvolutionDepthWise conv9/dw 1 1 conv8_conv8/relu conv9/dw_conv9/dw/relu -23330=4,3,26,26,512 0=512 1=3 4=1 5=1 6=4608 7=512 9=1
|
||||
Convolution conv9 1 1 conv9/dw_conv9/dw/relu conv9_conv9/relu -23330=4,3,26,26,512 0=512 1=1 5=1 6=262144 9=1
|
||||
ConvolutionDepthWise conv10/dw 1 1 conv9_conv9/relu conv10/dw_conv10/dw/relu -23330=4,3,26,26,512 0=512 1=3 4=1 5=1 6=4608 7=512 9=1
|
||||
Convolution conv10 1 1 conv10/dw_conv10/dw/relu conv10_conv10/relu -23330=4,3,26,26,512 0=512 1=1 5=1 6=262144 9=1
|
||||
ConvolutionDepthWise conv11/dw 1 1 conv10_conv10/relu conv11/dw_conv11/dw/relu -23330=4,3,26,26,512 0=512 1=3 4=1 5=1 6=4608 7=512 9=1
|
||||
Convolution conv11 1 1 conv11/dw_conv11/dw/relu conv11_conv11/relu -23330=4,3,26,26,512 0=512 1=1 5=1 6=262144 9=1
|
||||
Split splitncnn_0 1 2 conv11_conv11/relu conv11_conv11/relu_splitncnn_0 conv11_conv11/relu_splitncnn_1 -23330=8,3,26,26,512,3,26,26,512
|
||||
ConvolutionDepthWise conv12/dw 1 1 conv11_conv11/relu_splitncnn_1 conv12/dw_conv12/dw/relu -23330=4,3,13,13,512 0=512 1=3 3=2 4=1 5=1 6=4608 7=512 9=1
|
||||
Convolution conv12 1 1 conv12/dw_conv12/dw/relu conv12_conv12/relu -23330=4,3,13,13,1024 0=1024 1=1 5=1 6=524288 9=1
|
||||
ConvolutionDepthWise conv13/dw 1 1 conv12_conv12/relu conv13/dw_conv13/dw/relu -23330=4,3,13,13,1024 0=1024 1=3 4=1 5=1 6=9216 7=1024 9=1
|
||||
Convolution conv13 1 1 conv13/dw_conv13/dw/relu conv13_conv13/relu -23330=4,3,13,13,1024 0=1024 1=1 5=1 6=1048576 9=1
|
||||
ConvolutionDepthWise conv16/dw 1 1 conv13_conv13/relu conv16/dw_conv16/dw/relu -23330=4,3,13,13,1024 0=1024 1=3 4=1 5=1 6=9216 7=1024 9=1
|
||||
Convolution conv17 1 1 conv16/dw_conv16/dw/relu conv17_conv17/relu -23330=4,3,13,13,1024 0=1024 1=1 5=1 6=1048576 9=1
|
||||
Split splitncnn_1 1 2 conv17_conv17/relu conv17_conv17/relu_splitncnn_0 conv17_conv17/relu_splitncnn_1 -23330=8,3,13,13,1024,3,13,13,1024
|
||||
DeconvolutionDepthWise upsample 1 1 conv17_conv17/relu_splitncnn_1 upsample -23330=4,3,26,26,512 0=512 1=4 3=2 4=1 6=16384 7=512
|
||||
Eltwise conv_18/sum 2 1 conv11_conv11/relu_splitncnn_0 upsample conv_18/sum -23330=4,3,26,26,512 0=1
|
||||
ConvolutionDepthWise conv19/dw 1 1 conv_18/sum conv19/dw_conv19/dw/relu -23330=4,3,26,26,512 0=512 1=3 4=1 5=1 6=4608 7=512 9=1
|
||||
Convolution conv20 1 1 conv19/dw_conv19/dw/relu conv20_conv20/relu -23330=4,3,26,26,1024 0=1024 1=1 5=1 6=524288 9=1
|
||||
Convolution conv22_indoor 1 1 conv17_conv17/relu_splitncnn_0 conv22 -23330=4,3,13,13,125 0=125 1=1 5=1 6=128000
|
||||
Convolution conv23_indoor 1 1 conv20_conv20/relu conv23 -23330=4,3,26,26,125 0=125 1=1 5=1 6=128000
|
||||
YoloDetectionOutput detection_out 2 1 conv22 conv23 output -23330=4,3,13,13,125 2=4.000000e-01 -23304=10,1.080000e+00,1.190000e+00,3.420000e+00,4.410000e+00,6.630000e+00,1.138000e+01,9.420000e+00,5.110000e+00,1.662000e+01,1.052000e+01
|
89
3rdparty/ncnn/benchmark/mobilenetv2_yolov3.param
vendored
Normal file
89
3rdparty/ncnn/benchmark/mobilenetv2_yolov3.param
vendored
Normal file
@ -0,0 +1,89 @@
|
||||
7767517
|
||||
87 99
|
||||
Input data 0 1 data -23330=4,3,352,352,3 0=352 1=352 2=3
|
||||
Convolution conv1 1 1 data conv1_relu1 -23330=4,3,176,176,32 0=32 1=3 3=2 4=1 5=1 6=864 9=1
|
||||
ConvolutionDepthWise conv2 1 1 conv1_relu1 conv2_relu2 -23330=4,3,176,176,32 0=32 1=3 4=1 5=1 6=288 7=32 9=1
|
||||
Convolution conv3 1 1 conv2_relu2 conv3 -23330=4,3,176,176,16 0=16 1=1 5=1 6=512
|
||||
Convolution conv4 1 1 conv3 conv4_relu3 -23330=4,3,176,176,96 0=96 1=1 5=1 6=1536 9=1
|
||||
ConvolutionDepthWise conv5 1 1 conv4_relu3 conv5_relu4 -23330=4,3,88,88,96 0=96 1=3 3=2 4=1 5=1 6=864 7=96 9=1
|
||||
Convolution conv6 1 1 conv5_relu4 conv6 -23330=4,3,88,88,24 0=24 1=1 5=1 6=2304
|
||||
Split splitncnn_0 1 2 conv6 conv6_splitncnn_0 conv6_splitncnn_1 -23330=8,3,88,88,24,3,88,88,24
|
||||
Convolution conv7 1 1 conv6_splitncnn_1 conv7_relu5 -23330=4,3,88,88,144 0=144 1=1 5=1 6=3456 9=1
|
||||
ConvolutionDepthWise conv8 1 1 conv7_relu5 conv8_relu6 -23330=4,3,88,88,144 0=144 1=3 4=1 5=1 6=1296 7=144 9=1
|
||||
Convolution conv9 1 1 conv8_relu6 conv9 -23330=4,3,88,88,24 0=24 1=1 5=1 6=3456
|
||||
Eltwise add1 2 1 conv6_splitncnn_0 conv9 add1 -23330=4,3,88,88,24 0=1
|
||||
Convolution conv10 1 1 add1 conv10_relu7 -23330=4,3,88,88,144 0=144 1=1 5=1 6=3456 9=1
|
||||
ConvolutionDepthWise conv11 1 1 conv10_relu7 conv11_relu8 -23330=4,3,44,44,144 0=144 1=3 3=2 4=1 5=1 6=1296 7=144 9=1
|
||||
Convolution conv12 1 1 conv11_relu8 conv12 -23330=4,3,44,44,32 0=32 1=1 5=1 6=4608
|
||||
Split splitncnn_1 1 2 conv12 conv12_splitncnn_0 conv12_splitncnn_1 -23330=8,3,44,44,32,3,44,44,32
|
||||
Convolution conv13 1 1 conv12_splitncnn_1 conv13_relu9 -23330=4,3,44,44,192 0=192 1=1 5=1 6=6144 9=1
|
||||
ConvolutionDepthWise conv14 1 1 conv13_relu9 conv14_relu10 -23330=4,3,44,44,192 0=192 1=3 4=1 5=1 6=1728 7=192 9=1
|
||||
Convolution conv15 1 1 conv14_relu10 conv15 -23330=4,3,44,44,32 0=32 1=1 5=1 6=6144
|
||||
Eltwise add2 2 1 conv12_splitncnn_0 conv15 add2 -23330=4,3,44,44,32 0=1
|
||||
Split splitncnn_2 1 2 add2 add2_splitncnn_0 add2_splitncnn_1 -23330=8,3,44,44,32,3,44,44,32
|
||||
Convolution conv16 1 1 add2_splitncnn_1 conv16_relu11 -23330=4,3,44,44,192 0=192 1=1 5=1 6=6144 9=1
|
||||
ConvolutionDepthWise conv17 1 1 conv16_relu11 conv17_relu12 -23330=4,3,44,44,192 0=192 1=3 4=1 5=1 6=1728 7=192 9=1
|
||||
Convolution conv18 1 1 conv17_relu12 conv18 -23330=4,3,44,44,32 0=32 1=1 5=1 6=6144
|
||||
Eltwise add3 2 1 add2_splitncnn_0 conv18 add3 -23330=4,3,44,44,32 0=1
|
||||
Convolution conv19 1 1 add3 conv19_relu13 -23330=4,3,44,44,192 0=192 1=1 5=1 6=6144 9=1
|
||||
ConvolutionDepthWise conv20 1 1 conv19_relu13 conv20_relu14 -23330=4,3,22,22,192 0=192 1=3 3=2 4=1 5=1 6=1728 7=192 9=1
|
||||
Convolution conv21 1 1 conv20_relu14 conv21 -23330=4,3,22,22,64 0=64 1=1 5=1 6=12288
|
||||
Split splitncnn_3 1 2 conv21 conv21_splitncnn_0 conv21_splitncnn_1 -23330=8,3,22,22,64,3,22,22,64
|
||||
Convolution conv22 1 1 conv21_splitncnn_1 conv22_relu15 -23330=4,3,22,22,384 0=384 1=1 5=1 6=24576 9=1
|
||||
ConvolutionDepthWise conv23 1 1 conv22_relu15 conv23_relu16 -23330=4,3,22,22,384 0=384 1=3 4=1 5=1 6=3456 7=384 9=1
|
||||
Convolution conv24 1 1 conv23_relu16 conv24 -23330=4,3,22,22,64 0=64 1=1 5=1 6=24576
|
||||
Eltwise add4 2 1 conv21_splitncnn_0 conv24 add4 -23330=4,3,22,22,64 0=1
|
||||
Split splitncnn_4 1 2 add4 add4_splitncnn_0 add4_splitncnn_1 -23330=8,3,22,22,64,3,22,22,64
|
||||
Convolution conv25 1 1 add4_splitncnn_1 conv25_relu17 -23330=4,3,22,22,384 0=384 1=1 5=1 6=24576 9=1
|
||||
ConvolutionDepthWise conv26 1 1 conv25_relu17 conv26_relu18 -23330=4,3,22,22,384 0=384 1=3 4=1 5=1 6=3456 7=384 9=1
|
||||
Convolution conv27 1 1 conv26_relu18 conv27 -23330=4,3,22,22,64 0=64 1=1 5=1 6=24576
|
||||
Eltwise add5 2 1 add4_splitncnn_0 conv27 add5 -23330=4,3,22,22,64 0=1
|
||||
Split splitncnn_5 1 2 add5 add5_splitncnn_0 add5_splitncnn_1 -23330=8,3,22,22,64,3,22,22,64
|
||||
Convolution conv28 1 1 add5_splitncnn_1 conv28_relu19 -23330=4,3,22,22,384 0=384 1=1 5=1 6=24576 9=1
|
||||
ConvolutionDepthWise conv29 1 1 conv28_relu19 conv29_relu20 -23330=4,3,22,22,384 0=384 1=3 4=1 5=1 6=3456 7=384 9=1
|
||||
Convolution conv30 1 1 conv29_relu20 conv30 -23330=4,3,22,22,64 0=64 1=1 5=1 6=24576
|
||||
Eltwise add6 2 1 add5_splitncnn_0 conv30 add6 -23330=4,3,22,22,64 0=1
|
||||
Convolution conv31 1 1 add6 conv31_relu21 -23330=4,3,22,22,384 0=384 1=1 5=1 6=24576 9=1
|
||||
ConvolutionDepthWise conv32 1 1 conv31_relu21 conv32_relu22 -23330=4,3,22,22,384 0=384 1=3 4=1 5=1 6=3456 7=384 9=1
|
||||
Convolution conv33 1 1 conv32_relu22 conv33 -23330=4,3,22,22,96 0=96 1=1 5=1 6=36864
|
||||
Split splitncnn_6 1 2 conv33 conv33_splitncnn_0 conv33_splitncnn_1 -23330=8,3,22,22,96,3,22,22,96
|
||||
Convolution conv34 1 1 conv33_splitncnn_1 conv34_relu23 -23330=4,3,22,22,576 0=576 1=1 5=1 6=55296 9=1
|
||||
ConvolutionDepthWise conv35 1 1 conv34_relu23 conv35_relu24 -23330=4,3,22,22,576 0=576 1=3 4=1 5=1 6=5184 7=576 9=1
|
||||
Convolution conv36 1 1 conv35_relu24 conv36 -23330=4,3,22,22,96 0=96 1=1 5=1 6=55296
|
||||
Eltwise add7 2 1 conv33_splitncnn_0 conv36 add7 -23330=4,3,22,22,96 0=1
|
||||
Split splitncnn_7 1 2 add7 add7_splitncnn_0 add7_splitncnn_1 -23330=8,3,22,22,96,3,22,22,96
|
||||
Convolution conv37 1 1 add7_splitncnn_1 conv37_relu25 -23330=4,3,22,22,576 0=576 1=1 5=1 6=55296 9=1
|
||||
ConvolutionDepthWise conv38 1 1 conv37_relu25 conv38_relu26 -23330=4,3,22,22,576 0=576 1=3 4=1 5=1 6=5184 7=576 9=1
|
||||
Convolution conv39 1 1 conv38_relu26 conv39 -23330=4,3,22,22,96 0=96 1=1 5=1 6=55296
|
||||
Eltwise add8 2 1 add7_splitncnn_0 conv39 add8 -23330=4,3,22,22,96 0=1
|
||||
Convolution conv40 1 1 add8 conv40_relu27 -23330=4,3,22,22,576 0=576 1=1 5=1 6=55296 9=1
|
||||
Split splitncnn_8 1 2 conv40_relu27 conv40_relu27_splitncnn_0 conv40_relu27_splitncnn_1 -23330=8,3,22,22,576,3,22,22,576
|
||||
ConvolutionDepthWise conv41 1 1 conv40_relu27_splitncnn_1 conv41_relu28 -23330=4,3,11,11,576 0=576 1=3 3=2 4=1 5=1 6=5184 7=576 9=1
|
||||
Convolution conv42 1 1 conv41_relu28 conv42 -23330=4,3,11,11,160 0=160 1=1 5=1 6=92160
|
||||
Split splitncnn_9 1 2 conv42 conv42_splitncnn_0 conv42_splitncnn_1 -23330=8,3,11,11,160,3,11,11,160
|
||||
Convolution conv43 1 1 conv42_splitncnn_1 conv43_relu29 -23330=4,3,11,11,960 0=960 1=1 5=1 6=153600 9=1
|
||||
ConvolutionDepthWise conv44 1 1 conv43_relu29 conv44_relu30 -23330=4,3,11,11,960 0=960 1=3 4=1 5=1 6=8640 7=960 9=1
|
||||
Convolution conv45 1 1 conv44_relu30 conv45 -23330=4,3,11,11,160 0=160 1=1 5=1 6=153600
|
||||
Eltwise add9 2 1 conv42_splitncnn_0 conv45 add9 -23330=4,3,11,11,160 0=1
|
||||
Split splitncnn_10 1 2 add9 add9_splitncnn_0 add9_splitncnn_1 -23330=8,3,11,11,160,3,11,11,160
|
||||
Convolution conv46 1 1 add9_splitncnn_1 conv46_relu31 -23330=4,3,11,11,960 0=960 1=1 5=1 6=153600 9=1
|
||||
ConvolutionDepthWise conv47 1 1 conv46_relu31 conv47_relu32 -23330=4,3,11,11,960 0=960 1=3 4=1 5=1 6=8640 7=960 9=1
|
||||
Convolution conv48 1 1 conv47_relu32 conv48 -23330=4,3,11,11,160 0=160 1=1 5=1 6=153600
|
||||
Eltwise add10 2 1 add9_splitncnn_0 conv48 add10 -23330=4,3,11,11,160 0=1
|
||||
Convolution conv49 1 1 add10 conv49_relu33 -23330=4,3,11,11,960 0=960 1=1 5=1 6=153600 9=1
|
||||
ConvolutionDepthWise conv50 1 1 conv49_relu33 conv50_relu34 -23330=4,3,11,11,960 0=960 1=3 4=1 5=1 6=8640 7=960 9=1
|
||||
Convolution conv51 1 1 conv50_relu34 conv51 -23330=4,3,11,11,320 0=320 1=1 5=1 6=307200
|
||||
Convolution conv52 1 1 conv51 conv52_relu35 -23330=4,3,11,11,1280 0=1280 1=1 5=1 6=409600 9=1
|
||||
ConvolutionDepthWise yolo/conv1/dw 1 1 conv52_relu35 yolo/conv1/dw_yolo/conv1/dw/relu -23330=4,3,11,11,1280 0=1280 1=3 4=1 5=1 6=11520 7=1280 9=1
|
||||
Convolution yolo/conv1 1 1 yolo/conv1/dw_yolo/conv1/dw/relu yolo/conv1_yolo/conv1/relu -23330=4,3,11,11,576 0=576 1=1 5=1 6=737280 9=1
|
||||
Split splitncnn_11 1 2 yolo/conv1_yolo/conv1/relu yolo/conv1_yolo/conv1/relu_splitncnn_0 yolo/conv1_yolo/conv1/relu_splitncnn_1 -23330=8,3,11,11,576,3,11,11,576
|
||||
DeconvolutionDepthWise upsample 1 1 yolo/conv1_yolo/conv1/relu_splitncnn_1 upsample -23330=4,3,21,21,576 0=576 1=1 3=2 6=576 7=576
|
||||
Pooling maxpool 1 1 upsample maxpool -23330=4,3,22,22,576 1=2 3=1
|
||||
ConvolutionDepthWise yolo/conv2/dw 1 1 conv40_relu27_splitncnn_0 yolo/conv2/dw_yolo/conv2/dw/relu -23330=4,3,22,22,576 0=576 1=3 4=1 5=1 6=5184 7=576 9=1
|
||||
Convolution yolo/conv2 1 1 yolo/conv2/dw_yolo/conv2/dw/relu yolo/conv2_yolo/conv2/relu -23330=4,3,22,22,576 0=576 1=1 5=1 6=331776 9=1
|
||||
Eltwise yolo/conv2/sum 2 1 maxpool yolo/conv2_yolo/conv2/relu yolo/conv2/sum -23330=4,3,22,22,576 0=1
|
||||
ConvolutionDepthWise yolo/conv3/dw 1 1 yolo/conv2/sum yolo/conv3/dw_yolo/conv3/dw/relu -23330=4,3,22,22,576 0=576 1=3 4=1 5=1 6=5184 7=576 9=1
|
||||
Convolution yolo/conv3 1 1 yolo/conv3/dw_yolo/conv3/dw/relu yolo/conv3_yolo/conv3/relu -23330=4,3,22,22,576 0=576 1=1 5=1 6=331776 9=1
|
||||
Convolution yolo/conv4 1 1 yolo/conv1_yolo/conv1/relu_splitncnn_0 yolo/conv4 -23330=4,3,11,11,75 0=75 1=1 5=1 6=43200
|
||||
Convolution yolo/conv5 1 1 yolo/conv3_yolo/conv3/relu yolo/conv5 -23330=4,3,22,22,75 0=75 1=1 5=1 6=43200
|
||||
Yolov3DetectionOutput detection_out 2 1 yolo/conv4 yolo/conv5 output 1=3 2=3.000000e-01 -23304=12,2.000000e+01,3.700000e+01,4.900000e+01,9.400000e+01,7.300000e+01,2.010000e+02,1.430000e+02,2.650000e+02,1.530000e+02,1.210000e+02,2.800000e+02,2.790000e+02 -23305=6,1077936128,1082130432,1084227584,0,1065353216,1073741824 -23306=2,3.200000e+01,1.600000e+01
|
182
3rdparty/ncnn/benchmark/nanodet_m.param
vendored
Normal file
182
3rdparty/ncnn/benchmark/nanodet_m.param
vendored
Normal file
@ -0,0 +1,182 @@
|
||||
7767517
|
||||
179 204
|
||||
Input input.1 0 1 input.1 -23330=4,3,320,320,3 0=320 1=320 2=3
|
||||
Convolution Conv_0 1 1 input.1 424 -23330=4,3,160,160,24 0=24 1=3 3=2 4=1 5=1 6=648 9=2 -23310=1,1.000000e-01
|
||||
Pooling MaxPool_2 1 1 424 425 -23330=4,3,80,80,24 1=3 2=2 3=1 5=1
|
||||
Split splitncnn_0 1 2 425 425_splitncnn_0 425_splitncnn_1 -23330=8,3,80,80,24,3,80,80,24
|
||||
ConvolutionDepthWise Conv_3 1 1 425_splitncnn_1 943 -23330=4,3,40,40,24 0=24 1=3 3=2 4=1 5=1 6=216 7=24
|
||||
Convolution Conv_4 1 1 943 430 -23330=4,3,40,40,58 0=58 1=1 5=1 6=1392 9=2 -23310=1,1.000000e-01
|
||||
Convolution Conv_6 1 1 425_splitncnn_0 433 -23330=4,3,80,80,58 0=58 1=1 5=1 6=1392 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise Conv_8 1 1 433 952 -23330=4,3,40,40,58 0=58 1=3 3=2 4=1 5=1 6=522 7=58
|
||||
Convolution Conv_9 1 1 952 438 -23330=4,3,40,40,58 0=58 1=1 5=1 6=3364 9=2 -23310=1,1.000000e-01
|
||||
Concat Concat_11 2 1 430 438 439 -23330=4,3,40,40,116
|
||||
ShuffleChannel Reshape_16 1 1 439 444 -23330=4,3,40,40,116 0=2
|
||||
Split splitncnn_1 1 2 444 444_splitncnn_0 444_splitncnn_1 -23330=8,3,40,40,116,3,40,40,116
|
||||
Crop Slice_27 1 1 444_splitncnn_1 455 -23330=4,3,40,40,58 -23309=1,0 -23310=1,58 -23311=1,0
|
||||
Crop Slice_30 1 1 444_splitncnn_0 458 -23330=4,3,40,40,58 -23309=1,58 -23310=1,116 -23311=1,0
|
||||
Convolution Conv_31 1 1 458 461 -23330=4,3,40,40,58 0=58 1=1 5=1 6=3364 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise Conv_33 1 1 461 961 -23330=4,3,40,40,58 0=58 1=3 4=1 5=1 6=522 7=58
|
||||
Convolution Conv_34 1 1 961 466 -23330=4,3,40,40,58 0=58 1=1 5=1 6=3364 9=2 -23310=1,1.000000e-01
|
||||
Concat Concat_36 2 1 455 466 467 -23330=4,3,40,40,116
|
||||
ShuffleChannel Reshape_41 1 1 467 472 -23330=4,3,40,40,116 0=2
|
||||
Split splitncnn_2 1 2 472 472_splitncnn_0 472_splitncnn_1 -23330=8,3,40,40,116,3,40,40,116
|
||||
Crop Slice_52 1 1 472_splitncnn_1 483 -23330=4,3,40,40,58 -23309=1,0 -23310=1,58 -23311=1,0
|
||||
Crop Slice_55 1 1 472_splitncnn_0 486 -23330=4,3,40,40,58 -23309=1,58 -23310=1,116 -23311=1,0
|
||||
Convolution Conv_56 1 1 486 489 -23330=4,3,40,40,58 0=58 1=1 5=1 6=3364 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise Conv_58 1 1 489 970 -23330=4,3,40,40,58 0=58 1=3 4=1 5=1 6=522 7=58
|
||||
Convolution Conv_59 1 1 970 494 -23330=4,3,40,40,58 0=58 1=1 5=1 6=3364 9=2 -23310=1,1.000000e-01
|
||||
Concat Concat_61 2 1 483 494 495 -23330=4,3,40,40,116
|
||||
ShuffleChannel Reshape_66 1 1 495 500 -23330=4,3,40,40,116 0=2
|
||||
Split splitncnn_3 1 2 500 500_splitncnn_0 500_splitncnn_1 -23330=8,3,40,40,116,3,40,40,116
|
||||
Crop Slice_77 1 1 500_splitncnn_1 511 -23330=4,3,40,40,58 -23309=1,0 -23310=1,58 -23311=1,0
|
||||
Crop Slice_80 1 1 500_splitncnn_0 514 -23330=4,3,40,40,58 -23309=1,58 -23310=1,116 -23311=1,0
|
||||
Convolution Conv_81 1 1 514 517 -23330=4,3,40,40,58 0=58 1=1 5=1 6=3364 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise Conv_83 1 1 517 979 -23330=4,3,40,40,58 0=58 1=3 4=1 5=1 6=522 7=58
|
||||
Convolution Conv_84 1 1 979 522 -23330=4,3,40,40,58 0=58 1=1 5=1 6=3364 9=2 -23310=1,1.000000e-01
|
||||
Concat Concat_86 2 1 511 522 523 -23330=4,3,40,40,116
|
||||
ShuffleChannel Reshape_91 1 1 523 528 -23330=4,3,40,40,116 0=2
|
||||
Split splitncnn_4 1 3 528 528_splitncnn_0 528_splitncnn_1 528_splitncnn_2 -23330=12,3,40,40,116,3,40,40,116,3,40,40,116
|
||||
ConvolutionDepthWise Conv_92 1 1 528_splitncnn_2 985 -23330=4,3,20,20,116 0=116 1=3 3=2 4=1 5=1 6=1044 7=116
|
||||
Convolution Conv_93 1 1 985 533 -23330=4,3,20,20,116 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
|
||||
Convolution Conv_95 1 1 528_splitncnn_1 536 -23330=4,3,40,40,116 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise Conv_97 1 1 536 994 -23330=4,3,20,20,116 0=116 1=3 3=2 4=1 5=1 6=1044 7=116
|
||||
Convolution Conv_98 1 1 994 541 -23330=4,3,20,20,116 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
|
||||
Concat Concat_100 2 1 533 541 542 -23330=4,3,20,20,232
|
||||
ShuffleChannel Reshape_105 1 1 542 547 -23330=4,3,20,20,232 0=2
|
||||
Split splitncnn_5 1 2 547 547_splitncnn_0 547_splitncnn_1 -23330=8,3,20,20,232,3,20,20,232
|
||||
Crop Slice_116 1 1 547_splitncnn_1 558 -23330=4,3,20,20,116 -23309=1,0 -23310=1,116 -23311=1,0
|
||||
Crop Slice_119 1 1 547_splitncnn_0 561 -23330=4,3,20,20,116 -23309=1,116 -23310=1,232 -23311=1,0
|
||||
Convolution Conv_120 1 1 561 564 -23330=4,3,20,20,116 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise Conv_122 1 1 564 1003 -23330=4,3,20,20,116 0=116 1=3 4=1 5=1 6=1044 7=116
|
||||
Convolution Conv_123 1 1 1003 569 -23330=4,3,20,20,116 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
|
||||
Concat Concat_125 2 1 558 569 570 -23330=4,3,20,20,232
|
||||
ShuffleChannel Reshape_130 1 1 570 575 -23330=4,3,20,20,232 0=2
|
||||
Split splitncnn_6 1 2 575 575_splitncnn_0 575_splitncnn_1 -23330=8,3,20,20,232,3,20,20,232
|
||||
Crop Slice_141 1 1 575_splitncnn_1 586 -23330=4,3,20,20,116 -23309=1,0 -23310=1,116 -23311=1,0
|
||||
Crop Slice_144 1 1 575_splitncnn_0 589 -23330=4,3,20,20,116 -23309=1,116 -23310=1,232 -23311=1,0
|
||||
Convolution Conv_145 1 1 589 592 -23330=4,3,20,20,116 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise Conv_147 1 1 592 1012 -23330=4,3,20,20,116 0=116 1=3 4=1 5=1 6=1044 7=116
|
||||
Convolution Conv_148 1 1 1012 597 -23330=4,3,20,20,116 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
|
||||
Concat Concat_150 2 1 586 597 598 -23330=4,3,20,20,232
|
||||
ShuffleChannel Reshape_155 1 1 598 603 -23330=4,3,20,20,232 0=2
|
||||
Split splitncnn_7 1 2 603 603_splitncnn_0 603_splitncnn_1 -23330=8,3,20,20,232,3,20,20,232
|
||||
Crop Slice_166 1 1 603_splitncnn_1 614 -23330=4,3,20,20,116 -23309=1,0 -23310=1,116 -23311=1,0
|
||||
Crop Slice_169 1 1 603_splitncnn_0 617 -23330=4,3,20,20,116 -23309=1,116 -23310=1,232 -23311=1,0
|
||||
Convolution Conv_170 1 1 617 620 -23330=4,3,20,20,116 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise Conv_172 1 1 620 1021 -23330=4,3,20,20,116 0=116 1=3 4=1 5=1 6=1044 7=116
|
||||
Convolution Conv_173 1 1 1021 625 -23330=4,3,20,20,116 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
|
||||
Concat Concat_175 2 1 614 625 626 -23330=4,3,20,20,232
|
||||
ShuffleChannel Reshape_180 1 1 626 631 -23330=4,3,20,20,232 0=2
|
||||
Split splitncnn_8 1 2 631 631_splitncnn_0 631_splitncnn_1 -23330=8,3,20,20,232,3,20,20,232
|
||||
Crop Slice_191 1 1 631_splitncnn_1 642 -23330=4,3,20,20,116 -23309=1,0 -23310=1,116 -23311=1,0
|
||||
Crop Slice_194 1 1 631_splitncnn_0 645 -23330=4,3,20,20,116 -23309=1,116 -23310=1,232 -23311=1,0
|
||||
Convolution Conv_195 1 1 645 648 -23330=4,3,20,20,116 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise Conv_197 1 1 648 1030 -23330=4,3,20,20,116 0=116 1=3 4=1 5=1 6=1044 7=116
|
||||
Convolution Conv_198 1 1 1030 653 -23330=4,3,20,20,116 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
|
||||
Concat Concat_200 2 1 642 653 654 -23330=4,3,20,20,232
|
||||
ShuffleChannel Reshape_205 1 1 654 659 -23330=4,3,20,20,232 0=2
|
||||
Split splitncnn_9 1 2 659 659_splitncnn_0 659_splitncnn_1 -23330=8,3,20,20,232,3,20,20,232
|
||||
Crop Slice_216 1 1 659_splitncnn_1 670 -23330=4,3,20,20,116 -23309=1,0 -23310=1,116 -23311=1,0
|
||||
Crop Slice_219 1 1 659_splitncnn_0 673 -23330=4,3,20,20,116 -23309=1,116 -23310=1,232 -23311=1,0
|
||||
Convolution Conv_220 1 1 673 676 -23330=4,3,20,20,116 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise Conv_222 1 1 676 1039 -23330=4,3,20,20,116 0=116 1=3 4=1 5=1 6=1044 7=116
|
||||
Convolution Conv_223 1 1 1039 681 -23330=4,3,20,20,116 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
|
||||
Concat Concat_225 2 1 670 681 682 -23330=4,3,20,20,232
|
||||
ShuffleChannel Reshape_230 1 1 682 687 -23330=4,3,20,20,232 0=2
|
||||
Split splitncnn_10 1 2 687 687_splitncnn_0 687_splitncnn_1 -23330=8,3,20,20,232,3,20,20,232
|
||||
Crop Slice_241 1 1 687_splitncnn_1 698 -23330=4,3,20,20,116 -23309=1,0 -23310=1,116 -23311=1,0
|
||||
Crop Slice_244 1 1 687_splitncnn_0 701 -23330=4,3,20,20,116 -23309=1,116 -23310=1,232 -23311=1,0
|
||||
Convolution Conv_245 1 1 701 704 -23330=4,3,20,20,116 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise Conv_247 1 1 704 1048 -23330=4,3,20,20,116 0=116 1=3 4=1 5=1 6=1044 7=116
|
||||
Convolution Conv_248 1 1 1048 709 -23330=4,3,20,20,116 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
|
||||
Concat Concat_250 2 1 698 709 710 -23330=4,3,20,20,232
|
||||
ShuffleChannel Reshape_255 1 1 710 715 -23330=4,3,20,20,232 0=2
|
||||
Split splitncnn_11 1 2 715 715_splitncnn_0 715_splitncnn_1 -23330=8,3,20,20,232,3,20,20,232
|
||||
Crop Slice_266 1 1 715_splitncnn_1 726 -23330=4,3,20,20,116 -23309=1,0 -23310=1,116 -23311=1,0
|
||||
Crop Slice_269 1 1 715_splitncnn_0 729 -23330=4,3,20,20,116 -23309=1,116 -23310=1,232 -23311=1,0
|
||||
Convolution Conv_270 1 1 729 732 -23330=4,3,20,20,116 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise Conv_272 1 1 732 1057 -23330=4,3,20,20,116 0=116 1=3 4=1 5=1 6=1044 7=116
|
||||
Convolution Conv_273 1 1 1057 737 -23330=4,3,20,20,116 0=116 1=1 5=1 6=13456 9=2 -23310=1,1.000000e-01
|
||||
Concat Concat_275 2 1 726 737 738 -23330=4,3,20,20,232
|
||||
ShuffleChannel Reshape_280 1 1 738 743 -23330=4,3,20,20,232 0=2
|
||||
Split splitncnn_12 1 3 743 743_splitncnn_0 743_splitncnn_1 743_splitncnn_2 -23330=12,3,20,20,232,3,20,20,232,3,20,20,232
|
||||
ConvolutionDepthWise Conv_281 1 1 743_splitncnn_2 1063 -23330=4,3,10,10,232 0=232 1=3 3=2 4=1 5=1 6=2088 7=232
|
||||
Convolution Conv_282 1 1 1063 748 -23330=4,3,10,10,232 0=232 1=1 5=1 6=53824 9=2 -23310=1,1.000000e-01
|
||||
Convolution Conv_284 1 1 743_splitncnn_1 751 -23330=4,3,20,20,232 0=232 1=1 5=1 6=53824 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise Conv_286 1 1 751 1072 -23330=4,3,10,10,232 0=232 1=3 3=2 4=1 5=1 6=2088 7=232
|
||||
Convolution Conv_287 1 1 1072 756 -23330=4,3,10,10,232 0=232 1=1 5=1 6=53824 9=2 -23310=1,1.000000e-01
|
||||
Concat Concat_289 2 1 748 756 757 -23330=4,3,10,10,464
|
||||
ShuffleChannel Reshape_294 1 1 757 762 -23330=4,3,10,10,464 0=2
|
||||
Split splitncnn_13 1 2 762 762_splitncnn_0 762_splitncnn_1 -23330=8,3,10,10,464,3,10,10,464
|
||||
Crop Slice_305 1 1 762_splitncnn_1 773 -23330=4,3,10,10,232 -23309=1,0 -23310=1,232 -23311=1,0
|
||||
Crop Slice_308 1 1 762_splitncnn_0 776 -23330=4,3,10,10,232 -23309=1,232 -23310=1,464 -23311=1,0
|
||||
Convolution Conv_309 1 1 776 779 -23330=4,3,10,10,232 0=232 1=1 5=1 6=53824 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise Conv_311 1 1 779 1081 -23330=4,3,10,10,232 0=232 1=3 4=1 5=1 6=2088 7=232
|
||||
Convolution Conv_312 1 1 1081 784 -23330=4,3,10,10,232 0=232 1=1 5=1 6=53824 9=2 -23310=1,1.000000e-01
|
||||
Concat Concat_314 2 1 773 784 785 -23330=4,3,10,10,464
|
||||
ShuffleChannel Reshape_319 1 1 785 790 -23330=4,3,10,10,464 0=2
|
||||
Split splitncnn_14 1 2 790 790_splitncnn_0 790_splitncnn_1 -23330=8,3,10,10,464,3,10,10,464
|
||||
Crop Slice_330 1 1 790_splitncnn_1 801 -23330=4,3,10,10,232 -23309=1,0 -23310=1,232 -23311=1,0
|
||||
Crop Slice_333 1 1 790_splitncnn_0 804 -23330=4,3,10,10,232 -23309=1,232 -23310=1,464 -23311=1,0
|
||||
Convolution Conv_334 1 1 804 807 -23330=4,3,10,10,232 0=232 1=1 5=1 6=53824 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise Conv_336 1 1 807 1090 -23330=4,3,10,10,232 0=232 1=3 4=1 5=1 6=2088 7=232
|
||||
Convolution Conv_337 1 1 1090 812 -23330=4,3,10,10,232 0=232 1=1 5=1 6=53824 9=2 -23310=1,1.000000e-01
|
||||
Concat Concat_339 2 1 801 812 813 -23330=4,3,10,10,464
|
||||
ShuffleChannel Reshape_344 1 1 813 818 -23330=4,3,10,10,464 0=2
|
||||
Split splitncnn_15 1 2 818 818_splitncnn_0 818_splitncnn_1 -23330=8,3,10,10,464,3,10,10,464
|
||||
Crop Slice_355 1 1 818_splitncnn_1 829 -23330=4,3,10,10,232 -23309=1,0 -23310=1,232 -23311=1,0
|
||||
Crop Slice_358 1 1 818_splitncnn_0 832 -23330=4,3,10,10,232 -23309=1,232 -23310=1,464 -23311=1,0
|
||||
Convolution Conv_359 1 1 832 835 -23330=4,3,10,10,232 0=232 1=1 5=1 6=53824 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise Conv_361 1 1 835 1099 -23330=4,3,10,10,232 0=232 1=3 4=1 5=1 6=2088 7=232
|
||||
Convolution Conv_362 1 1 1099 840 -23330=4,3,10,10,232 0=232 1=1 5=1 6=53824 9=2 -23310=1,1.000000e-01
|
||||
Concat Concat_364 2 1 829 840 841 -23330=4,3,10,10,464
|
||||
ShuffleChannel Reshape_369 1 1 841 846 -23330=4,3,10,10,464 0=2
|
||||
Convolution Conv_370 1 1 528_splitncnn_0 847 -23330=4,3,40,40,96 0=96 1=1 5=1 6=11136
|
||||
Convolution Conv_371 1 1 743_splitncnn_0 848 -23330=4,3,20,20,96 0=96 1=1 5=1 6=22272
|
||||
Convolution Conv_372 1 1 846 849 -23330=4,3,10,10,96 0=96 1=1 5=1 6=44544
|
||||
Split splitncnn_16 1 2 849 849_splitncnn_0 849_splitncnn_1 -23330=8,3,10,10,96,3,10,10,96
|
||||
Interp Resize_374 1 1 849_splitncnn_1 854 -23330=4,3,20,20,96 0=2 1=2.000000e+00 2=2.000000e+00
|
||||
BinaryOp Add_375 2 1 848 854 855 -23330=4,3,20,20,96
|
||||
Split splitncnn_17 1 2 855 855_splitncnn_0 855_splitncnn_1 -23330=8,3,20,20,96,3,20,20,96
|
||||
Interp Resize_377 1 1 855_splitncnn_1 860 -23330=4,3,40,40,96 0=2 1=2.000000e+00 2=2.000000e+00
|
||||
BinaryOp Add_378 2 1 847 860 861 -23330=4,3,40,40,96
|
||||
Split splitncnn_18 1 2 861 861_splitncnn_0 861_splitncnn_1 -23330=8,3,40,40,96,3,40,40,96
|
||||
Interp Resize_380 1 1 861_splitncnn_1 866 -23330=4,3,20,20,96 0=2 1=5.000000e-01 2=5.000000e-01
|
||||
BinaryOp Add_381 2 1 855_splitncnn_0 866 867 -23330=4,3,20,20,96
|
||||
Split splitncnn_19 1 2 867 867_splitncnn_0 867_splitncnn_1 -23330=8,3,20,20,96,3,20,20,96
|
||||
Interp Resize_383 1 1 867_splitncnn_1 872 -23330=4,3,10,10,96 0=2 1=5.000000e-01 2=5.000000e-01
|
||||
BinaryOp Add_384 2 1 849_splitncnn_0 872 873 -23330=4,3,10,10,96
|
||||
ConvolutionDepthWise Conv_385 1 1 861_splitncnn_0 876 -23330=4,3,40,40,96 0=96 1=3 4=1 5=1 6=864 7=96 9=2 -23310=1,1.000000e-01
|
||||
Convolution Conv_387 1 1 876 879 -23330=4,3,40,40,96 0=96 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise Conv_389 1 1 879 882 -23330=4,3,40,40,96 0=96 1=3 4=1 5=1 6=864 7=96 9=2 -23310=1,1.000000e-01
|
||||
Convolution Conv_391 1 1 882 885 -23330=4,3,40,40,96 0=96 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01
|
||||
Convolution Conv_393 1 1 885 886 -23330=4,3,40,40,112 0=112 1=1 5=1 6=10752
|
||||
Slice Split_394 1 2 886 887 888 -23330=8,3,40,40,80,3,40,40,32 -23300=2,80,-233
|
||||
Sigmoid Sigmoid_395 1 1 887 889 -23330=4,3,40,40,80
|
||||
Reshape Reshape_397 1 1 889 891 -23330=4,2,1600,80,1 0=-1 1=80
|
||||
Permute Transpose_398 1 1 891 cls_pred_stride_8 -23330=4,2,80,1600,1 0=1
|
||||
Reshape Reshape_400 1 1 888 894 -23330=4,2,1600,32,1 0=-1 1=32
|
||||
Permute Transpose_401 1 1 894 dis_pred_stride_8 -23330=4,2,32,1600,1 0=1
|
||||
ConvolutionDepthWise Conv_402 1 1 867_splitncnn_0 898 -23330=4,3,20,20,96 0=96 1=3 4=1 5=1 6=864 7=96 9=2 -23310=1,1.000000e-01
|
||||
Convolution Conv_404 1 1 898 901 -23330=4,3,20,20,96 0=96 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise Conv_406 1 1 901 904 -23330=4,3,20,20,96 0=96 1=3 4=1 5=1 6=864 7=96 9=2 -23310=1,1.000000e-01
|
||||
Convolution Conv_408 1 1 904 907 -23330=4,3,20,20,96 0=96 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01
|
||||
Convolution Conv_410 1 1 907 908 -23330=4,3,20,20,112 0=112 1=1 5=1 6=10752
|
||||
Slice Split_411 1 2 908 909 910 -23330=8,3,20,20,80,3,20,20,32 -23300=2,80,-233
|
||||
Sigmoid Sigmoid_412 1 1 909 911 -23330=4,3,20,20,80
|
||||
Reshape Reshape_414 1 1 911 913 -23330=4,2,400,80,1 0=-1 1=80
|
||||
Permute Transpose_415 1 1 913 cls_pred_stride_16 -23330=4,2,80,400,1 0=1
|
||||
Reshape Reshape_417 1 1 910 916 -23330=4,2,400,32,1 0=-1 1=32
|
||||
Permute Transpose_418 1 1 916 dis_pred_stride_16 -23330=4,2,32,400,1 0=1
|
||||
ConvolutionDepthWise Conv_419 1 1 873 920 -23330=4,3,10,10,96 0=96 1=3 4=1 5=1 6=864 7=96 9=2 -23310=1,1.000000e-01
|
||||
Convolution Conv_421 1 1 920 923 -23330=4,3,10,10,96 0=96 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise Conv_423 1 1 923 926 -23330=4,3,10,10,96 0=96 1=3 4=1 5=1 6=864 7=96 9=2 -23310=1,1.000000e-01
|
||||
Convolution Conv_425 1 1 926 929 -23330=4,3,10,10,96 0=96 1=1 5=1 6=9216 9=2 -23310=1,1.000000e-01
|
||||
Convolution Conv_427 1 1 929 930 -23330=4,3,10,10,112 0=112 1=1 5=1 6=10752
|
||||
Slice Split_428 1 2 930 931 932 -23330=8,3,10,10,80,3,10,10,32 -23300=2,80,-233
|
||||
Sigmoid Sigmoid_429 1 1 931 933 -23330=4,3,10,10,80
|
||||
Reshape Reshape_431 1 1 933 935 -23330=4,2,100,80,1 0=-1 1=80
|
||||
Permute Transpose_432 1 1 935 cls_pred_stride_32 -23330=4,2,80,100,1 0=1
|
||||
Reshape Reshape_434 1 1 932 938 -23330=4,2,100,32,1 0=-1 1=32
|
||||
Permute Transpose_435 1 1 938 dis_pred_stride_32 -23330=4,2,32,100,1 0=1
|
||||
Noop Output 6 1 cls_pred_stride_8 cls_pred_stride_16 cls_pred_stride_32 dis_pred_stride_8 dis_pred_stride_16 dis_pred_stride_32 output
|
93
3rdparty/ncnn/benchmark/proxylessnasnet.param
vendored
Normal file
93
3rdparty/ncnn/benchmark/proxylessnasnet.param
vendored
Normal file
@ -0,0 +1,93 @@
|
||||
7767517
|
||||
91 104
|
||||
Input data 0 1 data -23330=4,3,224,224,3 0=224 1=224 2=3
|
||||
Convolution first-3x3-conv 1 1 data first-3x3-conv_relu -23330=4,3,112,112,32 0=32 1=3 3=2 4=1 5=1 6=864 9=1
|
||||
ConvolutionDepthWise A0_dw 1 1 first-3x3-conv_relu A0_dw_relu -23330=4,3,112,112,32 0=32 1=3 4=1 5=1 6=288 7=32 9=1
|
||||
Convolution A0_linear 1 1 A0_dw_relu A0_linear_bn -23330=4,3,112,112,32 0=32 1=1 5=1 6=1024
|
||||
Convolution B0_expand 1 1 A0_linear_bn B0_expand_relu -23330=4,3,112,112,48 0=48 1=1 5=1 6=1536 9=1
|
||||
ConvolutionDepthWise B0_dw 1 1 B0_expand_relu B0_dw_relu -23330=4,3,56,56,48 0=48 1=5 3=2 4=2 5=1 6=1200 7=48 9=1
|
||||
Convolution B0_linear 1 1 B0_dw_relu B0_linear_bn -23330=4,3,56,56,32 0=32 1=1 5=1 6=1536
|
||||
Split splitncnn_0 1 2 B0_linear_bn B0_linear_bn_splitncnn_0 B0_linear_bn_splitncnn_1 -23330=8,3,56,56,32,3,56,56,32
|
||||
Convolution B1_expand 1 1 B0_linear_bn_splitncnn_1 B1_expand_relu -23330=4,3,56,56,96 0=96 1=1 5=1 6=3072 9=1
|
||||
ConvolutionDepthWise B1_dw 1 1 B1_expand_relu B1_dw_relu -23330=4,3,56,56,96 0=96 1=3 4=1 5=1 6=864 7=96 9=1
|
||||
Convolution B1_linear 1 1 B1_dw_relu B1_linear_bn -23330=4,3,56,56,32 0=32 1=1 5=1 6=3072
|
||||
BinaryOp unknownncnn_0 2 1 B0_linear_bn_splitncnn_0 B1_linear_bn unknownncnn_0 -23330=4,3,56,56,32
|
||||
Convolution C0_expand 1 1 unknownncnn_0 C0_expand_relu -23330=4,3,56,56,96 0=96 1=1 5=1 6=3072 9=1
|
||||
ConvolutionDepthWise C0_dw 1 1 C0_expand_relu C0_dw_relu -23330=4,3,28,28,96 0=96 1=7 3=2 4=3 5=1 6=4704 7=96 9=1
|
||||
Convolution C0_linear 1 1 C0_dw_relu C0_linear_bn -23330=4,3,28,28,40 0=40 1=1 5=1 6=3840
|
||||
Split splitncnn_1 1 2 C0_linear_bn C0_linear_bn_splitncnn_0 C0_linear_bn_splitncnn_1 -23330=8,3,28,28,40,3,28,28,40
|
||||
Convolution C1_expand 1 1 C0_linear_bn_splitncnn_1 C1_expand_relu -23330=4,3,28,28,120 0=120 1=1 5=1 6=4800 9=1
|
||||
ConvolutionDepthWise C1_dw 1 1 C1_expand_relu C1_dw_relu -23330=4,3,28,28,120 0=120 1=3 4=1 5=1 6=1080 7=120 9=1
|
||||
Convolution C1_linear 1 1 C1_dw_relu C1_linear_bn -23330=4,3,28,28,40 0=40 1=1 5=1 6=4800
|
||||
BinaryOp unknownncnn_1 2 1 C0_linear_bn_splitncnn_0 C1_linear_bn unknownncnn_1 -23330=4,3,28,28,40
|
||||
Split splitncnn_2 1 2 unknownncnn_1 unknownncnn_1_splitncnn_0 unknownncnn_1_splitncnn_1 -23330=8,3,28,28,40,3,28,28,40
|
||||
Convolution C2_expand 1 1 unknownncnn_1_splitncnn_1 C2_expand_relu -23330=4,3,28,28,120 0=120 1=1 5=1 6=4800 9=1
|
||||
ConvolutionDepthWise C2_dw 1 1 C2_expand_relu C2_dw_relu -23330=4,3,28,28,120 0=120 1=5 4=2 5=1 6=3000 7=120 9=1
|
||||
Convolution C2_linear 1 1 C2_dw_relu C2_linear_bn -23330=4,3,28,28,40 0=40 1=1 5=1 6=4800
|
||||
BinaryOp unknownncnn_2 2 1 unknownncnn_1_splitncnn_0 C2_linear_bn unknownncnn_2 -23330=4,3,28,28,40
|
||||
Split splitncnn_3 1 2 unknownncnn_2 unknownncnn_2_splitncnn_0 unknownncnn_2_splitncnn_1 -23330=8,3,28,28,40,3,28,28,40
|
||||
Convolution C3_expand 1 1 unknownncnn_2_splitncnn_1 C3_expand_relu -23330=4,3,28,28,120 0=120 1=1 5=1 6=4800 9=1
|
||||
ConvolutionDepthWise C3_dw 1 1 C3_expand_relu C3_dw_relu -23330=4,3,28,28,120 0=120 1=5 4=2 5=1 6=3000 7=120 9=1
|
||||
Convolution C3_linear 1 1 C3_dw_relu C3_linear_bn -23330=4,3,28,28,40 0=40 1=1 5=1 6=4800
|
||||
BinaryOp unknownncnn_3 2 1 unknownncnn_2_splitncnn_0 C3_linear_bn unknownncnn_3 -23330=4,3,28,28,40
|
||||
Convolution D0_expand 1 1 unknownncnn_3 D0_expand_relu -23330=4,3,28,28,240 0=240 1=1 5=1 6=9600 9=1
|
||||
ConvolutionDepthWise D0_dw 1 1 D0_expand_relu D0_dw_relu -23330=4,3,14,14,240 0=240 1=7 3=2 4=3 5=1 6=11760 7=240 9=1
|
||||
Convolution D0_linear 1 1 D0_dw_relu D0_linear_bn -23330=4,3,14,14,80 0=80 1=1 5=1 6=19200
|
||||
Split splitncnn_4 1 2 D0_linear_bn D0_linear_bn_splitncnn_0 D0_linear_bn_splitncnn_1 -23330=8,3,14,14,80,3,14,14,80
|
||||
Convolution D1_expand 1 1 D0_linear_bn_splitncnn_1 D1_expand_relu -23330=4,3,14,14,240 0=240 1=1 5=1 6=19200 9=1
|
||||
ConvolutionDepthWise D1_dw 1 1 D1_expand_relu D1_dw_relu -23330=4,3,14,14,240 0=240 1=5 4=2 5=1 6=6000 7=240 9=1
|
||||
Convolution D1_linear 1 1 D1_dw_relu D1_linear_bn -23330=4,3,14,14,80 0=80 1=1 5=1 6=19200
|
||||
BinaryOp unknownncnn_4 2 1 D0_linear_bn_splitncnn_0 D1_linear_bn unknownncnn_4 -23330=4,3,14,14,80
|
||||
Split splitncnn_5 1 2 unknownncnn_4 unknownncnn_4_splitncnn_0 unknownncnn_4_splitncnn_1 -23330=8,3,14,14,80,3,14,14,80
|
||||
Convolution D2_expand 1 1 unknownncnn_4_splitncnn_1 D2_expand_relu -23330=4,3,14,14,240 0=240 1=1 5=1 6=19200 9=1
|
||||
ConvolutionDepthWise D2_dw 1 1 D2_expand_relu D2_dw_relu -23330=4,3,14,14,240 0=240 1=5 4=2 5=1 6=6000 7=240 9=1
|
||||
Convolution D2_linear 1 1 D2_dw_relu D2_linear_bn -23330=4,3,14,14,80 0=80 1=1 5=1 6=19200
|
||||
BinaryOp unknownncnn_5 2 1 unknownncnn_4_splitncnn_0 D2_linear_bn unknownncnn_5 -23330=4,3,14,14,80
|
||||
Split splitncnn_6 1 2 unknownncnn_5 unknownncnn_5_splitncnn_0 unknownncnn_5_splitncnn_1 -23330=8,3,14,14,80,3,14,14,80
|
||||
Convolution D3_expand 1 1 unknownncnn_5_splitncnn_1 D3_expand_relu -23330=4,3,14,14,240 0=240 1=1 5=1 6=19200 9=1
|
||||
ConvolutionDepthWise D3_dw 1 1 D3_expand_relu D3_dw_relu -23330=4,3,14,14,240 0=240 1=5 4=2 5=1 6=6000 7=240 9=1
|
||||
Convolution D3_linear 1 1 D3_dw_relu D3_linear_bn -23330=4,3,14,14,80 0=80 1=1 5=1 6=19200
|
||||
BinaryOp unknownncnn_6 2 1 unknownncnn_5_splitncnn_0 D3_linear_bn unknownncnn_6 -23330=4,3,14,14,80
|
||||
Convolution E0_expand 1 1 unknownncnn_6 E0_expand_relu -23330=4,3,14,14,480 0=480 1=1 5=1 6=38400 9=1
|
||||
ConvolutionDepthWise E0_dw 1 1 E0_expand_relu E0_dw_relu -23330=4,3,14,14,480 0=480 1=5 4=2 5=1 6=12000 7=480 9=1
|
||||
Convolution E0_linear 1 1 E0_dw_relu E0_linear_bn -23330=4,3,14,14,96 0=96 1=1 5=1 6=46080
|
||||
Split splitncnn_7 1 2 E0_linear_bn E0_linear_bn_splitncnn_0 E0_linear_bn_splitncnn_1 -23330=8,3,14,14,96,3,14,14,96
|
||||
Convolution E1_expand 1 1 E0_linear_bn_splitncnn_1 E1_expand_relu -23330=4,3,14,14,288 0=288 1=1 5=1 6=27648 9=1
|
||||
ConvolutionDepthWise E1_dw 1 1 E1_expand_relu E1_dw_relu -23330=4,3,14,14,288 0=288 1=5 4=2 5=1 6=7200 7=288 9=1
|
||||
Convolution E1_linear 1 1 E1_dw_relu E1_linear_bn -23330=4,3,14,14,96 0=96 1=1 5=1 6=27648
|
||||
BinaryOp unknownncnn_7 2 1 E0_linear_bn_splitncnn_0 E1_linear_bn unknownncnn_7 -23330=4,3,14,14,96
|
||||
Split splitncnn_8 1 2 unknownncnn_7 unknownncnn_7_splitncnn_0 unknownncnn_7_splitncnn_1 -23330=8,3,14,14,96,3,14,14,96
|
||||
Convolution E2_expand 1 1 unknownncnn_7_splitncnn_1 E2_expand_relu -23330=4,3,14,14,288 0=288 1=1 5=1 6=27648 9=1
|
||||
ConvolutionDepthWise E2_dw 1 1 E2_expand_relu E2_dw_relu -23330=4,3,14,14,288 0=288 1=5 4=2 5=1 6=7200 7=288 9=1
|
||||
Convolution E2_linear 1 1 E2_dw_relu E2_linear_bn -23330=4,3,14,14,96 0=96 1=1 5=1 6=27648
|
||||
BinaryOp unknownncnn_8 2 1 unknownncnn_7_splitncnn_0 E2_linear_bn unknownncnn_8 -23330=4,3,14,14,96
|
||||
Split splitncnn_9 1 2 unknownncnn_8 unknownncnn_8_splitncnn_0 unknownncnn_8_splitncnn_1 -23330=8,3,14,14,96,3,14,14,96
|
||||
Convolution E3_expand 1 1 unknownncnn_8_splitncnn_1 E3_expand_relu -23330=4,3,14,14,288 0=288 1=1 5=1 6=27648 9=1
|
||||
ConvolutionDepthWise E3_dw 1 1 E3_expand_relu E3_dw_relu -23330=4,3,14,14,288 0=288 1=5 4=2 5=1 6=7200 7=288 9=1
|
||||
Convolution E3_linear 1 1 E3_dw_relu E3_linear_bn -23330=4,3,14,14,96 0=96 1=1 5=1 6=27648
|
||||
BinaryOp unknownncnn_9 2 1 unknownncnn_8_splitncnn_0 E3_linear_bn unknownncnn_9 -23330=4,3,14,14,96
|
||||
Convolution F0_expand 1 1 unknownncnn_9 F0_expand_relu -23330=4,3,14,14,576 0=576 1=1 5=1 6=55296 9=1
|
||||
ConvolutionDepthWise F0_dw 1 1 F0_expand_relu F0_dw_relu -23330=4,3,7,7,576 0=576 1=7 3=2 4=3 5=1 6=28224 7=576 9=1
|
||||
Convolution F0_linear 1 1 F0_dw_relu F0_linear_bn -23330=4,3,7,7,192 0=192 1=1 5=1 6=110592
|
||||
Split splitncnn_10 1 2 F0_linear_bn F0_linear_bn_splitncnn_0 F0_linear_bn_splitncnn_1 -23330=8,3,7,7,192,3,7,7,192
|
||||
Convolution F1_expand 1 1 F0_linear_bn_splitncnn_1 F1_expand_relu -23330=4,3,7,7,1152 0=1152 1=1 5=1 6=221184 9=1
|
||||
ConvolutionDepthWise F1_dw 1 1 F1_expand_relu F1_dw_relu -23330=4,3,7,7,1152 0=1152 1=7 4=3 5=1 6=56448 7=1152 9=1
|
||||
Convolution F1_linear 1 1 F1_dw_relu F1_linear_bn -23330=4,3,7,7,192 0=192 1=1 5=1 6=221184
|
||||
BinaryOp unknownncnn_10 2 1 F0_linear_bn_splitncnn_0 F1_linear_bn unknownncnn_10 -23330=4,3,7,7,192
|
||||
Split splitncnn_11 1 2 unknownncnn_10 unknownncnn_10_splitncnn_0 unknownncnn_10_splitncnn_1 -23330=8,3,7,7,192,3,7,7,192
|
||||
Convolution F2_expand 1 1 unknownncnn_10_splitncnn_1 F2_expand_relu -23330=4,3,7,7,576 0=576 1=1 5=1 6=110592 9=1
|
||||
ConvolutionDepthWise F2_dw 1 1 F2_expand_relu F2_dw_relu -23330=4,3,7,7,576 0=576 1=7 4=3 5=1 6=28224 7=576 9=1
|
||||
Convolution F2_linear 1 1 F2_dw_relu F2_linear_bn -23330=4,3,7,7,192 0=192 1=1 5=1 6=110592
|
||||
BinaryOp unknownncnn_11 2 1 unknownncnn_10_splitncnn_0 F2_linear_bn unknownncnn_11 -23330=4,3,7,7,192
|
||||
Split splitncnn_12 1 2 unknownncnn_11 unknownncnn_11_splitncnn_0 unknownncnn_11_splitncnn_1 -23330=8,3,7,7,192,3,7,7,192
|
||||
Convolution F3_expand 1 1 unknownncnn_11_splitncnn_1 F3_expand_relu -23330=4,3,7,7,576 0=576 1=1 5=1 6=110592 9=1
|
||||
ConvolutionDepthWise F3_dw 1 1 F3_expand_relu F3_dw_relu -23330=4,3,7,7,576 0=576 1=7 4=3 5=1 6=28224 7=576 9=1
|
||||
Convolution F3_linear 1 1 F3_dw_relu F3_linear_bn -23330=4,3,7,7,192 0=192 1=1 5=1 6=110592
|
||||
BinaryOp unknownncnn_12 2 1 unknownncnn_11_splitncnn_0 F3_linear_bn unknownncnn_12 -23330=4,3,7,7,192
|
||||
Convolution G0_expand 1 1 unknownncnn_12 G0_expand_relu -23330=4,3,7,7,1152 0=1152 1=1 5=1 6=221184 9=1
|
||||
ConvolutionDepthWise G0_dw 1 1 G0_expand_relu G0_dw_relu -23330=4,3,7,7,1152 0=1152 1=7 4=3 5=1 6=56448 7=1152 9=1
|
||||
Convolution G0_linear 1 1 G0_dw_relu G0_linear_bn -23330=4,3,7,7,320 0=320 1=1 5=1 6=368640
|
||||
Convolution last-1x1-conv 1 1 G0_linear_bn last-1x1-conv_relu -23330=4,3,7,7,1280 0=1280 1=1 5=1 6=409600 9=1
|
||||
Pooling avgpool 1 1 last-1x1-conv_relu flatten -23330=4,1,1280,1,1 0=1 1=7 4=1 5=1
|
||||
InnerProduct fc 1 1 flatten fc -23330=4,1,1000,1,1 0=1000 1=1 2=1280000
|
||||
Softmax prob 1 1 fc output -23330=4,1,1000,1,1
|
187
3rdparty/ncnn/benchmark/regnety_400m.param
vendored
Normal file
187
3rdparty/ncnn/benchmark/regnety_400m.param
vendored
Normal file
@ -0,0 +1,187 @@
|
||||
7767517
|
||||
185 217
|
||||
Input input.1 0 1 data -23330=4,3,224,224,3 0=224 1=224 2=3
|
||||
Convolution Conv_0 1 1 data 387 -23330=4,3,112,112,32 0=32 1=3 3=2 4=1 5=1 6=864 9=1
|
||||
Split splitncnn_0 1 2 387 387_splitncnn_0 387_splitncnn_1 -23330=8,3,112,112,32,3,112,112,32
|
||||
Convolution Conv_3 1 1 387_splitncnn_1 389 -23330=4,3,56,56,48 0=48 1=1 3=2 5=1 6=1536
|
||||
Convolution Conv_5 1 1 387_splitncnn_0 392 -23330=4,3,112,112,48 0=48 1=1 5=1 6=1536 9=1
|
||||
ConvolutionDepthWise Conv_8 1 1 392 395 -23330=4,3,56,56,48 0=48 1=3 3=2 4=1 5=1 6=3456 7=6 9=1
|
||||
Split splitncnn_1 1 2 395 395_splitncnn_0 395_splitncnn_1 -23330=8,3,56,56,48,3,56,56,48
|
||||
Pooling GlobalAveragePool_11 1 1 395_splitncnn_1 396 -23330=4,1,48,1,1 0=1 4=1
|
||||
InnerProduct Conv_12 1 1 396 398 -23330=4,1,8,1,1 0=8 1=1 2=384 9=1
|
||||
InnerProduct Conv_14 1 1 398 400 -23330=4,1,48,1,1 0=48 1=1 2=384 9=4
|
||||
BinaryOp Mul_16 2 1 395_splitncnn_0 400 401 -23330=4,3,56,56,48 0=2
|
||||
Convolution Conv_17 1 1 401 403 -23330=4,3,56,56,48 0=48 1=1 5=1 6=2304
|
||||
BinaryOp Add_19 2 1 389 403 404 -23330=4,3,56,56,48
|
||||
ReLU Relu_20 1 1 404 405 -23330=4,3,56,56,48
|
||||
Split splitncnn_2 1 2 405 405_splitncnn_0 405_splitncnn_1 -23330=8,3,56,56,48,3,56,56,48
|
||||
Convolution Conv_21 1 1 405_splitncnn_1 407 -23330=4,3,28,28,104 0=104 1=1 3=2 5=1 6=4992
|
||||
Convolution Conv_23 1 1 405_splitncnn_0 410 -23330=4,3,56,56,104 0=104 1=1 5=1 6=4992 9=1
|
||||
ConvolutionDepthWise Conv_26 1 1 410 413 -23330=4,3,28,28,104 0=104 1=3 3=2 4=1 5=1 6=7488 7=13 9=1
|
||||
Split splitncnn_3 1 2 413 413_splitncnn_0 413_splitncnn_1 -23330=8,3,28,28,104,3,28,28,104
|
||||
Pooling GlobalAveragePool_29 1 1 413_splitncnn_1 414 -23330=4,1,104,1,1 0=1 4=1
|
||||
InnerProduct Conv_30 1 1 414 416 -23330=4,1,12,1,1 0=12 1=1 2=1248 9=1
|
||||
InnerProduct Conv_32 1 1 416 418 -23330=4,1,104,1,1 0=104 1=1 2=1248 9=4
|
||||
BinaryOp Mul_34 2 1 413_splitncnn_0 418 419 -23330=4,3,28,28,104 0=2
|
||||
Convolution Conv_35 1 1 419 421 -23330=4,3,28,28,104 0=104 1=1 5=1 6=10816
|
||||
BinaryOp Add_37 2 1 407 421 422 -23330=4,3,28,28,104
|
||||
ReLU Relu_38 1 1 422 423 -23330=4,3,28,28,104
|
||||
Split splitncnn_4 1 2 423 423_splitncnn_0 423_splitncnn_1 -23330=8,3,28,28,104,3,28,28,104
|
||||
Convolution Conv_39 1 1 423_splitncnn_1 426 -23330=4,3,28,28,104 0=104 1=1 5=1 6=10816 9=1
|
||||
ConvolutionDepthWise Conv_42 1 1 426 429 -23330=4,3,28,28,104 0=104 1=3 4=1 5=1 6=7488 7=13 9=1
|
||||
Split splitncnn_5 1 2 429 429_splitncnn_0 429_splitncnn_1 -23330=8,3,28,28,104,3,28,28,104
|
||||
Pooling GlobalAveragePool_45 1 1 429_splitncnn_1 430 -23330=4,1,104,1,1 0=1 4=1
|
||||
InnerProduct Conv_46 1 1 430 432 -23330=4,1,26,1,1 0=26 1=1 2=2704 9=1
|
||||
InnerProduct Conv_48 1 1 432 434 -23330=4,1,104,1,1 0=104 1=1 2=2704 9=4
|
||||
BinaryOp Mul_50 2 1 429_splitncnn_0 434 435 -23330=4,3,28,28,104 0=2
|
||||
Convolution Conv_51 1 1 435 437 -23330=4,3,28,28,104 0=104 1=1 5=1 6=10816
|
||||
BinaryOp Add_53 2 1 423_splitncnn_0 437 438 -23330=4,3,28,28,104
|
||||
ReLU Relu_54 1 1 438 439 -23330=4,3,28,28,104
|
||||
Split splitncnn_6 1 2 439 439_splitncnn_0 439_splitncnn_1 -23330=8,3,28,28,104,3,28,28,104
|
||||
Convolution Conv_55 1 1 439_splitncnn_1 442 -23330=4,3,28,28,104 0=104 1=1 5=1 6=10816 9=1
|
||||
ConvolutionDepthWise Conv_58 1 1 442 445 -23330=4,3,28,28,104 0=104 1=3 4=1 5=1 6=7488 7=13 9=1
|
||||
Split splitncnn_7 1 2 445 445_splitncnn_0 445_splitncnn_1 -23330=8,3,28,28,104,3,28,28,104
|
||||
Pooling GlobalAveragePool_61 1 1 445_splitncnn_1 446 -23330=4,1,104,1,1 0=1 4=1
|
||||
InnerProduct Conv_62 1 1 446 448 -23330=4,1,26,1,1 0=26 1=1 2=2704 9=1
|
||||
InnerProduct Conv_64 1 1 448 450 -23330=4,1,104,1,1 0=104 1=1 2=2704 9=4
|
||||
BinaryOp Mul_66 2 1 445_splitncnn_0 450 451 -23330=4,3,28,28,104 0=2
|
||||
Convolution Conv_67 1 1 451 453 -23330=4,3,28,28,104 0=104 1=1 5=1 6=10816
|
||||
BinaryOp Add_69 2 1 439_splitncnn_0 453 454 -23330=4,3,28,28,104
|
||||
ReLU Relu_70 1 1 454 455 -23330=4,3,28,28,104
|
||||
Split splitncnn_8 1 2 455 455_splitncnn_0 455_splitncnn_1 -23330=8,3,28,28,104,3,28,28,104
|
||||
Convolution Conv_71 1 1 455_splitncnn_1 457 -23330=4,3,14,14,208 0=208 1=1 3=2 5=1 6=21632
|
||||
Convolution Conv_73 1 1 455_splitncnn_0 460 -23330=4,3,28,28,208 0=208 1=1 5=1 6=21632 9=1
|
||||
ConvolutionDepthWise Conv_76 1 1 460 463 -23330=4,3,14,14,208 0=208 1=3 3=2 4=1 5=1 6=14976 7=26 9=1
|
||||
Split splitncnn_9 1 2 463 463_splitncnn_0 463_splitncnn_1 -23330=8,3,14,14,208,3,14,14,208
|
||||
Pooling GlobalAveragePool_79 1 1 463_splitncnn_1 464 -23330=4,1,208,1,1 0=1 4=1
|
||||
InnerProduct Conv_80 1 1 464 466 -23330=4,1,26,1,1 0=26 1=1 2=5408 9=1
|
||||
InnerProduct Conv_82 1 1 466 468 -23330=4,1,208,1,1 0=208 1=1 2=5408 9=4
|
||||
BinaryOp Mul_84 2 1 463_splitncnn_0 468 469 -23330=4,3,14,14,208 0=2
|
||||
Convolution Conv_85 1 1 469 471 -23330=4,3,14,14,208 0=208 1=1 5=1 6=43264
|
||||
BinaryOp Add_87 2 1 457 471 472 -23330=4,3,14,14,208
|
||||
ReLU Relu_88 1 1 472 473 -23330=4,3,14,14,208
|
||||
Split splitncnn_10 1 2 473 473_splitncnn_0 473_splitncnn_1 -23330=8,3,14,14,208,3,14,14,208
|
||||
Convolution Conv_89 1 1 473_splitncnn_1 476 -23330=4,3,14,14,208 0=208 1=1 5=1 6=43264 9=1
|
||||
ConvolutionDepthWise Conv_92 1 1 476 479 -23330=4,3,14,14,208 0=208 1=3 4=1 5=1 6=14976 7=26 9=1
|
||||
Split splitncnn_11 1 2 479 479_splitncnn_0 479_splitncnn_1 -23330=8,3,14,14,208,3,14,14,208
|
||||
Pooling GlobalAveragePool_95 1 1 479_splitncnn_1 480 -23330=4,1,208,1,1 0=1 4=1
|
||||
InnerProduct Conv_96 1 1 480 482 -23330=4,1,52,1,1 0=52 1=1 2=10816 9=1
|
||||
InnerProduct Conv_98 1 1 482 484 -23330=4,1,208,1,1 0=208 1=1 2=10816 9=4
|
||||
BinaryOp Mul_100 2 1 479_splitncnn_0 484 485 -23330=4,3,14,14,208 0=2
|
||||
Convolution Conv_101 1 1 485 487 -23330=4,3,14,14,208 0=208 1=1 5=1 6=43264
|
||||
BinaryOp Add_103 2 1 473_splitncnn_0 487 488 -23330=4,3,14,14,208
|
||||
ReLU Relu_104 1 1 488 489 -23330=4,3,14,14,208
|
||||
Split splitncnn_12 1 2 489 489_splitncnn_0 489_splitncnn_1 -23330=8,3,14,14,208,3,14,14,208
|
||||
Convolution Conv_105 1 1 489_splitncnn_1 492 -23330=4,3,14,14,208 0=208 1=1 5=1 6=43264 9=1
|
||||
ConvolutionDepthWise Conv_108 1 1 492 495 -23330=4,3,14,14,208 0=208 1=3 4=1 5=1 6=14976 7=26 9=1
|
||||
Split splitncnn_13 1 2 495 495_splitncnn_0 495_splitncnn_1 -23330=8,3,14,14,208,3,14,14,208
|
||||
Pooling GlobalAveragePool_111 1 1 495_splitncnn_1 496 -23330=4,1,208,1,1 0=1 4=1
|
||||
InnerProduct Conv_112 1 1 496 498 -23330=4,1,52,1,1 0=52 1=1 2=10816 9=1
|
||||
InnerProduct Conv_114 1 1 498 500 -23330=4,1,208,1,1 0=208 1=1 2=10816 9=4
|
||||
BinaryOp Mul_116 2 1 495_splitncnn_0 500 501 -23330=4,3,14,14,208 0=2
|
||||
Convolution Conv_117 1 1 501 503 -23330=4,3,14,14,208 0=208 1=1 5=1 6=43264
|
||||
BinaryOp Add_119 2 1 489_splitncnn_0 503 504 -23330=4,3,14,14,208
|
||||
ReLU Relu_120 1 1 504 505 -23330=4,3,14,14,208
|
||||
Split splitncnn_14 1 2 505 505_splitncnn_0 505_splitncnn_1 -23330=8,3,14,14,208,3,14,14,208
|
||||
Convolution Conv_121 1 1 505_splitncnn_1 508 -23330=4,3,14,14,208 0=208 1=1 5=1 6=43264 9=1
|
||||
ConvolutionDepthWise Conv_124 1 1 508 511 -23330=4,3,14,14,208 0=208 1=3 4=1 5=1 6=14976 7=26 9=1
|
||||
Split splitncnn_15 1 2 511 511_splitncnn_0 511_splitncnn_1 -23330=8,3,14,14,208,3,14,14,208
|
||||
Pooling GlobalAveragePool_127 1 1 511_splitncnn_1 512 -23330=4,1,208,1,1 0=1 4=1
|
||||
InnerProduct Conv_128 1 1 512 514 -23330=4,1,52,1,1 0=52 1=1 2=10816 9=1
|
||||
InnerProduct Conv_130 1 1 514 516 -23330=4,1,208,1,1 0=208 1=1 2=10816 9=4
|
||||
BinaryOp Mul_132 2 1 511_splitncnn_0 516 517 -23330=4,3,14,14,208 0=2
|
||||
Convolution Conv_133 1 1 517 519 -23330=4,3,14,14,208 0=208 1=1 5=1 6=43264
|
||||
BinaryOp Add_135 2 1 505_splitncnn_0 519 520 -23330=4,3,14,14,208
|
||||
ReLU Relu_136 1 1 520 521 -23330=4,3,14,14,208
|
||||
Split splitncnn_16 1 2 521 521_splitncnn_0 521_splitncnn_1 -23330=8,3,14,14,208,3,14,14,208
|
||||
Convolution Conv_137 1 1 521_splitncnn_1 524 -23330=4,3,14,14,208 0=208 1=1 5=1 6=43264 9=1
|
||||
ConvolutionDepthWise Conv_140 1 1 524 527 -23330=4,3,14,14,208 0=208 1=3 4=1 5=1 6=14976 7=26 9=1
|
||||
Split splitncnn_17 1 2 527 527_splitncnn_0 527_splitncnn_1 -23330=8,3,14,14,208,3,14,14,208
|
||||
Pooling GlobalAveragePool_143 1 1 527_splitncnn_1 528 -23330=4,1,208,1,1 0=1 4=1
|
||||
InnerProduct Conv_144 1 1 528 530 -23330=4,1,52,1,1 0=52 1=1 2=10816 9=1
|
||||
InnerProduct Conv_146 1 1 530 532 -23330=4,1,208,1,1 0=208 1=1 2=10816 9=4
|
||||
BinaryOp Mul_148 2 1 527_splitncnn_0 532 533 -23330=4,3,14,14,208 0=2
|
||||
Convolution Conv_149 1 1 533 535 -23330=4,3,14,14,208 0=208 1=1 5=1 6=43264
|
||||
BinaryOp Add_151 2 1 521_splitncnn_0 535 536 -23330=4,3,14,14,208
|
||||
ReLU Relu_152 1 1 536 537 -23330=4,3,14,14,208
|
||||
Split splitncnn_18 1 2 537 537_splitncnn_0 537_splitncnn_1 -23330=8,3,14,14,208,3,14,14,208
|
||||
Convolution Conv_153 1 1 537_splitncnn_1 540 -23330=4,3,14,14,208 0=208 1=1 5=1 6=43264 9=1
|
||||
ConvolutionDepthWise Conv_156 1 1 540 543 -23330=4,3,14,14,208 0=208 1=3 4=1 5=1 6=14976 7=26 9=1
|
||||
Split splitncnn_19 1 2 543 543_splitncnn_0 543_splitncnn_1 -23330=8,3,14,14,208,3,14,14,208
|
||||
Pooling GlobalAveragePool_159 1 1 543_splitncnn_1 544 -23330=4,1,208,1,1 0=1 4=1
|
||||
InnerProduct Conv_160 1 1 544 546 -23330=4,1,52,1,1 0=52 1=1 2=10816 9=1
|
||||
InnerProduct Conv_162 1 1 546 548 -23330=4,1,208,1,1 0=208 1=1 2=10816 9=4
|
||||
BinaryOp Mul_164 2 1 543_splitncnn_0 548 549 -23330=4,3,14,14,208 0=2
|
||||
Convolution Conv_165 1 1 549 551 -23330=4,3,14,14,208 0=208 1=1 5=1 6=43264
|
||||
BinaryOp Add_167 2 1 537_splitncnn_0 551 552 -23330=4,3,14,14,208
|
||||
ReLU Relu_168 1 1 552 553 -23330=4,3,14,14,208
|
||||
Split splitncnn_20 1 2 553 553_splitncnn_0 553_splitncnn_1 -23330=8,3,14,14,208,3,14,14,208
|
||||
Convolution Conv_169 1 1 553_splitncnn_1 555 -23330=4,3,7,7,440 0=440 1=1 3=2 5=1 6=91520
|
||||
Convolution Conv_171 1 1 553_splitncnn_0 558 -23330=4,3,14,14,440 0=440 1=1 5=1 6=91520 9=1
|
||||
ConvolutionDepthWise Conv_174 1 1 558 561 -23330=4,3,7,7,440 0=440 1=3 3=2 4=1 5=1 6=31680 7=55 9=1
|
||||
Split splitncnn_21 1 2 561 561_splitncnn_0 561_splitncnn_1 -23330=8,3,7,7,440,3,7,7,440
|
||||
Pooling GlobalAveragePool_177 1 1 561_splitncnn_1 562 -23330=4,1,440,1,1 0=1 4=1
|
||||
InnerProduct Conv_178 1 1 562 564 -23330=4,1,52,1,1 0=52 1=1 2=22880 9=1
|
||||
InnerProduct Conv_180 1 1 564 566 -23330=4,1,440,1,1 0=440 1=1 2=22880 9=4
|
||||
BinaryOp Mul_182 2 1 561_splitncnn_0 566 567 -23330=4,3,7,7,440 0=2
|
||||
Convolution Conv_183 1 1 567 569 -23330=4,3,7,7,440 0=440 1=1 5=1 6=193600
|
||||
BinaryOp Add_185 2 1 555 569 570 -23330=4,3,7,7,440
|
||||
ReLU Relu_186 1 1 570 571 -23330=4,3,7,7,440
|
||||
Split splitncnn_22 1 2 571 571_splitncnn_0 571_splitncnn_1 -23330=8,3,7,7,440,3,7,7,440
|
||||
Convolution Conv_187 1 1 571_splitncnn_1 574 -23330=4,3,7,7,440 0=440 1=1 5=1 6=193600 9=1
|
||||
ConvolutionDepthWise Conv_190 1 1 574 577 -23330=4,3,7,7,440 0=440 1=3 4=1 5=1 6=31680 7=55 9=1
|
||||
Split splitncnn_23 1 2 577 577_splitncnn_0 577_splitncnn_1 -23330=8,3,7,7,440,3,7,7,440
|
||||
Pooling GlobalAveragePool_193 1 1 577_splitncnn_1 578 -23330=4,1,440,1,1 0=1 4=1
|
||||
InnerProduct Conv_194 1 1 578 580 -23330=4,1,110,1,1 0=110 1=1 2=48400 9=1
|
||||
InnerProduct Conv_196 1 1 580 582 -23330=4,1,440,1,1 0=440 1=1 2=48400 9=4
|
||||
BinaryOp Mul_198 2 1 577_splitncnn_0 582 583 -23330=4,3,7,7,440 0=2
|
||||
Convolution Conv_199 1 1 583 585 -23330=4,3,7,7,440 0=440 1=1 5=1 6=193600
|
||||
BinaryOp Add_201 2 1 571_splitncnn_0 585 586 -23330=4,3,7,7,440
|
||||
ReLU Relu_202 1 1 586 587 -23330=4,3,7,7,440
|
||||
Split splitncnn_24 1 2 587 587_splitncnn_0 587_splitncnn_1 -23330=8,3,7,7,440,3,7,7,440
|
||||
Convolution Conv_203 1 1 587_splitncnn_1 590 -23330=4,3,7,7,440 0=440 1=1 5=1 6=193600 9=1
|
||||
ConvolutionDepthWise Conv_206 1 1 590 593 -23330=4,3,7,7,440 0=440 1=3 4=1 5=1 6=31680 7=55 9=1
|
||||
Split splitncnn_25 1 2 593 593_splitncnn_0 593_splitncnn_1 -23330=8,3,7,7,440,3,7,7,440
|
||||
Pooling GlobalAveragePool_209 1 1 593_splitncnn_1 594 -23330=4,1,440,1,1 0=1 4=1
|
||||
InnerProduct Conv_210 1 1 594 596 -23330=4,1,110,1,1 0=110 1=1 2=48400 9=1
|
||||
InnerProduct Conv_212 1 1 596 598 -23330=4,1,440,1,1 0=440 1=1 2=48400 9=4
|
||||
BinaryOp Mul_214 2 1 593_splitncnn_0 598 599 -23330=4,3,7,7,440 0=2
|
||||
Convolution Conv_215 1 1 599 601 -23330=4,3,7,7,440 0=440 1=1 5=1 6=193600
|
||||
BinaryOp Add_217 2 1 587_splitncnn_0 601 602 -23330=4,3,7,7,440
|
||||
ReLU Relu_218 1 1 602 603 -23330=4,3,7,7,440
|
||||
Split splitncnn_26 1 2 603 603_splitncnn_0 603_splitncnn_1 -23330=8,3,7,7,440,3,7,7,440
|
||||
Convolution Conv_219 1 1 603_splitncnn_1 606 -23330=4,3,7,7,440 0=440 1=1 5=1 6=193600 9=1
|
||||
ConvolutionDepthWise Conv_222 1 1 606 609 -23330=4,3,7,7,440 0=440 1=3 4=1 5=1 6=31680 7=55 9=1
|
||||
Split splitncnn_27 1 2 609 609_splitncnn_0 609_splitncnn_1 -23330=8,3,7,7,440,3,7,7,440
|
||||
Pooling GlobalAveragePool_225 1 1 609_splitncnn_1 610 -23330=4,1,440,1,1 0=1 4=1
|
||||
InnerProduct Conv_226 1 1 610 612 -23330=4,1,110,1,1 0=110 1=1 2=48400 9=1
|
||||
InnerProduct Conv_228 1 1 612 614 -23330=4,1,440,1,1 0=440 1=1 2=48400 9=4
|
||||
BinaryOp Mul_230 2 1 609_splitncnn_0 614 615 -23330=4,3,7,7,440 0=2
|
||||
Convolution Conv_231 1 1 615 617 -23330=4,3,7,7,440 0=440 1=1 5=1 6=193600
|
||||
BinaryOp Add_233 2 1 603_splitncnn_0 617 618 -23330=4,3,7,7,440
|
||||
ReLU Relu_234 1 1 618 619 -23330=4,3,7,7,440
|
||||
Split splitncnn_28 1 2 619 619_splitncnn_0 619_splitncnn_1 -23330=8,3,7,7,440,3,7,7,440
|
||||
Convolution Conv_235 1 1 619_splitncnn_1 622 -23330=4,3,7,7,440 0=440 1=1 5=1 6=193600 9=1
|
||||
ConvolutionDepthWise Conv_238 1 1 622 625 -23330=4,3,7,7,440 0=440 1=3 4=1 5=1 6=31680 7=55 9=1
|
||||
Split splitncnn_29 1 2 625 625_splitncnn_0 625_splitncnn_1 -23330=8,3,7,7,440,3,7,7,440
|
||||
Pooling GlobalAveragePool_241 1 1 625_splitncnn_1 626 -23330=4,1,440,1,1 0=1 4=1
|
||||
InnerProduct Conv_242 1 1 626 628 -23330=4,1,110,1,1 0=110 1=1 2=48400 9=1
|
||||
InnerProduct Conv_244 1 1 628 630 -23330=4,1,440,1,1 0=440 1=1 2=48400 9=4
|
||||
BinaryOp Mul_246 2 1 625_splitncnn_0 630 631 -23330=4,3,7,7,440 0=2
|
||||
Convolution Conv_247 1 1 631 633 -23330=4,3,7,7,440 0=440 1=1 5=1 6=193600
|
||||
BinaryOp Add_249 2 1 619_splitncnn_0 633 634 -23330=4,3,7,7,440
|
||||
ReLU Relu_250 1 1 634 635 -23330=4,3,7,7,440
|
||||
Split splitncnn_30 1 2 635 635_splitncnn_0 635_splitncnn_1 -23330=8,3,7,7,440,3,7,7,440
|
||||
Convolution Conv_251 1 1 635_splitncnn_1 638 -23330=4,3,7,7,440 0=440 1=1 5=1 6=193600 9=1
|
||||
ConvolutionDepthWise Conv_254 1 1 638 641 -23330=4,3,7,7,440 0=440 1=3 4=1 5=1 6=31680 7=55 9=1
|
||||
Split splitncnn_31 1 2 641 641_splitncnn_0 641_splitncnn_1 -23330=8,3,7,7,440,3,7,7,440
|
||||
Pooling GlobalAveragePool_257 1 1 641_splitncnn_1 642 -23330=4,1,440,1,1 0=1 4=1
|
||||
InnerProduct Conv_258 1 1 642 644 -23330=4,1,110,1,1 0=110 1=1 2=48400 9=1
|
||||
InnerProduct Conv_260 1 1 644 646 -23330=4,1,440,1,1 0=440 1=1 2=48400 9=4
|
||||
BinaryOp Mul_262 2 1 641_splitncnn_0 646 647 -23330=4,3,7,7,440 0=2
|
||||
Convolution Conv_263 1 1 647 649 -23330=4,3,7,7,440 0=440 1=1 5=1 6=193600
|
||||
BinaryOp Add_265 2 1 635_splitncnn_0 649 650 -23330=4,3,7,7,440
|
||||
ReLU Relu_266 1 1 650 651 -23330=4,3,7,7,440
|
||||
Pooling GlobalAveragePool_267 1 1 651 660 -23330=4,1,440,1,1 0=1 4=1
|
||||
InnerProduct Gemm_274 1 1 660 661 -23330=4,1,1000,1,1 0=1000 1=1 2=440000
|
||||
Softmax prob 1 1 661 output -23330=4,1,1000,1,1
|
52
3rdparty/ncnn/benchmark/resnet18.param
vendored
Normal file
52
3rdparty/ncnn/benchmark/resnet18.param
vendored
Normal file
@ -0,0 +1,52 @@
|
||||
7767517
|
||||
50 58
|
||||
Input data 0 1 data -23330=4,3,224,224,3 0=224 1=224 2=3
|
||||
Convolution conv1 1 1 data conv1_conv1_relu -23330=4,3,112,112,64 0=64 1=7 3=2 4=3 5=1 6=9408 9=1
|
||||
Pooling pool1 1 1 conv1_conv1_relu pool1 -23330=4,3,56,56,64 1=3 2=2
|
||||
Split splitncnn_0 1 2 pool1 pool1_splitncnn_0 pool1_splitncnn_1 -23330=8,3,56,56,64,3,56,56,64
|
||||
Convolution res2a_branch1 1 1 pool1_splitncnn_1 res2a_branch1_scale2a_branch1 -23330=4,3,56,56,64 0=64 1=1 5=1 6=4096
|
||||
Convolution res2a_branch2a 1 1 pool1_splitncnn_0 res2a_branch2a_res2a_branch2a_relu -23330=4,3,56,56,64 0=64 1=3 4=1 5=1 6=36864 9=1
|
||||
Convolution res2a_branch2b 1 1 res2a_branch2a_res2a_branch2a_relu res2a_branch2b_scale2a_branch2b -23330=4,3,56,56,64 0=64 1=3 4=1 5=1 6=36864
|
||||
Eltwise res2a 2 1 res2a_branch1_scale2a_branch1 res2a_branch2b_scale2a_branch2b res2a -23330=4,3,56,56,64 0=1
|
||||
ReLU res2a_relu 1 1 res2a res2a_res2a_relu -23330=4,3,56,56,64
|
||||
Split splitncnn_1 1 2 res2a_res2a_relu res2a_res2a_relu_splitncnn_0 res2a_res2a_relu_splitncnn_1 -23330=8,3,56,56,64,3,56,56,64
|
||||
Convolution res2b_branch2a 1 1 res2a_res2a_relu_splitncnn_1 res2b_branch2a_res2b_branch2a_relu -23330=4,3,56,56,64 0=64 1=3 4=1 5=1 6=36864 9=1
|
||||
Convolution res2b_branch2b 1 1 res2b_branch2a_res2b_branch2a_relu res2b_branch2b_scale2b_branch2b -23330=4,3,56,56,64 0=64 1=3 4=1 5=1 6=36864
|
||||
Eltwise res2b 2 1 res2a_res2a_relu_splitncnn_0 res2b_branch2b_scale2b_branch2b res2b -23330=4,3,56,56,64 0=1
|
||||
ReLU res2b_relu 1 1 res2b res2b_res2b_relu -23330=4,3,56,56,64
|
||||
Split splitncnn_2 1 2 res2b_res2b_relu res2b_res2b_relu_splitncnn_0 res2b_res2b_relu_splitncnn_1 -23330=8,3,56,56,64,3,56,56,64
|
||||
Convolution res3a_branch1 1 1 res2b_res2b_relu_splitncnn_1 res3a_branch1_scale3a_branch1 -23330=4,3,28,28,128 0=128 1=1 3=2 5=1 6=8192
|
||||
Convolution res3a_branch2a 1 1 res2b_res2b_relu_splitncnn_0 res3a_branch2a_res3a_branch2a_relu -23330=4,3,28,28,128 0=128 1=3 3=2 4=1 5=1 6=73728 9=1
|
||||
Convolution res3a_branch2b 1 1 res3a_branch2a_res3a_branch2a_relu res3a_branch2b_scale3a_branch2b -23330=4,3,28,28,128 0=128 1=3 4=1 5=1 6=147456
|
||||
Eltwise res3a 2 1 res3a_branch1_scale3a_branch1 res3a_branch2b_scale3a_branch2b res3a -23330=4,3,28,28,128 0=1
|
||||
ReLU res3a_relu 1 1 res3a res3a_res3a_relu -23330=4,3,28,28,128
|
||||
Split splitncnn_3 1 2 res3a_res3a_relu res3a_res3a_relu_splitncnn_0 res3a_res3a_relu_splitncnn_1 -23330=8,3,28,28,128,3,28,28,128
|
||||
Convolution res3b_branch2a 1 1 res3a_res3a_relu_splitncnn_1 res3b_branch2a_res3b_branch2a_relu -23330=4,3,28,28,128 0=128 1=3 4=1 5=1 6=147456 9=1
|
||||
Convolution res3b_branch2b 1 1 res3b_branch2a_res3b_branch2a_relu res3b_branch2b_scale3b_branch2b -23330=4,3,28,28,128 0=128 1=3 4=1 5=1 6=147456
|
||||
Eltwise res3b 2 1 res3a_res3a_relu_splitncnn_0 res3b_branch2b_scale3b_branch2b res3b -23330=4,3,28,28,128 0=1
|
||||
ReLU res3b_relu 1 1 res3b res3b_res3b_relu -23330=4,3,28,28,128
|
||||
Split splitncnn_4 1 2 res3b_res3b_relu res3b_res3b_relu_splitncnn_0 res3b_res3b_relu_splitncnn_1 -23330=8,3,28,28,128,3,28,28,128
|
||||
Convolution res4a_branch1 1 1 res3b_res3b_relu_splitncnn_1 res4a_branch1_scale4a_branch1 -23330=4,3,14,14,256 0=256 1=1 3=2 5=1 6=32768
|
||||
Convolution res4a_branch2a 1 1 res3b_res3b_relu_splitncnn_0 res4a_branch2a_res4a_branch2a_relu -23330=4,3,14,14,256 0=256 1=3 3=2 4=1 5=1 6=294912 9=1
|
||||
Convolution res4a_branch2b 1 1 res4a_branch2a_res4a_branch2a_relu res4a_branch2b_scale4a_branch2b -23330=4,3,14,14,256 0=256 1=3 4=1 5=1 6=589824
|
||||
Eltwise res4a 2 1 res4a_branch1_scale4a_branch1 res4a_branch2b_scale4a_branch2b res4a -23330=4,3,14,14,256 0=1
|
||||
ReLU res4a_relu 1 1 res4a res4a_res4a_relu -23330=4,3,14,14,256
|
||||
Split splitncnn_5 1 2 res4a_res4a_relu res4a_res4a_relu_splitncnn_0 res4a_res4a_relu_splitncnn_1 -23330=8,3,14,14,256,3,14,14,256
|
||||
Convolution res4b_branch2a 1 1 res4a_res4a_relu_splitncnn_1 res4b_branch2a_res4b_branch2a_relu -23330=4,3,14,14,256 0=256 1=3 4=1 5=1 6=589824 9=1
|
||||
Convolution res4b_branch2b 1 1 res4b_branch2a_res4b_branch2a_relu res4b_branch2b_scale4b_branch2b -23330=4,3,14,14,256 0=256 1=3 4=1 5=1 6=589824
|
||||
Eltwise res4b 2 1 res4a_res4a_relu_splitncnn_0 res4b_branch2b_scale4b_branch2b res4b -23330=4,3,14,14,256 0=1
|
||||
ReLU res4b_relu 1 1 res4b res4b_res4b_relu -23330=4,3,14,14,256
|
||||
Split splitncnn_6 1 2 res4b_res4b_relu res4b_res4b_relu_splitncnn_0 res4b_res4b_relu_splitncnn_1 -23330=8,3,14,14,256,3,14,14,256
|
||||
Convolution res5a_branch1 1 1 res4b_res4b_relu_splitncnn_1 res5a_branch1_scale5a_branch1 -23330=4,3,7,7,512 0=512 1=1 3=2 5=1 6=131072
|
||||
Convolution res5a_branch2a 1 1 res4b_res4b_relu_splitncnn_0 res5a_branch2a_res5a_branch2a_relu -23330=4,3,7,7,512 0=512 1=3 3=2 4=1 5=1 6=1179648 9=1
|
||||
Convolution res5a_branch2b 1 1 res5a_branch2a_res5a_branch2a_relu res5a_branch2b_scale5a_branch2b -23330=4,3,7,7,512 0=512 1=3 4=1 5=1 6=2359296
|
||||
Eltwise res5a 2 1 res5a_branch1_scale5a_branch1 res5a_branch2b_scale5a_branch2b res5a -23330=4,3,7,7,512 0=1
|
||||
ReLU res5a_relu 1 1 res5a res5a_res5a_relu -23330=4,3,7,7,512
|
||||
Split splitncnn_7 1 2 res5a_res5a_relu res5a_res5a_relu_splitncnn_0 res5a_res5a_relu_splitncnn_1 -23330=8,3,7,7,512,3,7,7,512
|
||||
Convolution res5b_branch2a 1 1 res5a_res5a_relu_splitncnn_1 res5b_branch2a_res5b_branch2a_relu -23330=4,3,7,7,512 0=512 1=3 4=1 5=1 6=2359296 9=1
|
||||
Convolution res5b_branch2b 1 1 res5b_branch2a_res5b_branch2a_relu res5b_branch2b_scale5b_branch2b -23330=4,3,7,7,512 0=512 1=3 4=1 5=1 6=2359296
|
||||
Eltwise res5b 2 1 res5a_res5a_relu_splitncnn_0 res5b_branch2b_scale5b_branch2b res5b -23330=4,3,7,7,512 0=1
|
||||
ReLU res5b_relu 1 1 res5b res5b_res5b_relu -23330=4,3,7,7,512
|
||||
Pooling pool5 1 1 res5b_res5b_relu pool5 -23330=4,3,1,1,512 0=1 1=7
|
||||
InnerProduct fc1000 1 1 pool5 fc1000 -23330=4,1,1000,1,1 0=1000 1=1 2=512000
|
||||
Softmax prob 1 1 fc1000 output -23330=4,1,1000,1,1
|
52
3rdparty/ncnn/benchmark/resnet18_int8.param
vendored
Normal file
52
3rdparty/ncnn/benchmark/resnet18_int8.param
vendored
Normal file
@ -0,0 +1,52 @@
|
||||
7767517
|
||||
50 58
|
||||
Input data 0 1 data 0=224 1=224 2=3
|
||||
Convolution conv1 1 1 data conv1_conv1_relu 0=64 1=7 3=2 4=3 5=1 6=9408 8=2 9=1
|
||||
Pooling pool1 1 1 conv1_conv1_relu pool1 1=3 2=2
|
||||
Split splitncnn_0 1 2 pool1 pool1_splitncnn_0 pool1_splitncnn_1
|
||||
Convolution res2a_branch1 1 1 pool1_splitncnn_1 res2a_branch1_scale2a_branch1 0=64 1=1 5=1 6=4096 8=2
|
||||
Convolution res2a_branch2a 1 1 pool1_splitncnn_0 res2a_branch2a_res2a_branch2a_relu 0=64 1=3 4=1 5=1 6=36864 8=102 9=1
|
||||
Convolution res2a_branch2b 1 1 res2a_branch2a_res2a_branch2a_relu res2a_branch2b_scale2a_branch2b 0=64 1=3 4=1 5=1 6=36864 8=2
|
||||
Eltwise res2a 2 1 res2a_branch1_scale2a_branch1 res2a_branch2b_scale2a_branch2b res2a 0=1
|
||||
ReLU res2a_relu 1 1 res2a res2a_res2a_relu
|
||||
Split splitncnn_1 1 2 res2a_res2a_relu res2a_res2a_relu_splitncnn_0 res2a_res2a_relu_splitncnn_1
|
||||
Convolution res2b_branch2a 1 1 res2a_res2a_relu_splitncnn_1 res2b_branch2a_res2b_branch2a_relu 0=64 1=3 4=1 5=1 6=36864 8=102 9=1
|
||||
Convolution res2b_branch2b 1 1 res2b_branch2a_res2b_branch2a_relu res2b_branch2b_scale2b_branch2b 0=64 1=3 4=1 5=1 6=36864 8=2
|
||||
Eltwise res2b 2 1 res2a_res2a_relu_splitncnn_0 res2b_branch2b_scale2b_branch2b res2b 0=1
|
||||
ReLU res2b_relu 1 1 res2b res2b_res2b_relu
|
||||
Split splitncnn_2 1 2 res2b_res2b_relu res2b_res2b_relu_splitncnn_0 res2b_res2b_relu_splitncnn_1
|
||||
Convolution res3a_branch1 1 1 res2b_res2b_relu_splitncnn_1 res3a_branch1_scale3a_branch1 0=128 1=1 3=2 5=1 6=8192 8=2
|
||||
Convolution res3a_branch2a 1 1 res2b_res2b_relu_splitncnn_0 res3a_branch2a_res3a_branch2a_relu 0=128 1=3 3=2 4=1 5=1 6=73728 8=102 9=1
|
||||
Convolution res3a_branch2b 1 1 res3a_branch2a_res3a_branch2a_relu res3a_branch2b_scale3a_branch2b 0=128 1=3 4=1 5=1 6=147456 8=2
|
||||
Eltwise res3a 2 1 res3a_branch1_scale3a_branch1 res3a_branch2b_scale3a_branch2b res3a 0=1
|
||||
ReLU res3a_relu 1 1 res3a res3a_res3a_relu
|
||||
Split splitncnn_3 1 2 res3a_res3a_relu res3a_res3a_relu_splitncnn_0 res3a_res3a_relu_splitncnn_1
|
||||
Convolution res3b_branch2a 1 1 res3a_res3a_relu_splitncnn_1 res3b_branch2a_res3b_branch2a_relu 0=128 1=3 4=1 5=1 6=147456 8=102 9=1
|
||||
Convolution res3b_branch2b 1 1 res3b_branch2a_res3b_branch2a_relu res3b_branch2b_scale3b_branch2b 0=128 1=3 4=1 5=1 6=147456 8=2
|
||||
Eltwise res3b 2 1 res3a_res3a_relu_splitncnn_0 res3b_branch2b_scale3b_branch2b res3b 0=1
|
||||
ReLU res3b_relu 1 1 res3b res3b_res3b_relu
|
||||
Split splitncnn_4 1 2 res3b_res3b_relu res3b_res3b_relu_splitncnn_0 res3b_res3b_relu_splitncnn_1
|
||||
Convolution res4a_branch1 1 1 res3b_res3b_relu_splitncnn_1 res4a_branch1_scale4a_branch1 0=256 1=1 3=2 5=1 6=32768 8=2
|
||||
Convolution res4a_branch2a 1 1 res3b_res3b_relu_splitncnn_0 res4a_branch2a_res4a_branch2a_relu 0=256 1=3 3=2 4=1 5=1 6=294912 8=102 9=1
|
||||
Convolution res4a_branch2b 1 1 res4a_branch2a_res4a_branch2a_relu res4a_branch2b_scale4a_branch2b 0=256 1=3 4=1 5=1 6=589824 8=2
|
||||
Eltwise res4a 2 1 res4a_branch1_scale4a_branch1 res4a_branch2b_scale4a_branch2b res4a 0=1
|
||||
ReLU res4a_relu 1 1 res4a res4a_res4a_relu
|
||||
Split splitncnn_5 1 2 res4a_res4a_relu res4a_res4a_relu_splitncnn_0 res4a_res4a_relu_splitncnn_1
|
||||
Convolution res4b_branch2a 1 1 res4a_res4a_relu_splitncnn_1 res4b_branch2a_res4b_branch2a_relu 0=256 1=3 4=1 5=1 6=589824 8=102 9=1
|
||||
Convolution res4b_branch2b 1 1 res4b_branch2a_res4b_branch2a_relu res4b_branch2b_scale4b_branch2b 0=256 1=3 4=1 5=1 6=589824 8=2
|
||||
Eltwise res4b 2 1 res4a_res4a_relu_splitncnn_0 res4b_branch2b_scale4b_branch2b res4b 0=1
|
||||
ReLU res4b_relu 1 1 res4b res4b_res4b_relu
|
||||
Split splitncnn_6 1 2 res4b_res4b_relu res4b_res4b_relu_splitncnn_0 res4b_res4b_relu_splitncnn_1
|
||||
Convolution res5a_branch1 1 1 res4b_res4b_relu_splitncnn_1 res5a_branch1_scale5a_branch1 0=512 1=1 3=2 5=1 6=131072 8=2
|
||||
Convolution res5a_branch2a 1 1 res4b_res4b_relu_splitncnn_0 res5a_branch2a_res5a_branch2a_relu 0=512 1=3 3=2 4=1 5=1 6=1179648 8=102 9=1
|
||||
Convolution res5a_branch2b 1 1 res5a_branch2a_res5a_branch2a_relu res5a_branch2b_scale5a_branch2b 0=512 1=3 4=1 5=1 6=2359296 8=2
|
||||
Eltwise res5a 2 1 res5a_branch1_scale5a_branch1 res5a_branch2b_scale5a_branch2b res5a 0=1
|
||||
ReLU res5a_relu 1 1 res5a res5a_res5a_relu
|
||||
Split splitncnn_7 1 2 res5a_res5a_relu res5a_res5a_relu_splitncnn_0 res5a_res5a_relu_splitncnn_1
|
||||
Convolution res5b_branch2a 1 1 res5a_res5a_relu_splitncnn_1 res5b_branch2a_res5b_branch2a_relu 0=512 1=3 4=1 5=1 6=2359296 8=102 9=1
|
||||
Convolution res5b_branch2b 1 1 res5b_branch2a_res5b_branch2a_relu res5b_branch2b_scale5b_branch2b 0=512 1=3 4=1 5=1 6=2359296 8=2
|
||||
Eltwise res5b 2 1 res5a_res5a_relu_splitncnn_0 res5b_branch2b_scale5b_branch2b res5b 0=1
|
||||
ReLU res5b_relu 1 1 res5b res5b_res5b_relu
|
||||
Pooling pool5 1 1 res5b_res5b_relu pool5 0=1 1=7
|
||||
InnerProduct fc1000 1 1 pool5 fc1000 0=1000 1=1 2=512000
|
||||
Softmax prob 1 1 fc1000 output
|
108
3rdparty/ncnn/benchmark/resnet50.param
vendored
Normal file
108
3rdparty/ncnn/benchmark/resnet50.param
vendored
Normal file
@ -0,0 +1,108 @@
|
||||
7767517
|
||||
106 122
|
||||
Input data 0 1 data -23330=4,3,224,224,3 0=224 1=224 2=3
|
||||
Convolution conv1 1 1 data conv1_conv1_relu -23330=4,3,112,112,64 0=64 1=7 3=2 4=3 5=1 6=9408 9=1
|
||||
Pooling pool1 1 1 conv1_conv1_relu pool1 -23330=4,3,56,56,64 1=3 2=2
|
||||
Split splitncnn_0 1 2 pool1 pool1_splitncnn_0 pool1_splitncnn_1 -23330=8,3,56,56,64,3,56,56,64
|
||||
Convolution res2a_branch1 1 1 pool1_splitncnn_1 res2a_branch1_scale2a_branch1 -23330=4,3,56,56,256 0=256 1=1 5=1 6=16384
|
||||
Convolution res2a_branch2a 1 1 pool1_splitncnn_0 res2a_branch2a_res2a_branch2a_relu -23330=4,3,56,56,64 0=64 1=1 5=1 6=4096 9=1
|
||||
Convolution res2a_branch2b 1 1 res2a_branch2a_res2a_branch2a_relu res2a_branch2b_res2a_branch2b_relu -23330=4,3,56,56,64 0=64 1=3 4=1 5=1 6=36864 9=1
|
||||
Convolution res2a_branch2c 1 1 res2a_branch2b_res2a_branch2b_relu res2a_branch2c_scale2a_branch2c -23330=4,3,56,56,256 0=256 1=1 5=1 6=16384
|
||||
Eltwise res2a 2 1 res2a_branch1_scale2a_branch1 res2a_branch2c_scale2a_branch2c res2a -23330=4,3,56,56,256 0=1
|
||||
ReLU res2a_relu 1 1 res2a res2a_res2a_relu -23330=4,3,56,56,256
|
||||
Split splitncnn_1 1 2 res2a_res2a_relu res2a_res2a_relu_splitncnn_0 res2a_res2a_relu_splitncnn_1 -23330=8,3,56,56,256,3,56,56,256
|
||||
Convolution res2b_branch2a 1 1 res2a_res2a_relu_splitncnn_1 res2b_branch2a_res2b_branch2a_relu -23330=4,3,56,56,64 0=64 1=1 5=1 6=16384 9=1
|
||||
Convolution res2b_branch2b 1 1 res2b_branch2a_res2b_branch2a_relu res2b_branch2b_res2b_branch2b_relu -23330=4,3,56,56,64 0=64 1=3 4=1 5=1 6=36864 9=1
|
||||
Convolution res2b_branch2c 1 1 res2b_branch2b_res2b_branch2b_relu res2b_branch2c_scale2b_branch2c -23330=4,3,56,56,256 0=256 1=1 5=1 6=16384
|
||||
Eltwise res2b 2 1 res2a_res2a_relu_splitncnn_0 res2b_branch2c_scale2b_branch2c res2b -23330=4,3,56,56,256 0=1
|
||||
ReLU res2b_relu 1 1 res2b res2b_res2b_relu -23330=4,3,56,56,256
|
||||
Split splitncnn_2 1 2 res2b_res2b_relu res2b_res2b_relu_splitncnn_0 res2b_res2b_relu_splitncnn_1 -23330=8,3,56,56,256,3,56,56,256
|
||||
Convolution res2c_branch2a 1 1 res2b_res2b_relu_splitncnn_1 res2c_branch2a_res2c_branch2a_relu -23330=4,3,56,56,64 0=64 1=1 5=1 6=16384 9=1
|
||||
Convolution res2c_branch2b 1 1 res2c_branch2a_res2c_branch2a_relu res2c_branch2b_res2c_branch2b_relu -23330=4,3,56,56,64 0=64 1=3 4=1 5=1 6=36864 9=1
|
||||
Convolution res2c_branch2c 1 1 res2c_branch2b_res2c_branch2b_relu res2c_branch2c_scale2c_branch2c -23330=4,3,56,56,256 0=256 1=1 5=1 6=16384
|
||||
Eltwise res2c 2 1 res2b_res2b_relu_splitncnn_0 res2c_branch2c_scale2c_branch2c res2c -23330=4,3,56,56,256 0=1
|
||||
ReLU res2c_relu 1 1 res2c res2c_res2c_relu -23330=4,3,56,56,256
|
||||
Split splitncnn_3 1 2 res2c_res2c_relu res2c_res2c_relu_splitncnn_0 res2c_res2c_relu_splitncnn_1 -23330=8,3,56,56,256,3,56,56,256
|
||||
Convolution res3a_branch1 1 1 res2c_res2c_relu_splitncnn_1 res3a_branch1_scale3a_branch1 -23330=4,3,28,28,512 0=512 1=1 3=2 5=1 6=131072
|
||||
Convolution res3a_branch2a 1 1 res2c_res2c_relu_splitncnn_0 res3a_branch2a_res3a_branch2a_relu -23330=4,3,28,28,128 0=128 1=1 3=2 5=1 6=32768 9=1
|
||||
Convolution res3a_branch2b 1 1 res3a_branch2a_res3a_branch2a_relu res3a_branch2b_res3a_branch2b_relu -23330=4,3,28,28,128 0=128 1=3 4=1 5=1 6=147456 9=1
|
||||
Convolution res3a_branch2c 1 1 res3a_branch2b_res3a_branch2b_relu res3a_branch2c_scale3a_branch2c -23330=4,3,28,28,512 0=512 1=1 5=1 6=65536
|
||||
Eltwise res3a 2 1 res3a_branch1_scale3a_branch1 res3a_branch2c_scale3a_branch2c res3a -23330=4,3,28,28,512 0=1
|
||||
ReLU res3a_relu 1 1 res3a res3a_res3a_relu -23330=4,3,28,28,512
|
||||
Split splitncnn_4 1 2 res3a_res3a_relu res3a_res3a_relu_splitncnn_0 res3a_res3a_relu_splitncnn_1 -23330=8,3,28,28,512,3,28,28,512
|
||||
Convolution res3b_branch2a 1 1 res3a_res3a_relu_splitncnn_1 res3b_branch2a_res3b_branch2a_relu -23330=4,3,28,28,128 0=128 1=1 5=1 6=65536 9=1
|
||||
Convolution res3b_branch2b 1 1 res3b_branch2a_res3b_branch2a_relu res3b_branch2b_res3b_branch2b_relu -23330=4,3,28,28,128 0=128 1=3 4=1 5=1 6=147456 9=1
|
||||
Convolution res3b_branch2c 1 1 res3b_branch2b_res3b_branch2b_relu res3b_branch2c_scale3b_branch2c -23330=4,3,28,28,512 0=512 1=1 5=1 6=65536
|
||||
Eltwise res3b 2 1 res3a_res3a_relu_splitncnn_0 res3b_branch2c_scale3b_branch2c res3b -23330=4,3,28,28,512 0=1
|
||||
ReLU res3b_relu 1 1 res3b res3b_res3b_relu -23330=4,3,28,28,512
|
||||
Split splitncnn_5 1 2 res3b_res3b_relu res3b_res3b_relu_splitncnn_0 res3b_res3b_relu_splitncnn_1 -23330=8,3,28,28,512,3,28,28,512
|
||||
Convolution res3c_branch2a 1 1 res3b_res3b_relu_splitncnn_1 res3c_branch2a_res3c_branch2a_relu -23330=4,3,28,28,128 0=128 1=1 5=1 6=65536 9=1
|
||||
Convolution res3c_branch2b 1 1 res3c_branch2a_res3c_branch2a_relu res3c_branch2b_res3c_branch2b_relu -23330=4,3,28,28,128 0=128 1=3 4=1 5=1 6=147456 9=1
|
||||
Convolution res3c_branch2c 1 1 res3c_branch2b_res3c_branch2b_relu res3c_branch2c_scale3c_branch2c -23330=4,3,28,28,512 0=512 1=1 5=1 6=65536
|
||||
Eltwise res3c 2 1 res3b_res3b_relu_splitncnn_0 res3c_branch2c_scale3c_branch2c res3c -23330=4,3,28,28,512 0=1
|
||||
ReLU res3c_relu 1 1 res3c res3c_res3c_relu -23330=4,3,28,28,512
|
||||
Split splitncnn_6 1 2 res3c_res3c_relu res3c_res3c_relu_splitncnn_0 res3c_res3c_relu_splitncnn_1 -23330=8,3,28,28,512,3,28,28,512
|
||||
Convolution res3d_branch2a 1 1 res3c_res3c_relu_splitncnn_1 res3d_branch2a_res3d_branch2a_relu -23330=4,3,28,28,128 0=128 1=1 5=1 6=65536 9=1
|
||||
Convolution res3d_branch2b 1 1 res3d_branch2a_res3d_branch2a_relu res3d_branch2b_res3d_branch2b_relu -23330=4,3,28,28,128 0=128 1=3 4=1 5=1 6=147456 9=1
|
||||
Convolution res3d_branch2c 1 1 res3d_branch2b_res3d_branch2b_relu res3d_branch2c_scale3d_branch2c -23330=4,3,28,28,512 0=512 1=1 5=1 6=65536
|
||||
Eltwise res3d 2 1 res3c_res3c_relu_splitncnn_0 res3d_branch2c_scale3d_branch2c res3d -23330=4,3,28,28,512 0=1
|
||||
ReLU res3d_relu 1 1 res3d res3d_res3d_relu -23330=4,3,28,28,512
|
||||
Split splitncnn_7 1 2 res3d_res3d_relu res3d_res3d_relu_splitncnn_0 res3d_res3d_relu_splitncnn_1 -23330=8,3,28,28,512,3,28,28,512
|
||||
Convolution res4a_branch1 1 1 res3d_res3d_relu_splitncnn_1 res4a_branch1_scale4a_branch1 -23330=4,3,14,14,1024 0=1024 1=1 3=2 5=1 6=524288
|
||||
Convolution res4a_branch2a 1 1 res3d_res3d_relu_splitncnn_0 res4a_branch2a_res4a_branch2a_relu -23330=4,3,14,14,256 0=256 1=1 3=2 5=1 6=131072 9=1
|
||||
Convolution res4a_branch2b 1 1 res4a_branch2a_res4a_branch2a_relu res4a_branch2b_res4a_branch2b_relu -23330=4,3,14,14,256 0=256 1=3 4=1 5=1 6=589824 9=1
|
||||
Convolution res4a_branch2c 1 1 res4a_branch2b_res4a_branch2b_relu res4a_branch2c_scale4a_branch2c -23330=4,3,14,14,1024 0=1024 1=1 5=1 6=262144
|
||||
Eltwise res4a 2 1 res4a_branch1_scale4a_branch1 res4a_branch2c_scale4a_branch2c res4a -23330=4,3,14,14,1024 0=1
|
||||
ReLU res4a_relu 1 1 res4a res4a_res4a_relu -23330=4,3,14,14,1024
|
||||
Split splitncnn_8 1 2 res4a_res4a_relu res4a_res4a_relu_splitncnn_0 res4a_res4a_relu_splitncnn_1 -23330=8,3,14,14,1024,3,14,14,1024
|
||||
Convolution res4b_branch2a 1 1 res4a_res4a_relu_splitncnn_1 res4b_branch2a_res4b_branch2a_relu -23330=4,3,14,14,256 0=256 1=1 5=1 6=262144 9=1
|
||||
Convolution res4b_branch2b 1 1 res4b_branch2a_res4b_branch2a_relu res4b_branch2b_res4b_branch2b_relu -23330=4,3,14,14,256 0=256 1=3 4=1 5=1 6=589824 9=1
|
||||
Convolution res4b_branch2c 1 1 res4b_branch2b_res4b_branch2b_relu res4b_branch2c_scale4b_branch2c -23330=4,3,14,14,1024 0=1024 1=1 5=1 6=262144
|
||||
Eltwise res4b 2 1 res4a_res4a_relu_splitncnn_0 res4b_branch2c_scale4b_branch2c res4b -23330=4,3,14,14,1024 0=1
|
||||
ReLU res4b_relu 1 1 res4b res4b_res4b_relu -23330=4,3,14,14,1024
|
||||
Split splitncnn_9 1 2 res4b_res4b_relu res4b_res4b_relu_splitncnn_0 res4b_res4b_relu_splitncnn_1 -23330=8,3,14,14,1024,3,14,14,1024
|
||||
Convolution res4c_branch2a 1 1 res4b_res4b_relu_splitncnn_1 res4c_branch2a_res4c_branch2a_relu -23330=4,3,14,14,256 0=256 1=1 5=1 6=262144 9=1
|
||||
Convolution res4c_branch2b 1 1 res4c_branch2a_res4c_branch2a_relu res4c_branch2b_res4c_branch2b_relu -23330=4,3,14,14,256 0=256 1=3 4=1 5=1 6=589824 9=1
|
||||
Convolution res4c_branch2c 1 1 res4c_branch2b_res4c_branch2b_relu res4c_branch2c_scale4c_branch2c -23330=4,3,14,14,1024 0=1024 1=1 5=1 6=262144
|
||||
Eltwise res4c 2 1 res4b_res4b_relu_splitncnn_0 res4c_branch2c_scale4c_branch2c res4c -23330=4,3,14,14,1024 0=1
|
||||
ReLU res4c_relu 1 1 res4c res4c_res4c_relu -23330=4,3,14,14,1024
|
||||
Split splitncnn_10 1 2 res4c_res4c_relu res4c_res4c_relu_splitncnn_0 res4c_res4c_relu_splitncnn_1 -23330=8,3,14,14,1024,3,14,14,1024
|
||||
Convolution res4d_branch2a 1 1 res4c_res4c_relu_splitncnn_1 res4d_branch2a_res4d_branch2a_relu -23330=4,3,14,14,256 0=256 1=1 5=1 6=262144 9=1
|
||||
Convolution res4d_branch2b 1 1 res4d_branch2a_res4d_branch2a_relu res4d_branch2b_res4d_branch2b_relu -23330=4,3,14,14,256 0=256 1=3 4=1 5=1 6=589824 9=1
|
||||
Convolution res4d_branch2c 1 1 res4d_branch2b_res4d_branch2b_relu res4d_branch2c_scale4d_branch2c -23330=4,3,14,14,1024 0=1024 1=1 5=1 6=262144
|
||||
Eltwise res4d 2 1 res4c_res4c_relu_splitncnn_0 res4d_branch2c_scale4d_branch2c res4d -23330=4,3,14,14,1024 0=1
|
||||
ReLU res4d_relu 1 1 res4d res4d_res4d_relu -23330=4,3,14,14,1024
|
||||
Split splitncnn_11 1 2 res4d_res4d_relu res4d_res4d_relu_splitncnn_0 res4d_res4d_relu_splitncnn_1 -23330=8,3,14,14,1024,3,14,14,1024
|
||||
Convolution res4e_branch2a 1 1 res4d_res4d_relu_splitncnn_1 res4e_branch2a_res4e_branch2a_relu -23330=4,3,14,14,256 0=256 1=1 5=1 6=262144 9=1
|
||||
Convolution res4e_branch2b 1 1 res4e_branch2a_res4e_branch2a_relu res4e_branch2b_res4e_branch2b_relu -23330=4,3,14,14,256 0=256 1=3 4=1 5=1 6=589824 9=1
|
||||
Convolution res4e_branch2c 1 1 res4e_branch2b_res4e_branch2b_relu res4e_branch2c_scale4e_branch2c -23330=4,3,14,14,1024 0=1024 1=1 5=1 6=262144
|
||||
Eltwise res4e 2 1 res4d_res4d_relu_splitncnn_0 res4e_branch2c_scale4e_branch2c res4e -23330=4,3,14,14,1024 0=1
|
||||
ReLU res4e_relu 1 1 res4e res4e_res4e_relu -23330=4,3,14,14,1024
|
||||
Split splitncnn_12 1 2 res4e_res4e_relu res4e_res4e_relu_splitncnn_0 res4e_res4e_relu_splitncnn_1 -23330=8,3,14,14,1024,3,14,14,1024
|
||||
Convolution res4f_branch2a 1 1 res4e_res4e_relu_splitncnn_1 res4f_branch2a_res4f_branch2a_relu -23330=4,3,14,14,256 0=256 1=1 5=1 6=262144 9=1
|
||||
Convolution res4f_branch2b 1 1 res4f_branch2a_res4f_branch2a_relu res4f_branch2b_res4f_branch2b_relu -23330=4,3,14,14,256 0=256 1=3 4=1 5=1 6=589824 9=1
|
||||
Convolution res4f_branch2c 1 1 res4f_branch2b_res4f_branch2b_relu res4f_branch2c_scale4f_branch2c -23330=4,3,14,14,1024 0=1024 1=1 5=1 6=262144
|
||||
Eltwise res4f 2 1 res4e_res4e_relu_splitncnn_0 res4f_branch2c_scale4f_branch2c res4f -23330=4,3,14,14,1024 0=1
|
||||
ReLU res4f_relu 1 1 res4f res4f_res4f_relu -23330=4,3,14,14,1024
|
||||
Split splitncnn_13 1 2 res4f_res4f_relu res4f_res4f_relu_splitncnn_0 res4f_res4f_relu_splitncnn_1 -23330=8,3,14,14,1024,3,14,14,1024
|
||||
Convolution res5a_branch1 1 1 res4f_res4f_relu_splitncnn_1 res5a_branch1_scale5a_branch1 -23330=4,3,7,7,2048 0=2048 1=1 3=2 5=1 6=2097152
|
||||
Convolution res5a_branch2a 1 1 res4f_res4f_relu_splitncnn_0 res5a_branch2a_res5a_branch2a_relu -23330=4,3,7,7,512 0=512 1=1 3=2 5=1 6=524288 9=1
|
||||
Convolution res5a_branch2b 1 1 res5a_branch2a_res5a_branch2a_relu res5a_branch2b_res5a_branch2b_relu -23330=4,3,7,7,512 0=512 1=3 4=1 5=1 6=2359296 9=1
|
||||
Convolution res5a_branch2c 1 1 res5a_branch2b_res5a_branch2b_relu res5a_branch2c_scale5a_branch2c -23330=4,3,7,7,2048 0=2048 1=1 5=1 6=1048576
|
||||
Eltwise res5a 2 1 res5a_branch1_scale5a_branch1 res5a_branch2c_scale5a_branch2c res5a -23330=4,3,7,7,2048 0=1
|
||||
ReLU res5a_relu 1 1 res5a res5a_res5a_relu -23330=4,3,7,7,2048
|
||||
Split splitncnn_14 1 2 res5a_res5a_relu res5a_res5a_relu_splitncnn_0 res5a_res5a_relu_splitncnn_1 -23330=8,3,7,7,2048,3,7,7,2048
|
||||
Convolution res5b_branch2a 1 1 res5a_res5a_relu_splitncnn_1 res5b_branch2a_res5b_branch2a_relu -23330=4,3,7,7,512 0=512 1=1 5=1 6=1048576 9=1
|
||||
Convolution res5b_branch2b 1 1 res5b_branch2a_res5b_branch2a_relu res5b_branch2b_res5b_branch2b_relu -23330=4,3,7,7,512 0=512 1=3 4=1 5=1 6=2359296 9=1
|
||||
Convolution res5b_branch2c 1 1 res5b_branch2b_res5b_branch2b_relu res5b_branch2c_scale5b_branch2c -23330=4,3,7,7,2048 0=2048 1=1 5=1 6=1048576
|
||||
Eltwise res5b 2 1 res5a_res5a_relu_splitncnn_0 res5b_branch2c_scale5b_branch2c res5b -23330=4,3,7,7,2048 0=1
|
||||
ReLU res5b_relu 1 1 res5b res5b_res5b_relu -23330=4,3,7,7,2048
|
||||
Split splitncnn_15 1 2 res5b_res5b_relu res5b_res5b_relu_splitncnn_0 res5b_res5b_relu_splitncnn_1 -23330=8,3,7,7,2048,3,7,7,2048
|
||||
Convolution res5c_branch2a 1 1 res5b_res5b_relu_splitncnn_1 res5c_branch2a_res5c_branch2a_relu -23330=4,3,7,7,512 0=512 1=1 5=1 6=1048576 9=1
|
||||
Convolution res5c_branch2b 1 1 res5c_branch2a_res5c_branch2a_relu res5c_branch2b_res5c_branch2b_relu -23330=4,3,7,7,512 0=512 1=3 4=1 5=1 6=2359296 9=1
|
||||
Convolution res5c_branch2c 1 1 res5c_branch2b_res5c_branch2b_relu res5c_branch2c_scale5c_branch2c -23330=4,3,7,7,2048 0=2048 1=1 5=1 6=1048576
|
||||
Eltwise res5c 2 1 res5b_res5b_relu_splitncnn_0 res5c_branch2c_scale5c_branch2c res5c -23330=4,3,7,7,2048 0=1
|
||||
ReLU res5c_relu 1 1 res5c res5c_res5c_relu -23330=4,3,7,7,2048
|
||||
Pooling pool5 1 1 res5c_res5c_relu pool5 -23330=4,3,1,1,2048 0=1 1=7
|
||||
InnerProduct fc1000 1 1 pool5 fc1000 -23330=4,1,1000,1,1 0=1000 1=1 2=2048000
|
||||
Softmax prob 1 1 fc1000 output -23330=4,1,1000,1,1
|
108
3rdparty/ncnn/benchmark/resnet50_int8.param
vendored
Normal file
108
3rdparty/ncnn/benchmark/resnet50_int8.param
vendored
Normal file
@ -0,0 +1,108 @@
|
||||
7767517
|
||||
106 122
|
||||
Input data 0 1 data 0=224 1=224 2=3
|
||||
Convolution conv1 1 1 data conv1_conv1_relu 0=64 1=7 3=2 4=3 5=1 6=9408 8=2 9=1
|
||||
Pooling pool1 1 1 conv1_conv1_relu pool1 1=3 2=2
|
||||
Split splitncnn_0 1 2 pool1 pool1_splitncnn_0 pool1_splitncnn_1
|
||||
Convolution res2a_branch1 1 1 pool1_splitncnn_1 res2a_branch1_scale2a_branch1 0=256 1=1 5=1 6=16384 8=2
|
||||
Convolution res2a_branch2a 1 1 pool1_splitncnn_0 res2a_branch2a_res2a_branch2a_relu 0=64 1=1 5=1 6=4096 8=102 9=1
|
||||
Convolution res2a_branch2b 1 1 res2a_branch2a_res2a_branch2a_relu res2a_branch2b_res2a_branch2b_relu 0=64 1=3 4=1 5=1 6=36864 8=102 9=1
|
||||
Convolution res2a_branch2c 1 1 res2a_branch2b_res2a_branch2b_relu res2a_branch2c_scale2a_branch2c 0=256 1=1 5=1 6=16384 8=2
|
||||
Eltwise res2a 2 1 res2a_branch1_scale2a_branch1 res2a_branch2c_scale2a_branch2c res2a 0=1
|
||||
ReLU res2a_relu 1 1 res2a res2a_res2a_relu
|
||||
Split splitncnn_1 1 2 res2a_res2a_relu res2a_res2a_relu_splitncnn_0 res2a_res2a_relu_splitncnn_1
|
||||
Convolution res2b_branch2a 1 1 res2a_res2a_relu_splitncnn_1 res2b_branch2a_res2b_branch2a_relu 0=64 1=1 5=1 6=16384 8=102 9=1
|
||||
Convolution res2b_branch2b 1 1 res2b_branch2a_res2b_branch2a_relu res2b_branch2b_res2b_branch2b_relu 0=64 1=3 4=1 5=1 6=36864 8=102 9=1
|
||||
Convolution res2b_branch2c 1 1 res2b_branch2b_res2b_branch2b_relu res2b_branch2c_scale2b_branch2c 0=256 1=1 5=1 6=16384 8=2
|
||||
Eltwise res2b 2 1 res2a_res2a_relu_splitncnn_0 res2b_branch2c_scale2b_branch2c res2b 0=1
|
||||
ReLU res2b_relu 1 1 res2b res2b_res2b_relu
|
||||
Split splitncnn_2 1 2 res2b_res2b_relu res2b_res2b_relu_splitncnn_0 res2b_res2b_relu_splitncnn_1
|
||||
Convolution res2c_branch2a 1 1 res2b_res2b_relu_splitncnn_1 res2c_branch2a_res2c_branch2a_relu 0=64 1=1 5=1 6=16384 8=102 9=1
|
||||
Convolution res2c_branch2b 1 1 res2c_branch2a_res2c_branch2a_relu res2c_branch2b_res2c_branch2b_relu 0=64 1=3 4=1 5=1 6=36864 8=102 9=1
|
||||
Convolution res2c_branch2c 1 1 res2c_branch2b_res2c_branch2b_relu res2c_branch2c_scale2c_branch2c 0=256 1=1 5=1 6=16384 8=2
|
||||
Eltwise res2c 2 1 res2b_res2b_relu_splitncnn_0 res2c_branch2c_scale2c_branch2c res2c 0=1
|
||||
ReLU res2c_relu 1 1 res2c res2c_res2c_relu
|
||||
Split splitncnn_3 1 2 res2c_res2c_relu res2c_res2c_relu_splitncnn_0 res2c_res2c_relu_splitncnn_1
|
||||
Convolution res3a_branch1 1 1 res2c_res2c_relu_splitncnn_1 res3a_branch1_scale3a_branch1 0=512 1=1 3=2 5=1 6=131072 8=2
|
||||
Convolution res3a_branch2a 1 1 res2c_res2c_relu_splitncnn_0 res3a_branch2a_res3a_branch2a_relu 0=128 1=1 3=2 5=1 6=32768 8=102 9=1
|
||||
Convolution res3a_branch2b 1 1 res3a_branch2a_res3a_branch2a_relu res3a_branch2b_res3a_branch2b_relu 0=128 1=3 4=1 5=1 6=147456 8=102 9=1
|
||||
Convolution res3a_branch2c 1 1 res3a_branch2b_res3a_branch2b_relu res3a_branch2c_scale3a_branch2c 0=512 1=1 5=1 6=65536 8=2
|
||||
Eltwise res3a 2 1 res3a_branch1_scale3a_branch1 res3a_branch2c_scale3a_branch2c res3a 0=1
|
||||
ReLU res3a_relu 1 1 res3a res3a_res3a_relu
|
||||
Split splitncnn_4 1 2 res3a_res3a_relu res3a_res3a_relu_splitncnn_0 res3a_res3a_relu_splitncnn_1
|
||||
Convolution res3b_branch2a 1 1 res3a_res3a_relu_splitncnn_1 res3b_branch2a_res3b_branch2a_relu 0=128 1=1 5=1 6=65536 8=102 9=1
|
||||
Convolution res3b_branch2b 1 1 res3b_branch2a_res3b_branch2a_relu res3b_branch2b_res3b_branch2b_relu 0=128 1=3 4=1 5=1 6=147456 8=102 9=1
|
||||
Convolution res3b_branch2c 1 1 res3b_branch2b_res3b_branch2b_relu res3b_branch2c_scale3b_branch2c 0=512 1=1 5=1 6=65536 8=2
|
||||
Eltwise res3b 2 1 res3a_res3a_relu_splitncnn_0 res3b_branch2c_scale3b_branch2c res3b 0=1
|
||||
ReLU res3b_relu 1 1 res3b res3b_res3b_relu
|
||||
Split splitncnn_5 1 2 res3b_res3b_relu res3b_res3b_relu_splitncnn_0 res3b_res3b_relu_splitncnn_1
|
||||
Convolution res3c_branch2a 1 1 res3b_res3b_relu_splitncnn_1 res3c_branch2a_res3c_branch2a_relu 0=128 1=1 5=1 6=65536 8=102 9=1
|
||||
Convolution res3c_branch2b 1 1 res3c_branch2a_res3c_branch2a_relu res3c_branch2b_res3c_branch2b_relu 0=128 1=3 4=1 5=1 6=147456 8=102 9=1
|
||||
Convolution res3c_branch2c 1 1 res3c_branch2b_res3c_branch2b_relu res3c_branch2c_scale3c_branch2c 0=512 1=1 5=1 6=65536 8=2
|
||||
Eltwise res3c 2 1 res3b_res3b_relu_splitncnn_0 res3c_branch2c_scale3c_branch2c res3c 0=1
|
||||
ReLU res3c_relu 1 1 res3c res3c_res3c_relu
|
||||
Split splitncnn_6 1 2 res3c_res3c_relu res3c_res3c_relu_splitncnn_0 res3c_res3c_relu_splitncnn_1
|
||||
Convolution res3d_branch2a 1 1 res3c_res3c_relu_splitncnn_1 res3d_branch2a_res3d_branch2a_relu 0=128 1=1 5=1 6=65536 8=102 9=1
|
||||
Convolution res3d_branch2b 1 1 res3d_branch2a_res3d_branch2a_relu res3d_branch2b_res3d_branch2b_relu 0=128 1=3 4=1 5=1 6=147456 8=102 9=1
|
||||
Convolution res3d_branch2c 1 1 res3d_branch2b_res3d_branch2b_relu res3d_branch2c_scale3d_branch2c 0=512 1=1 5=1 6=65536 8=2
|
||||
Eltwise res3d 2 1 res3c_res3c_relu_splitncnn_0 res3d_branch2c_scale3d_branch2c res3d 0=1
|
||||
ReLU res3d_relu 1 1 res3d res3d_res3d_relu
|
||||
Split splitncnn_7 1 2 res3d_res3d_relu res3d_res3d_relu_splitncnn_0 res3d_res3d_relu_splitncnn_1
|
||||
Convolution res4a_branch1 1 1 res3d_res3d_relu_splitncnn_1 res4a_branch1_scale4a_branch1 0=1024 1=1 3=2 5=1 6=524288 8=2
|
||||
Convolution res4a_branch2a 1 1 res3d_res3d_relu_splitncnn_0 res4a_branch2a_res4a_branch2a_relu 0=256 1=1 3=2 5=1 6=131072 8=102 9=1
|
||||
Convolution res4a_branch2b 1 1 res4a_branch2a_res4a_branch2a_relu res4a_branch2b_res4a_branch2b_relu 0=256 1=3 4=1 5=1 6=589824 8=102 9=1
|
||||
Convolution res4a_branch2c 1 1 res4a_branch2b_res4a_branch2b_relu res4a_branch2c_scale4a_branch2c 0=1024 1=1 5=1 6=262144 8=2
|
||||
Eltwise res4a 2 1 res4a_branch1_scale4a_branch1 res4a_branch2c_scale4a_branch2c res4a 0=1
|
||||
ReLU res4a_relu 1 1 res4a res4a_res4a_relu
|
||||
Split splitncnn_8 1 2 res4a_res4a_relu res4a_res4a_relu_splitncnn_0 res4a_res4a_relu_splitncnn_1
|
||||
Convolution res4b_branch2a 1 1 res4a_res4a_relu_splitncnn_1 res4b_branch2a_res4b_branch2a_relu 0=256 1=1 5=1 6=262144 8=102 9=1
|
||||
Convolution res4b_branch2b 1 1 res4b_branch2a_res4b_branch2a_relu res4b_branch2b_res4b_branch2b_relu 0=256 1=3 4=1 5=1 6=589824 8=102 9=1
|
||||
Convolution res4b_branch2c 1 1 res4b_branch2b_res4b_branch2b_relu res4b_branch2c_scale4b_branch2c 0=1024 1=1 5=1 6=262144 8=2
|
||||
Eltwise res4b 2 1 res4a_res4a_relu_splitncnn_0 res4b_branch2c_scale4b_branch2c res4b 0=1
|
||||
ReLU res4b_relu 1 1 res4b res4b_res4b_relu
|
||||
Split splitncnn_9 1 2 res4b_res4b_relu res4b_res4b_relu_splitncnn_0 res4b_res4b_relu_splitncnn_1
|
||||
Convolution res4c_branch2a 1 1 res4b_res4b_relu_splitncnn_1 res4c_branch2a_res4c_branch2a_relu 0=256 1=1 5=1 6=262144 8=102 9=1
|
||||
Convolution res4c_branch2b 1 1 res4c_branch2a_res4c_branch2a_relu res4c_branch2b_res4c_branch2b_relu 0=256 1=3 4=1 5=1 6=589824 8=102 9=1
|
||||
Convolution res4c_branch2c 1 1 res4c_branch2b_res4c_branch2b_relu res4c_branch2c_scale4c_branch2c 0=1024 1=1 5=1 6=262144 8=2
|
||||
Eltwise res4c 2 1 res4b_res4b_relu_splitncnn_0 res4c_branch2c_scale4c_branch2c res4c 0=1
|
||||
ReLU res4c_relu 1 1 res4c res4c_res4c_relu
|
||||
Split splitncnn_10 1 2 res4c_res4c_relu res4c_res4c_relu_splitncnn_0 res4c_res4c_relu_splitncnn_1
|
||||
Convolution res4d_branch2a 1 1 res4c_res4c_relu_splitncnn_1 res4d_branch2a_res4d_branch2a_relu 0=256 1=1 5=1 6=262144 8=102 9=1
|
||||
Convolution res4d_branch2b 1 1 res4d_branch2a_res4d_branch2a_relu res4d_branch2b_res4d_branch2b_relu 0=256 1=3 4=1 5=1 6=589824 8=102 9=1
|
||||
Convolution res4d_branch2c 1 1 res4d_branch2b_res4d_branch2b_relu res4d_branch2c_scale4d_branch2c 0=1024 1=1 5=1 6=262144 8=2
|
||||
Eltwise res4d 2 1 res4c_res4c_relu_splitncnn_0 res4d_branch2c_scale4d_branch2c res4d 0=1
|
||||
ReLU res4d_relu 1 1 res4d res4d_res4d_relu
|
||||
Split splitncnn_11 1 2 res4d_res4d_relu res4d_res4d_relu_splitncnn_0 res4d_res4d_relu_splitncnn_1
|
||||
Convolution res4e_branch2a 1 1 res4d_res4d_relu_splitncnn_1 res4e_branch2a_res4e_branch2a_relu 0=256 1=1 5=1 6=262144 8=102 9=1
|
||||
Convolution res4e_branch2b 1 1 res4e_branch2a_res4e_branch2a_relu res4e_branch2b_res4e_branch2b_relu 0=256 1=3 4=1 5=1 6=589824 8=102 9=1
|
||||
Convolution res4e_branch2c 1 1 res4e_branch2b_res4e_branch2b_relu res4e_branch2c_scale4e_branch2c 0=1024 1=1 5=1 6=262144 8=2
|
||||
Eltwise res4e 2 1 res4d_res4d_relu_splitncnn_0 res4e_branch2c_scale4e_branch2c res4e 0=1
|
||||
ReLU res4e_relu 1 1 res4e res4e_res4e_relu
|
||||
Split splitncnn_12 1 2 res4e_res4e_relu res4e_res4e_relu_splitncnn_0 res4e_res4e_relu_splitncnn_1
|
||||
Convolution res4f_branch2a 1 1 res4e_res4e_relu_splitncnn_1 res4f_branch2a_res4f_branch2a_relu 0=256 1=1 5=1 6=262144 8=102 9=1
|
||||
Convolution res4f_branch2b 1 1 res4f_branch2a_res4f_branch2a_relu res4f_branch2b_res4f_branch2b_relu 0=256 1=3 4=1 5=1 6=589824 8=102 9=1
|
||||
Convolution res4f_branch2c 1 1 res4f_branch2b_res4f_branch2b_relu res4f_branch2c_scale4f_branch2c 0=1024 1=1 5=1 6=262144 8=2
|
||||
Eltwise res4f 2 1 res4e_res4e_relu_splitncnn_0 res4f_branch2c_scale4f_branch2c res4f 0=1
|
||||
ReLU res4f_relu 1 1 res4f res4f_res4f_relu
|
||||
Split splitncnn_13 1 2 res4f_res4f_relu res4f_res4f_relu_splitncnn_0 res4f_res4f_relu_splitncnn_1
|
||||
Convolution res5a_branch1 1 1 res4f_res4f_relu_splitncnn_1 res5a_branch1_scale5a_branch1 0=2048 1=1 3=2 5=1 6=2097152 8=2
|
||||
Convolution res5a_branch2a 1 1 res4f_res4f_relu_splitncnn_0 res5a_branch2a_res5a_branch2a_relu 0=512 1=1 3=2 5=1 6=524288 8=102 9=1
|
||||
Convolution res5a_branch2b 1 1 res5a_branch2a_res5a_branch2a_relu res5a_branch2b_res5a_branch2b_relu 0=512 1=3 4=1 5=1 6=2359296 8=102 9=1
|
||||
Convolution res5a_branch2c 1 1 res5a_branch2b_res5a_branch2b_relu res5a_branch2c_scale5a_branch2c 0=2048 1=1 5=1 6=1048576 8=2
|
||||
Eltwise res5a 2 1 res5a_branch1_scale5a_branch1 res5a_branch2c_scale5a_branch2c res5a 0=1
|
||||
ReLU res5a_relu 1 1 res5a res5a_res5a_relu
|
||||
Split splitncnn_14 1 2 res5a_res5a_relu res5a_res5a_relu_splitncnn_0 res5a_res5a_relu_splitncnn_1
|
||||
Convolution res5b_branch2a 1 1 res5a_res5a_relu_splitncnn_1 res5b_branch2a_res5b_branch2a_relu 0=512 1=1 5=1 6=1048576 8=102 9=1
|
||||
Convolution res5b_branch2b 1 1 res5b_branch2a_res5b_branch2a_relu res5b_branch2b_res5b_branch2b_relu 0=512 1=3 4=1 5=1 6=2359296 8=102 9=1
|
||||
Convolution res5b_branch2c 1 1 res5b_branch2b_res5b_branch2b_relu res5b_branch2c_scale5b_branch2c 0=2048 1=1 5=1 6=1048576 8=2
|
||||
Eltwise res5b 2 1 res5a_res5a_relu_splitncnn_0 res5b_branch2c_scale5b_branch2c res5b 0=1
|
||||
ReLU res5b_relu 1 1 res5b res5b_res5b_relu
|
||||
Split splitncnn_15 1 2 res5b_res5b_relu res5b_res5b_relu_splitncnn_0 res5b_res5b_relu_splitncnn_1
|
||||
Convolution res5c_branch2a 1 1 res5b_res5b_relu_splitncnn_1 res5c_branch2a_res5c_branch2a_relu 0=512 1=1 5=1 6=1048576 8=102 9=1
|
||||
Convolution res5c_branch2b 1 1 res5c_branch2a_res5c_branch2a_relu res5c_branch2b_res5c_branch2b_relu 0=512 1=3 4=1 5=1 6=2359296 8=102 9=1
|
||||
Convolution res5c_branch2c 1 1 res5c_branch2b_res5c_branch2b_relu res5c_branch2c_scale5c_branch2c 0=2048 1=1 5=1 6=1048576 8=2
|
||||
Eltwise res5c 2 1 res5b_res5b_relu_splitncnn_0 res5c_branch2c_scale5c_branch2c res5c 0=1
|
||||
ReLU res5c_relu 1 1 res5c res5c_res5c_relu
|
||||
Pooling pool5 1 1 res5c_res5c_relu pool5 0=1 1=7
|
||||
InnerProduct fc1000 1 1 pool5 fc1000 0=1000 1=1 2=2048000
|
||||
Softmax prob 1 1 fc1000 output
|
122
3rdparty/ncnn/benchmark/shufflenet.param
vendored
Normal file
122
3rdparty/ncnn/benchmark/shufflenet.param
vendored
Normal file
@ -0,0 +1,122 @@
|
||||
7767517
|
||||
120 136
|
||||
Input data 0 1 data -23330=4,3,224,224,3 0=224 1=224 2=3
|
||||
Convolution conv1 1 1 data conv1_conv1_relu -23330=4,3,112,112,24 0=24 1=3 3=2 4=1 5=1 6=648 9=1
|
||||
Pooling pool1 1 1 conv1_conv1_relu pool1 -23330=4,3,56,56,24 1=3 2=2
|
||||
Split splitncnn_0 1 2 pool1 pool1_splitncnn_0 pool1_splitncnn_1 -23330=8,3,56,56,24,3,56,56,24
|
||||
Pooling resx1_match_conv 1 1 pool1_splitncnn_1 resx1_match_conv -23330=4,3,28,28,24 0=1 1=3 2=2
|
||||
Convolution resx1_conv1 1 1 pool1_splitncnn_0 resx1_conv1_resx1_conv1_relu -23330=4,3,56,56,54 0=54 1=1 5=1 6=1296 9=1
|
||||
ConvolutionDepthWise resx1_conv2 1 1 resx1_conv1_resx1_conv1_relu resx1_conv2_resx1_conv2_scale -23330=4,3,28,28,54 0=54 1=3 3=2 4=1 5=1 6=486 7=54
|
||||
ConvolutionDepthWise resx1_conv3 1 1 resx1_conv2_resx1_conv2_scale resx1_conv3_resx1_conv3_scale -23330=4,3,28,28,216 0=216 1=1 5=1 6=3888 7=3
|
||||
Concat resx1_concat 2 1 resx1_match_conv resx1_conv3_resx1_conv3_scale resx1_concat -23330=4,3,28,28,240
|
||||
ReLU resx1_concat_relu 1 1 resx1_concat resx1_concat_resx1_concat_relu -23330=4,3,28,28,240
|
||||
Split splitncnn_1 1 2 resx1_concat_resx1_concat_relu resx1_concat_resx1_concat_relu_splitncnn_0 resx1_concat_resx1_concat_relu_splitncnn_1 -23330=8,3,28,28,240,3,28,28,240
|
||||
ConvolutionDepthWise resx2_conv1 1 1 resx1_concat_resx1_concat_relu_splitncnn_1 resx2_conv1_resx2_conv1_relu -23330=4,3,28,28,60 0=60 1=1 5=1 6=4800 7=3 9=1
|
||||
ShuffleChannel shuffle2 1 1 resx2_conv1_resx2_conv1_relu shuffle2 -23330=4,3,28,28,60 0=3
|
||||
ConvolutionDepthWise resx2_conv2 1 1 shuffle2 resx2_conv2_resx2_conv2_scale -23330=4,3,28,28,60 0=60 1=3 4=1 5=1 6=540 7=60
|
||||
ConvolutionDepthWise resx2_conv3 1 1 resx2_conv2_resx2_conv2_scale resx2_conv3_resx2_conv3_scale -23330=4,3,28,28,240 0=240 1=1 5=1 6=4800 7=3
|
||||
Eltwise resx2_elewise 2 1 resx1_concat_resx1_concat_relu_splitncnn_0 resx2_conv3_resx2_conv3_scale resx2_elewise -23330=4,3,28,28,240 0=1
|
||||
ReLU resx2_elewise_relu 1 1 resx2_elewise resx2_elewise_resx2_elewise_relu -23330=4,3,28,28,240
|
||||
Split splitncnn_2 1 2 resx2_elewise_resx2_elewise_relu resx2_elewise_resx2_elewise_relu_splitncnn_0 resx2_elewise_resx2_elewise_relu_splitncnn_1 -23330=8,3,28,28,240,3,28,28,240
|
||||
ConvolutionDepthWise resx3_conv1 1 1 resx2_elewise_resx2_elewise_relu_splitncnn_1 resx3_conv1_resx3_conv1_relu -23330=4,3,28,28,60 0=60 1=1 5=1 6=4800 7=3 9=1
|
||||
ShuffleChannel shuffle3 1 1 resx3_conv1_resx3_conv1_relu shuffle3 -23330=4,3,28,28,60 0=3
|
||||
ConvolutionDepthWise resx3_conv2 1 1 shuffle3 resx3_conv2_resx3_conv2_scale -23330=4,3,28,28,60 0=60 1=3 4=1 5=1 6=540 7=60
|
||||
ConvolutionDepthWise resx3_conv3 1 1 resx3_conv2_resx3_conv2_scale resx3_conv3_resx3_conv3_scale -23330=4,3,28,28,240 0=240 1=1 5=1 6=4800 7=3
|
||||
Eltwise resx3_elewise 2 1 resx2_elewise_resx2_elewise_relu_splitncnn_0 resx3_conv3_resx3_conv3_scale resx3_elewise -23330=4,3,28,28,240 0=1
|
||||
ReLU resx3_elewise_relu 1 1 resx3_elewise resx3_elewise_resx3_elewise_relu -23330=4,3,28,28,240
|
||||
Split splitncnn_3 1 2 resx3_elewise_resx3_elewise_relu resx3_elewise_resx3_elewise_relu_splitncnn_0 resx3_elewise_resx3_elewise_relu_splitncnn_1 -23330=8,3,28,28,240,3,28,28,240
|
||||
ConvolutionDepthWise resx4_conv1 1 1 resx3_elewise_resx3_elewise_relu_splitncnn_1 resx4_conv1_resx4_conv1_relu -23330=4,3,28,28,60 0=60 1=1 5=1 6=4800 7=3 9=1
|
||||
ShuffleChannel shuffle4 1 1 resx4_conv1_resx4_conv1_relu shuffle4 -23330=4,3,28,28,60 0=3
|
||||
ConvolutionDepthWise resx4_conv2 1 1 shuffle4 resx4_conv2_resx4_conv2_scale -23330=4,3,28,28,60 0=60 1=3 4=1 5=1 6=540 7=60
|
||||
ConvolutionDepthWise resx4_conv3 1 1 resx4_conv2_resx4_conv2_scale resx4_conv3_resx4_conv3_scale -23330=4,3,28,28,240 0=240 1=1 5=1 6=4800 7=3
|
||||
Eltwise resx4_elewise 2 1 resx3_elewise_resx3_elewise_relu_splitncnn_0 resx4_conv3_resx4_conv3_scale resx4_elewise -23330=4,3,28,28,240 0=1
|
||||
ReLU resx4_elewise_relu 1 1 resx4_elewise resx4_elewise_resx4_elewise_relu -23330=4,3,28,28,240
|
||||
Split splitncnn_4 1 2 resx4_elewise_resx4_elewise_relu resx4_elewise_resx4_elewise_relu_splitncnn_0 resx4_elewise_resx4_elewise_relu_splitncnn_1 -23330=8,3,28,28,240,3,28,28,240
|
||||
Pooling resx5_match_conv 1 1 resx4_elewise_resx4_elewise_relu_splitncnn_1 resx5_match_conv -23330=4,3,14,14,240 0=1 1=3 2=2
|
||||
ConvolutionDepthWise resx5_conv1 1 1 resx4_elewise_resx4_elewise_relu_splitncnn_0 resx5_conv1_resx5_conv1_relu -23330=4,3,28,28,60 0=60 1=1 5=1 6=4800 7=3 9=1
|
||||
ShuffleChannel shuffle5 1 1 resx5_conv1_resx5_conv1_relu shuffle5 -23330=4,3,28,28,60 0=3
|
||||
ConvolutionDepthWise resx5_conv2 1 1 shuffle5 resx5_conv2_resx5_conv2_scale -23330=4,3,14,14,60 0=60 1=3 3=2 4=1 5=1 6=540 7=60
|
||||
ConvolutionDepthWise resx5_conv3 1 1 resx5_conv2_resx5_conv2_scale resx5_conv3_resx5_conv3_scale -23330=4,3,14,14,240 0=240 1=1 5=1 6=4800 7=3
|
||||
Concat resx5_concat 2 1 resx5_match_conv resx5_conv3_resx5_conv3_scale resx5_concat -23330=4,3,14,14,480
|
||||
ReLU resx5_concat_relu 1 1 resx5_concat resx5_concat_resx5_concat_relu -23330=4,3,14,14,480
|
||||
Split splitncnn_5 1 2 resx5_concat_resx5_concat_relu resx5_concat_resx5_concat_relu_splitncnn_0 resx5_concat_resx5_concat_relu_splitncnn_1 -23330=8,3,14,14,480,3,14,14,480
|
||||
ConvolutionDepthWise resx6_conv1 1 1 resx5_concat_resx5_concat_relu_splitncnn_1 resx6_conv1_resx6_conv1_relu -23330=4,3,14,14,120 0=120 1=1 5=1 6=19200 7=3 9=1
|
||||
ShuffleChannel shuffle6 1 1 resx6_conv1_resx6_conv1_relu shuffle6 -23330=4,3,14,14,120 0=3
|
||||
ConvolutionDepthWise resx6_conv2 1 1 shuffle6 resx6_conv2_resx6_conv2_scale -23330=4,3,14,14,120 0=120 1=3 4=1 5=1 6=1080 7=120
|
||||
ConvolutionDepthWise resx6_conv3 1 1 resx6_conv2_resx6_conv2_scale resx6_conv3_resx6_conv3_scale -23330=4,3,14,14,480 0=480 1=1 5=1 6=19200 7=3
|
||||
Eltwise resx6_elewise 2 1 resx5_concat_resx5_concat_relu_splitncnn_0 resx6_conv3_resx6_conv3_scale resx6_elewise -23330=4,3,14,14,480 0=1
|
||||
ReLU resx6_elewise_relu 1 1 resx6_elewise resx6_elewise_resx6_elewise_relu -23330=4,3,14,14,480
|
||||
Split splitncnn_6 1 2 resx6_elewise_resx6_elewise_relu resx6_elewise_resx6_elewise_relu_splitncnn_0 resx6_elewise_resx6_elewise_relu_splitncnn_1 -23330=8,3,14,14,480,3,14,14,480
|
||||
ConvolutionDepthWise resx7_conv1 1 1 resx6_elewise_resx6_elewise_relu_splitncnn_1 resx7_conv1_resx7_conv1_relu -23330=4,3,14,14,120 0=120 1=1 5=1 6=19200 7=3 9=1
|
||||
ShuffleChannel shuffle7 1 1 resx7_conv1_resx7_conv1_relu shuffle7 -23330=4,3,14,14,120 0=3
|
||||
ConvolutionDepthWise resx7_conv2 1 1 shuffle7 resx7_conv2_resx7_conv2_scale -23330=4,3,14,14,120 0=120 1=3 4=1 5=1 6=1080 7=120
|
||||
ConvolutionDepthWise resx7_conv3 1 1 resx7_conv2_resx7_conv2_scale resx7_conv3_resx7_conv3_scale -23330=4,3,14,14,480 0=480 1=1 5=1 6=19200 7=3
|
||||
Eltwise resx7_elewise 2 1 resx6_elewise_resx6_elewise_relu_splitncnn_0 resx7_conv3_resx7_conv3_scale resx7_elewise -23330=4,3,14,14,480 0=1
|
||||
ReLU resx7_elewise_relu 1 1 resx7_elewise resx7_elewise_resx7_elewise_relu -23330=4,3,14,14,480
|
||||
Split splitncnn_7 1 2 resx7_elewise_resx7_elewise_relu resx7_elewise_resx7_elewise_relu_splitncnn_0 resx7_elewise_resx7_elewise_relu_splitncnn_1 -23330=8,3,14,14,480,3,14,14,480
|
||||
ConvolutionDepthWise resx8_conv1 1 1 resx7_elewise_resx7_elewise_relu_splitncnn_1 resx8_conv1_resx8_conv1_relu -23330=4,3,14,14,120 0=120 1=1 5=1 6=19200 7=3 9=1
|
||||
ShuffleChannel shuffle8 1 1 resx8_conv1_resx8_conv1_relu shuffle8 -23330=4,3,14,14,120 0=3
|
||||
ConvolutionDepthWise resx8_conv2 1 1 shuffle8 resx8_conv2_resx8_conv2_scale -23330=4,3,14,14,120 0=120 1=3 4=1 5=1 6=1080 7=120
|
||||
ConvolutionDepthWise resx8_conv3 1 1 resx8_conv2_resx8_conv2_scale resx8_conv3_resx8_conv3_scale -23330=4,3,14,14,480 0=480 1=1 5=1 6=19200 7=3
|
||||
Eltwise resx8_elewise 2 1 resx7_elewise_resx7_elewise_relu_splitncnn_0 resx8_conv3_resx8_conv3_scale resx8_elewise -23330=4,3,14,14,480 0=1
|
||||
ReLU resx8_elewise_relu 1 1 resx8_elewise resx8_elewise_resx8_elewise_relu -23330=4,3,14,14,480
|
||||
Split splitncnn_8 1 2 resx8_elewise_resx8_elewise_relu resx8_elewise_resx8_elewise_relu_splitncnn_0 resx8_elewise_resx8_elewise_relu_splitncnn_1 -23330=8,3,14,14,480,3,14,14,480
|
||||
ConvolutionDepthWise resx9_conv1 1 1 resx8_elewise_resx8_elewise_relu_splitncnn_1 resx9_conv1_resx9_conv1_relu -23330=4,3,14,14,120 0=120 1=1 5=1 6=19200 7=3 9=1
|
||||
ShuffleChannel shuffle9 1 1 resx9_conv1_resx9_conv1_relu shuffle9 -23330=4,3,14,14,120 0=3
|
||||
ConvolutionDepthWise resx9_conv2 1 1 shuffle9 resx9_conv2_resx9_conv2_scale -23330=4,3,14,14,120 0=120 1=3 4=1 5=1 6=1080 7=120
|
||||
ConvolutionDepthWise resx9_conv3 1 1 resx9_conv2_resx9_conv2_scale resx9_conv3_resx9_conv3_scale -23330=4,3,14,14,480 0=480 1=1 5=1 6=19200 7=3
|
||||
Eltwise resx9_elewise 2 1 resx8_elewise_resx8_elewise_relu_splitncnn_0 resx9_conv3_resx9_conv3_scale resx9_elewise -23330=4,3,14,14,480 0=1
|
||||
ReLU resx9_elewise_relu 1 1 resx9_elewise resx9_elewise_resx9_elewise_relu -23330=4,3,14,14,480
|
||||
Split splitncnn_9 1 2 resx9_elewise_resx9_elewise_relu resx9_elewise_resx9_elewise_relu_splitncnn_0 resx9_elewise_resx9_elewise_relu_splitncnn_1 -23330=8,3,14,14,480,3,14,14,480
|
||||
ConvolutionDepthWise resx10_conv1 1 1 resx9_elewise_resx9_elewise_relu_splitncnn_1 resx10_conv1_resx10_conv1_relu -23330=4,3,14,14,120 0=120 1=1 5=1 6=19200 7=3 9=1
|
||||
ShuffleChannel shuffle10 1 1 resx10_conv1_resx10_conv1_relu shuffle10 -23330=4,3,14,14,120 0=3
|
||||
ConvolutionDepthWise resx10_conv2 1 1 shuffle10 resx10_conv2_resx10_conv2_scale -23330=4,3,14,14,120 0=120 1=3 4=1 5=1 6=1080 7=120
|
||||
ConvolutionDepthWise resx10_conv3 1 1 resx10_conv2_resx10_conv2_scale resx10_conv3_resx10_conv3_scale -23330=4,3,14,14,480 0=480 1=1 5=1 6=19200 7=3
|
||||
Eltwise resx10_elewise 2 1 resx9_elewise_resx9_elewise_relu_splitncnn_0 resx10_conv3_resx10_conv3_scale resx10_elewise -23330=4,3,14,14,480 0=1
|
||||
ReLU resx10_elewise_relu 1 1 resx10_elewise resx10_elewise_resx10_elewise_relu -23330=4,3,14,14,480
|
||||
Split splitncnn_10 1 2 resx10_elewise_resx10_elewise_relu resx10_elewise_resx10_elewise_relu_splitncnn_0 resx10_elewise_resx10_elewise_relu_splitncnn_1 -23330=8,3,14,14,480,3,14,14,480
|
||||
ConvolutionDepthWise resx11_conv1 1 1 resx10_elewise_resx10_elewise_relu_splitncnn_1 resx11_conv1_resx11_conv1_relu -23330=4,3,14,14,120 0=120 1=1 5=1 6=19200 7=3 9=1
|
||||
ShuffleChannel shuffle11 1 1 resx11_conv1_resx11_conv1_relu shuffle11 -23330=4,3,14,14,120 0=3
|
||||
ConvolutionDepthWise resx11_conv2 1 1 shuffle11 resx11_conv2_resx11_conv2_scale -23330=4,3,14,14,120 0=120 1=3 4=1 5=1 6=1080 7=120
|
||||
ConvolutionDepthWise resx11_conv3 1 1 resx11_conv2_resx11_conv2_scale resx11_conv3_resx11_conv3_scale -23330=4,3,14,14,480 0=480 1=1 5=1 6=19200 7=3
|
||||
Eltwise resx11_elewise 2 1 resx10_elewise_resx10_elewise_relu_splitncnn_0 resx11_conv3_resx11_conv3_scale resx11_elewise -23330=4,3,14,14,480 0=1
|
||||
ReLU resx11_elewise_relu 1 1 resx11_elewise resx11_elewise_resx11_elewise_relu -23330=4,3,14,14,480
|
||||
Split splitncnn_11 1 2 resx11_elewise_resx11_elewise_relu resx11_elewise_resx11_elewise_relu_splitncnn_0 resx11_elewise_resx11_elewise_relu_splitncnn_1 -23330=8,3,14,14,480,3,14,14,480
|
||||
ConvolutionDepthWise resx12_conv1 1 1 resx11_elewise_resx11_elewise_relu_splitncnn_1 resx12_conv1_resx12_conv1_relu -23330=4,3,14,14,120 0=120 1=1 5=1 6=19200 7=3 9=1
|
||||
ShuffleChannel shuffle12 1 1 resx12_conv1_resx12_conv1_relu shuffle12 -23330=4,3,14,14,120 0=3
|
||||
ConvolutionDepthWise resx12_conv2 1 1 shuffle12 resx12_conv2_resx12_conv2_scale -23330=4,3,14,14,120 0=120 1=3 4=1 5=1 6=1080 7=120
|
||||
ConvolutionDepthWise resx12_conv3 1 1 resx12_conv2_resx12_conv2_scale resx12_conv3_resx12_conv3_scale -23330=4,3,14,14,480 0=480 1=1 5=1 6=19200 7=3
|
||||
Eltwise resx12_elewise 2 1 resx11_elewise_resx11_elewise_relu_splitncnn_0 resx12_conv3_resx12_conv3_scale resx12_elewise -23330=4,3,14,14,480 0=1
|
||||
ReLU resx12_elewise_relu 1 1 resx12_elewise resx12_elewise_resx12_elewise_relu -23330=4,3,14,14,480
|
||||
Split splitncnn_12 1 2 resx12_elewise_resx12_elewise_relu resx12_elewise_resx12_elewise_relu_splitncnn_0 resx12_elewise_resx12_elewise_relu_splitncnn_1 -23330=8,3,14,14,480,3,14,14,480
|
||||
Pooling resx13_match_conv 1 1 resx12_elewise_resx12_elewise_relu_splitncnn_1 resx13_match_conv -23330=4,3,7,7,480 0=1 1=3 2=2
|
||||
ConvolutionDepthWise resx13_conv1 1 1 resx12_elewise_resx12_elewise_relu_splitncnn_0 resx13_conv1_resx13_conv1_relu -23330=4,3,14,14,120 0=120 1=1 5=1 6=19200 7=3 9=1
|
||||
ShuffleChannel shuffle13 1 1 resx13_conv1_resx13_conv1_relu shuffle13 -23330=4,3,14,14,120 0=3
|
||||
ConvolutionDepthWise resx13_conv2 1 1 shuffle13 resx13_conv2_resx13_conv2_scale -23330=4,3,7,7,120 0=120 1=3 3=2 4=1 5=1 6=1080 7=120
|
||||
ConvolutionDepthWise resx13_conv3 1 1 resx13_conv2_resx13_conv2_scale resx13_conv3_resx13_conv3_scale -23330=4,3,7,7,480 0=480 1=1 5=1 6=19200 7=3
|
||||
Concat resx13_concat 2 1 resx13_match_conv resx13_conv3_resx13_conv3_scale resx13_concat -23330=4,3,7,7,960
|
||||
ReLU resx13_concat_relu 1 1 resx13_concat resx13_concat_resx13_concat_relu -23330=4,3,7,7,960
|
||||
Split splitncnn_13 1 2 resx13_concat_resx13_concat_relu resx13_concat_resx13_concat_relu_splitncnn_0 resx13_concat_resx13_concat_relu_splitncnn_1 -23330=8,3,7,7,960,3,7,7,960
|
||||
ConvolutionDepthWise resx14_conv1 1 1 resx13_concat_resx13_concat_relu_splitncnn_1 resx14_conv1_resx14_conv1_relu -23330=4,3,7,7,240 0=240 1=1 5=1 6=76800 7=3 9=1
|
||||
ShuffleChannel shuffle14 1 1 resx14_conv1_resx14_conv1_relu shuffle14 -23330=4,3,7,7,240 0=3
|
||||
ConvolutionDepthWise resx14_conv2 1 1 shuffle14 resx14_conv2_resx14_conv2_scale -23330=4,3,7,7,240 0=240 1=3 4=1 5=1 6=2160 7=240
|
||||
ConvolutionDepthWise resx14_conv3 1 1 resx14_conv2_resx14_conv2_scale resx14_conv3_resx14_conv3_scale -23330=4,3,7,7,960 0=960 1=1 5=1 6=76800 7=3
|
||||
Eltwise resx14_elewise 2 1 resx13_concat_resx13_concat_relu_splitncnn_0 resx14_conv3_resx14_conv3_scale resx14_elewise -23330=4,3,7,7,960 0=1
|
||||
ReLU resx14_elewise_relu 1 1 resx14_elewise resx14_elewise_resx14_elewise_relu -23330=4,3,7,7,960
|
||||
Split splitncnn_14 1 2 resx14_elewise_resx14_elewise_relu resx14_elewise_resx14_elewise_relu_splitncnn_0 resx14_elewise_resx14_elewise_relu_splitncnn_1 -23330=8,3,7,7,960,3,7,7,960
|
||||
ConvolutionDepthWise resx15_conv1 1 1 resx14_elewise_resx14_elewise_relu_splitncnn_1 resx15_conv1_resx15_conv1_relu -23330=4,3,7,7,240 0=240 1=1 5=1 6=76800 7=3 9=1
|
||||
ShuffleChannel shuffle15 1 1 resx15_conv1_resx15_conv1_relu shuffle15 -23330=4,3,7,7,240 0=3
|
||||
ConvolutionDepthWise resx15_conv2 1 1 shuffle15 resx15_conv2_resx15_conv2_scale -23330=4,3,7,7,240 0=240 1=3 4=1 5=1 6=2160 7=240
|
||||
ConvolutionDepthWise resx15_conv3 1 1 resx15_conv2_resx15_conv2_scale resx15_conv3_resx15_conv3_scale -23330=4,3,7,7,960 0=960 1=1 5=1 6=76800 7=3
|
||||
Eltwise resx15_elewise 2 1 resx14_elewise_resx14_elewise_relu_splitncnn_0 resx15_conv3_resx15_conv3_scale resx15_elewise -23330=4,3,7,7,960 0=1
|
||||
ReLU resx15_elewise_relu 1 1 resx15_elewise resx15_elewise_resx15_elewise_relu -23330=4,3,7,7,960
|
||||
Split splitncnn_15 1 2 resx15_elewise_resx15_elewise_relu resx15_elewise_resx15_elewise_relu_splitncnn_0 resx15_elewise_resx15_elewise_relu_splitncnn_1 -23330=8,3,7,7,960,3,7,7,960
|
||||
ConvolutionDepthWise resx16_conv1 1 1 resx15_elewise_resx15_elewise_relu_splitncnn_1 resx16_conv1_resx16_conv1_relu -23330=4,3,7,7,240 0=240 1=1 5=1 6=76800 7=3 9=1
|
||||
ShuffleChannel shuffle16 1 1 resx16_conv1_resx16_conv1_relu shuffle16 -23330=4,3,7,7,240 0=3
|
||||
ConvolutionDepthWise resx16_conv2 1 1 shuffle16 resx16_conv2_resx16_conv2_scale -23330=4,3,7,7,240 0=240 1=3 4=1 5=1 6=2160 7=240
|
||||
ConvolutionDepthWise resx16_conv3 1 1 resx16_conv2_resx16_conv2_scale resx16_conv3_resx16_conv3_scale -23330=4,3,7,7,960 0=960 1=1 5=1 6=76800 7=3
|
||||
Eltwise resx16_elewise 2 1 resx15_elewise_resx15_elewise_relu_splitncnn_0 resx16_conv3_resx16_conv3_scale resx16_elewise -23330=4,3,7,7,960 0=1
|
||||
ReLU resx16_elewise_relu 1 1 resx16_elewise resx16_elewise_resx16_elewise_relu -23330=4,3,7,7,960
|
||||
Pooling pool_ave 1 1 resx16_elewise_resx16_elewise_relu pool_ave -23330=4,1,960,1,1 0=1 4=1
|
||||
InnerProduct fc1000 1 1 pool_ave fc1000 -23330=4,1,1000,1,1 0=1000 1=1 2=960000
|
||||
Softmax prob 1 1 fc1000 output -23330=4,1,1000,1,1
|
111
3rdparty/ncnn/benchmark/shufflenet_v2.param
vendored
Normal file
111
3rdparty/ncnn/benchmark/shufflenet_v2.param
vendored
Normal file
@ -0,0 +1,111 @@
|
||||
7767517
|
||||
109 125
|
||||
Input data 0 1 data -23330=4,3,224,224,3 0=224 1=224 2=3
|
||||
Convolution conv1 1 1 data conv1_conv1_relu -23330=4,3,112,112,24 0=24 1=3 3=2 4=1 5=1 6=648 9=1
|
||||
Pooling pool1 1 1 conv1_conv1_relu pool1 -23330=4,3,56,56,24 1=3 2=2
|
||||
Split splitncnn_0 1 2 pool1 pool1_splitncnn_0 pool1_splitncnn_1 -23330=8,3,56,56,24,3,56,56,24
|
||||
ConvolutionDepthWise branch1_1_conv1 1 1 pool1_splitncnn_1 branch1_1_conv1_branch1_1_conv1_scale -23330=4,3,28,28,24 0=24 1=3 3=2 4=1 5=1 6=216 7=24
|
||||
Convolution branch1_1_conv2 1 1 branch1_1_conv1_branch1_1_conv1_scale branch1_1_conv2_branch1_1_conv2_relu -23330=4,3,28,28,58 0=58 1=1 5=1 6=1392 9=1
|
||||
Convolution branch1_2_conv1 1 1 pool1_splitncnn_0 branch1_2_conv1_branch1_2_conv1_relu -23330=4,3,56,56,58 0=58 1=1 5=1 6=1392 9=1
|
||||
ConvolutionDepthWise branch1_2_conv2 1 1 branch1_2_conv1_branch1_2_conv1_relu branch1_2_conv2_branch1_2_conv2_scale -23330=4,3,28,28,58 0=58 1=3 3=2 4=1 5=1 6=522 7=58
|
||||
Convolution branch1_2_conv3 1 1 branch1_2_conv2_branch1_2_conv2_scale branch1_2_conv3_branch1_2_conv3_relu -23330=4,3,28,28,58 0=58 1=1 5=1 6=3364 9=1
|
||||
Concat concat1 2 1 branch1_1_conv2_branch1_1_conv2_relu branch1_2_conv3_branch1_2_conv3_relu concat1 -23330=4,3,28,28,116
|
||||
ShuffleChannel shuffle1 1 1 concat1 shuffle1 -23330=4,3,28,28,116 0=2
|
||||
Slice slice2 1 2 shuffle1 branch2_1 branch2_2 -23330=8,3,28,28,58,3,28,28,58 -23300=2,58,-233
|
||||
Convolution branch2_2_conv1 1 1 branch2_2 branch2_2_conv1_branch2_2_conv1_relu -23330=4,3,28,28,58 0=58 1=1 5=1 6=3364 9=1
|
||||
ConvolutionDepthWise branch2_2_conv2 1 1 branch2_2_conv1_branch2_2_conv1_relu branch2_2_conv2_branch2_2_conv2_scale -23330=4,3,28,28,58 0=58 1=3 4=1 5=1 6=522 7=58
|
||||
Convolution branch2_2_conv3 1 1 branch2_2_conv2_branch2_2_conv2_scale branch2_2_conv3_branch2_2_conv3_relu -23330=4,3,28,28,58 0=58 1=1 5=1 6=3364 9=1
|
||||
Concat concat2 2 1 branch2_1 branch2_2_conv3_branch2_2_conv3_relu concat2 -23330=4,3,28,28,116
|
||||
ShuffleChannel shuffle2 1 1 concat2 shuffle2 -23330=4,3,28,28,116 0=2
|
||||
Slice slice3 1 2 shuffle2 branch3_1 branch3_2 -23330=8,3,28,28,58,3,28,28,58 -23300=2,58,-233
|
||||
Convolution branch3_2_conv1 1 1 branch3_2 branch3_2_conv1_branch3_2_conv1_relu -23330=4,3,28,28,58 0=58 1=1 5=1 6=3364 9=1
|
||||
ConvolutionDepthWise branch3_2_conv2 1 1 branch3_2_conv1_branch3_2_conv1_relu branch3_2_conv2_branch3_2_conv2_scale -23330=4,3,28,28,58 0=58 1=3 4=1 5=1 6=522 7=58
|
||||
Convolution branch3_2_conv3 1 1 branch3_2_conv2_branch3_2_conv2_scale branch3_2_conv3_branch3_2_conv3_relu -23330=4,3,28,28,58 0=58 1=1 5=1 6=3364 9=1
|
||||
Concat concat3 2 1 branch3_1 branch3_2_conv3_branch3_2_conv3_relu concat3 -23330=4,3,28,28,116
|
||||
ShuffleChannel shuffle3 1 1 concat3 shuffle3 -23330=4,3,28,28,116 0=2
|
||||
Slice slice4 1 2 shuffle3 branch4_1 branch4_2 -23330=8,3,28,28,58,3,28,28,58 -23300=2,58,-233
|
||||
Convolution branch4_2_conv1 1 1 branch4_2 branch4_2_conv1_branch4_2_conv1_relu -23330=4,3,28,28,58 0=58 1=1 5=1 6=3364 9=1
|
||||
ConvolutionDepthWise branch4_2_conv2 1 1 branch4_2_conv1_branch4_2_conv1_relu branch4_2_conv2_branch4_2_conv2_scale -23330=4,3,28,28,58 0=58 1=3 4=1 5=1 6=522 7=58
|
||||
Convolution branch4_2_conv3 1 1 branch4_2_conv2_branch4_2_conv2_scale branch4_2_conv3_branch4_2_conv3_relu -23330=4,3,28,28,58 0=58 1=1 5=1 6=3364 9=1
|
||||
Concat concat4 2 1 branch4_1 branch4_2_conv3_branch4_2_conv3_relu concat4 -23330=4,3,28,28,116
|
||||
ShuffleChannel shuffle4 1 1 concat4 shuffle4 -23330=4,3,28,28,116 0=2
|
||||
Split splitncnn_1 1 2 shuffle4 shuffle4_splitncnn_0 shuffle4_splitncnn_1 -23330=8,3,28,28,116,3,28,28,116
|
||||
ConvolutionDepthWise branch5_1_conv1 1 1 shuffle4_splitncnn_1 branch5_1_conv1_branch5_1_conv1_scale -23330=4,3,14,14,116 0=116 1=3 3=2 4=1 5=1 6=1044 7=116
|
||||
Convolution branch5_1_conv2 1 1 branch5_1_conv1_branch5_1_conv1_scale branch5_1_conv2_branch5_1_conv2_relu -23330=4,3,14,14,116 0=116 1=1 5=1 6=13456 9=1
|
||||
Convolution branch5_2_conv1 1 1 shuffle4_splitncnn_0 branch5_2_conv1_branch5_2_conv1_relu -23330=4,3,28,28,116 0=116 1=1 5=1 6=13456 9=1
|
||||
ConvolutionDepthWise branch5_2_conv2 1 1 branch5_2_conv1_branch5_2_conv1_relu branch5_2_conv2_branch5_2_conv2_scale -23330=4,3,14,14,116 0=116 1=3 3=2 4=1 5=1 6=1044 7=116
|
||||
Convolution branch5_2_conv3 1 1 branch5_2_conv2_branch5_2_conv2_scale branch5_2_conv3_branch5_2_conv3_relu -23330=4,3,14,14,116 0=116 1=1 5=1 6=13456 9=1
|
||||
Concat concat5 2 1 branch5_1_conv2_branch5_1_conv2_relu branch5_2_conv3_branch5_2_conv3_relu concat5 -23330=4,3,14,14,232
|
||||
ShuffleChannel shuffle5 1 1 concat5 shuffle5 -23330=4,3,14,14,232 0=2
|
||||
Slice slice6 1 2 shuffle5 branch6_1 branch6_2 -23330=8,3,14,14,116,3,14,14,116 -23300=2,116,-233
|
||||
Convolution branch6_2_conv1 1 1 branch6_2 branch6_2_conv1_branch6_2_conv1_relu -23330=4,3,14,14,116 0=116 1=1 5=1 6=13456 9=1
|
||||
ConvolutionDepthWise branch6_2_conv2 1 1 branch6_2_conv1_branch6_2_conv1_relu branch6_2_conv2_branch6_2_conv2_scale -23330=4,3,14,14,116 0=116 1=3 4=1 5=1 6=1044 7=116
|
||||
Convolution branch6_2_conv3 1 1 branch6_2_conv2_branch6_2_conv2_scale branch6_2_conv3_branch6_2_conv3_relu -23330=4,3,14,14,116 0=116 1=1 5=1 6=13456 9=1
|
||||
Concat concat6 2 1 branch6_1 branch6_2_conv3_branch6_2_conv3_relu concat6 -23330=4,3,14,14,232
|
||||
ShuffleChannel shuffle6 1 1 concat6 shuffle6 -23330=4,3,14,14,232 0=2
|
||||
Slice slice7 1 2 shuffle6 branch7_1 branch7_2 -23330=8,3,14,14,116,3,14,14,116 -23300=2,116,-233
|
||||
Convolution branch7_2_conv1 1 1 branch7_2 branch7_2_conv1_branch7_2_conv1_relu -23330=4,3,14,14,116 0=116 1=1 5=1 6=13456 9=1
|
||||
ConvolutionDepthWise branch7_2_conv2 1 1 branch7_2_conv1_branch7_2_conv1_relu branch7_2_conv2_branch7_2_conv2_scale -23330=4,3,14,14,116 0=116 1=3 4=1 5=1 6=1044 7=116
|
||||
Convolution branch7_2_conv3 1 1 branch7_2_conv2_branch7_2_conv2_scale branch7_2_conv3_branch7_2_conv3_relu -23330=4,3,14,14,116 0=116 1=1 5=1 6=13456 9=1
|
||||
Concat concat7 2 1 branch7_1 branch7_2_conv3_branch7_2_conv3_relu concat7 -23330=4,3,14,14,232
|
||||
ShuffleChannel shuffle7 1 1 concat7 shuffle7 -23330=4,3,14,14,232 0=2
|
||||
Slice slice8 1 2 shuffle7 branch8_1 branch8_2 -23330=8,3,14,14,116,3,14,14,116 -23300=2,116,-233
|
||||
Convolution branch8_2_conv1 1 1 branch8_2 branch8_2_conv1_branch8_2_conv1_relu -23330=4,3,14,14,116 0=116 1=1 5=1 6=13456 9=1
|
||||
ConvolutionDepthWise branch8_2_conv2 1 1 branch8_2_conv1_branch8_2_conv1_relu branch8_2_conv2_branch8_2_conv2_scale -23330=4,3,14,14,116 0=116 1=3 4=1 5=1 6=1044 7=116
|
||||
Convolution branch8_2_conv3 1 1 branch8_2_conv2_branch8_2_conv2_scale branch8_2_conv3_branch8_2_conv3_relu -23330=4,3,14,14,116 0=116 1=1 5=1 6=13456 9=1
|
||||
Concat concat8 2 1 branch8_1 branch8_2_conv3_branch8_2_conv3_relu concat8 -23330=4,3,14,14,232
|
||||
ShuffleChannel shuffle8 1 1 concat8 shuffle8 -23330=4,3,14,14,232 0=2
|
||||
Slice slice9 1 2 shuffle8 branch9_1 branch9_2 -23330=8,3,14,14,116,3,14,14,116 -23300=2,116,-233
|
||||
Convolution branch9_2_conv1 1 1 branch9_2 branch9_2_conv1_branch9_2_conv1_relu -23330=4,3,14,14,116 0=116 1=1 5=1 6=13456 9=1
|
||||
ConvolutionDepthWise branch9_2_conv2 1 1 branch9_2_conv1_branch9_2_conv1_relu branch9_2_conv2_branch9_2_conv2_scale -23330=4,3,14,14,116 0=116 1=3 4=1 5=1 6=1044 7=116
|
||||
Convolution branch9_2_conv3 1 1 branch9_2_conv2_branch9_2_conv2_scale branch9_2_conv3_branch9_2_conv3_relu -23330=4,3,14,14,116 0=116 1=1 5=1 6=13456 9=1
|
||||
Concat concat9 2 1 branch9_1 branch9_2_conv3_branch9_2_conv3_relu concat9 -23330=4,3,14,14,232
|
||||
ShuffleChannel shuffle9 1 1 concat9 shuffle9 -23330=4,3,14,14,232 0=2
|
||||
Slice slice10 1 2 shuffle9 branch10_1 branch10_2 -23330=8,3,14,14,116,3,14,14,116 -23300=2,116,-233
|
||||
Convolution branch10_2_conv1 1 1 branch10_2 branch10_2_conv1_branch10_2_conv1_relu -23330=4,3,14,14,116 0=116 1=1 5=1 6=13456 9=1
|
||||
ConvolutionDepthWise branch10_2_conv2 1 1 branch10_2_conv1_branch10_2_conv1_relu branch10_2_conv2_branch10_2_conv2_scale -23330=4,3,14,14,116 0=116 1=3 4=1 5=1 6=1044 7=116
|
||||
Convolution branch10_2_conv3 1 1 branch10_2_conv2_branch10_2_conv2_scale branch10_2_conv3_branch10_2_conv3_relu -23330=4,3,14,14,116 0=116 1=1 5=1 6=13456 9=1
|
||||
Concat concat10 2 1 branch10_1 branch10_2_conv3_branch10_2_conv3_relu concat10 -23330=4,3,14,14,232
|
||||
ShuffleChannel shuffle10 1 1 concat10 shuffle10 -23330=4,3,14,14,232 0=2
|
||||
Slice slice11 1 2 shuffle10 branch11_1 branch11_2 -23330=8,3,14,14,116,3,14,14,116 -23300=2,116,-233
|
||||
Convolution branch11_2_conv1 1 1 branch11_2 branch11_2_conv1_branch11_2_conv1_relu -23330=4,3,14,14,116 0=116 1=1 5=1 6=13456 9=1
|
||||
ConvolutionDepthWise branch11_2_conv2 1 1 branch11_2_conv1_branch11_2_conv1_relu branch11_2_conv2_branch11_2_conv2_scale -23330=4,3,14,14,116 0=116 1=3 4=1 5=1 6=1044 7=116
|
||||
Convolution branch11_2_conv3 1 1 branch11_2_conv2_branch11_2_conv2_scale branch11_2_conv3_branch11_2_conv3_relu -23330=4,3,14,14,116 0=116 1=1 5=1 6=13456 9=1
|
||||
Concat concat11 2 1 branch11_1 branch11_2_conv3_branch11_2_conv3_relu concat11 -23330=4,3,14,14,232
|
||||
ShuffleChannel shuffle11 1 1 concat11 shuffle11 -23330=4,3,14,14,232 0=2
|
||||
Slice slice12 1 2 shuffle11 branch12_1 branch12_2 -23330=8,3,14,14,116,3,14,14,116 -23300=2,116,-233
|
||||
Convolution branch12_2_conv1 1 1 branch12_2 branch12_2_conv1_branch12_2_conv1_relu -23330=4,3,14,14,116 0=116 1=1 5=1 6=13456 9=1
|
||||
ConvolutionDepthWise branch12_2_conv2 1 1 branch12_2_conv1_branch12_2_conv1_relu branch12_2_conv2_branch12_2_conv2_scale -23330=4,3,14,14,116 0=116 1=3 4=1 5=1 6=1044 7=116
|
||||
Convolution branch12_2_conv3 1 1 branch12_2_conv2_branch12_2_conv2_scale branch12_2_conv3_branch12_2_conv3_relu -23330=4,3,14,14,116 0=116 1=1 5=1 6=13456 9=1
|
||||
Concat concat12 2 1 branch12_1 branch12_2_conv3_branch12_2_conv3_relu concat12 -23330=4,3,14,14,232
|
||||
ShuffleChannel shuffle12 1 1 concat12 shuffle12 -23330=4,3,14,14,232 0=2
|
||||
Split splitncnn_2 1 2 shuffle12 shuffle12_splitncnn_0 shuffle12_splitncnn_1 -23330=8,3,14,14,232,3,14,14,232
|
||||
ConvolutionDepthWise branch13_1_conv1 1 1 shuffle12_splitncnn_1 branch13_1_conv1_branch13_1_conv1_scale -23330=4,3,7,7,232 0=232 1=3 3=2 4=1 5=1 6=2088 7=232
|
||||
Convolution branch13_1_conv2 1 1 branch13_1_conv1_branch13_1_conv1_scale branch13_1_conv2_branch13_1_conv2_relu -23330=4,3,7,7,232 0=232 1=1 5=1 6=53824 9=1
|
||||
Convolution branch13_2_conv1 1 1 shuffle12_splitncnn_0 branch13_2_conv1_branch13_2_conv1_relu -23330=4,3,14,14,232 0=232 1=1 5=1 6=53824 9=1
|
||||
ConvolutionDepthWise branch13_2_conv2 1 1 branch13_2_conv1_branch13_2_conv1_relu branch13_2_conv2_branch13_2_conv2_scale -23330=4,3,7,7,232 0=232 1=3 3=2 4=1 5=1 6=2088 7=232
|
||||
Convolution branch13_2_conv3 1 1 branch13_2_conv2_branch13_2_conv2_scale branch13_2_conv3_branch13_2_conv3_relu -23330=4,3,7,7,232 0=232 1=1 5=1 6=53824 9=1
|
||||
Concat concat13 2 1 branch13_1_conv2_branch13_1_conv2_relu branch13_2_conv3_branch13_2_conv3_relu concat13 -23330=4,3,7,7,464
|
||||
ShuffleChannel shuffle13 1 1 concat13 shuffle13 -23330=4,3,7,7,464 0=2
|
||||
Slice slice14 1 2 shuffle13 branch14_1 branch14_2 -23330=8,3,7,7,232,3,7,7,232 -23300=2,232,-233
|
||||
Convolution branch14_2_conv1 1 1 branch14_2 branch14_2_conv1_branch14_2_conv1_relu -23330=4,3,7,7,232 0=232 1=1 5=1 6=53824 9=1
|
||||
ConvolutionDepthWise branch14_2_conv2 1 1 branch14_2_conv1_branch14_2_conv1_relu branch14_2_conv2_branch14_2_conv2_scale -23330=4,3,7,7,232 0=232 1=3 4=1 5=1 6=2088 7=232
|
||||
Convolution branch14_2_conv3 1 1 branch14_2_conv2_branch14_2_conv2_scale branch14_2_conv3_branch14_2_conv3_relu -23330=4,3,7,7,232 0=232 1=1 5=1 6=53824 9=1
|
||||
Concat concat14 2 1 branch14_1 branch14_2_conv3_branch14_2_conv3_relu concat14 -23330=4,3,7,7,464
|
||||
ShuffleChannel shuffle14 1 1 concat14 shuffle14 -23330=4,3,7,7,464 0=2
|
||||
Slice slice15 1 2 shuffle14 branch15_1 branch15_2 -23330=8,3,7,7,232,3,7,7,232 -23300=2,232,-233
|
||||
Convolution branch15_2_conv1 1 1 branch15_2 branch15_2_conv1_branch15_2_conv1_relu -23330=4,3,7,7,232 0=232 1=1 5=1 6=53824 9=1
|
||||
ConvolutionDepthWise branch15_2_conv2 1 1 branch15_2_conv1_branch15_2_conv1_relu branch15_2_conv2_branch15_2_conv2_scale -23330=4,3,7,7,232 0=232 1=3 4=1 5=1 6=2088 7=232
|
||||
Convolution branch15_2_conv3 1 1 branch15_2_conv2_branch15_2_conv2_scale branch15_2_conv3_branch15_2_conv3_relu -23330=4,3,7,7,232 0=232 1=1 5=1 6=53824 9=1
|
||||
Concat concat15 2 1 branch15_1 branch15_2_conv3_branch15_2_conv3_relu concat15 -23330=4,3,7,7,464
|
||||
ShuffleChannel shuffle15 1 1 concat15 shuffle15 -23330=4,3,7,7,464 0=2
|
||||
Slice slice16 1 2 shuffle15 branch16_1 branch16_2 -23330=8,3,7,7,232,3,7,7,232 -23300=2,232,-233
|
||||
Convolution branch16_2_conv1 1 1 branch16_2 branch16_2_conv1_branch16_2_conv1_relu -23330=4,3,7,7,232 0=232 1=1 5=1 6=53824 9=1
|
||||
ConvolutionDepthWise branch16_2_conv2 1 1 branch16_2_conv1_branch16_2_conv1_relu branch16_2_conv2_branch16_2_conv2_scale -23330=4,3,7,7,232 0=232 1=3 4=1 5=1 6=2088 7=232
|
||||
Convolution branch16_2_conv3 1 1 branch16_2_conv2_branch16_2_conv2_scale branch16_2_conv3_branch16_2_conv3_relu -23330=4,3,7,7,232 0=232 1=1 5=1 6=53824 9=1
|
||||
Concat concat16 2 1 branch16_1 branch16_2_conv3_branch16_2_conv3_relu concat16 -23330=4,3,7,7,464
|
||||
ShuffleChannel shuffle16 1 1 concat16 shuffle16 -23330=4,3,7,7,464 0=2
|
||||
Convolution conv5 1 1 shuffle16 conv5_conv5_relu -23330=4,3,7,7,1024 0=1024 1=1 5=1 6=475136 9=1
|
||||
Pooling pool_ave 1 1 conv5_conv5_relu pool_ave -23330=4,1,1024,1,1 0=1 4=1
|
||||
InnerProduct fc1000 1 1 pool_ave fc1000 -23330=4,1,1000,1,1 0=1000 1=1 2=1024000
|
||||
Softmax prob 1 1 fc1000 output -23330=4,1,1000,1,1
|
50
3rdparty/ncnn/benchmark/squeezenet.param
vendored
Normal file
50
3rdparty/ncnn/benchmark/squeezenet.param
vendored
Normal file
@ -0,0 +1,50 @@
|
||||
7767517
|
||||
48 56
|
||||
Input data 0 1 data -23330=4,3,227,227,3 0=227 1=227 2=3
|
||||
Convolution conv1 1 1 data conv1_relu_conv1 -23330=4,3,113,113,64 0=64 1=3 3=2 5=1 6=1728 9=1
|
||||
Pooling pool1 1 1 conv1_relu_conv1 pool1 -23330=4,3,56,56,64 1=3 2=2
|
||||
Convolution fire2/squeeze1x1 1 1 pool1 fire2/squeeze1x1_fire2/relu_squeeze1x1 -23330=4,3,56,56,16 0=16 1=1 5=1 6=1024 9=1
|
||||
Split splitncnn_0 1 2 fire2/squeeze1x1_fire2/relu_squeeze1x1 fire2/squeeze1x1_fire2/relu_squeeze1x1_splitncnn_0 fire2/squeeze1x1_fire2/relu_squeeze1x1_splitncnn_1 -23330=8,3,56,56,16,3,56,56,16
|
||||
Convolution fire2/expand1x1 1 1 fire2/squeeze1x1_fire2/relu_squeeze1x1_splitncnn_1 fire2/expand1x1_fire2/relu_expand1x1 -23330=4,3,56,56,64 0=64 1=1 5=1 6=1024 9=1
|
||||
Convolution fire2/expand3x3 1 1 fire2/squeeze1x1_fire2/relu_squeeze1x1_splitncnn_0 fire2/expand3x3_fire2/relu_expand3x3 -23330=4,3,56,56,64 0=64 1=3 4=1 5=1 6=9216 9=1
|
||||
Concat fire2/concat 2 1 fire2/expand1x1_fire2/relu_expand1x1 fire2/expand3x3_fire2/relu_expand3x3 fire2/concat -23330=4,3,56,56,128
|
||||
Convolution fire3/squeeze1x1 1 1 fire2/concat fire3/squeeze1x1_fire3/relu_squeeze1x1 -23330=4,3,56,56,16 0=16 1=1 5=1 6=2048 9=1
|
||||
Split splitncnn_1 1 2 fire3/squeeze1x1_fire3/relu_squeeze1x1 fire3/squeeze1x1_fire3/relu_squeeze1x1_splitncnn_0 fire3/squeeze1x1_fire3/relu_squeeze1x1_splitncnn_1 -23330=8,3,56,56,16,3,56,56,16
|
||||
Convolution fire3/expand1x1 1 1 fire3/squeeze1x1_fire3/relu_squeeze1x1_splitncnn_1 fire3/expand1x1_fire3/relu_expand1x1 -23330=4,3,56,56,64 0=64 1=1 5=1 6=1024 9=1
|
||||
Convolution fire3/expand3x3 1 1 fire3/squeeze1x1_fire3/relu_squeeze1x1_splitncnn_0 fire3/expand3x3_fire3/relu_expand3x3 -23330=4,3,56,56,64 0=64 1=3 4=1 5=1 6=9216 9=1
|
||||
Concat fire3/concat 2 1 fire3/expand1x1_fire3/relu_expand1x1 fire3/expand3x3_fire3/relu_expand3x3 fire3/concat -23330=4,3,56,56,128
|
||||
Pooling pool3 1 1 fire3/concat pool3 -23330=4,3,28,28,128 1=3 2=2
|
||||
Convolution fire4/squeeze1x1 1 1 pool3 fire4/squeeze1x1_fire4/relu_squeeze1x1 -23330=4,3,28,28,32 0=32 1=1 5=1 6=4096 9=1
|
||||
Split splitncnn_2 1 2 fire4/squeeze1x1_fire4/relu_squeeze1x1 fire4/squeeze1x1_fire4/relu_squeeze1x1_splitncnn_0 fire4/squeeze1x1_fire4/relu_squeeze1x1_splitncnn_1 -23330=8,3,28,28,32,3,28,28,32
|
||||
Convolution fire4/expand1x1 1 1 fire4/squeeze1x1_fire4/relu_squeeze1x1_splitncnn_1 fire4/expand1x1_fire4/relu_expand1x1 -23330=4,3,28,28,128 0=128 1=1 5=1 6=4096 9=1
|
||||
Convolution fire4/expand3x3 1 1 fire4/squeeze1x1_fire4/relu_squeeze1x1_splitncnn_0 fire4/expand3x3_fire4/relu_expand3x3 -23330=4,3,28,28,128 0=128 1=3 4=1 5=1 6=36864 9=1
|
||||
Concat fire4/concat 2 1 fire4/expand1x1_fire4/relu_expand1x1 fire4/expand3x3_fire4/relu_expand3x3 fire4/concat -23330=4,3,28,28,256
|
||||
Convolution fire5/squeeze1x1 1 1 fire4/concat fire5/squeeze1x1_fire5/relu_squeeze1x1 -23330=4,3,28,28,32 0=32 1=1 5=1 6=8192 9=1
|
||||
Split splitncnn_3 1 2 fire5/squeeze1x1_fire5/relu_squeeze1x1 fire5/squeeze1x1_fire5/relu_squeeze1x1_splitncnn_0 fire5/squeeze1x1_fire5/relu_squeeze1x1_splitncnn_1 -23330=8,3,28,28,32,3,28,28,32
|
||||
Convolution fire5/expand1x1 1 1 fire5/squeeze1x1_fire5/relu_squeeze1x1_splitncnn_1 fire5/expand1x1_fire5/relu_expand1x1 -23330=4,3,28,28,128 0=128 1=1 5=1 6=4096 9=1
|
||||
Convolution fire5/expand3x3 1 1 fire5/squeeze1x1_fire5/relu_squeeze1x1_splitncnn_0 fire5/expand3x3_fire5/relu_expand3x3 -23330=4,3,28,28,128 0=128 1=3 4=1 5=1 6=36864 9=1
|
||||
Concat fire5/concat 2 1 fire5/expand1x1_fire5/relu_expand1x1 fire5/expand3x3_fire5/relu_expand3x3 fire5/concat -23330=4,3,28,28,256
|
||||
Pooling pool5 1 1 fire5/concat pool5 -23330=4,3,14,14,256 1=3 2=2
|
||||
Convolution fire6/squeeze1x1 1 1 pool5 fire6/squeeze1x1_fire6/relu_squeeze1x1 -23330=4,3,14,14,48 0=48 1=1 5=1 6=12288 9=1
|
||||
Split splitncnn_4 1 2 fire6/squeeze1x1_fire6/relu_squeeze1x1 fire6/squeeze1x1_fire6/relu_squeeze1x1_splitncnn_0 fire6/squeeze1x1_fire6/relu_squeeze1x1_splitncnn_1 -23330=8,3,14,14,48,3,14,14,48
|
||||
Convolution fire6/expand1x1 1 1 fire6/squeeze1x1_fire6/relu_squeeze1x1_splitncnn_1 fire6/expand1x1_fire6/relu_expand1x1 -23330=4,3,14,14,192 0=192 1=1 5=1 6=9216 9=1
|
||||
Convolution fire6/expand3x3 1 1 fire6/squeeze1x1_fire6/relu_squeeze1x1_splitncnn_0 fire6/expand3x3_fire6/relu_expand3x3 -23330=4,3,14,14,192 0=192 1=3 4=1 5=1 6=82944 9=1
|
||||
Concat fire6/concat 2 1 fire6/expand1x1_fire6/relu_expand1x1 fire6/expand3x3_fire6/relu_expand3x3 fire6/concat -23330=4,3,14,14,384
|
||||
Convolution fire7/squeeze1x1 1 1 fire6/concat fire7/squeeze1x1_fire7/relu_squeeze1x1 -23330=4,3,14,14,48 0=48 1=1 5=1 6=18432 9=1
|
||||
Split splitncnn_5 1 2 fire7/squeeze1x1_fire7/relu_squeeze1x1 fire7/squeeze1x1_fire7/relu_squeeze1x1_splitncnn_0 fire7/squeeze1x1_fire7/relu_squeeze1x1_splitncnn_1 -23330=8,3,14,14,48,3,14,14,48
|
||||
Convolution fire7/expand1x1 1 1 fire7/squeeze1x1_fire7/relu_squeeze1x1_splitncnn_1 fire7/expand1x1_fire7/relu_expand1x1 -23330=4,3,14,14,192 0=192 1=1 5=1 6=9216 9=1
|
||||
Convolution fire7/expand3x3 1 1 fire7/squeeze1x1_fire7/relu_squeeze1x1_splitncnn_0 fire7/expand3x3_fire7/relu_expand3x3 -23330=4,3,14,14,192 0=192 1=3 4=1 5=1 6=82944 9=1
|
||||
Concat fire7/concat 2 1 fire7/expand1x1_fire7/relu_expand1x1 fire7/expand3x3_fire7/relu_expand3x3 fire7/concat -23330=4,3,14,14,384
|
||||
Convolution fire8/squeeze1x1 1 1 fire7/concat fire8/squeeze1x1_fire8/relu_squeeze1x1 -23330=4,3,14,14,64 0=64 1=1 5=1 6=24576 9=1
|
||||
Split splitncnn_6 1 2 fire8/squeeze1x1_fire8/relu_squeeze1x1 fire8/squeeze1x1_fire8/relu_squeeze1x1_splitncnn_0 fire8/squeeze1x1_fire8/relu_squeeze1x1_splitncnn_1 -23330=8,3,14,14,64,3,14,14,64
|
||||
Convolution fire8/expand1x1 1 1 fire8/squeeze1x1_fire8/relu_squeeze1x1_splitncnn_1 fire8/expand1x1_fire8/relu_expand1x1 -23330=4,3,14,14,256 0=256 1=1 5=1 6=16384 9=1
|
||||
Convolution fire8/expand3x3 1 1 fire8/squeeze1x1_fire8/relu_squeeze1x1_splitncnn_0 fire8/expand3x3_fire8/relu_expand3x3 -23330=4,3,14,14,256 0=256 1=3 4=1 5=1 6=147456 9=1
|
||||
Concat fire8/concat 2 1 fire8/expand1x1_fire8/relu_expand1x1 fire8/expand3x3_fire8/relu_expand3x3 fire8/concat -23330=4,3,14,14,512
|
||||
Convolution fire9/squeeze1x1 1 1 fire8/concat fire9/squeeze1x1_fire9/relu_squeeze1x1 -23330=4,3,14,14,64 0=64 1=1 5=1 6=32768 9=1
|
||||
Split splitncnn_7 1 2 fire9/squeeze1x1_fire9/relu_squeeze1x1 fire9/squeeze1x1_fire9/relu_squeeze1x1_splitncnn_0 fire9/squeeze1x1_fire9/relu_squeeze1x1_splitncnn_1 -23330=8,3,14,14,64,3,14,14,64
|
||||
Convolution fire9/expand1x1 1 1 fire9/squeeze1x1_fire9/relu_squeeze1x1_splitncnn_1 fire9/expand1x1_fire9/relu_expand1x1 -23330=4,3,14,14,256 0=256 1=1 5=1 6=16384 9=1
|
||||
Convolution fire9/expand3x3 1 1 fire9/squeeze1x1_fire9/relu_squeeze1x1_splitncnn_0 fire9/expand3x3_fire9/relu_expand3x3 -23330=4,3,14,14,256 0=256 1=3 4=1 5=1 6=147456 9=1
|
||||
Concat fire9/concat 2 1 fire9/expand1x1_fire9/relu_expand1x1 fire9/expand3x3_fire9/relu_expand3x3 fire9/concat_drop9 -23330=4,3,14,14,512
|
||||
Convolution conv10 1 1 fire9/concat_drop9 conv10_relu_conv10 -23330=4,3,16,16,1000 0=1000 1=1 4=1 5=1 6=512000 9=1
|
||||
Pooling pool10 1 1 conv10_relu_conv10 pool10 -23330=4,1,1000,1,1 0=1 4=1
|
||||
Softmax prob 1 1 pool10 output -23330=4,1,1000,1,1
|
50
3rdparty/ncnn/benchmark/squeezenet_int8.param
vendored
Normal file
50
3rdparty/ncnn/benchmark/squeezenet_int8.param
vendored
Normal file
@ -0,0 +1,50 @@
|
||||
7767517
|
||||
48 56
|
||||
Input data 0 1 data 0=227 1=227 2=3
|
||||
Convolution conv1 1 1 data conv1_relu_conv1 0=64 1=3 3=2 5=1 6=1728 8=2 9=1
|
||||
Pooling pool1 1 1 conv1_relu_conv1 pool1 1=3 2=2
|
||||
Convolution fire2/squeeze1x1 1 1 pool1 fire2/squeeze1x1_fire2/relu_squeeze1x1 0=16 1=1 5=1 6=1024 8=102 9=1
|
||||
Split splitncnn_0 1 2 fire2/squeeze1x1_fire2/relu_squeeze1x1 fire2/squeeze1x1_fire2/relu_squeeze1x1_splitncnn_0 fire2/squeeze1x1_fire2/relu_squeeze1x1_splitncnn_1
|
||||
Convolution fire2/expand1x1 1 1 fire2/squeeze1x1_fire2/relu_squeeze1x1_splitncnn_1 fire2/expand1x1_fire2/relu_expand1x1 0=64 1=1 5=1 6=1024 8=2 9=1
|
||||
Convolution fire2/expand3x3 1 1 fire2/squeeze1x1_fire2/relu_squeeze1x1_splitncnn_0 fire2/expand3x3_fire2/relu_expand3x3 0=64 1=3 4=1 5=1 6=9216 8=2 9=1
|
||||
Concat fire2/concat 2 1 fire2/expand1x1_fire2/relu_expand1x1 fire2/expand3x3_fire2/relu_expand3x3 fire2/concat
|
||||
Convolution fire3/squeeze1x1 1 1 fire2/concat fire3/squeeze1x1_fire3/relu_squeeze1x1 0=16 1=1 5=1 6=2048 8=102 9=1
|
||||
Split splitncnn_1 1 2 fire3/squeeze1x1_fire3/relu_squeeze1x1 fire3/squeeze1x1_fire3/relu_squeeze1x1_splitncnn_0 fire3/squeeze1x1_fire3/relu_squeeze1x1_splitncnn_1
|
||||
Convolution fire3/expand1x1 1 1 fire3/squeeze1x1_fire3/relu_squeeze1x1_splitncnn_1 fire3/expand1x1_fire3/relu_expand1x1 0=64 1=1 5=1 6=1024 8=2 9=1
|
||||
Convolution fire3/expand3x3 1 1 fire3/squeeze1x1_fire3/relu_squeeze1x1_splitncnn_0 fire3/expand3x3_fire3/relu_expand3x3 0=64 1=3 4=1 5=1 6=9216 8=2 9=1
|
||||
Concat fire3/concat 2 1 fire3/expand1x1_fire3/relu_expand1x1 fire3/expand3x3_fire3/relu_expand3x3 fire3/concat
|
||||
Pooling pool3 1 1 fire3/concat pool3 1=3 2=2
|
||||
Convolution fire4/squeeze1x1 1 1 pool3 fire4/squeeze1x1_fire4/relu_squeeze1x1 0=32 1=1 5=1 6=4096 8=102 9=1
|
||||
Split splitncnn_2 1 2 fire4/squeeze1x1_fire4/relu_squeeze1x1 fire4/squeeze1x1_fire4/relu_squeeze1x1_splitncnn_0 fire4/squeeze1x1_fire4/relu_squeeze1x1_splitncnn_1
|
||||
Convolution fire4/expand1x1 1 1 fire4/squeeze1x1_fire4/relu_squeeze1x1_splitncnn_1 fire4/expand1x1_fire4/relu_expand1x1 0=128 1=1 5=1 6=4096 8=2 9=1
|
||||
Convolution fire4/expand3x3 1 1 fire4/squeeze1x1_fire4/relu_squeeze1x1_splitncnn_0 fire4/expand3x3_fire4/relu_expand3x3 0=128 1=3 4=1 5=1 6=36864 8=2 9=1
|
||||
Concat fire4/concat 2 1 fire4/expand1x1_fire4/relu_expand1x1 fire4/expand3x3_fire4/relu_expand3x3 fire4/concat
|
||||
Convolution fire5/squeeze1x1 1 1 fire4/concat fire5/squeeze1x1_fire5/relu_squeeze1x1 0=32 1=1 5=1 6=8192 8=102 9=1
|
||||
Split splitncnn_3 1 2 fire5/squeeze1x1_fire5/relu_squeeze1x1 fire5/squeeze1x1_fire5/relu_squeeze1x1_splitncnn_0 fire5/squeeze1x1_fire5/relu_squeeze1x1_splitncnn_1
|
||||
Convolution fire5/expand1x1 1 1 fire5/squeeze1x1_fire5/relu_squeeze1x1_splitncnn_1 fire5/expand1x1_fire5/relu_expand1x1 0=128 1=1 5=1 6=4096 8=2 9=1
|
||||
Convolution fire5/expand3x3 1 1 fire5/squeeze1x1_fire5/relu_squeeze1x1_splitncnn_0 fire5/expand3x3_fire5/relu_expand3x3 0=128 1=3 4=1 5=1 6=36864 8=2 9=1
|
||||
Concat fire5/concat 2 1 fire5/expand1x1_fire5/relu_expand1x1 fire5/expand3x3_fire5/relu_expand3x3 fire5/concat
|
||||
Pooling pool5 1 1 fire5/concat pool5 1=3 2=2
|
||||
Convolution fire6/squeeze1x1 1 1 pool5 fire6/squeeze1x1_fire6/relu_squeeze1x1 0=48 1=1 5=1 6=12288 8=102 9=1
|
||||
Split splitncnn_4 1 2 fire6/squeeze1x1_fire6/relu_squeeze1x1 fire6/squeeze1x1_fire6/relu_squeeze1x1_splitncnn_0 fire6/squeeze1x1_fire6/relu_squeeze1x1_splitncnn_1
|
||||
Convolution fire6/expand1x1 1 1 fire6/squeeze1x1_fire6/relu_squeeze1x1_splitncnn_1 fire6/expand1x1_fire6/relu_expand1x1 0=192 1=1 5=1 6=9216 8=2 9=1
|
||||
Convolution fire6/expand3x3 1 1 fire6/squeeze1x1_fire6/relu_squeeze1x1_splitncnn_0 fire6/expand3x3_fire6/relu_expand3x3 0=192 1=3 4=1 5=1 6=82944 8=2 9=1
|
||||
Concat fire6/concat 2 1 fire6/expand1x1_fire6/relu_expand1x1 fire6/expand3x3_fire6/relu_expand3x3 fire6/concat
|
||||
Convolution fire7/squeeze1x1 1 1 fire6/concat fire7/squeeze1x1_fire7/relu_squeeze1x1 0=48 1=1 5=1 6=18432 8=102 9=1
|
||||
Split splitncnn_5 1 2 fire7/squeeze1x1_fire7/relu_squeeze1x1 fire7/squeeze1x1_fire7/relu_squeeze1x1_splitncnn_0 fire7/squeeze1x1_fire7/relu_squeeze1x1_splitncnn_1
|
||||
Convolution fire7/expand1x1 1 1 fire7/squeeze1x1_fire7/relu_squeeze1x1_splitncnn_1 fire7/expand1x1_fire7/relu_expand1x1 0=192 1=1 5=1 6=9216 8=2 9=1
|
||||
Convolution fire7/expand3x3 1 1 fire7/squeeze1x1_fire7/relu_squeeze1x1_splitncnn_0 fire7/expand3x3_fire7/relu_expand3x3 0=192 1=3 4=1 5=1 6=82944 8=2 9=1
|
||||
Concat fire7/concat 2 1 fire7/expand1x1_fire7/relu_expand1x1 fire7/expand3x3_fire7/relu_expand3x3 fire7/concat
|
||||
Convolution fire8/squeeze1x1 1 1 fire7/concat fire8/squeeze1x1_fire8/relu_squeeze1x1 0=64 1=1 5=1 6=24576 8=102 9=1
|
||||
Split splitncnn_6 1 2 fire8/squeeze1x1_fire8/relu_squeeze1x1 fire8/squeeze1x1_fire8/relu_squeeze1x1_splitncnn_0 fire8/squeeze1x1_fire8/relu_squeeze1x1_splitncnn_1
|
||||
Convolution fire8/expand1x1 1 1 fire8/squeeze1x1_fire8/relu_squeeze1x1_splitncnn_1 fire8/expand1x1_fire8/relu_expand1x1 0=256 1=1 5=1 6=16384 8=2 9=1
|
||||
Convolution fire8/expand3x3 1 1 fire8/squeeze1x1_fire8/relu_squeeze1x1_splitncnn_0 fire8/expand3x3_fire8/relu_expand3x3 0=256 1=3 4=1 5=1 6=147456 8=2 9=1
|
||||
Concat fire8/concat 2 1 fire8/expand1x1_fire8/relu_expand1x1 fire8/expand3x3_fire8/relu_expand3x3 fire8/concat
|
||||
Convolution fire9/squeeze1x1 1 1 fire8/concat fire9/squeeze1x1_fire9/relu_squeeze1x1 0=64 1=1 5=1 6=32768 8=102 9=1
|
||||
Split splitncnn_7 1 2 fire9/squeeze1x1_fire9/relu_squeeze1x1 fire9/squeeze1x1_fire9/relu_squeeze1x1_splitncnn_0 fire9/squeeze1x1_fire9/relu_squeeze1x1_splitncnn_1
|
||||
Convolution fire9/expand1x1 1 1 fire9/squeeze1x1_fire9/relu_squeeze1x1_splitncnn_1 fire9/expand1x1_fire9/relu_expand1x1 0=256 1=1 5=1 6=16384 8=2 9=1
|
||||
Convolution fire9/expand3x3 1 1 fire9/squeeze1x1_fire9/relu_squeeze1x1_splitncnn_0 fire9/expand3x3_fire9/relu_expand3x3 0=256 1=3 4=1 5=1 6=147456 8=2 9=1
|
||||
Concat fire9/concat 2 1 fire9/expand1x1_fire9/relu_expand1x1 fire9/expand3x3_fire9/relu_expand3x3 fire9/concat_drop9
|
||||
Convolution conv10 1 1 fire9/concat_drop9 conv10_relu_conv10 0=1000 1=1 4=1 5=1 6=512000 8=2 9=1
|
||||
Pooling pool10 1 1 conv10_relu_conv10 pool10 0=1 4=1
|
||||
Softmax prob 1 1 pool10 output
|
121
3rdparty/ncnn/benchmark/squeezenet_ssd.param
vendored
Normal file
121
3rdparty/ncnn/benchmark/squeezenet_ssd.param
vendored
Normal file
@ -0,0 +1,121 @@
|
||||
7767517
|
||||
119 152
|
||||
Input data 0 1 data -23330=4,3,300,300,3 0=300 1=300 2=3
|
||||
Split splitncnn_0 1 7 data data_splitncnn_0 data_splitncnn_1 data_splitncnn_2 data_splitncnn_3 data_splitncnn_4 data_splitncnn_5 data_splitncnn_6 -23330=28,3,300,300,3,3,300,300,3,3,300,300,3,3,300,300,3,3,300,300,3,3,300,300,3,3,300,300,3
|
||||
Convolution conv1 1 1 data_splitncnn_6 conv1_relu_conv1 -23330=4,3,149,149,64 0=64 1=3 3=2 5=1 6=1728 9=1
|
||||
Pooling pool1 1 1 conv1_relu_conv1 pool1 -23330=4,3,74,74,64 1=3 2=2
|
||||
Convolution fire2/squeeze1x1 1 1 pool1 fire2/squeeze1x1_fire2/relu_squeeze1x1 -23330=4,3,74,74,16 0=16 1=1 5=1 6=1024 9=1
|
||||
Split splitncnn_1 1 2 fire2/squeeze1x1_fire2/relu_squeeze1x1 fire2/squeeze1x1_fire2/relu_squeeze1x1_splitncnn_0 fire2/squeeze1x1_fire2/relu_squeeze1x1_splitncnn_1 -23330=8,3,74,74,16,3,74,74,16
|
||||
Convolution fire2/expand1x1 1 1 fire2/squeeze1x1_fire2/relu_squeeze1x1_splitncnn_1 fire2/expand1x1_fire2/relu_expand1x1 -23330=4,3,74,74,64 0=64 1=1 5=1 6=1024 9=1
|
||||
Convolution fire2/expand3x3 1 1 fire2/squeeze1x1_fire2/relu_squeeze1x1_splitncnn_0 fire2/expand3x3_fire2/relu_expand3x3 -23330=4,3,74,74,64 0=64 1=3 4=1 5=1 6=9216 9=1
|
||||
Concat fire2/concat 2 1 fire2/expand1x1_fire2/relu_expand1x1 fire2/expand3x3_fire2/relu_expand3x3 fire2/concat -23330=4,3,74,74,128
|
||||
Convolution fire3/squeeze1x1 1 1 fire2/concat fire3/squeeze1x1_fire3/relu_squeeze1x1 -23330=4,3,74,74,16 0=16 1=1 5=1 6=2048 9=1
|
||||
Split splitncnn_2 1 2 fire3/squeeze1x1_fire3/relu_squeeze1x1 fire3/squeeze1x1_fire3/relu_squeeze1x1_splitncnn_0 fire3/squeeze1x1_fire3/relu_squeeze1x1_splitncnn_1 -23330=8,3,74,74,16,3,74,74,16
|
||||
Convolution fire3/expand1x1 1 1 fire3/squeeze1x1_fire3/relu_squeeze1x1_splitncnn_1 fire3/expand1x1_fire3/relu_expand1x1 -23330=4,3,74,74,64 0=64 1=1 5=1 6=1024 9=1
|
||||
Convolution fire3/expand3x3 1 1 fire3/squeeze1x1_fire3/relu_squeeze1x1_splitncnn_0 fire3/expand3x3_fire3/relu_expand3x3 -23330=4,3,74,74,64 0=64 1=3 4=1 5=1 6=9216 9=1
|
||||
Concat fire3/concat 2 1 fire3/expand1x1_fire3/relu_expand1x1 fire3/expand3x3_fire3/relu_expand3x3 fire3/concat -23330=4,3,74,74,128
|
||||
Pooling pool3 1 1 fire3/concat pool3 -23330=4,3,37,37,128 1=3 2=2
|
||||
Convolution fire4/squeeze1x1 1 1 pool3 fire4/squeeze1x1_fire4/relu_squeeze1x1 -23330=4,3,37,37,32 0=32 1=1 5=1 6=4096 9=1
|
||||
Split splitncnn_3 1 2 fire4/squeeze1x1_fire4/relu_squeeze1x1 fire4/squeeze1x1_fire4/relu_squeeze1x1_splitncnn_0 fire4/squeeze1x1_fire4/relu_squeeze1x1_splitncnn_1 -23330=8,3,37,37,32,3,37,37,32
|
||||
Convolution fire4/expand1x1 1 1 fire4/squeeze1x1_fire4/relu_squeeze1x1_splitncnn_1 fire4/expand1x1_fire4/relu_expand1x1 -23330=4,3,37,37,128 0=128 1=1 5=1 6=4096 9=1
|
||||
Convolution fire4/expand3x3 1 1 fire4/squeeze1x1_fire4/relu_squeeze1x1_splitncnn_0 fire4/expand3x3_fire4/relu_expand3x3 -23330=4,3,37,37,128 0=128 1=3 4=1 5=1 6=36864 9=1
|
||||
Concat fire4/concat 2 1 fire4/expand1x1_fire4/relu_expand1x1 fire4/expand3x3_fire4/relu_expand3x3 fire4/concat -23330=4,3,37,37,256
|
||||
Convolution fire5/squeeze1x1 1 1 fire4/concat fire5/squeeze1x1_fire5/relu_squeeze1x1 -23330=4,3,37,37,32 0=32 1=1 5=1 6=8192 9=1
|
||||
Split splitncnn_4 1 2 fire5/squeeze1x1_fire5/relu_squeeze1x1 fire5/squeeze1x1_fire5/relu_squeeze1x1_splitncnn_0 fire5/squeeze1x1_fire5/relu_squeeze1x1_splitncnn_1 -23330=8,3,37,37,32,3,37,37,32
|
||||
Convolution fire5/expand1x1 1 1 fire5/squeeze1x1_fire5/relu_squeeze1x1_splitncnn_1 fire5/expand1x1_fire5/relu_expand1x1 -23330=4,3,37,37,128 0=128 1=1 5=1 6=4096 9=1
|
||||
Convolution fire5/expand3x3 1 1 fire5/squeeze1x1_fire5/relu_squeeze1x1_splitncnn_0 fire5/expand3x3_fire5/relu_expand3x3 -23330=4,3,37,37,128 0=128 1=3 4=1 5=1 6=36864 9=1
|
||||
Concat fire5/concat 2 1 fire5/expand1x1_fire5/relu_expand1x1 fire5/expand3x3_fire5/relu_expand3x3 fire5/concat -23330=4,3,37,37,256
|
||||
Split splitncnn_5 1 2 fire5/concat fire5/concat_splitncnn_0 fire5/concat_splitncnn_1 -23330=8,3,37,37,256,3,37,37,256
|
||||
Pooling pool5 1 1 fire5/concat_splitncnn_1 pool5 -23330=4,3,18,18,256 1=3 2=2
|
||||
Convolution fire6/squeeze1x1 1 1 pool5 fire6/squeeze1x1_fire6/relu_squeeze1x1 -23330=4,3,18,18,48 0=48 1=1 5=1 6=12288 9=1
|
||||
Split splitncnn_6 1 2 fire6/squeeze1x1_fire6/relu_squeeze1x1 fire6/squeeze1x1_fire6/relu_squeeze1x1_splitncnn_0 fire6/squeeze1x1_fire6/relu_squeeze1x1_splitncnn_1 -23330=8,3,18,18,48,3,18,18,48
|
||||
Convolution fire6/expand1x1 1 1 fire6/squeeze1x1_fire6/relu_squeeze1x1_splitncnn_1 fire6/expand1x1_fire6/relu_expand1x1 -23330=4,3,18,18,192 0=192 1=1 5=1 6=9216 9=1
|
||||
Convolution fire6/expand3x3 1 1 fire6/squeeze1x1_fire6/relu_squeeze1x1_splitncnn_0 fire6/expand3x3_fire6/relu_expand3x3 -23330=4,3,18,18,192 0=192 1=3 4=1 5=1 6=82944 9=1
|
||||
Concat fire6/concat 2 1 fire6/expand1x1_fire6/relu_expand1x1 fire6/expand3x3_fire6/relu_expand3x3 fire6/concat -23330=4,3,18,18,384
|
||||
Convolution fire7/squeeze1x1 1 1 fire6/concat fire7/squeeze1x1_fire7/relu_squeeze1x1 -23330=4,3,18,18,48 0=48 1=1 5=1 6=18432 9=1
|
||||
Split splitncnn_7 1 2 fire7/squeeze1x1_fire7/relu_squeeze1x1 fire7/squeeze1x1_fire7/relu_squeeze1x1_splitncnn_0 fire7/squeeze1x1_fire7/relu_squeeze1x1_splitncnn_1 -23330=8,3,18,18,48,3,18,18,48
|
||||
Convolution fire7/expand1x1 1 1 fire7/squeeze1x1_fire7/relu_squeeze1x1_splitncnn_1 fire7/expand1x1_fire7/relu_expand1x1 -23330=4,3,18,18,192 0=192 1=1 5=1 6=9216 9=1
|
||||
Convolution fire7/expand3x3 1 1 fire7/squeeze1x1_fire7/relu_squeeze1x1_splitncnn_0 fire7/expand3x3_fire7/relu_expand3x3 -23330=4,3,18,18,192 0=192 1=3 4=1 5=1 6=82944 9=1
|
||||
Concat fire7/concat 2 1 fire7/expand1x1_fire7/relu_expand1x1 fire7/expand3x3_fire7/relu_expand3x3 fire7/concat -23330=4,3,18,18,384
|
||||
Convolution fire8/squeeze1x1 1 1 fire7/concat fire8/squeeze1x1_fire8/relu_squeeze1x1 -23330=4,3,18,18,64 0=64 1=1 5=1 6=24576 9=1
|
||||
Split splitncnn_8 1 2 fire8/squeeze1x1_fire8/relu_squeeze1x1 fire8/squeeze1x1_fire8/relu_squeeze1x1_splitncnn_0 fire8/squeeze1x1_fire8/relu_squeeze1x1_splitncnn_1 -23330=8,3,18,18,64,3,18,18,64
|
||||
Convolution fire8/expand1x1 1 1 fire8/squeeze1x1_fire8/relu_squeeze1x1_splitncnn_1 fire8/expand1x1_fire8/relu_expand1x1 -23330=4,3,18,18,256 0=256 1=1 5=1 6=16384 9=1
|
||||
Convolution fire8/expand3x3 1 1 fire8/squeeze1x1_fire8/relu_squeeze1x1_splitncnn_0 fire8/expand3x3_fire8/relu_expand3x3 -23330=4,3,18,18,256 0=256 1=3 4=1 5=1 6=147456 9=1
|
||||
Concat fire8/concat 2 1 fire8/expand1x1_fire8/relu_expand1x1 fire8/expand3x3_fire8/relu_expand3x3 fire8/concat -23330=4,3,18,18,512
|
||||
Convolution fire9/squeeze1x1 1 1 fire8/concat fire9/squeeze1x1_fire9/relu_squeeze1x1 -23330=4,3,18,18,64 0=64 1=1 5=1 6=32768 9=1
|
||||
Split splitncnn_9 1 2 fire9/squeeze1x1_fire9/relu_squeeze1x1 fire9/squeeze1x1_fire9/relu_squeeze1x1_splitncnn_0 fire9/squeeze1x1_fire9/relu_squeeze1x1_splitncnn_1 -23330=8,3,18,18,64,3,18,18,64
|
||||
Convolution fire9/expand1x1 1 1 fire9/squeeze1x1_fire9/relu_squeeze1x1_splitncnn_1 fire9/expand1x1_fire9/relu_expand1x1 -23330=4,3,18,18,256 0=256 1=1 5=1 6=16384 9=1
|
||||
Convolution fire9/expand3x3 1 1 fire9/squeeze1x1_fire9/relu_squeeze1x1_splitncnn_0 fire9/expand3x3_fire9/relu_expand3x3 -23330=4,3,18,18,256 0=256 1=3 4=1 5=1 6=147456 9=1
|
||||
Concat fire9/concat 2 1 fire9/expand1x1_fire9/relu_expand1x1 fire9/expand3x3_fire9/relu_expand3x3 fire9/concat -23330=4,3,18,18,512
|
||||
Split splitncnn_10 1 4 fire9/concat fire9/concat_splitncnn_0 fire9/concat_splitncnn_1 fire9/concat_splitncnn_2 fire9/concat_splitncnn_3 -23330=16,3,18,18,512,3,18,18,512,3,18,18,512,3,18,18,512
|
||||
Pooling pool9 1 1 fire9/concat_splitncnn_3 pool9 -23330=4,3,9,9,512 1=3 2=2
|
||||
Convolution fire10/squeeze1x1 1 1 pool9 fire10/squeeze1x1_fire10/relu_squeeze1x1 -23330=4,3,9,9,96 0=96 1=1 5=1 6=49152 9=1
|
||||
Split splitncnn_11 1 2 fire10/squeeze1x1_fire10/relu_squeeze1x1 fire10/squeeze1x1_fire10/relu_squeeze1x1_splitncnn_0 fire10/squeeze1x1_fire10/relu_squeeze1x1_splitncnn_1 -23330=8,3,9,9,96,3,9,9,96
|
||||
Convolution fire10/expand1x1 1 1 fire10/squeeze1x1_fire10/relu_squeeze1x1_splitncnn_1 fire10/expand1x1_fire10/relu_expand1x1 -23330=4,3,9,9,384 0=384 1=1 5=1 6=36864 9=1
|
||||
Convolution fire10/expand3x3 1 1 fire10/squeeze1x1_fire10/relu_squeeze1x1_splitncnn_0 fire10/expand3x3_fire10/relu_expand3x3 -23330=4,3,9,9,384 0=384 1=3 4=1 5=1 6=331776 9=1
|
||||
Concat fire10/concat 2 1 fire10/expand1x1_fire10/relu_expand1x1 fire10/expand3x3_fire10/relu_expand3x3 fire10/concat -23330=4,3,9,9,768
|
||||
Split splitncnn_12 1 4 fire10/concat fire10/concat_splitncnn_0 fire10/concat_splitncnn_1 fire10/concat_splitncnn_2 fire10/concat_splitncnn_3 -23330=16,3,9,9,768,3,9,9,768,3,9,9,768,3,9,9,768
|
||||
Pooling pool10 1 1 fire10/concat_splitncnn_3 pool10 -23330=4,3,4,4,768 1=3 2=2
|
||||
Convolution fire11/squeeze1x1 1 1 pool10 fire11/squeeze1x1_fire11/relu_squeeze1x1 -23330=4,3,4,4,96 0=96 1=1 5=1 6=73728 9=1
|
||||
Split splitncnn_13 1 2 fire11/squeeze1x1_fire11/relu_squeeze1x1 fire11/squeeze1x1_fire11/relu_squeeze1x1_splitncnn_0 fire11/squeeze1x1_fire11/relu_squeeze1x1_splitncnn_1 -23330=8,3,4,4,96,3,4,4,96
|
||||
Convolution fire11/expand1x1 1 1 fire11/squeeze1x1_fire11/relu_squeeze1x1_splitncnn_1 fire11/expand1x1_fire11/relu_expand1x1 -23330=4,3,4,4,384 0=384 1=1 5=1 6=36864 9=1
|
||||
Convolution fire11/expand3x3 1 1 fire11/squeeze1x1_fire11/relu_squeeze1x1_splitncnn_0 fire11/expand3x3_fire11/relu_expand3x3 -23330=4,3,4,4,384 0=384 1=3 4=1 5=1 6=331776 9=1
|
||||
Concat fire11/concat 2 1 fire11/expand1x1_fire11/relu_expand1x1 fire11/expand3x3_fire11/relu_expand3x3 fire11/concat -23330=4,3,4,4,768
|
||||
Split splitncnn_14 1 4 fire11/concat fire11/concat_splitncnn_0 fire11/concat_splitncnn_1 fire11/concat_splitncnn_2 fire11/concat_splitncnn_3 -23330=16,3,4,4,768,3,4,4,768,3,4,4,768,3,4,4,768
|
||||
Convolution conv12_1 1 1 fire11/concat_splitncnn_3 conv12_1_conv12_1/relu -23330=4,3,4,4,128 0=128 1=1 5=1 6=98304 9=1
|
||||
Convolution conv12_2 1 1 conv12_1_conv12_1/relu conv12_2_conv12_2/relu -23330=4,3,2,2,256 0=256 1=3 3=2 4=1 5=1 6=294912 9=1
|
||||
Split splitncnn_15 1 4 conv12_2_conv12_2/relu conv12_2_conv12_2/relu_splitncnn_0 conv12_2_conv12_2/relu_splitncnn_1 conv12_2_conv12_2/relu_splitncnn_2 conv12_2_conv12_2/relu_splitncnn_3 -23330=16,3,2,2,256,3,2,2,256,3,2,2,256,3,2,2,256
|
||||
Convolution conv13_1 1 1 conv12_2_conv12_2/relu_splitncnn_3 conv13_1_conv13_1/relu -23330=4,3,2,2,64 0=64 1=1 5=1 6=16384 9=1
|
||||
Convolution conv13_2 1 1 conv13_1_conv13_1/relu conv13_2_conv13_2/relu -23330=4,3,1,1,128 0=128 1=3 3=2 4=1 5=1 6=73728 9=1
|
||||
Split splitncnn_16 1 3 conv13_2_conv13_2/relu conv13_2_conv13_2/relu_splitncnn_0 conv13_2_conv13_2/relu_splitncnn_1 conv13_2_conv13_2/relu_splitncnn_2 -23330=12,3,1,1,128,3,1,1,128,3,1,1,128
|
||||
BatchNorm fire5/bn 1 1 fire5/concat_splitncnn_0 fire5/normal_fire5/scale -23330=4,3,37,37,256 0=256
|
||||
Split splitncnn_17 1 3 fire5/normal_fire5/scale fire5/normal_fire5/scale_splitncnn_0 fire5/normal_fire5/scale_splitncnn_1 fire5/normal_fire5/scale_splitncnn_2 -23330=12,3,37,37,256,3,37,37,256,3,37,37,256
|
||||
Convolution fire5_mbox_loc 1 1 fire5/normal_fire5/scale_splitncnn_2 fire5_mbox_loc -23330=4,3,37,37,16 0=16 1=3 4=1 5=1 6=36864
|
||||
Permute fire5_mbox_loc_perm 1 1 fire5_mbox_loc fire5_mbox_loc_perm -23330=4,3,16,37,37 0=3
|
||||
Flatten fire5_mbox_loc_flat 1 1 fire5_mbox_loc_perm fire5_mbox_loc_flat -23330=4,1,21904,1,1
|
||||
Convolution fire5_mbox_conf 1 1 fire5/normal_fire5/scale_splitncnn_1 fire5_mbox_conf -23330=4,3,37,37,84 0=84 1=3 4=1 5=1 6=193536
|
||||
Permute fire5_mbox_conf_perm 1 1 fire5_mbox_conf fire5_mbox_conf_perm -23330=4,3,84,37,37 0=3
|
||||
Flatten fire5_mbox_conf_flat 1 1 fire5_mbox_conf_perm fire5_mbox_conf_flat -23330=4,1,114996,1,1
|
||||
PriorBox fire5_mbox_priorbox 2 1 fire5/normal_fire5/scale_splitncnn_0 data_splitncnn_5 fire5_mbox_priorbox -23330=4,2,21904,2,1 -23300=1,2.100000e+01 -23301=1,4.500000e+01 -23302=1,2.000000e+00 9=-233 10=-233 11=8.000000e+00 12=8.000000e+00 13=5.000000e-01
|
||||
Convolution fire9_mbox_loc 1 1 fire9/concat_splitncnn_2 fire9_mbox_loc -23330=4,3,18,18,24 0=24 1=3 4=1 5=1 6=110592
|
||||
Permute fire9_mbox_loc_perm 1 1 fire9_mbox_loc fire9_mbox_loc_perm -23330=4,3,24,18,18 0=3
|
||||
Flatten fire9_mbox_loc_flat 1 1 fire9_mbox_loc_perm fire9_mbox_loc_flat -23330=4,1,7776,1,1
|
||||
Convolution fire9_mbox_conf 1 1 fire9/concat_splitncnn_1 fire9_mbox_conf -23330=4,3,18,18,126 0=126 1=3 4=1 5=1 6=580608
|
||||
Permute fire9_mbox_conf_perm 1 1 fire9_mbox_conf fire9_mbox_conf_perm -23330=4,3,126,18,18 0=3
|
||||
Flatten fire9_mbox_conf_flat 1 1 fire9_mbox_conf_perm fire9_mbox_conf_flat -23330=4,1,40824,1,1
|
||||
PriorBox fire9_mbox_priorbox 2 1 fire9/concat_splitncnn_0 data_splitncnn_4 fire9_mbox_priorbox -23330=4,2,7776,2,1 -23300=1,4.500000e+01 -23301=1,9.900000e+01 -23302=2,2.000000e+00,3.000000e+00 9=-233 10=-233 11=1.600000e+01 12=1.600000e+01 13=5.000000e-01
|
||||
Convolution fire10_mbox_loc 1 1 fire10/concat_splitncnn_2 fire10_mbox_loc -23330=4,3,9,9,24 0=24 1=3 4=1 5=1 6=165888
|
||||
Permute fire10_mbox_loc_perm 1 1 fire10_mbox_loc fire10_mbox_loc_perm -23330=4,3,24,9,9 0=3
|
||||
Flatten fire10_mbox_loc_flat 1 1 fire10_mbox_loc_perm fire10_mbox_loc_flat -23330=4,1,1944,1,1
|
||||
Convolution fire10_mbox_conf 1 1 fire10/concat_splitncnn_1 fire10_mbox_conf -23330=4,3,9,9,126 0=126 1=3 4=1 5=1 6=870912
|
||||
Permute fire10_mbox_conf_perm 1 1 fire10_mbox_conf fire10_mbox_conf_perm -23330=4,3,126,9,9 0=3
|
||||
Flatten fire10_mbox_conf_flat 1 1 fire10_mbox_conf_perm fire10_mbox_conf_flat -23330=4,1,10206,1,1
|
||||
PriorBox fire10_mbox_priorbox 2 1 fire10/concat_splitncnn_0 data_splitncnn_3 fire10_mbox_priorbox -23330=4,2,1944,2,1 -23300=1,9.900000e+01 -23301=1,1.530000e+02 -23302=2,2.000000e+00,3.000000e+00 9=-233 10=-233 11=3.200000e+01 12=3.200000e+01 13=5.000000e-01
|
||||
Convolution fire11_mbox_loc 1 1 fire11/concat_splitncnn_2 fire11_mbox_loc -23330=4,3,4,4,24 0=24 1=3 4=1 5=1 6=165888
|
||||
Permute fire11_mbox_loc_perm 1 1 fire11_mbox_loc fire11_mbox_loc_perm -23330=4,3,24,4,4 0=3
|
||||
Flatten fire11_mbox_loc_flat 1 1 fire11_mbox_loc_perm fire11_mbox_loc_flat -23330=4,1,384,1,1
|
||||
Convolution fire11_mbox_conf 1 1 fire11/concat_splitncnn_1 fire11_mbox_conf -23330=4,3,4,4,126 0=126 1=3 4=1 5=1 6=870912
|
||||
Permute fire11_mbox_conf_perm 1 1 fire11_mbox_conf fire11_mbox_conf_perm -23330=4,3,126,4,4 0=3
|
||||
Flatten fire11_mbox_conf_flat 1 1 fire11_mbox_conf_perm fire11_mbox_conf_flat -23330=4,1,2016,1,1
|
||||
PriorBox fire11_mbox_priorbox 2 1 fire11/concat_splitncnn_0 data_splitncnn_2 fire11_mbox_priorbox -23330=4,2,384,2,1 -23300=1,1.530000e+02 -23301=1,2.070000e+02 -23302=2,2.000000e+00,3.000000e+00 9=-233 10=-233 11=6.400000e+01 12=6.400000e+01 13=5.000000e-01
|
||||
Convolution conv12_2_mbox_loc 1 1 conv12_2_conv12_2/relu_splitncnn_2 conv12_2_mbox_loc -23330=4,3,2,2,24 0=24 1=3 4=1 5=1 6=55296
|
||||
Permute conv12_2_mbox_loc_perm 1 1 conv12_2_mbox_loc conv12_2_mbox_loc_perm -23330=4,3,24,2,2 0=3
|
||||
Flatten conv12_2_mbox_loc_flat 1 1 conv12_2_mbox_loc_perm conv12_2_mbox_loc_flat -23330=4,1,96,1,1
|
||||
Convolution conv12_2_mbox_conf 1 1 conv12_2_conv12_2/relu_splitncnn_1 conv12_2_mbox_conf -23330=4,3,2,2,126 0=126 1=3 4=1 5=1 6=290304
|
||||
Permute conv12_2_mbox_conf_perm 1 1 conv12_2_mbox_conf conv12_2_mbox_conf_perm -23330=4,3,126,2,2 0=3
|
||||
Flatten conv12_2_mbox_conf_flat 1 1 conv12_2_mbox_conf_perm conv12_2_mbox_conf_flat -23330=4,1,504,1,1
|
||||
PriorBox conv12_2_mbox_priorbox 2 1 conv12_2_conv12_2/relu_splitncnn_0 data_splitncnn_1 conv12_2_mbox_priorbox -23330=4,2,96,2,1 -23300=1,2.070000e+02 -23301=1,2.610000e+02 -23302=2,2.000000e+00,3.000000e+00 9=-233 10=-233 11=1.000000e+02 12=1.000000e+02 13=5.000000e-01
|
||||
Convolution conv13_2_mbox_loc 1 1 conv13_2_conv13_2/relu_splitncnn_2 conv13_2_mbox_loc -23330=4,3,1,1,16 0=16 1=3 4=1 5=1 6=18432
|
||||
Permute conv13_2_mbox_loc_perm 1 1 conv13_2_mbox_loc conv13_2_mbox_loc_perm -23330=4,3,16,1,1 0=3
|
||||
Flatten conv13_2_mbox_loc_flat 1 1 conv13_2_mbox_loc_perm conv13_2_mbox_loc_flat -23330=4,1,16,1,1
|
||||
Convolution conv13_2_mbox_conf 1 1 conv13_2_conv13_2/relu_splitncnn_1 conv13_2_mbox_conf -23330=4,3,1,1,84 0=84 1=3 4=1 5=1 6=96768
|
||||
Permute conv13_2_mbox_conf_perm 1 1 conv13_2_mbox_conf conv13_2_mbox_conf_perm -23330=4,3,84,1,1 0=3
|
||||
Flatten conv13_2_mbox_conf_flat 1 1 conv13_2_mbox_conf_perm conv13_2_mbox_conf_flat -23330=4,1,84,1,1
|
||||
PriorBox conv13_2_mbox_priorbox 2 1 conv13_2_conv13_2/relu_splitncnn_0 data_splitncnn_0 conv13_2_mbox_priorbox -23330=4,2,16,2,1 -23300=1,2.610000e+02 -23301=1,3.150000e+02 -23302=1,2.000000e+00 9=-233 10=-233 11=3.000000e+02 12=3.000000e+02 13=5.000000e-01
|
||||
Concat mbox_loc 6 1 fire5_mbox_loc_flat fire9_mbox_loc_flat fire10_mbox_loc_flat fire11_mbox_loc_flat conv12_2_mbox_loc_flat conv13_2_mbox_loc_flat mbox_loc -23330=4,1,32120,1,1
|
||||
Concat mbox_conf 6 1 fire5_mbox_conf_flat fire9_mbox_conf_flat fire10_mbox_conf_flat fire11_mbox_conf_flat conv12_2_mbox_conf_flat conv13_2_mbox_conf_flat mbox_conf -23330=4,1,168630,1,1
|
||||
Concat mbox_priorbox 6 1 fire5_mbox_priorbox fire9_mbox_priorbox fire10_mbox_priorbox fire11_mbox_priorbox conv12_2_mbox_priorbox conv13_2_mbox_priorbox mbox_priorbox -23330=4,2,32120,2,1 0=1
|
||||
Reshape mbox_conf_reshape 1 1 mbox_conf mbox_conf_reshape -23330=4,2,21,8030,1 0=21 1=-1
|
||||
Softmax mbox_conf_softmax 1 1 mbox_conf_reshape mbox_conf_softmax -23330=4,2,21,8030,1 0=1 1=1
|
||||
Flatten mbox_conf_flatten 1 1 mbox_conf_softmax mbox_conf_flatten -23330=4,1,168630,1,1
|
||||
DetectionOutput detection_out 3 1 mbox_loc mbox_conf_flatten mbox_priorbox output 0=21 1=4.500000e-01 2=100 4=2.500000e-01
|
121
3rdparty/ncnn/benchmark/squeezenet_ssd_int8.param
vendored
Normal file
121
3rdparty/ncnn/benchmark/squeezenet_ssd_int8.param
vendored
Normal file
@ -0,0 +1,121 @@
|
||||
7767517
|
||||
119 152
|
||||
Input data 0 1 data 0=300 1=300 2=3
|
||||
Split splitncnn_0 1 7 data data_splitncnn_0 data_splitncnn_1 data_splitncnn_2 data_splitncnn_3 data_splitncnn_4 data_splitncnn_5 data_splitncnn_6
|
||||
Convolution conv1 1 1 data_splitncnn_6 conv1_relu_conv1 0=64 1=3 3=2 5=1 6=1728 8=2 9=1
|
||||
Pooling pool1 1 1 conv1_relu_conv1 pool1 1=3 2=2
|
||||
Convolution fire2/squeeze1x1 1 1 pool1 fire2/squeeze1x1_fire2/relu_squeeze1x1 0=16 1=1 5=1 6=1024 8=102 9=1
|
||||
Split splitncnn_1 1 2 fire2/squeeze1x1_fire2/relu_squeeze1x1 fire2/squeeze1x1_fire2/relu_squeeze1x1_splitncnn_0 fire2/squeeze1x1_fire2/relu_squeeze1x1_splitncnn_1
|
||||
Convolution fire2/expand1x1 1 1 fire2/squeeze1x1_fire2/relu_squeeze1x1_splitncnn_1 fire2/expand1x1_fire2/relu_expand1x1 0=64 1=1 5=1 6=1024 8=2 9=1
|
||||
Convolution fire2/expand3x3 1 1 fire2/squeeze1x1_fire2/relu_squeeze1x1_splitncnn_0 fire2/expand3x3_fire2/relu_expand3x3 0=64 1=3 4=1 5=1 6=9216 8=2 9=1
|
||||
Concat fire2/concat 2 1 fire2/expand1x1_fire2/relu_expand1x1 fire2/expand3x3_fire2/relu_expand3x3 fire2/concat
|
||||
Convolution fire3/squeeze1x1 1 1 fire2/concat fire3/squeeze1x1_fire3/relu_squeeze1x1 0=16 1=1 5=1 6=2048 8=102 9=1
|
||||
Split splitncnn_2 1 2 fire3/squeeze1x1_fire3/relu_squeeze1x1 fire3/squeeze1x1_fire3/relu_squeeze1x1_splitncnn_0 fire3/squeeze1x1_fire3/relu_squeeze1x1_splitncnn_1
|
||||
Convolution fire3/expand1x1 1 1 fire3/squeeze1x1_fire3/relu_squeeze1x1_splitncnn_1 fire3/expand1x1_fire3/relu_expand1x1 0=64 1=1 5=1 6=1024 8=2 9=1
|
||||
Convolution fire3/expand3x3 1 1 fire3/squeeze1x1_fire3/relu_squeeze1x1_splitncnn_0 fire3/expand3x3_fire3/relu_expand3x3 0=64 1=3 4=1 5=1 6=9216 8=2 9=1
|
||||
Concat fire3/concat 2 1 fire3/expand1x1_fire3/relu_expand1x1 fire3/expand3x3_fire3/relu_expand3x3 fire3/concat
|
||||
Pooling pool3 1 1 fire3/concat pool3 1=3 2=2
|
||||
Convolution fire4/squeeze1x1 1 1 pool3 fire4/squeeze1x1_fire4/relu_squeeze1x1 0=32 1=1 5=1 6=4096 8=102 9=1
|
||||
Split splitncnn_3 1 2 fire4/squeeze1x1_fire4/relu_squeeze1x1 fire4/squeeze1x1_fire4/relu_squeeze1x1_splitncnn_0 fire4/squeeze1x1_fire4/relu_squeeze1x1_splitncnn_1
|
||||
Convolution fire4/expand1x1 1 1 fire4/squeeze1x1_fire4/relu_squeeze1x1_splitncnn_1 fire4/expand1x1_fire4/relu_expand1x1 0=128 1=1 5=1 6=4096 8=2 9=1
|
||||
Convolution fire4/expand3x3 1 1 fire4/squeeze1x1_fire4/relu_squeeze1x1_splitncnn_0 fire4/expand3x3_fire4/relu_expand3x3 0=128 1=3 4=1 5=1 6=36864 8=2 9=1
|
||||
Concat fire4/concat 2 1 fire4/expand1x1_fire4/relu_expand1x1 fire4/expand3x3_fire4/relu_expand3x3 fire4/concat
|
||||
Convolution fire5/squeeze1x1 1 1 fire4/concat fire5/squeeze1x1_fire5/relu_squeeze1x1 0=32 1=1 5=1 6=8192 8=102 9=1
|
||||
Split splitncnn_4 1 2 fire5/squeeze1x1_fire5/relu_squeeze1x1 fire5/squeeze1x1_fire5/relu_squeeze1x1_splitncnn_0 fire5/squeeze1x1_fire5/relu_squeeze1x1_splitncnn_1
|
||||
Convolution fire5/expand1x1 1 1 fire5/squeeze1x1_fire5/relu_squeeze1x1_splitncnn_1 fire5/expand1x1_fire5/relu_expand1x1 0=128 1=1 5=1 6=4096 8=2 9=1
|
||||
Convolution fire5/expand3x3 1 1 fire5/squeeze1x1_fire5/relu_squeeze1x1_splitncnn_0 fire5/expand3x3_fire5/relu_expand3x3 0=128 1=3 4=1 5=1 6=36864 8=2 9=1
|
||||
Concat fire5/concat 2 1 fire5/expand1x1_fire5/relu_expand1x1 fire5/expand3x3_fire5/relu_expand3x3 fire5/concat
|
||||
Split splitncnn_5 1 2 fire5/concat fire5/concat_splitncnn_0 fire5/concat_splitncnn_1
|
||||
Pooling pool5 1 1 fire5/concat_splitncnn_1 pool5 1=3 2=2
|
||||
Convolution fire6/squeeze1x1 1 1 pool5 fire6/squeeze1x1_fire6/relu_squeeze1x1 0=48 1=1 5=1 6=12288 8=102 9=1
|
||||
Split splitncnn_6 1 2 fire6/squeeze1x1_fire6/relu_squeeze1x1 fire6/squeeze1x1_fire6/relu_squeeze1x1_splitncnn_0 fire6/squeeze1x1_fire6/relu_squeeze1x1_splitncnn_1
|
||||
Convolution fire6/expand1x1 1 1 fire6/squeeze1x1_fire6/relu_squeeze1x1_splitncnn_1 fire6/expand1x1_fire6/relu_expand1x1 0=192 1=1 5=1 6=9216 8=2 9=1
|
||||
Convolution fire6/expand3x3 1 1 fire6/squeeze1x1_fire6/relu_squeeze1x1_splitncnn_0 fire6/expand3x3_fire6/relu_expand3x3 0=192 1=3 4=1 5=1 6=82944 8=2 9=1
|
||||
Concat fire6/concat 2 1 fire6/expand1x1_fire6/relu_expand1x1 fire6/expand3x3_fire6/relu_expand3x3 fire6/concat
|
||||
Convolution fire7/squeeze1x1 1 1 fire6/concat fire7/squeeze1x1_fire7/relu_squeeze1x1 0=48 1=1 5=1 6=18432 8=102 9=1
|
||||
Split splitncnn_7 1 2 fire7/squeeze1x1_fire7/relu_squeeze1x1 fire7/squeeze1x1_fire7/relu_squeeze1x1_splitncnn_0 fire7/squeeze1x1_fire7/relu_squeeze1x1_splitncnn_1
|
||||
Convolution fire7/expand1x1 1 1 fire7/squeeze1x1_fire7/relu_squeeze1x1_splitncnn_1 fire7/expand1x1_fire7/relu_expand1x1 0=192 1=1 5=1 6=9216 8=2 9=1
|
||||
Convolution fire7/expand3x3 1 1 fire7/squeeze1x1_fire7/relu_squeeze1x1_splitncnn_0 fire7/expand3x3_fire7/relu_expand3x3 0=192 1=3 4=1 5=1 6=82944 8=2 9=1
|
||||
Concat fire7/concat 2 1 fire7/expand1x1_fire7/relu_expand1x1 fire7/expand3x3_fire7/relu_expand3x3 fire7/concat
|
||||
Convolution fire8/squeeze1x1 1 1 fire7/concat fire8/squeeze1x1_fire8/relu_squeeze1x1 0=64 1=1 5=1 6=24576 8=102 9=1
|
||||
Split splitncnn_8 1 2 fire8/squeeze1x1_fire8/relu_squeeze1x1 fire8/squeeze1x1_fire8/relu_squeeze1x1_splitncnn_0 fire8/squeeze1x1_fire8/relu_squeeze1x1_splitncnn_1
|
||||
Convolution fire8/expand1x1 1 1 fire8/squeeze1x1_fire8/relu_squeeze1x1_splitncnn_1 fire8/expand1x1_fire8/relu_expand1x1 0=256 1=1 5=1 6=16384 8=2 9=1
|
||||
Convolution fire8/expand3x3 1 1 fire8/squeeze1x1_fire8/relu_squeeze1x1_splitncnn_0 fire8/expand3x3_fire8/relu_expand3x3 0=256 1=3 4=1 5=1 6=147456 8=2 9=1
|
||||
Concat fire8/concat 2 1 fire8/expand1x1_fire8/relu_expand1x1 fire8/expand3x3_fire8/relu_expand3x3 fire8/concat
|
||||
Convolution fire9/squeeze1x1 1 1 fire8/concat fire9/squeeze1x1_fire9/relu_squeeze1x1 0=64 1=1 5=1 6=32768 8=102 9=1
|
||||
Split splitncnn_9 1 2 fire9/squeeze1x1_fire9/relu_squeeze1x1 fire9/squeeze1x1_fire9/relu_squeeze1x1_splitncnn_0 fire9/squeeze1x1_fire9/relu_squeeze1x1_splitncnn_1
|
||||
Convolution fire9/expand1x1 1 1 fire9/squeeze1x1_fire9/relu_squeeze1x1_splitncnn_1 fire9/expand1x1_fire9/relu_expand1x1 0=256 1=1 5=1 6=16384 8=2 9=1
|
||||
Convolution fire9/expand3x3 1 1 fire9/squeeze1x1_fire9/relu_squeeze1x1_splitncnn_0 fire9/expand3x3_fire9/relu_expand3x3 0=256 1=3 4=1 5=1 6=147456 8=2 9=1
|
||||
Concat fire9/concat 2 1 fire9/expand1x1_fire9/relu_expand1x1 fire9/expand3x3_fire9/relu_expand3x3 fire9/concat
|
||||
Split splitncnn_10 1 4 fire9/concat fire9/concat_splitncnn_0 fire9/concat_splitncnn_1 fire9/concat_splitncnn_2 fire9/concat_splitncnn_3
|
||||
Pooling pool9 1 1 fire9/concat_splitncnn_3 pool9 1=3 2=2
|
||||
Convolution fire10/squeeze1x1 1 1 pool9 fire10/squeeze1x1_fire10/relu_squeeze1x1 0=96 1=1 5=1 6=49152 8=102 9=1
|
||||
Split splitncnn_11 1 2 fire10/squeeze1x1_fire10/relu_squeeze1x1 fire10/squeeze1x1_fire10/relu_squeeze1x1_splitncnn_0 fire10/squeeze1x1_fire10/relu_squeeze1x1_splitncnn_1
|
||||
Convolution fire10/expand1x1 1 1 fire10/squeeze1x1_fire10/relu_squeeze1x1_splitncnn_1 fire10/expand1x1_fire10/relu_expand1x1 0=384 1=1 5=1 6=36864 8=2 9=1
|
||||
Convolution fire10/expand3x3 1 1 fire10/squeeze1x1_fire10/relu_squeeze1x1_splitncnn_0 fire10/expand3x3_fire10/relu_expand3x3 0=384 1=3 4=1 5=1 6=331776 8=2 9=1
|
||||
Concat fire10/concat 2 1 fire10/expand1x1_fire10/relu_expand1x1 fire10/expand3x3_fire10/relu_expand3x3 fire10/concat
|
||||
Split splitncnn_12 1 4 fire10/concat fire10/concat_splitncnn_0 fire10/concat_splitncnn_1 fire10/concat_splitncnn_2 fire10/concat_splitncnn_3
|
||||
Pooling pool10 1 1 fire10/concat_splitncnn_3 pool10 1=3 2=2
|
||||
Convolution fire11/squeeze1x1 1 1 pool10 fire11/squeeze1x1_fire11/relu_squeeze1x1 0=96 1=1 5=1 6=73728 8=102 9=1
|
||||
Split splitncnn_13 1 2 fire11/squeeze1x1_fire11/relu_squeeze1x1 fire11/squeeze1x1_fire11/relu_squeeze1x1_splitncnn_0 fire11/squeeze1x1_fire11/relu_squeeze1x1_splitncnn_1
|
||||
Convolution fire11/expand1x1 1 1 fire11/squeeze1x1_fire11/relu_squeeze1x1_splitncnn_1 fire11/expand1x1_fire11/relu_expand1x1 0=384 1=1 5=1 6=36864 8=2 9=1
|
||||
Convolution fire11/expand3x3 1 1 fire11/squeeze1x1_fire11/relu_squeeze1x1_splitncnn_0 fire11/expand3x3_fire11/relu_expand3x3 0=384 1=3 4=1 5=1 6=331776 8=2 9=1
|
||||
Concat fire11/concat 2 1 fire11/expand1x1_fire11/relu_expand1x1 fire11/expand3x3_fire11/relu_expand3x3 fire11/concat
|
||||
Split splitncnn_14 1 4 fire11/concat fire11/concat_splitncnn_0 fire11/concat_splitncnn_1 fire11/concat_splitncnn_2 fire11/concat_splitncnn_3
|
||||
Convolution conv12_1 1 1 fire11/concat_splitncnn_3 conv12_1_conv12_1/relu 0=128 1=1 5=1 6=98304 8=102 9=1
|
||||
Convolution conv12_2 1 1 conv12_1_conv12_1/relu conv12_2_conv12_2/relu 0=256 1=3 3=2 4=1 5=1 6=294912 8=2 9=1
|
||||
Split splitncnn_15 1 4 conv12_2_conv12_2/relu conv12_2_conv12_2/relu_splitncnn_0 conv12_2_conv12_2/relu_splitncnn_1 conv12_2_conv12_2/relu_splitncnn_2 conv12_2_conv12_2/relu_splitncnn_3
|
||||
Convolution conv13_1 1 1 conv12_2_conv12_2/relu_splitncnn_3 conv13_1_conv13_1/relu 0=64 1=1 5=1 6=16384 8=102 9=1
|
||||
Convolution conv13_2 1 1 conv13_1_conv13_1/relu conv13_2_conv13_2/relu 0=128 1=3 3=2 4=1 5=1 6=73728 8=2 9=1
|
||||
Split splitncnn_16 1 3 conv13_2_conv13_2/relu conv13_2_conv13_2/relu_splitncnn_0 conv13_2_conv13_2/relu_splitncnn_1 conv13_2_conv13_2/relu_splitncnn_2
|
||||
BatchNorm fire5/bn 1 1 fire5/concat_splitncnn_0 fire5/normal_fire5/scale 0=256
|
||||
Split splitncnn_17 1 3 fire5/normal_fire5/scale fire5/normal_fire5/scale_splitncnn_0 fire5/normal_fire5/scale_splitncnn_1 fire5/normal_fire5/scale_splitncnn_2
|
||||
Convolution fire5_mbox_loc 1 1 fire5/normal_fire5/scale_splitncnn_2 fire5_mbox_loc 0=16 1=3 4=1 5=1 6=36864 8=2
|
||||
Permute fire5_mbox_loc_perm 1 1 fire5_mbox_loc fire5_mbox_loc_perm 0=3
|
||||
Flatten fire5_mbox_loc_flat 1 1 fire5_mbox_loc_perm fire5_mbox_loc_flat
|
||||
Convolution fire5_mbox_conf 1 1 fire5/normal_fire5/scale_splitncnn_1 fire5_mbox_conf 0=84 1=3 4=1 5=1 6=193536 8=2
|
||||
Permute fire5_mbox_conf_perm 1 1 fire5_mbox_conf fire5_mbox_conf_perm 0=3
|
||||
Flatten fire5_mbox_conf_flat 1 1 fire5_mbox_conf_perm fire5_mbox_conf_flat
|
||||
PriorBox fire5_mbox_priorbox 2 1 fire5/normal_fire5/scale_splitncnn_0 data_splitncnn_5 fire5_mbox_priorbox -23300=1,21.000000 -23301=1,45.000000 -23302=1,2.000000 9=-233 10=-233 11=8.000000 12=8.000000 13=0.500000
|
||||
Convolution fire9_mbox_loc 1 1 fire9/concat_splitncnn_2 fire9_mbox_loc 0=24 1=3 4=1 5=1 6=110592 8=2
|
||||
Permute fire9_mbox_loc_perm 1 1 fire9_mbox_loc fire9_mbox_loc_perm 0=3
|
||||
Flatten fire9_mbox_loc_flat 1 1 fire9_mbox_loc_perm fire9_mbox_loc_flat
|
||||
Convolution fire9_mbox_conf 1 1 fire9/concat_splitncnn_1 fire9_mbox_conf 0=126 1=3 4=1 5=1 6=580608 8=2
|
||||
Permute fire9_mbox_conf_perm 1 1 fire9_mbox_conf fire9_mbox_conf_perm 0=3
|
||||
Flatten fire9_mbox_conf_flat 1 1 fire9_mbox_conf_perm fire9_mbox_conf_flat
|
||||
PriorBox fire9_mbox_priorbox 2 1 fire9/concat_splitncnn_0 data_splitncnn_4 fire9_mbox_priorbox -23300=1,45.000000 -23301=1,99.000000 -23302=2,2.000000,3.000000 9=-233 10=-233 11=16.000000 12=16.000000 13=0.500000
|
||||
Convolution fire10_mbox_loc 1 1 fire10/concat_splitncnn_2 fire10_mbox_loc 0=24 1=3 4=1 5=1 6=165888 8=2
|
||||
Permute fire10_mbox_loc_perm 1 1 fire10_mbox_loc fire10_mbox_loc_perm 0=3
|
||||
Flatten fire10_mbox_loc_flat 1 1 fire10_mbox_loc_perm fire10_mbox_loc_flat
|
||||
Convolution fire10_mbox_conf 1 1 fire10/concat_splitncnn_1 fire10_mbox_conf 0=126 1=3 4=1 5=1 6=870912 8=2
|
||||
Permute fire10_mbox_conf_perm 1 1 fire10_mbox_conf fire10_mbox_conf_perm 0=3
|
||||
Flatten fire10_mbox_conf_flat 1 1 fire10_mbox_conf_perm fire10_mbox_conf_flat
|
||||
PriorBox fire10_mbox_priorbox 2 1 fire10/concat_splitncnn_0 data_splitncnn_3 fire10_mbox_priorbox -23300=1,99.000000 -23301=1,153.000000 -23302=2,2.000000,3.000000 9=-233 10=-233 11=32.000000 12=32.000000 13=0.500000
|
||||
Convolution fire11_mbox_loc 1 1 fire11/concat_splitncnn_2 fire11_mbox_loc 0=24 1=3 4=1 5=1 6=165888 8=2
|
||||
Permute fire11_mbox_loc_perm 1 1 fire11_mbox_loc fire11_mbox_loc_perm 0=3
|
||||
Flatten fire11_mbox_loc_flat 1 1 fire11_mbox_loc_perm fire11_mbox_loc_flat
|
||||
Convolution fire11_mbox_conf 1 1 fire11/concat_splitncnn_1 fire11_mbox_conf 0=126 1=3 4=1 5=1 6=870912 8=2
|
||||
Permute fire11_mbox_conf_perm 1 1 fire11_mbox_conf fire11_mbox_conf_perm 0=3
|
||||
Flatten fire11_mbox_conf_flat 1 1 fire11_mbox_conf_perm fire11_mbox_conf_flat
|
||||
PriorBox fire11_mbox_priorbox 2 1 fire11/concat_splitncnn_0 data_splitncnn_2 fire11_mbox_priorbox -23300=1,153.000000 -23301=1,207.000000 -23302=2,2.000000,3.000000 9=-233 10=-233 11=64.000000 12=64.000000 13=0.500000
|
||||
Convolution conv12_2_mbox_loc 1 1 conv12_2_conv12_2/relu_splitncnn_2 conv12_2_mbox_loc 0=24 1=3 4=1 5=1 6=55296 8=2
|
||||
Permute conv12_2_mbox_loc_perm 1 1 conv12_2_mbox_loc conv12_2_mbox_loc_perm 0=3
|
||||
Flatten conv12_2_mbox_loc_flat 1 1 conv12_2_mbox_loc_perm conv12_2_mbox_loc_flat
|
||||
Convolution conv12_2_mbox_conf 1 1 conv12_2_conv12_2/relu_splitncnn_1 conv12_2_mbox_conf 0=126 1=3 4=1 5=1 6=290304 8=2
|
||||
Permute conv12_2_mbox_conf_perm 1 1 conv12_2_mbox_conf conv12_2_mbox_conf_perm 0=3
|
||||
Flatten conv12_2_mbox_conf_flat 1 1 conv12_2_mbox_conf_perm conv12_2_mbox_conf_flat
|
||||
PriorBox conv12_2_mbox_priorbox 2 1 conv12_2_conv12_2/relu_splitncnn_0 data_splitncnn_1 conv12_2_mbox_priorbox -23300=1,207.000000 -23301=1,261.000000 -23302=2,2.000000,3.000000 9=-233 10=-233 11=100.000000 12=100.000000 13=0.500000
|
||||
Convolution conv13_2_mbox_loc 1 1 conv13_2_conv13_2/relu_splitncnn_2 conv13_2_mbox_loc 0=16 1=3 4=1 5=1 6=18432 8=2
|
||||
Permute conv13_2_mbox_loc_perm 1 1 conv13_2_mbox_loc conv13_2_mbox_loc_perm 0=3
|
||||
Flatten conv13_2_mbox_loc_flat 1 1 conv13_2_mbox_loc_perm conv13_2_mbox_loc_flat
|
||||
Convolution conv13_2_mbox_conf 1 1 conv13_2_conv13_2/relu_splitncnn_1 conv13_2_mbox_conf 0=84 1=3 4=1 5=1 6=96768 8=2
|
||||
Permute conv13_2_mbox_conf_perm 1 1 conv13_2_mbox_conf conv13_2_mbox_conf_perm 0=3
|
||||
Flatten conv13_2_mbox_conf_flat 1 1 conv13_2_mbox_conf_perm conv13_2_mbox_conf_flat
|
||||
PriorBox conv13_2_mbox_priorbox 2 1 conv13_2_conv13_2/relu_splitncnn_0 data_splitncnn_0 conv13_2_mbox_priorbox -23300=1,261.000000 -23301=1,315.000000 -23302=1,2.000000 9=-233 10=-233 11=300.000000 12=300.000000 13=0.500000
|
||||
Concat mbox_loc 6 1 fire5_mbox_loc_flat fire9_mbox_loc_flat fire10_mbox_loc_flat fire11_mbox_loc_flat conv12_2_mbox_loc_flat conv13_2_mbox_loc_flat mbox_loc
|
||||
Concat mbox_conf 6 1 fire5_mbox_conf_flat fire9_mbox_conf_flat fire10_mbox_conf_flat fire11_mbox_conf_flat conv12_2_mbox_conf_flat conv13_2_mbox_conf_flat mbox_conf
|
||||
Concat mbox_priorbox 6 1 fire5_mbox_priorbox fire9_mbox_priorbox fire10_mbox_priorbox fire11_mbox_priorbox conv12_2_mbox_priorbox conv13_2_mbox_priorbox mbox_priorbox 0=1
|
||||
Reshape mbox_conf_reshape 1 1 mbox_conf mbox_conf_reshape 0=21 1=-1
|
||||
Softmax mbox_conf_softmax 1 1 mbox_conf_reshape mbox_conf_softmax 0=1 1=1
|
||||
Flatten mbox_conf_flatten 1 1 mbox_conf_softmax mbox_conf_flatten
|
||||
DetectionOutput detection_out 3 1 mbox_loc mbox_conf_flatten mbox_priorbox output 0=21 1=0.450000 2=100 4=0.250000
|
25
3rdparty/ncnn/benchmark/vgg16.param
vendored
Normal file
25
3rdparty/ncnn/benchmark/vgg16.param
vendored
Normal file
@ -0,0 +1,25 @@
|
||||
7767517
|
||||
23 23
|
||||
Input data 0 1 data -23330=4,3,224,224,3 0=224 1=224 2=3
|
||||
Convolution conv1_1 1 1 data conv1_1_relu1_1 -23330=4,3,224,224,64 0=64 1=3 4=1 5=1 6=1728 9=1
|
||||
Convolution conv1_2 1 1 conv1_1_relu1_1 conv1_2_relu1_2 -23330=4,3,224,224,64 0=64 1=3 4=1 5=1 6=36864 9=1
|
||||
Pooling pool1 1 1 conv1_2_relu1_2 pool1 -23330=4,3,112,112,64 1=2 2=2
|
||||
Convolution conv2_1 1 1 pool1 conv2_1_relu2_1 -23330=4,3,112,112,128 0=128 1=3 4=1 5=1 6=73728 9=1
|
||||
Convolution conv2_2 1 1 conv2_1_relu2_1 conv2_2_relu2_2 -23330=4,3,112,112,128 0=128 1=3 4=1 5=1 6=147456 9=1
|
||||
Pooling pool2 1 1 conv2_2_relu2_2 pool2 -23330=4,3,56,56,128 1=2 2=2
|
||||
Convolution conv3_1 1 1 pool2 conv3_1_relu3_1 -23330=4,3,56,56,256 0=256 1=3 4=1 5=1 6=294912 9=1
|
||||
Convolution conv3_2 1 1 conv3_1_relu3_1 conv3_2_relu3_2 -23330=4,3,56,56,256 0=256 1=3 4=1 5=1 6=589824 9=1
|
||||
Convolution conv3_3 1 1 conv3_2_relu3_2 conv3_3_relu3_3 -23330=4,3,56,56,256 0=256 1=3 4=1 5=1 6=589824 9=1
|
||||
Pooling pool3 1 1 conv3_3_relu3_3 pool3 -23330=4,3,28,28,256 1=2 2=2
|
||||
Convolution conv4_1 1 1 pool3 conv4_1_relu4_1 -23330=4,3,28,28,512 0=512 1=3 4=1 5=1 6=1179648 9=1
|
||||
Convolution conv4_2 1 1 conv4_1_relu4_1 conv4_2_relu4_2 -23330=4,3,28,28,512 0=512 1=3 4=1 5=1 6=2359296 9=1
|
||||
Convolution conv4_3 1 1 conv4_2_relu4_2 conv4_3_relu4_3 -23330=4,3,28,28,512 0=512 1=3 4=1 5=1 6=2359296 9=1
|
||||
Pooling pool4 1 1 conv4_3_relu4_3 pool4 -23330=4,3,14,14,512 1=2 2=2
|
||||
Convolution conv5_1 1 1 pool4 conv5_1_relu5_1 -23330=4,3,14,14,512 0=512 1=3 4=1 5=1 6=2359296 9=1
|
||||
Convolution conv5_2 1 1 conv5_1_relu5_1 conv5_2_relu5_2 -23330=4,3,14,14,512 0=512 1=3 4=1 5=1 6=2359296 9=1
|
||||
Convolution conv5_3 1 1 conv5_2_relu5_2 conv5_3_relu5_3 -23330=4,3,14,14,512 0=512 1=3 4=1 5=1 6=2359296 9=1
|
||||
Pooling pool5 1 1 conv5_3_relu5_3 pool5 -23330=4,3,7,7,512 1=2 2=2
|
||||
InnerProduct fc6 1 1 pool5 fc6_drop6 -23330=4,1,4096,1,1 0=4096 1=1 2=102760448 9=1
|
||||
InnerProduct fc7 1 1 fc6_drop6 fc7_drop7 -23330=4,1,4096,1,1 0=4096 1=1 2=16777216 9=1
|
||||
InnerProduct fc8 1 1 fc7_drop7 fc8 -23330=4,1,1000,1,1 0=1000 1=1 2=4096000
|
||||
Softmax prob 1 1 fc8 output -23330=4,1,1000,1,1
|
25
3rdparty/ncnn/benchmark/vgg16_int8.param
vendored
Normal file
25
3rdparty/ncnn/benchmark/vgg16_int8.param
vendored
Normal file
@ -0,0 +1,25 @@
|
||||
7767517
|
||||
23 23
|
||||
Input data 0 1 data 0=224 1=224 2=3
|
||||
Convolution conv1_1 1 1 data conv1_1_relu1_1 0=64 1=3 4=1 5=1 6=1728 8=102 9=1
|
||||
Convolution conv1_2 1 1 conv1_1_relu1_1 conv1_2_relu1_2 0=64 1=3 4=1 5=1 6=36864 8=2 9=1
|
||||
Pooling pool1 1 1 conv1_2_relu1_2 pool1 1=2 2=2
|
||||
Convolution conv2_1 1 1 pool1 conv2_1_relu2_1 0=128 1=3 4=1 5=1 6=73728 8=102 9=1
|
||||
Convolution conv2_2 1 1 conv2_1_relu2_1 conv2_2_relu2_2 0=128 1=3 4=1 5=1 6=147456 8=2 9=1
|
||||
Pooling pool2 1 1 conv2_2_relu2_2 pool2 1=2 2=2
|
||||
Convolution conv3_1 1 1 pool2 conv3_1_relu3_1 0=256 1=3 4=1 5=1 6=294912 8=102 9=1
|
||||
Convolution conv3_2 1 1 conv3_1_relu3_1 conv3_2_relu3_2 0=256 1=3 4=1 5=1 6=589824 8=102 9=1
|
||||
Convolution conv3_3 1 1 conv3_2_relu3_2 conv3_3_relu3_3 0=256 1=3 4=1 5=1 6=589824 8=2 9=1
|
||||
Pooling pool3 1 1 conv3_3_relu3_3 pool3 1=2 2=2
|
||||
Convolution conv4_1 1 1 pool3 conv4_1_relu4_1 0=512 1=3 4=1 5=1 6=1179648 8=102 9=1
|
||||
Convolution conv4_2 1 1 conv4_1_relu4_1 conv4_2_relu4_2 0=512 1=3 4=1 5=1 6=2359296 8=102 9=1
|
||||
Convolution conv4_3 1 1 conv4_2_relu4_2 conv4_3_relu4_3 0=512 1=3 4=1 5=1 6=2359296 8=2 9=1
|
||||
Pooling pool4 1 1 conv4_3_relu4_3 pool4 1=2 2=2
|
||||
Convolution conv5_1 1 1 pool4 conv5_1_relu5_1 0=512 1=3 4=1 5=1 6=2359296 8=102 9=1
|
||||
Convolution conv5_2 1 1 conv5_1_relu5_1 conv5_2_relu5_2 0=512 1=3 4=1 5=1 6=2359296 8=102 9=1
|
||||
Convolution conv5_3 1 1 conv5_2_relu5_2 conv5_3_relu5_3 0=512 1=3 4=1 5=1 6=2359296 8=2 9=1
|
||||
Pooling pool5 1 1 conv5_3_relu5_3 pool5 1=2 2=2
|
||||
InnerProduct fc6 1 1 pool5 fc6_drop6 0=4096 1=1 2=102760448 8=2 9=1
|
||||
InnerProduct fc7 1 1 fc6_drop6 fc7_drop7 0=4096 1=1 2=16777216 8=2 9=1
|
||||
InnerProduct fc8 1 1 fc7_drop7 fc8 0=1000 1=1 2=4096000 8=2
|
||||
Softmax prob 1 1 fc8 output
|
133
3rdparty/ncnn/benchmark/yolo-fastest-1.1.param
vendored
Normal file
133
3rdparty/ncnn/benchmark/yolo-fastest-1.1.param
vendored
Normal file
@ -0,0 +1,133 @@
|
||||
7767517
|
||||
131 154
|
||||
Input data 0 1 data -23330=4,3,320,320,3 0=320 1=320 2=3
|
||||
Convolution 0_22 1 1 data 0_22_bn_leaky -23330=4,3,160,160,8 0=8 1=3 3=2 4=1 5=1 6=216 9=2 -23310=1,1.000000e-01
|
||||
Convolution 1_31 1 1 0_22_bn_leaky 1_31_bn_leaky -23330=4,3,160,160,8 0=8 1=1 5=1 6=64 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise 2_39 1 1 1_31_bn_leaky 2_39_bn_leaky -23330=4,3,160,160,8 0=8 1=3 4=1 5=1 6=72 7=8 9=2 -23310=1,1.000000e-01
|
||||
Convolution 3_48 1 1 2_39_bn_leaky 3_48_bn -23330=4,3,160,160,4 0=4 1=1 5=1 6=32
|
||||
Split 3_48_bn_split 1 2 3_48_bn 3_48_bn_split_0 3_48_bn_split_1 -23330=8,3,160,160,4,3,160,160,4
|
||||
Convolution 4_57 1 1 3_48_bn_split_0 4_57_bn_leaky -23330=4,3,160,160,8 0=8 1=1 5=1 6=32 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise 5_65 1 1 4_57_bn_leaky 5_65_bn_leaky -23330=4,3,160,160,8 0=8 1=3 4=1 5=1 6=72 7=8 9=2 -23310=1,1.000000e-01
|
||||
Convolution 6_74 1 1 5_65_bn_leaky 6_74_bn -23330=4,3,160,160,4 0=4 1=1 5=1 6=32
|
||||
Eltwise 8_86 2 1 6_74_bn 3_48_bn_split_1 8_86 -23330=4,3,160,160,4 0=1
|
||||
Convolution 9_90 1 1 8_86 9_90_bn_leaky -23330=4,3,160,160,24 0=24 1=1 5=1 6=96 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise 10_98 1 1 9_90_bn_leaky 10_98_bn_leaky -23330=4,3,80,80,24 0=24 1=3 3=2 4=1 5=1 6=216 7=24 9=2 -23310=1,1.000000e-01
|
||||
Convolution 11_107 1 1 10_98_bn_leaky 11_107_bn -23330=4,3,80,80,8 0=8 1=1 5=1 6=192
|
||||
Split 11_107_bn_split 1 2 11_107_bn 11_107_bn_split_0 11_107_bn_split_1 -23330=8,3,80,80,8,3,80,80,8
|
||||
Convolution 12_116 1 1 11_107_bn_split_0 12_116_bn_leaky -23330=4,3,80,80,32 0=32 1=1 5=1 6=256 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise 13_124 1 1 12_116_bn_leaky 13_124_bn_leaky -23330=4,3,80,80,32 0=32 1=3 4=1 5=1 6=288 7=32 9=2 -23310=1,1.000000e-01
|
||||
Convolution 14_133 1 1 13_124_bn_leaky 14_133_bn -23330=4,3,80,80,8 0=8 1=1 5=1 6=256
|
||||
Eltwise 16_145 2 1 14_133_bn 11_107_bn_split_1 16_145 -23330=4,3,80,80,8 0=1
|
||||
Split 16_145_split 1 2 16_145 16_145_split_0 16_145_split_1 -23330=8,3,80,80,8,3,80,80,8
|
||||
Convolution 17_149 1 1 16_145_split_0 17_149_bn_leaky -23330=4,3,80,80,32 0=32 1=1 5=1 6=256 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise 18_157 1 1 17_149_bn_leaky 18_157_bn_leaky -23330=4,3,80,80,32 0=32 1=3 4=1 5=1 6=288 7=32 9=2 -23310=1,1.000000e-01
|
||||
Convolution 19_166 1 1 18_157_bn_leaky 19_166_bn -23330=4,3,80,80,8 0=8 1=1 5=1 6=256
|
||||
Eltwise 21_179 2 1 19_166_bn 16_145_split_1 21_179 -23330=4,3,80,80,8 0=1
|
||||
Convolution 22_183 1 1 21_179 22_183_bn_leaky -23330=4,3,80,80,32 0=32 1=1 5=1 6=256 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise 23_191 1 1 22_183_bn_leaky 23_191_bn_leaky -23330=4,3,40,40,32 0=32 1=3 3=2 4=1 5=1 6=288 7=32 9=2 -23310=1,1.000000e-01
|
||||
Convolution 24_200 1 1 23_191_bn_leaky 24_200_bn -23330=4,3,40,40,8 0=8 1=1 5=1 6=256
|
||||
Split 24_200_bn_split 1 2 24_200_bn 24_200_bn_split_0 24_200_bn_split_1 -23330=8,3,40,40,8,3,40,40,8
|
||||
Convolution 25_209 1 1 24_200_bn_split_0 25_209_bn_leaky -23330=4,3,40,40,48 0=48 1=1 5=1 6=384 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise 26_217 1 1 25_209_bn_leaky 26_217_bn_leaky -23330=4,3,40,40,48 0=48 1=3 4=1 5=1 6=432 7=48 9=2 -23310=1,1.000000e-01
|
||||
Convolution 27_226 1 1 26_217_bn_leaky 27_226_bn -23330=4,3,40,40,8 0=8 1=1 5=1 6=384
|
||||
Eltwise 29_238 2 1 27_226_bn 24_200_bn_split_1 29_238 -23330=4,3,40,40,8 0=1
|
||||
Split 29_238_split 1 2 29_238 29_238_split_0 29_238_split_1 -23330=8,3,40,40,8,3,40,40,8
|
||||
Convolution 30_242 1 1 29_238_split_0 30_242_bn_leaky -23330=4,3,40,40,48 0=48 1=1 5=1 6=384 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise 31_250 1 1 30_242_bn_leaky 31_250_bn_leaky -23330=4,3,40,40,48 0=48 1=3 4=1 5=1 6=432 7=48 9=2 -23310=1,1.000000e-01
|
||||
Convolution 32_259 1 1 31_250_bn_leaky 32_259_bn -23330=4,3,40,40,8 0=8 1=1 5=1 6=384
|
||||
Eltwise 34_273 2 1 32_259_bn 29_238_split_1 34_273 -23330=4,3,40,40,8 0=1
|
||||
Convolution 35_277 1 1 34_273 35_277_bn_leaky -23330=4,3,40,40,48 0=48 1=1 5=1 6=384 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise 36_285 1 1 35_277_bn_leaky 36_285_bn_leaky -23330=4,3,40,40,48 0=48 1=3 4=1 5=1 6=432 7=48 9=2 -23310=1,1.000000e-01
|
||||
Convolution 37_294 1 1 36_285_bn_leaky 37_294_bn -23330=4,3,40,40,16 0=16 1=1 5=1 6=768
|
||||
Split 37_294_bn_split 1 2 37_294_bn 37_294_bn_split_0 37_294_bn_split_1 -23330=8,3,40,40,16,3,40,40,16
|
||||
Convolution 38_303 1 1 37_294_bn_split_0 38_303_bn_leaky -23330=4,3,40,40,96 0=96 1=1 5=1 6=1536 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise 39_311 1 1 38_303_bn_leaky 39_311_bn_leaky -23330=4,3,40,40,96 0=96 1=3 4=1 5=1 6=864 7=96 9=2 -23310=1,1.000000e-01
|
||||
Convolution 40_320 1 1 39_311_bn_leaky 40_320_bn -23330=4,3,40,40,16 0=16 1=1 5=1 6=1536
|
||||
Eltwise 42_332 2 1 40_320_bn 37_294_bn_split_1 42_332 -23330=4,3,40,40,16 0=1
|
||||
Split 42_332_split 1 2 42_332 42_332_split_0 42_332_split_1 -23330=8,3,40,40,16,3,40,40,16
|
||||
Convolution 43_336 1 1 42_332_split_0 43_336_bn_leaky -23330=4,3,40,40,96 0=96 1=1 5=1 6=1536 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise 44_344 1 1 43_336_bn_leaky 44_344_bn_leaky -23330=4,3,40,40,96 0=96 1=3 4=1 5=1 6=864 7=96 9=2 -23310=1,1.000000e-01
|
||||
Convolution 45_353 1 1 44_344_bn_leaky 45_353_bn -23330=4,3,40,40,16 0=16 1=1 5=1 6=1536
|
||||
Eltwise 47_365 2 1 45_353_bn 42_332_split_1 47_365 -23330=4,3,40,40,16 0=1
|
||||
Split 47_365_split 1 2 47_365 47_365_split_0 47_365_split_1 -23330=8,3,40,40,16,3,40,40,16
|
||||
Convolution 48_369 1 1 47_365_split_0 48_369_bn_leaky -23330=4,3,40,40,96 0=96 1=1 5=1 6=1536 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise 49_377 1 1 48_369_bn_leaky 49_377_bn_leaky -23330=4,3,40,40,96 0=96 1=3 4=1 5=1 6=864 7=96 9=2 -23310=1,1.000000e-01
|
||||
Convolution 50_386 1 1 49_377_bn_leaky 50_386_bn -23330=4,3,40,40,16 0=16 1=1 5=1 6=1536
|
||||
Eltwise 52_399 2 1 50_386_bn 47_365_split_1 52_399 -23330=4,3,40,40,16 0=1
|
||||
Split 52_399_split 1 2 52_399 52_399_split_0 52_399_split_1 -23330=8,3,40,40,16,3,40,40,16
|
||||
Convolution 53_403 1 1 52_399_split_0 53_403_bn_leaky -23330=4,3,40,40,96 0=96 1=1 5=1 6=1536 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise 54_411 1 1 53_403_bn_leaky 54_411_bn_leaky -23330=4,3,40,40,96 0=96 1=3 4=1 5=1 6=864 7=96 9=2 -23310=1,1.000000e-01
|
||||
Convolution 55_420 1 1 54_411_bn_leaky 55_420_bn -23330=4,3,40,40,16 0=16 1=1 5=1 6=1536
|
||||
Eltwise 57_433 2 1 55_420_bn 52_399_split_1 57_433 -23330=4,3,40,40,16 0=1
|
||||
Convolution 58_437 1 1 57_433 58_437_bn_leaky -23330=4,3,40,40,96 0=96 1=1 5=1 6=1536 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise 59_445 1 1 58_437_bn_leaky 59_445_bn_leaky -23330=4,3,20,20,96 0=96 1=3 3=2 4=1 5=1 6=864 7=96 9=2 -23310=1,1.000000e-01
|
||||
Convolution 60_454 1 1 59_445_bn_leaky 60_454_bn -23330=4,3,20,20,24 0=24 1=1 5=1 6=2304
|
||||
Split 60_454_bn_split 1 2 60_454_bn 60_454_bn_split_0 60_454_bn_split_1 -23330=8,3,20,20,24,3,20,20,24
|
||||
Convolution 61_463 1 1 60_454_bn_split_0 61_463_bn_leaky -23330=4,3,20,20,136 0=136 1=1 5=1 6=3264 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise 62_471 1 1 61_463_bn_leaky 62_471_bn_leaky -23330=4,3,20,20,136 0=136 1=3 4=1 5=1 6=1224 7=136 9=2 -23310=1,1.000000e-01
|
||||
Convolution 63_480 1 1 62_471_bn_leaky 63_480_bn -23330=4,3,20,20,24 0=24 1=1 5=1 6=3264
|
||||
Eltwise 65_492 2 1 63_480_bn 60_454_bn_split_1 65_492 -23330=4,3,20,20,24 0=1
|
||||
Split 65_492_split 1 2 65_492 65_492_split_0 65_492_split_1 -23330=8,3,20,20,24,3,20,20,24
|
||||
Convolution 66_496 1 1 65_492_split_0 66_496_bn_leaky -23330=4,3,20,20,136 0=136 1=1 5=1 6=3264 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise 67_504 1 1 66_496_bn_leaky 67_504_bn_leaky -23330=4,3,20,20,136 0=136 1=3 4=1 5=1 6=1224 7=136 9=2 -23310=1,1.000000e-01
|
||||
Convolution 68_513 1 1 67_504_bn_leaky 68_513_bn -23330=4,3,20,20,24 0=24 1=1 5=1 6=3264
|
||||
Eltwise 70_526 2 1 68_513_bn 65_492_split_1 70_526 -23330=4,3,20,20,24 0=1
|
||||
Split 70_526_split 1 2 70_526 70_526_split_0 70_526_split_1 -23330=8,3,20,20,24,3,20,20,24
|
||||
Convolution 71_530 1 1 70_526_split_0 71_530_bn_leaky -23330=4,3,20,20,136 0=136 1=1 5=1 6=3264 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise 72_538 1 1 71_530_bn_leaky 72_538_bn_leaky -23330=4,3,20,20,136 0=136 1=3 4=1 5=1 6=1224 7=136 9=2 -23310=1,1.000000e-01
|
||||
Convolution 73_547 1 1 72_538_bn_leaky 73_547_bn -23330=4,3,20,20,24 0=24 1=1 5=1 6=3264
|
||||
Eltwise 75_559 2 1 73_547_bn 70_526_split_1 75_559 -23330=4,3,20,20,24 0=1
|
||||
Split 75_559_split 1 2 75_559 75_559_split_0 75_559_split_1 -23330=8,3,20,20,24,3,20,20,24
|
||||
Convolution 76_563 1 1 75_559_split_0 76_563_bn_leaky -23330=4,3,20,20,136 0=136 1=1 5=1 6=3264 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise 77_571 1 1 76_563_bn_leaky 77_571_bn_leaky -23330=4,3,20,20,136 0=136 1=3 4=1 5=1 6=1224 7=136 9=2 -23310=1,1.000000e-01
|
||||
Convolution 78_580 1 1 77_571_bn_leaky 78_580_bn -23330=4,3,20,20,24 0=24 1=1 5=1 6=3264
|
||||
Eltwise 80_593 2 1 78_580_bn 75_559_split_1 80_593 -23330=4,3,20,20,24 0=1
|
||||
Split 80_593_split 1 2 80_593 80_593_split_0 80_593_split_1 -23330=8,3,20,20,24,3,20,20,24
|
||||
Convolution 81_597 1 1 80_593_split_0 81_597_bn_leaky -23330=4,3,20,20,136 0=136 1=1 5=1 6=3264 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise 82_605 1 1 81_597_bn_leaky 82_605_bn_leaky -23330=4,3,10,10,136 0=136 1=3 3=2 4=1 5=1 6=1224 7=136 9=2 -23310=1,1.000000e-01
|
||||
Convolution 83_615 1 1 82_605_bn_leaky 83_615_bn -23330=4,3,10,10,48 0=48 1=1 5=1 6=6528
|
||||
Split 83_615_bn_split 1 2 83_615_bn 83_615_bn_split_0 83_615_bn_split_1 -23330=8,3,10,10,48,3,10,10,48
|
||||
Convolution 84_624 1 1 83_615_bn_split_0 84_624_bn_leaky -23330=4,3,10,10,224 0=224 1=1 5=1 6=10752 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise 85_632 1 1 84_624_bn_leaky 85_632_bn_leaky -23330=4,3,10,10,224 0=224 1=3 4=1 5=1 6=2016 7=224 9=2 -23310=1,1.000000e-01
|
||||
Convolution 86_641 1 1 85_632_bn_leaky 86_641_bn -23330=4,3,10,10,48 0=48 1=1 5=1 6=10752
|
||||
Eltwise 88_653 2 1 86_641_bn 83_615_bn_split_1 88_653 -23330=4,3,10,10,48 0=1
|
||||
Split 88_653_split 1 2 88_653 88_653_split_0 88_653_split_1 -23330=8,3,10,10,48,3,10,10,48
|
||||
Convolution 89_657 1 1 88_653_split_0 89_657_bn_leaky -23330=4,3,10,10,224 0=224 1=1 5=1 6=10752 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise 90_665 1 1 89_657_bn_leaky 90_665_bn_leaky -23330=4,3,10,10,224 0=224 1=3 4=1 5=1 6=2016 7=224 9=2 -23310=1,1.000000e-01
|
||||
Convolution 91_674 1 1 90_665_bn_leaky 91_674_bn -23330=4,3,10,10,48 0=48 1=1 5=1 6=10752
|
||||
Eltwise 93_686 2 1 91_674_bn 88_653_split_1 93_686 -23330=4,3,10,10,48 0=1
|
||||
Split 93_686_split 1 2 93_686 93_686_split_0 93_686_split_1 -23330=8,3,10,10,48,3,10,10,48
|
||||
Convolution 94_690 1 1 93_686_split_0 94_690_bn_leaky -23330=4,3,10,10,224 0=224 1=1 5=1 6=10752 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise 95_698 1 1 94_690_bn_leaky 95_698_bn_leaky -23330=4,3,10,10,224 0=224 1=3 4=1 5=1 6=2016 7=224 9=2 -23310=1,1.000000e-01
|
||||
Convolution 96_707 1 1 95_698_bn_leaky 96_707_bn -23330=4,3,10,10,48 0=48 1=1 5=1 6=10752
|
||||
Eltwise 98_719 2 1 96_707_bn 93_686_split_1 98_719 -23330=4,3,10,10,48 0=1
|
||||
Split 98_719_split 1 2 98_719 98_719_split_0 98_719_split_1 -23330=8,3,10,10,48,3,10,10,48
|
||||
Convolution 99_723 1 1 98_719_split_0 99_723_bn_leaky -23330=4,3,10,10,224 0=224 1=1 5=1 6=10752 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise 100_731 1 1 99_723_bn_leaky 100_731_bn_leaky -23330=4,3,10,10,224 0=224 1=3 4=1 5=1 6=2016 7=224 9=2 -23310=1,1.000000e-01
|
||||
Convolution 101_740 1 1 100_731_bn_leaky 101_740_bn -23330=4,3,10,10,48 0=48 1=1 5=1 6=10752
|
||||
Eltwise 103_752 2 1 101_740_bn 98_719_split_1 103_752 -23330=4,3,10,10,48 0=1
|
||||
Split 103_752_split 1 2 103_752 103_752_split_0 103_752_split_1 -23330=8,3,10,10,48,3,10,10,48
|
||||
Convolution 104_756 1 1 103_752_split_0 104_756_bn_leaky -23330=4,3,10,10,224 0=224 1=1 5=1 6=10752 9=2 -23310=1,1.000000e-01
|
||||
ConvolutionDepthWise 105_764 1 1 104_756_bn_leaky 105_764_bn_leaky -23330=4,3,10,10,224 0=224 1=3 4=1 5=1 6=2016 7=224 9=2 -23310=1,1.000000e-01
|
||||
Convolution 106_773 1 1 105_764_bn_leaky 106_773_bn -23330=4,3,10,10,48 0=48 1=1 5=1 6=10752
|
||||
Eltwise 108_784 2 1 106_773_bn 103_752_split_1 108_784 -23330=4,3,10,10,48 0=1
|
||||
Split 108_784_split 1 4 108_784 108_784_split_0 108_784_split_1 108_784_split_2 108_784_split_3 -23330=16,3,10,10,48,3,10,10,48,3,10,10,48,3,10,10,48
|
||||
Pooling 109_788 1 1 108_784_split_0 109_788 -23330=4,3,10,10,48 1=3 3=1 5=1
|
||||
Pooling 111_795 1 1 108_784_split_1 111_795 -23330=4,3,10,10,48 1=5 3=2 5=1
|
||||
Pooling 113_802 1 1 108_784_split_2 113_802 -23330=4,3,10,10,48 1=9 3=4 5=1
|
||||
Concat 114_806 4 1 113_802 111_795 109_788 108_784_split_3 114_806 -23330=4,3,10,10,192
|
||||
Convolution 115_811 1 1 114_806 115_811_bn_leaky -23330=4,3,10,10,96 0=96 1=1 5=1 6=18432 9=2 -23310=1,1.000000e-01
|
||||
Split 115_811_bn_leaky_split 1 2 115_811_bn_leaky 115_811_bn_leaky_split_0 115_811_bn_leaky_split_1 -23330=8,3,10,10,96,3,10,10,96
|
||||
ConvolutionDepthWise 116_819 1 1 115_811_bn_leaky_split_0 116_819_bn_leaky -23330=4,3,10,10,96 0=96 1=5 4=2 5=1 6=2400 7=96 9=2 -23310=1,1.000000e-01
|
||||
Convolution 117_828 1 1 116_819_bn_leaky 117_828_bn -23330=4,3,10,10,96 0=96 1=1 5=1 6=9216
|
||||
ConvolutionDepthWise 118_836 1 1 117_828_bn 118_836_bn_leaky -23330=4,3,10,10,96 0=96 1=5 4=2 5=1 6=2400 7=96 9=2 -23310=1,1.000000e-01
|
||||
Convolution 119_845 1 1 118_836_bn_leaky 119_845_bn -23330=4,3,10,10,96 0=96 1=1 5=1 6=9216
|
||||
Convolution 120_854 1 1 119_845_bn 120_854 -23330=4,3,10,10,255 0=255 1=1 5=1 6=24480
|
||||
Interp 123_882 1 1 115_811_bn_leaky_split_1 123_882 -23330=4,3,20,20,96 0=1 1=2.000000e+00 2=2.000000e+00
|
||||
Concat 124_885 2 1 123_882 80_593_split_1 124_885 -23330=4,3,20,20,120
|
||||
ConvolutionDepthWise 125_888 1 1 124_885 125_888_bn_leaky -23330=4,3,20,20,120 0=120 1=5 4=2 5=1 6=3000 7=120 9=2 -23310=1,1.000000e-01
|
||||
Convolution 126_897 1 1 125_888_bn_leaky 126_897_bn -23330=4,3,20,20,120 0=120 1=1 5=1 6=14400
|
||||
ConvolutionDepthWise 127_905 1 1 126_897_bn 127_905_bn_leaky -23330=4,3,20,20,120 0=120 1=5 4=2 5=1 6=3000 7=120 9=2 -23310=1,1.000000e-01
|
||||
Convolution 128_914 1 1 127_905_bn_leaky 128_914_bn -23330=4,3,20,20,120 0=120 1=1 5=1 6=14400
|
||||
Convolution 129_922 1 1 128_914_bn 129_922 -23330=4,3,20,20,255 0=255 1=1 5=1 6=30600
|
||||
Yolov3DetectionOutput detection_out 2 1 120_854 129_922 output -23330=4,2,6,1431,1 0=80 1=3 2=5.500000e-01 -23304=12,1.200000e+01,1.800000e+01,3.700000e+01,4.900000e+01,5.200000e+01,1.320000e+02,1.150000e+02,7.300000e+01,1.190000e+02,1.990000e+02,2.420000e+02,2.380000e+02 -23305=6,1077936128,1082130432,1084227584,0,1065353216,1073741824 -23306=2,3.200000e+01,1.600000e+01
|
146
3rdparty/ncnn/benchmark/yolo-fastestv2.param
vendored
Normal file
146
3rdparty/ncnn/benchmark/yolo-fastestv2.param
vendored
Normal file
@ -0,0 +1,146 @@
|
||||
7767517
|
||||
144 166
|
||||
Input input.1 0 1 input.1 -23330=4,3,352,352,3 0=352 1=352 2=3
|
||||
Convolution Conv_0 1 1 input.1 447 -23330=4,3,176,176,24 0=24 1=3 3=2 4=1 5=1 6=648 9=1
|
||||
Pooling MaxPool_2 1 1 447 448 -23330=4,3,88,88,24 1=3 2=2 3=1 5=1
|
||||
Split splitncnn_0 1 2 448 448_splitncnn_0 448_splitncnn_1 -23330=8,3,88,88,24,3,88,88,24
|
||||
ConvolutionDepthWise Conv_3 1 1 448_splitncnn_1 800 -23330=4,3,44,44,24 0=24 1=3 3=2 4=1 5=1 6=216 7=24
|
||||
Convolution Conv_4 1 1 800 453 -23330=4,3,44,44,24 0=24 1=1 5=1 6=576 9=1
|
||||
Convolution Conv_6 1 1 448_splitncnn_0 456 -23330=4,3,88,88,24 0=24 1=1 5=1 6=576 9=1
|
||||
ConvolutionDepthWise Conv_8 1 1 456 809 -23330=4,3,44,44,24 0=24 1=3 3=2 4=1 5=1 6=216 7=24
|
||||
Convolution Conv_9 1 1 809 461 -23330=4,3,44,44,24 0=24 1=1 5=1 6=576 9=1
|
||||
Concat Concat_11 2 1 453 461 462 -23330=4,3,44,44,48
|
||||
ShuffleChannel Reshape_16 1 1 462 467 -23330=4,3,44,44,48 0=2 1=1
|
||||
Slice Gather_20 1 2 467 469 471 -23330=8,3,44,44,24,3,44,44,24 -23300=2,-233,-233
|
||||
Convolution Conv_21 1 1 471 474 -23330=4,3,44,44,24 0=24 1=1 5=1 6=576 9=1
|
||||
ConvolutionDepthWise Conv_23 1 1 474 818 -23330=4,3,44,44,24 0=24 1=3 4=1 5=1 6=216 7=24
|
||||
Convolution Conv_24 1 1 818 479 -23330=4,3,44,44,24 0=24 1=1 5=1 6=576 9=1
|
||||
Concat Concat_26 2 1 469 479 480 -23330=4,3,44,44,48
|
||||
ShuffleChannel Reshape_31 1 1 480 485 -23330=4,3,44,44,48 0=2 1=1
|
||||
Slice Gather_35 1 2 485 487 489 -23330=8,3,44,44,24,3,44,44,24 -23300=2,-233,-233
|
||||
Convolution Conv_36 1 1 489 492 -23330=4,3,44,44,24 0=24 1=1 5=1 6=576 9=1
|
||||
ConvolutionDepthWise Conv_38 1 1 492 827 -23330=4,3,44,44,24 0=24 1=3 4=1 5=1 6=216 7=24
|
||||
Convolution Conv_39 1 1 827 497 -23330=4,3,44,44,24 0=24 1=1 5=1 6=576 9=1
|
||||
Concat Concat_41 2 1 487 497 498 -23330=4,3,44,44,48
|
||||
ShuffleChannel Reshape_46 1 1 498 503 -23330=4,3,44,44,48 0=2 1=1
|
||||
Slice Gather_50 1 2 503 505 507 -23330=8,3,44,44,24,3,44,44,24 -23300=2,-233,-233
|
||||
Convolution Conv_51 1 1 507 510 -23330=4,3,44,44,24 0=24 1=1 5=1 6=576 9=1
|
||||
ConvolutionDepthWise Conv_53 1 1 510 836 -23330=4,3,44,44,24 0=24 1=3 4=1 5=1 6=216 7=24
|
||||
Convolution Conv_54 1 1 836 515 -23330=4,3,44,44,24 0=24 1=1 5=1 6=576 9=1
|
||||
Concat Concat_56 2 1 505 515 516 -23330=4,3,44,44,48
|
||||
Split splitncnn_1 1 2 516 516_splitncnn_0 516_splitncnn_1 -23330=8,3,44,44,48,3,44,44,48
|
||||
ConvolutionDepthWise Conv_57 1 1 516_splitncnn_1 842 -23330=4,3,22,22,48 0=48 1=3 3=2 4=1 5=1 6=432 7=48
|
||||
Convolution Conv_58 1 1 842 521 -23330=4,3,22,22,48 0=48 1=1 5=1 6=2304 9=1
|
||||
Convolution Conv_60 1 1 516_splitncnn_0 524 -23330=4,3,44,44,48 0=48 1=1 5=1 6=2304 9=1
|
||||
ConvolutionDepthWise Conv_62 1 1 524 851 -23330=4,3,22,22,48 0=48 1=3 3=2 4=1 5=1 6=432 7=48
|
||||
Convolution Conv_63 1 1 851 529 -23330=4,3,22,22,48 0=48 1=1 5=1 6=2304 9=1
|
||||
Concat Concat_65 2 1 521 529 530 -23330=4,3,22,22,96
|
||||
ShuffleChannel Reshape_70 1 1 530 535 -23330=4,3,22,22,96 0=2 1=1
|
||||
Slice Gather_74 1 2 535 537 539 -23330=8,3,22,22,48,3,22,22,48 -23300=2,-233,-233
|
||||
Convolution Conv_75 1 1 539 542 -23330=4,3,22,22,48 0=48 1=1 5=1 6=2304 9=1
|
||||
ConvolutionDepthWise Conv_77 1 1 542 860 -23330=4,3,22,22,48 0=48 1=3 4=1 5=1 6=432 7=48
|
||||
Convolution Conv_78 1 1 860 547 -23330=4,3,22,22,48 0=48 1=1 5=1 6=2304 9=1
|
||||
Concat Concat_80 2 1 537 547 548 -23330=4,3,22,22,96
|
||||
ShuffleChannel Reshape_85 1 1 548 553 -23330=4,3,22,22,96 0=2 1=1
|
||||
Slice Gather_89 1 2 553 555 557 -23330=8,3,22,22,48,3,22,22,48 -23300=2,-233,-233
|
||||
Convolution Conv_90 1 1 557 560 -23330=4,3,22,22,48 0=48 1=1 5=1 6=2304 9=1
|
||||
ConvolutionDepthWise Conv_92 1 1 560 869 -23330=4,3,22,22,48 0=48 1=3 4=1 5=1 6=432 7=48
|
||||
Convolution Conv_93 1 1 869 565 -23330=4,3,22,22,48 0=48 1=1 5=1 6=2304 9=1
|
||||
Concat Concat_95 2 1 555 565 566 -23330=4,3,22,22,96
|
||||
ShuffleChannel Reshape_100 1 1 566 571 -23330=4,3,22,22,96 0=2 1=1
|
||||
Slice Gather_104 1 2 571 573 575 -23330=8,3,22,22,48,3,22,22,48 -23300=2,-233,-233
|
||||
Convolution Conv_105 1 1 575 578 -23330=4,3,22,22,48 0=48 1=1 5=1 6=2304 9=1
|
||||
ConvolutionDepthWise Conv_107 1 1 578 878 -23330=4,3,22,22,48 0=48 1=3 4=1 5=1 6=432 7=48
|
||||
Convolution Conv_108 1 1 878 583 -23330=4,3,22,22,48 0=48 1=1 5=1 6=2304 9=1
|
||||
Concat Concat_110 2 1 573 583 584 -23330=4,3,22,22,96
|
||||
ShuffleChannel Reshape_115 1 1 584 589 -23330=4,3,22,22,96 0=2 1=1
|
||||
Slice Gather_119 1 2 589 591 593 -23330=8,3,22,22,48,3,22,22,48 -23300=2,-233,-233
|
||||
Convolution Conv_120 1 1 593 596 -23330=4,3,22,22,48 0=48 1=1 5=1 6=2304 9=1
|
||||
ConvolutionDepthWise Conv_122 1 1 596 887 -23330=4,3,22,22,48 0=48 1=3 4=1 5=1 6=432 7=48
|
||||
Convolution Conv_123 1 1 887 601 -23330=4,3,22,22,48 0=48 1=1 5=1 6=2304 9=1
|
||||
Concat Concat_125 2 1 591 601 602 -23330=4,3,22,22,96
|
||||
ShuffleChannel Reshape_130 1 1 602 607 -23330=4,3,22,22,96 0=2 1=1
|
||||
Slice Gather_134 1 2 607 609 611 -23330=8,3,22,22,48,3,22,22,48 -23300=2,-233,-233
|
||||
Convolution Conv_135 1 1 611 614 -23330=4,3,22,22,48 0=48 1=1 5=1 6=2304 9=1
|
||||
ConvolutionDepthWise Conv_137 1 1 614 896 -23330=4,3,22,22,48 0=48 1=3 4=1 5=1 6=432 7=48
|
||||
Convolution Conv_138 1 1 896 619 -23330=4,3,22,22,48 0=48 1=1 5=1 6=2304 9=1
|
||||
Concat Concat_140 2 1 609 619 620 -23330=4,3,22,22,96
|
||||
ShuffleChannel Reshape_145 1 1 620 625 -23330=4,3,22,22,96 0=2 1=1
|
||||
Slice Gather_149 1 2 625 627 629 -23330=8,3,22,22,48,3,22,22,48 -23300=2,-233,-233
|
||||
Convolution Conv_150 1 1 629 632 -23330=4,3,22,22,48 0=48 1=1 5=1 6=2304 9=1
|
||||
ConvolutionDepthWise Conv_152 1 1 632 905 -23330=4,3,22,22,48 0=48 1=3 4=1 5=1 6=432 7=48
|
||||
Convolution Conv_153 1 1 905 637 -23330=4,3,22,22,48 0=48 1=1 5=1 6=2304 9=1
|
||||
Concat Concat_155 2 1 627 637 638 -23330=4,3,22,22,96
|
||||
ShuffleChannel Reshape_160 1 1 638 643 -23330=4,3,22,22,96 0=2 1=1
|
||||
Slice Gather_164 1 2 643 645 647 -23330=8,3,22,22,48,3,22,22,48 -23300=2,-233,-233
|
||||
Convolution Conv_165 1 1 647 650 -23330=4,3,22,22,48 0=48 1=1 5=1 6=2304 9=1
|
||||
ConvolutionDepthWise Conv_167 1 1 650 914 -23330=4,3,22,22,48 0=48 1=3 4=1 5=1 6=432 7=48
|
||||
Convolution Conv_168 1 1 914 655 -23330=4,3,22,22,48 0=48 1=1 5=1 6=2304 9=1
|
||||
Concat Concat_170 2 1 645 655 656 -23330=4,3,22,22,96
|
||||
Split splitncnn_2 1 3 656 656_splitncnn_0 656_splitncnn_1 656_splitncnn_2 -23330=12,3,22,22,96,3,22,22,96,3,22,22,96
|
||||
ConvolutionDepthWise Conv_171 1 1 656_splitncnn_2 920 -23330=4,3,11,11,96 0=96 1=3 3=2 4=1 5=1 6=864 7=96
|
||||
Convolution Conv_172 1 1 920 661 -23330=4,3,11,11,96 0=96 1=1 5=1 6=9216 9=1
|
||||
Convolution Conv_174 1 1 656_splitncnn_1 664 -23330=4,3,22,22,96 0=96 1=1 5=1 6=9216 9=1
|
||||
ConvolutionDepthWise Conv_176 1 1 664 929 -23330=4,3,11,11,96 0=96 1=3 3=2 4=1 5=1 6=864 7=96
|
||||
Convolution Conv_177 1 1 929 669 -23330=4,3,11,11,96 0=96 1=1 5=1 6=9216 9=1
|
||||
Concat Concat_179 2 1 661 669 670 -23330=4,3,11,11,192
|
||||
ShuffleChannel Reshape_184 1 1 670 675 -23330=4,3,11,11,192 0=2 1=1
|
||||
Slice Gather_188 1 2 675 677 679 -23330=8,3,11,11,96,3,11,11,96 -23300=2,-233,-233
|
||||
Convolution Conv_189 1 1 679 682 -23330=4,3,11,11,96 0=96 1=1 5=1 6=9216 9=1
|
||||
ConvolutionDepthWise Conv_191 1 1 682 938 -23330=4,3,11,11,96 0=96 1=3 4=1 5=1 6=864 7=96
|
||||
Convolution Conv_192 1 1 938 687 -23330=4,3,11,11,96 0=96 1=1 5=1 6=9216 9=1
|
||||
Concat Concat_194 2 1 677 687 688 -23330=4,3,11,11,192
|
||||
ShuffleChannel Reshape_199 1 1 688 693 -23330=4,3,11,11,192 0=2 1=1
|
||||
Slice Gather_203 1 2 693 695 697 -23330=8,3,11,11,96,3,11,11,96 -23300=2,-233,-233
|
||||
Convolution Conv_204 1 1 697 700 -23330=4,3,11,11,96 0=96 1=1 5=1 6=9216 9=1
|
||||
ConvolutionDepthWise Conv_206 1 1 700 947 -23330=4,3,11,11,96 0=96 1=3 4=1 5=1 6=864 7=96
|
||||
Convolution Conv_207 1 1 947 705 -23330=4,3,11,11,96 0=96 1=1 5=1 6=9216 9=1
|
||||
Concat Concat_209 2 1 695 705 706 -23330=4,3,11,11,192
|
||||
ShuffleChannel Reshape_214 1 1 706 711 -23330=4,3,11,11,192 0=2 1=1
|
||||
Slice Gather_218 1 2 711 713 715 -23330=8,3,11,11,96,3,11,11,96 -23300=2,-233,-233
|
||||
Convolution Conv_219 1 1 715 718 -23330=4,3,11,11,96 0=96 1=1 5=1 6=9216 9=1
|
||||
ConvolutionDepthWise Conv_221 1 1 718 956 -23330=4,3,11,11,96 0=96 1=3 4=1 5=1 6=864 7=96
|
||||
Convolution Conv_222 1 1 956 723 -23330=4,3,11,11,96 0=96 1=1 5=1 6=9216 9=1
|
||||
Concat Concat_224 2 1 713 723 724 -23330=4,3,11,11,192
|
||||
Split splitncnn_3 1 2 724 724_splitncnn_0 724_splitncnn_1 -23330=8,3,11,11,192,3,11,11,192
|
||||
Convolution Conv_225 1 1 724_splitncnn_1 727 -23330=4,3,11,11,72 0=72 1=1 5=1 6=13824 9=1
|
||||
Split splitncnn_4 1 2 727 727_splitncnn_0 727_splitncnn_1 -23330=8,3,11,11,72,3,11,11,72
|
||||
ConvolutionDepthWise Conv_227 1 1 727_splitncnn_1 730 -23330=4,3,11,11,72 0=72 1=5 4=2 5=1 6=1800 7=72 9=1
|
||||
Convolution Conv_229 1 1 730 968 -23330=4,3,11,11,72 0=72 1=1 5=1 6=5184
|
||||
ConvolutionDepthWise Conv_230 1 1 968 735 -23330=4,3,11,11,72 0=72 1=5 4=2 5=1 6=1800 7=72 9=1
|
||||
Convolution Conv_232 1 1 735 974 -23330=4,3,11,11,72 0=72 1=1 5=1 6=5184
|
||||
Split splitncnn_5 1 2 974 974_splitncnn_0 974_splitncnn_1 -23330=8,3,11,11,72,3,11,11,72
|
||||
ConvolutionDepthWise Conv_233 1 1 727_splitncnn_0 740 -23330=4,3,11,11,72 0=72 1=5 4=2 5=1 6=1800 7=72 9=1
|
||||
Convolution Conv_235 1 1 740 980 -23330=4,3,11,11,72 0=72 1=1 5=1 6=5184
|
||||
ConvolutionDepthWise Conv_236 1 1 980 745 -23330=4,3,11,11,72 0=72 1=5 4=2 5=1 6=1800 7=72 9=1
|
||||
Convolution Conv_238 1 1 745 986 -23330=4,3,11,11,72 0=72 1=1 5=1 6=5184
|
||||
Interp Resize_240 1 1 724_splitncnn_0 752 -23330=4,3,22,22,192 0=1 1=2.000000e+00 2=2.000000e+00
|
||||
Concat Concat_241 2 1 752 656_splitncnn_0 753 -23330=4,3,22,22,288
|
||||
Convolution Conv_242 1 1 753 756 -23330=4,3,22,22,72 0=72 1=1 5=1 6=20736 9=1
|
||||
Split splitncnn_6 1 2 756 756_splitncnn_0 756_splitncnn_1 -23330=8,3,22,22,72,3,22,22,72
|
||||
ConvolutionDepthWise Conv_244 1 1 756_splitncnn_1 759 -23330=4,3,22,22,72 0=72 1=5 4=2 5=1 6=1800 7=72 9=1
|
||||
Convolution Conv_246 1 1 759 995 -23330=4,3,22,22,72 0=72 1=1 5=1 6=5184
|
||||
ConvolutionDepthWise Conv_247 1 1 995 764 -23330=4,3,22,22,72 0=72 1=5 4=2 5=1 6=1800 7=72 9=1
|
||||
Convolution Conv_249 1 1 764 1001 -23330=4,3,22,22,72 0=72 1=1 5=1 6=5184
|
||||
Split splitncnn_7 1 2 1001 1001_splitncnn_0 1001_splitncnn_1 -23330=8,3,22,22,72,3,22,22,72
|
||||
ConvolutionDepthWise Conv_250 1 1 756_splitncnn_0 769 -23330=4,3,22,22,72 0=72 1=5 4=2 5=1 6=1800 7=72 9=1
|
||||
Convolution Conv_252 1 1 769 1007 -23330=4,3,22,22,72 0=72 1=1 5=1 6=5184
|
||||
ConvolutionDepthWise Conv_253 1 1 1007 774 -23330=4,3,22,22,72 0=72 1=5 4=2 5=1 6=1800 7=72 9=1
|
||||
Convolution Conv_255 1 1 774 1013 -23330=4,3,22,22,72 0=72 1=1 5=1 6=5184
|
||||
Convolution Conv_256 1 1 1013 783 -23330=4,3,22,22,12 0=12 1=1 5=1 6=864 9=4
|
||||
Convolution Conv_257 1 1 1001_splitncnn_1 784 -23330=4,3,22,22,3 0=3 1=1 5=1 6=216 9=4
|
||||
Convolution Conv_258 1 1 1001_splitncnn_0 779 -23330=4,3,22,22,80 0=80 1=1 5=1 6=5760
|
||||
Convolution Conv_259 1 1 986 788 -23330=4,3,11,11,12 0=12 1=1 5=1 6=864 9=4
|
||||
Convolution Conv_260 1 1 974_splitncnn_1 789 -23330=4,3,11,11,3 0=3 1=1 5=1 6=216 9=4
|
||||
Convolution Conv_261 1 1 974_splitncnn_0 782 -23330=4,3,11,11,80 0=80 1=1 5=1 6=5760
|
||||
Permute Transpose_264 1 1 779 785 -23330=4,3,80,22,22 0=5
|
||||
Softmax Softmax_265 1 1 785 786 -23330=4,3,80,22,22 0=2 1=1
|
||||
Permute Transpose_266 1 1 786 787 -23330=4,3,22,22,80 0=5
|
||||
Permute Transpose_269 1 1 782 790 -23330=4,3,80,11,11 0=5
|
||||
Softmax Softmax_270 1 1 790 791 -23330=4,3,80,11,11 0=2 1=1
|
||||
Permute Transpose_271 1 1 791 792 -23330=4,3,11,11,80 0=5
|
||||
Concat Concat_272 3 1 783 784 787 793 -23330=4,3,22,22,95
|
||||
Permute Transpose_273 1 1 793 794 -23330=4,3,95,22,22 0=3
|
||||
Concat Concat_274 3 1 788 789 792 795 -23330=4,3,11,11,95
|
||||
Permute Transpose_275 1 1 795 796 -23330=4,3,95,11,11 0=3
|
||||
Noop output 2 1 794 796 output
|
47
3rdparty/ncnn/benchmark/yolov4-tiny.param
vendored
Normal file
47
3rdparty/ncnn/benchmark/yolov4-tiny.param
vendored
Normal file
@ -0,0 +1,47 @@
|
||||
7767517
|
||||
45 53
|
||||
Input data 0 1 data -23330=4,3,416,416,3 0=416 1=416 2=3
|
||||
Convolution 0_25 1 1 data 0_25_bn_leaky -23330=4,3,208,208,32 0=32 1=3 3=2 4=1 5=1 6=864 9=2 -23310=1,1.000000e-01
|
||||
Convolution 1_33 1 1 0_25_bn_leaky 1_33_bn_leaky -23330=4,3,104,104,64 0=64 1=3 3=2 4=1 5=1 6=18432 9=2 -23310=1,1.000000e-01
|
||||
Convolution 2_41 1 1 1_33_bn_leaky 2_41_bn_leaky -23330=4,3,104,104,64 0=64 1=3 4=1 5=1 6=36864 9=2 -23310=1,1.000000e-01
|
||||
Split 2_41_bn_leaky_split 1 2 2_41_bn_leaky 2_41_bn_leaky_split_0 2_41_bn_leaky_split_1 -23330=8,3,104,104,64,3,104,104,64
|
||||
Crop 3_49 1 1 2_41_bn_leaky_split_0 3_49 -23330=4,3,104,104,32 2=32 3=104 4=104 5=32
|
||||
Convolution 4_54 1 1 3_49 4_54_bn_leaky -23330=4,3,104,104,32 0=32 1=3 4=1 5=1 6=9216 9=2 -23310=1,1.000000e-01
|
||||
Split 4_54_bn_leaky_split 1 2 4_54_bn_leaky 4_54_bn_leaky_split_0 4_54_bn_leaky_split_1 -23330=8,3,104,104,32,3,104,104,32
|
||||
Convolution 5_62 1 1 4_54_bn_leaky_split_0 5_62_bn_leaky -23330=4,3,104,104,32 0=32 1=3 4=1 5=1 6=9216 9=2 -23310=1,1.000000e-01
|
||||
Concat 6_70 2 1 5_62_bn_leaky 4_54_bn_leaky_split_1 6_70 -23330=4,3,104,104,64
|
||||
Convolution 7_73 1 1 6_70 7_73_bn_leaky -23330=4,3,104,104,64 0=64 1=1 5=1 6=4096 9=2 -23310=1,1.000000e-01
|
||||
Concat 8_81 2 1 2_41_bn_leaky_split_1 7_73_bn_leaky 8_81 -23330=4,3,104,104,128
|
||||
Pooling 9_84 1 1 8_81 9_84 -23330=4,3,52,52,128 1=2 2=2 14=1 15=1 5=1
|
||||
Convolution 10_88 1 1 9_84 10_88_bn_leaky -23330=4,3,52,52,128 0=128 1=3 4=1 5=1 6=147456 9=2 -23310=1,1.000000e-01
|
||||
Split 10_88_bn_leaky_split 1 2 10_88_bn_leaky 10_88_bn_leaky_split_0 10_88_bn_leaky_split_1 -23330=8,3,52,52,128,3,52,52,128
|
||||
Crop 11_96 1 1 10_88_bn_leaky_split_0 11_96 -23330=4,3,52,52,64 2=64 3=52 4=52 5=64
|
||||
Convolution 12_101 1 1 11_96 12_101_bn_leaky -23330=4,3,52,52,64 0=64 1=3 4=1 5=1 6=36864 9=2 -23310=1,1.000000e-01
|
||||
Split 12_101_bn_leaky_split 1 2 12_101_bn_leaky 12_101_bn_leaky_split_0 12_101_bn_leaky_split_1 -23330=8,3,52,52,64,3,52,52,64
|
||||
Convolution 13_109 1 1 12_101_bn_leaky_split_0 13_109_bn_leaky -23330=4,3,52,52,64 0=64 1=3 4=1 5=1 6=36864 9=2 -23310=1,1.000000e-01
|
||||
Concat 14_117 2 1 13_109_bn_leaky 12_101_bn_leaky_split_1 14_117 -23330=4,3,52,52,128
|
||||
Convolution 15_120 1 1 14_117 15_120_bn_leaky -23330=4,3,52,52,128 0=128 1=1 5=1 6=16384 9=2 -23310=1,1.000000e-01
|
||||
Concat 16_128 2 1 10_88_bn_leaky_split_1 15_120_bn_leaky 16_128 -23330=4,3,52,52,256
|
||||
Pooling 17_131 1 1 16_128 17_131 -23330=4,3,26,26,256 1=2 2=2 14=1 15=1 5=1
|
||||
Convolution 18_135 1 1 17_131 18_135_bn_leaky -23330=4,3,26,26,256 0=256 1=3 4=1 5=1 6=589824 9=2 -23310=1,1.000000e-01
|
||||
Split 18_135_bn_leaky_split 1 2 18_135_bn_leaky 18_135_bn_leaky_split_0 18_135_bn_leaky_split_1 -23330=8,3,26,26,256,3,26,26,256
|
||||
Crop 19_143 1 1 18_135_bn_leaky_split_0 19_143 -23330=4,3,26,26,128 2=128 3=26 4=26 5=128
|
||||
Convolution 20_148 1 1 19_143 20_148_bn_leaky -23330=4,3,26,26,128 0=128 1=3 4=1 5=1 6=147456 9=2 -23310=1,1.000000e-01
|
||||
Split 20_148_bn_leaky_split 1 2 20_148_bn_leaky 20_148_bn_leaky_split_0 20_148_bn_leaky_split_1 -23330=8,3,26,26,128,3,26,26,128
|
||||
Convolution 21_156 1 1 20_148_bn_leaky_split_0 21_156_bn_leaky -23330=4,3,26,26,128 0=128 1=3 4=1 5=1 6=147456 9=2 -23310=1,1.000000e-01
|
||||
Concat 22_164 2 1 21_156_bn_leaky 20_148_bn_leaky_split_1 22_164 -23330=4,3,26,26,256
|
||||
Convolution 23_167 1 1 22_164 23_167_bn_leaky -23330=4,3,26,26,256 0=256 1=1 5=1 6=65536 9=2 -23310=1,1.000000e-01
|
||||
Split 23_167_bn_leaky_split 1 2 23_167_bn_leaky 23_167_bn_leaky_split_0 23_167_bn_leaky_split_1 -23330=8,3,26,26,256,3,26,26,256
|
||||
Concat 24_175 2 1 18_135_bn_leaky_split_1 23_167_bn_leaky_split_0 24_175 -23330=4,3,26,26,512
|
||||
Pooling 25_178 1 1 24_175 25_178 -23330=4,3,13,13,512 1=2 2=2 14=1 15=1 5=1
|
||||
Convolution 26_182 1 1 25_178 26_182_bn_leaky -23330=4,3,13,13,512 0=512 1=3 4=1 5=1 6=2359296 9=2 -23310=1,1.000000e-01
|
||||
Convolution 27_192 1 1 26_182_bn_leaky 27_192_bn_leaky -23330=4,3,13,13,256 0=256 1=1 5=1 6=131072 9=2 -23310=1,1.000000e-01
|
||||
Split 27_192_bn_leaky_split 1 2 27_192_bn_leaky 27_192_bn_leaky_split_0 27_192_bn_leaky_split_1 -23330=8,3,13,13,256,3,13,13,256
|
||||
Convolution 28_200 1 1 27_192_bn_leaky_split_0 28_200_bn_leaky -23330=4,3,13,13,512 0=512 1=3 4=1 5=1 6=1179648 9=2 -23310=1,1.000000e-01
|
||||
Convolution 29_208 1 1 28_200_bn_leaky 29_208 -23330=4,3,13,13,255 0=255 1=1 5=1 6=130560
|
||||
Convolution 32_237 1 1 27_192_bn_leaky_split_1 32_237_bn_leaky -23330=4,3,13,13,128 0=128 1=1 5=1 6=32768 9=2 -23310=1,1.000000e-01
|
||||
Interp 33_245 1 1 32_237_bn_leaky 33_245 -23330=4,3,26,26,128 0=1 1=2.000000e+00 2=2.000000e+00
|
||||
Concat 34_248 2 1 33_245 23_167_bn_leaky_split_1 34_248 -23330=4,3,26,26,384
|
||||
Convolution 35_251 1 1 34_248 35_251_bn_leaky -23330=4,3,26,26,256 0=256 1=3 4=1 5=1 6=884736 9=2 -23310=1,1.000000e-01
|
||||
Convolution 36_259 1 1 35_251_bn_leaky 36_259 -23330=4,3,26,26,255 0=255 1=1 5=1 6=65280
|
||||
Yolov3DetectionOutput detection_out 2 1 29_208 36_259 output -23330=4,2,6,1637,1 0=80 1=3 2=3.000001e-01 -23304=12,1.000000e+01,1.400000e+01,2.300000e+01,2.700000e+01,3.700000e+01,5.800000e+01,8.100000e+01,8.200000e+01,1.350000e+02,1.690000e+02,3.440000e+02,3.190000e+02 -23305=6,1077936128,1082130432,1084227584,1065353216,1073741824,1077936128 -23306=2,3.360000e+01,1.680000e+01
|
59
3rdparty/ncnn/build-android.cmd
vendored
Normal file
59
3rdparty/ncnn/build-android.cmd
vendored
Normal file
@ -0,0 +1,59 @@
|
||||
:: Set android ndk root
|
||||
@ECHO OFF
|
||||
@SETLOCAL
|
||||
@SET ANDROID_NDK=<your-ndk-root_path, such as"E:\android-ndk-r18b">
|
||||
@SET VULKAN_SDK=<your-vulkan-toolkit_path, such as"D:\VulkanSDK\1.1.106.0\Bin">
|
||||
|
||||
:: Set ninja.exe
|
||||
:: @SET NINJA_EXE=<your-ninja-exe_path, such as"D:\android\sdk\cmake\3.10.2.4988404\bin\ninja.exe">
|
||||
|
||||
:: android armv7
|
||||
mkdir build-android-armv7
|
||||
pushd build-android-armv7
|
||||
cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON -DANDROID_PLATFORM=android-21 ..
|
||||
:: cmake -G Ninja -DCMAKE_TOOLCHAIN_FILE="%ANDROID_NDK%/build/cmake/android.toolchain.cmake" -DCMAKE_MAKE_PROGRAM=%NINJA_EXE% -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON -DANDROID_PLATFORM=android-21 ..
|
||||
cmake --build . --parallel %NUMBER_OF_PROCESSORS%
|
||||
cmake --build . --target install
|
||||
popd
|
||||
|
||||
:: android armv7 vulkan
|
||||
mkdir build-android-armv7-vulkan
|
||||
pushd build-android-armv7-vulkan
|
||||
cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON -DANDROID_PLATFORM=android-24 -DNCNN_VULKAN=ON ..
|
||||
cmake --build . --parallel %NUMBER_OF_PROCESSORS%
|
||||
cmake --build . --target install
|
||||
popd
|
||||
|
||||
:: android aarch64
|
||||
mkdir build-android-aarch64
|
||||
pushd build-android-aarch64
|
||||
cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="arm64-v8a" -DANDROID_PLATFORM=android-24 ..
|
||||
cmake --build . --parallel %NUMBER_OF_PROCESSORS%
|
||||
cmake --build . --target install
|
||||
popd
|
||||
|
||||
:: android aarch64 vulkan
|
||||
mkdir build-android-aarch64-vulkan
|
||||
pushd build-android-aarch64-vulkan
|
||||
cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="arm64-v8a" -DANDROID_PLATFORM=android-24 -DNCNN_VULKAN=ON ..
|
||||
cmake --build . --parallel %NUMBER_OF_PROCESSORS%
|
||||
cmake --build . --target install
|
||||
popd
|
||||
|
||||
:: android x86
|
||||
mkdir build-android-x86
|
||||
pushd build-android-x86
|
||||
cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="x86" -DANDROID_PLATFORM=android-19 ..
|
||||
cmake --build . --parallel %NUMBER_OF_PROCESSORS%
|
||||
cmake --build . --target install
|
||||
popd
|
||||
|
||||
:: android x86_64
|
||||
mkdir build-android-x86_64
|
||||
pushd build-android-x86_64
|
||||
cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="x86_64" -DANDROID_PLATFORM=android-21 ..
|
||||
cmake --build . --parallel %NUMBER_OF_PROCESSORS%
|
||||
cmake --build . --target install
|
||||
popd
|
||||
|
||||
@ENDLOCAL
|
219
3rdparty/ncnn/build.sh
vendored
Executable file
219
3rdparty/ncnn/build.sh
vendored
Executable file
@ -0,0 +1,219 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
##### android armv7
|
||||
mkdir -p build-android-armv7
|
||||
pushd build-android-armv7
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON -DANDROID_PLATFORM=android-19 ..
|
||||
make -j4
|
||||
make install
|
||||
popd
|
||||
|
||||
##### android aarch64
|
||||
mkdir -p build-android-aarch64
|
||||
pushd build-android-aarch64
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="arm64-v8a" -DANDROID_PLATFORM=android-21 ..
|
||||
make -j4
|
||||
make install
|
||||
popd
|
||||
|
||||
##### android armv7 without neon
|
||||
mkdir -p build-android-armv7-without-neon
|
||||
pushd build-android-armv7-without-neon
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=OFF -DANDROID_PLATFORM=android-19 ..
|
||||
make -j4
|
||||
make install
|
||||
popd
|
||||
|
||||
##### android x86
|
||||
mkdir -p build-android-x86
|
||||
pushd build-android-x86
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="x86" -DANDROID_PLATFORM=android-19 ..
|
||||
make -j4
|
||||
make install
|
||||
popd
|
||||
|
||||
##### android x86_64
|
||||
mkdir -p build-android-x86_64
|
||||
pushd build-android-x86_64
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="x86_64" -DANDROID_PLATFORM=android-21 ..
|
||||
make -j4
|
||||
make install
|
||||
popd
|
||||
|
||||
##### android armv7 vulkan
|
||||
mkdir -p build-android-armv7-vulkan
|
||||
pushd build-android-armv7-vulkan
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON -DANDROID_PLATFORM=android-24 -DNCNN_VULKAN=ON ..
|
||||
make -j4
|
||||
make install
|
||||
popd
|
||||
|
||||
##### android aarch64 vulkan
|
||||
mkdir -p build-android-aarch64-vulkan
|
||||
pushd build-android-aarch64-vulkan
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="arm64-v8a" -DANDROID_PLATFORM=android-24 -DNCNN_VULKAN=ON ..
|
||||
make -j4
|
||||
make install
|
||||
popd
|
||||
|
||||
##### android x86 vulkan
|
||||
mkdir -p build-android-x86-vulkan
|
||||
pushd build-android-x86-vulkan
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="x86" -DANDROID_PLATFORM=android-24 -DNCNN_VULKAN=ON ..
|
||||
make -j4
|
||||
make install
|
||||
popd
|
||||
|
||||
##### android x86_64 vulkan
|
||||
mkdir -p build-android-x86_64-vulkan
|
||||
pushd build-android-x86_64-vulkan
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="x86_64" -DANDROID_PLATFORM=android-24 -DNCNN_VULKAN=ON ..
|
||||
make -j4
|
||||
make install
|
||||
popd
|
||||
|
||||
##### linux of hisiv300 (forgot the chip name) toolchain with neon and openmp
|
||||
mkdir -p build-hisiv300-linux
|
||||
pushd build-hisiv300-linux
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/hisiv300.toolchain.cmake ..
|
||||
make -j4
|
||||
make install
|
||||
popd
|
||||
|
||||
##### linux of hisiv500 (Hi3516CV200 and Hi3519V101) toolchain with neon and openmp
|
||||
mkdir -p build-hisiv500-linux
|
||||
pushd build-hisiv500-linux
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/hisiv500.toolchain.cmake ..
|
||||
make -j4
|
||||
make install
|
||||
popd
|
||||
|
||||
##### linux of hisiv600 (Hi3559V100) toolchain with neon and no openmp (due to only one cpu, close openmp)
|
||||
mkdir -p build-hisiv600-linux
|
||||
pushd build-hisiv600-linux
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/hisiv600.toolchain.cmake ..
|
||||
make -j4
|
||||
make install
|
||||
popd
|
||||
|
||||
##### linux of himix100 (Hi3559a) toolchain with neon and openmp
|
||||
mkdir -p build-himix100-linux
|
||||
pushd build-himix100-linux
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/himix100.toolchain.cmake ..
|
||||
make -j4
|
||||
make install
|
||||
popd
|
||||
|
||||
##### linux of arm-linux-gnueabi toolchain
|
||||
mkdir -p build-arm-linux-gnueabi
|
||||
pushd build-arm-linux-gnueabi
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabi.toolchain.cmake ..
|
||||
make -j4
|
||||
make install
|
||||
popd
|
||||
|
||||
##### linux of arm-linux-gnueabihf toolchain
|
||||
mkdir -p build-arm-linux-gnueabihf
|
||||
pushd build-arm-linux-gnueabihf
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabihf.toolchain.cmake ..
|
||||
make -j4
|
||||
make install
|
||||
popd
|
||||
|
||||
##### linux of v831 toolchain with neon and openmp
|
||||
mkdir -p build-v831-linux
|
||||
pushd build-v831-linux
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/v831.toolchain.cmake ..
|
||||
make -j4
|
||||
make install
|
||||
popd
|
||||
|
||||
##### linux for aarch64-linux-gnu toolchain
|
||||
mkdir -p build-aarch64-linux-gnu
|
||||
pushd build-aarch64-linux-gnu
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake ..
|
||||
make -j4
|
||||
make install
|
||||
popd
|
||||
|
||||
##### linux host system with gcc/g++
|
||||
mkdir -p build-host-gcc-linux
|
||||
pushd build-host-gcc-linux
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/host.gcc.toolchain.cmake ..
|
||||
make -j4
|
||||
make install
|
||||
popd
|
||||
|
||||
##### ios armv7 arm64
|
||||
mkdir -p build-ios
|
||||
pushd build-ios
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/iosxc.toolchain.cmake -DENABLE_BITCODE=OFF ..
|
||||
make -j4
|
||||
make install
|
||||
popd
|
||||
|
||||
##### ios armv7 arm64 bitcode
|
||||
mkdir -p build-ios-bitcode
|
||||
pushd build-ios-bitcode
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/iosxc.toolchain.cmake -DENABLE_BITCODE=ON ..
|
||||
make -j4
|
||||
make install
|
||||
popd
|
||||
|
||||
##### ios simulator i386 x86_64
|
||||
mkdir -p build-ios-sim
|
||||
pushd build-ios-sim
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/iossimxc.toolchain.cmake -DENABLE_BITCODE=OFF ..
|
||||
make -j4
|
||||
make install
|
||||
popd
|
||||
|
||||
##### ios simulator i386 x86_64 bitcode
|
||||
mkdir -p build-ios-sim-bitcode
|
||||
pushd build-ios-sim-bitcode
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/iossimxc.toolchain.cmake -DENABLE_BITCODE=ON ..
|
||||
make -j4
|
||||
make install
|
||||
popd
|
||||
|
||||
##### ios arm64 vulkan
|
||||
mkdir -p build-ios-vulkan
|
||||
pushd build-ios-vulkan
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/iosxc-arm64.toolchain.cmake -DENABLE_BITCODE=OFF -DVulkan_INCLUDE_DIR=${VULKAN_SDK}/MoltenVK/include -DVulkan_LIBRARY=${VULKAN_SDK}/MoltenVK/iOS/MoltenVK.framework/MoltenVK -DNCNN_VULKAN=ON ..
|
||||
make -j4
|
||||
make install
|
||||
popd
|
||||
|
||||
##### ios arm64 vulkan bitcode
|
||||
mkdir -p build-ios-vulkan-bitcode
|
||||
pushd build-ios-vulkan-bitcode
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/iosxc-arm64.toolchain.cmake -DENABLE_BITCODE=ON -DVulkan_INCLUDE_DIR=${VULKAN_SDK}/MoltenVK/include -DVulkan_LIBRARY=${VULKAN_SDK}/MoltenVK/iOS/MoltenVK.framework/MoltenVK -DNCNN_VULKAN=ON ..
|
||||
make -j4
|
||||
make install
|
||||
popd
|
||||
|
||||
##### ios simulator x86_64 vulkan
|
||||
mkdir -p build-ios-sim-vulkan
|
||||
pushd build-ios-sim-vulkan
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/iossimxc-x64.toolchain.cmake -DENABLE_BITCODE=OFF -DVulkan_INCLUDE_DIR=${VULKAN_SDK}/MoltenVK/include -DVulkan_LIBRARY=${VULKAN_SDK}/MoltenVK/iOS/MoltenVK.framework/MoltenVK -DNCNN_VULKAN=ON ..
|
||||
make
|
||||
make install
|
||||
popd
|
||||
|
||||
##### ios simulator x86_64 vulkan bitcode
|
||||
mkdir -p build-ios-sim-vulkan-bitcode
|
||||
pushd build-ios-sim-vulkan-bitcode
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/iossimxc-x64.toolchain.cmake -DENABLE_BITCODE=ON -DVulkan_INCLUDE_DIR=${VULKAN_SDK}/MoltenVK/include -DVulkan_LIBRARY=${VULKAN_SDK}/MoltenVK/iOS/MoltenVK.framework/MoltenVK -DNCNN_VULKAN=ON ..
|
||||
make -j4
|
||||
make install
|
||||
popd
|
||||
|
||||
##### MacOS
|
||||
mkdir -p build-mac
|
||||
pushd build-mac
|
||||
cmake -DNCNN_OPENMP=OFF \
|
||||
-DNCNN_BENCHMARK=ON \
|
||||
..
|
||||
make -j8
|
||||
make install
|
||||
popd
|
38
3rdparty/ncnn/cmake/ncnnConfig.cmake.in
vendored
Normal file
38
3rdparty/ncnn/cmake/ncnnConfig.cmake.in
vendored
Normal file
@ -0,0 +1,38 @@
|
||||
set(NCNN_OPENMP @NCNN_OPENMP@)
|
||||
set(NCNN_THREADS @NCNN_THREADS@)
|
||||
set(NCNN_VULKAN @NCNN_VULKAN@)
|
||||
set(NCNN_SHARED_LIB @NCNN_SHARED_LIB@)
|
||||
set(NCNN_SYSTEM_GLSLANG @NCNN_SYSTEM_GLSLANG@)
|
||||
|
||||
if(NCNN_OPENMP)
|
||||
find_package(OpenMP)
|
||||
endif()
|
||||
|
||||
if(NCNN_THREADS)
|
||||
set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
|
||||
set(THREADS_PREFER_PTHREAD_FLAG TRUE)
|
||||
find_package(Threads REQUIRED)
|
||||
endif()
|
||||
|
||||
if(NCNN_VULKAN)
|
||||
find_package(Vulkan REQUIRED)
|
||||
|
||||
if(NOT NCNN_SHARED_LIB)
|
||||
if(NCNN_SYSTEM_GLSLANG)
|
||||
set(GLSLANG_TARGET_DIR "@GLSLANG_TARGET_DIR@")
|
||||
else()
|
||||
set(GLSLANG_TARGET_DIR "${CMAKE_CURRENT_LIST_DIR}/../../../@CMAKE_INSTALL_LIBDIR@/cmake")
|
||||
endif(NCNN_SYSTEM_GLSLANG)
|
||||
|
||||
include(${GLSLANG_TARGET_DIR}/OSDependentTargets.cmake)
|
||||
include(${GLSLANG_TARGET_DIR}/OGLCompilerTargets.cmake)
|
||||
if(EXISTS "${GLSLANG_TARGET_DIR}/HLSLTargets.cmake")
|
||||
# hlsl support can be optional
|
||||
include("${GLSLANG_TARGET_DIR}/HLSLTargets.cmake")
|
||||
endif()
|
||||
include(${GLSLANG_TARGET_DIR}/glslangTargets.cmake)
|
||||
include(${GLSLANG_TARGET_DIR}/SPIRVTargets.cmake)
|
||||
endif()
|
||||
endif(NCNN_VULKAN)
|
||||
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/ncnn.cmake)
|
244
3rdparty/ncnn/cmake/ncnn_add_layer.cmake
vendored
Normal file
244
3rdparty/ncnn/cmake/ncnn_add_layer.cmake
vendored
Normal file
@ -0,0 +1,244 @@
|
||||
|
||||
macro(ncnn_add_arch_opt_layer class NCNN_TARGET_ARCH_OPT NCNN_TARGET_ARCH_OPT_CFLAGS)
|
||||
set(NCNN_${NCNN_TARGET_ARCH}_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/layer/${NCNN_TARGET_ARCH}/${name}_${NCNN_TARGET_ARCH}.h)
|
||||
set(NCNN_${NCNN_TARGET_ARCH}_SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/layer/${NCNN_TARGET_ARCH}/${name}_${NCNN_TARGET_ARCH}.cpp)
|
||||
|
||||
if(WITH_LAYER_${name} AND EXISTS ${NCNN_${NCNN_TARGET_ARCH}_HEADER} AND EXISTS ${NCNN_${NCNN_TARGET_ARCH}_SOURCE})
|
||||
|
||||
set(NCNN_${NCNN_TARGET_ARCH_OPT}_HEADER ${CMAKE_CURRENT_BINARY_DIR}/layer/${NCNN_TARGET_ARCH}/${name}_${NCNN_TARGET_ARCH}_${NCNN_TARGET_ARCH_OPT}.h)
|
||||
set(NCNN_${NCNN_TARGET_ARCH_OPT}_SOURCE ${CMAKE_CURRENT_BINARY_DIR}/layer/${NCNN_TARGET_ARCH}/${name}_${NCNN_TARGET_ARCH}_${NCNN_TARGET_ARCH_OPT}.cpp)
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${NCNN_${NCNN_TARGET_ARCH_OPT}_HEADER}
|
||||
COMMAND ${CMAKE_COMMAND} -DSRC=${NCNN_${NCNN_TARGET_ARCH}_HEADER} -DDST=${NCNN_${NCNN_TARGET_ARCH_OPT}_HEADER} -DCLASS=${class} -P "${CMAKE_CURRENT_SOURCE_DIR}/../cmake/ncnn_generate_${NCNN_TARGET_ARCH_OPT}_source.cmake"
|
||||
DEPENDS ${NCNN_${NCNN_TARGET_ARCH}_HEADER}
|
||||
COMMENT "Generating source ${name}_${NCNN_TARGET_ARCH}_${NCNN_TARGET_ARCH_OPT}.h"
|
||||
VERBATIM
|
||||
)
|
||||
set_source_files_properties(${NCNN_${NCNN_TARGET_ARCH_OPT}_HEADER} PROPERTIES GENERATED TRUE)
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${NCNN_${NCNN_TARGET_ARCH_OPT}_SOURCE}
|
||||
COMMAND ${CMAKE_COMMAND} -DSRC=${NCNN_${NCNN_TARGET_ARCH}_SOURCE} -DDST=${NCNN_${NCNN_TARGET_ARCH_OPT}_SOURCE} -DCLASS=${class} -P "${CMAKE_CURRENT_SOURCE_DIR}/../cmake/ncnn_generate_${NCNN_TARGET_ARCH_OPT}_source.cmake"
|
||||
DEPENDS ${NCNN_${NCNN_TARGET_ARCH}_SOURCE}
|
||||
COMMENT "Generating source ${name}_${NCNN_TARGET_ARCH}_${NCNN_TARGET_ARCH_OPT}.cpp"
|
||||
VERBATIM
|
||||
)
|
||||
set_source_files_properties(${NCNN_${NCNN_TARGET_ARCH_OPT}_SOURCE} PROPERTIES GENERATED TRUE)
|
||||
|
||||
set_source_files_properties(${NCNN_${NCNN_TARGET_ARCH_OPT}_SOURCE} PROPERTIES COMPILE_FLAGS ${NCNN_TARGET_ARCH_OPT_CFLAGS})
|
||||
|
||||
list(APPEND ncnn_SRCS ${NCNN_${NCNN_TARGET_ARCH_OPT}_HEADER} ${NCNN_${NCNN_TARGET_ARCH_OPT}_SOURCE})
|
||||
|
||||
# generate layer_declaration and layer_registry file
|
||||
set(layer_declaration "${layer_declaration}#include \"layer/${name}.h\"\n")
|
||||
set(layer_declaration_class "class ${class}_final_${NCNN_TARGET_ARCH_OPT} : virtual public ${class}")
|
||||
set(create_pipeline_content " { int ret = ${class}::create_pipeline(opt); if (ret) return ret; }\n")
|
||||
set(destroy_pipeline_content " { int ret = ${class}::destroy_pipeline(opt); if (ret) return ret; }\n")
|
||||
|
||||
set(layer_declaration "${layer_declaration}#include \"layer/${NCNN_TARGET_ARCH}/${name}_${NCNN_TARGET_ARCH}_${NCNN_TARGET_ARCH_OPT}.h\"\n")
|
||||
set(layer_declaration_class "${layer_declaration_class}, virtual public ${class}_${NCNN_TARGET_ARCH}_${NCNN_TARGET_ARCH_OPT}")
|
||||
set(create_pipeline_content "${create_pipeline_content} { int ret = ${class}_${NCNN_TARGET_ARCH}_${NCNN_TARGET_ARCH_OPT}::create_pipeline(opt); if (ret) return ret; }\n")
|
||||
set(destroy_pipeline_content " { int ret = ${class}_${NCNN_TARGET_ARCH}_${NCNN_TARGET_ARCH_OPT}::destroy_pipeline(opt); if (ret) return ret; }\n${destroy_pipeline_content}")
|
||||
|
||||
if(WITH_LAYER_${name}_vulkan)
|
||||
set(layer_declaration "${layer_declaration}#include \"layer/vulkan/${name}_vulkan.h\"\n")
|
||||
set(layer_declaration_class "${layer_declaration_class}, virtual public ${class}_vulkan")
|
||||
set(create_pipeline_content "${create_pipeline_content} if (vkdev) { int ret = ${class}_vulkan::create_pipeline(opt); if (ret) return ret; }\n")
|
||||
set(destroy_pipeline_content " if (vkdev) { int ret = ${class}_vulkan::destroy_pipeline(opt); if (ret) return ret; }\n${destroy_pipeline_content}")
|
||||
endif()
|
||||
|
||||
set(layer_declaration "${layer_declaration}namespace ncnn {\n${layer_declaration_class}\n{\n")
|
||||
set(layer_declaration "${layer_declaration}public:\n")
|
||||
set(layer_declaration "${layer_declaration} virtual int create_pipeline(const Option& opt) {\n${create_pipeline_content} return 0;\n }\n")
|
||||
set(layer_declaration "${layer_declaration} virtual int destroy_pipeline(const Option& opt) {\n${destroy_pipeline_content} return 0;\n }\n")
|
||||
set(layer_declaration "${layer_declaration}};\n")
|
||||
set(layer_declaration "${layer_declaration}DEFINE_LAYER_CREATOR(${class}_final_${NCNN_TARGET_ARCH_OPT})\n} // namespace ncnn\n\n")
|
||||
|
||||
set(layer_registry_${NCNN_TARGET_ARCH_OPT} "${layer_registry_${NCNN_TARGET_ARCH_OPT}}#if NCNN_STRING\n{\"${class}\", ${class}_final_${NCNN_TARGET_ARCH_OPT}_layer_creator},\n#else\n{${class}_final_${NCNN_TARGET_ARCH_OPT}_layer_creator},\n#endif\n")
|
||||
else()
|
||||
# no arm optimized version
|
||||
if(WITH_LAYER_${name})
|
||||
set(layer_registry_${NCNN_TARGET_ARCH_OPT} "${layer_registry_${NCNN_TARGET_ARCH_OPT}}#if NCNN_STRING\n{\"${class}\", ${class}_final_layer_creator},\n#else\n{${class}_final_layer_creator},\n#endif\n")
|
||||
else()
|
||||
set(layer_registry_${NCNN_TARGET_ARCH_OPT} "${layer_registry_${NCNN_TARGET_ARCH_OPT}}#if NCNN_STRING\n{\"${class}\", 0},\n#else\n{0},\n#endif\n")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
endmacro()
|
||||
|
||||
macro(ncnn_add_arch_opt_source class NCNN_TARGET_ARCH_OPT NCNN_TARGET_ARCH_OPT_CFLAGS)
|
||||
set(NCNN_${NCNN_TARGET_ARCH_OPT}_SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/layer/${NCNN_TARGET_ARCH}/${name}_${NCNN_TARGET_ARCH}_${NCNN_TARGET_ARCH_OPT}.cpp)
|
||||
|
||||
if(WITH_LAYER_${name} AND EXISTS ${NCNN_${NCNN_TARGET_ARCH_OPT}_SOURCE})
|
||||
set_source_files_properties(${NCNN_${NCNN_TARGET_ARCH_OPT}_SOURCE} PROPERTIES COMPILE_FLAGS ${NCNN_TARGET_ARCH_OPT_CFLAGS})
|
||||
list(APPEND ncnn_SRCS ${NCNN_${NCNN_TARGET_ARCH_OPT}_SOURCE})
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
macro(ncnn_add_layer class)
|
||||
string(TOLOWER ${class} name)
|
||||
|
||||
# WITH_LAYER_xxx option
|
||||
if(${ARGC} EQUAL 2)
|
||||
option(WITH_LAYER_${name} "build with layer ${name}" ${ARGV1})
|
||||
else()
|
||||
option(WITH_LAYER_${name} "build with layer ${name}" ON)
|
||||
endif()
|
||||
|
||||
if(NCNN_CMAKE_VERBOSE)
|
||||
message(STATUS "WITH_LAYER_${name} = ${WITH_LAYER_${name}}")
|
||||
endif()
|
||||
|
||||
if(WITH_LAYER_${name})
|
||||
list(APPEND ncnn_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/layer/${name}.cpp)
|
||||
|
||||
# look for arch specific implementation and append source
|
||||
# optimized implementation for armv7, aarch64 or x86
|
||||
set(LAYER_ARCH_SRC ${CMAKE_CURRENT_SOURCE_DIR}/layer/${NCNN_TARGET_ARCH}/${name}_${NCNN_TARGET_ARCH}.cpp)
|
||||
if(EXISTS ${LAYER_ARCH_SRC})
|
||||
set(WITH_LAYER_${name}_${NCNN_TARGET_ARCH} 1)
|
||||
list(APPEND ncnn_SRCS ${LAYER_ARCH_SRC})
|
||||
endif()
|
||||
|
||||
set(LAYER_VULKAN_SRC ${CMAKE_CURRENT_SOURCE_DIR}/layer/vulkan/${name}_vulkan.cpp)
|
||||
if(NCNN_VULKAN AND EXISTS ${LAYER_VULKAN_SRC})
|
||||
set(WITH_LAYER_${name}_vulkan 1)
|
||||
list(APPEND ncnn_SRCS ${LAYER_VULKAN_SRC})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# generate layer_declaration and layer_registry file
|
||||
if(WITH_LAYER_${name})
|
||||
set(layer_declaration "${layer_declaration}#include \"layer/${name}.h\"\n")
|
||||
set(layer_declaration_class "class ${class}_final : virtual public ${class}")
|
||||
set(create_pipeline_content " { int ret = ${class}::create_pipeline(opt); if (ret) return ret; }\n")
|
||||
set(destroy_pipeline_content " { int ret = ${class}::destroy_pipeline(opt); if (ret) return ret; }\n")
|
||||
|
||||
source_group ("sources\\\\layers" FILES "${CMAKE_CURRENT_SOURCE_DIR}/layer/${name}.cpp")
|
||||
endif()
|
||||
|
||||
if(WITH_LAYER_${name}_${NCNN_TARGET_ARCH})
|
||||
set(layer_declaration "${layer_declaration}#include \"layer/${NCNN_TARGET_ARCH}/${name}_${NCNN_TARGET_ARCH}.h\"\n")
|
||||
set(layer_declaration_class "${layer_declaration_class}, virtual public ${class}_${NCNN_TARGET_ARCH}")
|
||||
set(create_pipeline_content "${create_pipeline_content} { int ret = ${class}_${NCNN_TARGET_ARCH}::create_pipeline(opt); if (ret) return ret; }\n")
|
||||
set(destroy_pipeline_content " { int ret = ${class}_${NCNN_TARGET_ARCH}::destroy_pipeline(opt); if (ret) return ret; }\n${destroy_pipeline_content}")
|
||||
|
||||
source_group ("sources\\\\layers\\\\${NCNN_TARGET_ARCH}" FILES "${CMAKE_CURRENT_SOURCE_DIR}/layer/${NCNN_TARGET_ARCH}/${name}_${NCNN_TARGET_ARCH}.cpp")
|
||||
endif()
|
||||
|
||||
if(WITH_LAYER_${name}_vulkan)
|
||||
set(layer_declaration "${layer_declaration}#include \"layer/vulkan/${name}_vulkan.h\"\n")
|
||||
set(layer_declaration_class "${layer_declaration_class}, virtual public ${class}_vulkan")
|
||||
set(create_pipeline_content "${create_pipeline_content} if (vkdev) { int ret = ${class}_vulkan::create_pipeline(opt); if (ret) return ret; }\n")
|
||||
set(destroy_pipeline_content " if (vkdev) { int ret = ${class}_vulkan::destroy_pipeline(opt); if (ret) return ret; }\n${destroy_pipeline_content}")
|
||||
|
||||
file(GLOB_RECURSE NCNN_SHADER_SRCS "layer/vulkan/shader/${name}.comp")
|
||||
file(GLOB_RECURSE NCNN_SHADER_SUBSRCS "layer/vulkan/shader/${name}_*.comp")
|
||||
list(APPEND NCNN_SHADER_SRCS ${NCNN_SHADER_SUBSRCS})
|
||||
foreach(NCNN_SHADER_SRC ${NCNN_SHADER_SRCS})
|
||||
ncnn_add_shader(${NCNN_SHADER_SRC})
|
||||
endforeach()
|
||||
|
||||
source_group ("sources\\\\layers\\\\vulkan" FILES "${CMAKE_CURRENT_SOURCE_DIR}/layer/vulkan/${name}_vulkan.cpp")
|
||||
endif()
|
||||
|
||||
if(WITH_LAYER_${name})
|
||||
set(layer_declaration "${layer_declaration}namespace ncnn {\n${layer_declaration_class}\n{\n")
|
||||
set(layer_declaration "${layer_declaration}public:\n")
|
||||
set(layer_declaration "${layer_declaration} virtual int create_pipeline(const Option& opt) {\n${create_pipeline_content} return 0;\n }\n")
|
||||
set(layer_declaration "${layer_declaration} virtual int destroy_pipeline(const Option& opt) {\n${destroy_pipeline_content} return 0;\n }\n")
|
||||
set(layer_declaration "${layer_declaration}};\n")
|
||||
set(layer_declaration "${layer_declaration}DEFINE_LAYER_CREATOR(${class}_final)\n} // namespace ncnn\n\n")
|
||||
endif()
|
||||
|
||||
if(WITH_LAYER_${name})
|
||||
set(layer_registry "${layer_registry}#if NCNN_STRING\n{\"${class}\", ${class}_final_layer_creator},\n#else\n{${class}_final_layer_creator},\n#endif\n")
|
||||
else()
|
||||
set(layer_registry "${layer_registry}#if NCNN_STRING\n{\"${class}\", 0},\n#else\n{0},\n#endif\n")
|
||||
endif()
|
||||
|
||||
if(NCNN_RUNTIME_CPU AND NCNN_TARGET_ARCH STREQUAL "x86")
|
||||
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
|
||||
if(NCNN_AVX512)
|
||||
ncnn_add_arch_opt_layer(${class} avx512 "/arch:AVX512 /D__FMA__ /D__F16C__")
|
||||
endif()
|
||||
if(NCNN_FMA)
|
||||
ncnn_add_arch_opt_layer(${class} fma "/arch:AVX /D__FMA__ /D__F16C__")
|
||||
endif()
|
||||
if(NCNN_AVX)
|
||||
ncnn_add_arch_opt_layer(${class} avx "/arch:AVX")
|
||||
endif()
|
||||
if(NCNN_AVX512VNNI)
|
||||
ncnn_add_arch_opt_source(${class} avx512vnni "/arch:AVX512 /D__FMA__ /D__F16C__ /D__AVX512VNNI__")
|
||||
endif()
|
||||
if(NCNN_AVXVNNI)
|
||||
ncnn_add_arch_opt_source(${class} avxvnni "/arch:AVX2 /D__FMA__ /D__F16C__ /D__AVXVNNI__")
|
||||
endif()
|
||||
if(NCNN_AVX2)
|
||||
ncnn_add_arch_opt_source(${class} avx2 "/arch:AVX2 /D__FMA__ /D__F16C__")
|
||||
endif()
|
||||
if(NCNN_XOP)
|
||||
ncnn_add_arch_opt_source(${class} xop "/arch:AVX /D__XOP__")
|
||||
endif()
|
||||
if(NCNN_F16C)
|
||||
ncnn_add_arch_opt_source(${class} f16c "/arch:AVX /D__F16C__")
|
||||
endif()
|
||||
else()
|
||||
if(NCNN_AVX512)
|
||||
ncnn_add_arch_opt_layer(${class} avx512 "-mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c")
|
||||
endif()
|
||||
if(NCNN_FMA)
|
||||
ncnn_add_arch_opt_layer(${class} fma "-mavx -mfma")
|
||||
endif()
|
||||
if(NCNN_AVX)
|
||||
ncnn_add_arch_opt_layer(${class} avx "-mavx")
|
||||
endif()
|
||||
if(NCNN_AVX512VNNI)
|
||||
ncnn_add_arch_opt_source(${class} avx512vnni "-mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c -mavx512vnni")
|
||||
endif()
|
||||
if(NCNN_AVXVNNI)
|
||||
ncnn_add_arch_opt_source(${class} avxvnni "-mavx2 -mfma -mf16c -mavxvnni")
|
||||
endif()
|
||||
if(NCNN_AVX2)
|
||||
ncnn_add_arch_opt_source(${class} avx2 "-mavx2 -mfma -mf16c")
|
||||
endif()
|
||||
if(NCNN_XOP)
|
||||
ncnn_add_arch_opt_source(${class} xop "-mavx -mxop")
|
||||
endif()
|
||||
if(NCNN_F16C)
|
||||
ncnn_add_arch_opt_source(${class} f16c "-mavx -mf16c")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NCNN_RUNTIME_CPU AND ((IOS AND CMAKE_OSX_ARCHITECTURES MATCHES "arm64") OR (APPLE AND CMAKE_OSX_ARCHITECTURES MATCHES "arm64") OR (CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm64|aarch64)")))
|
||||
if(NCNN_ARM82)
|
||||
ncnn_add_arch_opt_layer(${class} arm82 "-march=armv8.2-a+fp16")
|
||||
endif()
|
||||
if(NCNN_ARM82DOT)
|
||||
ncnn_add_arch_opt_source(${class} arm82dot "-march=armv8.2-a+fp16+dotprod")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NCNN_RUNTIME_CPU AND NCNN_TARGET_ARCH STREQUAL "mips")
|
||||
if(NCNN_MSA)
|
||||
ncnn_add_arch_opt_layer(${class} msa "-mmsa")
|
||||
endif()
|
||||
if(NCNN_MMI)
|
||||
ncnn_add_arch_opt_source(${class} mmi "-mloongson-mmi")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NCNN_RUNTIME_CPU AND NCNN_RVV AND NCNN_TARGET_ARCH STREQUAL "riscv")
|
||||
if(NCNN_COMPILER_SUPPORT_RVV_FP16)
|
||||
ncnn_add_arch_opt_layer(${class} rvv "-march=rv64gcv_zfh")
|
||||
elseif(NCNN_COMPILER_SUPPORT_RVV)
|
||||
ncnn_add_arch_opt_layer(${class} rvv "-march=rv64gcv")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# generate layer_type_enum file
|
||||
set(layer_type_enum "${layer_type_enum}${class} = ${__LAYER_TYPE_ENUM_INDEX},\n")
|
||||
math(EXPR __LAYER_TYPE_ENUM_INDEX "${__LAYER_TYPE_ENUM_INDEX}+1")
|
||||
endmacro()
|
27
3rdparty/ncnn/cmake/ncnn_add_shader.cmake
vendored
Normal file
27
3rdparty/ncnn/cmake/ncnn_add_shader.cmake
vendored
Normal file
@ -0,0 +1,27 @@
|
||||
|
||||
macro(ncnn_add_shader NCNN_SHADER_SRC)
|
||||
get_filename_component(NCNN_SHADER_SRC_NAME_WE ${NCNN_SHADER_SRC} NAME_WE)
|
||||
set(NCNN_SHADER_COMP_HEADER ${CMAKE_CURRENT_BINARY_DIR}/${NCNN_SHADER_SRC_NAME_WE}.comp.hex.h)
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${NCNN_SHADER_COMP_HEADER}
|
||||
COMMAND ${CMAKE_COMMAND} -DSHADER_SRC=${NCNN_SHADER_SRC} -DSHADER_COMP_HEADER=${NCNN_SHADER_COMP_HEADER} -P "${CMAKE_CURRENT_SOURCE_DIR}/../cmake/ncnn_generate_shader_comp_header.cmake"
|
||||
DEPENDS ${NCNN_SHADER_SRC}
|
||||
COMMENT "Preprocessing shader source ${NCNN_SHADER_SRC_NAME_WE}.comp"
|
||||
VERBATIM
|
||||
)
|
||||
set_source_files_properties(${NCNN_SHADER_COMP_HEADER} PROPERTIES GENERATED TRUE)
|
||||
|
||||
get_filename_component(NCNN_SHADER_COMP_HEADER_NAME ${NCNN_SHADER_COMP_HEADER} NAME)
|
||||
string(APPEND layer_shader_spv_data "#include \"${NCNN_SHADER_COMP_HEADER_NAME}\"\n")
|
||||
|
||||
get_filename_component(NCNN_SHADER_SRC_NAME_WE ${NCNN_SHADER_SRC} NAME_WE)
|
||||
string(APPEND layer_shader_registry "{${NCNN_SHADER_SRC_NAME_WE}_comp_data,sizeof(${NCNN_SHADER_SRC_NAME_WE}_comp_data)},\n")
|
||||
|
||||
list(APPEND NCNN_SHADER_SPV_HEX_FILES ${NCNN_SHADER_COMP_HEADER})
|
||||
|
||||
# generate layer_shader_type_enum file
|
||||
set(layer_shader_type_enum "${layer_shader_type_enum}${NCNN_SHADER_SRC_NAME_WE} = ${__LAYER_SHADER_TYPE_ENUM_INDEX},\n")
|
||||
math(EXPR __LAYER_SHADER_TYPE_ENUM_INDEX "${__LAYER_SHADER_TYPE_ENUM_INDEX}+1")
|
||||
endmacro()
|
||||
|
14
3rdparty/ncnn/cmake/ncnn_generate_arm82_source.cmake
vendored
Normal file
14
3rdparty/ncnn/cmake/ncnn_generate_arm82_source.cmake
vendored
Normal file
@ -0,0 +1,14 @@
|
||||
|
||||
# must define SRC DST CLASS
|
||||
|
||||
file(READ ${SRC} source_data)
|
||||
|
||||
# replace
|
||||
string(TOUPPER ${CLASS} CLASS_UPPER)
|
||||
string(TOLOWER ${CLASS} CLASS_LOWER)
|
||||
|
||||
string(REGEX REPLACE "LAYER_${CLASS_UPPER}_ARM_H" "LAYER_${CLASS_UPPER}_ARM_ARM82_H" source_data "${source_data}")
|
||||
string(REGEX REPLACE "${CLASS}_arm" "${CLASS}_arm_arm82" source_data "${source_data}")
|
||||
string(REGEX REPLACE "#include \"${CLASS_LOWER}_arm.h\"" "#include \"${CLASS_LOWER}_arm_arm82.h\"" source_data "${source_data}")
|
||||
|
||||
file(WRITE ${DST} "${source_data}")
|
14
3rdparty/ncnn/cmake/ncnn_generate_avx512_source.cmake
vendored
Normal file
14
3rdparty/ncnn/cmake/ncnn_generate_avx512_source.cmake
vendored
Normal file
@ -0,0 +1,14 @@
|
||||
|
||||
# must define SRC DST CLASS
|
||||
|
||||
file(READ ${SRC} source_data)
|
||||
|
||||
# replace
|
||||
string(TOUPPER ${CLASS} CLASS_UPPER)
|
||||
string(TOLOWER ${CLASS} CLASS_LOWER)
|
||||
|
||||
string(REGEX REPLACE "LAYER_${CLASS_UPPER}_X86_H" "LAYER_${CLASS_UPPER}_X86_AVX512_H" source_data "${source_data}")
|
||||
string(REGEX REPLACE "${CLASS}_x86" "${CLASS}_x86_avx512" source_data "${source_data}")
|
||||
string(REGEX REPLACE "#include \"${CLASS_LOWER}_x86.h\"" "#include \"${CLASS_LOWER}_x86_avx512.h\"" source_data "${source_data}")
|
||||
|
||||
file(WRITE ${DST} "${source_data}")
|
14
3rdparty/ncnn/cmake/ncnn_generate_avx_source.cmake
vendored
Normal file
14
3rdparty/ncnn/cmake/ncnn_generate_avx_source.cmake
vendored
Normal file
@ -0,0 +1,14 @@
|
||||
|
||||
# must define SRC DST CLASS
|
||||
|
||||
file(READ ${SRC} source_data)
|
||||
|
||||
# replace
|
||||
string(TOUPPER ${CLASS} CLASS_UPPER)
|
||||
string(TOLOWER ${CLASS} CLASS_LOWER)
|
||||
|
||||
string(REGEX REPLACE "LAYER_${CLASS_UPPER}_X86_H" "LAYER_${CLASS_UPPER}_X86_AVX_H" source_data "${source_data}")
|
||||
string(REGEX REPLACE "${CLASS}_x86" "${CLASS}_x86_avx" source_data "${source_data}")
|
||||
string(REGEX REPLACE "#include \"${CLASS_LOWER}_x86.h\"" "#include \"${CLASS_LOWER}_x86_avx.h\"" source_data "${source_data}")
|
||||
|
||||
file(WRITE ${DST} "${source_data}")
|
14
3rdparty/ncnn/cmake/ncnn_generate_fma_source.cmake
vendored
Normal file
14
3rdparty/ncnn/cmake/ncnn_generate_fma_source.cmake
vendored
Normal file
@ -0,0 +1,14 @@
|
||||
|
||||
# must define SRC DST CLASS
|
||||
|
||||
file(READ ${SRC} source_data)
|
||||
|
||||
# replace
|
||||
string(TOUPPER ${CLASS} CLASS_UPPER)
|
||||
string(TOLOWER ${CLASS} CLASS_LOWER)
|
||||
|
||||
string(REGEX REPLACE "LAYER_${CLASS_UPPER}_X86_H" "LAYER_${CLASS_UPPER}_X86_FMA_H" source_data "${source_data}")
|
||||
string(REGEX REPLACE "${CLASS}_x86" "${CLASS}_x86_fma" source_data "${source_data}")
|
||||
string(REGEX REPLACE "#include \"${CLASS_LOWER}_x86.h\"" "#include \"${CLASS_LOWER}_x86_fma.h\"" source_data "${source_data}")
|
||||
|
||||
file(WRITE ${DST} "${source_data}")
|
14
3rdparty/ncnn/cmake/ncnn_generate_msa_source.cmake
vendored
Normal file
14
3rdparty/ncnn/cmake/ncnn_generate_msa_source.cmake
vendored
Normal file
@ -0,0 +1,14 @@
|
||||
|
||||
# must define SRC DST CLASS
|
||||
|
||||
file(READ ${SRC} source_data)
|
||||
|
||||
# replace
|
||||
string(TOUPPER ${CLASS} CLASS_UPPER)
|
||||
string(TOLOWER ${CLASS} CLASS_LOWER)
|
||||
|
||||
string(REGEX REPLACE "LAYER_${CLASS_UPPER}_MIPS_H" "LAYER_${CLASS_UPPER}_MIPS_MSA_H" source_data "${source_data}")
|
||||
string(REGEX REPLACE "${CLASS}_mips" "${CLASS}_mips_msa" source_data "${source_data}")
|
||||
string(REGEX REPLACE "#include \"${CLASS_LOWER}_mips.h\"" "#include \"${CLASS_LOWER}_mips_msa.h\"" source_data "${source_data}")
|
||||
|
||||
file(WRITE ${DST} "${source_data}")
|
14
3rdparty/ncnn/cmake/ncnn_generate_rvv_source.cmake
vendored
Normal file
14
3rdparty/ncnn/cmake/ncnn_generate_rvv_source.cmake
vendored
Normal file
@ -0,0 +1,14 @@
|
||||
|
||||
# must define SRC DST CLASS
|
||||
|
||||
file(READ ${SRC} source_data)
|
||||
|
||||
# replace
|
||||
string(TOUPPER ${CLASS} CLASS_UPPER)
|
||||
string(TOLOWER ${CLASS} CLASS_LOWER)
|
||||
|
||||
string(REGEX REPLACE "LAYER_${CLASS_UPPER}_RISCV_H" "LAYER_${CLASS_UPPER}_RISCV_RVV_H" source_data "${source_data}")
|
||||
string(REGEX REPLACE "${CLASS}_riscv" "${CLASS}_riscv_rvv" source_data "${source_data}")
|
||||
string(REGEX REPLACE "#include \"${CLASS_LOWER}_riscv.h\"" "#include \"${CLASS_LOWER}_riscv_rvv.h\"" source_data "${source_data}")
|
||||
|
||||
file(WRITE ${DST} "${source_data}")
|
24
3rdparty/ncnn/cmake/ncnn_generate_shader_comp_header.cmake
vendored
Normal file
24
3rdparty/ncnn/cmake/ncnn_generate_shader_comp_header.cmake
vendored
Normal file
@ -0,0 +1,24 @@
|
||||
|
||||
# must define SHADER_COMP_HEADER SHADER_SRC
|
||||
|
||||
file(READ ${SHADER_SRC} comp_data)
|
||||
|
||||
# skip leading comment
|
||||
string(FIND "${comp_data}" "#version" version_start)
|
||||
if(NOT ${version_start} EQUAL -1)
|
||||
string(SUBSTRING "${comp_data}" ${version_start} -1 comp_data)
|
||||
endif()
|
||||
|
||||
# remove whitespace
|
||||
string(REGEX REPLACE "\n +" "\n" comp_data "${comp_data}")
|
||||
|
||||
get_filename_component(SHADER_SRC_NAME_WE ${SHADER_SRC} NAME_WE)
|
||||
|
||||
# text to hex
|
||||
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_SRC_NAME_WE}.text2hex.txt "${comp_data}")
|
||||
file(READ ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_SRC_NAME_WE}.text2hex.txt comp_data_hex HEX)
|
||||
string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," comp_data_hex ${comp_data_hex})
|
||||
string(FIND "${comp_data_hex}" "," tail_comma REVERSE)
|
||||
string(SUBSTRING "${comp_data_hex}" 0 ${tail_comma} comp_data_hex)
|
||||
|
||||
file(WRITE ${SHADER_COMP_HEADER} "static const char ${SHADER_SRC_NAME_WE}_comp_data[] = {${comp_data_hex}};\n")
|
581
3rdparty/ncnn/cmake/ncnn_generate_shader_spv_header.cmake
vendored
Normal file
581
3rdparty/ncnn/cmake/ncnn_generate_shader_spv_header.cmake
vendored
Normal file
@ -0,0 +1,581 @@
|
||||
|
||||
function(ncnn_generate_shader_spv_header SHADER_SPV_HEADER SHADER_SPV_HEX_HEADERS SHADER_SRC)
|
||||
|
||||
# fp32
|
||||
get_filename_component(SHADER_SRC_NAME_WE ${SHADER_SRC} NAME_WE)
|
||||
|
||||
set(SHADER_SPV_HEX_FILE ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_SRC_NAME_WE}.spv.hex.h)
|
||||
add_custom_command(
|
||||
OUTPUT ${SHADER_SPV_HEX_FILE}
|
||||
COMMAND ${GLSLANGVALIDATOR_EXECUTABLE}
|
||||
ARGS -Dsfp=float -Dsfpvec2=vec2 -Dsfpvec4=vec4 -Dsfpvec8=mat2x4 -Dsfpmat4=mat4
|
||||
-Dafp=float -Dafpvec2=vec2 -Dafpvec4=vec4 -Dafpvec8=mat2x4 -Dafpmat4=mat4
|
||||
"-D buffer_ld1(buf,i)=buf[i]"
|
||||
"-D buffer_st1(buf,i,v)={buf[i]=v;}"
|
||||
"-D buffer_cp1(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_cp1to4(buf,i,sbuf,si4)={buf[i]=vec4(sbuf[si4.r],sbuf[si4.g],sbuf[si4.b],sbuf[si4.a]);}"
|
||||
"-D buffer_cp1to8(buf,i,sbuf,si4,sii4)={buf[i]=mat2x4(sbuf[si4.r],sbuf[si4.g],sbuf[si4.b],sbuf[si4.a],sbuf[sii4.r],sbuf[sii4.g],sbuf[sii4.b],sbuf[sii4.a]);}"
|
||||
"-D buffer_ld2(buf,i)=buf[i]"
|
||||
"-D buffer_st2(buf,i,v)={buf[i]=v;}"
|
||||
"-D buffer_cp2(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_ld4(buf,i)=buf[i]"
|
||||
"-D buffer_st4(buf,i,v)={buf[i]=v;}"
|
||||
"-D buffer_cp4(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_cp4to1(buf,i4,sbuf,si)={vec4 _v=sbuf[si]; buf[i4.r]=_v.r;buf[i4.g]=_v.g;buf[i4.b]=_v.b;buf[i4.a]=_v.a;}"
|
||||
"-D buffer_cp4to8(buf,i,sbuf,si2)={buf[i]=mat2x4(sbuf[si2.r],sbuf[si2.g]);}"
|
||||
"-D buffer_ld8(buf,i)=buf[i]"
|
||||
"-D buffer_st8(buf,i,v)={buf[i]=v;}"
|
||||
"-D buffer_cp8(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_cp8to1(buf,i4,ii4,sbuf,si)={mat2x4 _v=sbuf[si]; buf[i4.r]=_v[0].r;buf[i4.g]=_v[0].g;buf[i4.b]=_v[0].b;buf[i4.a]=_v[0].a; buf[ii4.r]=_v[1].r;buf[ii4.g]=_v[1].g;buf[ii4.b]=_v[1].b;buf[ii4.a]=_v[1].a;}"
|
||||
"-D buffer_cp8to4(buf,i2,sbuf,si)={mat2x4 _v=sbuf[si]; buf[i2.r]=_v[0];buf[i2.g]=_v[1];}"
|
||||
"-D sfp2afpmat4(v)=v"
|
||||
"-D afp2sfpmat4(v)=v"
|
||||
"-D psc(x)=(x==0?p.x:x)"
|
||||
-V -s -x -o ${SHADER_SPV_HEX_FILE} ${SHADER_SRC}
|
||||
DEPENDS ${SHADER_SRC}
|
||||
COMMENT "Building SPIR-V module ${SHADER_SRC_NAME_WE}.spv"
|
||||
VERBATIM
|
||||
)
|
||||
set_source_files_properties(${SHADER_SPV_HEX_FILE} PROPERTIES GENERATED TRUE)
|
||||
|
||||
# fp16 packed
|
||||
set(SHADER_fp16p_SRC_NAME_WE "${SHADER_SRC_NAME_WE}_fp16p")
|
||||
|
||||
set(SHADER_fp16p_SPV_HEX_FILE ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_fp16p_SRC_NAME_WE}.spv.hex.h)
|
||||
add_custom_command(
|
||||
OUTPUT ${SHADER_fp16p_SPV_HEX_FILE}
|
||||
COMMAND ${GLSLANGVALIDATOR_EXECUTABLE}
|
||||
ARGS -Dsfp=float -Dsfpvec2=uint -Dsfpvec4=uvec2 -Dsfpvec8=uvec4
|
||||
-Dafp=float -Dafpvec2=vec2 -Dafpvec4=vec4 -Dafpvec8=mat2x4 -Dafpmat4=mat4
|
||||
"-D buffer_ld1(buf,i)=buf[i]"
|
||||
"-D buffer_st1(buf,i,v)={buf[i]=v;}"
|
||||
"-D buffer_cp1(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_cp1to4(buf,i,sbuf,si4)={buf[i]=uvec2(packHalf2x16(vec2(sbuf[si4.r],sbuf[si4.g])),packHalf2x16(vec2(sbuf[si4.b],sbuf[si4.a])));}"
|
||||
"-D buffer_cp1to8(buf,i,sbuf,si4,sii4)={buf[i]=uvec4(packHalf2x16(vec2(sbuf[si4.r],sbuf[si4.g])),packHalf2x16(vec2(sbuf[si4.b],sbuf[si4.a])),packHalf2x16(vec2(sbuf[sii4.r],sbuf[sii4.g])),packHalf2x16(vec2(sbuf[sii4.b],sbuf[sii4.a])));}"
|
||||
"-D buffer_ld2(buf,i)=unpackHalf2x16(buf[i])"
|
||||
"-D buffer_st2(buf,i,v)={buf[i]=packHalf2x16(v)}"
|
||||
"-D buffer_cp2(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_ld4(buf,i)=vec4(unpackHalf2x16(buf[i].x),unpackHalf2x16(buf[i].y))"
|
||||
"-D buffer_st4(buf,i,v)={buf[i]=uvec2(packHalf2x16(v.rg),packHalf2x16(v.ba));}"
|
||||
"-D buffer_cp4(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_cp4to1(buf,i4,sbuf,si)={uvec2 _v=sbuf[si]; vec2 _v0=unpackHalf2x16(_v.x);vec2 _v1=unpackHalf2x16(_v.y); buf[i4.r]=_v0.r;buf[i4.g]=_v0.g;buf[i4.b]=_v1.r;buf[i4.a]=_v1.g;}"
|
||||
"-D buffer_cp4to8(buf,i,sbuf,si2)={buf[i]=uvec4(sbuf[si2.r],sbuf[si2.g]);}"
|
||||
"-D buffer_ld8(buf,i)=mat2x4(vec4(unpackHalf2x16(buf[i].r),unpackHalf2x16(buf[i].g)),vec4(unpackHalf2x16(buf[i].b),unpackHalf2x16(buf[i].a)))"
|
||||
"-D buffer_st8(buf,i,v)={buf[i]=uvec4(uvec2(packHalf2x16(v[0].rg),packHalf2x16(v[0].ba)),uvec2(packHalf2x16(v[1].rg),packHalf2x16(v[1].ba)));}"
|
||||
"-D buffer_cp8(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_cp8to1(buf,i4,ii4,sbuf,si)={uvec4 _v=sbuf[si]; vec2 _v0=unpackHalf2x16(_v.r);vec2 _v1=unpackHalf2x16(_v.g);vec2 _v2=unpackHalf2x16(_v.b);vec2 _v3=unpackHalf2x16(_v.a); buf[i4.r]=_v0.r;buf[i4.g]=_v0.g;buf[i4.b]=_v1.r;buf[i4.a]=_v1.g; buf[ii4.r]=_v2.r;buf[ii4.g]=_v2.g;buf[ii4.b]=_v3.r;buf[ii4.a]=_v3.g;}"
|
||||
"-D buffer_cp8to4(buf,i2,sbuf,si)={uvec4 _v=sbuf[si]; buf[i2.r]=_v.rg;buf[i2.g]=_v.ba;}"
|
||||
"-D psc(x)=(x==0?p.x:x)"
|
||||
-DNCNN_fp16_packed=1
|
||||
-V -s -x -o ${SHADER_fp16p_SPV_HEX_FILE} ${SHADER_SRC}
|
||||
DEPENDS ${SHADER_SRC}
|
||||
COMMENT "Building SPIR-V module ${SHADER_fp16p_SRC_NAME_WE}.spv"
|
||||
VERBATIM
|
||||
)
|
||||
set_source_files_properties(${SHADER_fp16p_SPV_HEX_FILE} PROPERTIES GENERATED TRUE)
|
||||
|
||||
# fp16 packed + fp16 arithmetic
|
||||
set(SHADER_fp16pa_SRC_NAME_WE "${SHADER_SRC_NAME_WE}_fp16pa")
|
||||
|
||||
set(SHADER_fp16pa_SPV_HEX_FILE ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_fp16pa_SRC_NAME_WE}.spv.hex.h)
|
||||
add_custom_command(
|
||||
OUTPUT ${SHADER_fp16pa_SPV_HEX_FILE}
|
||||
COMMAND ${GLSLANGVALIDATOR_EXECUTABLE}
|
||||
ARGS -Dsfp=float -Dsfpvec2=uint -Dsfpvec4=uvec2 -Dsfpvec8=uvec4
|
||||
-Dafp=float16_t -Dafpvec2=f16vec2 -Dafpvec4=f16vec4 -Dafpvec8=f16mat2x4 -Dafpmat4=f16mat4
|
||||
"-D buffer_ld1(buf,i)=float16_t(buf[i])"
|
||||
"-D buffer_st1(buf,i,v)={buf[i]=float(v);}"
|
||||
"-D buffer_cp1(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_cp1to4(buf,i,sbuf,si4)={buf[i]=uvec2(packHalf2x16(vec2(f16vec2(sbuf[si4.r],sbuf[si4.g]))),packHalf2x16(vec2(f16vec2(sbuf[si4.b],sbuf[si4.a]))));}"
|
||||
"-D buffer_cp1to8(buf,i,sbuf,si4,sii4)={buf[i]=uvec4(packHalf2x16(vec2(f16vec2(sbuf[si4.r],sbuf[si4.g]))),packHalf2x16(vec2(f16vec2(sbuf[si4.b],sbuf[si4.a]))),packHalf2x16(vec2(f16vec2(sbuf[sii4.r],sbuf[sii4.g]))),packHalf2x16(vec2(f16vec2(sbuf[sii4.b],sbuf[sii4.a]))));}"
|
||||
"-D buffer_ld2(buf,i)=f16vec2(unpackHalf2x16(buf[i]))"
|
||||
"-D buffer_st2(buf,i,v)={buf[i]=packHalf2x16(vec2(v))}"
|
||||
"-D buffer_cp2(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_ld4(buf,i)=f16vec4(vec4(unpackHalf2x16(buf[i].x),unpackHalf2x16(buf[i].y)))"
|
||||
"-D buffer_st4(buf,i,v)={buf[i]=uvec2(packHalf2x16(vec2(v.rg)),packHalf2x16(vec2(v.ba)));}"
|
||||
"-D buffer_cp4(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_cp4to1(buf,i4,sbuf,si)={uvec2 _v=sbuf[si]; vec2 _v0=unpackHalf2x16(_v.x);vec2 _v1=unpackHalf2x16(_v.y); buf[i4.r]=_v0.r;buf[i4.g]=_v0.g;buf[i4.b]=_v1.r;buf[i4.a]=_v1.g;}"
|
||||
"-D buffer_cp4to8(buf,i,sbuf,si2)={buf[i]=uvec4(sbuf[si2.r],sbuf[si2.g]);}"
|
||||
"-D buffer_ld8(buf,i)=f16mat2x4(f16vec4(vec4(unpackHalf2x16(buf[i].r),unpackHalf2x16(buf[i].g))),f16vec4(vec4(unpackHalf2x16(buf[i].b),unpackHalf2x16(buf[i].a))))"
|
||||
"-D buffer_st8(buf,i,v)={buf[i]=uvec4(uvec2(packHalf2x16(vec2(v[0].rg)),packHalf2x16(vec2(v[0].ba))),uvec2(packHalf2x16(vec2(v[1].rg)),packHalf2x16(vec2(v[1].ba))));}"
|
||||
"-D buffer_cp8(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_cp8to1(buf,i4,ii4,sbuf,si)={uvec4 _v=sbuf[si]; vec2 _v0=unpackHalf2x16(_v.r);vec2 _v1=unpackHalf2x16(_v.g);vec2 _v2=unpackHalf2x16(_v.b);vec2 _v3=unpackHalf2x16(_v.a); buf[i4.r]=_v0.r;buf[i4.g]=_v0.g;buf[i4.b]=_v1.r;buf[i4.a]=_v1.g; buf[ii4.r]=_v2.r;buf[ii4.g]=_v2.g;buf[ii4.b]=_v3.r;buf[ii4.a]=_v3.g;}"
|
||||
"-D buffer_cp8to4(buf,i2,sbuf,si)={uvec4 _v=sbuf[si]; buf[i2.r]=_v.rg;buf[i2.g]=_v.ba;}"
|
||||
"-D psc(x)=(x==0?p.x:x)"
|
||||
-DNCNN_fp16_packed=1 -DNCNN_fp16_arithmetic=1
|
||||
-V -s -x -o ${SHADER_fp16pa_SPV_HEX_FILE} ${SHADER_SRC}
|
||||
DEPENDS ${SHADER_SRC}
|
||||
COMMENT "Building SPIR-V module ${SHADER_fp16pa_SRC_NAME_WE}.spv"
|
||||
VERBATIM
|
||||
)
|
||||
set_source_files_properties(${SHADER_fp16pa_SPV_HEX_FILE} PROPERTIES GENERATED TRUE)
|
||||
|
||||
# fp16 storage
|
||||
set(SHADER_fp16s_SRC_NAME_WE "${SHADER_SRC_NAME_WE}_fp16s")
|
||||
|
||||
set(SHADER_fp16s_SPV_HEX_FILE ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_fp16s_SRC_NAME_WE}.spv.hex.h)
|
||||
add_custom_command(
|
||||
OUTPUT ${SHADER_fp16s_SPV_HEX_FILE}
|
||||
COMMAND ${GLSLANGVALIDATOR_EXECUTABLE}
|
||||
ARGS -Dsfp=float16_t -Dsfpvec2=f16vec2 -Dsfpvec4=f16vec4
|
||||
-Dafp=float -Dafpvec2=vec2 -Dafpvec4=vec4 -Dafpvec8=mat2x4 -Dafpmat4=mat4
|
||||
"-D buffer_ld1(buf,i)=float(buf[i])"
|
||||
"-D buffer_st1(buf,i,v)={buf[i]=float16_t(v);}"
|
||||
"-D buffer_cp1(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_cp1to4(buf,i,sbuf,si4)={buf[i].r=sbuf[si4.r];buf[i].g=sbuf[si4.g];buf[i].b=sbuf[si4.b];buf[i].a=sbuf[si4.a];}"
|
||||
"-D buffer_cp1to8(buf,i,sbuf,si4,sii4)={buf[i].abcd.r=sbuf[si4.r];buf[i].abcd.g=sbuf[si4.g];buf[i].abcd.b=sbuf[si4.b];buf[i].abcd.a=sbuf[si4.a];buf[i].efgh.r=sbuf[sii4.r];buf[i].efgh.g=sbuf[sii4.g];buf[i].efgh.b=sbuf[sii4.b];buf[i].efgh.a=sbuf[sii4.a];}"
|
||||
"-D buffer_ld2(buf,i)=vec2(buf[i])"
|
||||
"-D buffer_st2(buf,i,v)={buf[i]=f16vec2(v);}"
|
||||
"-D buffer_cp2(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_ld4(buf,i)=vec4(buf[i])"
|
||||
"-D buffer_st4(buf,i,v)={buf[i]=f16vec4(v);}"
|
||||
"-D buffer_cp4(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_cp4to1(buf,i4,sbuf,si)={buf[i4.r]=sbuf[si].r;buf[i4.g]=sbuf[si].g;buf[i4.b]=sbuf[si].b;buf[i4.a]=sbuf[si].a;}"
|
||||
"-D buffer_cp4to8(buf,i,sbuf,si2)={buf[i].abcd=sbuf[si2.r];buf[i].efgh=sbuf[si2.g];}"
|
||||
"-D buffer_ld8(buf,i)=mat2x4(vec4(buf[i].abcd),vec4(buf[i].efgh))"
|
||||
"-D buffer_st8(buf,i,v)={buf[i].abcd=f16vec4(v[0]);buf[i].efgh=f16vec4(v[1]);}"
|
||||
"-D buffer_cp8(buf,i,sbuf,si)={buf[i].abcd=sbuf[si].abcd;buf[i].efgh=sbuf[si].efgh;}"
|
||||
"-D buffer_cp8to1(buf,i4,ii4,sbuf,si)={buf[i4.r]=sbuf[si].abcd.r;buf[i4.g]=sbuf[si].abcd.g;buf[i4.b]=sbuf[si].abcd.b;buf[i4.a]=sbuf[si].abcd.a; buf[ii4.r]=sbuf[si].efgh.r;buf[ii4.g]=sbuf[si].efgh.g;buf[ii4.b]=sbuf[si].efgh.b;buf[ii4.a]=sbuf[si].efgh.a;}"
|
||||
"-D buffer_cp8to4(buf,i2,sbuf,si)={buf[i2.r]=sbuf[si].abcd;buf[i2.g]=sbuf[si].efgh;}"
|
||||
"-D psc(x)=(x==0?p.x:x)"
|
||||
-DNCNN_fp16_storage=1
|
||||
-V -s -x -o ${SHADER_fp16s_SPV_HEX_FILE} ${SHADER_SRC}
|
||||
DEPENDS ${SHADER_SRC}
|
||||
COMMENT "Building SPIR-V module ${SHADER_fp16s_SRC_NAME_WE}.spv"
|
||||
VERBATIM
|
||||
)
|
||||
set_source_files_properties(${SHADER_fp16s_SPV_HEX_FILE} PROPERTIES GENERATED TRUE)
|
||||
|
||||
# fp16 storage + fp16 arithmetic
|
||||
set(SHADER_fp16sa_SRC_NAME_WE "${SHADER_SRC_NAME_WE}_fp16sa")
|
||||
|
||||
set(SHADER_fp16sa_SPV_HEX_FILE ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_fp16sa_SRC_NAME_WE}.spv.hex.h)
|
||||
add_custom_command(
|
||||
OUTPUT ${SHADER_fp16sa_SPV_HEX_FILE}
|
||||
COMMAND ${GLSLANGVALIDATOR_EXECUTABLE}
|
||||
ARGS -Dsfp=float16_t -Dsfpvec2=f16vec2 -Dsfpvec4=f16vec4 -Dsfpvec8=f16mat2x4 -Dsfpmat4=f16mat4
|
||||
-Dafp=float16_t -Dafpvec2=f16vec2 -Dafpvec4=f16vec4 -Dafpvec8=f16mat2x4 -Dafpmat4=f16mat4
|
||||
"-D buffer_ld1(buf,i)=buf[i]"
|
||||
"-D buffer_st1(buf,i,v)={buf[i]=v;}"
|
||||
"-D buffer_cp1(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_cp1to4(buf,i,sbuf,si4)={buf[i]=f16vec4(sbuf[si4.r],sbuf[si4.g],sbuf[si4.b],sbuf[si4.a]);}"
|
||||
"-D buffer_cp1to8(buf,i,sbuf,si4,sii4)={buf[i]=f16mat2x4(sbuf[si4.r],sbuf[si4.g],sbuf[si4.b],sbuf[si4.a],sbuf[sii4.r],sbuf[sii4.g],sbuf[sii4.b],sbuf[sii4.a]);}"
|
||||
"-D buffer_ld2(buf,i)=buf[i]"
|
||||
"-D buffer_st2(buf,i,v)={buf[i]=v;}"
|
||||
"-D buffer_cp2(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_ld4(buf,i)=buf[i]"
|
||||
"-D buffer_st4(buf,i,v)={buf[i]=v;}"
|
||||
"-D buffer_cp4(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_cp4to1(buf,i4,sbuf,si)={buf[i4.r]=sbuf[si].r;buf[i4.g]=sbuf[si].g;buf[i4.b]=sbuf[si].b;buf[i4.a]=sbuf[si].a;}"
|
||||
"-D buffer_cp4to8(buf,i,sbuf,si2)={buf[i]=f16mat2x4(sbuf[si2.r],sbuf[si2.g]);}"
|
||||
"-D buffer_ld8(buf,i)=buf[i]"
|
||||
"-D buffer_st8(buf,i,v)={buf[i]=v;}"
|
||||
"-D buffer_cp8(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_cp8to1(buf,i4,ii4,sbuf,si)={f16mat2x4 _v=sbuf[si]; buf[i4.r]=_v[0].r;buf[i4.g]=_v[0].g;buf[i4.b]=_v[0].b;buf[i4.a]=_v[0].a; buf[ii4.r]=_v[1].r;buf[ii4.g]=_v[1].g;buf[ii4.b]=_v[1].b;buf[ii4.a]=_v[1].a;}"
|
||||
"-D buffer_cp8to4(buf,i2,sbuf,si)={f16mat2x4 _v=sbuf[si]; buf[i2.r]=_v[0];buf[i2.g]=_v[1];}"
|
||||
"-D sfp2afpmat4(v)=v"
|
||||
"-D afp2sfpmat4(v)=v"
|
||||
"-D psc(x)=(x==0?p.x:x)"
|
||||
-DNCNN_fp16_storage=1 -DNCNN_fp16_arithmetic=1
|
||||
-V -s -x -o ${SHADER_fp16sa_SPV_HEX_FILE} ${SHADER_SRC}
|
||||
DEPENDS ${SHADER_SRC}
|
||||
COMMENT "Building SPIR-V module ${SHADER_fp16sa_SRC_NAME_WE}.spv"
|
||||
VERBATIM
|
||||
)
|
||||
set_source_files_properties(${SHADER_fp16sa_SPV_HEX_FILE} PROPERTIES GENERATED TRUE)
|
||||
|
||||
# image + fp32
|
||||
set(SHADER_image_SRC_NAME_WE "${SHADER_SRC_NAME_WE}_image")
|
||||
|
||||
set(SHADER_image_SPV_HEX_FILE ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_image_SRC_NAME_WE}.spv.hex.h)
|
||||
add_custom_command(
|
||||
OUTPUT ${SHADER_image_SPV_HEX_FILE}
|
||||
COMMAND ${GLSLANGVALIDATOR_EXECUTABLE}
|
||||
ARGS -Dsfp=float -Dsfpvec2=vec2 -Dsfpvec4=vec4 -Dsfpvec8=mat2x4 -Dsfpmat4=mat4
|
||||
-Dafp=float -Dafpvec2=vec2 -Dafpvec4=vec4 -Dafpvec8=mat2x4 -Dafpmat4=mat4
|
||||
|
||||
-Dimfmtc1=r32f -Dimfmtc4=rgba32f
|
||||
-Dunfp=highp
|
||||
|
||||
"-D image1d_ld1(tex,p)=texelFetch(tex,p,0).r"
|
||||
"-D image2d_ld1(tex,p)=texelFetch(tex,p,0).r"
|
||||
"-D image3d_ld1(tex,p)=texelFetch(tex,p,0).r"
|
||||
"-D image1d_st1(img,p,v)={vec4 _v;_v.r=v;imageStore(img,p,_v);}"
|
||||
"-D image2d_st1(img,p,v)={vec4 _v;_v.r=v;imageStore(img,p,_v);}"
|
||||
"-D image3d_st1(img,p,v)={vec4 _v;_v.r=v;imageStore(img,p,_v);}"
|
||||
"-D image1d_cp1(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
|
||||
"-D image2d_cp1(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
|
||||
"-D image3d_cp1(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
|
||||
|
||||
"-D image1d_ld4(tex,p)=texelFetch(tex,p,0)"
|
||||
"-D image2d_ld4(tex,p)=texelFetch(tex,p,0)"
|
||||
"-D image3d_ld4(tex,p)=texelFetch(tex,p,0)"
|
||||
"-D image1d_st4(img,p,v)={imageStore(img,p,v);}"
|
||||
"-D image2d_st4(img,p,v)={imageStore(img,p,v);}"
|
||||
"-D image3d_st4(img,p,v)={imageStore(img,p,v);}"
|
||||
"-D image1d_cp4(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
|
||||
"-D image2d_cp4(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
|
||||
"-D image3d_cp4(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
|
||||
|
||||
"-D image1d_ld8(tex,p)=mat2x4(texelFetch(tex,(p)*2,0),texelFetch(tex,(p)*2+1,0))"
|
||||
"-D image2d_ld8(tex,p)=mat2x4(texelFetch(tex,ivec2(p.x*2,p.y),0),texelFetch(tex,ivec2(p.x*2+1,p.y),0))"
|
||||
"-D image3d_ld8(tex,p)=mat2x4(texelFetch(tex,ivec3(p.x*2,p.y,p.z),0),texelFetch(tex,ivec3(p.x*2+1,p.y,p.z),0))"
|
||||
"-D image1d_st8(img,p,v)={imageStore(img,(p)*2,v[0]);imageStore(img,(p)*2+1,v[1]);}"
|
||||
"-D image2d_st8(img,p,v)={imageStore(img,ivec2(p.x*2,p.y),v[0]);imageStore(img,ivec2(p.x*2+1,p.y),v[1]);}"
|
||||
"-D image3d_st8(img,p,v)={imageStore(img,ivec3(p.x*2,p.y,p.z),v[0]);imageStore(img,ivec3(p.x*2+1,p.y,p.z),v[1]);}"
|
||||
"-D image1d_cp8(img,p,tex,sp)={imageStore(img,(p)*2,texelFetch(tex,sp*2,0));imageStore(img,(p)*2+1,texelFetch(tex,sp*2+1,0));}"
|
||||
"-D image2d_cp8(img,p,tex,sp)={imageStore(img,ivec2(p.x*2,p.y),texelFetch(tex,ivec2(sp.x*2,sp.y),0));imageStore(img,ivec2(p.x*2+1,p.y),texelFetch(tex,ivec2(sp.x*2+1,sp.y),0));}"
|
||||
"-D image3d_cp8(img,p,tex,sp)={imageStore(img,ivec3(p.x*2,p.y,p.z),texelFetch(tex,ivec3(sp.x*2,sp.y,sp.z),0));imageStore(img,ivec3(p.x*2+1,p.y,p.z),texelFetch(tex,ivec3(sp.x*2+1,sp.y,sp.z),0));}"
|
||||
|
||||
"-D buffer_ld1(buf,i)=buf[i]"
|
||||
"-D buffer_st1(buf,i,v)={buf[i]=v;}"
|
||||
"-D buffer_cp1(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_cp1to4(buf,i,sbuf,si4)={buf[i]=vec4(sbuf[si4.r],sbuf[si4.g],sbuf[si4.b],sbuf[si4.a]);}"
|
||||
"-D buffer_cp1to8(buf,i,sbuf,si4,sii4)={buf[i]=mat2x4(sbuf[si4.r],sbuf[si4.g],sbuf[si4.b],sbuf[si4.a],sbuf[sii4.r],sbuf[sii4.g],sbuf[sii4.b],sbuf[sii4.a]);}"
|
||||
"-D buffer_ld2(buf,i)=buf[i]"
|
||||
"-D buffer_st2(buf,i,v)={buf[i]=v;}"
|
||||
"-D buffer_cp2(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_ld4(buf,i)=buf[i]"
|
||||
"-D buffer_st4(buf,i,v)={buf[i]=v;}"
|
||||
"-D buffer_cp4(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_cp4to1(buf,i4,sbuf,si)={vec4 _v=sbuf[si]; buf[i4.r]=_v.r;buf[i4.g]=_v.g;buf[i4.b]=_v.b;buf[i4.a]=_v.a;}"
|
||||
"-D buffer_cp4to8(buf,i,sbuf,si2)={buf[i]=mat2x4(sbuf[si2.r],sbuf[si2.g]);}"
|
||||
"-D buffer_ld8(buf,i)=buf[i]"
|
||||
"-D buffer_st8(buf,i,v)={buf[i]=v;}"
|
||||
"-D buffer_cp8(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_cp8to1(buf,i4,ii4,sbuf,si)={mat2x4 _v=sbuf[si]; buf[i4.r]=_v[0].r;buf[i4.g]=_v[0].g;buf[i4.b]=_v[0].b;buf[i4.a]=_v[0].a; buf[ii4.r]=_v[1].r;buf[ii4.g]=_v[1].g;buf[ii4.b]=_v[1].b;buf[ii4.a]=_v[1].a;}"
|
||||
"-D buffer_cp8to4(buf,i2,sbuf,si)={mat2x4 _v=sbuf[si]; buf[i2.r]=_v[0];buf[i2.g]=_v[1];}"
|
||||
|
||||
"-D sfp2afpmat4(v)=v"
|
||||
"-D afp2sfpmat4(v)=v"
|
||||
"-D psc(x)=(x==0?p.x:x)"
|
||||
-DNCNN_image_shader=1
|
||||
-V -s -x -o ${SHADER_image_SPV_HEX_FILE} ${SHADER_SRC}
|
||||
DEPENDS ${SHADER_SRC}
|
||||
COMMENT "Building SPIR-V module ${SHADER_image_SRC_NAME_WE}.spv"
|
||||
VERBATIM
|
||||
)
|
||||
set_source_files_properties(${SHADER_image_SPV_HEX_FILE} PROPERTIES GENERATED TRUE)
|
||||
|
||||
# image + fp16p
|
||||
set(SHADER_image_fp16p_SRC_NAME_WE "${SHADER_SRC_NAME_WE}_image_fp16p")
|
||||
|
||||
set(SHADER_image_fp16p_SPV_HEX_FILE ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_image_fp16p_SRC_NAME_WE}.spv.hex.h)
|
||||
add_custom_command(
|
||||
OUTPUT ${SHADER_image_fp16p_SPV_HEX_FILE}
|
||||
COMMAND ${GLSLANGVALIDATOR_EXECUTABLE}
|
||||
ARGS -Dsfp=float -Dsfpvec2=uint -Dsfpvec4=uvec2 -Dsfpvec8=uvec4
|
||||
-Dafp=float -Dafpvec2=vec2 -Dafpvec4=vec4 -Dafpvec8=mat2x4 -Dafpmat4=mat4
|
||||
|
||||
-Dimfmtc1=r32f -Dimfmtc4=rgba16f
|
||||
-Dunfp=mediump
|
||||
|
||||
"-D image1d_ld1(tex,p)=texelFetch(tex,p,0).r"
|
||||
"-D image2d_ld1(tex,p)=texelFetch(tex,p,0).r"
|
||||
"-D image3d_ld1(tex,p)=texelFetch(tex,p,0).r"
|
||||
"-D image1d_st1(img,p,v)={vec4 _v;_v.r=v;imageStore(img,p,_v);}"
|
||||
"-D image2d_st1(img,p,v)={vec4 _v;_v.r=v;imageStore(img,p,_v);}"
|
||||
"-D image3d_st1(img,p,v)={vec4 _v;_v.r=v;imageStore(img,p,_v);}"
|
||||
"-D image1d_cp1(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
|
||||
"-D image2d_cp1(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
|
||||
"-D image3d_cp1(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
|
||||
|
||||
"-D image1d_ld4(tex,p)=texelFetch(tex,p,0)"
|
||||
"-D image2d_ld4(tex,p)=texelFetch(tex,p,0)"
|
||||
"-D image3d_ld4(tex,p)=texelFetch(tex,p,0)"
|
||||
"-D image1d_st4(img,p,v)={imageStore(img,p,v);}"
|
||||
"-D image2d_st4(img,p,v)={imageStore(img,p,v);}"
|
||||
"-D image3d_st4(img,p,v)={imageStore(img,p,v);}"
|
||||
"-D image1d_cp4(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
|
||||
"-D image2d_cp4(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
|
||||
"-D image3d_cp4(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
|
||||
|
||||
"-D image1d_ld8(tex,p)=mat2x4(texelFetch(tex,(p)*2,0),texelFetch(tex,(p)*2+1,0))"
|
||||
"-D image2d_ld8(tex,p)=mat2x4(texelFetch(tex,ivec2(p.x*2,p.y),0),texelFetch(tex,ivec2(p.x*2+1,p.y),0))"
|
||||
"-D image3d_ld8(tex,p)=mat2x4(texelFetch(tex,ivec3(p.x*2,p.y,p.z),0),texelFetch(tex,ivec3(p.x*2+1,p.y,p.z),0))"
|
||||
"-D image1d_st8(img,p,v)={imageStore(img,(p)*2,v[0]);imageStore(img,(p)*2+1,v[1]);}"
|
||||
"-D image2d_st8(img,p,v)={imageStore(img,ivec2(p.x*2,p.y),v[0]);imageStore(img,ivec2(p.x*2+1,p.y),v[1]);}"
|
||||
"-D image3d_st8(img,p,v)={imageStore(img,ivec3(p.x*2,p.y,p.z),v[0]);imageStore(img,ivec3(p.x*2+1,p.y,p.z),v[1]);}"
|
||||
"-D image1d_cp8(img,p,tex,sp)={imageStore(img,(p)*2,texelFetch(tex,sp*2,0));imageStore(img,(p)*2+1,texelFetch(tex,sp*2+1,0));}"
|
||||
"-D image2d_cp8(img,p,tex,sp)={imageStore(img,ivec2(p.x*2,p.y),texelFetch(tex,ivec2(sp.x*2,sp.y),0));imageStore(img,ivec2(p.x*2+1,p.y),texelFetch(tex,ivec2(sp.x*2+1,sp.y),0));}"
|
||||
"-D image3d_cp8(img,p,tex,sp)={imageStore(img,ivec3(p.x*2,p.y,p.z),texelFetch(tex,ivec3(sp.x*2,sp.y,sp.z),0));imageStore(img,ivec3(p.x*2+1,p.y,p.z),texelFetch(tex,ivec3(sp.x*2+1,sp.y,sp.z),0));}"
|
||||
|
||||
"-D buffer_ld1(buf,i)=buf[i]"
|
||||
"-D buffer_st1(buf,i,v)={buf[i]=v;}"
|
||||
"-D buffer_cp1(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_cp1to4(buf,i,sbuf,si4)={buf[i]=uvec2(packHalf2x16(vec2(sbuf[si4.r],sbuf[si4.g])),packHalf2x16(vec2(sbuf[si4.b],sbuf[si4.a])));}"
|
||||
"-D buffer_cp1to8(buf,i,sbuf,si4,sii4)={buf[i]=uvec4(packHalf2x16(vec2(sbuf[si4.r],sbuf[si4.g])),packHalf2x16(vec2(sbuf[si4.b],sbuf[si4.a])),packHalf2x16(vec2(sbuf[sii4.r],sbuf[sii4.g])),packHalf2x16(vec2(sbuf[sii4.b],sbuf[sii4.a])));}"
|
||||
"-D buffer_ld2(buf,i)=unpackHalf2x16(buf[i])"
|
||||
"-D buffer_st2(buf,i,v)={buf[i]=packHalf2x16(v)}"
|
||||
"-D buffer_cp2(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_ld4(buf,i)=vec4(unpackHalf2x16(buf[i].x),unpackHalf2x16(buf[i].y))"
|
||||
"-D buffer_st4(buf,i,v)={buf[i]=uvec2(packHalf2x16(v.rg),packHalf2x16(v.ba));}"
|
||||
"-D buffer_cp4(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_cp4to1(buf,i4,sbuf,si)={uvec2 _v=sbuf[si]; vec2 _v0=unpackHalf2x16(_v.x);vec2 _v1=unpackHalf2x16(_v.y); buf[i4.r]=_v0.r;buf[i4.g]=_v0.g;buf[i4.b]=_v1.r;buf[i4.a]=_v1.g;}"
|
||||
"-D buffer_cp4to8(buf,i,sbuf,si2)={buf[i]=uvec4(sbuf[si2.r],sbuf[si2.g]);}"
|
||||
"-D buffer_ld8(buf,i)=mat2x4(vec4(unpackHalf2x16(buf[i].r),unpackHalf2x16(buf[i].g)),vec4(unpackHalf2x16(buf[i].b),unpackHalf2x16(buf[i].a)))"
|
||||
"-D buffer_st8(buf,i,v)={buf[i]=uvec4(uvec2(packHalf2x16(v[0].rg),packHalf2x16(v[0].ba)),uvec2(packHalf2x16(v[1].rg),packHalf2x16(v[1].ba)));}"
|
||||
"-D buffer_cp8(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_cp8to1(buf,i4,ii4,sbuf,si)={uvec4 _v=sbuf[si]; vec2 _v0=unpackHalf2x16(_v.r);vec2 _v1=unpackHalf2x16(_v.g);vec2 _v2=unpackHalf2x16(_v.b);vec2 _v3=unpackHalf2x16(_v.a); buf[i4.r]=_v0.r;buf[i4.g]=_v0.g;buf[i4.b]=_v1.r;buf[i4.a]=_v1.g; buf[ii4.r]=_v2.r;buf[ii4.g]=_v2.g;buf[ii4.b]=_v3.r;buf[ii4.a]=_v3.g;}"
|
||||
"-D buffer_cp8to4(buf,i2,sbuf,si)={uvec4 _v=sbuf[si]; buf[i2.r]=_v.rg;buf[i2.g]=_v.ba;}"
|
||||
|
||||
"-D psc(x)=(x==0?p.x:x)"
|
||||
-DNCNN_image_shader=1 -DNCNN_fp16_packed=1
|
||||
-V -s -x -o ${SHADER_image_fp16p_SPV_HEX_FILE} ${SHADER_SRC}
|
||||
DEPENDS ${SHADER_SRC}
|
||||
COMMENT "Building SPIR-V module ${SHADER_image_fp16p_SRC_NAME_WE}.spv"
|
||||
VERBATIM
|
||||
)
|
||||
set_source_files_properties(${SHADER_image_fp16p_SPV_HEX_FILE} PROPERTIES GENERATED TRUE)
|
||||
|
||||
# image + fp16p + fp16a
|
||||
set(SHADER_image_fp16pa_SRC_NAME_WE "${SHADER_SRC_NAME_WE}_image_fp16pa")
|
||||
|
||||
set(SHADER_image_fp16pa_SPV_HEX_FILE ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_image_fp16pa_SRC_NAME_WE}.spv.hex.h)
|
||||
add_custom_command(
|
||||
OUTPUT ${SHADER_image_fp16pa_SPV_HEX_FILE}
|
||||
COMMAND ${GLSLANGVALIDATOR_EXECUTABLE}
|
||||
ARGS -Dsfp=float -Dsfpvec2=uint -Dsfpvec4=uvec2 -Dsfpvec8=uvec4
|
||||
-Dafp=float16_t -Dafpvec2=f16vec2 -Dafpvec4=f16vec4 -Dafpvec8=f16mat2x4 -Dafpmat4=f16mat4
|
||||
|
||||
-Dimfmtc1=r32f -Dimfmtc4=rgba16f
|
||||
-Dunfp=mediump
|
||||
|
||||
"-D image1d_ld1(tex,p)=float16_t(texelFetch(tex,p,0).r)"
|
||||
"-D image2d_ld1(tex,p)=float16_t(texelFetch(tex,p,0).r)"
|
||||
"-D image3d_ld1(tex,p)=float16_t(texelFetch(tex,p,0).r)"
|
||||
"-D image1d_st1(img,p,v)={vec4 _v;_v.r=v;imageStore(img,p,_v);}"
|
||||
"-D image2d_st1(img,p,v)={vec4 _v;_v.r=v;imageStore(img,p,_v);}"
|
||||
"-D image3d_st1(img,p,v)={vec4 _v;_v.r=v;imageStore(img,p,_v);}"
|
||||
"-D image1d_cp1(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
|
||||
"-D image2d_cp1(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
|
||||
"-D image3d_cp1(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
|
||||
|
||||
"-D image1d_ld4(tex,p)=f16vec4(texelFetch(tex,p,0))"
|
||||
"-D image2d_ld4(tex,p)=f16vec4(texelFetch(tex,p,0))"
|
||||
"-D image3d_ld4(tex,p)=f16vec4(texelFetch(tex,p,0))"
|
||||
"-D image1d_st4(img,p,v)={imageStore(img,p,v);}"
|
||||
"-D image2d_st4(img,p,v)={imageStore(img,p,v);}"
|
||||
"-D image3d_st4(img,p,v)={imageStore(img,p,v);}"
|
||||
"-D image1d_cp4(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
|
||||
"-D image2d_cp4(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
|
||||
"-D image3d_cp4(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
|
||||
|
||||
"-D image1d_ld8(tex,p)=f16mat2x4(texelFetch(tex,(p)*2,0),texelFetch(tex,(p)*2+1,0))"
|
||||
"-D image2d_ld8(tex,p)=f16mat2x4(texelFetch(tex,ivec2(p.x*2,p.y),0),texelFetch(tex,ivec2(p.x*2+1,p.y),0))"
|
||||
"-D image3d_ld8(tex,p)=f16mat2x4(texelFetch(tex,ivec3(p.x*2,p.y,p.z),0),texelFetch(tex,ivec3(p.x*2+1,p.y,p.z),0))"
|
||||
"-D image1d_st8(img,p,v)={imageStore(img,(p)*2,v[0]);imageStore(img,(p)*2+1,v[1]);}"
|
||||
"-D image2d_st8(img,p,v)={imageStore(img,ivec2(p.x*2,p.y),v[0]);imageStore(img,ivec2(p.x*2+1,p.y),v[1]);}"
|
||||
"-D image3d_st8(img,p,v)={imageStore(img,ivec3(p.x*2,p.y,p.z),v[0]);imageStore(img,ivec3(p.x*2+1,p.y,p.z),v[1]);}"
|
||||
"-D image1d_cp8(img,p,tex,sp)={imageStore(img,(p)*2,texelFetch(tex,sp*2,0));imageStore(img,(p)*2+1,texelFetch(tex,sp*2+1,0));}"
|
||||
"-D image2d_cp8(img,p,tex,sp)={imageStore(img,ivec2(p.x*2,p.y),texelFetch(tex,ivec2(sp.x*2,sp.y),0));imageStore(img,ivec2(p.x*2+1,p.y),texelFetch(tex,ivec2(sp.x*2+1,sp.y),0));}"
|
||||
"-D image3d_cp8(img,p,tex,sp)={imageStore(img,ivec3(p.x*2,p.y,p.z),texelFetch(tex,ivec3(sp.x*2,sp.y,sp.z),0));imageStore(img,ivec3(p.x*2+1,p.y,p.z),texelFetch(tex,ivec3(sp.x*2+1,sp.y,sp.z),0));}"
|
||||
|
||||
"-D buffer_ld1(buf,i)=float16_t(buf[i])"
|
||||
"-D buffer_st1(buf,i,v)={buf[i]=float(v);}"
|
||||
"-D buffer_cp1(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_cp1to4(buf,i,sbuf,si4)={buf[i]=uvec2(packHalf2x16(vec2(f16vec2(sbuf[si4.r],sbuf[si4.g]))),packHalf2x16(vec2(f16vec2(sbuf[si4.b],sbuf[si4.a]))));}"
|
||||
"-D buffer_cp1to8(buf,i,sbuf,si4,sii4)={buf[i]=uvec4(packHalf2x16(vec2(f16vec2(sbuf[si4.r],sbuf[si4.g]))),packHalf2x16(vec2(f16vec2(sbuf[si4.b],sbuf[si4.a]))),packHalf2x16(vec2(f16vec2(sbuf[sii4.r],sbuf[sii4.g]))),packHalf2x16(vec2(f16vec2(sbuf[sii4.b],sbuf[sii4.a]))));}"
|
||||
"-D buffer_ld2(buf,i)=f16vec2(unpackHalf2x16(buf[i]))"
|
||||
"-D buffer_st2(buf,i,v)={buf[i]=packHalf2x16(vec2(v))}"
|
||||
"-D buffer_cp2(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_ld4(buf,i)=f16vec4(vec4(unpackHalf2x16(buf[i].x),unpackHalf2x16(buf[i].y)))"
|
||||
"-D buffer_st4(buf,i,v)={buf[i]=uvec2(packHalf2x16(vec2(v.rg)),packHalf2x16(vec2(v.ba)));}"
|
||||
"-D buffer_cp4(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_cp4to1(buf,i4,sbuf,si)={uvec2 _v=sbuf[si]; vec2 _v0=unpackHalf2x16(_v.x);vec2 _v1=unpackHalf2x16(_v.y); buf[i4.r]=_v0.r;buf[i4.g]=_v0.g;buf[i4.b]=_v1.r;buf[i4.a]=_v1.g;}"
|
||||
"-D buffer_cp4to8(buf,i,sbuf,si2)={buf[i]=uvec4(sbuf[si2.r],sbuf[si2.g]);}"
|
||||
"-D buffer_ld8(buf,i)=f16mat2x4(f16vec4(vec4(unpackHalf2x16(buf[i].r),unpackHalf2x16(buf[i].g))),f16vec4(vec4(unpackHalf2x16(buf[i].b),unpackHalf2x16(buf[i].a))))"
|
||||
"-D buffer_st8(buf,i,v)={buf[i]=uvec4(uvec2(packHalf2x16(vec2(v[0].rg)),packHalf2x16(vec2(v[0].ba))),uvec2(packHalf2x16(vec2(v[1].rg)),packHalf2x16(vec2(v[1].ba))));}"
|
||||
"-D buffer_cp8(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_cp8to1(buf,i4,ii4,sbuf,si)={uvec4 _v=sbuf[si]; vec2 _v0=unpackHalf2x16(_v.r);vec2 _v1=unpackHalf2x16(_v.g);vec2 _v2=unpackHalf2x16(_v.b);vec2 _v3=unpackHalf2x16(_v.a); buf[i4.r]=_v0.r;buf[i4.g]=_v0.g;buf[i4.b]=_v1.r;buf[i4.a]=_v1.g; buf[ii4.r]=_v2.r;buf[ii4.g]=_v2.g;buf[ii4.b]=_v3.r;buf[ii4.a]=_v3.g;}"
|
||||
"-D buffer_cp8to4(buf,i2,sbuf,si)={uvec4 _v=sbuf[si]; buf[i2.r]=_v.rg;buf[i2.g]=_v.ba;}"
|
||||
|
||||
"-D psc(x)=(x==0?p.x:x)"
|
||||
-DNCNN_image_shader=1 -DNCNN_fp16_packed=1 -DNCNN_fp16_arithmetic=1
|
||||
-V -s -x -o ${SHADER_image_fp16pa_SPV_HEX_FILE} ${SHADER_SRC}
|
||||
DEPENDS ${SHADER_SRC}
|
||||
COMMENT "Building SPIR-V module ${SHADER_image_fp16pa_SRC_NAME_WE}.spv"
|
||||
VERBATIM
|
||||
)
|
||||
set_source_files_properties(${SHADER_image_fp16pa_SPV_HEX_FILE} PROPERTIES GENERATED TRUE)
|
||||
|
||||
# image + fp16s
|
||||
set(SHADER_image_fp16s_SRC_NAME_WE "${SHADER_SRC_NAME_WE}_image_fp16s")
|
||||
|
||||
set(SHADER_image_fp16s_SPV_HEX_FILE ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_image_fp16s_SRC_NAME_WE}.spv.hex.h)
|
||||
add_custom_command(
|
||||
OUTPUT ${SHADER_image_fp16s_SPV_HEX_FILE}
|
||||
COMMAND ${GLSLANGVALIDATOR_EXECUTABLE}
|
||||
ARGS -Dsfp=float16_t -Dsfpvec2=f16vec2 -Dsfpvec4=f16vec4
|
||||
-Dafp=float -Dafpvec2=vec2 -Dafpvec4=vec4 -Dafpvec8=mat2x4 -Dafpmat4=mat4
|
||||
|
||||
-Dimfmtc1=r16f -Dimfmtc4=rgba16f
|
||||
-Dunfp=mediump
|
||||
|
||||
"-D image1d_ld1(tex,p)=texelFetch(tex,p,0).r"
|
||||
"-D image2d_ld1(tex,p)=texelFetch(tex,p,0).r"
|
||||
"-D image3d_ld1(tex,p)=texelFetch(tex,p,0).r"
|
||||
"-D image1d_st1(img,p,v)={vec4 _v;_v.r=v;imageStore(img,p,_v);}"
|
||||
"-D image2d_st1(img,p,v)={vec4 _v;_v.r=v;imageStore(img,p,_v);}"
|
||||
"-D image3d_st1(img,p,v)={vec4 _v;_v.r=v;imageStore(img,p,_v);}"
|
||||
"-D image1d_cp1(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
|
||||
"-D image2d_cp1(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
|
||||
"-D image3d_cp1(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
|
||||
|
||||
"-D image1d_ld4(tex,p)=texelFetch(tex,p,0)"
|
||||
"-D image2d_ld4(tex,p)=texelFetch(tex,p,0)"
|
||||
"-D image3d_ld4(tex,p)=texelFetch(tex,p,0)"
|
||||
"-D image1d_st4(img,p,v)={imageStore(img,p,v);}"
|
||||
"-D image2d_st4(img,p,v)={imageStore(img,p,v);}"
|
||||
"-D image3d_st4(img,p,v)={imageStore(img,p,v);}"
|
||||
"-D image1d_cp4(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
|
||||
"-D image2d_cp4(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
|
||||
"-D image3d_cp4(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
|
||||
|
||||
"-D image1d_ld8(tex,p)=mat2x4(texelFetch(tex,(p)*2,0),texelFetch(tex,(p)*2+1,0))"
|
||||
"-D image2d_ld8(tex,p)=mat2x4(texelFetch(tex,ivec2(p.x*2,p.y),0),texelFetch(tex,ivec2(p.x*2+1,p.y),0))"
|
||||
"-D image3d_ld8(tex,p)=mat2x4(texelFetch(tex,ivec3(p.x*2,p.y,p.z),0),texelFetch(tex,ivec3(p.x*2+1,p.y,p.z),0))"
|
||||
"-D image1d_st8(img,p,v)={imageStore(img,(p)*2,v[0]);imageStore(img,(p)*2+1,v[1]);}"
|
||||
"-D image2d_st8(img,p,v)={imageStore(img,ivec2(p.x*2,p.y),v[0]);imageStore(img,ivec2(p.x*2+1,p.y),v[1]);}"
|
||||
"-D image3d_st8(img,p,v)={imageStore(img,ivec3(p.x*2,p.y,p.z),v[0]);imageStore(img,ivec3(p.x*2+1,p.y,p.z),v[1]);}"
|
||||
"-D image1d_cp8(img,p,tex,sp)={imageStore(img,(p)*2,texelFetch(tex,sp*2,0));imageStore(img,(p)*2+1,texelFetch(tex,sp*2+1,0));}"
|
||||
"-D image2d_cp8(img,p,tex,sp)={imageStore(img,ivec2(p.x*2,p.y),texelFetch(tex,ivec2(sp.x*2,sp.y),0));imageStore(img,ivec2(p.x*2+1,p.y),texelFetch(tex,ivec2(sp.x*2+1,sp.y),0));}"
|
||||
"-D image3d_cp8(img,p,tex,sp)={imageStore(img,ivec3(p.x*2,p.y,p.z),texelFetch(tex,ivec3(sp.x*2,sp.y,sp.z),0));imageStore(img,ivec3(p.x*2+1,p.y,p.z),texelFetch(tex,ivec3(sp.x*2+1,sp.y,sp.z),0));}"
|
||||
|
||||
"-D buffer_ld1(buf,i)=float(buf[i])"
|
||||
"-D buffer_st1(buf,i,v)={buf[i]=float16_t(v);}"
|
||||
"-D buffer_cp1(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_cp1to4(buf,i,sbuf,si4)={buf[i].r=sbuf[si4.r];buf[i].g=sbuf[si4.g];buf[i].b=sbuf[si4.b];buf[i].a=sbuf[si4.a];}"
|
||||
"-D buffer_cp1to8(buf,i,sbuf,si4,sii4)={buf[i].abcd.r=sbuf[si4.r];buf[i].abcd.g=sbuf[si4.g];buf[i].abcd.b=sbuf[si4.b];buf[i].abcd.a=sbuf[si4.a];buf[i].efgh.r=sbuf[sii4.r];buf[i].efgh.g=sbuf[sii4.g];buf[i].efgh.b=sbuf[sii4.b];buf[i].efgh.a=sbuf[sii4.a];}"
|
||||
"-D buffer_ld2(buf,i)=vec2(buf[i])"
|
||||
"-D buffer_st2(buf,i,v)={buf[i]=f16vec2(v);}"
|
||||
"-D buffer_cp2(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_ld4(buf,i)=vec4(buf[i])"
|
||||
"-D buffer_st4(buf,i,v)={buf[i]=f16vec4(v);}"
|
||||
"-D buffer_cp4(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_cp4to1(buf,i4,sbuf,si)={buf[i4.r]=sbuf[si].r;buf[i4.g]=sbuf[si].g;buf[i4.b]=sbuf[si].b;buf[i4.a]=sbuf[si].a;}"
|
||||
"-D buffer_cp4to8(buf,i,sbuf,si2)={buf[i].abcd=sbuf[si2.r];buf[i].efgh=sbuf[si2.g];}"
|
||||
"-D buffer_ld8(buf,i)=mat2x4(vec4(buf[i].abcd),vec4(buf[i].efgh))"
|
||||
"-D buffer_st8(buf,i,v)={buf[i].abcd=f16vec4(v[0]);buf[i].efgh=f16vec4(v[1]);}"
|
||||
"-D buffer_cp8(buf,i,sbuf,si)={buf[i].abcd=sbuf[si].abcd;buf[i].efgh=sbuf[si].efgh;}"
|
||||
"-D buffer_cp8to1(buf,i4,ii4,sbuf,si)={buf[i4.r]=sbuf[si].abcd.r;buf[i4.g]=sbuf[si].abcd.g;buf[i4.b]=sbuf[si].abcd.b;buf[i4.a]=sbuf[si].abcd.a; buf[ii4.r]=sbuf[si].efgh.r;buf[ii4.g]=sbuf[si].efgh.g;buf[ii4.b]=sbuf[si].efgh.b;buf[ii4.a]=sbuf[si].efgh.a;}"
|
||||
"-D buffer_cp8to4(buf,i2,sbuf,si)={buf[i2.r]=sbuf[si].abcd;buf[i2.g]=sbuf[si].efgh;}"
|
||||
|
||||
"-D sfp2afpmat4(v)=v"
|
||||
"-D afp2sfpmat4(v)=v"
|
||||
"-D psc(x)=(x==0?p.x:x)"
|
||||
-DNCNN_image_shader=1 -DNCNN_fp16_storage=1
|
||||
-V -s -x -o ${SHADER_image_fp16s_SPV_HEX_FILE} ${SHADER_SRC}
|
||||
DEPENDS ${SHADER_SRC}
|
||||
COMMENT "Building SPIR-V module ${SHADER_image_fp16s_SRC_NAME_WE}.spv"
|
||||
VERBATIM
|
||||
)
|
||||
set_source_files_properties(${SHADER_image_fp16s_SPV_HEX_FILE} PROPERTIES GENERATED TRUE)
|
||||
|
||||
# image + fp16s + fp16a
|
||||
set(SHADER_image_fp16sa_SRC_NAME_WE "${SHADER_SRC_NAME_WE}_image_fp16sa")
|
||||
|
||||
set(SHADER_image_fp16sa_SPV_HEX_FILE ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_image_fp16sa_SRC_NAME_WE}.spv.hex.h)
|
||||
add_custom_command(
|
||||
OUTPUT ${SHADER_image_fp16sa_SPV_HEX_FILE}
|
||||
COMMAND ${GLSLANGVALIDATOR_EXECUTABLE}
|
||||
ARGS -Dsfp=float16_t -Dsfpvec2=f16vec2 -Dsfpvec4=f16vec4 -Dsfpvec8=f16mat2x4 -Dsfpmat4=f16mat4
|
||||
-Dafp=float16_t -Dafpvec2=f16vec2 -Dafpvec4=f16vec4 -Dafpvec8=f16mat2x4 -Dafpmat4=f16mat4
|
||||
|
||||
-Dimfmtc1=r16f -Dimfmtc4=rgba16f
|
||||
-Dunfp=mediump
|
||||
|
||||
"-D image1d_ld1(tex,p)=float16_t(texelFetch(tex,p,0).r)"
|
||||
"-D image2d_ld1(tex,p)=float16_t(texelFetch(tex,p,0).r)"
|
||||
"-D image3d_ld1(tex,p)=float16_t(texelFetch(tex,p,0).r)"
|
||||
"-D image1d_st1(img,p,v)={f16vec4 _v;_v.r=float16_t(v);imageStore(img,p,_v);}"
|
||||
"-D image2d_st1(img,p,v)={f16vec4 _v;_v.r=float16_t(v);imageStore(img,p,_v);}"
|
||||
"-D image3d_st1(img,p,v)={f16vec4 _v;_v.r=float16_t(v);imageStore(img,p,_v);}"
|
||||
"-D image1d_cp1(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
|
||||
"-D image2d_cp1(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
|
||||
"-D image3d_cp1(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
|
||||
|
||||
"-D image1d_ld4(tex,p)=f16vec4(texelFetch(tex,p,0))"
|
||||
"-D image2d_ld4(tex,p)=f16vec4(texelFetch(tex,p,0))"
|
||||
"-D image3d_ld4(tex,p)=f16vec4(texelFetch(tex,p,0))"
|
||||
"-D image1d_st4(img,p,v)={imageStore(img,p,vec4(v));}"
|
||||
"-D image2d_st4(img,p,v)={imageStore(img,p,vec4(v));}"
|
||||
"-D image3d_st4(img,p,v)={imageStore(img,p,vec4(v));}"
|
||||
"-D image1d_cp4(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
|
||||
"-D image2d_cp4(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
|
||||
"-D image3d_cp4(img,p,tex,sp)={imageStore(img,p,texelFetch(tex,sp,0));}"
|
||||
|
||||
"-D image1d_ld8(tex,p)=f16mat2x4(texelFetch(tex,(p)*2,0),texelFetch(tex,(p)*2+1,0))"
|
||||
"-D image2d_ld8(tex,p)=f16mat2x4(texelFetch(tex,ivec2(p.x*2,p.y),0),texelFetch(tex,ivec2(p.x*2+1,p.y),0))"
|
||||
"-D image3d_ld8(tex,p)=f16mat2x4(texelFetch(tex,ivec3(p.x*2,p.y,p.z),0),texelFetch(tex,ivec3(p.x*2+1,p.y,p.z),0))"
|
||||
"-D image1d_st8(img,p,v)={imageStore(img,(p)*2,vec4(v[0]));imageStore(img,(p)*2+1,vec4(v[1]));}"
|
||||
"-D image2d_st8(img,p,v)={imageStore(img,ivec2(p.x*2,p.y),vec4(v[0]));imageStore(img,ivec2(p.x*2+1,p.y),vec4(v[1]));}"
|
||||
"-D image3d_st8(img,p,v)={imageStore(img,ivec3(p.x*2,p.y,p.z),vec4(v[0]));imageStore(img,ivec3(p.x*2+1,p.y,p.z),vec4(v[1]));}"
|
||||
"-D image1d_cp8(img,p,tex,sp)={imageStore(img,(p)*2,texelFetch(tex,sp*2,0));imageStore(img,(p)*2+1,texelFetch(tex,sp*2+1,0));}"
|
||||
"-D image2d_cp8(img,p,tex,sp)={imageStore(img,ivec2(p.x*2,p.y),texelFetch(tex,ivec2(sp.x*2,sp.y),0));imageStore(img,ivec2(p.x*2+1,p.y),texelFetch(tex,ivec2(sp.x*2+1,sp.y),0));}"
|
||||
"-D image3d_cp8(img,p,tex,sp)={imageStore(img,ivec3(p.x*2,p.y,p.z),texelFetch(tex,ivec3(sp.x*2,sp.y,sp.z),0));imageStore(img,ivec3(p.x*2+1,p.y,p.z),texelFetch(tex,ivec3(sp.x*2+1,sp.y,sp.z),0));}"
|
||||
|
||||
"-D buffer_ld1(buf,i)=buf[i]"
|
||||
"-D buffer_st1(buf,i,v)={buf[i]=v;}"
|
||||
"-D buffer_cp1(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_cp1to4(buf,i,sbuf,si4)={buf[i]=f16vec4(sbuf[si4.r],sbuf[si4.g],sbuf[si4.b],sbuf[si4.a]);}"
|
||||
"-D buffer_cp1to8(buf,i,sbuf,si4,sii4)={buf[i]=f16mat2x4(sbuf[si4.r],sbuf[si4.g],sbuf[si4.b],sbuf[si4.a],sbuf[sii4.r],sbuf[sii4.g],sbuf[sii4.b],sbuf[sii4.a]);}"
|
||||
"-D buffer_ld2(buf,i)=buf[i]"
|
||||
"-D buffer_st2(buf,i,v)={buf[i]=v;}"
|
||||
"-D buffer_cp2(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_ld4(buf,i)=buf[i]"
|
||||
"-D buffer_st4(buf,i,v)={buf[i]=v;}"
|
||||
"-D buffer_cp4(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_cp4to1(buf,i4,sbuf,si)={buf[i4.r]=sbuf[si].r;buf[i4.g]=sbuf[si].g;buf[i4.b]=sbuf[si].b;buf[i4.a]=sbuf[si].a;}"
|
||||
"-D buffer_cp4to8(buf,i,sbuf,si2)={buf[i]=f16mat2x4(sbuf[si2.r],sbuf[si2.g]);}"
|
||||
"-D buffer_ld8(buf,i)=buf[i]"
|
||||
"-D buffer_st8(buf,i,v)={buf[i]=v;}"
|
||||
"-D buffer_cp8(buf,i,sbuf,si)={buf[i]=sbuf[si];}"
|
||||
"-D buffer_cp8to1(buf,i4,ii4,sbuf,si)={f16mat2x4 _v=sbuf[si]; buf[i4.r]=_v[0].r;buf[i4.g]=_v[0].g;buf[i4.b]=_v[0].b;buf[i4.a]=_v[0].a; buf[ii4.r]=_v[1].r;buf[ii4.g]=_v[1].g;buf[ii4.b]=_v[1].b;buf[ii4.a]=_v[1].a;}"
|
||||
"-D buffer_cp8to4(buf,i2,sbuf,si)={f16mat2x4 _v=sbuf[si]; buf[i2.r]=_v[0];buf[i2.g]=_v[1];}"
|
||||
"-D sfp2afpmat4(v)=v"
|
||||
"-D afp2sfpmat4(v)=v"
|
||||
|
||||
"-D psc(x)=(x==0?p.x:x)"
|
||||
-DNCNN_image_shader=1 -DNCNN_fp16_storage=1 -DNCNN_fp16_arithmetic=1
|
||||
-V -s -x -o ${SHADER_image_fp16sa_SPV_HEX_FILE} ${SHADER_SRC}
|
||||
DEPENDS ${SHADER_SRC}
|
||||
COMMENT "Building SPIR-V module ${SHADER_image_fp16sa_SRC_NAME_WE}.spv"
|
||||
VERBATIM
|
||||
)
|
||||
set_source_files_properties(${SHADER_image_fp16sa_SPV_HEX_FILE} PROPERTIES GENERATED TRUE)
|
||||
|
||||
set(LOCAL_SHADER_SPV_HEADER ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_SRC_NAME_WE}.spv.h)
|
||||
|
||||
file(WRITE ${LOCAL_SHADER_SPV_HEADER}
|
||||
"static const uint32_t ${SHADER_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_SRC_NAME_WE}.spv.hex.h\"\n};\n"
|
||||
"static const uint32_t ${SHADER_fp16p_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_fp16p_SRC_NAME_WE}.spv.hex.h\"\n};\n"
|
||||
"static const uint32_t ${SHADER_fp16pa_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_fp16pa_SRC_NAME_WE}.spv.hex.h\"\n};\n"
|
||||
"static const uint32_t ${SHADER_fp16s_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_fp16s_SRC_NAME_WE}.spv.hex.h\"\n};\n"
|
||||
"static const uint32_t ${SHADER_fp16sa_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_fp16sa_SRC_NAME_WE}.spv.hex.h\"\n};\n"
|
||||
"static const uint32_t ${SHADER_image_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_image_SRC_NAME_WE}.spv.hex.h\"\n};\n"
|
||||
"static const uint32_t ${SHADER_image_fp16p_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_image_fp16p_SRC_NAME_WE}.spv.hex.h\"\n};\n"
|
||||
"static const uint32_t ${SHADER_image_fp16pa_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_image_fp16pa_SRC_NAME_WE}.spv.hex.h\"\n};\n"
|
||||
"static const uint32_t ${SHADER_image_fp16s_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_image_fp16s_SRC_NAME_WE}.spv.hex.h\"\n};\n"
|
||||
"static const uint32_t ${SHADER_image_fp16sa_SRC_NAME_WE}_spv_data[] = {\n#include \"${SHADER_image_fp16sa_SRC_NAME_WE}.spv.hex.h\"\n};\n"
|
||||
)
|
||||
|
||||
set_source_files_properties(${LOCAL_SHADER_SPV_HEADER} PROPERTIES GENERATED TRUE)
|
||||
|
||||
set(LOCAL_SHADER_SPV_HEX_HEADERS
|
||||
${SHADER_SPV_HEX_FILE}
|
||||
${SHADER_fp16p_SPV_HEX_FILE}
|
||||
${SHADER_fp16pa_SPV_HEX_FILE}
|
||||
${SHADER_fp16s_SPV_HEX_FILE}
|
||||
${SHADER_fp16sa_SPV_HEX_FILE}
|
||||
${SHADER_image_SPV_HEX_FILE}
|
||||
${SHADER_image_fp16p_SPV_HEX_FILE}
|
||||
${SHADER_image_fp16pa_SPV_HEX_FILE}
|
||||
${SHADER_image_fp16s_SPV_HEX_FILE}
|
||||
${SHADER_image_fp16sa_SPV_HEX_FILE}
|
||||
)
|
||||
|
||||
set(${SHADER_SPV_HEADER} ${LOCAL_SHADER_SPV_HEADER} PARENT_SCOPE)
|
||||
set(${SHADER_SPV_HEX_HEADERS} ${LOCAL_SHADER_SPV_HEX_HEADERS} PARENT_SCOPE)
|
||||
|
||||
endfunction()
|
5
3rdparty/ncnn/cmake/run_test.cmake
vendored
Normal file
5
3rdparty/ncnn/cmake/run_test.cmake
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
|
||||
# Run one test binary, optionally through a loader (e.g. qemu or a device
# shell) configured via environment variables, and abort the build step
# with a fatal error unless the process exits with status 0.
execute_process(
    COMMAND $ENV{TESTS_EXECUTABLE_LOADER} $ENV{TESTS_EXECUTABLE_LOADER_ARGUMENTS} ${TEST_EXECUTABLE} $ENV{TESTS_ARGUMENTS}
    RESULT_VARIABLE result)
# result may be a non-numeric error string (e.g. "No such file"), so compare
# it as a string rather than numerically.
if(NOT "${result}" STREQUAL "0")
    message(FATAL_ERROR "Test failed with return value '${result}'")
endif()
|
13
3rdparty/ncnn/codeformat.sh
vendored
Executable file
13
3rdparty/ncnn/codeformat.sh
vendored
Executable file
@ -0,0 +1,13 @@
|
||||
#!/usr/bin/env bash

# we run clang-format and astyle twice to get stable format output

# Format the whole tree once with clang-format, then astyle.
#
# NOTE: the -name alternatives must be grouped with \( ... \); otherwise
# "-type f -name '*.c' -o -name '*.cpp' ..." parses as
# "(-type f -and -name '*.c') -or -name '*.cpp' ..." and the -o branches
# match directories and other non-regular files as well.
format_once() {
    find src/ tools/ tests/ examples/ benchmark/ python/ -type f \( -name '*.c' -o -name '*.cpp' -o -name '*.cc' -o -name '*.h' \) | grep -v python/pybind11 | grep -v stb_image | xargs -i clang-format -i {}
    astyle -n -r "benchmark/*.h,*.cpp,*.cc" "tests/*.h,*.cpp,*.cc" "tools/*.h,*.cpp,*.cc" "examples/*.h,*.cpp,*.cc"
    astyle -n -r "src/*.h,*.cpp,*.cc" --exclude=src/stb_image.h --exclude=src/stb_image_write.h
    astyle -n -r "python/*.h,*.cpp,*.cc" --exclude=python/pybind11
}

# Two passes: clang-format and astyle can each perturb the other's output,
# and a second round makes the result a fixed point.
format_once
format_once
|
139
3rdparty/ncnn/docs/Home.md
vendored
Normal file
139
3rdparty/ncnn/docs/Home.md
vendored
Normal file
@ -0,0 +1,139 @@
|
||||
### input data and extract output
|
||||
```cpp
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/highgui/highgui.hpp>
|
||||
#include "net.h"
|
||||
|
||||
int main()
|
||||
{
|
||||
cv::Mat img = cv::imread("image.ppm", CV_LOAD_IMAGE_GRAYSCALE);
|
||||
int w = img.cols;
|
||||
int h = img.rows;
|
||||
|
||||
// subtract 128, norm to -1 ~ 1
|
||||
ncnn::Mat in = ncnn::Mat::from_pixels_resize(img.data, ncnn::Mat::PIXEL_GRAY, w, h, 60, 60);
|
||||
float mean[1] = { 128.f };
|
||||
float norm[1] = { 1/128.f };
|
||||
in.substract_mean_normalize(mean, norm);
|
||||
|
||||
ncnn::Net net;
|
||||
net.load_param("model.param");
|
||||
net.load_model("model.bin");
|
||||
|
||||
ncnn::Extractor ex = net.create_extractor();
|
||||
ex.set_light_mode(true);
|
||||
ex.set_num_threads(4);
|
||||
|
||||
ex.input("data", in);
|
||||
|
||||
ncnn::Mat feat;
|
||||
ex.extract("output", feat);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
### print Mat content
|
||||
```cpp
|
||||
void pretty_print(const ncnn::Mat& m)
|
||||
{
|
||||
for (int q=0; q<m.c; q++)
|
||||
{
|
||||
const float* ptr = m.channel(q);
|
||||
for (int z=0; z<m.d; z++)
|
||||
{
|
||||
for (int y=0; y<m.h; y++)
|
||||
{
|
||||
for (int x=0; x<m.w; x++)
|
||||
{
|
||||
printf("%f ", ptr[x]);
|
||||
}
|
||||
ptr += m.w;
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
printf("------------------------\n");
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### visualize Mat content
|
||||
```cpp
|
||||
void visualize(const char* title, const ncnn::Mat& m)
|
||||
{
|
||||
std::vector<cv::Mat> normed_feats(m.c);
|
||||
|
||||
for (int i=0; i<m.c; i++)
|
||||
{
|
||||
cv::Mat tmp(m.h, m.w, CV_32FC1, (void*)(const float*)m.channel(i));
|
||||
|
||||
cv::normalize(tmp, normed_feats[i], 0, 255, cv::NORM_MINMAX, CV_8U);
|
||||
|
||||
cv::cvtColor(normed_feats[i], normed_feats[i], cv::COLOR_GRAY2BGR);
|
||||
|
||||
// check NaN
|
||||
for (int y=0; y<m.h; y++)
|
||||
{
|
||||
const float* tp = tmp.ptr<float>(y);
|
||||
uchar* sp = normed_feats[i].ptr<uchar>(y);
|
||||
for (int x=0; x<m.w; x++)
|
||||
{
|
||||
float v = tp[x];
|
||||
if (v != v)
|
||||
{
|
||||
sp[0] = 0;
|
||||
sp[1] = 0;
|
||||
sp[2] = 255;
|
||||
}
|
||||
|
||||
sp += 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int tw = m.w < 10 ? 32 : m.w < 20 ? 16 : m.w < 40 ? 8 : m.w < 80 ? 4 : m.w < 160 ? 2 : 1;
|
||||
int th = (m.c - 1) / tw + 1;
|
||||
|
||||
cv::Mat show_map(m.h * th, m.w * tw, CV_8UC3);
|
||||
show_map = cv::Scalar(127);
|
||||
|
||||
// tile
|
||||
for (int i=0; i<m.c; i++)
|
||||
{
|
||||
int ty = i / tw;
|
||||
int tx = i % tw;
|
||||
|
||||
normed_feats[i].copyTo(show_map(cv::Rect(tx * m.w, ty * m.h, m.w, m.h)));
|
||||
}
|
||||
|
||||
cv::resize(show_map, show_map, cv::Size(0,0), 2, 2, cv::INTER_NEAREST);
|
||||
cv::imshow(title, show_map);
|
||||
}
|
||||
```
|
||||
|
||||
### FAQ
|
||||
Q ncnn的起源
|
||||
|
||||
A 深度学习算法要在手机上落地,caffe依赖太多,手机上也没有cuda,需要个又快又小的前向网络实现
|
||||
|
||||
|
||||
Q ncnn名字的来历
|
||||
|
||||
A cnn就是卷积神经网络的缩写,开头的n算是一语n关。比如new/next(全新的实现),naive(ncnn是naive实现),neon(ncnn最初为手机优化),up主名字(←_←)
|
||||
|
||||
|
||||
Q 支持哪些平台
|
||||
|
||||
A 跨平台,支持 android / ios / linux / windows / macos,也支持裸机跑
|
||||
|
||||
|
||||
Q 计算精度如何
|
||||
|
||||
A armv7 neon float 不遵照 ieee754 标准,有些采用快速实现(如exp sin等),速度快但确保精度足够高
|
||||
|
||||
|
||||
Q logo
|
||||
|
||||
A up主是mc玩家,所以灵魂手绘像素猫,还可以找到ncnn...
|
48
3rdparty/ncnn/docs/application-with-ncnn-inside.md
vendored
Normal file
48
3rdparty/ncnn/docs/application-with-ncnn-inside.md
vendored
Normal file
@ -0,0 +1,48 @@
|
||||
 Azar-视频交友与聊天 June 20, 2018
|
||||
|
||||
 玩美彩妆 - 自拍美颜 & 智能美妆相机 June 21, 2018
|
||||
|
||||
 You Makeup Photo Camera 2.1.5
|
||||
|
||||
 滤镜相机 Cartoon Camera- Paintlab January 24, 2018
|
||||
|
||||
 画中画相机 January 30, 2018
|
||||
|
||||
 Photo Editor Pro 1.1.4.1029
|
||||
|
||||
 Air Camera 1.7.3.1002
|
||||
|
||||
 美丽拍-懂你的自拍美颜相机 February 1, 2018
|
||||
|
||||
 玩美Fun-特效动图自拍滤镜&分享相片! May 15, 2018
|
||||
|
||||
 Sweet Snap - 生活贴纸&图像编辑器,实时滤镜,录制视频和有趣表情包,美容效果 June 22, 2018
|
||||
|
||||
 玩图 - 美图相机 March 29, 2018
|
||||
|
||||
 美颜相机 7.6.95
|
||||
|
||||
 自拍相机 - 照片编辑器和过滤器和贴纸 April 27, 2018
|
||||
|
||||
 APUS Camera 1.7.2.1001
|
||||
|
||||
 LIKE短视频 — 魔法视频自拍神器 2.2.4
|
||||
|
||||
 爱奇艺 9.6.0
|
||||
|
||||
 支付宝 10.1.25.752
|
||||
|
||||
 YouCam Shop - World's First AR Makeup Shopping App 3.4.0
|
||||
|
||||
 美容化妆自拍相机和自拍照片编辑器 1.4.8
|
||||
|
||||
 京东-挑好物,上京东 7.0.8
|
||||
|
||||
 Versa 2.9.2
|
||||
|
||||
 微视 4.3.1.88
|
||||
|
||||
 快手短视频—国民短视频平台 5.4.2.5360
|
||||
|
||||
 滴滴出行 5.3.0
|
||||
|
118
3rdparty/ncnn/docs/benchmark/the-benchmark-of-caffe-android-lib,-mini-caffe,-and-ncnn.md
vendored
Normal file
118
3rdparty/ncnn/docs/benchmark/the-benchmark-of-caffe-android-lib,-mini-caffe,-and-ncnn.md
vendored
Normal file
@ -0,0 +1,118 @@
|
||||
caffe-android-lib https://github.com/sh1r0/caffe-android-lib
|
||||
|
||||
mini-caffe https://github.com/luoyetx/mini-caffe
|
||||
|
||||
openblas-0.2.20 https://github.com/xianyi/OpenBLAS
|
||||
|
||||
ncnn https://github.com/Tencent/ncnn
|
||||
|
||||
***
|
||||
|
||||
squeezenet_v1.1 https://github.com/DeepScale/SqueezeNet/tree/master/SqueezeNet_v1.1
|
||||
|
||||
mobilenet_v1 https://github.com/shicai/MobileNet-Caffe
|
||||
|
||||
vgg16 https://gist.github.com/ksimonyan/211839e770f7b538e2d8
|
||||
|
||||
***
|
||||
|
||||
Host platform and compiler configuration:
|
||||
|
||||
fedora 27, android-ndk-r15c, target arch = arm64-v8a
|
||||
|
||||
we manually update openblas package to version 0.2.20 in caffe-android-lib for better performance
|
||||
|
||||
|
||||
***
|
||||
|
||||
Device: Nexus 6p
|
||||
|
||||
OS: LineageOS 15.1(Android 8.1.0), ROM newly flashed without any third-party APP installed
|
||||
|
||||
CPU: Snapdragon 810 (Cortex-A57 2.0GHz x 4 + Cortex-A53 1.55GHz x 4)
|
||||
|
||||
RAM: 3G
|
||||
|
||||
|
||||
***
|
||||
|
||||
Benchmark method:
|
||||
|
||||
Run squeezenet, mobilenet inference 23 times in a loop, discard the first three warmup records, and then calculate the average inference time
|
||||
|
||||
Run vgg169 times in a loop, discard the first warmup record, and then calculate the average inference time
|
||||
|
||||
Since the system may force SOC lowering its frequency when temperature goes high, sleep over 1 minute before each benchmark to prevent this issue.
|
||||
|
||||
fps performance: fps = 1000 / avgtime(ms)
|
||||
|
||||
cpu usage: take the CPU value in top utility output
|
||||
|
||||
memory usage: take the RES value in top utility output
|
||||
|
||||
the overall power consumption and performance per watt:
|
||||
|
||||
Disable usb charging: adb shell echo 0 > /sys/class/power_supply/battery/charging_enabled
|
||||
|
||||
current(μA) = adb shell cat /sys/class/power_supply/battery/current_now (multiply -1 for 810 chip)
|
||||
|
||||
voltage(μV) = adb shell cat /sys/class/power_supply/battery/voltage_now
|
||||
|
||||
power consumption(mW) = current / 1000 * voltage / 1000 / 1000
|
||||
|
||||
performance per watt(1000fps/W) = fps / power consumption * 1000
|
||||
|
||||
|
||||
***
|
||||
|
||||
The binary size after debug stripping
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
***
|
||||
|
||||
squeezenet
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
***
|
||||
|
||||
mobilnet
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
***
|
||||
|
||||
vgg16
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
46
3rdparty/ncnn/docs/benchmark/vulkan-conformance-test.md
vendored
Normal file
46
3rdparty/ncnn/docs/benchmark/vulkan-conformance-test.md
vendored
Normal file
@ -0,0 +1,46 @@
|
||||
|
||||
|device|gpu|api version|driver version|squeezenet|mobilenetssd|yolov3|
|
||||
|---|---|---|---|---|---|---|
|
||||
|intel-i7-7700|Intel(R) HD Graphics 630 (Kaby Lake GT2)|1.1.90|18.3.4|y|y|y|
|
||||
|GTX-1060|GeForce GTX 1060 3GB|1.1.95|418.172.0|y|y|y|
|
||||
|AMD-Radeon R9 M290X|AMD RADV PITCAIRN (LLVM 7.0.1)|1.1.70|18.3.4|y|y|y|
|
||||
|iphone-5s|Apple A7 GPU|1.0.82|0.2.1825|y|y|y|
|
||||
|huawei-nexus6p|Adreno (TM) 430|1.0.49|35.601.2388|y|y|y
|
||||
|vivo-y1731ca|Adreno (TM) 505|1.0.61|37.845.1429|y|n|n|
|
||||
|vivo-y85a|Adreno (TM) 506|1.0.61|2.944.3349|y|n|n|
|
||||
|vivo-x9s|Adreno (TM) 510|1.0.61|42.917.1172|y|y|y|
|
||||
|meizu-15|Adreno (TM) 512|1.0.38|29.189.223|n|n|n|
|
||||
|chuizi-jianguo-pro2|Adreno (TM) 512|1.0.38|21.219.2615|n|n|n|
|
||||
|xiaomi-note3|Adreno (TM) 512|1.0.38|39.369.2305|n|n|n|
|
||||
|oppo-r11|Adreno (TM) 512|1.0.38|42.977.756|n|n|n|
|
||||
|xiaomi-6x|Adreno (TM) 512|1.0.61|14.322.3739|y|y|y|
|
||||
|oppo-r11s+|Adreno (TM) 512|1.0.61|35.1004.3936|y|y|y|
|
||||
|vivo-x20a|Adreno (TM) 512|1.0.61|43.10.3141|y|y|y|
|
||||
|vivo-v1816a|Adreno (TM) 512|1.0.61|43.10.3141|y|y|y|
|
||||
|vivo-z1|Adreno (TM) 512|1.0.61|43.10.3141|y|y|y|
|
||||
|xiaomi-redmi-note5|Adreno (TM) 512|1.0.61|63.219.2354|y|y|y|
|
||||
|google-pixel|Adreno (TM) 530|1.1.87|512.354.0|y|y|y|
|
||||
|nubia-z17|Adreno (TM) 540|1.0.38|1.28.32|n|n|n|
|
||||
|samsung-galaxys8+|Adreno (TM) 540|1.0.61|29.896.3583|y|y|y|
|
||||
|oneplus-5t|Adreno (TM) 540|1.0.61|18.1023.2233|y|y|y|
|
||||
|google-pixel2|Adreno (TM) 540|1.1.66|512.313.0|y|y|y|
|
||||
|essential-ph-1|Adreno (TM) 540|1.1.66|512.319.0|y|y|y|
|
||||
|vivo-x23|Adreno (TM) 615|1.0.66|33.870.3328|y|y|y|
|
||||
|vivo-v1813ba|Adreno (TM) 615|1.0.66|33.870.3328|y|y|y|
|
||||
|xiaomi-8se|Adreno (TM) 616|1.0.66|30.913.18|y|y|y|
|
||||
|vivo-nex-a|Adreno (TM) 616|1.0.66|33.870.3328|y|y|y|
|
||||
|xiaomi-mix2s|Adreno (TM) 630|1.0.61|4.91.2976|y|y|y|
|
||||
|heisha-SKR-A0|Adreno (TM) 630|1.0.61|36.173.3586|y|y|y|
|
||||
|heisha-SKR-A0|Adreno (TM) 630|1.0.66|47.448.1532|y|y|y|
|
||||
|oneplus-6|Adreno (TM) 630|1.1.66|512.324.0|y|y|y|
|
||||
|vivo-iQOO|Adreno (TM) 640|1.1.87|512.361.0|y|y|y|
|
||||
|meitu-m8s|Mali-T880|1.0.14|500.910.1017|n|n|n|
|
||||
|huawei-p10|Mali-G71|1.0.53|151.949.2145|n|n|n|
|
||||
|huawei-mate9|Mali-G71|1.0.53|151.949.2145|n|n|n|
|
||||
|oppo-a73|Mali-G71|1.0.47|575.795.1934|n|n|n|
|
||||
|vivo-y97|Mali-G72|1.0.58|240.537.3580|n|n|n|
|
||||
|huawei-mate10|Mali-G72|1.0.66|14.0.0|y|y|y|
|
||||
|huawei-v10|Mali-G72|1.0.66|14.0.0|y|y|y|
|
||||
|huawei-vce-al00|Mali-G72|1.0.66|14.0.0|y|y|y|
|
||||
|huawei-mate20|Mali-G76|1.0.66|14.0.0|y|y|y|
|
||||
|huawei-pct-al10|Mali-G76|1.0.66|14.0.0|y|y|y|
|
57
3rdparty/ncnn/docs/developer-guide/aarch64-mix-assembly-and-intrinsic.md
vendored
Normal file
57
3rdparty/ncnn/docs/developer-guide/aarch64-mix-assembly-and-intrinsic.md
vendored
Normal file
@ -0,0 +1,57 @@
|
||||
```c
|
||||
// v寄存器全部使用 %.4s
|
||||
// 128-bit vreg matches %.4s
|
||||
// a += b * c
|
||||
float32x4_t _a = vld1q_f32(a);
|
||||
float32x4_t _b = vld1q_f32(b);
|
||||
float32x4_t _c = vld1q_f32(c);
|
||||
asm volatile(
|
||||
"fmla %0.4s, %2.4s, %3.4s"
|
||||
: "=w"(_a) // %0
|
||||
: "0"(_a),
|
||||
"w"(_b), // %2
|
||||
"w"(_c) // %3
|
||||
:
|
||||
);
|
||||
```
|
||||
```c
|
||||
// v寄存器使用低64位 %.2s
|
||||
// low 64-bit vreg matches %.2s
|
||||
// a += b * c
|
||||
float32x2_t _a = vld1_f32(a);
|
||||
float32x2_t _b = vld1_f32(b);
|
||||
float32x2_t _c = vld1_f32(c);
|
||||
asm volatile(
|
||||
"fmla %0.2s, %2.2s, %3.2s"
|
||||
: "=w"(_a) // %0
|
||||
: "0"(_a),
|
||||
"w"(_b), // %2
|
||||
"w"(_c) // %3
|
||||
:
|
||||
);
|
||||
```
|
||||
```c
|
||||
// v寄存器单路使用 %.s[0] %.s[1] %.s[2] %.s[3]
|
||||
// 32-bit register matches %.s[0]
|
||||
// a += b * c[0]
|
||||
// a += b * c[1]
|
||||
// a += b * c[2]
|
||||
// a += b * c[3]
|
||||
float32x4_t _a = vld1_f32(a);
|
||||
float32x4_t _b = vld1_f32(b);
|
||||
float32x4_t _c = vld1_f32(c);
|
||||
asm volatile(
|
||||
"fmla %0.4s, %2.4s, %3.s[0]"
|
||||
"fmla %0.4s, %2.4s, %3.s[1]"
|
||||
"fmla %0.4s, %2.4s, %3.s[2]"
|
||||
"fmla %0.4s, %2.4s, %3.s[3]"
|
||||
: "=w"(_a) // %0
|
||||
: "0"(_a),
|
||||
"w"(_b), // %2
|
||||
"w"(_c) // %3
|
||||
:
|
||||
);
|
||||
```
|
||||
|
||||
|
||||
qwq
|
175
3rdparty/ncnn/docs/developer-guide/add-custom-layer.zh.md
vendored
Normal file
175
3rdparty/ncnn/docs/developer-guide/add-custom-layer.zh.md
vendored
Normal file
@ -0,0 +1,175 @@
|
||||
# NCNN增加自定义层
|
||||
|
||||
## 举例
|
||||
|
||||
这里举个例子添加自定义层次 如Relu6,即 std::min(6, std::max(0, val))
|
||||
|
||||
```
|
||||
Input input 0 1 input
|
||||
Convolution conv2d 1 1 input conv2d 0=32 1=1 2=1 3=1 4=0 5=0 6=768
|
||||
Relu6 relu6 1 1 conv2d relu6
|
||||
Pooling maxpool 1 1 relu6 maxpool 0=0 1=3 2=2 3=-233 4=0
|
||||
```
|
||||
|
||||
|
||||
|
||||
## 定义源码h文件:src/layer/relu6.h
|
||||
|
||||
```CPP
|
||||
#ifndef LAYER_RELU6_H
|
||||
#define LAYER_RELU6_H
|
||||
|
||||
#include "layer.h"
|
||||
|
||||
namespace ncnn {
|
||||
|
||||
class Relu6 : public Layer
|
||||
{
|
||||
public:
|
||||
Relu6();
|
||||
|
||||
virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const;
|
||||
};
|
||||
|
||||
} // namespace ncnn
|
||||
|
||||
#endif // LAYER_RELU6_H
|
||||
```
|
||||
|
||||
|
||||
|
||||
## 定义源码CPP文件:src/layer/relu6.cpp
|
||||
|
||||
```CPP
|
||||
#include "relu6.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
namespace ncnn {
|
||||
|
||||
Relu6::Relu6()
|
||||
{
|
||||
one_blob_only = true;
|
||||
support_inplace = true;
|
||||
}
|
||||
|
||||
int Relu6::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
|
||||
{
|
||||
int w = bottom_top_blob.w;
|
||||
int h = bottom_top_blob.h;
|
||||
int channels = bottom_top_blob.c;
|
||||
int size = w * h;
|
||||
|
||||
#pragma omp parallel for num_threads(opt.num_threads)
|
||||
for (int q=0; q < channels; q++)
|
||||
{
|
||||
float* ptr = bottom_top_blob.channel(q);
|
||||
|
||||
for (int i=0; i<size; i++)
|
||||
{
|
||||
ptr[i] = std::min(6, std::max(0, ptr[i]));
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace ncnn
|
||||
|
||||
```
|
||||
|
||||
|
||||
|
||||
## 修改 src/CMakeLists.txt 注册Relu6
|
||||
|
||||
```CPP
|
||||
ncnn_add_layer(GroupNorm)
|
||||
ncnn_add_layer(LayerNorm)
|
||||
ncnn_add_layer(Relu6)
|
||||
```
|
||||
|
||||
|
||||
|
||||
## 定义测试用例CPP文件 src/test_relu6.cpp
|
||||
|
||||
```CPP
|
||||
#include "layer/relu6.h"
|
||||
#include "testutil.h"
|
||||
|
||||
static int test_relu6(const ncnn::Mat& a)
|
||||
{
|
||||
ncnn::ParamDict pd;
|
||||
|
||||
std::vector<ncnn::Mat> weights(0);
|
||||
|
||||
int ret = test_layer<ncnn::Relu6>("Relu6", pd, weights, a);
|
||||
if (ret != 0)
|
||||
{
|
||||
fprintf(stderr, "test_relu6 failed a.dims=%d a=(%d %d %d)\n", a.dims, a.w, a.h, a.c);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int test_relu6_0()
|
||||
{
|
||||
return 0
|
||||
|| test_relu6(RandomMat(5, 7, 24))
|
||||
|| test_relu6(RandomMat(7, 9, 12))
|
||||
|| test_relu6(RandomMat(3, 5, 13));
|
||||
}
|
||||
|
||||
static int test_relu6_1()
|
||||
{
|
||||
return 0
|
||||
|| test_relu6(RandomMat(15, 24))
|
||||
|| test_relu6(RandomMat(17, 12))
|
||||
|| test_relu6(RandomMat(19, 15));
|
||||
}
|
||||
|
||||
static int test_relu6_2()
|
||||
{
|
||||
return 0
|
||||
|| test_relu6(RandomMat(128))
|
||||
|| test_relu6(RandomMat(124))
|
||||
|| test_relu6(RandomMat(127));
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
SRAND(7767517);
|
||||
|
||||
return 0
|
||||
|| test_relu6_0()
|
||||
|| test_relu6_1()
|
||||
|| test_relu6_2();
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
|
||||
|
||||
## 修改tests/CMakeLists.txt 注册Relu6测试用例
|
||||
|
||||
```CPP
|
||||
ncnn_add_layer_test(LSTM)
|
||||
ncnn_add_layer_test(Yolov3DetectionOutput)
|
||||
ncnn_add_layer_test(Relu6)
|
||||
```
|
||||
|
||||
|
||||
|
||||
## 编译
|
||||
|
||||
```
|
||||
按原NCNN步骤编译
|
||||
```
|
||||
|
||||
|
||||
|
||||
## 单元测试
|
||||
|
||||
```
|
||||
./test_relu6
|
||||
```
|
||||
|
85
3rdparty/ncnn/docs/developer-guide/arm-a53-a55-dual-issue.md
vendored
Normal file
85
3rdparty/ncnn/docs/developer-guide/arm-a53-a55-dual-issue.md
vendored
Normal file
@ -0,0 +1,85 @@
|
||||
## natural assembly
|
||||
* no register dependency, no penalty
|
||||
```
|
||||
ld1 {v0.4s}, [r0], #16
|
||||
fmla v10.4s, v16.4s, v24.s[0]
|
||||
fmla v11.4s, v16.4s, v24.s[1]
|
||||
fmla v12.4s, v16.4s, v24.s[2]
|
||||
fmla v13.4s, v16.4s, v24.s[3]
|
||||
```
|
||||
|
||||
## A53
|
||||
* 128bit vector load cannot be dual issued with fmla, wait 2 cycles
|
||||
* 64bit vector load cannot be dual issued with fmla, wait 1 cycle
|
||||
* 64bit integer load can be dual issued with fmla, no penalty
|
||||
* pointer update can be dual issued with fmla, no penalty
|
||||
* 64bit vector load and 64bit vector insert can be dual issued, no penalty
|
||||
* any vector load cannot be issued on the 4th cycle of each fmla (enters the accumulator pipeline)
|
||||
|
||||
### practical guide
|
||||
* use 64bit vector load only
|
||||
* issue vector load every three fmla
|
||||
* 1 cycle to load 64bit, dual issue with the previous interleaved 64bit insert
|
||||
* load the remaining 64bit into integer register, dual issue with fmla
|
||||
* update pointer, dual issue with fmla
|
||||
* insert 64bit into vector from integer register, dual issue with the next interleaved 64bit load
|
||||
* add nop every three fmla if no load, seems to be faster
|
||||
```
|
||||
ldr d0, [r0] // 1 cycle, v0 first 64bit
|
||||
fmla
|
||||
ldr x23, [r0, #8] // 0 cycle, v0 second 64bit to temp register
|
||||
fmla
|
||||
add r0, r0, #16 // 0 cycle, update pointer
|
||||
fmla
|
||||
ldr d1, [r0] // 1 cycle, v1 first 64bit
|
||||
ins v0.d[1], x23 // 0 cycle, v0 second 64bit complete
|
||||
fmla
|
||||
ldr x23, [r0, #8] // 0 cycle, v1 second 64bit to temp register
|
||||
fmla
|
||||
add r0, r0, #16 // 0 cycle, update pointer
|
||||
fmla
|
||||
ins v1.d[1], x23 // 1 cycle, v1 second 64bit complete
|
||||
nop
|
||||
fmla
|
||||
fmla
|
||||
fmla
|
||||
nop
|
||||
nop
|
||||
fmla
|
||||
fmla
|
||||
fmla
|
||||
```
|
||||
|
||||
## A55
|
||||
* 128bit vector load cannot be dual issued with fmla, wait 2 cycles
|
||||
* 64bit vector load can be dual issued with fmla, no penalty
|
||||
* 64bit integer load can be dual issued with fmla, no penalty
|
||||
* pointer update can be dual issued with fmla, no penalty
|
||||
* 64bit vector insert can be dual issued with fmla, no penalty
|
||||
|
||||
### practical guide
|
||||
* use 64bit vector load only
|
||||
* load 64bit, dual issue with fmla
|
||||
* load the remaining 64bit into integer register, dual issue with fmla
|
||||
* update pointer, dual issue with fmla
|
||||
* insert 64bit into vector from integer register, dual issue with fmla
|
||||
* interleaved load loose register dependency
|
||||
* nop trick is not needed
|
||||
```
|
||||
ldr d0, [r0] // 0 cycle, v0 first 64bit
|
||||
fmla
|
||||
ldr x23, [r0, #8] // 0 cycle, v0 second 64bit to temp register
|
||||
fmla
|
||||
add r0, r0, #16 // 0 cycle, update pointer
|
||||
fmla
|
||||
ldr d1, [r0] // 0 cycle, v1 first 64bit
|
||||
fmla
|
||||
ins v0.d[1], x23 // 0 cycle, v0 second 64bit complete
|
||||
fmla
|
||||
ldr x23, [r0, #8] // 0 cycle, v1 second 64bit to temp register
|
||||
fmla
|
||||
add r0, r0, #16 // 0 cycle, update pointer
|
||||
fmla
|
||||
ins v1.d[1], x23 // 0 cycle, v1 second 64bit complete
|
||||
fmla
|
||||
```
|
130
3rdparty/ncnn/docs/developer-guide/armv7-mix-assembly-and-intrinsic.md
vendored
Normal file
130
3rdparty/ncnn/docs/developer-guide/armv7-mix-assembly-and-intrinsic.md
vendored
Normal file
@ -0,0 +1,130 @@
|
||||
```c
|
||||
// d寄存器全部使用 %P
|
||||
// d reg matches %P
|
||||
// a += b * c
|
||||
float32x2_t _a = vld1_f32(a);
|
||||
float32x2_t _b = vld1_f32(b);
|
||||
float32x2_t _c = vld1_f32(c);
|
||||
asm volatile(
|
||||
"vmla.f32 %P0, %P2, %P3"
|
||||
: "=w"(_a) // %0
|
||||
: "0"(_a),
|
||||
"w"(_b), // %2
|
||||
"w"(_c) // %3
|
||||
:
|
||||
);
|
||||
```
|
||||
```c
|
||||
// q寄存器全部使用 %q
|
||||
// q reg matches %q
|
||||
// a += b * c
|
||||
float32x4_t _a = vld1q_f32(a);
|
||||
float32x4_t _b = vld1q_f32(b);
|
||||
float32x4_t _c = vld1q_f32(c);
|
||||
asm volatile(
|
||||
"vmla.f32 %q0, %q2, %q3"
|
||||
: "=w"(_a) // %0
|
||||
: "0"(_a),
|
||||
"w"(_b), // %2
|
||||
"w"(_c) // %3
|
||||
:
|
||||
);
|
||||
```
|
||||
```c
|
||||
// d寄存器单路使用 %P[0] %P[1]
|
||||
// 32bit d reg matches %P[0]
|
||||
// a += b * c[0]
|
||||
// a += b * c[1]
|
||||
float32x2_t _a = vld1_f32(a);
|
||||
float32x2_t _b = vld1_f32(b);
|
||||
float32x2_t _c = vld1_f32(c);
|
||||
asm volatile(
|
||||
"vmla.f32 %P0, %P2, %P3[0]"
|
||||
"vmla.f32 %P0, %P2, %P3[1]"
|
||||
: "=w"(_a) // %0
|
||||
: "0"(_a),
|
||||
"w"(_b), // %2
|
||||
"w"(_c) // %3
|
||||
:
|
||||
);
|
||||
```
|
||||
```c
|
||||
// q寄存器单路使用 %e[0] %e[1] %f[0] %f[1]
|
||||
// 32-bit q reg matches %e[0]
|
||||
// a += b * c[0]
|
||||
// a += b * c[1]
|
||||
// a += b * c[2]
|
||||
// a += b * c[3]
|
||||
float32x4_t _a = vld1q_f32(a);
|
||||
float32x4_t _b = vld1q_f32(b);
|
||||
float32x4_t _c = vld1q_f32(c);
|
||||
asm volatile(
|
||||
"vmla.f32 %q0, %q2, %e3[0]"
|
||||
"vmla.f32 %q0, %q2, %e3[1]"
|
||||
"vmla.f32 %q0, %q2, %f3[0]"
|
||||
"vmla.f32 %q0, %q2, %f3[1]"
|
||||
: "=w"(_a) // %0
|
||||
: "0"(_a),
|
||||
"w"(_b), // %2
|
||||
"w"(_c) // %3
|
||||
:
|
||||
);
|
||||
```
|
||||
```c
|
||||
// q寄存器拆分d寄存器使用 %e %f
|
||||
// use %e %f to split q reg into two d regs
|
||||
// a += b * c[0]c[1]
|
||||
// a += b * c[2]c[3]
|
||||
float32x2_t _a = vldq_f32(a);
|
||||
float32x2_t _b = vldq_f32(b);
|
||||
float32x4_t _c = vld1q_f32(c);
|
||||
asm volatile(
|
||||
"vmla.f32 %P0, %P2, %e3"
|
||||
"vmla.f32 %P0, %P2, %f3"
|
||||
: "=w"(_a) // %0
|
||||
: "0"(_a),
|
||||
"w"(_b), // %2
|
||||
"w"(_c) // %3
|
||||
:
|
||||
);
|
||||
```
|
||||
```c
|
||||
// d寄存器声明绑定
|
||||
// specify concrete d reg which want to save
|
||||
// vmla.f32 d0, d2, d4
|
||||
register float32x2_t _a asm("d0") = vld1_f32(a);
|
||||
register float32x2_t _b asm("d2") = vld1_f32(b);
|
||||
register float32x2_t _c asm("d4") = vld1_f32(c);
|
||||
|
||||
asm volatile(
|
||||
"vmla.f32 %P0, %P2, %P3"
|
||||
: "=w"(_a) // %0
|
||||
: "0"(_a),
|
||||
"w"(_b), // %2
|
||||
"w"(_c) // %3
|
||||
:
|
||||
);
|
||||
```
|
||||
```c
|
||||
// q寄存器声明绑定
|
||||
// bind q reg with data
|
||||
// vmla.f32 q0, q1, q2
|
||||
register float32x4_t _a asm("q0") = vld1q_f32(a);
|
||||
register float32x4_t _b asm("q1") = vld1q_f32(b);
|
||||
register float32x4_t _c asm("q2") = vld1q_f32(c);
|
||||
|
||||
asm volatile(
|
||||
"vmla.f32 %q0, %q2, %q3"
|
||||
: "=w"(_a) // %0
|
||||
: "0"(_a),
|
||||
"w"(_b), // %2
|
||||
"w"(_c) // %3
|
||||
:
|
||||
);
|
||||
```
|
||||
|
||||
如果不是因为编译器的bug,寄存器绑定是用不着的,然而。。。
|
||||
|
||||
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=41538
|
||||
|
||||
qwq
|
52
3rdparty/ncnn/docs/developer-guide/binaryop-broadcasting.md
vendored
Normal file
52
3rdparty/ncnn/docs/developer-guide/binaryop-broadcasting.md
vendored
Normal file
@ -0,0 +1,52 @@
|
||||
### broadcasting rule
|
||||
|
||||
ncnn BinaryOp accepts blobs with different shape
|
||||
|
||||
C = BinaryOp(A, B)
|
||||
|
||||
shape notation convention is [w], [w,h], [w,h,c], [w,h,d,c]
|
||||
|
||||
|type|A|B|C|
|
||||
|---|---|---|---|
|
||||
|1|[1]|scalar|[1]|
|
||||
|2|[1]|[2]|[2]|
|
||||
|3|[1]|[2,3]|[2,3]|
|
||||
|4|[1]|[2,3,4]|[2,3,4]|
|
||||
|5|[2]|scalar|[2]|
|
||||
|6|[2]|[1]|[2]|
|
||||
|7|[2]|[2]|[2]|
|
||||
|8|[3]|[2,3]|[2,3]|
|
||||
|9|[4]|[2,3,4]|[2,3,4]|
|
||||
|10|[2,3]|scalar|[2,3]|
|
||||
|11|[2,3]|[1]|[2,3]|
|
||||
|12|[2,3]|[3]|[2,3]|
|
||||
|13|[2,3]|[2,3]|[2,3]|
|
||||
|14|[3,4]|[2,3,4]|[2,3,4]|
|
||||
|15|[2,3,4]|scalar|[2,3,4]|
|
||||
|16|[2,3,4]|[1]|[2,3,4]|
|
||||
|17|[2,3,4]|[4]|[2,3,4]|
|
||||
|18|[2,3,4]|[3,4]|[2,3,4]|
|
||||
|19|[2,3,4]|[2,3,4]|[2,3,4]|
|
||||
|20|[1]|[2,3,4,5]|[2,3,4,5]|
|
||||
|21|[5]|[2,3,4,5]|[2,3,4,5]|
|
||||
|22|[4,5]|[2,3,4,5]|[2,3,4,5]|
|
||||
|23|[3,4,5]|[2,3,4,5]|[2,3,4,5]|
|
||||
|24|[2,3,4,5]|scalar|[2,3,4,5]|
|
||||
|25|[2,3,4,5]|[1]|[2,3,4,5]|
|
||||
|26|[2,3,4,5]|[5]|[2,3,4,5]|
|
||||
|27|[2,3,4,5]|[4,5]|[2,3,4,5]|
|
||||
|28|[2,3,4,5]|[3,4,5]|[2,3,4,5]|
|
||||
|29|[2,3,4,5]|[2,3,4,5]|[2,3,4,5]|
|
||||
|
||||
some special broadcasting rule exists for model compatibility
|
||||
|
||||
|special type|A|B|C|
|
||||
|---|---|---|---|
|
||||
|1|[2,3,4]|[1,1,4]|[2,3,4]|
|
||||
|2|[2,3,4]|[2,3,1]|[2,3,4]|
|
||||
|3|[1,1,4]|[2,3,4]|[2,3,4]|
|
||||
|4|[2,3,1]|[2,3,4]|[2,3,4]|
|
||||
|5|[2,3,4]|[1,3,4]|[2,3,4]|
|
||||
|6|[2,3,4]|[2,1,4]|[2,3,4]|
|
||||
|7|[1,3,4]|[2,3,4]|[2,3,4]|
|
||||
|8|[2,1,4]|[2,3,4]|[2,3,4]|
|
63
3rdparty/ncnn/docs/developer-guide/custom-allocator.md
vendored
Normal file
63
3rdparty/ncnn/docs/developer-guide/custom-allocator.md
vendored
Normal file
@ -0,0 +1,63 @@
|
||||
Mat structure is now allocator-aware via an extra allocator parameter with default zero value.
|
||||
|
||||
The good-old ncnn::fastMalloc()/ncnn::fastFree() will be used for a null allocator.
|
||||
|
||||
You could pass a custom allocator to delegate all memory allocation and deallocation.
|
||||
|
||||
```cpp
|
||||
class Allocator
|
||||
{
|
||||
public:
|
||||
virtual void* fastMalloc(size_t size) = 0;
|
||||
virtual void fastFree(void* ptr) = 0;
|
||||
};
|
||||
```
|
||||
|
||||
ncnn has already implemented two simple pooled Allocator class, with mutex lock or without it.
|
||||
|
||||
```cpp
|
||||
ncnn::PoolAllocator locked_mempool;
|
||||
ncnn::UnlockedPoolAllocator unlocked_mempool;
|
||||
```
|
||||
|
||||
the two allocator types in ncnn
|
||||
|
||||
* blob allocator
|
||||
|
||||
used to allocate memory for all named blobs, which you could retrieve by Extractor::extract()
|
||||
* workspace allocator
|
||||
|
||||
used to allocate memory for internal temporary use in layer implementation, such as the temp blob after padding in convolution
|
||||
|
||||
by default, all Extractor instance use the two allocator in the default option
|
||||
You can alter them by ncnn::set_default_option()
|
||||
or you can set them per Extractor by Extractor::set_blob_allocator()/Extractor::set_workspace_allocator()
|
||||
|
||||
blob allocator is guaranteed to be called in-order in layer implementation during each Extractor lifecycle
|
||||
while workspace allocator may be called synchronously
|
||||
|
||||
the practical usage
|
||||
|
||||
* one network, one-by-one inference
|
||||
|
||||
shared unlocked blob allocator for all Extractor
|
||||
|
||||
shared locked workspace allocator for all Extractor
|
||||
|
||||
* one network, concurrent inference
|
||||
|
||||
shared unlocked blob allocator for all Extractor in each thread
|
||||
|
||||
shared locked workspace allocator for all Extractor among all threads
|
||||
|
||||
* concurrent multiple networks, one-by-one inference for each network
|
||||
|
||||
shared unlocked blob allocator for all Extractor of each network
|
||||
|
||||
shared locked workspace allocator for all Extractor among all networks (for saving memory)
|
||||
|
||||
* concurrent multiple networks, concurrent inference for each network
|
||||
|
||||
shared unlocked blob allocator for all Extractor of each network in each thread
|
||||
|
||||
shared locked workspace allocator for all Extractor among all networks (for saving memory)
|
119
3rdparty/ncnn/docs/developer-guide/element-packing.md
vendored
Normal file
119
3rdparty/ncnn/docs/developer-guide/element-packing.md
vendored
Normal file
@ -0,0 +1,119 @@
|
||||
### what is packing and why
|
||||
|
||||
packing is the form of storing multiple short-sized values as one long-sized value.
|
||||
|
||||
element packing is well mapped with the underlying simd register, which usually use one very wide register to store different types of values.
|
||||
|
||||
|C|elemsize|elempack|
|
||||
|---|---|---|
|
||||
|double|8|1|
|
||||
|float|4|1|
|
||||
|int|4|1|
|
||||
|short|2|1|
|
||||
|signed char|1|1|
|
||||
|
||||
|arm neon|elemsize|elempack|
|
||||
|---|---|---|
|
||||
|float64x2_t|16|2|
|
||||
|float32x4_t|16|4|
|
||||
|int32x4_t|16|4|
|
||||
|float16x4_t|8|4|
|
||||
|int8x8_t|8|8|
|
||||
|
||||
Though the real count of values doubles when elempack is two, the wide-sized value is still treated as one value in the view of Mat structure. For example, we want to store 40 float values in Mat object, if elempack 1 is used, Mat width is then 40, while 10 if elempack 4 is used.
|
||||
|
||||
|dims|w|h|c|cstep|elemsize|elempack|
|
||||
|---|---|---|---|---|---|---|
|
||||
|1|40|1|1|40|4|1|
|
||||
|1|10|1|1|10|16|4|
|
||||
|
||||
### packing style convention
|
||||
|
||||
In practice, elempack 1, 4, 8 are the most common cases. It is possible to use any other packing style in theory.
|
||||
|
||||
The following table show the packing axis used in ncnn for different dimension.
|
||||
|
||||
|dims|packing axis|shape before packing|shape after packing|
|
||||
|---|---|---|---|
|
||||
|1|w|w|w/elempack|
|
||||
|2|h|w, h|w, h/elempack|
|
||||
|3|c|w, h, c|w, h, c/elempack|
|
||||
|
||||
If the packing axis dim is not evenly divisible by elempack, zero padding may be used.
|
||||
|
||||
```
|
||||
outw = (w + elempack - 1) / elempack;
|
||||
```
|
||||
|
||||
The following snippet shows the memory layout after elempack=4 on 3-dim Mat
|
||||
|
||||
```
|
||||
// w=2 h=3 c=4 elempack=1
|
||||
0 1
|
||||
2 3
|
||||
4 5
|
||||
|
||||
6 7
|
||||
8 9
|
||||
10 11
|
||||
|
||||
12 13
|
||||
14 15
|
||||
16 17
|
||||
|
||||
18 19
|
||||
20 21
|
||||
22 23
|
||||
|
||||
// w=2 h=3 c=1 elempack=4
|
||||
(0,6,12,18) (1,7,13,19)
|
||||
(2,8,14,20) (3,9,15,21)
|
||||
(4,10,16,22) (5,11,17,23)
|
||||
```
|
||||
|
||||
### how to convert elempack
|
||||
|
||||
There is a convenient wrapper function provided
|
||||
```
|
||||
// convert to elempack 4 if packing axis dim is evenly divisible by elempack
|
||||
// return the identity Mat otherwise
|
||||
ncnn::Mat a;
|
||||
ncnn::Mat a_packed;
|
||||
ncnn::convert_packing(a, a_packed, 4);
|
||||
if (a_packed.elempack == 4)
|
||||
{
|
||||
// check if packing is successful
|
||||
}
|
||||
|
||||
// convert to packing 1, aka unpacking, shall be always successful
|
||||
ncnn::Mat b;
|
||||
ncnn::Mat b_unpacked;
|
||||
ncnn::convert_packing(b, b_unpacked, 1);
|
||||
```
|
||||
|
||||
### handle general interleaved data
|
||||
|
||||
Here is an example of using convert packing to convert RGB interleaved data to planar
|
||||
|
||||
**NOTE:** The following code is just presented to explain what packing is and the conversion process. Do not use it in production due to its poor performance. Do use ncnn::Mat::from_pixels()
|
||||
|
||||
```cpp
|
||||
// rgb_interleaved_u8 is RGB RGB RGB ...
|
||||
// rgb_interleaved_u8.w = w;
|
||||
// rgb_interleaved_u8.h = h;
|
||||
// rgb_interleaved_u8.c = 1;
|
||||
// rgb_interleaved_u8.elemsize = 3;
|
||||
// rgb_interleaved_u8.elempack = 3;
|
||||
|
||||
ncnn::Mat rgb_interleaved_u8(w, h, 1, 3, 3);
|
||||
ncnn::Mat rgb_planar_u8;
|
||||
|
||||
ncnn::convert_packing(rgb_interleaved_u8, rgb_planar_u8, 1);
|
||||
|
||||
// rgb_planar_u8 is now RRR ... GGG ... BBB ...
|
||||
// rgb_planar_u8.w = w;
|
||||
// rgb_planar_u8.h = h;
|
||||
// rgb_planar_u8.c = 3;
|
||||
// rgb_planar_u8.elemsize = 1;
|
||||
// rgb_planar_u8.elempack = 1;
|
||||
```
|
75
3rdparty/ncnn/docs/developer-guide/how-to-be-a-contributor.zh.md
vendored
Normal file
75
3rdparty/ncnn/docs/developer-guide/how-to-be-a-contributor.zh.md
vendored
Normal file
@ -0,0 +1,75 @@
|
||||
### 如何提交代码
|
||||
|
||||
#### 一、fork 分支
|
||||
在浏览器中打开 [ncnn](https://github.com/tencent/ncnn), `fork` 到自己的 repositories,例如
|
||||
```
|
||||
https://github.com/user/ncnn
|
||||
```
|
||||
|
||||
clone 项目到本地,添加官方 remote 并 fetch:
|
||||
```
|
||||
$ git clone https://github.com/user/ncnn && cd ncnn
|
||||
$ git remote add tencent https://github.com/tencent/ncnn
|
||||
$ git fetch tencent
|
||||
```
|
||||
对于 `git clone` 下来的项目,它现在有两个 remote,分别是 origin 和 tencent:
|
||||
|
||||
```
|
||||
$ git remote -v
|
||||
origin https://github.com/user/ncnn (fetch)
|
||||
origin https://github.com/user/ncnn (push)
|
||||
tencent https://github.com/Tencent/ncnn (fetch)
|
||||
tencent https://github.com/Tencent/ncnn (push)
|
||||
```
|
||||
origin 指向你 fork 的仓库地址;remote 即官方 repo。可以基于不同的 remote 创建和提交分支。
|
||||
|
||||
例如切换到官方 master 分支,并基于此创建自己的分支(命名尽量言简意赅。一个分支只做一件事,方便 review 和 revert)
|
||||
```
|
||||
$ git checkout tencent/master
|
||||
$ git checkout -b add-conv-int8
|
||||
```
|
||||
|
||||
或创建分支时指定基于官方 master 分支:
|
||||
```
|
||||
$ git checkout -b fix-typo-in-document tencent/master
|
||||
```
|
||||
|
||||
> `git fetch` 是从远程获取最新代码到本地。如果是第二次 pr ncnn,直接从 `git fetch tencent` 开始即可,不需要 `git remote add tencent`,也不需要修改 `github.com/user/ncnn`。
|
||||
|
||||
#### 二、代码习惯
|
||||
为了增加沟通效率,reviewer 一般要求 contributor 遵从以下规则
|
||||
|
||||
* `if-else`和花括号`{`中间需要换行
|
||||
* 不能随意增删空行
|
||||
* tab 替换为 4 个空格
|
||||
* 为了保证平台兼容性,目前不使用`c++11`,`src`目录下尽量避免使用`template`
|
||||
* 若是新增功能或平台,`test`目录需有对应测试用例
|
||||
* 文档放到`doc`对应目录下,中文用`.zh.md`做后缀;英文直接用`.md`后缀
|
||||
|
||||
开发完成后提交到自己的 repository
|
||||
```
|
||||
$ git commit -a
|
||||
$ git push origin add-conv-int8
|
||||
```
|
||||
推荐使用 [`commitizen`](https://pypi.org/project/commitizen/) 或 [`gitlint`](https://jorisroovers.com/gitlint/) 等工具格式化 commit message,方便事后检索海量提交记录
|
||||
|
||||
#### 三、代码提交
|
||||
浏览器中打开 [ncnn pulls](https://github.com/Tencent/ncnn/pulls) ,此时应有此分支 pr 提示,点击 `Compare & pull request`
|
||||
|
||||
* 标题**必须**是英文。未完成的分支应以 `WIP:` 开头,例如 `WIP: add conv int8`
|
||||
* 正文宜包含以下内容,中英不限
|
||||
* 内容概述和实现方式
|
||||
* 功能或性能测试
|
||||
* 测试结果
|
||||
|
||||
CI 已集成了自动格式化,restyled-io 会在 pr 的同时生成 `Restyled add conv int8`,需要 merge 自动 restyled 的分支,例如
|
||||
```
|
||||
$ git fetch tencent
|
||||
$ git checkout add-conv-int8
|
||||
$ git merge tencent/restyled/pull-2078
|
||||
$ git push origin add-conv-int8
|
||||
```
|
||||
回到浏览器签署 CLA,所有 CI 测试通过后通知 reviewer merge 此分支。
|
||||
|
||||
#### 四、彩蛋
|
||||
留下个人 qq 号会触发隐藏事件。
|
323
3rdparty/ncnn/docs/developer-guide/how-to-implement-custom-layer-step-by-step.md
vendored
Normal file
323
3rdparty/ncnn/docs/developer-guide/how-to-implement-custom-layer-step-by-step.md
vendored
Normal file
@ -0,0 +1,323 @@
|
||||
# step1 create a new empty class
|
||||
```cpp
|
||||
// mylayer.h
|
||||
#include "layer.h"
|
||||
using namespace ncnn;
|
||||
|
||||
// a new layer type called MyLayer
|
||||
class MyLayer : public Layer
|
||||
{
|
||||
};
|
||||
|
||||
// mylayer.cpp
|
||||
#include "mylayer.h"
|
||||
DEFINE_LAYER_CREATOR(MyLayer)
|
||||
```
|
||||
|
||||
# step2 declare layer parameters and weights
|
||||
```cpp
|
||||
// mylayer.h
|
||||
#include "layer.h"
|
||||
using namespace ncnn;
|
||||
|
||||
class MyLayer : public Layer
|
||||
{
|
||||
private:
|
||||
int channels;// new code
|
||||
float gamma;// new code
|
||||
Mat weight;// new code
|
||||
};
|
||||
|
||||
// mylayer.cpp
|
||||
#include "mylayer.h"
|
||||
DEFINE_LAYER_CREATOR(MyLayer)
|
||||
```
|
||||
|
||||
# step3 implement load functions for parameters and weights
|
||||
```cpp
|
||||
// mylayer.h
|
||||
#include "layer.h"
|
||||
using namespace ncnn;
|
||||
|
||||
class MyLayer : public Layer
|
||||
{
|
||||
public:
|
||||
virtual int load_param(const ParamDict& pd);// new code
|
||||
virtual int load_model(const ModelBin& mb);// new code
|
||||
|
||||
private:
|
||||
int channels;
|
||||
float eps;
|
||||
Mat gamma_data;
|
||||
};
|
||||
|
||||
// mylayer.cpp
|
||||
#include "mylayer.h"
|
||||
DEFINE_LAYER_CREATOR(MyLayer)
|
||||
|
||||
// new routine for loading parameters
|
||||
int MyLayer::load_param(const ParamDict& pd)
|
||||
{
|
||||
// details about the relations with param file
|
||||
// https://github.com/Tencent/ncnn/wiki/param-and-model-file-structure
|
||||
//
|
||||
channels = pd.get(0, 0);// parse 0=<int value> entry, default value 0
|
||||
eps = pd.get(1, 0.001f);// parse 1=<float value> entry, default value 0.001f
|
||||
|
||||
return 0;// return zero if success
|
||||
}
|
||||
|
||||
// new routine for loading weights
|
||||
int MyLayer::load_model(const ModelBin& mb)
|
||||
{
|
||||
// details about the relations with model file
|
||||
// https://github.com/Tencent/ncnn/wiki/param-and-model-file-structure
|
||||
//
|
||||
// read weights with length of channels * sizeof(float)
|
||||
// the second argument explains as follows
|
||||
// 0 judge the value type automatically, you may get float or float16 or uint8 etc
|
||||
// depends on the model storage and the supporting target hardware
|
||||
// 1 read float values anyway
|
||||
// 2 read float16 values anyway
|
||||
// 3 read uint8 values anyway
|
||||
gamma_data = mb.load(channels, 1);
|
||||
if (gamma_data.empty())
|
||||
return -100;// return non-zero on error, -100 indicates out-of-memory
|
||||
|
||||
return 0;// return zero if success
|
||||
}
|
||||
```
|
||||
|
||||
# step4 determine forward behavior
|
||||
```cpp
|
||||
// mylayer.h
|
||||
#include "layer.h"
|
||||
using namespace ncnn;
|
||||
|
||||
class MyLayer : public Layer
|
||||
{
|
||||
public:
|
||||
MyLayer();// new code
|
||||
virtual int load_param(const ParamDict& pd);
|
||||
virtual int load_model(const ModelBin& mb);
|
||||
|
||||
private:
|
||||
int channels;
|
||||
float eps;
|
||||
Mat gamma_data;
|
||||
};
|
||||
|
||||
// mylayer.cpp
|
||||
#include "mylayer.h"
|
||||
DEFINE_LAYER_CREATOR(MyLayer)
|
||||
|
||||
// new routine for setting forward behavior
|
||||
MyLayer::MyLayer()
|
||||
{
|
||||
// one input and one output
|
||||
// typical one_blob_only type: Convolution, Pooling, ReLU, Softmax ...
|
||||
// typical non-one_blob_only type: Eltwise, Split, Concat, Slice ...
|
||||
one_blob_only = true;
|
||||
|
||||
// do not change the blob size, modify data in-place
|
||||
// typical support_inplace type: ReLU, Sigmoid ...
|
||||
// typical non-support_inplace type: Convolution, Pooling ...
|
||||
support_inplace = true;
|
||||
}
|
||||
|
||||
int MyLayer::load_param(const ParamDict& pd)
|
||||
{
|
||||
channels = pd.get(0, 0);
|
||||
eps = pd.get(1, 0.001f);
|
||||
|
||||
// you could alter the behavior based on loaded parameter
|
||||
// if (eps == 0.001f)
|
||||
// {
|
||||
// one_blob_only = false;
|
||||
// support_inplace = false;
|
||||
// }
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int MyLayer::load_model(const ModelBin& mb)
|
||||
{
|
||||
gamma_data = mb.load(channels, 1);
|
||||
if (gamma_data.empty())
|
||||
return -100;
|
||||
|
||||
// you could alter the behavior based on loaded weight
|
||||
// if (gamma_data[0] == 0.f)
|
||||
// {
|
||||
// one_blob_only = false;
|
||||
// support_inplace = false;
|
||||
// }
|
||||
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
# step5 choose proper interface based on forward behavior
|
||||
```cpp
|
||||
// The base class Layer defines four interfaces for each forward behavior combination
|
||||
|
||||
// 1
|
||||
virtual int forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const;
|
||||
|
||||
// 2
|
||||
virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
|
||||
|
||||
// 3
|
||||
virtual int forward_inplace(std::vector<Mat>& bottom_top_blobs, const Option& opt) const;
|
||||
|
||||
// 4
|
||||
virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const;
|
||||
```
|
||||
**must** = layer must implement this function
|
||||
|
||||
**optional** = layer may implement this function for optimal performance
|
||||
|
||||
sometimes the graph inference path cannot call forward_inplace directly due to data sharing, in this situation the non-inplace forward routine will be used, which deep-copy the input blob and call inplace forward on it if the optional routine is not implemented. Thus, you could avoid this deep-copy by process input to output on-the-fly.
|
||||
|
||||
|one_blob_only|support_inplace|1|2|3|4|
|
||||
|---|---|---|---|---|---|
|
||||
|false|false|must| | | |
|
||||
|false|true|optional| |must| |
|
||||
|true|false| |must| | |
|
||||
|true|true| |optional| |must|
|
||||
|
||||
# step6 implement forward function
|
||||
```cpp
|
||||
// mylayer.h
|
||||
#include "layer.h"
|
||||
using namespace ncnn;
|
||||
|
||||
class MyLayer : public Layer
|
||||
{
|
||||
public:
|
||||
MyLayer();
|
||||
virtual int load_param(const ParamDict& pd);
|
||||
virtual int load_model(const ModelBin& mb);
|
||||
|
||||
virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;// new code, optional
|
||||
virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const;// new code
|
||||
|
||||
private:
|
||||
int channels;
|
||||
float eps;
|
||||
Mat gamma_data;
|
||||
};
|
||||
|
||||
// mylayer.cpp
|
||||
#include "mylayer.h"
|
||||
DEFINE_LAYER_CREATOR(MyLayer)
|
||||
|
||||
MyLayer::MyLayer()
|
||||
{
|
||||
one_blob_only = true;
|
||||
support_inplace = true;
|
||||
}
|
||||
|
||||
int MyLayer::load_param(const ParamDict& pd)
|
||||
{
|
||||
channels = pd.get(0, 0);
|
||||
eps = pd.get(1, 0.001f);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int MyLayer::load_model(const ModelBin& mb)
|
||||
{
|
||||
gamma_data = mb.load(channels, 1);
|
||||
if (gamma_data.empty())
|
||||
return -100;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// optional new routine for layer forward function, non-inplace version
|
||||
int MyLayer::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
|
||||
{
|
||||
// check input dims, return non-zero on error
|
||||
if (bottom_blob.c != channels)
|
||||
return -1;
|
||||
|
||||
// x = (x + eps) * gamma_per_channel
|
||||
|
||||
int w = bottom_blob.w;
|
||||
int h = bottom_blob.h;
|
||||
size_t elemsize = bottom_blob.elemsize;
|
||||
int size = w * h;
|
||||
|
||||
top_blob.create(w, h, channels, elemsize, opt.blob_allocator);
|
||||
if (top_blob.empty())
|
||||
return -100;// return non-zero on error, -100 indicates out-of-memory
|
||||
|
||||
#pragma omp parallel for num_threads(opt.num_threads)
|
||||
for (int q=0; q<channels; q++)
|
||||
{
|
||||
const float* ptr = bottom_blob.channel(q);
|
||||
float* outptr = top_blob.channel(q);
|
||||
const float gamma = gamma_data[q];
|
||||
|
||||
for (int i=0; i<size; i++)
|
||||
{
|
||||
outptr[i] = (ptr[i] + eps) * gamma ;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// new routine for layer forward function
|
||||
int MyLayer::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
|
||||
{
|
||||
// check input dims, return non-zero on error
|
||||
if (bottom_top_blob.c != channels)
|
||||
return -1;
|
||||
|
||||
// x = (x + eps) * gamma_per_channel
|
||||
|
||||
int w = bottom_top_blob.w;
|
||||
int h = bottom_top_blob.h;
|
||||
int size = w * h;
|
||||
|
||||
#pragma omp parallel for num_threads(opt.num_threads)
|
||||
for (int q=0; q<channels; q++)
|
||||
{
|
||||
float* ptr = bottom_top_blob.channel(q);
|
||||
const float gamma = gamma_data[q];
|
||||
|
||||
for (int i=0; i<size; i++)
|
||||
{
|
||||
ptr[i] = (ptr[i] + eps) * gamma ;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
# step7 integrate with ncnn library
|
||||
you may probably need to modify caffe2ncnn or mxnet2ncnn etc. to write your layer specific parameters and weights into ncnn param and model file
|
||||
|
||||
the param and model file structure [param-and-model-file-structure](param-and-model-file-structure)
|
||||
|
||||
```
|
||||
// example param file content
|
||||
Input input 0 1 input
|
||||
Convolution conv2d 1 1 input conv2d 0=32 1=1 2=1 3=1 4=0 5=0 6=768
|
||||
MyLayer mylayer 1 1 conv2d mylayer0
|
||||
Pooling maxpool 1 1 mylayer0 maxpool 0=0 1=3 2=2 3=-233 4=0
|
||||
```
|
||||
|
||||
```cpp
|
||||
ncnn::Net net;
|
||||
|
||||
// register custom layer before load param and model
|
||||
// the layer creator function signature is always XYZ_layer_creator, which defined in DEFINE_LAYER_CREATOR macro
|
||||
net.register_custom_layer("MyLayer", MyLayer_layer_creator);
|
||||
|
||||
net.load_param("model.param");
|
||||
net.load_model("model.bin");
|
||||
```
|
38
3rdparty/ncnn/docs/developer-guide/how-to-write-a-neon-optimized-op-kernel.md
vendored
Normal file
38
3rdparty/ncnn/docs/developer-guide/how-to-write-a-neon-optimized-op-kernel.md
vendored
Normal file
@ -0,0 +1,38 @@
|
||||
# benchmark
|
||||
op
|
||||
|
||||
# naive C with openmp
|
||||
for for for
|
||||
|
||||
# unroll, first try
|
||||
h
|
||||
|
||||
# register allocation
|
||||
kernels
|
||||
|
||||
# unroll, second try
|
||||
simd
|
||||
|
||||
# neon intrinsics
|
||||
optional
|
||||
|
||||
# naive neon assembly with pld
|
||||
asm
|
||||
|
||||
# pipeline optimize, first try
|
||||
more register load mla
|
||||
|
||||
# pipeline optimize, second try
|
||||
interleave load mla
|
||||
|
||||
# pipeline optimize, third try
|
||||
loop tail
|
||||
|
||||
# usual practice, load/save
|
||||
233
|
||||
|
||||
# usual practice, unroll
|
||||
233
|
||||
|
||||
# usual practice, save register
|
||||
233
|
311
3rdparty/ncnn/docs/developer-guide/low-level-operation-api.md
vendored
Normal file
311
3rdparty/ncnn/docs/developer-guide/low-level-operation-api.md
vendored
Normal file
@ -0,0 +1,311 @@
|
||||
# implement elementwise addition with/without broadcast using BinaryOp operation
|
||||
|
||||
* input must be fp32 storage without packing
|
||||
* output is expected to be fp32 storage without packing
|
||||
|
||||
```cpp
|
||||
void binary_add(const ncnn::Mat& a, const ncnn::Mat& b, ncnn::Mat& c)
|
||||
{
|
||||
ncnn::Option opt;
|
||||
opt.num_threads = 2;
|
||||
opt.use_fp16_storage = false;
|
||||
opt.use_packing_layout = false;
|
||||
|
||||
ncnn::Layer* op = ncnn::create_layer("BinaryOp");
|
||||
|
||||
// set param
|
||||
ncnn::ParamDict pd;
|
||||
pd.set(0, 0);// op_type
|
||||
|
||||
op->load_param(pd);
|
||||
|
||||
op->create_pipeline(opt);
|
||||
|
||||
// forward
|
||||
std::vector<ncnn::Mat> bottoms(2);
|
||||
bottoms[0] = a;
|
||||
bottoms[1] = b;
|
||||
|
||||
std::vector<ncnn::Mat> tops(1);
|
||||
op->forward(bottoms, tops, opt);
|
||||
|
||||
c = tops[0];
|
||||
|
||||
op->destroy_pipeline(opt);
|
||||
|
||||
delete op;
|
||||
}
|
||||
```
|
||||
|
||||
# implement 3x3 box blur on three channel image using ConvolutionDepthWise operation
|
||||
|
||||
* input must be fp32 storage without packing
|
||||
* output is expected to be fp32 storage without packing
|
||||
|
||||
```cpp
|
||||
void convolution_3x3_boxblur_RGB(const ncnn::Mat& rgb, ncnn::Mat& out)
|
||||
{
|
||||
ncnn::Option opt;
|
||||
opt.num_threads = 2;
|
||||
opt.use_fp16_storage = false;
|
||||
opt.use_packing_layout = false;
|
||||
|
||||
ncnn::Layer* op = ncnn::create_layer("ConvolutionDepthWise");
|
||||
|
||||
// set param
|
||||
ncnn::ParamDict pd;
|
||||
pd.set(0, 3);// num_output
|
||||
pd.set(1, 3);// kernel_w
|
||||
pd.set(5, 0);// bias_term
|
||||
pd.set(6, 3*3*3);// weight_data_size
|
||||
pd.set(7, 3);// group
|
||||
|
||||
op->load_param(pd);
|
||||
|
||||
// set weights
|
||||
ncnn::Mat weights[1];
|
||||
weights[0].create(3*3*3);// weight_data
|
||||
|
||||
for (int i=0; i<3*3*3; i++)
|
||||
{
|
||||
weights[0][i] = 1.f / 9;
|
||||
}
|
||||
|
||||
op->load_model(ncnn::ModelBinFromMatArray(weights));
|
||||
|
||||
op->create_pipeline(opt);
|
||||
|
||||
// forward
|
||||
op->forward(rgb, out, opt);
|
||||
|
||||
op->destroy_pipeline(opt);
|
||||
|
||||
delete op;
|
||||
}
|
||||
```
|
||||
# transpose Mat, chw to cwh
|
||||
|
||||
* input must be fp32 storage with/without packing
|
||||
* output is expected to be fp32 storage packed
|
||||
|
||||
```cpp
|
||||
void transpose(const ncnn::Mat& in, ncnn::Mat& out)
|
||||
{
|
||||
ncnn::Option opt;
|
||||
opt.num_threads = 2;
|
||||
opt.use_fp16_storage = false;
|
||||
opt.use_packing_layout = true;
|
||||
|
||||
ncnn::Layer* op = ncnn::create_layer("Permute");
|
||||
|
||||
// set param
|
||||
ncnn::ParamDict pd;
|
||||
pd.set(0, 1);// order_type
|
||||
|
||||
op->load_param(pd);
|
||||
|
||||
op->create_pipeline(opt);
|
||||
|
||||
ncnn::Mat in_packed = in;
|
||||
{
|
||||
// resolve dst_elempack
|
||||
int dims = in.dims;
|
||||
int elemcount = 0;
|
||||
if (dims == 1) elemcount = in.elempack * in.w;
|
||||
if (dims == 2) elemcount = in.elempack * in.h;
|
||||
if (dims == 3) elemcount = in.elempack * in.c;
|
||||
|
||||
int dst_elempack = 1;
|
||||
if (op->support_packing)
|
||||
{
|
||||
if (elemcount % 8 == 0 && (ncnn::cpu_support_x86_avx2() || ncnn::cpu_support_x86_avx()))
|
||||
dst_elempack = 8;
|
||||
else if (elemcount % 4 == 0)
|
||||
dst_elempack = 4;
|
||||
}
|
||||
|
||||
if (in.elempack != dst_elempack)
|
||||
{
|
||||
convert_packing(in, in_packed, dst_elempack, opt);
|
||||
}
|
||||
}
|
||||
|
||||
// forward
|
||||
op->forward(in_packed, out, opt);
|
||||
|
||||
op->destroy_pipeline(opt);
|
||||
|
||||
delete op;
|
||||
}
|
||||
```
|
||||
# apply instance normalization
|
||||
// x = (x - mean) / sqrt(var)
|
||||
|
||||
* input can be fp32/fp16 storage with/without packing
|
||||
* output is expected to be fp16 storage packed when supported, or fp32 storage packed otherwise
|
||||
|
||||
```cpp
|
||||
void normalize(const ncnn::Mat& in, ncnn::Mat& out)
|
||||
{
|
||||
ncnn::Option opt;
|
||||
opt.num_threads = 2;
|
||||
opt.use_fp16_storage = true;
|
||||
opt.use_packing_layout = true;
|
||||
|
||||
ncnn::Layer* op = ncnn::create_layer("InstanceNorm");
|
||||
|
||||
// set param
|
||||
ncnn::ParamDict pd;
|
||||
pd.set(0, in.c);// channels
|
||||
pd.set(1, 0.f);// eps
|
||||
|
||||
op->load_param(pd);
|
||||
|
||||
// set weights
|
||||
ncnn::Mat weights[2];
|
||||
weights[0].create(in.c);// gamma_data
|
||||
weights[1].create(in.c);// beta_data
|
||||
|
||||
weights[0].fill(1.f);
|
||||
weights[1].fill(0.f);
|
||||
|
||||
op->load_model(ncnn::ModelBinFromMatArray(weights));
|
||||
|
||||
op->create_pipeline(opt);
|
||||
|
||||
ncnn::Mat in_fp16 = in;
|
||||
if (in.elembits() == 32 && op->support_fp16_storage)
|
||||
{
|
||||
cast_float32_to_float16(in, in_fp16, opt);
|
||||
}
|
||||
if (in.elembits() == 16 && !op->support_fp16_storage)
|
||||
{
|
||||
cast_float16_to_float32(in, in_fp16, opt);
|
||||
}
|
||||
|
||||
ncnn::Mat in_fp16_packed = in_fp16;
|
||||
{
|
||||
// resolve dst_elempack
|
||||
int dims = in_fp16.dims;
|
||||
int elemcount = 0;
|
||||
if (dims == 1) elemcount = in_fp16.elempack * in_fp16.w;
|
||||
if (dims == 2) elemcount = in_fp16.elempack * in_fp16.h;
|
||||
if (dims == 3) elemcount = in_fp16.elempack * in_fp16.c;
|
||||
|
||||
int dst_elempack = 1;
|
||||
if (op->support_packing)
|
||||
{
|
||||
if (elemcount % 8 == 0 && (ncnn::cpu_support_x86_avx2() || ncnn::cpu_support_x86_avx()))
|
||||
dst_elempack = 8;
|
||||
else if (elemcount % 4 == 0)
|
||||
dst_elempack = 4;
|
||||
}
|
||||
|
||||
if (in_fp16.elempack != dst_elempack)
|
||||
{
|
||||
convert_packing(in_fp16, in_fp16_packed, dst_elempack, opt);
|
||||
}
|
||||
}
|
||||
|
||||
// forward
|
||||
op->forward(in_fp16_packed, out, opt);
|
||||
|
||||
op->destroy_pipeline(opt);
|
||||
|
||||
delete op;
|
||||
}
|
||||
```
|
||||
|
||||
# cpu -> gpu -> forward -> gpu -> cpu
|
||||
|
||||
```cpp
|
||||
ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();
|
||||
|
||||
ncnn::VkAllocator* blob_vkallocator = vkdev->acquire_blob_allocator();
|
||||
ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator();
|
||||
|
||||
ncnn::VkWeightAllocator* weight_vkallocator = new ncnn::VkWeightAllocator(vkdev);
|
||||
ncnn::VkWeightStagingAllocator* weight_staging_vkallocator = new ncnn::VkWeightStagingAllocator(vkdev);
|
||||
|
||||
// create layer
|
||||
ncnn::Layer* convolution = ncnn::create_layer("Convolution");
|
||||
convolution->vkdev = vkdev;
|
||||
|
||||
// set option
|
||||
ncnn::Option opt;
|
||||
opt.num_threads = 4;
|
||||
opt.use_vulkan_compute = true;
|
||||
opt.blob_vkallocator = blob_vkallocator;
|
||||
opt.workspace_vkallocator = blob_vkallocator;
|
||||
opt.staging_vkallocator = staging_vkallocator;
|
||||
|
||||
// load param
|
||||
{
|
||||
ncnn::ParamDict pd;
|
||||
pd.set(0, outch);
|
||||
pd.set(1, ksize);
|
||||
pd.set(6, outch*inch*ksize*ksize);
|
||||
pd.use_vulkan_compute = 1;
|
||||
|
||||
convolution->load_param(pd);
|
||||
}
|
||||
|
||||
// load model
|
||||
{
|
||||
ncnn::Mat weights[2];
|
||||
weights[0] = random_mat(outch*inch*ksize*ksize);
|
||||
weights[1] = random_mat(outch);
|
||||
|
||||
ncnn::ModelBinFromMatArray mb(weights);
|
||||
convolution->load_model(mb);
|
||||
}
|
||||
|
||||
// create pipeline
|
||||
convolution->create_pipeline(opt);
|
||||
|
||||
// upload model
|
||||
{
|
||||
ncnn::VkTransfer cmd(vkdev);
|
||||
|
||||
ncnn::Option opt_upload = opt;
|
||||
opt_upload.blob_vkallocator = weight_vkallocator;
|
||||
opt_upload.workspace_vkallocator = weight_vkallocator;
|
||||
opt_upload.staging_vkallocator = weight_staging_vkallocator;
|
||||
|
||||
convolution->upload_model(cmd, opt_upload);
|
||||
|
||||
cmd.submit_and_wait();
|
||||
}
|
||||
|
||||
ncnn::Mat bottom = random_mat(w, h, inch);
|
||||
|
||||
ncnn::Mat top;
|
||||
|
||||
// forward
|
||||
{
|
||||
ncnn::VkCompute cmd(vkdev);
|
||||
|
||||
ncnn::VkMat bottom_gpu;
|
||||
cmd.record_upload(bottom, bottom_gpu, opt);
|
||||
|
||||
ncnn::VkMat top_gpu;
|
||||
convolution->forward(bottom_gpu, top_gpu, cmd, opt);
|
||||
|
||||
cmd.record_download(top_gpu, top, opt);
|
||||
|
||||
cmd.submit_and_wait();
|
||||
}
|
||||
|
||||
convolution->destroy_pipeline(opt);
|
||||
|
||||
delete convolution;
|
||||
|
||||
vkdev->reclaim_blob_allocator(blob_vkallocator);
|
||||
vkdev->reclaim_staging_allocator(staging_vkallocator);
|
||||
|
||||
weight_vkallocator->clear();
|
||||
weight_staging_vkallocator->clear();
|
||||
delete weight_vkallocator;
|
||||
delete weight_staging_vkallocator;
|
||||
```
|
||||
|
46
3rdparty/ncnn/docs/developer-guide/ncnn-tips-and-tricks.zh.md
vendored
Normal file
46
3rdparty/ncnn/docs/developer-guide/ncnn-tips-and-tricks.zh.md
vendored
Normal file
@ -0,0 +1,46 @@
|
||||
### blob内存是隐含共享的
|
||||
|
||||
ncnn的blob最初直接使用opencv的cv::Mat,后发现blob最多只支持三维,因此实现了类似的Mat
|
||||
Mat的data每个通道内存16字节对齐,并且有原子的引用计数,a=b不复制数据,超级快
|
||||
Mat支持直接引用外部的内存块,不复制数据,加快模型加载和输入输出
|
||||
|
||||
举个例子:split layer 将一个blob复制成n个,ncnn中实现为单纯的增加引用计数,没有任何数据复制
|
||||
|
||||
### 只运算一部分并保留中间结果
|
||||
|
||||
ncnn的net在解决分支依赖时是自上而下深度优先的,因此当网络有多个分支时,运算只会在需要结果的那个分支中进行,节约时间
|
||||
当多个分支有重合部分时,运算其中一个分支后会自动保留其余分支所需的中间结果,隐含共享,以便运算其余分支时利用
|
||||
|
||||
举个例子:某网络结构为 A -> B -> C1 + C2,向ncnn索要C1结果时,运算过程是 A -> B -> C1,同时B结果引用计数加1自动保留,后面还需要C2结果时,只运算C2就足够了
|
||||
|
||||
### 开启轻模式省内存
|
||||
|
||||
每个layer都会产生blob,除了最后的结果和多分支中间结果,大部分blob都不值得保留,开启轻模式可以在运算后自动回收,省下内存
|
||||
|
||||
举个例子:某网络结构为 A -> B -> C,在轻模式下,向ncnn索要C结果时,A结果会在运算B时自动回收,而B结果会在运算C时自动回收,最后只保留C结果,后面再需要C结果会直接获得,满足绝大部分深度网络的使用方式
|
||||
|
||||
### 网络和运算是分开的
|
||||
|
||||
ncnn的net是网络模型,实际使用的是extractor,也就是同个net可以有很多个运算实例,而且运算实例互不影响,中间结果保留在extractor内部,在多线程使用时共用网络的结构和参数数据,初始化网络模型和参数只需要一遍
|
||||
|
||||
举个例子:全局静态的net实例,初始化一次后,就能不停地生成extractor使用
|
||||
|
||||
### openmp虽快但未必合适
|
||||
|
||||
ncnn中几乎所有运算都能用上openmp多线程加速,而且性能很赞
|
||||
不过系统有时候会突然慢一下,比如手机太热自动降频,界面操作等等,ncnn耗时也会偶尔抖动变长,在计算耗时稳定性比较重要的时候建议关闭openmp,或者设置下extractor线程数
|
||||
|
||||
举个例子:手机自拍时,用ncnn进行人脸实时定位,如果耗时突然涨一下就会感觉到掉帧,而稳定的帧率体验更好
|
||||
|
||||
### NCNN_STDIO/NCNN_STRING禁用模型文件
|
||||
|
||||
ncnn支持加载自有的模型文件和模型内存,NCNN_STDIO控制是否需要支持加载模型文件,设成0能禁用这部分代码,从而减小库的体积,NCNN_STRING设成0能清除大部分可见的字符串和解析过程
|
||||
模型内存加载时的参数数据是直接引用的,速度更快,通常在手机上使用这种方式
|
||||
|
||||
### 削减 ncnn 内置的层实现
|
||||
|
||||
cmake的时候,加参数 -DWITH_LAYER_xxx=OFF 就可以完全不编译对应的内置层,这样可以进一步减小库的体积
|
||||
|
||||
### 关于 ARM big.LITTLE 调度
|
||||
|
||||
调用set_cpu_powersave可以把ncnn运算线程控制在特定的cpu核心上,大核心速度快耗电多,小核心速度慢点但省电,大小一起用手机热得快
|
194
3rdparty/ncnn/docs/developer-guide/new-model-load-api.md
vendored
Normal file
194
3rdparty/ncnn/docs/developer-guide/new-model-load-api.md
vendored
Normal file
@ -0,0 +1,194 @@
|
||||
## current model load api
|
||||
### Cons
|
||||
#### long and awful code
|
||||
#### two functions
|
||||
#### deal float32 float16 quantized-u8
|
||||
#### deal alignment size
|
||||
```cpp
|
||||
#if NCNN_STDIO
|
||||
int Convolution::load_model(FILE* binfp)
|
||||
{
|
||||
int nread;
|
||||
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
unsigned char f0;
|
||||
unsigned char f1;
|
||||
unsigned char f2;
|
||||
unsigned char f3;
|
||||
};
|
||||
unsigned int tag;
|
||||
} flag_struct;
|
||||
|
||||
nread = fread(&flag_struct, sizeof(flag_struct), 1, binfp);
|
||||
if (nread != 1)
|
||||
{
|
||||
fprintf(stderr, "Convolution read flag_struct failed %d\n", nread);
|
||||
return -1;
|
||||
}
|
||||
|
||||
unsigned int flag = flag_struct.f0 + flag_struct.f1 + flag_struct.f2 + flag_struct.f3;
|
||||
|
||||
weight_data.create(weight_data_size);
|
||||
if (weight_data.empty())
|
||||
return -100;
|
||||
|
||||
if (flag_struct.tag == 0x01306B47)
|
||||
{
|
||||
// half-precision weight data
|
||||
int align_weight_data_size = alignSize(weight_data_size * sizeof(unsigned short), 4);
|
||||
std::vector<unsigned short> float16_weights;
|
||||
float16_weights.resize(align_weight_data_size);
|
||||
nread = fread(float16_weights.data(), align_weight_data_size, 1, binfp);
|
||||
if (nread != 1)
|
||||
{
|
||||
fprintf(stderr, "Convolution read float16_weights failed %d\n", nread);
|
||||
return -1;
|
||||
}
|
||||
|
||||
weight_data = Mat::from_float16(float16_weights.data(), weight_data_size);
|
||||
if (weight_data.empty())
|
||||
return -100;
|
||||
}
|
||||
else if (flag != 0)
|
||||
{
|
||||
// quantized weight data
|
||||
float quantization_value[256];
|
||||
nread = fread(quantization_value, 256 * sizeof(float), 1, binfp);
|
||||
if (nread != 1)
|
||||
{
|
||||
fprintf(stderr, "Convolution read quantization_value failed %d\n", nread);
|
||||
return -1;
|
||||
}
|
||||
|
||||
int align_weight_data_size = alignSize(weight_data_size * sizeof(unsigned char), 4);
|
||||
std::vector<unsigned char> index_array;
|
||||
index_array.resize(align_weight_data_size);
|
||||
nread = fread(index_array.data(), align_weight_data_size, 1, binfp);
|
||||
if (nread != 1)
|
||||
{
|
||||
fprintf(stderr, "Convolution read index_array failed %d\n", nread);
|
||||
return -1;
|
||||
}
|
||||
|
||||
float* weight_data_ptr = weight_data;
|
||||
for (int i = 0; i < weight_data_size; i++)
|
||||
{
|
||||
weight_data_ptr[i] = quantization_value[ index_array[i] ];
|
||||
}
|
||||
}
|
||||
else if (flag_struct.f0 == 0)
|
||||
{
|
||||
// raw weight data
|
||||
nread = fread(weight_data, weight_data_size * sizeof(float), 1, binfp);
|
||||
if (nread != 1)
|
||||
{
|
||||
fprintf(stderr, "Convolution read weight_data failed %d\n", nread);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (bias_term)
|
||||
{
|
||||
bias_data.create(num_output);
|
||||
if (bias_data.empty())
|
||||
return -100;
|
||||
nread = fread(bias_data, num_output * sizeof(float), 1, binfp);
|
||||
if (nread != 1)
|
||||
{
|
||||
fprintf(stderr, "Convolution read bias_data failed %d\n", nread);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif // NCNN_STDIO
|
||||
|
||||
int Convolution::load_model(const unsigned char*& mem)
|
||||
{
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
unsigned char f0;
|
||||
unsigned char f1;
|
||||
unsigned char f2;
|
||||
unsigned char f3;
|
||||
};
|
||||
unsigned int tag;
|
||||
} flag_struct;
|
||||
|
||||
memcpy(&flag_struct, mem, sizeof(flag_struct));
|
||||
mem += sizeof(flag_struct);
|
||||
|
||||
unsigned int flag = flag_struct.f0 + flag_struct.f1 + flag_struct.f2 + flag_struct.f3;
|
||||
|
||||
if (flag_struct.tag == 0x01306B47)
|
||||
{
|
||||
// half-precision weight data
|
||||
weight_data = Mat::from_float16((unsigned short*)mem, weight_data_size);
|
||||
mem += alignSize(weight_data_size * sizeof(unsigned short), 4);
|
||||
if (weight_data.empty())
|
||||
return -100;
|
||||
}
|
||||
else if (flag != 0)
|
||||
{
|
||||
// quantized weight data
|
||||
const float* quantization_value = (const float*)mem;
|
||||
mem += 256 * sizeof(float);
|
||||
|
||||
const unsigned char* index_array = (const unsigned char*)mem;
|
||||
mem += alignSize(weight_data_size * sizeof(unsigned char), 4);
|
||||
|
||||
weight_data.create(weight_data_size);
|
||||
if (weight_data.empty())
|
||||
return -100;
|
||||
float* weight_data_ptr = weight_data;
|
||||
for (int i = 0; i < weight_data_size; i++)
|
||||
{
|
||||
weight_data_ptr[i] = quantization_value[ index_array[i] ];
|
||||
}
|
||||
}
|
||||
else if (flag_struct.f0 == 0)
|
||||
{
|
||||
// raw weight data
|
||||
weight_data = Mat(weight_data_size, (float*)mem);
|
||||
mem += weight_data_size * sizeof(float);
|
||||
}
|
||||
|
||||
if (bias_term)
|
||||
{
|
||||
bias_data = Mat(num_output, (float*)mem);
|
||||
mem += num_output * sizeof(float);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
## new model load api proposed
|
||||
### Pros
|
||||
#### clean and simple api
|
||||
#### element type detection
|
||||
```cpp
|
||||
int Convolution::load_model(const ModelBin& mb)
|
||||
{
|
||||
// auto detect element type
|
||||
weight_data = mb.load(weight_data_size, 0);
|
||||
if (weight_data.empty())
|
||||
return -100;
|
||||
|
||||
if (bias_term)
|
||||
{
|
||||
// certain type specified
|
||||
bias_data = mb.load(num_output, 1);
|
||||
if (bias_data.empty())
|
||||
return -100;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
```
|
92
3rdparty/ncnn/docs/developer-guide/new-param-load-api.md
vendored
Normal file
92
3rdparty/ncnn/docs/developer-guide/new-param-load-api.md
vendored
Normal file
@ -0,0 +1,92 @@
|
||||
## current param load api
|
||||
### Cons
|
||||
#### long and awful code
|
||||
#### three functions
|
||||
#### not extensible
|
||||
#### no default value
|
||||
#### no variable length array
|
||||
```
|
||||
MyLayer mylayer 1 1 in out 100 1.250000
|
||||
```
|
||||
```
|
||||
binary 100
|
||||
binary 1.250000
|
||||
```
|
||||
```cpp
|
||||
#if NCNN_STDIO
|
||||
#if NCNN_STRING
|
||||
int MyLayer::load_param(FILE* paramfp)
|
||||
{
|
||||
int nscan = fscanf(paramfp, "%d %f", &a, &b);
|
||||
if (nscan != 2)
|
||||
{
|
||||
fprintf(stderr, "MyLayer load_param failed %d\n", nscan);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif // NCNN_STRING
|
||||
int MyLayer::load_param_bin(FILE* paramfp)
|
||||
{
|
||||
fread(&a, sizeof(int), 1, paramfp);
|
||||
|
||||
fread(&b, sizeof(float), 1, paramfp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif // NCNN_STDIO
|
||||
|
||||
int MyLayer::load_param(const unsigned char*& mem)
|
||||
{
|
||||
a = *(int*)(mem);
|
||||
mem += 4;
|
||||
|
||||
b = *(float*)(mem);
|
||||
mem += 4;
|
||||
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
## new param load api proposed
|
||||
### Pros
|
||||
#### clean and simple api
|
||||
#### default value
|
||||
#### extensible
|
||||
#### variable length array
|
||||
```
|
||||
7767517
|
||||
MyLayer mylayer 1 1 in out 0=100 1=1.250000 -23303=5,0.1,0.2,0.4,0.8,1.0
|
||||
```
|
||||
```
|
||||
binary 0xDD857600(magic)
|
||||
|
||||
binary 0
|
||||
binary 100
|
||||
binary 1
|
||||
binary 1.250000
|
||||
binary -23303
|
||||
binary 5
|
||||
binary 0.1
|
||||
binary 0.2
|
||||
binary 0.4
|
||||
binary 0.8
|
||||
binary 1.0
|
||||
binary -233(EOP)
|
||||
```
|
||||
```cpp
|
||||
int MyLayer::load_param(const ParamDict& pd)
|
||||
{
|
||||
// pd.get( param id (seq), default value );
|
||||
a = pd.get(0, 100);
|
||||
b = pd.get(1, 1.25f);
|
||||
|
||||
// get default value for c if not specified in param file
|
||||
c = pd.get(2, 0.001);
|
||||
|
||||
// get array
|
||||
d = pd.get(3, Mat(len, array));
|
||||
return 0;
|
||||
}
|
||||
```
|
303
3rdparty/ncnn/docs/developer-guide/operation-param-weight-table.md
vendored
Normal file
303
3rdparty/ncnn/docs/developer-guide/operation-param-weight-table.md
vendored
Normal file
@ -0,0 +1,303 @@
|
||||
|
||||
|operation|param id|param phase|default value|weight order|
|
||||
|:---:|:---:|:---:|:---:|:---:|
|
||||
|AbsVal|||
|
||||
|ArgMax|0|out_max_val|0|
|
||||
||1|topk|1|
|
||||
|BatchNorm|0|channels|0|slope mean variance bias|
|
||||
||1|eps|0.f|
|
||||
|Bias|0|bias_data_size|0|
|
||||
|BinaryOp|0|op_type|0|
|
||||
||1|with_scalar|0|
|
||||
||2|b|0.f|
|
||||
|BNLL|||
|
||||
|Cast|0|type_from|0|
|
||||
||1|type_to|0|
|
||||
|Clip|0|min|-FLT_MAX|
|
||||
||1|max|FLT_MAX|
|
||||
|Concat|0|axis|0|
|
||||
|Convolution|0|num_output|0|weight bias|
|
||||
||1|kernel_w|0|
|
||||
||2|dilation_w|1|
|
||||
||3|stride_w|1|
|
||||
||4|pad_left|0|
|
||||
||5|bias_term|0|
|
||||
||6|weight_data_size|0|
|
||||
||8|int8_scale_term|0|
|
||||
||9|activation_type|0|
|
||||
||10|activation_params|[ ]|
|
||||
||11|kernel_h|kernel_w|
|
||||
||12|dilation_h|dilation_w|
|
||||
||13|stride_h|stride_w|
|
||||
||15|pad_right|pad_left|
|
||||
||14|pad_top|pad_left|
|
||||
||16|pad_bottom|pad_top|
|
||||
||17|impl_type|0|
|
||||
||18|pad_value|0.f|
|
||||
|ConvolutionDepthWise|0|num_output|0|weight bias|
|
||||
||1|kernel_w|0|
|
||||
||2|dilation_w|1|
|
||||
||3|stride_w|1|
|
||||
||4|pad_left|0|
|
||||
||5|bias_term|0|
|
||||
||6|weight_data_size|0|
|
||||
||7|group|1|
|
||||
||8|int8_scale_term|0|
|
||||
||9|activation_type|0|
|
||||
||10|activation_params|[ ]|
|
||||
||11|kernel_h|kernel_w|
|
||||
||12|dilation_h|dilation_w|
|
||||
||13|stride_h|stride_w|
|
||||
||15|pad_right|pad_left|
|
||||
||14|pad_top|pad_left|
|
||||
||16|pad_bottom|pad_top|
|
||||
||18|pad_value|0.f|
|
||||
|Crop|0|woffset|0|
|
||||
||1|hoffset|0|
|
||||
||2|coffset|0|
|
||||
||3|outw|0|
|
||||
||4|outh|0|
|
||||
||5|outc|0|
|
||||
||6|woffset2|0|
|
||||
||7|hoffset2|0|
|
||||
||8|coffset2|0|
|
||||
||9|starts|[ ]|
|
||||
||10|ends|[ ]|
|
||||
||11|axes|[ ]|
|
||||
|Deconvolution|0|num_output|0|weight bias|
|
||||
||1|kernel_w|0|
|
||||
||2|dilation_w|1|
|
||||
||3|stride_w|1|
|
||||
||4|pad_left|0|
|
||||
||5|bias_term|0|
|
||||
||6|weight_data_size|0|
|
||||
||9|activation_type|0|
|
||||
||10|activation_params|[ ]|
|
||||
||11|kernel_h|kernel_w|
|
||||
||12|dilation_h|dilation_w|
|
||||
||13|stride_h|stride_w|
|
||||
||15|pad_right|pad_left|
|
||||
||14|pad_top|pad_left|
|
||||
||16|pad_bottom|pad_top|
|
||||
||18|output_pad_right|0|
|
||||
||19|output_pad_bottom|output_pad_right|
|
||||
||20|output_w|0|
|
||||
||21|output_h|output_w|
|
||||
|DeconvolutionDepthWise|0|num_output|0|weight bias|
|
||||
||1|kernel_w|0|
|
||||
||2|dilation_w|1|
|
||||
||3|stride_w|1|
|
||||
||4|pad_left|0|
|
||||
||5|bias_term|0|
|
||||
||6|weight_data_size|0|
|
||||
||7|group|1|
|
||||
||9|activation_type|0|
|
||||
||10|activation_params|[ ]|
|
||||
||11|kernel_h|kernel_w|
|
||||
||12|dilation_h|dilation_w|
|
||||
||13|stride_h|stride_w|
|
||||
||15|pad_right|pad_left|
|
||||
||14|pad_top|pad_left|
|
||||
||16|pad_bottom|pad_top|
|
||||
||18|output_pad_right|0|
|
||||
||19|output_pad_bottom|output_pad_right|
|
||||
||20|output_w|0|
|
||||
||21|output_h|output_w|
|
||||
|Dequantize|0|scale|1.f|bias|
|
||||
||1|bias_term|0|
|
||||
||2|bias_data_size|0|
|
||||
|DetectionOutput|0|num_class|0|
|
||||
||1|nms_threshold|0.05f|
|
||||
||2|nms_top_k|300|
|
||||
||3|keep_top_k|100|
|
||||
||4|confidence_threshold|0.5f|
|
||||
||5|variances[0]|0.1f|
|
||||
||6|variances[1]|0.1f|
|
||||
||7|variances[2]|0.2f|
|
||||
||8|variances[3]|0.2f|
|
||||
|Dropout|0|scale|1.f|
|
||||
|Eltwise|0|op_type|0|
|
||||
||1|coeffs|[ ]|
|
||||
|ELU|0|alpha|0.1f|
|
||||
|Embed|0|num_output|0|weight bias|
|
||||
||1|input_dim|0|
|
||||
||2|bias_term|0|
|
||||
||3|weight_data_size|0|
|
||||
|Exp|0|base|-1.f|
|
||||
||1|scale|1.f|
|
||||
||2|shift|0.f|
|
||||
|ExpandDims|0|expand_w|0|
|
||||
||1|expand_h|0|
|
||||
||2|expand_c|0|
|
||||
||3|axes|[ ]|
|
||||
|Flatten|||
|
||||
|HardSigmoid|0|alpha|0.2f||
|
||||
||1|beta|0.5f|
|
||||
|HardSwish|0|alpha|0.2f||
|
||||
||1|beta|0.5f|
|
||||
|InnerProduct|0|num_output|0|weight bias|
|
||||
||1|bias_term|0|
|
||||
||2|weight_data_size|0|
|
||||
||8|int8_scale_term|0|
|
||||
||9|activation_type|0|
|
||||
||10|activation_params|[ ]|
|
||||
|Input|0|w|0|
|
||||
||1|h|0|
|
||||
||2|c|0|
|
||||
|InstanceNorm|0|channels|0|gamma bias|
|
||||
||1|eps|0.001f|
|
||||
|Interp|0|resize_type|0|
|
||||
||1|height_scale|1.f|
|
||||
||2|width_scale|1.f|
|
||||
||3|output_height|0|
|
||||
||4|output_width|0|
|
||||
|Log|0|base|-1.f|
|
||||
||1|scale|1.f|
|
||||
||2|shift|0.f|
|
||||
|LRN|0|region_type|0|
|
||||
||1|local_size|5|
|
||||
||2|alpha|1.f|
|
||||
||3|beta|0.75f|
|
||||
||4|bias|1.f|
|
||||
|LSTM|0|num_output|0|
|
||||
||1|weight_data_size|1|
|
||||
||2|direction|0|
|
||||
|MemoryData|0|w|0|
|
||||
||1|h|0|
|
||||
||2|c|0|
|
||||
|Mish|||
|
||||
|MVN|0|normalize_variance|0|
|
||||
||1|across_channels|0|
|
||||
||2|eps|0.0001f|
|
||||
|Noop|||
|
||||
|Normalize|0|across_spatial|0|scale|
|
||||
||4|across_channel|0|
|
||||
||1|channel_shared|0|
|
||||
||2|eps|0.0001f|
|
||||
||9|eps_mode|0|
|
||||
||3|scale_data_size|0|
|
||||
|Packing|0|out_packing|1|
|
||||
||1|use_padding|0|
|
||||
||2|cast_type_from|0|
|
||||
||3|cast_type_to|0|
|
||||
||4|storage_type_from|0|
|
||||
||5|storage_type_to|0|
|
||||
|Padding|0|top|0|per_channel_pad_data|
|
||||
||1|bottom|0|
|
||||
||2|left|0|
|
||||
||3|right|0|
|
||||
||4|type|0|
|
||||
||5|value|0.f|
|
||||
||6|per_channel_pad_data_size|0|
|
||||
||7|front|0|
|
||||
||8|behind|0|
|
||||
|Permute|0|order_type|0|
|
||||
|PixelShuffle|0|upscale_factor|1|
|
||||
|Pooling|0|pooling_type(0: max 1: avg)|0|
|
||||
||1|kernel_w|0|
|
||||
||11|kernel_h|kernel_w|
|
||||
||2|stride_w|1|
|
||||
||12|stride_h|stride_w|
|
||||
||3|pad_left|0|
|
||||
||14|pad_right|pad_left|
|
||||
||13|pad_top|pad_left|
|
||||
||15|pad_bottom|pad_top|
|
||||
||4|global_pooling|0|
|
||||
||5|pad_mode|0|
|
||||
|Power|0|power|1.f|
|
||||
||1|scale|1.f|
|
||||
||2|shift|0.f|
|
||||
|PReLU|0|num_slope|0|slope|
|
||||
|PriorBox|0|min_sizes|[ ]|
|
||||
||1|max_sizes|[ ]|
|
||||
||2|aspect_ratios|[ ]|
|
||||
||3|varainces[0]|0.f|
|
||||
||4|varainces[1]|0.f|
|
||||
||5|varainces[2]|0.f|
|
||||
||6|varainces[3]|0.f|
|
||||
||7|flip|1|
|
||||
||8|clip|0|
|
||||
||9|image_width|0|
|
||||
||10|image_height|0|
|
||||
||11|step_width|-233.f|
|
||||
||12|step_height|-233.f|
|
||||
||13|offset|0.f|
|
||||
||14|step_mmdetection|0|
|
||||
||15|center_mmdetection|0|
|
||||
|Proposal|0|feat_stride|16|
|
||||
||1|base_size|16|
|
||||
||2|pre_nms_topN|6000|
|
||||
||3|after_nms_topN|300|
|
||||
||4|num_thresh|0.7f|
|
||||
||5|min_size|16|
|
||||
|PSROIPooling|0|pooled_width|7|
|
||||
||1|pooled_height|7|
|
||||
||2|spatial_scale|0.0625f|
|
||||
||3|output_dim|0|
|
||||
|Quantize|0|scale|1.f|
|
||||
|Reduction|0|operation|0|
|
||||
||1|dim|0|
|
||||
||2|coeff|1.f|
|
||||
||3|axes|[ ]|
|
||||
||4|keepdims|0|
|
||||
|ReLU|0|slope|0.f|
|
||||
|Reorg|0|stride|0|
|
||||
|Requantize|0|scale_in|1.f|bias|
|
||||
||1|scale_out|1.f|
|
||||
||2|bias_term|0|
|
||||
||3|bias_data_size|0|
|
||||
||4|fusion_relu|0|
|
||||
|Reshape|0|w|-233|
|
||||
||1|h|-233|
|
||||
||2|c|-233|
|
||||
||3|permute|0|
|
||||
|ROIAlign|0|pooled_width|0|
|
||||
||1|pooled_height|0|
|
||||
||2|spatial_scale|1.f|
|
||||
||3|sampling_ratio|0|
|
||||
||4|aligned|0|
|
||||
||5|version|0|
|
||||
|ROIPooling|0|pooled_width|0|
|
||||
||1|pooled_height|0|
|
||||
||2|spatial_scale|1.f|
|
||||
|Scale|0|scale_data_size|0|scale bias|
|
||||
||1|bias_term|0|
|
||||
|SELU|0|alpha|1.67326324f||
|
||||
||1|lambda|1.050700987f|
|
||||
|ShuffleChannel|0|group|1|
|
||||
|Sigmoid|||
|
||||
|Slice|0|slices|[ ]|
|
||||
||1|axis|0|
|
||||
|Softmax|0|axis|0|
|
||||
|Split|||
|
||||
|SPP|0|pooling_type|0|
|
||||
||1|pyramid_height|1|
|
||||
|Squeeze|0|squeeze_w|0|
|
||||
||1|squeeze_h|0|
|
||||
||2|squeeze_c|0|
|
||||
||3|axes|[ ]|
|
||||
|StatisticsPooling|0|include_stddev|0|
|
||||
|Swish|||
|
||||
|TanH|||
|
||||
|Threshold|0|threshold|0.f|
|
||||
|Tile|0|dim|0|
|
||||
||1|tiles|1|
|
||||
|UnaryOp|0|op_type|0|
|
||||
|YoloDetectionOutput|0|num_class|20|
|
||||
||1|num_box|5|
|
||||
||2|confidence_threshold|0.01f|
|
||||
||3|num_threshold|0.45f|
|
||||
||4|biases|[]|
|
||||
|Yolov3DetectionOutput|0|num_class|20|
|
||||
||1|num_box|5|
|
||||
||2|confidence_threshold|0.01f|
|
||||
||3|num_threshold|0.45f|
|
||||
||4|biases|[]|
|
||||
||5|mask|[]|
|
||||
||6|anchors_scale|[]|
|
||||
|RNN|0|num_output|0|
|
||||
||1|weight_data_size|0|
|
||||
||2|direction|0|
|
||||
|MultiHeadAttention|0|embed_dim|0|
|
||||
||1|num_head|1|
|
||||
||2|weight_data_size|0|
|
1643
3rdparty/ncnn/docs/developer-guide/operators.md
vendored
Normal file
1643
3rdparty/ncnn/docs/developer-guide/operators.md
vendored
Normal file
File diff suppressed because it is too large
Load Diff
64
3rdparty/ncnn/docs/developer-guide/param-and-model-file-structure.md
vendored
Normal file
64
3rdparty/ncnn/docs/developer-guide/param-and-model-file-structure.md
vendored
Normal file
@ -0,0 +1,64 @@
|
||||
## net.param
|
||||
### example
|
||||
```
|
||||
7767517
|
||||
3 3
|
||||
Input input 0 1 data 0=4 1=4 2=1
|
||||
InnerProduct ip 1 1 data fc 0=10 1=1 2=80
|
||||
Softmax softmax 1 1 fc prob 0=0
|
||||
```
|
||||
### overview
|
||||
```
|
||||
[magic]
|
||||
```
|
||||
* magic number : 7767517
|
||||
```
|
||||
[layer count] [blob count]
|
||||
```
|
||||
* layer count : count of the layer line follows, should be exactly the count of all layer names
|
||||
* blob count : count of all blobs, usually greater than or equals to the layer count
|
||||
### layer line
|
||||
```
|
||||
[layer type] [layer name] [input count] [output count] [input blobs] [output blobs] [layer specific params]
|
||||
```
|
||||
* layer type : type name, such as Convolution Softmax etc
|
||||
* layer name : name of this layer, must be unique among all layer names
|
||||
* input count : count of the blobs this layer needs as input
|
||||
* output count : count of the blobs this layer produces as output
|
||||
* input blobs : name list of all the input blob names, separated by space, must be unique among input blob names of all layers
|
||||
* output blobs : name list of all the output blob names, separated by space, must be unique among output blob names of all layers
|
||||
* layer specific params : key=value pair list, separated by space
|
||||
### layer param
|
||||
```
|
||||
0=1 1=2.5 -23303=2,2.0,3.0
|
||||
```
|
||||
key index should be unique in each layer line, pair can be omitted if the default value used
|
||||
|
||||
the meaning of existing param key index can be looked up at [operation-param-weight-table](operation-param-weight-table)
|
||||
|
||||
* integer or float key : index 0 ~ 19
|
||||
* integer value : int
|
||||
* float value : float
|
||||
* integer array or float array key : -23300 minus index 0 ~ 19
|
||||
* integer array value : [array size],int,int,...,int
|
||||
* float array value : [array size],float,float,...,float
|
||||
|
||||
## net.bin
|
||||
```
|
||||
+---------+---------+---------+---------+---------+---------+
|
||||
| weight1 | weight2 | weight3 | weight4 | ....... | weightN |
|
||||
+---------+---------+---------+---------+---------+---------+
|
||||
^ ^ ^ ^
|
||||
0x0 0x80 0x140 0x1C0
|
||||
```
|
||||
the model binary is the concatenation of all weight data, each weight buffer is aligned by 32bit
|
||||
|
||||
### weight buffer
|
||||
```
|
||||
[flag] (optional)
|
||||
[raw data]
|
||||
[padding] (optional)
|
||||
```
|
||||
* flag : unsigned int, little-endian, indicating the weight storage type, 0 => float32, 0x01306B47 => float16, otherwise => quantized int8, may be omitted if the layer implementation forced the storage type explicitly
|
||||
* raw data : raw weight data, little-endian, float32 data or float16 data or quantized table and indexes depending on the storage type flag
|
||||
* padding : padding space for 32bit alignment, may be omitted if already aligned
|
29
3rdparty/ncnn/docs/developer-guide/preload-practice.zh.md
vendored
Normal file
29
3rdparty/ncnn/docs/developer-guide/preload-practice.zh.md
vendored
Normal file
@ -0,0 +1,29 @@
|
||||
## 只是实践经验,没有理论,不一定正确
|
||||
|
||||
```
|
||||
prfm pldl1keep, [x0, #256]
|
||||
```
|
||||
* 放在 ld1 [x0] 前面 0~8 条指令
|
||||
* #256 表示把 x0+256 的内容放进 L1 cache
|
||||
* ldp 也适用
|
||||
* (经验)不写 offset 不如写个 #128
|
||||
* (经验)pldl1strm 似乎没啥意思,也没 pldl1keep 快
|
||||
* (经验)x0 ~ x0+256 的内容也会进来
|
||||
* (经验)load 128bit 用 #128,256bit或更多用 #256
|
||||
* (经验)避免 pld a,pld b,load a,load b 顺序,可能相互干扰
|
||||
* (经验)提前太多会失效
|
||||
* (经验)适合连续读
|
||||
|
||||
```
|
||||
prfm pldl2strm, [x0, #256]
|
||||
```
|
||||
* 放在 ld1 [x0] 前面 N 条指令,N 尽量大些
|
||||
* #256 表示把 x0+256 的内容放进 L2 cache
|
||||
* ldp 也适用
|
||||
* (经验)不写 offset 不如写个 #128
|
||||
* (经验)pldl2strm 效果稍好于 pldl2keep
|
||||
* (经验)x0 ~ x0+256 的内容也会进来
|
||||
* (经验)load 128bit 用 #128,256bit 用 #256
|
||||
* (经验)读很多数据,用不同 offset 连续两次 pldl2strm
|
||||
* (经验)后面不要对同位置再 pldl1keep,会变慢
|
||||
* (经验)适合提前准备要跳到很远的地方读,比如换 channel
|
57
3rdparty/ncnn/docs/developer-guide/tensorflow-op-combination.md
vendored
Normal file
57
3rdparty/ncnn/docs/developer-guide/tensorflow-op-combination.md
vendored
Normal file
@ -0,0 +1,57 @@
|
||||
## batchnorm
|
||||
```
|
||||
Input A 0 1 A 0 0 0
|
||||
MemoryData sub/y 0 1 sub/y 16 0 0
|
||||
BinaryOp sub 2 1 A sub/y sub 1
|
||||
MemoryData div/y 0 1 div/y 16 0 0
|
||||
BinaryOp div 2 1 sub div/y div 3
|
||||
MemoryData mul/y 0 1 mul/y 16 0 0
|
||||
BinaryOp mul 2 1 div mul/y mul 2
|
||||
MemoryData BiasAdd/bias 0 1 BiasAdd/bias 16 0 0
|
||||
BinaryOp BiasAdd 2 1 mul BiasAdd/bias BiasAdd 0
|
||||
```
|
||||
## convolution
|
||||
```
|
||||
Input A 0 1 A 0 0 0
|
||||
Convolution Conv2D 1 1 A Conv2D 10 3 1 1 0 0 270
|
||||
MemoryData biases/read 0 1 biases/read 10 0 0
|
||||
BinaryOp BiasAdd 2 1 Conv2D biases/read BiasAdd 0
|
||||
```
|
||||
## innerproduct
|
||||
```
|
||||
Input A 0 1 A 0 0 0
|
||||
MemoryData biases/read 0 1 biases/read 10 0 0
|
||||
InnerProduct MatMul 1 1 A MatMul 10 0 2560
|
||||
BinaryOp conv6 2 1 MatMul biases/read conv6 0
|
||||
```
|
||||
## leakyrelu
|
||||
```
|
||||
Input A 0 1 A 0 0 0
|
||||
Split splitncnn_0 1 2 A A_splitncnn_0 A_splitncnn_1
|
||||
MemoryData mul_1/x 0 1 mul_1/x 0 0 0
|
||||
BinaryOp mul_1 2 1 mul_1/x A_splitncnn_1 mul_1 2
|
||||
BinaryOp leaky 2 1 mul_1 A_splitncnn_0 leaky 4
|
||||
```
|
||||
## prelu
|
||||
```
|
||||
Input A 0 1 A 0 0 0
|
||||
Split splitncnn_0 1 2 A A_splitncnn_0 A_splitncnn_1
|
||||
MemoryData prelu/alpha 0 1 prelu/alpha 10 0 0
|
||||
ReLU prelu/Relu 1 1 A_splitncnn_1 prelu/Relu 0.000000
|
||||
UnaryOp prelu/Neg 1 1 A_splitncnn_0 prelu/Neg 1
|
||||
ReLU prelu/Relu_1 1 1 prelu/Neg prelu/Relu_1 0.000000
|
||||
UnaryOp prelu/Neg_1 1 1 prelu/Relu_1 prelu/Neg_1 1
|
||||
BinaryOp prelu/Mul 2 1 prelu/alpha prelu/Neg_1 prelu/Mul 2
|
||||
BinaryOp prelu/add 2 1 prelu/Relu prelu/Mul prelu/add 0
|
||||
```
|
||||
## softmax
|
||||
```
|
||||
Input A 0 1 A 0 0 0
|
||||
Split splitncnn_4 1 2 A A_splitncnn_0 A_splitncnn_1
|
||||
Reduction Max 1 1 A_splitncnn_1 Max 4 -2 1.000000
|
||||
BinaryOp sub 2 1 A_splitncnn_0 Max sub 1
|
||||
UnaryOp Exp 1 1 sub Exp 7
|
||||
Split splitncnn_5 1 2 Exp Exp_splitncnn_0 Exp_splitncnn_1
|
||||
Reduction Sum 1 1 Exp_splitncnn_1 Sum 0 -2 1.000000
|
||||
BinaryOp prob 2 1 Exp_splitncnn_0 Sum prob 3
|
||||
```
|
676
3rdparty/ncnn/docs/faq.md
vendored
Normal file
676
3rdparty/ncnn/docs/faq.md
vendored
Normal file
@ -0,0 +1,676 @@
|
||||
|
||||
|
||||
# 如何加入技术交流QQ群?
|
||||
|
||||
- 打开QQ→点击群聊搜索→搜索群号637093648→输入问题答案:卷卷卷卷卷→进入群聊→准备接受图灵测试(bushi)
|
||||
- 前往QQ搜索Pocky群:677104663(超多大佬),问题答案:multi level intermediate representation
|
||||
|
||||
# 如何看作者b站直播?
|
||||
|
||||
- nihui的bilibili直播间:[水竹院落](https://live.bilibili.com/1264617)
|
||||
|
||||
# 编译
|
||||
|
||||
- ## 怎样下载完整源码?
|
||||
|
||||
git clone --recursive https://github.com/Tencent/ncnn/
|
||||
|
||||
或者
|
||||
|
||||
下载 [ncnn-xxxxx-full-source.zip](https://github.com/Tencent/ncnn/releases)
|
||||
|
||||
- ## 怎么交叉编译?cmake 工具链怎么设置啊?
|
||||
|
||||
参见 https://github.com/Tencent/ncnn/wiki/how-to-build
|
||||
|
||||
- ## The submodules were not downloaded! Please update submodules with "git submodule update --init" and try again
|
||||
|
||||
如上,下载完整源码。或者按提示执行: git submodule update --init
|
||||
|
||||
- ## Could NOT find Protobuf (missing: Protobuf_INCLUDE_DIR)
|
||||
|
||||
sudo apt-get install libprotobuf-dev protobuf-compiler
|
||||
|
||||
- ## Could NOT find CUDA (missing: CUDA_TOOLKIT_ROOT_DIR CUDA_INCLUDE_DIRS CUDA_CUDART_LIBRARY)
|
||||
|
||||
https://github.com/Tencent/ncnn/issues/1873
|
||||
|
||||
- ## Could not find a package configuration file provided by "OpenCV" with any of the following names: OpenCVConfig.cmake opencv-config.cmake
|
||||
|
||||
sudo apt-get install libopencv-dev
|
||||
|
||||
或者自行编译安装,set(OpenCV_DIR {OpenCVConfig.cmake所在目录})
|
||||
|
||||
- ## Could not find a package configuration file provided by "ncnn" with any of the following names: ncnnConfig.cmake ncnn-config.cmake
|
||||
|
||||
set(ncnn_DIR {ncnnConfig.cmake所在目录})
|
||||
|
||||
- ## 找不到 Vulkan,
|
||||
|
||||
cmake版本 3.10,否则没有带 FindVulkan.cmake
|
||||
|
||||
android-api >= 24
|
||||
|
||||
macos 要先执行安装脚本
|
||||
|
||||
- ## 如何安装 vulkan sdk
|
||||
|
||||
- ## 找不到库(需要根据系统/编译器指定)
|
||||
|
||||
undefined reference to __kmpc_for_static_init_4 __kmpc_for_static_fini __kmpc_fork_call ...
|
||||
|
||||
需要链接openmp库
|
||||
|
||||
undefined reference to vkEnumerateInstanceExtensionProperties vkGetInstanceProcAddr vkQueueSubmit ...
|
||||
|
||||
需要 vulkan-1.lib
|
||||
|
||||
undefined reference to glslang::InitializeProcess() glslang::TShader::TShader(EShLanguage) ...
|
||||
|
||||
需要 glslang.lib OGLCompiler.lib SPIRV.lib OSDependent.lib
|
||||
|
||||
undefined reference to AAssetManager_fromJava AAssetManager_open AAsset_seek ...
|
||||
|
||||
find_library和target_like_libraries中增加 android
|
||||
|
||||
find_package(ncnn)
|
||||
|
||||
- ## undefined reference to typeinfo for ncnn::Layer
|
||||
|
||||
opencv rtti -> opencv-mobile
|
||||
|
||||
- ## undefined reference to __cpu_model
|
||||
|
||||
升级编译器 / libgcc_s libgcc
|
||||
|
||||
- ## unrecognized command line option "-mavx2"
|
||||
|
||||
升级 gcc
|
||||
|
||||
- ## 为啥自己编译的ncnn android库特别大?
|
||||
|
||||
https://github.com/Tencent/ncnn/wiki/build-for-android.zh 以及见 如何裁剪更小的 ncnn 库
|
||||
|
||||
- ## ncnnoptimize和自定义层
|
||||
|
||||
先ncnnoptimize再增加自定义层,避免ncnnoptimize不能处理自定义层保存。
|
||||
|
||||
|
||||
- ## rtti/exceptions冲突
|
||||
|
||||
产生原因是项目工程中使用的库配置不一样导致冲突,根据自己的实际情况分析是需要开启还是关闭。ncnn默认是ON,在重新编译ncnn时增加以下2个参数即可:
|
||||
- 开启:-DNCNN_DISABLE_RTTI=OFF -DNCNN_DISABLE_EXCEPTION=OFF
|
||||
- 关闭:-DNCNN_DISABLE_RTTI=ON -DNCNN_DISABLE_EXCEPTION=ON
|
||||
|
||||
|
||||
- ## error: undefined symbol: ncnn::Extractor::extract(char const*, ncnn::Mat&)
|
||||
|
||||
可能的情况:
|
||||
- 尝试升级 Android Studio 的 NDK 版本
|
||||
|
||||
|
||||
# 怎样添加ncnn库到项目中?cmake方式怎么用?
|
||||
|
||||
编译ncnn,make install。linux/windows set/export ncnn_DIR 指向 isntall目录下下包含ncnnConfig.cmake 的目录
|
||||
|
||||
- ## android
|
||||
|
||||
- ## ios
|
||||
|
||||
- ## linux
|
||||
|
||||
- ## windows
|
||||
|
||||
- ## macos
|
||||
|
||||
- ## arm linux
|
||||
|
||||
|
||||
# 转模型问题
|
||||
|
||||
- ## caffe
|
||||
|
||||
`./caffe2ncnn caffe.prototxt caffe.caffemodel ncnn.param ncnn.bin`
|
||||
|
||||
- ## mxnet
|
||||
|
||||
` ./mxnet2ncnn mxnet-symbol.json mxnet.params ncnn.param ncnn.bin`
|
||||
|
||||
- ## darknet
|
||||
|
||||
[https://github.com/xiangweizeng/darknet2ncnn](https://github.com/xiangweizeng/darknet2ncnn)
|
||||
|
||||
- ## pytorch - onnx
|
||||
|
||||
[use ncnn with pytorch or onnx](https://github.com/Tencent/ncnn/wiki/use-ncnn-with-pytorch-or-onnx)
|
||||
|
||||
- ## tensorflow 1.x/2.x - keras
|
||||
|
||||
[https://github.com/MarsTechHAN/keras2ncnn](https://github.com/MarsTechHAN/keras2ncnn) **[@MarsTechHAN](https://github.com/MarsTechHAN)**
|
||||
|
||||
- ## tensorflow 2.x - mlir
|
||||
|
||||
[通过MLIR将tensorflow2模型转换到ncnn](https://zhuanlan.zhihu.com/p/152535430) **@[nihui](https://www.zhihu.com/people/nihui-2)**
|
||||
|
||||
- ## Shape not supported yet! Gather not supported yet! Cast not supported yet!
|
||||
|
||||
onnx-simplifier 静态shape
|
||||
|
||||
- ## convertmodel
|
||||
|
||||
[https://convertmodel.com/](https://convertmodel.com/) **[@大老师](https://github.com/daquexian)**
|
||||
|
||||
- ## netron
|
||||
|
||||
[https://github.com/lutzroeder/netron](https://github.com/lutzroeder/netron)
|
||||
|
||||
- ## 怎么生成有固定 shape 信息的模型?
|
||||
|
||||
Input 0=w 1=h 2=c
|
||||
|
||||
- ## why gpu能更快
|
||||
|
||||
- ## ncnnoptimize 怎么转成 fp16 模型
|
||||
|
||||
`ncnnoptimize model.param model.bin yolov5s-opt.param yolov5s-opt.bin 65536`
|
||||
|
||||
- ## ncnnoptimize 怎样查看模型的 FLOPS / 内存占用情况
|
||||
|
||||
- ## 怎么修改模型支持动态 shape?
|
||||
|
||||
Interp Reshape
|
||||
|
||||
- ## 如何将模型转换为代码内嵌到程序里?
|
||||
|
||||
ncnn2mem
|
||||
|
||||
- ## 如何加密模型?
|
||||
|
||||
https://zhuanlan.zhihu.com/p/268327784
|
||||
|
||||
- ## Linux下转的ncnn模型,Windows/MacOS/Android/.. 也能直接用吗?
|
||||
|
||||
Yes,全平台通用
|
||||
|
||||
- ## 如何去掉后处理,再导出 onnx?
|
||||
|
||||
检测:
|
||||
|
||||
参考up的一篇文章<https://zhuanlan.zhihu.com/p/128974102>,步骤三就是去掉后处理,再导出onnx,其中去掉后处理可以是项目内测试时去掉后续步骤的结果。
|
||||
|
||||
- ## pytorch 有的层导不出 onnx 怎么办?
|
||||
|
||||
方式一:
|
||||
|
||||
ONNX_ATEN_FALLBACK
|
||||
完全自定义的op,先改成能导出的(如 concat slice),转到 ncnn 后再修改 param
|
||||
|
||||
方式二:
|
||||
|
||||
可以使用PNNX来试试,参考以下文章大概说明:
|
||||
|
||||
1. [Windows/Linux/macOS 编译 PNNX 步骤](https://zhuanlan.zhihu.com/p/431833958)
|
||||
|
||||
2. [5分钟学会!用 PNNX 转换 TorchScript 模型到 ncnn 模型](https://zhuanlan.zhihu.com/p/427512763)
|
||||
|
||||
# 使用
|
||||
|
||||
- ## vkEnumeratePhysicalDevices failed -3
|
||||
|
||||
- ## vkCreateInstance failed -9
|
||||
|
||||
出现此类问题请先更新GPU驱动。Please upgrade your GPU driver if you encounter this crash or error.
|
||||
这里提供了一些品牌的GPU驱动下载网址.We have provided some drivers' download pages here.
|
||||
[Intel](https://downloadcenter.intel.com/product/80939/Graphics-Drivers),[AMD](https://www.amd.com/en/support),[Nvidia](https://www.nvidia.com/Download/index.aspx)
|
||||
|
||||
- ## ModuleNotFoundError: No module named 'ncnn.ncnn'
|
||||
|
||||
python setup.py develop
|
||||
|
||||
- ## fopen nanodet-m.param failed
|
||||
|
||||
文件路径 working dir
|
||||
|
||||
File not found or not readable. Make sure that XYZ.param/XYZ.bin is accessible.
|
||||
|
||||
- ## find_blob_index_by_name data / output / ... failed
|
||||
|
||||
layer name vs blob name
|
||||
|
||||
param.bin 应该用 xxx.id.h 的枚举
|
||||
|
||||
- ## parse magic failed
|
||||
|
||||
- ## param is too old, please regenerate
|
||||
|
||||
模型本身有问题
|
||||
|
||||
Your model file is being the old format converted by an old caffe2ncnn tool.
|
||||
|
||||
Checkout the latest ncnn code, build it and regenerate param and model binary files, and that should work.
|
||||
|
||||
Make sure that your param file starts with the magic number 7767517.
|
||||
|
||||
you may find more info on use-ncnn-with-alexnet
|
||||
|
||||
When adding the softmax layer yourself, you need to add 1=1
|
||||
|
||||
- ## set_vulkan_compute failed, network use_vulkan_compute disabled
|
||||
|
||||
你应该在 load_param / load_model 之前设置 net.opt.use_vulkan_compute = true;
|
||||
|
||||
- ## 多个blob输入,多个blob输出,怎么做?
|
||||
多次执行`ex.input()` 和 `ex.extract()`
|
||||
```
|
||||
ex.input("data1", in_1);
|
||||
ex.input("data2", in_2);
|
||||
ex.extract("output1", out_1);
|
||||
ex.extract("output2", out_2);
|
||||
```
|
||||
- ## Extractor extract 多次会重复计算吗?
|
||||
|
||||
不会
|
||||
|
||||
- ## 如何看每一层的耗时?
|
||||
|
||||
cmake -DNCNN_BENCHMARK=ON ..
|
||||
|
||||
- ## 如何转换 cv::Mat CV_8UC3 BGR 图片
|
||||
|
||||
from_pixels to_pixels
|
||||
|
||||
- ## 如何转换 float 数据为 ncnn::Mat
|
||||
|
||||
首先,自己申请的内存需要自己管理,此时ncnn::Mat不会自动给你释放你传过来的float数据
|
||||
``` c++
|
||||
std::vector<float> testData(60, 1.0); // 利用std::vector<float>自己管理内存的申请和释放
|
||||
ncnn::Mat in1(60, (void*)testData.data()).reshape(4, 5, 3); // 把float数据的指针转成void*传过去即可,甚至还可以指定维度(up说最好使用reshape用来解决channel gap)
|
||||
float* a = new float[60]; // 自己new一块内存,后续需要自己释放
|
||||
ncnn::Mat in2 = ncnn::Mat(60, (void*)a).reshape(4, 5, 3).clone(); // 使用方法和上面相同,clone() to transfer data owner
|
||||
```
|
||||
|
||||
- ## 如何初始化 ncnn::Mat 为全 0
|
||||
|
||||
`mat.fill(0.f);`
|
||||
|
||||
- ## 如何查看/获取版本号
|
||||
|
||||
cmake时会打印
|
||||
|
||||
c_api.h ncnn_version()
|
||||
|
||||
自己拼 1.0+yyyymmdd
|
||||
|
||||
- ## 如何转换 yuv 数据
|
||||
|
||||
yuv420sp2rgb yuv420sp2rgb_nv12
|
||||
|
||||
**[@metarutaiga](https://github.com/metarutaiga/xxYUV)**
|
||||
|
||||
- ## 如何 resize crop rotate 图片
|
||||
|
||||
[efficient roi resize rotate](https://github.com/Tencent/ncnn/wiki/efficient-roi-resize-rotate)
|
||||
|
||||
- ## 如何人脸5点对齐
|
||||
|
||||
get_affine_transform
|
||||
|
||||
warpaffine_bilinear_c3
|
||||
|
||||
```c
|
||||
// 计算变换矩阵 并且求逆变换
|
||||
int type = 0; // 0->区域外填充为v[0],v[1],v[2], -233->区域外不处理
|
||||
unsigned int v = 0;
|
||||
float tm[6];
|
||||
float tm_inv[6];
|
||||
// 人脸区域在原图上的坐标和宽高
|
||||
float src_x = target->det.rect.x / target->det.w * pIveImageU8C3->u32Width;
|
||||
float src_y = target->det.rect.y / target->det.h * pIveImageU8C3->u32Height;
|
||||
float src_w = target->det.rect.w / target->det.w * pIveImageU8C3->u32Width;
|
||||
float src_h = target->det.rect.h / target->det.h * pIveImageU8C3->u32Height;
|
||||
float point_src[10] = {
|
||||
src_x + src_w * target->attr.land[0][0], src_x + src_w * target->attr.land[0][1],
|
||||
src_x + src_w * target->attr.land[1][0], src_x + src_w * target->attr.land[1][1],
|
||||
src_x + src_w * target->attr.land[2][0], src_x + src_w * target->attr.land[2][1],
|
||||
src_x + src_w * target->attr.land[3][0], src_x + src_w * target->attr.land[3][1],
|
||||
src_x + src_w * target->attr.land[4][0], src_x + src_w * target->attr.land[4][1],
|
||||
};
|
||||
float point_dst[10] = { // +8 是因为我们处理112*112的图
|
||||
30.2946f + 8.0f, 51.6963f,
|
||||
65.5318f + 8.0f, 51.5014f,
|
||||
48.0252f + 8.0f, 71.7366f,
|
||||
33.5493f + 8.0f, 92.3655f,
|
||||
62.7299f + 8.0f, 92.2041f,
|
||||
};
|
||||
// 第一种方式:先计算变换在求逆
|
||||
AffineTrans::get_affine_transform(point_src, point_dst, 5, tm);
|
||||
AffineTrans::invert_affine_transform(tm, tm_inv);
|
||||
// 第二种方式:直接拿到求逆的结果
|
||||
// AffineTrans::get_affine_transform(point_dst, point_src, 5, tm_inv);
|
||||
// rgb 分离的,所以要单独处理
|
||||
for(int c = 0; c < 3; c++)
|
||||
{
|
||||
unsigned char* pSrc = malloc(xxx);
|
||||
unsigned char* pDst = malloc(xxx);
|
||||
ncnn::warpaffine_bilinear_c1(pSrc, SrcWidth, SrcHeight, SrcStride[c], pDst, DstWidth, DstHeight, DstStride[c], tm_inv, type, v);
|
||||
}
|
||||
// rgb packed则可以一次处理
|
||||
ncnn::warpaffine_bilinear_c3(pSrc, SrcWidth, SrcHeight, SrcStride, pDst, DstWidth, DstHeight, DstStride, tm_inv, type, v);
|
||||
```
|
||||
|
||||
- ## 如何获得中间层的blob输出
|
||||
|
||||
ncnn::Mat output;
|
||||
|
||||
ex.extract("your_blob_name", output);
|
||||
|
||||
- ## 为什么我使用GPU,但是GPU占用为0
|
||||
|
||||
windows 10 任务管理器 - 性能选项卡 - GPU - 选择其中一个视图左上角的下拉箭头切换到 Compute_0 / Compute_1 / Cuda
|
||||
|
||||
你还可以安装软件:GPU-Z
|
||||
|
||||
- ## layer XYZ not exists or registered
|
||||
|
||||
Your network contains some operations that are not implemented in ncnn.
|
||||
|
||||
You may implement them as custom layer followed in how-to-implement-custom-layer-step-by-step.
|
||||
|
||||
Or you could simply register them as no-op if you are sure those operations make no sense.
|
||||
|
||||
```
|
||||
class Noop : public ncnn::Layer {};
|
||||
DEFINE_LAYER_CREATOR(Noop)
|
||||
|
||||
net.register_custom_layer("LinearRegressionOutput", Noop_layer_creator);
|
||||
net.register_custom_layer("MAERegressionOutput", Noop_layer_creator);
|
||||
```
|
||||
|
||||
- ## network graph not ready
|
||||
|
||||
You shall call Net::load_param() first, then Net::load_model().
|
||||
|
||||
This error may also happens when Net::load_param() failed, but not properly handled.
|
||||
|
||||
For more information about the ncnn model load api, see ncnn-load-model
|
||||
|
||||
- ## memory not 32-bit aligned at XYZ
|
||||
|
||||
The pointer passed to Net::load_param() or Net::load_model() is not 32bit aligned.
|
||||
|
||||
In practice, the head pointer of std::vector is not guaranteed to be 32bit aligned.
|
||||
|
||||
you can store your binary buffer in ncnn::Mat structure, its internal memory is aligned.
|
||||
|
||||
- ## crash on android with '__kmp_abort_process'
|
||||
|
||||
This usually happens if you bundle multiple shared library with openmp linked
|
||||
|
||||
It is actually an issue of the android ndk https://github.com/android/ndk/issues/1028
|
||||
|
||||
On old android ndk, modify the link flags as
|
||||
|
||||
-Wl,-Bstatic -lomp -Wl,-Bdynamic
|
||||
|
||||
For recent ndk >= 21
|
||||
|
||||
-fstatic-openmp
|
||||
|
||||
- ## dlopen failed: library "libomp.so" not found
|
||||
Newer android ndk defaults to dynamic openmp runtime
|
||||
|
||||
modify the link flags as
|
||||
|
||||
-fstatic-openmp -fopenmp
|
||||
|
||||
- ## crash when freeing a ncnn dynamic library(.dll/.so) built with openMP
|
||||
|
||||
for optimal performance, the openmp threadpool spin waits for about a second prior to shutting down in case more work becomes available.
|
||||
|
||||
If you unload a dynamic library that's in the process of spin-waiting, it will crash in the manner you see (most of the time).
|
||||
|
||||
Just set OMP_WAIT_POLICY=passive in your environment, before calling loadlibrary. or Just wait a few seconds before calling freelibrary.
|
||||
|
||||
You can also use the following method to set environment variables in your code:
|
||||
|
||||
for msvc++:
|
||||
|
||||
SetEnvironmentVariable(_T("OMP_WAIT_POLICY"), _T("passive"));
|
||||
|
||||
for g++:
|
||||
|
||||
setenv("OMP_WAIT_POLICY", "passive", 1)
|
||||
|
||||
reference: https://stackoverflow.com/questions/34439956/vc-crash-when-freeing-a-dll-built-with-openmp
|
||||
|
||||
# 跑出来的结果对不上
|
||||
|
||||
[ncnn-produce-wrong-result](https://github.com/Tencent/ncnn/wiki/FAQ-ncnn-produce-wrong-result)
|
||||
|
||||
- ## 如何打印 ncnn::Mat 的值?
|
||||
|
||||
```C++
|
||||
void pretty_print(const ncnn::Mat& m)
|
||||
{
|
||||
for (int q=0; q<m.c; q++)
|
||||
{
|
||||
const float* ptr = m.channel(q);
|
||||
for (int y=0; y<m.h; y++)
|
||||
{
|
||||
for (int x=0; x<m.w; x++)
|
||||
{
|
||||
printf("%f ", ptr[x]);
|
||||
}
|
||||
ptr += m.w;
|
||||
printf("\n");
|
||||
}
|
||||
printf("------------------------\n");
|
||||
}
|
||||
}
|
||||
```
|
||||
In Android Studio, `printf` will not work, you can use `__android_log_print` instead. Example :
|
||||
```C++
|
||||
#include <android/log.h> // Don't forget this
|
||||
|
||||
void pretty_print(const ncnn::Mat& m)
|
||||
{
|
||||
for (int q=0; q<m.c; q++)
|
||||
{
|
||||
for (int y=0; y<m.h; y++)
|
||||
{
|
||||
for (int x=0; x<m.w; x++)
|
||||
{
|
||||
__android_log_print(ANDROID_LOG_DEBUG,"LOG_TAG","ncnn Mat is : %f", m.channel(q).row(y)[x]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
- ## 如何可视化 ncnn::Mat 的值?
|
||||
|
||||
```
|
||||
void visualize(const char* title, const ncnn::Mat& m)
|
||||
{
|
||||
std::vector<cv::Mat> normed_feats(m.c);
|
||||
|
||||
for (int i=0; i<m.c; i++)
|
||||
{
|
||||
cv::Mat tmp(m.h, m.w, CV_32FC1, (void*)(const float*)m.channel(i));
|
||||
|
||||
cv::normalize(tmp, normed_feats[i], 0, 255, cv::NORM_MINMAX, CV_8U);
|
||||
|
||||
cv::cvtColor(normed_feats[i], normed_feats[i], cv::COLOR_GRAY2BGR);
|
||||
|
||||
// check NaN
|
||||
for (int y=0; y<m.h; y++)
|
||||
{
|
||||
const float* tp = tmp.ptr<float>(y);
|
||||
uchar* sp = normed_feats[i].ptr<uchar>(y);
|
||||
for (int x=0; x<m.w; x++)
|
||||
{
|
||||
float v = tp[x];
|
||||
if (v != v)
|
||||
{
|
||||
sp[0] = 0;
|
||||
sp[1] = 0;
|
||||
sp[2] = 255;
|
||||
}
|
||||
|
||||
sp += 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int tw = m.w < 10 ? 32 : m.w < 20 ? 16 : m.w < 40 ? 8 : m.w < 80 ? 4 : m.w < 160 ? 2 : 1;
|
||||
int th = (m.c - 1) / tw + 1;
|
||||
|
||||
cv::Mat show_map(m.h * th, m.w * tw, CV_8UC3);
|
||||
show_map = cv::Scalar(127);
|
||||
|
||||
// tile
|
||||
for (int i=0; i<m.c; i++)
|
||||
{
|
||||
int ty = i / tw;
|
||||
int tx = i % tw;
|
||||
|
||||
normed_feats[i].copyTo(show_map(cv::Rect(tx * m.w, ty * m.h, m.w, m.h)));
|
||||
}
|
||||
|
||||
cv::resize(show_map, show_map, cv::Size(0,0), 2, 2, cv::INTER_NEAREST);
|
||||
cv::imshow(title, show_map);
|
||||
}
|
||||
```
|
||||
|
||||
- ## 总是输出第一张图的结果
|
||||
|
||||
复用 Extractor?!
|
||||
|
||||
- ## 启用fp16时的精度有差异
|
||||
|
||||
net.opt.use_fp16_packed = false;
|
||||
|
||||
net.opt.use_fp16_storage = false;
|
||||
|
||||
net.opt.use_fp16_arithmetic = false;
|
||||
|
||||
[ncnn-produce-wrong-result](https://github.com/Tencent/ncnn/wiki/FAQ-ncnn-produce-wrong-result)
|
||||
|
||||
|
||||
# 如何跑得更快?内存占用更少?库体积更小?
|
||||
|
||||
- ## fp32 fp16
|
||||
|
||||
- ## 大小核绑定
|
||||
ncnn::set_cpu_powersave(int)绑定大核或小核
|
||||
注意windows系统不支持绑核。
|
||||
ncnn支持不同的模型运行在不同的核心。假设硬件平台有2个大核,4个小核,你想把netA运行在大核,netB运行在小核。
|
||||
可以通过std::thread or pthread创建两个线程,运行如下代码:
|
||||
0:全部
|
||||
1:小核
|
||||
2:大核
|
||||
```
|
||||
void thread_1()
|
||||
{
|
||||
ncnn::set_cpu_powersave(2); // bind to big cores
|
||||
netA.opt.num_threads = 2;
|
||||
}
|
||||
|
||||
void thread_2()
|
||||
{
|
||||
ncnn::set_cpu_powersave(1); // bind to little cores
|
||||
netB.opt.num_threads = 4;
|
||||
}
|
||||
```
|
||||
|
||||
[openmp-best-practice.zh.md](https://github.com/Tencent/ncnn/blob/master/docs/how-to-use-and-FAQ/openmp-best-practice.zh.md)
|
||||
|
||||
- ## 查看 CPU 或 GPU 数量
|
||||
get_cpu_count
|
||||
|
||||
get_gpu_count
|
||||
|
||||
- ## ncnnoptimize
|
||||
|
||||
使用方式一:
|
||||
- ./ncnnoptimize ncnn.param ncnn.bin new.param new.bin flag
|
||||
<br/>注意这里的flag指的是fp32和fp16,其中0指的是fp32,1指的是fp16
|
||||
|
||||
使用方式二:
|
||||
- ./ncnnoptimize ncnn.param ncnn.bin new.param new.bin flag cutstartname cutendname
|
||||
<br/>cutstartname:模型截取的起点
|
||||
<br/>cutendname:模型截取的终点
|
||||
|
||||
|
||||
- ## 如何使用量化工具?
|
||||
|
||||
[Post Training Quantization Tools](https://github.com/Tencent/ncnn/tree/master/tools/quantize)
|
||||
|
||||
- ## 如何设置线程数?
|
||||
|
||||
opt.num_threads
|
||||
|
||||
- ## 如何降低CPU占用率?
|
||||
|
||||
net.opt.openmp_blocktime = 0;
|
||||
|
||||
OMP_WAIT_POLICY=passive
|
||||
|
||||
- ## 如何 batch inference?
|
||||
|
||||
```
|
||||
int max_batch_size = vkdev->info.compute_queue_count;
|
||||
|
||||
ncnn::Mat inputs[1000];
|
||||
ncnn::Mat outputs[1000];
|
||||
|
||||
#pragma omp parallel for num_threads(max_batch_size)
|
||||
for (int i=0; i<1000; i++)
|
||||
{
|
||||
ncnn::Extractor ex = net1.create_extractor();
|
||||
ex.input("data", inputs[i]);
|
||||
ex.extract("prob", outputs[i]);
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
|
||||
- ## partial graph inference
|
||||
|
||||
先 extract 分类,判断后,再 extract bbox
|
||||
|
||||
- ## 如何启用 bf16s 加速?
|
||||
|
||||
```
|
||||
net.opt.use_packing_layout = true;
|
||||
net.opt.use_bf16_storage = true;
|
||||
```
|
||||
|
||||
[用bf16加速ncnn](https://zhuanlan.zhihu.com/p/112564372) **@[nihui](https://www.zhihu.com/people/nihui-2)**
|
||||
|
||||
A53
|
||||
|
||||
- ## 如何裁剪更小的 ncnn 库?
|
||||
|
||||
[build-minimal-library](https://github.com/Tencent/ncnn/wiki/build-minimal-library)
|
||||
|
||||
- ## net.opt sgemm winograd fp16_storage 各是有什么作用?
|
||||
|
||||
对内存消耗的影响
|
||||
|
||||
# 白嫖项目
|
||||
|
||||
- ## nanodet
|
||||
|
||||
# 其他
|
||||
|
||||
- ## up主用的什么系统/编辑器/开发环境?
|
||||
|
||||
| 软件类型 | 软件名称 |
|
||||
| ------------| ----------- |
|
||||
| 系统 | Fedora |
|
||||
| 桌面环境 | KDE |
|
||||
| 编辑器 | Kate |
|
||||
| 画草图 | kolourpaint |
|
||||
| 画函数图像 | kmplot |
|
||||
| bilibili直播 | OBS |
|
139
3rdparty/ncnn/docs/how-to-build/build-for-VisualStudio.zh.md
vendored
Normal file
139
3rdparty/ncnn/docs/how-to-build/build-for-VisualStudio.zh.md
vendored
Normal file
@ -0,0 +1,139 @@
|
||||
# 用 Visual Studio 编译
|
||||
|
||||
[TOC]
|
||||
|
||||
## 预先准备
|
||||
|
||||
Visual Studio 2015 / 2017 / 2019 / 2022 Preview 的 Community Edition 版本, 使用动态的 CRT 运行库
|
||||
|
||||
CMake, 推荐 >= 3.17 的版本
|
||||
|
||||
## 开始编译
|
||||
|
||||
### 最简编译
|
||||
|
||||
https://github.com/Tencent/ncnn.git
|
||||
|
||||
#### 命令提示符版本
|
||||
|
||||
```batch
|
||||
mkdir build-vs2019
|
||||
cd build-vs2019
|
||||
cmake -G "Visual Studio 16 2019" -A x64 ..
|
||||
cmake --build . --config Release
|
||||
cmake --install . --config Release
|
||||
cmake --build . --config Debug
|
||||
cmake --install . --config Debug
|
||||
```
|
||||
|
||||
会安装在 build-vs2019/install 里头,debug 版本的库会带有 `d` 后缀。
|
||||
|
||||
#### x64 本机工具命令提示符 版本 (VS2022无X64)
|
||||
ncnn
|
||||
protobuf参照后文定义参数
|
||||
|
||||
```batch
|
||||
mkdir build-vs2019
|
||||
cd build-vs2019
|
||||
cmake ..
|
||||
cmake --build .
|
||||
cmake --install . --config Debug
|
||||
|
||||
//默认build生成Debug版本;默认install安装Release版本。 参照命令提示符版本
|
||||
```
|
||||
|
||||
|
||||
### 编译安装带 Vulkan 支持的 ncnn 库
|
||||
|
||||
#### 设备和 Vulkan 准备
|
||||
确认设备支持 Vulkan, 安装显卡驱动。
|
||||
|
||||
下载和安装 Vulkan SDK: https://vulkan.lunarg.com/sdk/home
|
||||
|
||||
连同子模块一起,获取源码:
|
||||
- 可从 http://github.com/Tencent/ncnn/releases 找到 "ncnn-YYYYMMDD-full-source.zip" 下载
|
||||
- 或用 git 获取最新版本:
|
||||
|
||||
```batch
|
||||
git clone https://github.com/tencent/ncnn
|
||||
git submodule update --init
|
||||
```
|
||||
|
||||
#### 编译安装 ncnn
|
||||
```batch
|
||||
mkdir build-vs2019
|
||||
cd build-vs2019
|
||||
cmake -G "Visual Studio 16 2019" -A x64 -DCMAKE_INSTALL_PREFIX="%cd%/install" -DNCNN_VULKAN=ON
|
||||
cmake --build . --config Release
|
||||
cmake --install . --config Release
|
||||
cmake --build . --config Debug
|
||||
cmake --install . --config Debug
|
||||
```
|
||||
|
||||
### 编译安装 ncnn 库和模型转换工具
|
||||
|
||||
- 此步骤用于编译模型转换工具,可跳过,直接使用 https://convertmodel.com 工具转换
|
||||
|
||||
- 以下命令行均使用 **适用于 VS 2019 的 x64 本机工具命令提示**
|
||||
|
||||
*注:若在 cmd / PowerShell 进行,需修改:*
|
||||
- `-G"NMake Makefile"` 改为合适的 Generator 如 `-G "Visual Studio 16 2019" -A x64`
|
||||
- `nmake` 改为 `cmake --build . --config Release`, 或打开 `.sln` 手动触发 `protobuf` / `ncnn` 项的构建
|
||||
- `nmake install` 改为 `cmake --install . --config Release`,或打开 `.sln` 手动触发 `INSTALL` 项的构建
|
||||
|
||||
|
||||
#### 编译安装 protobuf
|
||||
|
||||
用于生成 caffe2ncnn 和 onnx2ncnn 工具
|
||||
|
||||
https://github.com/google/protobuf/archive/v3.4.0.zip
|
||||
|
||||
我下载到 C:/Users/shuiz/source 解压缩
|
||||
|
||||
```batch
|
||||
mkdir build-vs2019
|
||||
cd build-vs2019
|
||||
cmake -G"NMake Makefiles" -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX="%cd%/install" ^
|
||||
-Dprotobuf_BUILD_TESTS=OFF ^
|
||||
-Dprotobuf_MSVC_STATIC_RUNTIME=OFF ../cmake
|
||||
nmake
|
||||
nmake install
|
||||
```
|
||||
|
||||
protobuf 会安装在 build-vs2019/install 里头
|
||||
|
||||
#### 编译安装 ncnn
|
||||
|
||||
https://github.com/Tencent/ncnn.git
|
||||
|
||||
cmake 命令中的 protobuf 路径要相应修改成自己的
|
||||
|
||||
```batch
|
||||
mkdir build-vs2019
|
||||
cd build-vs2019
|
||||
cmake -G"NMake Makefiles" -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX="%cd%/install" ^
|
||||
-DProtobuf_INCLUDE_DIR=C:/Users/shuiz/source/protobuf-3.4.0/build-vs2019/install/include ^
|
||||
-DProtobuf_LIBRARIES=C:/Users/shuiz/source/protobuf-3.4.0/build-vs2019/install/lib/libprotobuf.lib ^
|
||||
-DProtobuf_PROTOC_EXECUTABLE=C:/Users/shuiz/source/protobuf-3.4.0/build-vs2019/install/bin/protoc.exe ..
|
||||
nmake
|
||||
nmake install
|
||||
```
|
||||
|
||||
ncnn 会安装在 build-vs2019/install 里头
|
||||
|
||||
ncnn 转换工具在 build-vs2019/tools 里头
|
||||
|
||||
#### mlir2ncnn
|
||||
|
||||
见 [build-mlir2ncnn](build-mlir2ncnn.md)
|
||||
|
||||
## 使用编译好的 ncnn 库
|
||||
|
||||
CMakeLists 里写
|
||||
```cmake
|
||||
set(ncnn_DIR "C:/Users/shuiz/source/ncnn/build-vs2019/install/lib/cmake/ncnn" CACHE PATH "包含 ncnnConfig.cmake 的目录")
|
||||
find_package(ncnn REQUIRED)
|
||||
target_link_libraries(my_target ncnn)
|
||||
```
|
||||
|
||||
进一步了解 [use-ncnn-with-own-project](../how-to-use-and-FAQ/use-ncnn-with-own-project.md)
|
54
3rdparty/ncnn/docs/how-to-build/build-mlir2ncnn.md
vendored
Normal file
54
3rdparty/ncnn/docs/how-to-build/build-mlir2ncnn.md
vendored
Normal file
@ -0,0 +1,54 @@
|
||||
# mlir2ncnn
|
||||
|
||||
## Compile
|
||||
|
||||
**Clone LLVM**
|
||||
```bash
|
||||
https://github.com/llvm/llvm-project.git
|
||||
git checkout -b mlir <a_working_commit_id>
|
||||
```
|
||||
Current working commit id is 74e6030bcbcc8e628f9a99a424342a0c656456f9:
|
||||
```
|
||||
$ git log
|
||||
|
||||
commit 74e6030bcbcc8e628f9a99a424342a0c656456f9 (HEAD -> main, origin/main, origin/HEAD)
|
||||
Author: Craig Topper <craig.topper@sifive.com>
|
||||
Date: Thu Mar 4 22:30:38 2021 -0800
|
||||
|
||||
[TargetLowering] Use HandleSDNodes to prevent nodes from being deleted by recursive calls in getNegatedExpression.
|
||||
```
|
||||
|
||||
It is determined by querying the latest git commit date of the `tools/mlir` directory.
|
||||
|
||||
|
||||
**Compile mlir**
|
||||
```bash
|
||||
cd llvm-project
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -G Ninja -DCMAKE_INSTALL_PREFIX=install -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DLLVM_ENABLE_PROJECTS="mlir" -DLLVM_TARGETS_TO_BUILD="" -DLLVM_INCLUDE_EXAMPLES=OFF -DLLVM_INCLUDE_TESTS=OFF ../llvm/
|
||||
ninja -j8
|
||||
ninja install
|
||||
```
|
||||
|
||||
**Compile mlir2ncnn**
|
||||
```bash
|
||||
cd tools/mlir
|
||||
mkdir build
|
||||
cd build
|
||||
cmake .. -D LLVM_DIR=<path/to/your/llvm_install/lib/cmake/llvm>
|
||||
make
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
**Export `.mlir`**
|
||||
|
||||
See https://zhuanlan.zhihu.com/p/152535430
|
||||
|
||||
|
||||
**Usage mlir2ncnn**
|
||||
|
||||
```
|
||||
./mlir2ncnn pix2pix.mlir pix2pix.param pix2pix.bin
|
||||
```
|
668
3rdparty/ncnn/docs/how-to-build/how-to-build.md
vendored
Normal file
668
3rdparty/ncnn/docs/how-to-build/how-to-build.md
vendored
Normal file
@ -0,0 +1,668 @@
|
||||
### Git clone ncnn repo with submodule
|
||||
|
||||
```
|
||||
$ git clone https://github.com/Tencent/ncnn.git
|
||||
$ cd ncnn
|
||||
$ git submodule update --init
|
||||
```
|
||||
|
||||
* [Build for Linux / NVIDIA Jetson / Raspberry Pi](#build-for-linux)
|
||||
* [Build for Windows x64 using VS2017](#build-for-windows-x64-using-visual-studio-community-2017)
|
||||
* [Build for macOS](#build-for-macos)
|
||||
* [Build for ARM Cortex-A family with cross-compiling](#build-for-arm-cortex-a-family-with-cross-compiling)
|
||||
* [Build for Hisilicon platform with cross-compiling](#build-for-hisilicon-platform-with-cross-compiling)
|
||||
* [Build for Android](#build-for-android)
|
||||
* [Build for iOS on macOS with xcode](#build-for-ios-on-macos-with-xcode)
|
||||
* [Build for WebAssembly](#build-for-webassembly)
|
||||
* [Build for AllWinner D1](#build-for-allwinner-d1)
|
||||
* [Build for Loongson 2K1000](#build-for-loongson-2k1000)
|
||||
* [Build for Termux on Android](#Build-for-Termux-on-Android)
|
||||
|
||||
***
|
||||
|
||||
### Build for Linux
|
||||
|
||||
Install required build dependencies:
|
||||
|
||||
* git
|
||||
* g++
|
||||
* cmake
|
||||
* protocol buffer (protobuf) headers files and protobuf compiler
|
||||
* vulkan header files and loader library
|
||||
* glslang
|
||||
* (optional) opencv # For building examples
|
||||
|
||||
Generally if you have Intel, AMD or Nvidia GPU from last 10 years, Vulkan can be easily used.
|
||||
|
||||
On some systems there are no Vulkan drivers easily available at the moment (October 2020), so you might need to disable use of Vulkan on them. This applies to Raspberry Pi 3 (but there is experimental open source Vulkan driver in the works, which is not ready yet). Nvidia Tegra series devices (like Nvidia Jetson) should support Vulkan. Ensure you have most recent software installed for best experience.
|
||||
|
||||
On Debian, Ubuntu or Raspberry Pi OS, you can install all required dependencies using:
|
||||
```shell
|
||||
sudo apt install build-essential git cmake libprotobuf-dev protobuf-compiler libvulkan-dev vulkan-utils libopencv-dev
|
||||
```
|
||||
To use Vulkan backend install Vulkan header files, a vulkan driver loader, GLSL to SPIR-V compiler and vulkaninfo tool. Preferably from your distribution repositories. Alternatively download and install full Vulkan SDK (about 200MB in size; it contains all header files, documentation and prebuilt loader, as well some extra tools and source code of everything) from https://vulkan.lunarg.com/sdk/home
|
||||
|
||||
```shell
|
||||
wget https://sdk.lunarg.com/sdk/download/1.2.189.0/linux/vulkansdk-linux-x86_64-1.2.189.0.tar.gz?Human=true -O vulkansdk-linux-x86_64-1.2.189.0.tar.gz
|
||||
tar -xf vulkansdk-linux-x86_64-1.2.189.0.tar.gz
|
||||
export VULKAN_SDK=$(pwd)/1.2.189.0/x86_64
|
||||
```
|
||||
|
||||
To use Vulkan after building ncnn later, you will also need to have Vulkan driver for your GPU. For AMD and Intel GPUs these can be found in Mesa graphics driver, which usually is installed by default on all distros (i.e. `sudo apt install mesa-vulkan-drivers` on Debian/Ubuntu). For Nvidia GPUs the proprietary Nvidia driver must be downloaded and installed (some distros will allow easier installation in some way). After installing Vulkan driver, confirm Vulkan libraries and driver are working, by using `vulkaninfo` or `vulkaninfo | grep deviceType`, it should list GPU device type. If there are more than one GPU installed (including the case of integrated GPU and discrete GPU, commonly found in laptops), you might need to note the order of devices to use later on.
|
||||
|
||||
#### Nvidia Jetson
|
||||
|
||||
The Vulkan driver is a default component of the Linux For Tegra BSP release, check [the device list](https://developer.nvidia.com/embedded/vulkan).
|
||||
|
||||
```shell
|
||||
cd ncnn
|
||||
mkdir -p build
|
||||
cd build
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=../toolchains/jetson.toolchain.cmake -DNCNN_VULKAN=ON -DNCNN_BUILD_EXAMPLES=ON ..
|
||||
make -j$(nproc)
|
||||
```
|
||||
|
||||
#### Raspberry Pi
|
||||
|
||||
Vulkan drivers do exists, but are not mature. You are free to experiment at your own discretion, and report results and performance.
|
||||
|
||||
```shell
|
||||
cd ncnn
|
||||
mkdir -p build
|
||||
cd build
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DNCNN_VULKAN=ON -DNCNN_SYSTEM_GLSLANG=ON -DNCNN_BUILD_EXAMPLES=ON ..
|
||||
make -j$(nproc)
|
||||
```
|
||||
|
||||
You can add `-GNinja` to `cmake` above to use Ninja build system (invoke build using `ninja` or `cmake --build .`).
|
||||
|
||||
For Raspberry Pi 3, add `-DCMAKE_TOOLCHAIN_FILE=../toolchains/pi3.toolchain.cmake -DPI3=ON` to cmake. You can also consider disabling Vulkan support as the Vulkan drivers for Raspberry Pi are still not mature, but it doesn't hurt to build the support in, but not use it.
|
||||
|
||||
#### Verification
|
||||
|
||||
Verify build by running some examples:
|
||||
|
||||
```shell
|
||||
cd ../examples
|
||||
../build/examples/squeezenet ../images/256-ncnn.png
|
||||
[0 AMD RADV FIJI (LLVM 10.0.1)] queueC=1[4] queueG=0[1] queueT=0[1]
|
||||
[0 AMD RADV FIJI (LLVM 10.0.1)] bugsbn1=0 buglbia=0 bugcopc=0 bugihfa=0
|
||||
[0 AMD RADV FIJI (LLVM 10.0.1)] fp16p=1 fp16s=1 fp16a=0 int8s=1 int8a=1
|
||||
532 = 0.163452
|
||||
920 = 0.093140
|
||||
716 = 0.061584
|
||||
```
|
||||
|
||||
You can also run benchmarks (the 4th argument is a GPU device index to use, refer to `vulkaninfo`, if you have more than one GPU):
|
||||
|
||||
```shell
|
||||
cd ../benchmark
|
||||
../build/benchmark/benchncnn 10 $(nproc) 0 0
|
||||
[0 AMD RADV FIJI (LLVM 10.0.1)] queueC=1[4] queueG=0[1] queueT=0[1]
|
||||
[0 AMD RADV FIJI (LLVM 10.0.1)] bugsbn1=0 buglbia=0 bugcopc=0 bugihfa=0
|
||||
[0 AMD RADV FIJI (LLVM 10.0.1)] fp16p=1 fp16s=1 fp16a=0 int8s=1 int8a=1
|
||||
num_threads = 4
|
||||
powersave = 0
|
||||
gpu_device = 0
|
||||
cooling_down = 1
|
||||
squeezenet min = 4.68 max = 4.99 avg = 4.85
|
||||
squeezenet_int8 min = 38.52 max = 66.90 avg = 48.52
|
||||
...
|
||||
```
|
||||
|
||||
To run benchmarks on a CPU, set the 5th argument to `-1`.
|
||||
|
||||
|
||||
***
|
||||
|
||||
### Build for Windows x64 using Visual Studio Community 2017
|
||||
|
||||
Download and Install Visual Studio Community 2017 from https://visualstudio.microsoft.com/vs/community/
|
||||
|
||||
Start the command prompt: `Start → Programs → Visual Studio 2017 → Visual Studio Tools → x64 Native Tools Command Prompt for VS 2017`
|
||||
|
||||
Download protobuf-3.4.0 from https://github.com/google/protobuf/archive/v3.4.0.zip
|
||||
|
||||
Build protobuf library:
|
||||
|
||||
```shell
|
||||
cd <protobuf-root-dir>
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -G"NMake Makefiles" -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=%cd%/install -Dprotobuf_BUILD_TESTS=OFF -Dprotobuf_MSVC_STATIC_RUNTIME=OFF ../cmake
|
||||
nmake
|
||||
nmake install
|
||||
```
|
||||
(optional) Download and install Vulkan SDK from https://vulkan.lunarg.com/sdk/home
|
||||
|
||||
Build ncnn library (replace <protobuf-root-dir> with a proper path):
|
||||
|
||||
```shell
|
||||
cd <ncnn-root-dir>
|
||||
mkdir -p build
|
||||
cd build
|
||||
cmake -G"NMake Makefiles" -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=%cd%/install -DProtobuf_INCLUDE_DIR=<protobuf-root-dir>/build/install/include -DProtobuf_LIBRARIES=<protobuf-root-dir>/build/install/lib/libprotobuf.lib -DProtobuf_PROTOC_EXECUTABLE=<protobuf-root-dir>/build/install/bin/protoc.exe -DNCNN_VULKAN=ON ..
|
||||
nmake
|
||||
nmake install
|
||||
```
|
||||
|
||||
Note: To speed up compilation process on multi core machines, configuring `cmake` to use `jom` or `ninja` using `-G` flag is recommended.
|
||||
|
||||
***
|
||||
### Build for macOS
|
||||
|
||||
We've published ncnn to [brew](https://formulae.brew.sh/formula/ncnn#default) now, you can just use following method to install ncnn if you have the Xcode Command Line Tools installed.
|
||||
|
||||
```shell
|
||||
brew update
|
||||
brew install ncnn
|
||||
```
|
||||
|
||||
Or if you want to compile and build ncnn locally, first install Xcode or Xcode Command Line Tools according to your needs.
|
||||
|
||||
Then install `protobuf` and `libomp` via homebrew
|
||||
|
||||
```shell
|
||||
brew install protobuf libomp
|
||||
```
|
||||
|
||||
Download and install Vulkan SDK from <https://vulkan.lunarg.com/sdk/home>
|
||||
|
||||
|
||||
```shell
|
||||
wget https://sdk.lunarg.com/sdk/download/1.2.189.0/mac/vulkansdk-macos-1.2.189.0.dmg?Human=true -O vulkansdk-macos-1.2.189.0.dmg
|
||||
hdiutil attach vulkansdk-macos-1.2.189.0.dmg
|
||||
sudo /Volumes/vulkansdk-macos-1.2.189.0/InstallVulkan.app/Contents/MacOS/InstallVulkan --root `pwd`/vulkansdk-macos-1.2.189.0 --accept-licenses --default-answer --confirm-command install
|
||||
hdiutil detach /Volumes/vulkansdk-macos-1.2.189.0
|
||||
|
||||
# setup env
|
||||
export VULKAN_SDK=`pwd`/vulkansdk-macos-1.2.189.0/macOS
|
||||
```
|
||||
|
||||
```shell
|
||||
cd <ncnn-root-dir>
|
||||
mkdir -p build
|
||||
cd build
|
||||
|
||||
cmake -DCMAKE_OSX_ARCHITECTURES="x86_64;arm64" \
|
||||
-DVulkan_INCLUDE_DIR=`pwd`/../vulkansdk-macos-1.2.189.0/MoltenVK/include \
|
||||
-DVulkan_LIBRARY=`pwd`/../vulkansdk-macos-1.2.189.0/MoltenVK/dylib/macOS/libMoltenVK.dylib \
|
||||
-DNCNN_VULKAN=ON -DNCNN_BUILD_EXAMPLES=ON ..
|
||||
|
||||
cmake --build . -j 4
|
||||
cmake --build . --target install
|
||||
```
|
||||
|
||||
*Note: If you encounter `libomp` related errors during installation, you can also check our GitHub Actions at [here](https://github.com/Tencent/ncnn/blob/d91cccf/.github/workflows/macos-x64-gpu.yml#L50-L68) to install and use `openmp`.*
|
||||
***
|
||||
|
||||
### Build for ARM Cortex-A family with cross-compiling
|
||||
Download ARM toolchain from https://developer.arm.com/open-source/gnu-toolchain/gnu-a/downloads
|
||||
|
||||
```shell
|
||||
export PATH="<your-toolchain-compiler-path>:${PATH}"
|
||||
```
|
||||
|
||||
Alternatively install a cross-compiler provided by the distribution (i.e. on Debian / Ubuntu, you can do `sudo apt install g++-arm-linux-gnueabi g++-arm-linux-gnueabihf g++-aarch64-linux-gnu`).
|
||||
|
||||
Depending on your needs build one or more of the below targets.
|
||||
|
||||
AArch32 target with soft float (arm-linux-gnueabi)
|
||||
```shell
|
||||
cd <ncnn-root-dir>
|
||||
mkdir -p build-arm-linux-gnueabi
|
||||
cd build-arm-linux-gnueabi
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabi.toolchain.cmake ..
|
||||
make -j$(nproc)
|
||||
```
|
||||
|
||||
AArch32 target with hard float (arm-linux-gnueabihf)
|
||||
```shell
|
||||
cd <ncnn-root-dir>
|
||||
mkdir -p build-arm-linux-gnueabihf
|
||||
cd build-arm-linux-gnueabihf
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/arm-linux-gnueabihf.toolchain.cmake ..
|
||||
make -j$(nproc)
|
||||
```
|
||||
|
||||
AArch64 GNU/Linux target (aarch64-linux-gnu)
|
||||
```shell
|
||||
cd <ncnn-root-dir>
|
||||
mkdir -p build-aarch64-linux-gnu
|
||||
cd build-aarch64-linux-gnu
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake ..
|
||||
make -j$(nproc)
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### Build for Hisilicon platform with cross-compiling
|
||||
Download and install Hisilicon SDK. The toolchain should be in `/opt/hisi-linux/x86-arm`
|
||||
|
||||
```shell
|
||||
cd <ncnn-root-dir>
|
||||
mkdir -p build
|
||||
cd build
|
||||
|
||||
# Choose one cmake toolchain file depends on your target platform
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/hisiv300.toolchain.cmake ..
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/hisiv500.toolchain.cmake ..
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/himix100.toolchain.cmake ..
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/himix200.toolchain.cmake ..
|
||||
|
||||
make -j$(nproc)
|
||||
make install
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### Build for Android
|
||||
You can use the pre-build ncnn-android-lib.zip from https://github.com/Tencent/ncnn/releases
|
||||
|
||||
Download Android NDK from http://developer.android.com/ndk/downloads/index.html and install it, for example:
|
||||
|
||||
```shell
|
||||
unzip android-ndk-r21d-linux-x86_64.zip
|
||||
export ANDROID_NDK=<your-ndk-root-path>
|
||||
```
|
||||
|
||||
(optional) remove the hardcoded debug flag in Android NDK [android-ndk issue](https://github.com/android-ndk/ndk/issues/243)
|
||||
```
|
||||
# open $ANDROID_NDK/build/cmake/android.toolchain.cmake
|
||||
# delete "-g" line
|
||||
list(APPEND ANDROID_COMPILER_FLAGS
|
||||
-g
|
||||
-DANDROID
|
||||
```
|
||||
|
||||
Build armv7 library
|
||||
|
||||
```shell
|
||||
cd <ncnn-root-dir>
|
||||
mkdir -p build-android-armv7
|
||||
cd build-android-armv7
|
||||
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \
|
||||
-DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON \
|
||||
-DANDROID_PLATFORM=android-14 ..
|
||||
|
||||
# If you want to enable Vulkan, platform api version >= android-24 is needed
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \
|
||||
-DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON \
|
||||
-DANDROID_PLATFORM=android-24 -DNCNN_VULKAN=ON ..
|
||||
|
||||
make -j$(nproc)
|
||||
make install
|
||||
```
|
||||
|
||||
Pick `build-android-armv7/install` folder for further JNI usage.
|
||||
|
||||
|
||||
Build aarch64 library:
|
||||
|
||||
```shell
|
||||
cd <ncnn-root-dir>
|
||||
mkdir -p build-android-aarch64
|
||||
cd build-android-aarch64
|
||||
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake"\
|
||||
-DANDROID_ABI="arm64-v8a" \
|
||||
-DANDROID_PLATFORM=android-21 ..
|
||||
|
||||
# If you want to enable Vulkan, platform api version >= android-24 is needed
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \
|
||||
-DANDROID_ABI="arm64-v8a" \
|
||||
-DANDROID_PLATFORM=android-24 -DNCNN_VULKAN=ON ..
|
||||
|
||||
make -j$(nproc)
|
||||
make install
|
||||
```
|
||||
|
||||
Pick `build-android-aarch64/install` folder for further JNI usage.
|
||||
|
||||
***
|
||||
|
||||
### Build for iOS on macOS with xcode
|
||||
You can use the pre-build ncnn.framework glslang.framework and openmp.framework from https://github.com/Tencent/ncnn/releases
|
||||
|
||||
Install xcode
|
||||
|
||||
You can replace ```-DENABLE_BITCODE=0``` to ```-DENABLE_BITCODE=1``` in the following cmake arguments if you want to build bitcode enabled libraries.
|
||||
|
||||
Download and install openmp for multithreading inference feature on iPhoneOS
|
||||
```shell
|
||||
wget https://github.com/llvm/llvm-project/releases/download/llvmorg-11.0.0/openmp-11.0.0.src.tar.xz
|
||||
tar -xf openmp-11.0.0.src.tar.xz
|
||||
cd openmp-11.0.0.src
|
||||
|
||||
# apply some compilation fix
|
||||
sed -i'' -e '/.size __kmp_unnamed_critical_addr/d' runtime/src/z_Linux_asm.S
|
||||
sed -i'' -e 's/__kmp_unnamed_critical_addr/___kmp_unnamed_critical_addr/g' runtime/src/z_Linux_asm.S
|
||||
|
||||
mkdir -p build-ios
|
||||
cd build-ios
|
||||
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=<ncnn-root-dir>/toolchains/ios.toolchain.cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=install \
|
||||
-DIOS_PLATFORM=OS -DENABLE_BITCODE=0 -DENABLE_ARC=0 -DENABLE_VISIBILITY=0 -DIOS_ARCH="armv7;arm64;arm64e" \
|
||||
-DPERL_EXECUTABLE=/usr/local/bin/perl \
|
||||
-DLIBOMP_ENABLE_SHARED=OFF -DLIBOMP_OMPT_SUPPORT=OFF -DLIBOMP_USE_HWLOC=OFF ..
|
||||
|
||||
cmake --build . -j 4
|
||||
cmake --build . --target install
|
||||
|
||||
# copy openmp library and header files to xcode toolchain sysroot
|
||||
# <xcode-dir> is usually /Applications/Xcode.app or /Applications/Xcode-beta.app depends on your Xcode version
|
||||
sudo cp install/include/* <xcode-dir>/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS.sdk/usr/include
|
||||
sudo cp install/lib/libomp.a <xcode-dir>/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS.sdk/usr/lib
|
||||
```
|
||||
|
||||
Download and install openmp for multithreading inference feature on iPhoneSimulator
|
||||
```shell
|
||||
wget https://github.com/llvm/llvm-project/releases/download/llvmorg-11.0.0/openmp-11.0.0.src.tar.xz
|
||||
tar -xf openmp-11.0.0.src.tar.xz
|
||||
cd openmp-11.0.0.src
|
||||
|
||||
# apply some compilation fix
|
||||
sed -i'' -e '/.size __kmp_unnamed_critical_addr/d' runtime/src/z_Linux_asm.S
|
||||
sed -i'' -e 's/__kmp_unnamed_critical_addr/___kmp_unnamed_critical_addr/g' runtime/src/z_Linux_asm.S
|
||||
|
||||
mkdir -p build-ios-sim
|
||||
cd build-ios-sim
|
||||
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=<ncnn-root-dir>/toolchains/ios.toolchain.cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=install \
|
||||
-DIOS_PLATFORM=SIMULATOR -DENABLE_BITCODE=0 -DENABLE_ARC=0 -DENABLE_VISIBILITY=0 -DIOS_ARCH="i386;x86_64" \
|
||||
-DPERL_EXECUTABLE=/usr/local/bin/perl \
|
||||
-DLIBOMP_ENABLE_SHARED=OFF -DLIBOMP_OMPT_SUPPORT=OFF -DLIBOMP_USE_HWLOC=OFF ..
|
||||
|
||||
cmake --build . -j 4
|
||||
cmake --build . --target install
|
||||
|
||||
# copy openmp library and header files to xcode toolchain sysroot
|
||||
# <xcode-dir> is usually /Applications/Xcode.app or /Applications/Xcode-beta.app depends on your Xcode version
|
||||
sudo cp install/include/* <xcode-dir>/Contents/Developer/Platforms/iPhoneSimulator.platform/Developer/SDKs/iPhoneSimulator.sdk/usr/include
|
||||
sudo cp install/lib/libomp.a <xcode-dir>/Contents/Developer/Platforms/iPhoneSimulator.platform/Developer/SDKs/iPhoneSimulator.sdk/usr/lib
|
||||
```
|
||||
|
||||
Package openmp framework:
|
||||
```shell
|
||||
cd <openmp-root-dir>
|
||||
|
||||
mkdir -p openmp.framework/Versions/A/Headers
|
||||
mkdir -p openmp.framework/Versions/A/Resources
|
||||
ln -s A openmp.framework/Versions/Current
|
||||
ln -s Versions/Current/Headers openmp.framework/Headers
|
||||
ln -s Versions/Current/Resources openmp.framework/Resources
|
||||
ln -s Versions/Current/openmp openmp.framework/openmp
|
||||
lipo -create build-ios/install/lib/libomp.a build-ios-sim/install/lib/libomp.a -o openmp.framework/Versions/A/openmp
|
||||
cp -r build-ios/install/include/* openmp.framework/Versions/A/Headers/
|
||||
sed -e 's/__NAME__/openmp/g' -e 's/__IDENTIFIER__/org.llvm.openmp/g' -e 's/__VERSION__/11.0/g' <ncnn-root-dir>/Info.plist > openmp.framework/Versions/A/Resources/Info.plist
|
||||
```
|
||||
|
||||
Download and install Vulkan SDK from https://vulkan.lunarg.com/sdk/home
|
||||
```shell
|
||||
wget https://sdk.lunarg.com/sdk/download/1.2.189.0/mac/vulkansdk-macos-1.2.189.0.dmg?Human=true -O vulkansdk-macos-1.2.189.0.dmg
|
||||
hdiutil attach vulkansdk-macos-1.2.189.0.dmg
|
||||
sudo /Volumes/vulkansdk-macos-1.2.189.0/InstallVulkan.app/Contents/MacOS/InstallVulkan --root `pwd`/vulkansdk-macos-1.2.189.0 --accept-licenses --default-answer --confirm-command install
|
||||
hdiutil detach /Volumes/vulkansdk-macos-1.2.189.0
|
||||
|
||||
# setup env
|
||||
export VULKAN_SDK=`pwd`/vulkansdk-macos-1.2.189.0/macOS
|
||||
```
|
||||
|
||||
Build library for iPhoneOS:
|
||||
|
||||
```shell
|
||||
cd <ncnn-root-dir>
|
||||
git submodule update --init
|
||||
mkdir -p build-ios
|
||||
cd build-ios
|
||||
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/ios.toolchain.cmake -DIOS_PLATFORM=OS -DIOS_ARCH="armv7;arm64;arm64e" \
|
||||
-DENABLE_BITCODE=0 -DENABLE_ARC=0 -DENABLE_VISIBILITY=0 \
|
||||
-DOpenMP_C_FLAGS="-Xclang -fopenmp" -DOpenMP_CXX_FLAGS="-Xclang -fopenmp" \
|
||||
-DOpenMP_C_LIB_NAMES="libomp" -DOpenMP_CXX_LIB_NAMES="libomp" \
|
||||
-DOpenMP_libomp_LIBRARY="/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS.sdk/usr/lib/libomp.a" \
|
||||
-DNCNN_BUILD_BENCHMARK=OFF ..
|
||||
|
||||
# vulkan is only available on arm64 devices
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/ios.toolchain.cmake -DIOS_PLATFORM=OS64 -DIOS_ARCH="arm64;arm64e" \
|
||||
-DENABLE_BITCODE=0 -DENABLE_ARC=0 -DENABLE_VISIBILITY=0 \
|
||||
-DOpenMP_C_FLAGS="-Xclang -fopenmp" -DOpenMP_CXX_FLAGS="-Xclang -fopenmp" \
|
||||
-DOpenMP_C_LIB_NAMES="libomp" -DOpenMP_CXX_LIB_NAMES="libomp" \
|
||||
-DOpenMP_libomp_LIBRARY="/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS.sdk/usr/lib/libomp.a" \
|
||||
-DVulkan_INCLUDE_DIR=$VULKAN_SDK/../MoltenVK/include \
|
||||
-DVulkan_LIBRARY=$VULKAN_SDK/../MoltenVK/dylib/iOS/libMoltenVK.dylib \
|
||||
-DNCNN_VULKAN=ON -DNCNN_BUILD_BENCHMARK=OFF ..
|
||||
|
||||
cmake --build . -j 4
|
||||
cmake --build . --target install
|
||||
```
|
||||
|
||||
Build library for iPhoneSimulator:
|
||||
|
||||
```shell
|
||||
cd <ncnn-root-dir>
|
||||
mkdir -p build-ios-sim
|
||||
cd build-ios-sim
|
||||
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/ios.toolchain.cmake -DIOS_PLATFORM=SIMULATOR -DIOS_ARCH="i386;x86_64" \
|
||||
-DENABLE_BITCODE=0 -DENABLE_ARC=0 -DENABLE_VISIBILITY=0 \
|
||||
-DOpenMP_C_FLAGS="-Xclang -fopenmp" -DOpenMP_CXX_FLAGS="-Xclang -fopenmp" \
|
||||
-DOpenMP_C_LIB_NAMES="libomp" -DOpenMP_CXX_LIB_NAMES="libomp" \
|
||||
-DOpenMP_libomp_LIBRARY="/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneSimulator.platform/Developer/SDKs/iPhoneSimulator.sdk/usr/lib/libomp.a" \
|
||||
-DNCNN_BUILD_BENCHMARK=OFF ..
|
||||
|
||||
cmake --build . -j 4
|
||||
cmake --build . --target install
|
||||
```
|
||||
|
||||
Package glslang framework:
|
||||
```shell
|
||||
cd <ncnn-root-dir>
|
||||
|
||||
mkdir -p glslang.framework/Versions/A/Headers
|
||||
mkdir -p glslang.framework/Versions/A/Resources
|
||||
ln -s A glslang.framework/Versions/Current
|
||||
ln -s Versions/Current/Headers glslang.framework/Headers
|
||||
ln -s Versions/Current/Resources glslang.framework/Resources
|
||||
ln -s Versions/Current/glslang glslang.framework/glslang
|
||||
libtool -static build-ios/install/lib/libglslang.a build-ios/install/lib/libSPIRV.a build-ios/install/lib/libOGLCompiler.a build-ios/install/lib/libOSDependent.a -o build-ios/install/lib/libglslang_combined.a
|
||||
libtool -static build-ios-sim/install/lib/libglslang.a build-ios-sim/install/lib/libSPIRV.a build-ios-sim/install/lib/libOGLCompiler.a build-ios-sim/install/lib/libOSDependent.a -o build-ios-sim/install/lib/libglslang_combined.a
|
||||
lipo -create build-ios/install/lib/libglslang_combined.a build-ios-sim/install/lib/libglslang_combined.a -o glslang.framework/Versions/A/glslang
|
||||
cp -r build/install/include/glslang glslang.framework/Versions/A/Headers/
|
||||
sed -e 's/__NAME__/glslang/g' -e 's/__IDENTIFIER__/org.khronos.glslang/g' -e 's/__VERSION__/1.0/g' Info.plist > glslang.framework/Versions/A/Resources/Info.plist
|
||||
```
|
||||
|
||||
Package ncnn framework:
|
||||
```shell
|
||||
cd <ncnn-root-dir>
|
||||
|
||||
mkdir -p ncnn.framework/Versions/A/Headers
|
||||
mkdir -p ncnn.framework/Versions/A/Resources
|
||||
ln -s A ncnn.framework/Versions/Current
|
||||
ln -s Versions/Current/Headers ncnn.framework/Headers
|
||||
ln -s Versions/Current/Resources ncnn.framework/Resources
|
||||
ln -s Versions/Current/ncnn ncnn.framework/ncnn
|
||||
lipo -create build-ios/install/lib/libncnn.a build-ios-sim/install/lib/libncnn.a -o ncnn.framework/Versions/A/ncnn
|
||||
cp -r build-ios/install/include/* ncnn.framework/Versions/A/Headers/
|
||||
sed -e 's/__NAME__/ncnn/g' -e 's/__IDENTIFIER__/com.tencent.ncnn/g' -e 's/__VERSION__/1.0/g' Info.plist > ncnn.framework/Versions/A/Resources/Info.plist
|
||||
```
|
||||
|
||||
Pick `ncnn.framework` `glslang.framework` and `openmp.framework` folder for app development.
|
||||
|
||||
***
|
||||
|
||||
### Build for WebAssembly
|
||||
|
||||
Install Emscripten
|
||||
|
||||
```shell
|
||||
git clone https://github.com/emscripten-core/emsdk.git
|
||||
cd emsdk
|
||||
./emsdk install 2.0.8
|
||||
./emsdk activate 2.0.8
|
||||
|
||||
source emsdk/emsdk_env.sh
|
||||
```
|
||||
|
||||
Build without any extension for general compatibility:
|
||||
```shell
|
||||
mkdir -p build
|
||||
cd build
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../emsdk/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake \
|
||||
-DNCNN_THREADS=OFF -DNCNN_OPENMP=OFF -DNCNN_SIMPLEOMP=OFF -DNCNN_RUNTIME_CPU=OFF -DNCNN_SSE2=OFF -DNCNN_AVX2=OFF -DNCNN_AVX=OFF \
|
||||
-DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_BENCHMARK=OFF ..
|
||||
cmake --build . -j 4
|
||||
cmake --build . --target install
|
||||
```
|
||||
|
||||
Build with WASM SIMD extension:
|
||||
```shell
|
||||
mkdir -p build-simd
|
||||
cd build-simd
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../emsdk/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake \
|
||||
-DNCNN_THREADS=OFF -DNCNN_OPENMP=OFF -DNCNN_SIMPLEOMP=OFF -DNCNN_RUNTIME_CPU=OFF -DNCNN_SSE2=ON -DNCNN_AVX2=OFF -DNCNN_AVX=OFF \
|
||||
-DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_BENCHMARK=OFF ..
|
||||
cmake --build . -j 4
|
||||
cmake --build . --target install
|
||||
```
|
||||
|
||||
Build with WASM Thread extension:
|
||||
```shell
|
||||
mkdir -p build-threads
|
||||
cd build-threads
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../emsdk/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake \
|
||||
-DNCNN_THREADS=ON -DNCNN_OPENMP=ON -DNCNN_SIMPLEOMP=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_SSE2=OFF -DNCNN_AVX2=OFF -DNCNN_AVX=OFF \
|
||||
-DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_BENCHMARK=OFF ..
|
||||
cmake --build . -j 4
|
||||
cmake --build . --target install
|
||||
```
|
||||
|
||||
Build with WASM SIMD and Thread extension:
|
||||
```shell
|
||||
mkdir -p build-simd-threads
|
||||
cd build-simd-threads
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../emsdk/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake \
|
||||
-DNCNN_THREADS=ON -DNCNN_OPENMP=ON -DNCNN_SIMPLEOMP=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_SSE2=ON -DNCNN_AVX2=OFF -DNCNN_AVX=OFF \
|
||||
-DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_BENCHMARK=OFF ..
|
||||
cmake --build . -j 4
|
||||
cmake --build . --target install
|
||||
```
|
||||
|
||||
Pick `build-XYZ/install` folder for further usage.
|
||||
|
||||
***
|
||||
|
||||
### Build for AllWinner D1
|
||||
|
||||
Download c906 toolchain package from https://occ.t-head.cn/community/download?id=3913221581316624384
|
||||
|
||||
```shell
|
||||
tar -xf riscv64-linux-x86_64-20210512.tar.gz
|
||||
export RISCV_ROOT_PATH=/home/nihui/osd/riscv64-linux-x86_64-20210512
|
||||
```
|
||||
|
||||
Build ncnn with riscv-v vector and simpleocv enabled:
|
||||
```shell
|
||||
mkdir -p build-c906
|
||||
cd build-c906
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/c906.toolchain.cmake \
|
||||
-DCMAKE_BUILD_TYPE=relwithdebinfo -DNCNN_OPENMP=OFF -DNCNN_THREADS=OFF -DNCNN_RUNTIME_CPU=OFF -DNCNN_RVV=ON \
|
||||
-DNCNN_SIMPLEOCV=ON -DNCNN_BUILD_EXAMPLES=ON ..
|
||||
cmake --build . -j 4
|
||||
cmake --build . --target install
|
||||
```
|
||||
|
||||
Pick `build-c906/install` folder for further usage.
|
||||
|
||||
You can upload binary inside `build-c906/examples` folder and run on D1 board for testing.
|
||||
|
||||
***
|
||||
|
||||
### Build for Loongson 2K1000
|
||||
|
||||
For gcc version < 8.5, you need to fix the msa.h header to work around the msa fmadd/fmsub/maddv/msubv bug.
|
||||
|
||||
Open ```/usr/lib/gcc/mips64el-linux-gnuabi64/8/include/msa.h```, find ```__msa_fmadd``` and ```__msa_fmsub``` and apply changes as the following
|
||||
```c
|
||||
// #define __msa_fmadd_w __builtin_msa_fmadd_w
|
||||
// #define __msa_fmadd_d __builtin_msa_fmadd_d
|
||||
// #define __msa_fmsub_w __builtin_msa_fmsub_w
|
||||
// #define __msa_fmsub_d __builtin_msa_fmsub_d
|
||||
#define __msa_fmadd_w(a, b, c) __builtin_msa_fmadd_w(c, b, a)
|
||||
#define __msa_fmadd_d(a, b, c) __builtin_msa_fmadd_d(c, b, a)
|
||||
#define __msa_fmsub_w(a, b, c) __builtin_msa_fmsub_w(c, b, a)
|
||||
#define __msa_fmsub_d(a, b, c) __builtin_msa_fmsub_d(c, b, a)
|
||||
```
|
||||
|
||||
find ```__msa_maddv``` and ```__msa_msubv``` and apply changes as the following
|
||||
```c
|
||||
// #define __msa_maddv_b __builtin_msa_maddv_b
|
||||
// #define __msa_maddv_h __builtin_msa_maddv_h
|
||||
// #define __msa_maddv_w __builtin_msa_maddv_w
|
||||
// #define __msa_maddv_d __builtin_msa_maddv_d
|
||||
// #define __msa_msubv_b __builtin_msa_msubv_b
|
||||
// #define __msa_msubv_h __builtin_msa_msubv_h
|
||||
// #define __msa_msubv_w __builtin_msa_msubv_w
|
||||
// #define __msa_msubv_d __builtin_msa_msubv_d
|
||||
#define __msa_maddv_b(a, b, c) __builtin_msa_maddv_b(c, b, a)
|
||||
#define __msa_maddv_h(a, b, c) __builtin_msa_maddv_h(c, b, a)
|
||||
#define __msa_maddv_w(a, b, c) __builtin_msa_maddv_w(c, b, a)
|
||||
#define __msa_maddv_d(a, b, c) __builtin_msa_maddv_d(c, b, a)
|
||||
#define __msa_msubv_b(a, b, c) __builtin_msa_msubv_b(c, b, a)
|
||||
#define __msa_msubv_h(a, b, c) __builtin_msa_msubv_h(c, b, a)
|
||||
#define __msa_msubv_w(a, b, c) __builtin_msa_msubv_w(c, b, a)
|
||||
#define __msa_msubv_d(a, b, c) __builtin_msa_msubv_d(c, b, a)
|
||||
```
|
||||
|
||||
Build ncnn with mips msa and simpleocv enabled:
|
||||
```shell
|
||||
mkdir -p build
|
||||
cd build
|
||||
cmake -DNCNN_DISABLE_RTTI=ON -DNCNN_DISABLE_EXCEPTION=ON -DNCNN_RUNTIME_CPU=OFF -DNCNN_MSA=ON -DNCNN_MMI=ON -DNCNN_SIMPLEOCV=ON ..
|
||||
cmake --build . -j 2
|
||||
cmake --build . --target install
|
||||
```
|
||||
|
||||
Pick `build/install` folder for further usage.
|
||||
|
||||
You can run binary inside `build/examples` folder for testing.
|
||||
|
||||
***
|
||||
|
||||
### Build for Termux on Android
|
||||
|
||||
Install the Termux app on your phone, and install Ubuntu in Termux.
|
||||
|
||||
If you want to use ssh, just install openssh in Termux
|
||||
|
||||
```
|
||||
pkg install proot-distro
|
||||
proot-distro install ubuntu
|
||||
```
|
||||
|
||||
or you can see what system can be installed using `proot-distro list`
|
||||
|
||||
Once you have installed Ubuntu successfully, use `proot-distro login ubuntu` to log in to Ubuntu.
|
||||
|
||||
Then build ncnn; there is no need to install any other dependencies.
|
||||
|
||||
```
|
||||
git clone https://github.com/Tencent/ncnn.git
|
||||
cd ncnn
|
||||
git submodule update --init
|
||||
mkdir -p build
|
||||
cd build
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DNCNN_BUILD_EXAMPLES=ON -DNCNN_PLATFORM_API=OFF -DNCNN_SIMPLEOCV=ON ..
|
||||
make -j$(nproc)
|
||||
```
|
||||
|
||||
Then you can run a test
|
||||
|
||||
> on my Pixel 3 XL using Qualcomm 845, it can't load `256-ncnn.png`
|
||||
|
||||
```
|
||||
cd ../examples
|
||||
../build/examples/squeezenet ../images/128-ncnn.png
|
||||
```
|
||||
|
172
3rdparty/ncnn/docs/how-to-use-and-FAQ/FAQ-ncnn-produce-wrong-result.md
vendored
Normal file
172
3rdparty/ncnn/docs/how-to-use-and-FAQ/FAQ-ncnn-produce-wrong-result.md
vendored
Normal file
@ -0,0 +1,172 @@
|
||||
### caffemodel should be row-major
|
||||
|
||||
`caffe2ncnn` tool assumes the caffemodel is row-major (produced by c++ caffe train command).
|
||||
|
||||
The kernel 3x3 weights should be stored as
|
||||
```
|
||||
a b c
|
||||
d e f
|
||||
g h i
|
||||
```
|
||||
|
||||
However, matlab caffe produced col-major caffemodel.
|
||||
|
||||
You have to transpose all the kernel weights by yourself or re-training using c++ caffe train command.
|
||||
|
||||
Besides, you may interest in https://github.com/conanhujinming/matcaffe2caffe
|
||||
|
||||
### check input is RGB or BGR
|
||||
|
||||
If your caffemodel is trained using c++ caffe and opencv, then the input image should be BGR order.
|
||||
|
||||
If your model is trained using matlab caffe or pytorch or mxnet or tensorflow, the input image would probably be RGB order.
|
||||
|
||||
The channel order can be changed on-the-fly through proper pixel type enum
|
||||
```
|
||||
// construct RGB blob from rgb image
|
||||
ncnn::Mat in_rgb = ncnn::Mat::from_pixels(rgb_data, ncnn::Mat::PIXEL_RGB, w, h);
|
||||
|
||||
// construct BGR blob from bgr image
|
||||
ncnn::Mat in_bgr = ncnn::Mat::from_pixels(bgr_data, ncnn::Mat::PIXEL_BGR, w, h);
|
||||
|
||||
// construct BGR blob from rgb image
|
||||
ncnn::Mat in_bgr = ncnn::Mat::from_pixels(rgb_data, ncnn::Mat::PIXEL_RGB2BGR, w, h);
|
||||
|
||||
// construct RGB blob from bgr image
|
||||
ncnn::Mat in_rgb = ncnn::Mat::from_pixels(bgr_data, ncnn::Mat::PIXEL_BGR2RGB, w, h);
|
||||
```
|
||||
|
||||
|
||||
### image decoding
|
||||
|
||||
JPEG (`.jpg`, `.jpeg`) is a lossy compression format; people may get different pixel values for the same image at the same position.
|
||||
|
||||
`.bmp` images are recommended instead.
|
||||
|
||||
### interpolation / resizing
|
||||
|
||||
There are several image resizing methods, which may generate different result for same input image.
|
||||
|
||||
Even we specify same interpolation method, different frameworks/libraries and their various versions may also introduce difference.
|
||||
|
||||
A good practice is to feed an image of the same size as the input layer expects, e.g. read a 224x224 bmp image when the input layer needs 224x224 size.
|
||||
|
||||
|
||||
### Mat::from_pixels/from_pixels_resize assume that the pixel data is continuous
|
||||
|
||||
You shall pass continuous pixel buffer to from_pixels family.
|
||||
|
||||
If your image is an opencv submat from an image roi, call clone() to get a continuous one.
|
||||
```
|
||||
cv::Mat image;// the image
|
||||
cv::Rect facerect;// the face rectangle
|
||||
|
||||
cv::Mat faceimage = image(facerect).clone();// get a continuous sub image
|
||||
|
||||
ncnn::Mat in = ncnn::Mat::from_pixels(faceimage.data, ncnn::Mat::PIXEL_BGR, faceimage.cols, faceimage.rows);
|
||||
```
|
||||
|
||||
### pre process
|
||||
Apply pre process according to your training configuration
|
||||
|
||||
Different model has different pre process config, you may find the following transform config in Data layer section
|
||||
```
|
||||
transform_param {
|
||||
mean_value: 103.94
|
||||
mean_value: 116.78
|
||||
mean_value: 123.68
|
||||
scale: 0.017
|
||||
}
|
||||
```
|
||||
Then the corresponding code for ncnn pre process is
|
||||
```cpp
|
||||
const float mean_vals[3] = { 103.94f, 116.78f, 123.68f };
|
||||
const float norm_vals[3] = { 0.017f, 0.017f, 0.017f };
|
||||
in.substract_mean_normalize(mean_vals, norm_vals);
|
||||
```
|
||||
|
||||
Mean file is not supported currently
|
||||
|
||||
So you have to pre process the input data by yourself (use opencv or something)
|
||||
```
|
||||
transform_param {
|
||||
mean_file: "imagenet_mean.binaryproto"
|
||||
}
|
||||
```
|
||||
|
||||
For pytorch or mxnet-gluon
|
||||
```python
|
||||
transforms.ToTensor(),
|
||||
transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
|
||||
```
|
||||
Then the corresponding code for ncnn pre process is
|
||||
```cpp
|
||||
// R' = (R / 255 - 0.485) / 0.229 = (R - 0.485 * 255) / 0.229 / 255
|
||||
// G' = (G / 255 - 0.456) / 0.224 = (G - 0.456 * 255) / 0.224 / 255
|
||||
// B' = (B / 255 - 0.406) / 0.225 = (B - 0.406 * 255) / 0.225 / 255
|
||||
const float mean_vals[3] = {0.485f*255.f, 0.456f*255.f, 0.406f*255.f};
|
||||
const float norm_vals[3] = {1/0.229f/255.f, 1/0.224f/255.f, 1/0.225f/255.f};
|
||||
in.substract_mean_normalize(mean_vals, norm_vals);
|
||||
```
|
||||
|
||||
### use the desired blob
|
||||
The blob names for input and extract differ among models.
|
||||
|
||||
For example, squeezenet v1.1 use "data" as input blob and "prob" as output blob while mobilenet-ssd use "data" as input blob and "detection_out" as output blob.
|
||||
|
||||
Some models may need multiple input or produce multiple output.
|
||||
|
||||
```cpp
|
||||
ncnn::Extractor ex = net.create_extractor();
|
||||
|
||||
ex.input("data", in);// change "data" to yours
|
||||
ex.input("mask", mask);// change "mask" to yours
|
||||
|
||||
ex.extract("output1", out1);// change "output1" to yours
|
||||
ex.extract("output2", out2);// change "output2" to yours
|
||||
```
|
||||
|
||||
### blob may have channel gap
|
||||
Each channel pointer is aligned by 128bit in ncnn Mat structure.
|
||||
|
||||
blob may have gaps between channels if (width x height) cannot be divided exactly by 4
|
||||
|
||||
Prefer using ncnn::Mat::from_pixels or ncnn::Mat::from_pixels_resize for constructing input blob from image data
|
||||
|
||||
If you do need a continuous blob buffer, reshape the output.
|
||||
```cpp
|
||||
// out is the output blob extracted
|
||||
ncnn::Mat flattened_out = out.reshape(out.w * out.h * out.c);
|
||||
|
||||
// plain array, C-H-W
|
||||
const float* outptr = flattened_out;
|
||||
```
|
||||
|
||||
### create new Extractor for each image
|
||||
The `ncnn::Extractor` object is stateful; if you reuse it for different input, you will always get exactly the same result cached inside.
|
||||
|
||||
Always create new Extractor to process images in loop unless you do know how the stateful Extractor works.
|
||||
```cpp
|
||||
for (int i=0; i<count; i++)
|
||||
{
|
||||
// always create Extractor
|
||||
// it's cheap and almost instantly !
|
||||
ncnn::Extractor ex = net.create_extractor();
|
||||
|
||||
// use
|
||||
ex.input(your_data[i]);
|
||||
}
|
||||
```
|
||||
|
||||
### use proper loading api
|
||||
|
||||
If you want to load plain param file buffer, you shall use Net::load_param_mem instead of Net::load_param.
|
||||
|
||||
For more information about the ncnn model load api, see [ncnn-load-model](ncnn-load-model)
|
||||
|
||||
```cpp
|
||||
ncnn::Net net;
|
||||
|
||||
// param_buffer is the content buffer of the XYZ.param file
|
||||
net.load_param_mem(param_buffer);
|
||||
```
|
73
3rdparty/ncnn/docs/how-to-use-and-FAQ/FAQ-ncnn-protobuf-problem.zh.md
vendored
Normal file
73
3rdparty/ncnn/docs/how-to-use-and-FAQ/FAQ-ncnn-protobuf-problem.zh.md
vendored
Normal file
@ -0,0 +1,73 @@
|
||||
### Linux 编译 `caffe2ncnn` 时报 `Protobuf not found`
|
||||
|
||||
一般是因为 protobuf 未安装或环境变量未设置
|
||||
|
||||
1. 安装 protobuf
|
||||
|
||||
Ubuntu 系统尝试以下命令
|
||||
> sudo apt-get install libprotobuf-dev protobuf-compiler
|
||||
|
||||
CentOS 尝试
|
||||
> sudo yum install protobuf-devel.x86_64 protobuf-compiler.x86_64
|
||||
|
||||
2. 然后设置 C++ 环境
|
||||
打开`~/.bashrc`,在末尾增加
|
||||
> export LD_LIBRARY_PATH=${YOUR_PROTOBUF_LIB_PATH}:$LD_LIBRARY_PATH
|
||||
|
||||
3. 让配置生效
|
||||
> source ~/.bashrc
|
||||
|
||||
|
||||
### 编译 `caffe2ncnn` 时报 protoc 和 protobuf.so 版本不匹配
|
||||
|
||||
一般是因为系统安装了不止一个 protobuf。
|
||||
|
||||
#### 直接改链接路径
|
||||
1. 先看 protoc 需要的 so 版本号
|
||||
> ldd \`whereis protoc| awk '{print $2}'\` | grep libprotobuf.so
|
||||
|
||||
例如是 libprotobuf.so.10
|
||||
|
||||
2. 然后搜这个文件所在的路径
|
||||
> cd / && find . -type f | grep libprotobuf.so.10
|
||||
|
||||
假设在`/home/user/mydir`
|
||||
|
||||
3. 设置 protobuf.so 的搜索目录
|
||||
打开`~/.bashrc`,在末尾增加
|
||||
> export LD_LIBRARY_PATH=/home/user/mydir:$LD_LIBRARY_PATH
|
||||
|
||||
4. 让配置生效
|
||||
> source ~/.bashrc
|
||||
|
||||
#### 如果以上办法不行的话,尝试源码安装 protobuf
|
||||
|
||||
1. 首先在 [protobuf/releases](https://github.com/protocolbuffers/protobuf/releases/tag/v3.10.0) 下载所需的 pb 版本,例如需要 v3.10.0 。注意要下载 -cpp 后缀的压缩包。
|
||||
|
||||
2. 解压到某一目录,然后编译
|
||||
> tar xvf protobuf-cpp-3.10.0.tar.gz && cd protobuf-3.10.0/
|
||||
./configure --prefix=/your_install_dir && make -j 3 && make install
|
||||
|
||||
3. **不不不要**忽略`--prefix`直接安装到系统目录,源码编译好的 so 和头文件在`your_install_dir`里
|
||||
|
||||
4. 设置 protobuf.so 的搜索目录
|
||||
打开`~/.bashrc`,在末尾增加
|
||||
|
||||
```bash
|
||||
export LD_LIBRARY_PATH=/your_install_dir/lib:$LD_LIBRARY_PATH
|
||||
export CPLUS_INCLUDE_PATH=/your_install_dir/include:$CPLUS_INCLUDE_PATH
|
||||
```
|
||||
|
||||
5. 让配置生效
|
||||
> source ~/.bashrc
|
||||
|
||||
#### 如果以上办法还不行
|
||||
尝试删除已有protobuf(注意不要删到系统自带的,新手请谨慎),然后用以下命令重装所需的 so
|
||||
> sudo apt-get install --reinstall libprotobuf8
|
||||
|
||||
版本号需改为自己的版本号
|
||||
|
||||
### Windows 出现此类问题,基本思路也是 IDE 改环境变量
|
||||
|
||||
### 行走江湖必备
|
||||
关于环境变量设置、工具和技巧,强烈建议学习下 https://missing.csail.mit.edu/
|
129
3rdparty/ncnn/docs/how-to-use-and-FAQ/FAQ-ncnn-throw-error.md
vendored
Normal file
129
3rdparty/ncnn/docs/how-to-use-and-FAQ/FAQ-ncnn-throw-error.md
vendored
Normal file
@ -0,0 +1,129 @@
|
||||
### param is too old, please regenerate
|
||||
|
||||
Your model file is being the old format converted by an old caffe2ncnn tool.
|
||||
|
||||
Checkout the latest ncnn code, build it and regenerate param and model binary files, and that should work.
|
||||
|
||||
Make sure that your param file starts with the magic number 7767517.
|
||||
|
||||
you may find more info on [use-ncnn-with-alexnet](use-ncnn-with-alexnet)
|
||||
|
||||
### find_blob_index_by_name XYZ failed
|
||||
|
||||
That means ncnn couldn't find the XYZ blob in the network.
|
||||
|
||||
You shall call Extractor::input()/extract() by blob name instead of layer name.
|
||||
|
||||
For models loaded from binary param file or external memory, you shall call Extractor::input()/extract() by the enum defined in xxx.id.h because all the visible string literals have been stripped in binary form.
|
||||
|
||||
This error usually happens when the input layer is not properly converted.
|
||||
|
||||
You shall upgrade caffe prototxt/caffemodel before converting it to ncnn. Following snippet type shall be ok.
|
||||
|
||||
```
|
||||
layer {
|
||||
name: "data"
|
||||
type: "Input"
|
||||
top: "data"
|
||||
input_param { shape: { dim: 1 dim: 3 dim: 227 dim: 227 } }
|
||||
}
|
||||
```
|
||||
|
||||
you may find more info on [use-ncnn-with-alexnet](use-ncnn-with-alexnet).
|
||||
|
||||
### layer XYZ not exists or registered
|
||||
|
||||
Your network contains some operations that are not implemented in ncnn.
|
||||
|
||||
You may implement them as custom layer followed in [how-to-implement-custom-layer-step-by-step](how-to-implement-custom-layer-step-by-step).
|
||||
|
||||
Or you could simply register them as no-op if you are sure those operations make no sense.
|
||||
|
||||
```cpp
|
||||
class Noop : public ncnn::Layer {};
|
||||
DEFINE_LAYER_CREATOR(Noop)
|
||||
|
||||
net.register_custom_layer("LinearRegressionOutput", Noop_layer_creator);
|
||||
net.register_custom_layer("MAERegressionOutput", Noop_layer_creator);
|
||||
```
|
||||
|
||||
### fopen XYZ.param/XYZ.bin failed
|
||||
|
||||
File not found or not readable. Make sure that XYZ.param/XYZ.bin is accessible.
|
||||
|
||||
### network graph not ready
|
||||
|
||||
You shall call Net::load_param() first, then Net::load_model().
|
||||
|
||||
This error may also happen when Net::load_param() failed but was not properly handled.
|
||||
|
||||
For more information about the ncnn model load api, see [ncnn-load-model](ncnn-load-model)
|
||||
|
||||
### memory not 32-bit aligned at XYZ
|
||||
|
||||
The pointer passed to Net::load_param() or Net::load_model() is not 32bit aligned.
|
||||
|
||||
In practice, the head pointer of std::vector<unsigned char> is not guaranteed to be 32bit aligned.
|
||||
|
||||
you can store your binary buffer in ncnn::Mat structure, its internal memory is aligned.
|
||||
|
||||
### undefined reference to '__kmpc_XYZ_XYZ'
|
||||
|
||||
use clang for building android shared library
|
||||
|
||||
comment the following line in your Application.mk
|
||||
```
|
||||
NDK_TOOLCHAIN_VERSION := 4.9
|
||||
```
|
||||
|
||||
### crash on android with '__kmp_abort_process'
|
||||
|
||||
This usually happens if you bundle multiple shared library with openmp linked
|
||||
|
||||
It is actually an issue of the android ndk https://github.com/android/ndk/issues/1028
|
||||
|
||||
On old android ndk, modify the link flags as
|
||||
|
||||
```
|
||||
-Wl,-Bstatic -lomp -Wl,-Bdynamic
|
||||
```
|
||||
|
||||
For recent ndk >= 21
|
||||
|
||||
```
|
||||
-fstatic-openmp
|
||||
```
|
||||
|
||||
### dlopen failed: library "libomp.so" not found
|
||||
|
||||
Newer android ndk defaults to dynamic openmp runtime
|
||||
|
||||
modify the link flags as
|
||||
|
||||
```
|
||||
-fstatic-openmp -fopenmp
|
||||
```
|
||||
|
||||
### crash when freeing a ncnn dynamic library(*.dll/*.so) built with openMP
|
||||
|
||||
for optimal performance, the openmp threadpool spin waits for about a second prior to shutting down in case more work becomes available.
|
||||
|
||||
If you unload a dynamic library that's in the process of spin-waiting, it will crash in the manner you see (most of the time).
|
||||
|
||||
Just set OMP_WAIT_POLICY=passive in your environment, before calling loadlibrary. or Just wait a few seconds before calling freelibrary.
|
||||
|
||||
You can also use the following method to set environment variables in your code:
|
||||
|
||||
for msvc++:
|
||||
|
||||
```
|
||||
SetEnvironmentVariable(_T("OMP_WAIT_POLICY"), _T("passive"));
|
||||
```
|
||||
|
||||
for g++:
|
||||
|
||||
```
|
||||
setenv("OMP_WAIT_POLICY", "passive", 1)
|
||||
```
|
||||
|
||||
reference: https://stackoverflow.com/questions/34439956/vc-crash-when-freeing-a-dll-built-with-openmp
|
124
3rdparty/ncnn/docs/how-to-use-and-FAQ/FAQ-ncnn-vulkan.md
vendored
Normal file
124
3rdparty/ncnn/docs/how-to-use-and-FAQ/FAQ-ncnn-vulkan.md
vendored
Normal file
@ -0,0 +1,124 @@
|
||||
### how to enable ncnn vulkan capability
|
||||
|
||||
follow [the build and install instruction](https://github.com/Tencent/ncnn/blob/master/docs/how-to-build/how-to-build.md)
|
||||
|
||||
make sure you have installed vulkan sdk from [lunarg vulkan sdk website](https://vulkan.lunarg.com/sdk/home)
|
||||
|
||||
Usually, you can enable the vulkan compute inference feature by adding only one line of code to your application.
|
||||
|
||||
```cpp
|
||||
// enable vulkan compute feature before loading
|
||||
ncnn::Net net;
|
||||
net.opt.use_vulkan_compute = 1;
|
||||
```
|
||||
|
||||
### does my graphics device support vulkan
|
||||
|
||||
Some platforms have been tested and known working. In theory, if your platform support vulkan api, either 1.0 or 1.1, it shall work.
|
||||
|
||||
* Y = known work
|
||||
* ? = shall work, not confirmed
|
||||
* / = not applied
|
||||
|
||||
| |windows|linux|android|mac|ios|
|
||||
|---|---|---|---|---|---|
|
||||
|intel|Y|Y|?|?|/|
|
||||
|amd|Y|Y|/|?|/|
|
||||
|nvidia|Y|Y|?|/|/|
|
||||
|qcom|/|/|Y|/|/|
|
||||
|apple|/|/|/|Y|Y|
|
||||
|arm|/|?|Y|/|/|
|
||||
|
||||
You can search [the vulkan database](https://vulkan.gpuinfo.org) to see if your device supports vulkan.
|
||||
|
||||
Some old buggy drivers may produce wrong result, that are blacklisted in ncnn and treated as non-vulkan capable device.
|
||||
You could check if your device and driver have this issue with [my conformance test here](vulkan-conformance-test).
|
||||
Most of these systems are android with version lower than 8.1.
|
||||
|
||||
### why using vulkan over cuda/opencl/metal
|
||||
|
||||
In the beginning, I had no GPGPU programming experience, and I had to learn one.
|
||||
|
||||
vulkan is considered more portable, well supported by vendors, and is the cross-platform low-overhead graphics api. In contrast, cuda is only available on nvidia devices, metal is only available on macos and ios, while loading the opencl library is banned in android 7.0+ and does not work on ios.
|
||||
|
||||
### I got errors like "vkCreateComputePipelines failed -1000012000" or random stalls or crashes
|
||||
|
||||
Upgrade your vulkan driver.
|
||||
|
||||
[intel https://downloadcenter.intel.com/product/80939/Graphics-Drivers](https://downloadcenter.intel.com/product/80939/Graphics-Drivers)
|
||||
|
||||
[amd https://www.amd.com/en/support](https://www.amd.com/en/support)
|
||||
|
||||
[nvidia https://www.nvidia.com/Download/index.aspx](https://www.nvidia.com/Download/index.aspx)
|
||||
|
||||
### how to use ncnn vulkan on android
|
||||
|
||||
minimum android ndk version: android-ndk-r18b
|
||||
|
||||
minimum sdk platform api version: android-24
|
||||
|
||||
link your jni project with libvulkan.so
|
||||
|
||||
[The squeezencnn example](https://github.com/Tencent/ncnn/tree/master/examples/squeezencnn) have equipped gpu inference, you could take it as reference.
|
||||
|
||||
### how to use ncnn vulkan on ios
|
||||
|
||||
setup vulkan sdk (https://vulkan.lunarg.com/sdk/home#mac)
|
||||
|
||||
metal only works on real device with arm64 cpu (iPhone 5s and later)
|
||||
|
||||
link your project with MoltenVK framework and Metal
|
||||
|
||||
### what about the layers without vulkan support
|
||||
|
||||
These layers have vulkan support currently
|
||||
|
||||
AbsVal, BatchNorm, BinaryOp, Cast, Clip, Concat, Convolution, ConvolutionDepthWise, Crop, Deconvolution, DeconvolutionDepthWise, Dropout, Eltwise, Flatten, HardSigmoid, InnerProduct, Interp, LRN, Packing, Padding, Permute, Pooling(pad SAME not supported), PReLU, PriorBox, ReLU, Reorg, Reshape, Scale, ShuffleChannel, Sigmoid, Softmax, TanH, UnaryOp
|
||||
|
||||
For these layers without vulkan support, ncnn inference engine will automatically fallback to cpu path.
|
||||
|
||||
Thus, it is usually not a serious issue if your network only has some special head layers like SSD or YOLO. All examples in ncnn are known working properly with vulkan enabled.
|
||||
|
||||
### my model runs slower on gpu than cpu
|
||||
|
||||
The current vulkan inference implementation is far from the preferred state. Many handful optimization techniques are planned, such as winograd convolution, operator fusion, fp16 storage and arithmetic etc.
|
||||
|
||||
It is common that your model runs slower on gpu than cpu on arm devices like mobile phones, since we have quite good arm optimization in ncnn ;)
|
||||
|
||||
### vulkan device not found / extra high cpu utility while vulkan is enabled on nvidia gpu
|
||||
|
||||
There are several reasons that could lead to this outcome. First, please check your driver status with `nvidia-smi`. If you have correctly installed your driver, you should see something like this:
|
||||
|
||||
```bash
|
||||
$ nvidia-smi
|
||||
Sat Mar 06 19:53:16 2021
|
||||
+-----------------------------------------------------------------------------+
|
||||
| NVIDIA-SMI 451.48 Driver Version: 451.48 CUDA Version: 11.0 |
|
||||
|-------------------------------+----------------------+----------------------+
|
||||
| GPU Name TCC/WDDM | Bus-Id Disp.A | Volatile Uncorr. ECC |
|
||||
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
|
||||
|===============================+======================+======================|
|
||||
| 0 GeForce GTX 1060 WDDM | 00000000:02:00.0 Off | N/A |
|
||||
| N/A 31C P8 5W / N/A | 90MiB / 6144MiB | 0% Default |
|
||||
+-------------------------------+----------------------+----------------------+
|
||||
|
||||
+-----------------------------------------------------------------------------+
|
||||
| Processes: |
|
||||
| GPU GI CI PID Type Process name GPU Memory |
|
||||
| ID ID Usage |
|
||||
|=============================================================================|
|
||||
| No running processes found |
|
||||
+-----------------------------------------------------------------------------+
|
||||
```
|
||||
|
||||
If `nvidia-smi` crashes or cannot be found, please reinstall your graphics driver.
|
||||
|
||||
If ncnn *is* utilizing the Tesla GPU, you can see your program in the `Processes` block at the bottom. In that case, it's likely some operators are not yet supported in Vulkan, and have fallbacked to the CPU, thus leading to a low utilization of the GPU.
|
||||
|
||||
If you *couldn't* find your process running, please check the active driver model, which can be found to the right of your device name. For Geforce and Titan GPUs, the default driver model is WDDM (Windows Desktop Driver Model), which supports both rendering graphics as well as computing. But for Tesla GPUs, without configuration, the driver model defaults to TCC ([Tesla Computing Cluster](https://docs.nvidia.com/gameworks/content/developertools/desktop/tesla_compute_cluster.htm)). NVIDIA's TCC driver does not support Vulkan, so you need to use the following command to set the driver model back to WDDM, to use Vulkan:
|
||||
|
||||
```bash
|
||||
$ nvidia-smi -g 0 -dm 0
|
||||
```
|
||||
|
||||
The number following `-g` is the GPU ID (which can be found to the left of your device name in `nvidia-smi` output); and `-dm` stands for driver model, 0 refers to WDDM and 1 means TCC.
|
136
3rdparty/ncnn/docs/how-to-use-and-FAQ/build-minimal-library.md
vendored
Normal file
136
3rdparty/ncnn/docs/how-to-use-and-FAQ/build-minimal-library.md
vendored
Normal file
@ -0,0 +1,136 @@
|
||||
For some reason, if you're not happy with the binary size of the ncnn library, then here is the cheatsheet that helps you to build a minimal ncnn :P
|
||||
|
||||
### disable c++ rtti and exceptions
|
||||
|
||||
```
|
||||
cmake -DNCNN_DISABLE_RTTI=ON -DNCNN_DISABLE_EXCEPTION=ON ..
|
||||
```
|
||||
* Cannot use RTTI and Exceptions when ncnn functions are called.
|
||||
|
||||
### disable vulkan support
|
||||
|
||||
```
|
||||
cmake -DNCNN_VULKAN=OFF ..
|
||||
```
|
||||
|
||||
* Cannot use GPU acceleration.
|
||||
|
||||
### disable NCNN_STDIO
|
||||
|
||||
```
|
||||
cmake -DNCNN_STDIO=OFF ..
|
||||
```
|
||||
|
||||
* Cannot load model from files, but can load model from memory or by Android Assets.
|
||||
|
||||
Read more [here](https://github.com/Tencent/ncnn/blob/master/docs/how-to-use-and-FAQ/use-ncnn-with-alexnet.md#load-model).
|
||||
|
||||
### disable NCNN_STRING
|
||||
|
||||
```
|
||||
cmake -DNCNN_STRING=OFF ..
|
||||
```
|
||||
|
||||
* Cannot load human-readable param files with visible strings, but can load binary param.bin files.
|
||||
|
||||
Read more [here](https://github.com/Tencent/ncnn/blob/master/docs/how-to-use-and-FAQ/use-ncnn-with-alexnet.md#strip-visible-string)
|
||||
|
||||
* Cannot identify blobs by string name when calling `Extractor::input / extract`, but can identify them by enum value in `id.h`.
|
||||
|
||||
Read more [here](https://github.com/Tencent/ncnn/blob/master/docs/how-to-use-and-FAQ/use-ncnn-with-alexnet.md#input-and-output).
|
||||
|
||||
### disable NCNN_BF16
|
||||
|
||||
```
|
||||
cmake -DNCNN_BF16=OFF ..
|
||||
```
|
||||
|
||||
* Cannot use bf16 storage type in inference.
|
||||
|
||||
|
||||
### disable NCNN_INT8
|
||||
|
||||
```
|
||||
cmake -DNCNN_INT8=OFF ..
|
||||
```
|
||||
|
||||
* Cannot use quantized int8 inference.
|
||||
|
||||
|
||||
### drop pixel drawing functions
|
||||
|
||||
```
|
||||
cmake -DNCNN_PIXEL_DRAWING=OFF ..
|
||||
```
|
||||
|
||||
* Cannot use functions doing drawing basic shape and text like `ncnn::draw_rectangle_xx / ncnn::draw_circle_xx / ncnn::draw_text_xx`, but functions like `Mat::from_pixels / from_pixels_resize` are still available.
|
||||
|
||||
|
||||
### drop pixel rotate and affine functions
|
||||
|
||||
```
|
||||
cmake -DNCNN_PIXEL_ROTATE=OFF -DNCNN_PIXEL_AFFINE=OFF ..
|
||||
```
|
||||
|
||||
* Cannot use functions doing rotatation and affine transformation like `ncnn::kanna_rotate_xx / ncnn::warpaffine_bilinear_xx`, but functions like `Mat::from_pixels / from_pixels_resize` are still available.
|
||||
|
||||
### drop pixel functions
|
||||
|
||||
```
|
||||
cmake -DNCNN_PIXEL=OFF ..
|
||||
```
|
||||
|
||||
* Cannot use functions transferring from image to pixels like `Mat::from_pixels / from_pixels_resize / to_pixels / to_pixels_resize`, and need create a Mat and fill in data by hand.
|
||||
|
||||
### disable openmp
|
||||
|
||||
```
|
||||
cmake -DNCNN_OPENMP=OFF ..
|
||||
```
|
||||
|
||||
* Cannot use openmp multi-threading acceleration. If you want to run a model in single thread on your target machine, it is recommended to close the option.
|
||||
|
||||
### disable avx2 and arm82 optimized kernel
|
||||
|
||||
```
|
||||
cmake -DNCNN_AVX2=OFF -DNCNN_ARM82=OFF ..
|
||||
```
|
||||
|
||||
* Do not compile optimized kernels using avx2 / arm82 instruction set extensions. If your target machine does not support some of them, it is recommended to close the related options.
|
||||
|
||||
### disable runtime cpu instruction dispatch
|
||||
|
||||
```
|
||||
cmake -DNCNN_RUNTIME_CPU=OFF ..
|
||||
```
|
||||
|
||||
* Cannot check supported cpu instruction set extensions and use related optimized kernels in runtime.
|
||||
* If you know which instruction set extensions are supported on your target machine like avx2 / arm82, you can open related options like `-DNCNN_AVX2=ON / -DNCNN_ARM82=ON` by hand and then sse2 / arm8 version kernels will not be compiled.
|
||||
|
||||
### drop layers not used
|
||||
|
||||
```
|
||||
cmake -DWITH_LAYER_absval=OFF -DWITH_LAYER_bnll=OFF ..
|
||||
```
|
||||
|
||||
* If your model does not include some layers, taking absval / bnll as an example above, you can drop them.
|
||||
* Some key or dependency layers should not be dropped, like convolution / innerproduct, their dependency like padding / flatten, and activation like relu / clip.
|
||||
|
||||
### disable c++ stl
|
||||
|
||||
```
|
||||
cmake -DNCNN_SIMPLESTL=ON ..
|
||||
```
|
||||
|
||||
* STL provided by compiler is no longer depended on, and use `simplestl` provided by ncnn as a replacement. Users also can only use `simplestl` when ncnn functions are called.
|
||||
* Usually with compiler parameters `-nodefaultlibs -fno-builtin -nostdinc++ -lc`
|
||||
* Need cmake parameters `cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_STL=system` to avoid STL conflict when compiling to Android.
|
||||
|
||||
### drop optimized kernel not used
|
||||
|
||||
* Modify the source code under `ncnn/src/layer/arm/` to delete unnecessary optimized kernels or replace them with empty functions.
|
||||
* You can also drop layers and related optimized kernels by `-DWITH_LAYER_absval=OFF` as mentioned above.
|
||||
|
||||
### drop operators from BinaryOp UnaryOp
|
||||
|
||||
* Modify `ncnn/src/layer/binaryop.cpp unaryop.cpp` and `ncnn/src/layer/arm/binaryop.cpp unaryop_arm.cpp` by hand to delete unnecessary operators.
|
162
3rdparty/ncnn/docs/how-to-use-and-FAQ/efficient-roi-resize-rotate.md
vendored
Normal file
162
3rdparty/ncnn/docs/how-to-use-and-FAQ/efficient-roi-resize-rotate.md
vendored
Normal file
@ -0,0 +1,162 @@
|
||||
|
||||
### image roi crop + convert to ncnn::Mat
|
||||
|
||||
```
|
||||
+--------------+
|
||||
| y | /-------/
|
||||
| x +-------+ | +-------+|
|
||||
| | roih |im_h => | roih
|
||||
| +-roiw--+ | +-roiw--+/
|
||||
| |
|
||||
+-----im_w-----+
|
||||
```
|
||||
```cpp
|
||||
ncnn::Mat in = ncnn::Mat::from_pixels_roi(im.data, ncnn::Mat::PIXEL_RGB, im_w, im_h, x, y, roiw, roih);
|
||||
```
|
||||
For Android Application, it is :
|
||||
```cpp
|
||||
ncnn::Mat in = ncnn::Mat::from_android_bitmap_roi(env, image, ncnn::Mat::PIXEL_RGBA2RGB, x, y, roiw, roih);
|
||||
```
|
||||
|
||||
### image roi crop + resize + convert to ncnn::Mat
|
||||
|
||||
```
|
||||
+--------------+
|
||||
| y | /----/
|
||||
| x +-------+ | +----+|
|
||||
| | roih |im_h => | target_h
|
||||
| +-roiw--+ | | ||
|
||||
| | +----+/
|
||||
+-----im_w-----+ target_w
|
||||
```
|
||||
```cpp
|
||||
ncnn::Mat in = ncnn::Mat::from_pixels_roi_resize(im.data, ncnn::Mat::PIXEL_RGB, im_w, im_h, x, y, roiw, roih, target_w, target_h);
|
||||
```
|
||||
For Android Application, it is :
|
||||
```cpp
|
||||
ncnn::Mat in = ncnn::Mat::from_android_bitmap_roi_resize(env, image, ncnn::Mat::PIXEL_RGBA2RGB, x, y, roiw, roih, target_w, target_h);
|
||||
```
|
||||
|
||||
### ncnn::Mat export image + offset paste
|
||||
|
||||
```
|
||||
+--------------+
|
||||
/-------/ | y |
|
||||
+-------+| | x +-------+ |
|
||||
| h| => | | h |im_h
|
||||
+---w---+/ | +---w---+ |
|
||||
| |
|
||||
+-----im_w-----+
|
||||
```
|
||||
```cpp
|
||||
const unsigned char* data = im.data + (y * im_w + x) * 3;
|
||||
out.to_pixels(data, ncnn::Mat::PIXEL_RGB, im_w * 3);
|
||||
```
|
||||
|
||||
### ncnn::Mat export image + resize + roi paste
|
||||
|
||||
```
|
||||
+--------------+
|
||||
/----/ | y |
|
||||
+----+| | x +-------+ |
|
||||
| h| => | | roih|im_h
|
||||
| || | +-roiw--+ |
|
||||
+-w--+/ | |
|
||||
+-----im_w-----+
|
||||
```
|
||||
```cpp
|
||||
const unsigned char* data = im.data + (y * im_w + x) * 3;
|
||||
out.to_pixels_resize(data, ncnn::Mat::PIXEL_RGB, roiw, roih, im_w * 3);
|
||||
```
|
||||
|
||||
### image roi crop + resize
|
||||
```
|
||||
+--------------+
|
||||
| y |
|
||||
| x +-------+ | +----+
|
||||
| | roih|im_h => | target_h
|
||||
| +-roiw--+ | | |
|
||||
| | +----+
|
||||
+-----im_w-----+ target_w
|
||||
```
|
||||
```cpp
|
||||
const unsigned char* data = im.data + (y * im_w + x) * 3;
|
||||
ncnn::resize_bilinear_c3(data, roiw, roih, im_w * 3, outdata, target_w, target_h, target_w * 3);
|
||||
```
|
||||
|
||||
### image resize + offset paste
|
||||
```
|
||||
+--------------+
|
||||
| y |
|
||||
+----+ | x +-------+ |
|
||||
| h => | | roih |im_h
|
||||
| | | +-roiw--+ |
|
||||
+-w--+ | |
|
||||
+-----im_w-----+
|
||||
```
|
||||
```cpp
|
||||
unsigned char* outdata = im.data + (y * im_w + x) * 3;
|
||||
ncnn::resize_bilinear_c3(data, w, h, w * 3, outdata, roiw, roih, im_w * 3);
|
||||
```
|
||||
|
||||
### image roi crop + resize + roi paste
|
||||
```
|
||||
+--------------+ +-----------------+
|
||||
| y | | roiy |
|
||||
| x +-------+ | |roix----------+ |
|
||||
| | h |im_h => | | target_h|outim_h
|
||||
| +---w---+ | | | | |
|
||||
| | | +-target_w-+ |
|
||||
+-----im_w-----+ +-----outim_w-----+
|
||||
```
|
||||
```cpp
|
||||
const unsigned char* data = im.data + (y * im_w + x) * 3;
|
||||
unsigned char* outdata = outim.data + (roiy * outim_w + roix) * 3;
|
||||
ncnn::resize_bilinear_c3(data, w, h, im_w * 3, outdata, target_w, target_h, outim_w * 3);
|
||||
```
|
||||
|
||||
### image roi crop + rotate
|
||||
```
|
||||
+--------------+
|
||||
| y |
|
||||
| x +-------+ | +---+
|
||||
| | < < h |im_h => | ^ |w
|
||||
| +---w---+ | | ^ |
|
||||
| | +---+
|
||||
+-----im_w-----+ h
|
||||
```
|
||||
```cpp
|
||||
const unsigned char* data = im.data + (y * im_w + x) * 3;
|
||||
ncnn::kanna_rotate_c3(data, w, h, im_w * 3, outdata, h, w, h * 3, 6);
|
||||
```
|
||||
|
||||
### image rotate + offset paste
|
||||
```
|
||||
+--------------+
|
||||
| y |
|
||||
+---+ | x +-------+ |
|
||||
| ^ |h => | | < < w |im_h
|
||||
| ^ | | +---h---+ |
|
||||
+---+ | |
|
||||
w +-----im_w-----+
|
||||
```
|
||||
```cpp
|
||||
unsigned char* outdata = im.data + (y * im_w + x) * 3;
|
||||
ncnn::kanna_rotate_c3(data, w, h, w * 3, outdata, h, w, im_w * 3, 7);
|
||||
```
|
||||
|
||||
### image roi crop + rotate + roi paste
|
||||
```
|
||||
+--------------+ +-----------------+
|
||||
| y | | roiy |
|
||||
| x +-------+ | | roix +---+ |
|
||||
| | < < h |im_h => | | ^ w |outim_h
|
||||
| +---w---+ | | | ^ | |
|
||||
| | | +-h-+ |
|
||||
+-----im_w-----+ +-----outim_w-----+
|
||||
```
|
||||
```cpp
|
||||
const unsigned char* data = im.data + (y * im_w + x) * 3;
|
||||
unsigned char* outdata = outim.data + (roiy * outim_w + roix) * 3;
|
||||
ncnn::kanna_rotate_c3(data, w, h, im_w * 3, outdata, h, w, outim_w * 3, 6);
|
||||
```
|
26
3rdparty/ncnn/docs/how-to-use-and-FAQ/ncnn-load-model.md
vendored
Normal file
26
3rdparty/ncnn/docs/how-to-use-and-FAQ/ncnn-load-model.md
vendored
Normal file
@ -0,0 +1,26 @@
|
||||
### the comprehensive model loading api table
|
||||
|
||||
|load from|alexnet.param|alexnet.param.bin|alexnet.bin|
|
||||
|---|---|---|---|
|
||||
|file path|load_param(const char*)|load_param_bin(const char*)|load_model(const char*)|
|
||||
|file descriptor|load_param(FILE*)|load_param_bin(FILE*)|load_model(FILE*)|
|
||||
|file memory|load_param_mem(const char*)|load_param(const unsigned char*)|load_model(const unsigned char*)|
|
||||
|android asset|load_param(AAsset*)|load_param_bin(AAsset*)|load_model(AAsset*)|
|
||||
|android asset path|load_param(AAssetManager*, const char*)|load_param_bin(AAssetManager*, const char*)|load_model(AAssetManager*, const char*)|
|
||||
|custom IO reader|load_param(const DataReader&)|load_param_bin(const DataReader&)|load_model(const DataReader&)|
|
||||
|
||||
### points to note
|
||||
|
||||
1. Either of the following combination shall be enough for loading model
|
||||
* alexnet.param + alexnet.bin
|
||||
* alexnet.param.bin + alexnet.bin
|
||||
|
||||
2. Never modify Net opt member after loading
|
||||
|
||||
3. Most loading functions return 0 if success, except loading alexnet.param.bin and alexnet.bin from file memory, which returns the bytes consumed after loading
|
||||
* int Net::load_param(const unsigned char*)
|
||||
* int Net::load_model(const unsigned char*)
|
||||
|
||||
4. It is recommended to load model from Android asset directly to avoid copying them to sdcard on Android platform
|
||||
|
||||
5. The custom IO reader interface can be used to implement on-the-fly model decryption and loading
|
74
3rdparty/ncnn/docs/how-to-use-and-FAQ/openmp-best-practice.md
vendored
Executable file
74
3rdparty/ncnn/docs/how-to-use-and-FAQ/openmp-best-practice.md
vendored
Executable file
@ -0,0 +1,74 @@
|
||||
ncnn openmp best practice
|
||||
|
||||
### CPU loadaverage is too high with ncnn.
|
||||
|
||||
When inference the neural network with ncnn, the cpu occupancy is very high even all CPU cores occupancy close to 100%.
|
||||
|
||||
If there are other threads or processes that require more cpu resources, the running speed of the program will drop severely.
|
||||
|
||||
### The root cause of high CPU usage
|
||||
|
||||
1. ncnn uses openmp API to speed up the inference compute. the thread count equals to the cpu core count. If the computing work need to run frequently, it must consume many cpu resources.
|
||||
|
||||
2. There is a thread pool managed by openmp, the pool size is equal to the cpu core count. (the max value is 15 if there are many more cpu cores?)
|
||||
Openmp need to sync the thread when acquiring and returning threads to the pool. In order to improve efficiency, almost all omp implementations use spinlock synchronization (except for simpleomp).
|
||||
The default spin time of the spinlock is 200ms. So after a thread is scheduled, the thread need to busy-wait up to 200ms.
|
||||
|
||||
### Why the CPU usage is still high even using vulkan GPU acceleration.
|
||||
|
||||
1. Openmp is also used when loading the param bin file, and this part runs on cpu.
|
||||
|
||||
2. The fp32 to fp16 conversion before and after the GPU memory upload is executed on the cpu, and this part of the logic also uses openmp.
|
||||
|
||||
### Solution
|
||||
```
|
||||
1. Bind to the specific cpu core.
|
||||
```
|
||||
If you use a device with large and small core CPUs, it is recommended to bind large or small cores through ncnn::set_cpu_powersave(int). Note that Windows does not support binding cores. By the way, it's possible to have multiple threadpool using openmp. A new threadpool will be created for a new thread scope.
|
||||
Suppose your platform is 2 big cores + 4 little cores, and you want to execute model A on 2 big cores and model B on 4 little cores concurrently.
|
||||
|
||||
create two threads via std::thread or pthread
|
||||
```
|
||||
void thread_1()
|
||||
{
|
||||
ncnn::set_cpu_powersave(2); // bind to big cores
|
||||
netA.opt.num_threads = 2;
|
||||
}
|
||||
|
||||
void thread_2()
|
||||
{
|
||||
ncnn::set_cpu_powersave(1); // bind to little cores
|
||||
netB.opt.num_threads = 4;
|
||||
}
|
||||
```
|
||||
|
||||
```
|
||||
2. Use fewer threads.
|
||||
```
|
||||
Set the number of threads to half of the cpu cores count or less through ncnn::set_omp_num_threads(int) or change net.opt.num_threads field. If you are coding with clang libomp, it's recommended that the number of threads does not exceed 8. If you use other omp libraries, it is recommended that the number of threads does not exceed 4.
|
||||
```
|
||||
3. Reduce openmp spinlock blocktime.
|
||||
```
|
||||
You can modify openmp blocktime by call ncnn::set_kmp_blocktime(int) method or modify net.opt.openmp_blocktime field.
|
||||
This argument is the spin time set by the ncnn API, and the default is 20ms. You can set a smaller value according to
|
||||
the situation, or directly change it to 0.
|
||||
|
||||
Limitations: At present, only the libomp library of clang is implemented. Neither vcomp nor libgomp have corresponding interfaces.
|
||||
If it is not compiled with clang, this value is still 200ms by default.
|
||||
If you use vcomp or libgomp, you can use the environment variable OMP_WAIT_POLICY=PASSIVE to disable spin time. If you use simpleomp,
|
||||
There is no need to set this parameter.
|
||||
```
|
||||
4. Limit the number of threads available in the openmp thread pool.
|
||||
```
|
||||
Even if the number of openmp threads is reduced, the CPU occupancy rate may still be high. This is more common on servers with
|
||||
particularly many CPU cores.
|
||||
This is because the waiting threads in the thread pool use a spinlock to busy-wait, which can be reduced by limiting the number of
|
||||
threads available in the thread pool.
|
||||
|
||||
Generally, you can set the OMP_THREAD_LIMIT environment variable. simpleomp currently does not support this feature so it's no need to be set.
|
||||
Note that this environment variable is only valid if it is set before the program starts.
|
||||
```
|
||||
5. Disable openmp completely
|
||||
```
|
||||
If there is only one cpu core, or use the vulkan gpu acceleration, it is recommended to disable openmp, just specify -DNCNN_OPENMP=OFF
|
||||
when compiling with cmake.
|
70
3rdparty/ncnn/docs/how-to-use-and-FAQ/openmp-best-practice.zh.md
vendored
Executable file
70
3rdparty/ncnn/docs/how-to-use-and-FAQ/openmp-best-practice.zh.md
vendored
Executable file
@ -0,0 +1,70 @@
|
||||
ncnn openmp 最佳实践
|
||||
|
||||
### ncnn占用过多cpu资源
|
||||
|
||||
使用ncnn推理运算,cpu占用非常高甚至所有核心占用都接近100%。
|
||||
|
||||
如果还有其它线程或进程需要较多的cpu资源,运行速度下降严重。
|
||||
|
||||
### cpu占用高的根本原因
|
||||
|
||||
1. ncnn使用openmp API控制多线程加速推理计算。默认情况下,线程数等于cpu内核数。如果推理需要高频率运行,必然占用大部分
|
||||
cpu资源。
|
||||
|
||||
2. openmp内部维护一个线程池,线程池最大可用线程数等于cpu内核数。(核心过多时最大限制是15?)获取和归还线程时需要同步。
|
||||
|
||||
为了提高效率,几乎所有omp实现都使用了自旋锁同步(simpleomp除外)。自旋锁默认的spin time是200ms。因此一个线程被调度后,
|
||||
需要忙等待最多200ms。
|
||||
|
||||
### 为什么使用vulkan加速后cpu占用依然很高。
|
||||
|
||||
1. 加载参数文件时也使用了openmp,这部分是在cpu上运行的。
|
||||
|
||||
2. 显存上传前和下载后的 fp32 fp16转换是在cpu上执行的,这部分逻辑也使用了openmp。
|
||||
|
||||
### 解决方法
|
||||
|
||||
```
|
||||
1. 绑核
|
||||
```
|
||||
如果使用有大小核cpu的设备,建议通过ncnn::set_cpu_powersave(int)绑定大核或小核,注意windows系统不支持绑核。顺便说一下,ncnn支持不同的模型运行在不同的核心。假设硬件平台有2个大核,4个小核,你想把netA运行在大核,netB运行在小核。
|
||||
可以通过std::thread or pthread创建两个线程,运行如下代码:
|
||||
|
||||
```
|
||||
void thread_1()
|
||||
{
|
||||
ncnn::set_cpu_powersave(2); // bind to big cores
|
||||
netA.opt.num_threads = 2;
|
||||
}
|
||||
|
||||
void thread_2()
|
||||
{
|
||||
ncnn::set_cpu_powersave(1); // bind to little cores
|
||||
netB.opt.num_threads = 4;
|
||||
}
|
||||
```
|
||||
|
||||
```
|
||||
2. 使用更少的线程数。
|
||||
```
|
||||
通过ncnn::set_omp_num_threads(int)或者net.opt.num_threads字段设置线程数为cpu内核数的一半或更小。如果使用clang的libomp,
|
||||
建议线程数不超过8,如果使用其它omp库,建议线程数不超过4。
|
||||
```
|
||||
3. 减小openmp blocktime。
|
||||
```
|
||||
可以修改ncnn::set_kmp_blocktime(int)或者修改net.opt.openmp_blocktime,这个参数是ncnn API设置的spin time,默认是20ms。
|
||||
可以根据情况设置更小的值,或者直接改为0。
|
||||
|
||||
局限:目前只有clang的libomp库有实现,vcomp和libgomp都没有相应接口,如果不是使用clang编译的,这个值默认还是200ms。
|
||||
如果使用vcomp或libgomp, 可以使用环境变量OMP_WAIT_POLICY=PASSIVE禁用spin time,如果使用simpleomp,不需要设置这个参数。
|
||||
```
|
||||
4. 限制openmp线程池可用线程数量。
|
||||
```
|
||||
即使减小了openmp线程数量,cpu占用率仍然可能会很高。这在cpu核心特别多的服务器上比较常见。这是因为线程池中的等待线程使用
|
||||
自旋锁忙等待,可以通过限制线程池可用线程数量减轻这种影响。
|
||||
|
||||
一般可以通过设置OMP_THREAD_LIMIT环境变量。simpleomp目前不支持这一特性,不需要设置。注意这个环境变量仅在程序启动前设置才有效。
|
||||
```
|
||||
5. 完全禁用openmp
|
||||
```
|
||||
如果只有一个cpu核心,或者使用vulkan加速,建议关闭openmp, cmake编译时指定-DNCNN_OPENMP=OFF即可。
|
71
3rdparty/ncnn/docs/how-to-use-and-FAQ/quantized-int8-inference.md
vendored
Normal file
71
3rdparty/ncnn/docs/how-to-use-and-FAQ/quantized-int8-inference.md
vendored
Normal file
@ -0,0 +1,71 @@
|
||||
# Post Training Quantization Tools
|
||||
|
||||
To support int8 model deployment on mobile devices, we provide the universal post training quantization tools which can convert a float32 model to an int8 model.
|
||||
|
||||
## User Guide
|
||||
|
||||
Example with mobilenet, just need three steps.
|
||||
|
||||
### 1. Optimize model
|
||||
|
||||
```shell
|
||||
./ncnnoptimize mobilenet.param mobilenet.bin mobilenet-opt.param mobilenet-opt.bin 0
|
||||
```
|
||||
|
||||
### 2. Create the calibration table file
|
||||
|
||||
We suggest that using the verification dataset for calibration, which is more than 5000 images.
|
||||
|
||||
Some imagenet sample images here https://github.com/nihui/imagenet-sample-images
|
||||
|
||||
```shell
|
||||
find images/ -type f > imagelist.txt
|
||||
./ncnn2table mobilenet-opt.param mobilenet-opt.bin imagelist.txt mobilenet.table mean=[104,117,123] norm=[0.017,0.017,0.017] shape=[224,224,3] pixel=BGR thread=8 method=kl
|
||||
```
|
||||
|
||||
* mean and norm are the values you passed to ```Mat::substract_mean_normalize()```
|
||||
* shape is the blob shape of your model, [w,h] or [w,h,c]
|
||||
|
||||
>
|
||||
* if w and h both are given, image will be resized to exactly that size.
|
||||
* if w and h both are zero or negative, image will not be resized.
|
||||
* if only h is zero or negative, image's width will scaled resize to w, keeping aspect ratio.
|
||||
* if only w is zero or negative, image's height will scaled resize to h
|
||||
|
||||
* pixel is the pixel format of your model, image pixels will be converted to this type before ```Extractor::input()```
|
||||
* thread is the CPU thread count that could be used for parallel inference
|
||||
* method is the post training quantization algorithm, kl and aciq are currently supported
|
||||
|
||||
If your model has multiple input nodes, you can use multiple list files and other parameters
|
||||
|
||||
```shell
|
||||
./ncnn2table mobilenet-opt.param mobilenet-opt.bin imagelist-bgr.txt,imagelist-depth.txt mobilenet.table mean=[104,117,123],[128] norm=[0.017,0.017,0.017],[0.0078125] shape=[224,224,3],[224,224,1] pixel=BGR,GRAY thread=8 method=kl
|
||||
```
|
||||
|
||||
### 3. Quantize model
|
||||
|
||||
```shell
|
||||
./ncnn2int8 mobilenet-opt.param mobilenet-opt.bin mobilenet-int8.param mobilenet-int8.bin mobilenet.table
|
||||
```
|
||||
|
||||
## use ncnn int8 inference
|
||||
|
||||
the ncnn library would use int8 inference automatically, nothing changed in your code
|
||||
|
||||
```cpp
|
||||
ncnn::Net mobilenet;
|
||||
mobilenet.load_param("mobilenet-int8.param");
|
||||
mobilenet.load_model("mobilenet-int8.bin");
|
||||
```
|
||||
|
||||
## mixed precision inference
|
||||
|
||||
Before quantizing your model, comment out the layer weight scale line in the table file, then the layer will do float32 inference
|
||||
|
||||
```
|
||||
conv1_param_0 156.639840536
|
||||
```
|
||||
|
||||
```
|
||||
#conv1_param_0 156.639840536
|
||||
```
|
162
3rdparty/ncnn/docs/how-to-use-and-FAQ/use-ncnn-with-alexnet.md
vendored
Normal file
162
3rdparty/ncnn/docs/how-to-use-and-FAQ/use-ncnn-with-alexnet.md
vendored
Normal file
@ -0,0 +1,162 @@
|
||||
We use alexnet as an example
|
||||
|
||||
### prepare caffe prototxt and model
|
||||
|
||||
These files will usually generated when trained with caffe
|
||||
```
|
||||
train.prototxt
|
||||
deploy.prototxt
|
||||
snapshot_10000.caffemodel
|
||||
```
|
||||
deploy.prototxt and caffemodel file are enough for TEST phase
|
||||
|
||||
alexnet deploy.prototxt can be downloaded here
|
||||
|
||||
https://github.com/BVLC/caffe/tree/master/models/bvlc_alexnet
|
||||
|
||||
alexnet caffemodel can be downloaded here
|
||||
|
||||
http://dl.caffe.berkeleyvision.org/bvlc_alexnet.caffemodel
|
||||
|
||||
### convert to ncnn model
|
||||
|
||||
Convert old caffe prototxt and caffemodel to new ones using tools in caffe
|
||||
|
||||
because the ncnn convert tool needs the new format
|
||||
```
|
||||
upgrade_net_proto_text [old prototxt] [new prototxt]
|
||||
upgrade_net_proto_binary [old caffemodel] [new caffemodel]
|
||||
```
|
||||
|
||||
Use Input layer as input, set N dim as 1 since only one image can be processed each time
|
||||
```
|
||||
layer {
|
||||
name: "data"
|
||||
type: "Input"
|
||||
top: "data"
|
||||
input_param { shape: { dim: 1 dim: 3 dim: 227 dim: 227 } }
|
||||
}
|
||||
```
|
||||
Use caffe2ncnn tool to convert caffe model to ncnn model
|
||||
```
|
||||
caffe2ncnn deploy.prototxt bvlc_alexnet.caffemodel alexnet.param alexnet.bin
|
||||
```
|
||||
|
||||
### strip visible string
|
||||
|
||||
It is already enough for deploying with param and bin file only, but there are visible strings in param file, it may not be suitable to distribute plain neural network information in your APP.
|
||||
|
||||
You can use ncnn2mem tool to convert plain model file to binary representation. It will generate alexnet.param.bin and two static array code files.
|
||||
```
|
||||
ncnn2mem alexnet.param alexnet.bin alexnet.id.h alexnet.mem.h
|
||||
```
|
||||
|
||||
### load model
|
||||
|
||||
Load param and bin file, the easy way
|
||||
```cpp
|
||||
ncnn::Net net;
|
||||
net.load_param("alexnet.param");
|
||||
net.load_model("alexnet.bin");
|
||||
```
|
||||
Load binary param.bin and bin file, no visible strings included, suitable for bundled as APP resource
|
||||
```cpp
|
||||
ncnn::Net net;
|
||||
net.load_param_bin("alexnet.param.bin");
|
||||
net.load_model("alexnet.bin");
|
||||
```
|
||||
Load network and model from external memory, no visible strings included, no external resource files bundled, the whole model is hardcoded in your program
|
||||
|
||||
You may use this way to load from android asset resource
|
||||
```cpp
|
||||
#include "alexnet.mem.h"
|
||||
ncnn::Net net;
|
||||
net.load_param(alexnet_param_bin);
|
||||
net.load_model(alexnet_bin);
|
||||
```
|
||||
You can choose either way to load model. Loading from external memory is zero-copy, which means you must keep your memory buffer during processing
|
||||
|
||||
### unload model
|
||||
```cpp
|
||||
net.clear();
|
||||
```
|
||||
|
||||
### input and output
|
||||
|
||||
ncnn Mat is the data structure for input and output data
|
||||
|
||||
Input image should be converted to Mat, and subtracted mean values and normalized when needed
|
||||
|
||||
```cpp
|
||||
#include "mat.h"
|
||||
unsigned char* rgbdata;// data pointer to RGB image pixels
|
||||
int w;// image width
|
||||
int h;// image height
|
||||
ncnn::Mat in = ncnn::Mat::from_pixels(rgbdata, ncnn::Mat::PIXEL_RGB, w, h);
|
||||
|
||||
const float mean_vals[3] = {104.f, 117.f, 123.f};
|
||||
in.substract_mean_normalize(mean_vals, 0);
|
||||
```
|
||||
Execute the network inference and retrieve the result
|
||||
```cpp
|
||||
#include "net.h"
|
||||
ncnn::Mat in;// input blob as above
|
||||
ncnn::Mat out;
|
||||
ncnn::Extractor ex = net.create_extractor();
|
||||
ex.set_light_mode(true);
|
||||
ex.input("data", in);
|
||||
ex.extract("prob", out);
|
||||
```
|
||||
If you load model with binary param.bin file, you should use the enum value in alexnet.id.h file instead of the blob name
|
||||
```cpp
|
||||
#include "net.h"
|
||||
#include "alexnet.id.h"
|
||||
ncnn::Mat in;// input blob as above
|
||||
ncnn::Mat out;
|
||||
ncnn::Extractor ex = net.create_extractor();
|
||||
ex.set_light_mode(true);
|
||||
ex.input(alexnet_param_id::BLOB_data, in);
|
||||
ex.extract(alexnet_param_id::BLOB_prob, out);
|
||||
```
|
||||
Read the data in the output Mat. Iterate data to get all classification scores.
|
||||
```cpp
|
||||
ncnn::Mat out_flatterned = out.reshape(out.w * out.h * out.c);
|
||||
std::vector<float> scores;
|
||||
scores.resize(out_flatterned.w);
|
||||
for (int j=0; j<out_flatterned.w; j++)
|
||||
{
|
||||
scores[j] = out_flatterned[j];
|
||||
}
|
||||
```
|
||||
|
||||
### some tricks
|
||||
|
||||
Set multithreading thread number with Extractor
|
||||
```cpp
|
||||
ex.set_num_threads(4);
|
||||
```
|
||||
Convert image colorspace and resize image with Mat convenient function, these functions are well optimized
|
||||
|
||||
Support RGB2GRAY GRAY2RGB RGB2BGR etc, support scale up and scale down
|
||||
```cpp
|
||||
#include "mat.h"
|
||||
unsigned char* rgbdata;// data pointer to RGB image pixels
|
||||
int w;// image width
|
||||
int h;// image height
|
||||
int target_width = 227;// target resized width
|
||||
int target_height = 227;// target resized height
|
||||
ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB2GRAY, w, h, target_width, target_height);
|
||||
```
|
||||
You can concat multiple model files into one, and load this single file from FILE* interface.
|
||||
|
||||
It should ease the distribution of param and model files.
|
||||
|
||||
> $ cat alexnet.param.bin alexnet.bin > alexnet-all.bin
|
||||
|
||||
```cpp
|
||||
#include "net.h"
|
||||
FILE* fp = fopen("alexnet-all.bin", "rb");
|
||||
net.load_param_bin(fp);
|
||||
net.load_model(fp);
|
||||
fclose(fp);
|
||||
```
|
149
3rdparty/ncnn/docs/how-to-use-and-FAQ/use-ncnn-with-alexnet.zh.md
vendored
Normal file
149
3rdparty/ncnn/docs/how-to-use-and-FAQ/use-ncnn-with-alexnet.zh.md
vendored
Normal file
@ -0,0 +1,149 @@
|
||||
首先,非常感谢大家对 ncnn 组件的关注
|
||||
为了方便大家使用 ncnn 组件,up主特意写了这篇使用指北,以烂大街的 alexnet 作为例子
|
||||
|
||||
|
||||
### 准备caffe网络和模型
|
||||
|
||||
caffe 的网络和模型通常是搞深度学习的研究者训练出来的,一般来说训练完会有
|
||||
```
|
||||
train.prototxt
|
||||
deploy.prototxt
|
||||
snapshot_10000.caffemodel
|
||||
```
|
||||
部署的时候只需要 TEST 过程,所以有 deploy.prototxt 和 caffemodel 就足够了
|
||||
|
||||
alexnet 的 deploy.prototxt 可以在这里下载
|
||||
https://github.com/BVLC/caffe/tree/master/models/bvlc_alexnet
|
||||
|
||||
alexnet 的 caffemodel 可以在这里下载
|
||||
http://dl.caffe.berkeleyvision.org/bvlc_alexnet.caffemodel
|
||||
|
||||
### 转换ncnn网络和模型
|
||||
|
||||
caffe 自带了工具可以把老版本的 caffe 网络和模型转换为新版(ncnn的工具只认识新版
|
||||
```
|
||||
upgrade_net_proto_text [老prototxt] [新prototxt]
|
||||
upgrade_net_proto_binary [老caffemodel] [新caffemodel]
|
||||
```
|
||||
输入层改用 Input,因为每次只需要做一个图片,所以第一个 dim 设为 1
|
||||
```
|
||||
layer {
|
||||
name: "data"
|
||||
type: "Input"
|
||||
top: "data"
|
||||
input_param { shape: { dim: 1 dim: 3 dim: 227 dim: 227 } }
|
||||
}
|
||||
```
|
||||
使用 caffe2ncnn 工具转换为 ncnn 的网络描述和模型
|
||||
```
|
||||
caffe2ncnn deploy.prototxt bvlc_alexnet.caffemodel alexnet.param alexnet.bin
|
||||
```
|
||||
### 去除可见字符串
|
||||
|
||||
有 param 和 bin 文件其实已经可以用了,但是 param 描述文件是明文的,如果放在 APP 分发出去容易被窥探到网络结构(说得好像不明文就看不到一样
|
||||
使用 ncnn2mem 工具转换为二进制描述文件和内存模型,生成 alexnet.param.bin 和两个静态数组的代码文件
|
||||
```
|
||||
ncnn2mem alexnet.param alexnet.bin alexnet.id.h alexnet.mem.h
|
||||
```
|
||||
### 加载模型
|
||||
|
||||
直接加载 param 和 bin,适合快速验证效果使用
|
||||
```cpp
|
||||
ncnn::Net net;
|
||||
net.load_param("alexnet.param");
|
||||
net.load_model("alexnet.bin");
|
||||
```
|
||||
加载二进制的 param.bin 和 bin,没有可见字符串,适合 APP 分发模型资源
|
||||
```cpp
|
||||
ncnn::Net net;
|
||||
net.load_param_bin("alexnet.param.bin");
|
||||
net.load_model("alexnet.bin");
|
||||
```
|
||||
从内存引用加载网络和模型,没有可见字符串,模型数据全在代码里头,没有任何外部文件
|
||||
另外,android apk 打包的资源文件读出来也是内存块
|
||||
```cpp
|
||||
#include "alexnet.mem.h"
|
||||
ncnn::Net net;
|
||||
net.load_param(alexnet_param_bin);
|
||||
net.load_model(alexnet_bin);
|
||||
```
|
||||
以上三种都可以加载模型,其中内存引用方式加载是 zero-copy 的,所以使用 net 模型的来源内存块必须存在
|
||||
|
||||
### 卸载模型
|
||||
```cpp
|
||||
net.clear();
|
||||
```
|
||||
|
||||
### 输入和输出
|
||||
|
||||
ncnn 用自己的数据结构 Mat 来存放输入和输出数据
|
||||
输入图像的数据要转换为 Mat,依需要减去均值和乘系数
|
||||
```cpp
|
||||
#include "mat.h"
|
||||
unsigned char* rgbdata;// data pointer to RGB image pixels
|
||||
int w;// image width
|
||||
int h;// image height
|
||||
ncnn::Mat in = ncnn::Mat::from_pixels(rgbdata, ncnn::Mat::PIXEL_RGB, w, h);
|
||||
|
||||
const float mean_vals[3] = {104.f, 117.f, 123.f};
|
||||
in.substract_mean_normalize(mean_vals, 0);
|
||||
```
|
||||
执行前向网络,获得计算结果
|
||||
```cpp
|
||||
#include "net.h"
|
||||
ncnn::Mat in;// input blob as above
|
||||
ncnn::Mat out;
|
||||
ncnn::Extractor ex = net.create_extractor();
|
||||
ex.set_light_mode(true);
|
||||
ex.input("data", in);
|
||||
ex.extract("prob", out);
|
||||
```
|
||||
如果是二进制的 param.bin 方式,没有可见字符串,利用 alexnet.id.h 的枚举来代替 blob 的名字
|
||||
```cpp
|
||||
#include "net.h"
|
||||
#include "alexnet.id.h"
|
||||
ncnn::Mat in;// input blob as above
|
||||
ncnn::Mat out;
|
||||
ncnn::Extractor ex = net.create_extractor();
|
||||
ex.set_light_mode(true);
|
||||
ex.input(alexnet_param_id::BLOB_data, in);
|
||||
ex.extract(alexnet_param_id::BLOB_prob, out);
|
||||
```
|
||||
获取 Mat 中的输出数据,Mat 内部的数据通常是三维的,c / h / w,遍历所有获得全部分类的分数
|
||||
```cpp
|
||||
ncnn::Mat out_flattened = out.reshape(out.w * out.h * out.c);
|
||||
std::vector<float> scores;
|
||||
scores.resize(out_flattened.w);
|
||||
for (int j=0; j<out_flattened.w; j++)
|
||||
{
|
||||
scores[j] = out_flattened[j];
|
||||
}
|
||||
```
|
||||
### 某些使用技巧
|
||||
|
||||
Extractor 有个多线程加速的开关,设置线程数能加快计算
|
||||
```cpp
|
||||
ex.set_num_threads(4);
|
||||
```
|
||||
Mat 转换图像的时候可以顺便转换颜色和缩放大小,这些顺带的操作也是有优化的
|
||||
支持 RGB2GRAY GRAY2RGB RGB2BGR 等常用转换,支持缩小和放大
|
||||
```cpp
|
||||
#include "mat.h"
|
||||
unsigned char* rgbdata;// data pointer to RGB image pixels
|
||||
int w;// image width
|
||||
int h;// image height
|
||||
int target_width = 227;// target resized width
|
||||
int target_height = 227;// target resized height
|
||||
ncnn::Mat in = ncnn::Mat::from_pixels_resize(rgbdata, ncnn::Mat::PIXEL_RGB2GRAY, w, h, target_width, target_height);
|
||||
```
|
||||
Net 有从 FILE* 文件描述加载的接口,可以利用这点把多个网络和模型文件合并为一个,分发时能方便些,内存引用就无所谓了
|
||||
|
||||
> $ cat alexnet.param.bin alexnet.bin > alexnet-all.bin
|
||||
|
||||
```cpp
|
||||
#include "net.h"
|
||||
FILE* fp = fopen("alexnet-all.bin", "rb");
|
||||
net.load_param_bin(fp);
|
||||
net.load_model(fp);
|
||||
fclose(fp);
|
||||
```
|
135
3rdparty/ncnn/docs/how-to-use-and-FAQ/use-ncnn-with-opencv.md
vendored
Normal file
135
3rdparty/ncnn/docs/how-to-use-and-FAQ/use-ncnn-with-opencv.md
vendored
Normal file
@ -0,0 +1,135 @@
|
||||
### opencv to ncnn
|
||||
|
||||
* cv::Mat CV_8UC3 -> ncnn::Mat 3 channel + swap RGB/BGR
|
||||
|
||||
```cpp
|
||||
// cv::Mat a(h, w, CV_8UC3);
|
||||
ncnn::Mat in = ncnn::Mat::from_pixels(a.data, ncnn::Mat::PIXEL_BGR2RGB, a.cols, a.rows);
|
||||
```
|
||||
|
||||
* cv::Mat CV_8UC3 -> ncnn::Mat 3 channel + keep RGB/BGR order
|
||||
|
||||
```cpp
|
||||
// cv::Mat a(h, w, CV_8UC3);
|
||||
ncnn::Mat in = ncnn::Mat::from_pixels(a.data, ncnn::Mat::PIXEL_RGB, a.cols, a.rows);
|
||||
```
|
||||
|
||||
* cv::Mat CV_8UC3 -> ncnn::Mat 1 channel + do RGB2GRAY/BGR2GRAY
|
||||
|
||||
```cpp
|
||||
// cv::Mat rgb(h, w, CV_8UC3);
|
||||
ncnn::Mat inrgb = ncnn::Mat::from_pixels(rgb.data, ncnn::Mat::PIXEL_RGB2GRAY, rgb.cols, rgb.rows);
|
||||
|
||||
// cv::Mat bgr(h, w, CV_8UC3);
|
||||
ncnn::Mat inbgr = ncnn::Mat::from_pixels(bgr.data, ncnn::Mat::PIXEL_BGR2GRAY, bgr.cols, bgr.rows);
|
||||
```
|
||||
|
||||
* cv::Mat CV_8UC1 -> ncnn::Mat 1 channel
|
||||
|
||||
```cpp
|
||||
// cv::Mat a(h, w, CV_8UC1);
|
||||
ncnn::Mat in = ncnn::Mat::from_pixels(a.data, ncnn::Mat::PIXEL_GRAY, a.cols, a.rows);
|
||||
```
|
||||
|
||||
* cv::Mat CV_32FC1 -> ncnn::Mat 1 channel
|
||||
|
||||
* **You could construct ncnn::Mat and fill data into it directly to avoid data copy**
|
||||
|
||||
```cpp
|
||||
// cv::Mat a(h, w, CV_32FC1);
|
||||
ncnn::Mat in(a.cols, a.rows, 1, (void*)a.data);
|
||||
in = in.clone();
|
||||
```
|
||||
|
||||
* cv::Mat CV_32FC3 -> ncnn::Mat 3 channel
|
||||
|
||||
* **You could construct ncnn::Mat and fill data into it directly to avoid data copy**
|
||||
|
||||
```cpp
|
||||
// cv::Mat a(h, w, CV_32FC3);
|
||||
ncnn::Mat in_pack3(a.cols, a.rows, 1, (void*)a.data, (size_t)4u * 3, 3);
|
||||
ncnn::Mat in;
|
||||
ncnn::convert_packing(in_pack3, in, 1);
|
||||
```
|
||||
|
||||
* std::vector < cv::Mat > + CV_32FC1 -> ncnn::Mat multiple channels
|
||||
|
||||
* **You could construct ncnn::Mat and fill data into it directly to avoid data copy**
|
||||
|
||||
```cpp
|
||||
// std::vector<cv::Mat> a(channels, cv::Mat(h, w, CV_32FC1));
|
||||
int channels = a.size();
|
||||
ncnn::Mat in(a[0].cols, a[0].rows, channels);
|
||||
for (int p=0; p<in.c; p++)
|
||||
{
|
||||
memcpy(in.channel(p), (const uchar*)a[p].data, in.w * in.h * sizeof(float));
|
||||
}
|
||||
```
|
||||
|
||||
### ncnn to opencv
|
||||
|
||||
* ncnn::Mat 3 channel -> cv::Mat CV_8UC3 + swap RGB/BGR
|
||||
|
||||
* **You may need to call in.substract_mean_normalize() first to scale values from 0..1 to 0..255**
|
||||
|
||||
```cpp
|
||||
// ncnn::Mat in(w, h, 3);
|
||||
cv::Mat a(in.h, in.w, CV_8UC3);
|
||||
in.to_pixels(a.data, ncnn::Mat::PIXEL_BGR2RGB);
|
||||
```
|
||||
|
||||
* ncnn::Mat 3 channel -> cv::Mat CV_8UC3 + keep RGB/BGR order
|
||||
|
||||
* **You may need to call in.substract_mean_normalize() first to scale values from 0..1 to 0..255**
|
||||
|
||||
```cpp
|
||||
// ncnn::Mat in(w, h, 3);
|
||||
cv::Mat a(in.h, in.w, CV_8UC3);
|
||||
in.to_pixels(a.data, ncnn::Mat::PIXEL_RGB);
|
||||
```
|
||||
|
||||
* ncnn::Mat 1 channel -> cv::Mat CV_8UC1
|
||||
|
||||
* **You may need to call in.substract_mean_normalize() first to scale values from 0..1 to 0..255**
|
||||
|
||||
```cpp
|
||||
// ncnn::Mat in(w, h, 1);
|
||||
cv::Mat a(in.h, in.w, CV_8UC1);
|
||||
in.to_pixels(a.data, ncnn::Mat::PIXEL_GRAY);
|
||||
```
|
||||
|
||||
* ncnn::Mat 1 channel -> cv::Mat CV_32FC1
|
||||
|
||||
* **You could consume or manipulate ncnn::Mat data directly to avoid data copy**
|
||||
|
||||
```cpp
|
||||
// ncnn::Mat in;
|
||||
cv::Mat a(in.h, in.w, CV_32FC1);
|
||||
memcpy((uchar*)a.data, in.data, in.w * in.h * sizeof(float));
|
||||
```
|
||||
|
||||
* ncnn::Mat 3 channel -> cv::Mat CV_32FC3
|
||||
|
||||
* **You could consume or manipulate ncnn::Mat data directly to avoid data copy**
|
||||
|
||||
```cpp
|
||||
// ncnn::Mat in(w, h, 3);
|
||||
ncnn::Mat in_pack3;
|
||||
ncnn::convert_packing(in, in_pack3, 3);
|
||||
cv::Mat a(in.h, in.w, CV_32FC3);
|
||||
memcpy((uchar*)a.data, in_pack3.data, in.w * in.h * 3 * sizeof(float));
|
||||
```
|
||||
|
||||
* ncnn::Mat multiple channels -> std::vector < cv::Mat > + CV_32FC1
|
||||
|
||||
* **You could consume or manipulate ncnn::Mat data directly to avoid data copy**
|
||||
|
||||
```cpp
|
||||
// ncnn::Mat in(w, h, channels);
|
||||
std::vector<cv::Mat> a(in.c);
|
||||
for (int p=0; p<in.c; p++)
|
||||
{
|
||||
a[p] = cv::Mat(in.h, in.w, CV_32FC1);
|
||||
memcpy((uchar*)a[p].data, in.channel(p), in.w * in.h * sizeof(float));
|
||||
}
|
||||
```
|
48
3rdparty/ncnn/docs/how-to-use-and-FAQ/use-ncnn-with-own-project.md
vendored
Normal file
48
3rdparty/ncnn/docs/how-to-use-and-FAQ/use-ncnn-with-own-project.md
vendored
Normal file
@ -0,0 +1,48 @@
|
||||
### use ncnn with own project
|
||||
|
||||
After building ncnn, one or more library files are generated. To integrate ncnn into your own project, you may either use the cmake config file provided by ncnn's installation, or manually specify the library path(s).
|
||||
|
||||
**with cmake**
|
||||
|
||||
Ensure your project is built by cmake. Then in your project's CMakeLists.txt, add these lines:
|
||||
|
||||
```cmake
|
||||
set(ncnn_DIR "<ncnn_install_dir>/lib/cmake/ncnn" CACHE PATH "Directory that contains ncnnConfig.cmake")
|
||||
find_package(ncnn REQUIRED)
|
||||
target_link_libraries(my_target ncnn)
|
||||
```
|
||||
After this, both the header file search path ("include directories") and library paths are configured automatically, including vulkan related dependencies.
|
||||
|
||||
Note: you have to change `<ncnn_install_dir>` to your machine's directory, it is the directory that contains `ncnnConfig.cmake`.
|
||||
|
||||
For the prebuilt ncnn release packages, ncnnConfig is located in:
|
||||
- for `ncnn-YYYYMMDD-windows-vs2019`, it is `lib/cmake/ncnn`
|
||||
- for `ncnn-YYYYMMDD-android-vulkan`, it is `${ANDROID_ABI}/lib/cmake/ncnn` (`${ANDROID_ABI}` is defined in NDK's cmake toolchain file)
|
||||
- other prebuilt release packages are with similar condition
|
||||
|
||||
**manually specify**
|
||||
|
||||
You may also manually specify the ncnn library path and include directory. Note that if you use ncnn with vulkan, it is also required to specify vulkan related dependencies.
|
||||
|
||||
For example, on Visual Studio debug mode with vulkan required, the lib paths are:
|
||||
```
|
||||
E:\github\ncnn\build\vs2019-x64\install\lib\ncnnd.lib
|
||||
E:\lib\VulkanSDK\1.2.148.0\Lib\vulkan-1.lib
|
||||
E:\github\ncnn\build\vs2019-x64\install\lib\SPIRVd.lib
|
||||
E:\github\ncnn\build\vs2019-x64\install\lib\glslangd.lib
|
||||
E:\github\ncnn\build\vs2019-x64\install\lib\MachineIndependentd.lib
|
||||
E:\github\ncnn\build\vs2019-x64\install\lib\OGLCompilerd.lib
|
||||
E:\github\ncnn\build\vs2019-x64\install\lib\OSDependentd.lib
|
||||
E:\github\ncnn\build\vs2019-x64\install\lib\GenericCodeGend.lib
|
||||
```
|
||||
And for its release mode, lib paths are:
|
||||
```
|
||||
E:\github\ncnn\build\vs2019-x64\install\lib\ncnn.lib
|
||||
E:\lib\VulkanSDK\1.2.148.0\Lib\vulkan-1.lib
|
||||
E:\github\ncnn\build\vs2019-x64\install\lib\SPIRV.lib
|
||||
E:\github\ncnn\build\vs2019-x64\install\lib\glslang.lib
|
||||
E:\github\ncnn\build\vs2019-x64\install\lib\MachineIndependent.lib
|
||||
E:\github\ncnn\build\vs2019-x64\install\lib\OGLCompiler.lib
|
||||
E:\github\ncnn\build\vs2019-x64\install\lib\OSDependent.lib
|
||||
E:\github\ncnn\build\vs2019-x64\install\lib\GenericCodeGen.lib
|
||||
```
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user