添加项目文件。

This commit is contained in:
CaiXiang
2025-01-20 10:30:01 +08:00
parent 77371da5d7
commit 752be79e06
1010 changed files with 610100 additions and 0 deletions

View File

@@ -0,0 +1,256 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_HAL_HPP
#define OPENCV_HAL_HPP
#include "opencv2/core/cvdef.h"
#include "opencv2/core/cvstd.hpp"
#include "opencv2/core/hal/interface.h"
namespace cv { namespace hal {
//! @addtogroup core_hal_functions
//! @{
CV_EXPORTS int normHamming(const uchar* a, int n);
CV_EXPORTS int normHamming(const uchar* a, const uchar* b, int n);
CV_EXPORTS int normHamming(const uchar* a, int n, int cellSize);
CV_EXPORTS int normHamming(const uchar* a, const uchar* b, int n, int cellSize);
CV_EXPORTS int LU32f(float* A, size_t astep, int m, float* b, size_t bstep, int n);
CV_EXPORTS int LU64f(double* A, size_t astep, int m, double* b, size_t bstep, int n);
CV_EXPORTS bool Cholesky32f(float* A, size_t astep, int m, float* b, size_t bstep, int n);
CV_EXPORTS bool Cholesky64f(double* A, size_t astep, int m, double* b, size_t bstep, int n);
CV_EXPORTS void SVD32f(float* At, size_t astep, float* W, float* U, size_t ustep, float* Vt, size_t vstep, int m, int n, int flags);
CV_EXPORTS void SVD64f(double* At, size_t astep, double* W, double* U, size_t ustep, double* Vt, size_t vstep, int m, int n, int flags);
CV_EXPORTS int QR32f(float* A, size_t astep, int m, int n, int k, float* b, size_t bstep, float* hFactors);
CV_EXPORTS int QR64f(double* A, size_t astep, int m, int n, int k, double* b, size_t bstep, double* hFactors);
CV_EXPORTS void gemm32f(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
int m_a, int n_a, int n_d, int flags);
CV_EXPORTS void gemm64f(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
int m_a, int n_a, int n_d, int flags);
CV_EXPORTS void gemm32fc(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
int m_a, int n_a, int n_d, int flags);
CV_EXPORTS void gemm64fc(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
int m_a, int n_a, int n_d, int flags);
CV_EXPORTS int normL1_(const uchar* a, const uchar* b, int n);
CV_EXPORTS float normL1_(const float* a, const float* b, int n);
CV_EXPORTS float normL2Sqr_(const float* a, const float* b, int n);
CV_EXPORTS void exp32f(const float* src, float* dst, int n);
CV_EXPORTS void exp64f(const double* src, double* dst, int n);
CV_EXPORTS void log32f(const float* src, float* dst, int n);
CV_EXPORTS void log64f(const double* src, double* dst, int n);
CV_EXPORTS void fastAtan32f(const float* y, const float* x, float* dst, int n, bool angleInDegrees);
CV_EXPORTS void fastAtan64f(const double* y, const double* x, double* dst, int n, bool angleInDegrees);
CV_EXPORTS void magnitude32f(const float* x, const float* y, float* dst, int n);
CV_EXPORTS void magnitude64f(const double* x, const double* y, double* dst, int n);
CV_EXPORTS void sqrt32f(const float* src, float* dst, int len);
CV_EXPORTS void sqrt64f(const double* src, double* dst, int len);
CV_EXPORTS void invSqrt32f(const float* src, float* dst, int len);
CV_EXPORTS void invSqrt64f(const double* src, double* dst, int len);
CV_EXPORTS void split8u(const uchar* src, uchar** dst, int len, int cn );
CV_EXPORTS void split16u(const ushort* src, ushort** dst, int len, int cn );
CV_EXPORTS void split32s(const int* src, int** dst, int len, int cn );
CV_EXPORTS void split64s(const int64* src, int64** dst, int len, int cn );
CV_EXPORTS void merge8u(const uchar** src, uchar* dst, int len, int cn );
CV_EXPORTS void merge16u(const ushort** src, ushort* dst, int len, int cn );
CV_EXPORTS void merge32s(const int** src, int* dst, int len, int cn );
CV_EXPORTS void merge64s(const int64** src, int64* dst, int len, int cn );
CV_EXPORTS void add8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void add8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void add16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
CV_EXPORTS void add16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
CV_EXPORTS void add32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
CV_EXPORTS void add32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
CV_EXPORTS void add64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
CV_EXPORTS void sub8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void sub8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void sub16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
CV_EXPORTS void sub16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
CV_EXPORTS void sub32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
CV_EXPORTS void sub32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
CV_EXPORTS void sub64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
CV_EXPORTS void max8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void max8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void max16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
CV_EXPORTS void max16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
CV_EXPORTS void max32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
CV_EXPORTS void max32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
CV_EXPORTS void max64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
CV_EXPORTS void min8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void min8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void min16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
CV_EXPORTS void min16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
CV_EXPORTS void min32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
CV_EXPORTS void min32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
CV_EXPORTS void min64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
CV_EXPORTS void absdiff8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void absdiff8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void absdiff16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
CV_EXPORTS void absdiff16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
CV_EXPORTS void absdiff32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
CV_EXPORTS void absdiff32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
CV_EXPORTS void absdiff64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
CV_EXPORTS void and8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void or8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void xor8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void not8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
CV_EXPORTS void cmp8s(const schar* src1, size_t step1, const schar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
CV_EXPORTS void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
CV_EXPORTS void cmp16s(const short* src1, size_t step1, const short* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
CV_EXPORTS void cmp32s(const int* src1, size_t step1, const int* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
CV_EXPORTS void cmp32f(const float* src1, size_t step1, const float* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
CV_EXPORTS void cmp64f(const double* src1, size_t step1, const double* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
CV_EXPORTS void mul8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void mul8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void mul16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void mul16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void mul32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void mul32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void mul64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void div8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void div8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void div16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void div16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void div32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void div32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void div64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void recip8u( const uchar *, size_t, const uchar * src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void recip8s( const schar *, size_t, const schar * src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void recip16u( const ushort *, size_t, const ushort * src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void recip16s( const short *, size_t, const short * src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void recip32s( const int *, size_t, const int * src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void recip32f( const float *, size_t, const float * src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void recip64f( const double *, size_t, const double * src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void addWeighted8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _scalars );
CV_EXPORTS void addWeighted8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scalars );
CV_EXPORTS void addWeighted16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scalars );
CV_EXPORTS void addWeighted16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scalars );
CV_EXPORTS void addWeighted32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scalars );
CV_EXPORTS void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scalars );
CV_EXPORTS void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scalars );
CV_EXPORTS void cvt16f32f( const float16_t* src, float* dst, int len );
CV_EXPORTS void cvt32f16f( const float* src, float16_t* dst, int len );
CV_EXPORTS void addRNGBias32f( float* arr, const float* scaleBiasPairs, int len );
CV_EXPORTS void addRNGBias64f( double* arr, const double* scaleBiasPairs, int len );
struct CV_EXPORTS DFT1D
{
static Ptr<DFT1D> create(int len, int count, int depth, int flags, bool * useBuffer = 0);
virtual void apply(const uchar *src, uchar *dst) = 0;
virtual ~DFT1D() {}
};
struct CV_EXPORTS DFT2D
{
static Ptr<DFT2D> create(int width, int height, int depth,
int src_channels, int dst_channels,
int flags, int nonzero_rows = 0);
virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0;
virtual ~DFT2D() {}
};
struct CV_EXPORTS DCT2D
{
static Ptr<DCT2D> create(int width, int height, int depth, int flags);
virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0;
virtual ~DCT2D() {}
};
//! @} core_hal
//=============================================================================
// for binary compatibility with 3.0
//! @cond IGNORED
CV_EXPORTS int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n);
CV_EXPORTS int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n);
CV_EXPORTS bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n);
CV_EXPORTS bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n);
CV_EXPORTS void exp(const float* src, float* dst, int n);
CV_EXPORTS void exp(const double* src, double* dst, int n);
CV_EXPORTS void log(const float* src, float* dst, int n);
CV_EXPORTS void log(const double* src, double* dst, int n);
CV_EXPORTS void fastAtan2(const float* y, const float* x, float* dst, int n, bool angleInDegrees);
CV_EXPORTS void magnitude(const float* x, const float* y, float* dst, int n);
CV_EXPORTS void magnitude(const double* x, const double* y, double* dst, int n);
CV_EXPORTS void sqrt(const float* src, float* dst, int len);
CV_EXPORTS void sqrt(const double* src, double* dst, int len);
CV_EXPORTS void invSqrt(const float* src, float* dst, int len);
CV_EXPORTS void invSqrt(const double* src, double* dst, int len);
//! @endcond
}} //cv::hal
#endif //OPENCV_HAL_HPP

View File

@@ -0,0 +1,190 @@
#ifndef OPENCV_CORE_HAL_INTERFACE_H
#define OPENCV_CORE_HAL_INTERFACE_H
//! @addtogroup core_hal_interface
//! @{
//! @name Return codes
//! @{
#define CV_HAL_ERROR_OK 0
#define CV_HAL_ERROR_NOT_IMPLEMENTED 1
#define CV_HAL_ERROR_UNKNOWN -1
//! @}
#ifdef __cplusplus
#include <cstddef>
#else
#include <stddef.h>
#include <stdbool.h>
#endif
//! @name Data types
//! primitive types
//! - schar - signed 1 byte integer
//! - uchar - unsigned 1 byte integer
//! - short - signed 2 byte integer
//! - ushort - unsigned 2 byte integer
//! - int - signed 4 byte integer
//! - uint - unsigned 4 byte integer
//! - int64 - signed 8 byte integer
//! - uint64 - unsigned 8 byte integer
//! @{
#if !defined _MSC_VER && !defined __BORLANDC__
# if defined __cplusplus && __cplusplus >= 201103L && !defined __APPLE__
# include <cstdint>
# ifdef __NEWLIB__
typedef unsigned int uint;
# else
typedef std::uint32_t uint;
# endif
# else
# include <stdint.h>
typedef uint32_t uint;
# endif
#else
typedef unsigned uint;
#endif
typedef signed char schar;
#ifndef __IPL_H__
typedef unsigned char uchar;
typedef unsigned short ushort;
#endif
#if defined _MSC_VER || defined __BORLANDC__
typedef __int64 int64;
typedef unsigned __int64 uint64;
# define CV_BIG_INT(n) n##I64
# define CV_BIG_UINT(n) n##UI64
#else
typedef int64_t int64;
typedef uint64_t uint64;
# define CV_BIG_INT(n) n##LL
# define CV_BIG_UINT(n) n##ULL
#endif
#define CV_USRTYPE1 (void)"CV_USRTYPE1 support has been dropped in OpenCV 4.0"
#define CV_CN_MAX 512
#define CV_CN_SHIFT 3
#define CV_DEPTH_MAX (1 << CV_CN_SHIFT)
#define CV_8U 0
#define CV_8S 1
#define CV_16U 2
#define CV_16S 3
#define CV_32S 4
#define CV_32F 5
#define CV_64F 6
#define CV_16F 7
#define CV_MAT_DEPTH_MASK (CV_DEPTH_MAX - 1)
#define CV_MAT_DEPTH(flags) ((flags) & CV_MAT_DEPTH_MASK)
#define CV_MAKETYPE(depth,cn) (CV_MAT_DEPTH(depth) + (((cn)-1) << CV_CN_SHIFT))
#define CV_MAKE_TYPE CV_MAKETYPE
#define CV_8UC1 CV_MAKETYPE(CV_8U,1)
#define CV_8UC2 CV_MAKETYPE(CV_8U,2)
#define CV_8UC3 CV_MAKETYPE(CV_8U,3)
#define CV_8UC4 CV_MAKETYPE(CV_8U,4)
#define CV_8UC(n) CV_MAKETYPE(CV_8U,(n))
#define CV_8SC1 CV_MAKETYPE(CV_8S,1)
#define CV_8SC2 CV_MAKETYPE(CV_8S,2)
#define CV_8SC3 CV_MAKETYPE(CV_8S,3)
#define CV_8SC4 CV_MAKETYPE(CV_8S,4)
#define CV_8SC(n) CV_MAKETYPE(CV_8S,(n))
#define CV_16UC1 CV_MAKETYPE(CV_16U,1)
#define CV_16UC2 CV_MAKETYPE(CV_16U,2)
#define CV_16UC3 CV_MAKETYPE(CV_16U,3)
#define CV_16UC4 CV_MAKETYPE(CV_16U,4)
#define CV_16UC(n) CV_MAKETYPE(CV_16U,(n))
#define CV_16SC1 CV_MAKETYPE(CV_16S,1)
#define CV_16SC2 CV_MAKETYPE(CV_16S,2)
#define CV_16SC3 CV_MAKETYPE(CV_16S,3)
#define CV_16SC4 CV_MAKETYPE(CV_16S,4)
#define CV_16SC(n) CV_MAKETYPE(CV_16S,(n))
#define CV_32SC1 CV_MAKETYPE(CV_32S,1)
#define CV_32SC2 CV_MAKETYPE(CV_32S,2)
#define CV_32SC3 CV_MAKETYPE(CV_32S,3)
#define CV_32SC4 CV_MAKETYPE(CV_32S,4)
#define CV_32SC(n) CV_MAKETYPE(CV_32S,(n))
#define CV_32FC1 CV_MAKETYPE(CV_32F,1)
#define CV_32FC2 CV_MAKETYPE(CV_32F,2)
#define CV_32FC3 CV_MAKETYPE(CV_32F,3)
#define CV_32FC4 CV_MAKETYPE(CV_32F,4)
#define CV_32FC(n) CV_MAKETYPE(CV_32F,(n))
#define CV_64FC1 CV_MAKETYPE(CV_64F,1)
#define CV_64FC2 CV_MAKETYPE(CV_64F,2)
#define CV_64FC3 CV_MAKETYPE(CV_64F,3)
#define CV_64FC4 CV_MAKETYPE(CV_64F,4)
#define CV_64FC(n) CV_MAKETYPE(CV_64F,(n))
#define CV_16FC1 CV_MAKETYPE(CV_16F,1)
#define CV_16FC2 CV_MAKETYPE(CV_16F,2)
#define CV_16FC3 CV_MAKETYPE(CV_16F,3)
#define CV_16FC4 CV_MAKETYPE(CV_16F,4)
#define CV_16FC(n) CV_MAKETYPE(CV_16F,(n))
//! @}
//! @name Comparison operation
//! @sa cv::CmpTypes
//! @{
#define CV_HAL_CMP_EQ 0
#define CV_HAL_CMP_GT 1
#define CV_HAL_CMP_GE 2
#define CV_HAL_CMP_LT 3
#define CV_HAL_CMP_LE 4
#define CV_HAL_CMP_NE 5
//! @}
//! @name Border processing modes
//! @sa cv::BorderTypes
//! @{
#define CV_HAL_BORDER_CONSTANT 0
#define CV_HAL_BORDER_REPLICATE 1
#define CV_HAL_BORDER_REFLECT 2
#define CV_HAL_BORDER_WRAP 3
#define CV_HAL_BORDER_REFLECT_101 4
#define CV_HAL_BORDER_TRANSPARENT 5
#define CV_HAL_BORDER_ISOLATED 16
//! @}
//! @name DFT flags
//! @{
#define CV_HAL_DFT_INVERSE 1
#define CV_HAL_DFT_SCALE 2
#define CV_HAL_DFT_ROWS 4
#define CV_HAL_DFT_COMPLEX_OUTPUT 16
#define CV_HAL_DFT_REAL_OUTPUT 32
#define CV_HAL_DFT_TWO_STAGE 64
#define CV_HAL_DFT_STAGE_COLS 128
#define CV_HAL_DFT_IS_CONTINUOUS 512
#define CV_HAL_DFT_IS_INPLACE 1024
//! @}
//! @name SVD flags
//! @{
#define CV_HAL_SVD_NO_UV 1
#define CV_HAL_SVD_SHORT_UV 2
#define CV_HAL_SVD_MODIFY_A 4
#define CV_HAL_SVD_FULL_UV 8
//! @}
//! @name Gemm flags
//! @{
#define CV_HAL_GEMM_1_T 1
#define CV_HAL_GEMM_2_T 2
#define CV_HAL_GEMM_3_T 4
//! @}
//! @}
#endif

View File

@@ -0,0 +1,706 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_HAL_INTRIN_HPP
#define OPENCV_HAL_INTRIN_HPP
#include <cmath>
#include <float.h>
#include <stdlib.h>
#include "opencv2/core/cvdef.h"
#define OPENCV_HAL_ADD(a, b) ((a) + (b))
#define OPENCV_HAL_AND(a, b) ((a) & (b))
#define OPENCV_HAL_NOP(a) (a)
#define OPENCV_HAL_1ST(a, b) (a)
namespace {
inline unsigned int trailingZeros32(unsigned int value) {
#if defined(_MSC_VER)
#if (_MSC_VER < 1700) || defined(_M_ARM) || defined(_M_ARM64)
unsigned long index = 0;
_BitScanForward(&index, value);
return (unsigned int)index;
#elif defined(__clang__)
// clang-cl doesn't export _tzcnt_u32 for non BMI systems
return value ? __builtin_ctz(value) : 32;
#else
return _tzcnt_u32(value);
#endif
#elif defined(__GNUC__) || defined(__GNUG__)
return __builtin_ctz(value);
#elif defined(__ICC) || defined(__INTEL_COMPILER)
return _bit_scan_forward(value);
#elif defined(__clang__)
return llvm.cttz.i32(value, true);
#else
static const int MultiplyDeBruijnBitPosition[32] = {
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 };
return MultiplyDeBruijnBitPosition[((uint32_t)((value & -value) * 0x077CB531U)) >> 27];
#endif
}
}
// unlike HAL API, which is in cv::hal,
// we put intrinsics into cv namespace to make its
// access from within opencv code more accessible
namespace cv {
namespace hal {
enum StoreMode
{
STORE_UNALIGNED = 0,
STORE_ALIGNED = 1,
STORE_ALIGNED_NOCACHE = 2
};
}
// TODO FIXIT: Don't use "God" traits. Split on separate cases.
template<typename _Tp> struct V_TypeTraits
{
};
#define CV_INTRIN_DEF_TYPE_TRAITS(type, int_type_, uint_type_, abs_type_, w_type_, q_type_, sum_type_) \
template<> struct V_TypeTraits<type> \
{ \
typedef type value_type; \
typedef int_type_ int_type; \
typedef abs_type_ abs_type; \
typedef uint_type_ uint_type; \
typedef w_type_ w_type; \
typedef q_type_ q_type; \
typedef sum_type_ sum_type; \
\
static inline int_type reinterpret_int(type x) \
{ \
union { type l; int_type i; } v; \
v.l = x; \
return v.i; \
} \
\
static inline type reinterpret_from_int(int_type x) \
{ \
union { type l; int_type i; } v; \
v.i = x; \
return v.l; \
} \
}
#define CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(type, int_type_, uint_type_, abs_type_, w_type_, sum_type_) \
template<> struct V_TypeTraits<type> \
{ \
typedef type value_type; \
typedef int_type_ int_type; \
typedef abs_type_ abs_type; \
typedef uint_type_ uint_type; \
typedef w_type_ w_type; \
typedef sum_type_ sum_type; \
\
static inline int_type reinterpret_int(type x) \
{ \
union { type l; int_type i; } v; \
v.l = x; \
return v.i; \
} \
\
static inline type reinterpret_from_int(int_type x) \
{ \
union { type l; int_type i; } v; \
v.i = x; \
return v.l; \
} \
}
CV_INTRIN_DEF_TYPE_TRAITS(uchar, schar, uchar, uchar, ushort, unsigned, unsigned);
CV_INTRIN_DEF_TYPE_TRAITS(schar, schar, uchar, uchar, short, int, int);
CV_INTRIN_DEF_TYPE_TRAITS(ushort, short, ushort, ushort, unsigned, uint64, unsigned);
CV_INTRIN_DEF_TYPE_TRAITS(short, short, ushort, ushort, int, int64, int);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(unsigned, int, unsigned, unsigned, uint64, unsigned);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int, int, unsigned, unsigned, int64, int);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(float, int, unsigned, float, double, float);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(uint64, int64, uint64, uint64, void, uint64);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int64, int64, uint64, uint64, void, int64);
CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(double, int64, uint64, double, void, double);
#ifndef CV_DOXYGEN
#ifndef CV_CPU_OPTIMIZATION_HAL_NAMESPACE
#ifdef CV_FORCE_SIMD128_CPP
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_EMULATOR_CPP
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_EMULATOR_CPP {
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
#elif defined(CV_CPU_DISPATCH_MODE)
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE __CV_CAT(hal_, CV_CPU_DISPATCH_MODE)
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) {
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
#else
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_baseline
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_baseline {
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
#endif
#endif // CV_CPU_OPTIMIZATION_HAL_NAMESPACE
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
#endif
}
#ifdef CV_DOXYGEN
# undef CV_AVX2
# undef CV_SSE2
# undef CV_NEON
# undef CV_VSX
# undef CV_FP16
# undef CV_MSA
# undef CV_RVV
#endif
#if (CV_SSE2 || CV_NEON || CV_VSX || CV_MSA || CV_WASM_SIMD || CV_RVV071 || CV_RVV) && !defined(CV_FORCE_SIMD128_CPP)
#define CV__SIMD_FORWARD 128
#include "opencv2/core/hal/intrin_forward.hpp"
#endif
#if CV_SSE2 && !defined(CV_FORCE_SIMD128_CPP)
#include "opencv2/core/hal/intrin_sse_em.hpp"
#include "opencv2/core/hal/intrin_sse.hpp"
#elif CV_NEON && !defined(CV_FORCE_SIMD128_CPP)
#include "opencv2/core/hal/intrin_neon.hpp"
#elif CV_RVV071 && !defined(CV_FORCE_SIMD128_CPP)
#define CV_SIMD128_CPP 0
#include "opencv2/core/hal/intrin_rvv071.hpp"
#elif CV_VSX && !defined(CV_FORCE_SIMD128_CPP)
#include "opencv2/core/hal/intrin_vsx.hpp"
#elif CV_MSA && !defined(CV_FORCE_SIMD128_CPP)
#include "opencv2/core/hal/intrin_msa.hpp"
#elif CV_WASM_SIMD && !defined(CV_FORCE_SIMD128_CPP)
#include "opencv2/core/hal/intrin_wasm.hpp"
#elif CV_RVV && !defined(CV_FORCE_SIMD128_CPP)
#include "opencv2/core/hal/intrin_rvv.hpp"
#else
#include "opencv2/core/hal/intrin_cpp.hpp"
#endif
// AVX2 can be used together with SSE2, so
// we define those two sets of intrinsics at once.
// Most of the intrinsics do not conflict (the proper overloaded variant is
// resolved by the argument types, e.g. v_float32x4 ~ SSE2, v_float32x8 ~ AVX2),
// but some of AVX2 intrinsics get v256_ prefix instead of v_, e.g. v256_load() vs v_load().
// Correspondingly, the wide intrinsics (which are mapped to the "widest"
// available instruction set) will get vx_ prefix
// (and will be mapped to v256_ counterparts) (e.g. vx_load() => v256_load())
#if CV_AVX2
#define CV__SIMD_FORWARD 256
#include "opencv2/core/hal/intrin_forward.hpp"
#include "opencv2/core/hal/intrin_avx.hpp"
#endif
// AVX512 can be used together with SSE2 and AVX2, so
// we define those sets of intrinsics at once.
// For some of AVX512 intrinsics get v512_ prefix instead of v_, e.g. v512_load() vs v_load().
// Wide intrinsics will be mapped to v512_ counterparts in this case(e.g. vx_load() => v512_load())
#if CV_AVX512_SKX
#define CV__SIMD_FORWARD 512
#include "opencv2/core/hal/intrin_forward.hpp"
#include "opencv2/core/hal/intrin_avx512.hpp"
#endif
//! @cond IGNORED
namespace cv {
#ifndef CV_DOXYGEN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
#endif
#ifndef CV_SIMD128
#define CV_SIMD128 0
#endif
#ifndef CV_SIMD128_CPP
#define CV_SIMD128_CPP 0
#endif
#ifndef CV_SIMD128_64F
#define CV_SIMD128_64F 0
#endif
#ifndef CV_SIMD256
#define CV_SIMD256 0
#endif
#ifndef CV_SIMD256_64F
#define CV_SIMD256_64F 0
#endif
#ifndef CV_SIMD512
#define CV_SIMD512 0
#endif
#ifndef CV_SIMD512_64F
#define CV_SIMD512_64F 0
#endif
#ifndef CV_SIMD128_FP16
#define CV_SIMD128_FP16 0
#endif
#ifndef CV_SIMD256_FP16
#define CV_SIMD256_FP16 0
#endif
#ifndef CV_SIMD512_FP16
#define CV_SIMD512_FP16 0
#endif
//==================================================================================================
template<typename _Tp> struct V_RegTraits
{
};
#define CV_DEF_REG_TRAITS(prefix, _reg, lane_type, suffix, _u_reg, _w_reg, _q_reg, _int_reg, _round_reg) \
template<> struct V_RegTraits<_reg> \
{ \
typedef _reg reg; \
typedef _u_reg u_reg; \
typedef _w_reg w_reg; \
typedef _q_reg q_reg; \
typedef _int_reg int_reg; \
typedef _round_reg round_reg; \
}
#if CV_SIMD128 || CV_SIMD128_CPP
CV_DEF_REG_TRAITS(v, v_uint8x16, uchar, u8, v_uint8x16, v_uint16x8, v_uint32x4, v_int8x16, void);
CV_DEF_REG_TRAITS(v, v_int8x16, schar, s8, v_uint8x16, v_int16x8, v_int32x4, v_int8x16, void);
CV_DEF_REG_TRAITS(v, v_uint16x8, ushort, u16, v_uint16x8, v_uint32x4, v_uint64x2, v_int16x8, void);
CV_DEF_REG_TRAITS(v, v_int16x8, short, s16, v_uint16x8, v_int32x4, v_int64x2, v_int16x8, void);
CV_DEF_REG_TRAITS(v, v_uint32x4, unsigned, u32, v_uint32x4, v_uint64x2, void, v_int32x4, void);
CV_DEF_REG_TRAITS(v, v_int32x4, int, s32, v_uint32x4, v_int64x2, void, v_int32x4, void);
#if CV_SIMD128_64F || CV_SIMD128_CPP
CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, v_float64x2, void, v_int32x4, v_int32x4);
#else
CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, void, void, v_int32x4, v_int32x4);
#endif
CV_DEF_REG_TRAITS(v, v_uint64x2, uint64, u64, v_uint64x2, void, void, v_int64x2, void);
CV_DEF_REG_TRAITS(v, v_int64x2, int64, s64, v_uint64x2, void, void, v_int64x2, void);
#if CV_SIMD128_64F
CV_DEF_REG_TRAITS(v, v_float64x2, double, f64, v_float64x2, void, void, v_int64x2, v_int32x4);
#endif
#endif
#if CV_SIMD256
CV_DEF_REG_TRAITS(v256, v_uint8x32, uchar, u8, v_uint8x32, v_uint16x16, v_uint32x8, v_int8x32, void);
CV_DEF_REG_TRAITS(v256, v_int8x32, schar, s8, v_uint8x32, v_int16x16, v_int32x8, v_int8x32, void);
CV_DEF_REG_TRAITS(v256, v_uint16x16, ushort, u16, v_uint16x16, v_uint32x8, v_uint64x4, v_int16x16, void);
CV_DEF_REG_TRAITS(v256, v_int16x16, short, s16, v_uint16x16, v_int32x8, v_int64x4, v_int16x16, void);
CV_DEF_REG_TRAITS(v256, v_uint32x8, unsigned, u32, v_uint32x8, v_uint64x4, void, v_int32x8, void);
CV_DEF_REG_TRAITS(v256, v_int32x8, int, s32, v_uint32x8, v_int64x4, void, v_int32x8, void);
CV_DEF_REG_TRAITS(v256, v_float32x8, float, f32, v_float32x8, v_float64x4, void, v_int32x8, v_int32x8);
CV_DEF_REG_TRAITS(v256, v_uint64x4, uint64, u64, v_uint64x4, void, void, v_int64x4, void);
CV_DEF_REG_TRAITS(v256, v_int64x4, int64, s64, v_uint64x4, void, void, v_int64x4, void);
CV_DEF_REG_TRAITS(v256, v_float64x4, double, f64, v_float64x4, void, void, v_int64x4, v_int32x8);
#endif
#if CV_SIMD512
CV_DEF_REG_TRAITS(v512, v_uint8x64, uchar, u8, v_uint8x64, v_uint16x32, v_uint32x16, v_int8x64, void);
CV_DEF_REG_TRAITS(v512, v_int8x64, schar, s8, v_uint8x64, v_int16x32, v_int32x16, v_int8x64, void);
CV_DEF_REG_TRAITS(v512, v_uint16x32, ushort, u16, v_uint16x32, v_uint32x16, v_uint64x8, v_int16x32, void);
CV_DEF_REG_TRAITS(v512, v_int16x32, short, s16, v_uint16x32, v_int32x16, v_int64x8, v_int16x32, void);
CV_DEF_REG_TRAITS(v512, v_uint32x16, unsigned, u32, v_uint32x16, v_uint64x8, void, v_int32x16, void);
CV_DEF_REG_TRAITS(v512, v_int32x16, int, s32, v_uint32x16, v_int64x8, void, v_int32x16, void);
CV_DEF_REG_TRAITS(v512, v_float32x16, float, f32, v_float32x16, v_float64x8, void, v_int32x16, v_int32x16);
CV_DEF_REG_TRAITS(v512, v_uint64x8, uint64, u64, v_uint64x8, void, void, v_int64x8, void);
CV_DEF_REG_TRAITS(v512, v_int64x8, int64, s64, v_uint64x8, void, void, v_int64x8, void);
CV_DEF_REG_TRAITS(v512, v_float64x8, double, f64, v_float64x8, void, void, v_int64x8, v_int32x16);
#endif
//! @endcond
#if CV_SIMD512 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 512)
#define CV__SIMD_NAMESPACE simd512
namespace CV__SIMD_NAMESPACE {
#define CV_SIMD 1
#define CV_SIMD_64F CV_SIMD512_64F
#define CV_SIMD_FP16 CV_SIMD512_FP16
#define CV_SIMD_WIDTH 64
//! @addtogroup core_hal_intrin
//! @{
//! @brief Maximum available vector register capacity 8-bit unsigned integer values
typedef v_uint8x64 v_uint8;
//! @brief Maximum available vector register capacity 8-bit signed integer values
typedef v_int8x64 v_int8;
//! @brief Maximum available vector register capacity 16-bit unsigned integer values
typedef v_uint16x32 v_uint16;
//! @brief Maximum available vector register capacity 16-bit signed integer values
typedef v_int16x32 v_int16;
//! @brief Maximum available vector register capacity 32-bit unsigned integer values
typedef v_uint32x16 v_uint32;
//! @brief Maximum available vector register capacity 32-bit signed integer values
typedef v_int32x16 v_int32;
//! @brief Maximum available vector register capacity 64-bit unsigned integer values
typedef v_uint64x8 v_uint64;
//! @brief Maximum available vector register capacity 64-bit signed integer values
typedef v_int64x8 v_int64;
//! @brief Maximum available vector register capacity 32-bit floating point values (single precision)
typedef v_float32x16 v_float32;
#if CV_SIMD512_64F
//! @brief Maximum available vector register capacity 64-bit floating point values (double precision)
typedef v_float64x8 v_float64;
#endif
//! @}
#define VXPREFIX(func) v512##func
} // namespace
using namespace CV__SIMD_NAMESPACE;
#elif CV_SIMD256 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 256)
#define CV__SIMD_NAMESPACE simd256
namespace CV__SIMD_NAMESPACE {
#define CV_SIMD 1
#define CV_SIMD_64F CV_SIMD256_64F
#define CV_SIMD_FP16 CV_SIMD256_FP16
#define CV_SIMD_WIDTH 32
//! @addtogroup core_hal_intrin
//! @{
//! @brief Maximum available vector register capacity 8-bit unsigned integer values
typedef v_uint8x32 v_uint8;
//! @brief Maximum available vector register capacity 8-bit signed integer values
typedef v_int8x32 v_int8;
//! @brief Maximum available vector register capacity 16-bit unsigned integer values
typedef v_uint16x16 v_uint16;
//! @brief Maximum available vector register capacity 16-bit signed integer values
typedef v_int16x16 v_int16;
//! @brief Maximum available vector register capacity 32-bit unsigned integer values
typedef v_uint32x8 v_uint32;
//! @brief Maximum available vector register capacity 32-bit signed integer values
typedef v_int32x8 v_int32;
//! @brief Maximum available vector register capacity 64-bit unsigned integer values
typedef v_uint64x4 v_uint64;
//! @brief Maximum available vector register capacity 64-bit signed integer values
typedef v_int64x4 v_int64;
//! @brief Maximum available vector register capacity 32-bit floating point values (single precision)
typedef v_float32x8 v_float32;
#if CV_SIMD256_64F
//! @brief Maximum available vector register capacity 64-bit floating point values (double precision)
typedef v_float64x4 v_float64;
#endif
//! @}
#define VXPREFIX(func) v256##func
} // namespace
using namespace CV__SIMD_NAMESPACE;
#elif (CV_SIMD128 || CV_SIMD128_CPP) && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 128)
#if defined CV_SIMD128_CPP
#define CV__SIMD_NAMESPACE simd128_cpp
#else
#define CV__SIMD_NAMESPACE simd128
#endif
namespace CV__SIMD_NAMESPACE {
#define CV_SIMD CV_SIMD128
#define CV_SIMD_64F CV_SIMD128_64F
#define CV_SIMD_WIDTH 16
//! @addtogroup core_hal_intrin
//! @{
//! @brief Maximum available vector register capacity 8-bit unsigned integer values
typedef v_uint8x16 v_uint8;
//! @brief Maximum available vector register capacity 8-bit signed integer values
typedef v_int8x16 v_int8;
//! @brief Maximum available vector register capacity 16-bit unsigned integer values
typedef v_uint16x8 v_uint16;
//! @brief Maximum available vector register capacity 16-bit signed integer values
typedef v_int16x8 v_int16;
//! @brief Maximum available vector register capacity 32-bit unsigned integer values
typedef v_uint32x4 v_uint32;
//! @brief Maximum available vector register capacity 32-bit signed integer values
typedef v_int32x4 v_int32;
//! @brief Maximum available vector register capacity 64-bit unsigned integer values
typedef v_uint64x2 v_uint64;
//! @brief Maximum available vector register capacity 64-bit signed integer values
typedef v_int64x2 v_int64;
//! @brief Maximum available vector register capacity 32-bit floating point values (single precision)
typedef v_float32x4 v_float32;
#if CV_SIMD128_64F
//! @brief Maximum available vector register capacity 64-bit floating point values (double precision)
typedef v_float64x2 v_float64;
#endif
//! @}
#define VXPREFIX(func) v##func
} // namespace
using namespace CV__SIMD_NAMESPACE;
#endif
namespace CV__SIMD_NAMESPACE {
//! @addtogroup core_hal_intrin
//! @{
//! @name Wide init with value
//! @{
//! @brief Create maximum available capacity vector with elements set to a specific value
inline v_uint8 vx_setall_u8(uchar v) { return VXPREFIX(_setall_u8)(v); }
inline v_int8 vx_setall_s8(schar v) { return VXPREFIX(_setall_s8)(v); }
inline v_uint16 vx_setall_u16(ushort v) { return VXPREFIX(_setall_u16)(v); }
inline v_int16 vx_setall_s16(short v) { return VXPREFIX(_setall_s16)(v); }
inline v_int32 vx_setall_s32(int v) { return VXPREFIX(_setall_s32)(v); }
inline v_uint32 vx_setall_u32(unsigned v) { return VXPREFIX(_setall_u32)(v); }
inline v_float32 vx_setall_f32(float v) { return VXPREFIX(_setall_f32)(v); }
inline v_int64 vx_setall_s64(int64 v) { return VXPREFIX(_setall_s64)(v); }
inline v_uint64 vx_setall_u64(uint64 v) { return VXPREFIX(_setall_u64)(v); }
#if CV_SIMD_64F
inline v_float64 vx_setall_f64(double v) { return VXPREFIX(_setall_f64)(v); }
#endif
//! @}
//! @name Wide init with zero
//! @{
//! @brief Create maximum available capacity vector with elements set to zero
inline v_uint8 vx_setzero_u8() { return VXPREFIX(_setzero_u8)(); }
inline v_int8 vx_setzero_s8() { return VXPREFIX(_setzero_s8)(); }
inline v_uint16 vx_setzero_u16() { return VXPREFIX(_setzero_u16)(); }
inline v_int16 vx_setzero_s16() { return VXPREFIX(_setzero_s16)(); }
inline v_int32 vx_setzero_s32() { return VXPREFIX(_setzero_s32)(); }
inline v_uint32 vx_setzero_u32() { return VXPREFIX(_setzero_u32)(); }
inline v_float32 vx_setzero_f32() { return VXPREFIX(_setzero_f32)(); }
inline v_int64 vx_setzero_s64() { return VXPREFIX(_setzero_s64)(); }
inline v_uint64 vx_setzero_u64() { return VXPREFIX(_setzero_u64)(); }
#if CV_SIMD_64F
inline v_float64 vx_setzero_f64() { return VXPREFIX(_setzero_f64)(); }
#endif
//! @}
//! @name Wide load from memory
//! @{
//! @brief Load maximum available capacity register contents from memory
inline v_uint8 vx_load(const uchar * ptr) { return VXPREFIX(_load)(ptr); }
inline v_int8 vx_load(const schar * ptr) { return VXPREFIX(_load)(ptr); }
inline v_uint16 vx_load(const ushort * ptr) { return VXPREFIX(_load)(ptr); }
inline v_int16 vx_load(const short * ptr) { return VXPREFIX(_load)(ptr); }
inline v_int32 vx_load(const int * ptr) { return VXPREFIX(_load)(ptr); }
inline v_uint32 vx_load(const unsigned * ptr) { return VXPREFIX(_load)(ptr); }
inline v_float32 vx_load(const float * ptr) { return VXPREFIX(_load)(ptr); }
inline v_int64 vx_load(const int64 * ptr) { return VXPREFIX(_load)(ptr); }
inline v_uint64 vx_load(const uint64 * ptr) { return VXPREFIX(_load)(ptr); }
#if CV_SIMD_64F
inline v_float64 vx_load(const double * ptr) { return VXPREFIX(_load)(ptr); }
#endif
//! @}
//! @name Wide load from memory(aligned)
//! @{
//! @brief Load maximum available capacity register contents from memory(aligned)
inline v_uint8 vx_load_aligned(const uchar * ptr) { return VXPREFIX(_load_aligned)(ptr); }
inline v_int8 vx_load_aligned(const schar * ptr) { return VXPREFIX(_load_aligned)(ptr); }
inline v_uint16 vx_load_aligned(const ushort * ptr) { return VXPREFIX(_load_aligned)(ptr); }
inline v_int16 vx_load_aligned(const short * ptr) { return VXPREFIX(_load_aligned)(ptr); }
inline v_int32 vx_load_aligned(const int * ptr) { return VXPREFIX(_load_aligned)(ptr); }
inline v_uint32 vx_load_aligned(const unsigned * ptr) { return VXPREFIX(_load_aligned)(ptr); }
inline v_float32 vx_load_aligned(const float * ptr) { return VXPREFIX(_load_aligned)(ptr); }
inline v_int64 vx_load_aligned(const int64 * ptr) { return VXPREFIX(_load_aligned)(ptr); }
inline v_uint64 vx_load_aligned(const uint64 * ptr) { return VXPREFIX(_load_aligned)(ptr); }
#if CV_SIMD_64F
inline v_float64 vx_load_aligned(const double * ptr) { return VXPREFIX(_load_aligned)(ptr); }
#endif
//! @}
//! @name Wide load lower half from memory
//! @{
//! @brief Load lower half of maximum available capacity register from memory
inline v_uint8 vx_load_low(const uchar * ptr) { return VXPREFIX(_load_low)(ptr); }
inline v_int8 vx_load_low(const schar * ptr) { return VXPREFIX(_load_low)(ptr); }
inline v_uint16 vx_load_low(const ushort * ptr) { return VXPREFIX(_load_low)(ptr); }
inline v_int16 vx_load_low(const short * ptr) { return VXPREFIX(_load_low)(ptr); }
inline v_int32 vx_load_low(const int * ptr) { return VXPREFIX(_load_low)(ptr); }
inline v_uint32 vx_load_low(const unsigned * ptr) { return VXPREFIX(_load_low)(ptr); }
inline v_float32 vx_load_low(const float * ptr) { return VXPREFIX(_load_low)(ptr); }
inline v_int64 vx_load_low(const int64 * ptr) { return VXPREFIX(_load_low)(ptr); }
inline v_uint64 vx_load_low(const uint64 * ptr) { return VXPREFIX(_load_low)(ptr); }
#if CV_SIMD_64F
inline v_float64 vx_load_low(const double * ptr) { return VXPREFIX(_load_low)(ptr); }
#endif
//! @}
//! @name Wide load halfs from memory
//! @{
//! @brief Load maximum available capacity register contents from two memory blocks
inline v_uint8 vx_load_halves(const uchar * ptr0, const uchar * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
inline v_int8 vx_load_halves(const schar * ptr0, const schar * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
inline v_uint16 vx_load_halves(const ushort * ptr0, const ushort * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
inline v_int16 vx_load_halves(const short * ptr0, const short * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
inline v_int32 vx_load_halves(const int * ptr0, const int * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
inline v_uint32 vx_load_halves(const unsigned * ptr0, const unsigned * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
inline v_float32 vx_load_halves(const float * ptr0, const float * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
inline v_int64 vx_load_halves(const int64 * ptr0, const int64 * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
inline v_uint64 vx_load_halves(const uint64 * ptr0, const uint64 * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
#if CV_SIMD_64F
inline v_float64 vx_load_halves(const double * ptr0, const double * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
#endif
//! @}
//! @name Wide LUT of elements
//! @{
//! @brief Load maximum available capacity register contents with array elements by provided indexes
inline v_uint8 vx_lut(const uchar * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
inline v_int8 vx_lut(const schar * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
inline v_uint16 vx_lut(const ushort * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
inline v_int16 vx_lut(const short* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
inline v_int32 vx_lut(const int* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
inline v_uint32 vx_lut(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
inline v_float32 vx_lut(const float* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
inline v_int64 vx_lut(const int64 * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
inline v_uint64 vx_lut(const uint64 * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
#if CV_SIMD_64F
inline v_float64 vx_lut(const double* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
#endif
//! @}
//! @name Wide LUT of element pairs
//! @{
//! @brief Load maximum available capacity register contents with array element pairs by provided indexes
inline v_uint8 vx_lut_pairs(const uchar * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
inline v_int8 vx_lut_pairs(const schar * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
inline v_uint16 vx_lut_pairs(const ushort * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
inline v_int16 vx_lut_pairs(const short* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
inline v_int32 vx_lut_pairs(const int* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
inline v_uint32 vx_lut_pairs(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
inline v_float32 vx_lut_pairs(const float* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
inline v_int64 vx_lut_pairs(const int64 * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
inline v_uint64 vx_lut_pairs(const uint64 * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
#if CV_SIMD_64F
inline v_float64 vx_lut_pairs(const double* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
#endif
//! @}
//! @name Wide LUT of element quads
//! @{
//! @brief Load maximum available capacity register contents with array element quads by provided indexes
inline v_uint8 vx_lut_quads(const uchar* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
inline v_int8 vx_lut_quads(const schar* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
inline v_uint16 vx_lut_quads(const ushort* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
inline v_int16 vx_lut_quads(const short* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
inline v_int32 vx_lut_quads(const int* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
inline v_uint32 vx_lut_quads(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
inline v_float32 vx_lut_quads(const float* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
//! @}
//! @name Wide load with double expansion
//! @{
//! @brief Load maximum available capacity register contents from memory with double expand
inline v_uint16 vx_load_expand(const uchar * ptr) { return VXPREFIX(_load_expand)(ptr); }
inline v_int16 vx_load_expand(const schar * ptr) { return VXPREFIX(_load_expand)(ptr); }
inline v_uint32 vx_load_expand(const ushort * ptr) { return VXPREFIX(_load_expand)(ptr); }
inline v_int32 vx_load_expand(const short* ptr) { return VXPREFIX(_load_expand)(ptr); }
inline v_int64 vx_load_expand(const int* ptr) { return VXPREFIX(_load_expand)(ptr); }
inline v_uint64 vx_load_expand(const unsigned* ptr) { return VXPREFIX(_load_expand)(ptr); }
inline v_float32 vx_load_expand(const float16_t * ptr) { return VXPREFIX(_load_expand)(ptr); }
//! @}
//! @name Wide load with quad expansion
//! @{
//! @brief Load maximum available capacity register contents from memory with quad expand
inline v_uint32 vx_load_expand_q(const uchar * ptr) { return VXPREFIX(_load_expand_q)(ptr); }
inline v_int32 vx_load_expand_q(const schar * ptr) { return VXPREFIX(_load_expand_q)(ptr); }
//! @}
/** @brief SIMD processing state cleanup call */
inline void vx_cleanup() { VXPREFIX(_cleanup)(); }
//! @cond IGNORED
// backward compatibility
template<typename _Tp, typename _Tvec> static inline
void vx_store(_Tp* dst, const _Tvec& v) { return v_store(dst, v); }
// backward compatibility
template<typename _Tp, typename _Tvec> static inline
void vx_store_aligned(_Tp* dst, const _Tvec& v) { return v_store_aligned(dst, v); }
//! @endcond
//! @}
#undef VXPREFIX
} // namespace
//! @cond IGNORED
#ifndef CV_SIMD_64F
#define CV_SIMD_64F 0
#endif
#ifndef CV_SIMD_FP16
#define CV_SIMD_FP16 0 //!< Defined to 1 on native support of operations with float16x8_t / float16x16_t (SIMD256) types
#endif
#ifndef CV_SIMD
#define CV_SIMD 0
#endif
#include "simd_utils.impl.hpp"
#ifndef CV_DOXYGEN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
#endif
} // cv::
//! @endcond
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,191 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
#ifndef CV__SIMD_FORWARD
#error "Need to pre-define forward width"
#endif
namespace cv
{
//! @cond IGNORED
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
/** Types **/
#if CV__SIMD_FORWARD == 1024
// [todo] 1024
#error "1024-long ops not implemented yet"
#elif CV__SIMD_FORWARD == 512
// 512
#define __CV_VX(fun) v512_##fun
#define __CV_V_UINT8 v_uint8x64
#define __CV_V_INT8 v_int8x64
#define __CV_V_UINT16 v_uint16x32
#define __CV_V_INT16 v_int16x32
#define __CV_V_UINT32 v_uint32x16
#define __CV_V_INT32 v_int32x16
#define __CV_V_UINT64 v_uint64x8
#define __CV_V_INT64 v_int64x8
#define __CV_V_FLOAT32 v_float32x16
#define __CV_V_FLOAT64 v_float64x8
struct v_uint8x64;
struct v_int8x64;
struct v_uint16x32;
struct v_int16x32;
struct v_uint32x16;
struct v_int32x16;
struct v_uint64x8;
struct v_int64x8;
struct v_float32x16;
struct v_float64x8;
#elif CV__SIMD_FORWARD == 256
// 256
#define __CV_VX(fun) v256_##fun
#define __CV_V_UINT8 v_uint8x32
#define __CV_V_INT8 v_int8x32
#define __CV_V_UINT16 v_uint16x16
#define __CV_V_INT16 v_int16x16
#define __CV_V_UINT32 v_uint32x8
#define __CV_V_INT32 v_int32x8
#define __CV_V_UINT64 v_uint64x4
#define __CV_V_INT64 v_int64x4
#define __CV_V_FLOAT32 v_float32x8
#define __CV_V_FLOAT64 v_float64x4
struct v_uint8x32;
struct v_int8x32;
struct v_uint16x16;
struct v_int16x16;
struct v_uint32x8;
struct v_int32x8;
struct v_uint64x4;
struct v_int64x4;
struct v_float32x8;
struct v_float64x4;
#else
// 128
#define __CV_VX(fun) v_##fun
#define __CV_V_UINT8 v_uint8x16
#define __CV_V_INT8 v_int8x16
#define __CV_V_UINT16 v_uint16x8
#define __CV_V_INT16 v_int16x8
#define __CV_V_UINT32 v_uint32x4
#define __CV_V_INT32 v_int32x4
#define __CV_V_UINT64 v_uint64x2
#define __CV_V_INT64 v_int64x2
#define __CV_V_FLOAT32 v_float32x4
#define __CV_V_FLOAT64 v_float64x2
struct v_uint8x16;
struct v_int8x16;
struct v_uint16x8;
struct v_int16x8;
struct v_uint32x4;
struct v_int32x4;
struct v_uint64x2;
struct v_int64x2;
struct v_float32x4;
struct v_float64x2;
#endif
/** Value reordering **/
// Expansion
void v_expand(const __CV_V_UINT8&, __CV_V_UINT16&, __CV_V_UINT16&);
void v_expand(const __CV_V_INT8&, __CV_V_INT16&, __CV_V_INT16&);
void v_expand(const __CV_V_UINT16&, __CV_V_UINT32&, __CV_V_UINT32&);
void v_expand(const __CV_V_INT16&, __CV_V_INT32&, __CV_V_INT32&);
void v_expand(const __CV_V_UINT32&, __CV_V_UINT64&, __CV_V_UINT64&);
void v_expand(const __CV_V_INT32&, __CV_V_INT64&, __CV_V_INT64&);
// Low Expansion
__CV_V_UINT16 v_expand_low(const __CV_V_UINT8&);
__CV_V_INT16 v_expand_low(const __CV_V_INT8&);
__CV_V_UINT32 v_expand_low(const __CV_V_UINT16&);
__CV_V_INT32 v_expand_low(const __CV_V_INT16&);
__CV_V_UINT64 v_expand_low(const __CV_V_UINT32&);
__CV_V_INT64 v_expand_low(const __CV_V_INT32&);
// High Expansion
__CV_V_UINT16 v_expand_high(const __CV_V_UINT8&);
__CV_V_INT16 v_expand_high(const __CV_V_INT8&);
__CV_V_UINT32 v_expand_high(const __CV_V_UINT16&);
__CV_V_INT32 v_expand_high(const __CV_V_INT16&);
__CV_V_UINT64 v_expand_high(const __CV_V_UINT32&);
__CV_V_INT64 v_expand_high(const __CV_V_INT32&);
// Load & Low Expansion
__CV_V_UINT16 __CV_VX(load_expand)(const uchar*);
__CV_V_INT16 __CV_VX(load_expand)(const schar*);
__CV_V_UINT32 __CV_VX(load_expand)(const ushort*);
__CV_V_INT32 __CV_VX(load_expand)(const short*);
__CV_V_UINT64 __CV_VX(load_expand)(const uint*);
__CV_V_INT64 __CV_VX(load_expand)(const int*);
// Load lower 8-bit and expand into 32-bit
__CV_V_UINT32 __CV_VX(load_expand_q)(const uchar*);
__CV_V_INT32 __CV_VX(load_expand_q)(const schar*);
// Saturating Pack
__CV_V_UINT8 v_pack(const __CV_V_UINT16&, const __CV_V_UINT16&);
__CV_V_INT8 v_pack(const __CV_V_INT16&, const __CV_V_INT16&);
__CV_V_UINT16 v_pack(const __CV_V_UINT32&, const __CV_V_UINT32&);
__CV_V_INT16 v_pack(const __CV_V_INT32&, const __CV_V_INT32&);
// Non-saturating Pack
__CV_V_UINT32 v_pack(const __CV_V_UINT64&, const __CV_V_UINT64&);
__CV_V_INT32 v_pack(const __CV_V_INT64&, const __CV_V_INT64&);
// Pack signed integers with unsigned saturation
__CV_V_UINT8 v_pack_u(const __CV_V_INT16&, const __CV_V_INT16&);
__CV_V_UINT16 v_pack_u(const __CV_V_INT32&, const __CV_V_INT32&);
/** Arithmetic, bitwise and comparison operations **/
// Non-saturating multiply
#if CV_VSX
template<typename Tvec>
Tvec v_mul_wrap(const Tvec& a, const Tvec& b);
#else
__CV_V_UINT8 v_mul_wrap(const __CV_V_UINT8&, const __CV_V_UINT8&);
__CV_V_INT8 v_mul_wrap(const __CV_V_INT8&, const __CV_V_INT8&);
__CV_V_UINT16 v_mul_wrap(const __CV_V_UINT16&, const __CV_V_UINT16&);
__CV_V_INT16 v_mul_wrap(const __CV_V_INT16&, const __CV_V_INT16&);
#endif
// Multiply and expand
#if CV_VSX
template<typename Tvec, typename Twvec>
void v_mul_expand(const Tvec& a, const Tvec& b, Twvec& c, Twvec& d);
#else
void v_mul_expand(const __CV_V_UINT8&, const __CV_V_UINT8&, __CV_V_UINT16&, __CV_V_UINT16&);
void v_mul_expand(const __CV_V_INT8&, const __CV_V_INT8&, __CV_V_INT16&, __CV_V_INT16&);
void v_mul_expand(const __CV_V_UINT16&, const __CV_V_UINT16&, __CV_V_UINT32&, __CV_V_UINT32&);
void v_mul_expand(const __CV_V_INT16&, const __CV_V_INT16&, __CV_V_INT32&, __CV_V_INT32&);
void v_mul_expand(const __CV_V_UINT32&, const __CV_V_UINT32&, __CV_V_UINT64&, __CV_V_UINT64&);
void v_mul_expand(const __CV_V_INT32&, const __CV_V_INT32&, __CV_V_INT64&, __CV_V_INT64&);
#endif
// Conversions
__CV_V_FLOAT32 v_cvt_f32(const __CV_V_INT32& a);
__CV_V_FLOAT32 v_cvt_f32(const __CV_V_FLOAT64& a);
__CV_V_FLOAT32 v_cvt_f32(const __CV_V_FLOAT64& a, const __CV_V_FLOAT64& b);
__CV_V_FLOAT64 v_cvt_f64(const __CV_V_INT32& a);
__CV_V_FLOAT64 v_cvt_f64_high(const __CV_V_INT32& a);
__CV_V_FLOAT64 v_cvt_f64(const __CV_V_FLOAT32& a);
__CV_V_FLOAT64 v_cvt_f64_high(const __CV_V_FLOAT32& a);
__CV_V_FLOAT64 v_cvt_f64(const __CV_V_INT64& a);
/** Cleanup **/
#undef CV__SIMD_FORWARD
#undef __CV_VX
#undef __CV_V_UINT8
#undef __CV_V_INT8
#undef __CV_V_UINT16
#undef __CV_V_INT16
#undef __CV_V_UINT32
#undef __CV_V_INT32
#undef __CV_V_UINT64
#undef __CV_V_INT64
#undef __CV_V_FLOAT32
#undef __CV_V_FLOAT64
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
//! @endcond
} // cv::

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,180 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
#ifndef OPENCV_HAL_INTRIN_SSE_EM_HPP
#define OPENCV_HAL_INTRIN_SSE_EM_HPP
namespace cv
{
//! @cond IGNORED
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
#define OPENCV_HAL_SSE_WRAP_1(fun, tp) \
inline tp _v128_##fun(const tp& a) \
{ return _mm_##fun(a); }
#define OPENCV_HAL_SSE_WRAP_2(fun, tp) \
inline tp _v128_##fun(const tp& a, const tp& b) \
{ return _mm_##fun(a, b); }
#define OPENCV_HAL_SSE_WRAP_3(fun, tp) \
inline tp _v128_##fun(const tp& a, const tp& b, const tp& c) \
{ return _mm_##fun(a, b, c); }
///////////////////////////// XOP /////////////////////////////
// [todo] define CV_XOP
#if 1 // CV_XOP
inline __m128i _v128_comgt_epu32(const __m128i& a, const __m128i& b)
{
const __m128i delta = _mm_set1_epi32((int)0x80000000);
return _mm_cmpgt_epi32(_mm_xor_si128(a, delta), _mm_xor_si128(b, delta));
}
// wrapping XOP
#else
OPENCV_HAL_SSE_WRAP_2(_v128_comgt_epu32, __m128i)
#endif // !CV_XOP
///////////////////////////// SSE4.1 /////////////////////////////
#if !CV_SSE4_1
/** Swizzle **/
inline __m128i _v128_blendv_epi8(const __m128i& a, const __m128i& b, const __m128i& mask)
{ return _mm_xor_si128(a, _mm_and_si128(_mm_xor_si128(b, a), mask)); }
/** Convert **/
// 8 >> 16
inline __m128i _v128_cvtepu8_epi16(const __m128i& a)
{
const __m128i z = _mm_setzero_si128();
return _mm_unpacklo_epi8(a, z);
}
inline __m128i _v128_cvtepi8_epi16(const __m128i& a)
{ return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8); }
// 8 >> 32
inline __m128i _v128_cvtepu8_epi32(const __m128i& a)
{
const __m128i z = _mm_setzero_si128();
return _mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z);
}
inline __m128i _v128_cvtepi8_epi32(const __m128i& a)
{
__m128i r = _mm_unpacklo_epi8(a, a);
r = _mm_unpacklo_epi8(r, r);
return _mm_srai_epi32(r, 24);
}
// 16 >> 32
inline __m128i _v128_cvtepu16_epi32(const __m128i& a)
{
const __m128i z = _mm_setzero_si128();
return _mm_unpacklo_epi16(a, z);
}
inline __m128i _v128_cvtepi16_epi32(const __m128i& a)
{ return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16); }
// 32 >> 64
inline __m128i _v128_cvtepu32_epi64(const __m128i& a)
{
const __m128i z = _mm_setzero_si128();
return _mm_unpacklo_epi32(a, z);
}
inline __m128i _v128_cvtepi32_epi64(const __m128i& a)
{ return _mm_unpacklo_epi32(a, _mm_srai_epi32(a, 31)); }
/** Arithmetic **/
inline __m128i _v128_mullo_epi32(const __m128i& a, const __m128i& b)
{
__m128i c0 = _mm_mul_epu32(a, b);
__m128i c1 = _mm_mul_epu32(_mm_srli_epi64(a, 32), _mm_srli_epi64(b, 32));
__m128i d0 = _mm_unpacklo_epi32(c0, c1);
__m128i d1 = _mm_unpackhi_epi32(c0, c1);
return _mm_unpacklo_epi64(d0, d1);
}
/** Math **/
inline __m128i _v128_min_epu32(const __m128i& a, const __m128i& b)
{ return _v128_blendv_epi8(a, b, _v128_comgt_epu32(a, b)); }
// wrapping SSE4.1
#else
OPENCV_HAL_SSE_WRAP_1(cvtepu8_epi16, __m128i)
OPENCV_HAL_SSE_WRAP_1(cvtepi8_epi16, __m128i)
OPENCV_HAL_SSE_WRAP_1(cvtepu8_epi32, __m128i)
OPENCV_HAL_SSE_WRAP_1(cvtepi8_epi32, __m128i)
OPENCV_HAL_SSE_WRAP_1(cvtepu16_epi32, __m128i)
OPENCV_HAL_SSE_WRAP_1(cvtepi16_epi32, __m128i)
OPENCV_HAL_SSE_WRAP_1(cvtepu32_epi64, __m128i)
OPENCV_HAL_SSE_WRAP_1(cvtepi32_epi64, __m128i)
OPENCV_HAL_SSE_WRAP_2(min_epu32, __m128i)
OPENCV_HAL_SSE_WRAP_2(mullo_epi32, __m128i)
OPENCV_HAL_SSE_WRAP_3(blendv_epi8, __m128i)
#endif // !CV_SSE4_1
///////////////////////////// Revolutionary /////////////////////////////
/** Convert **/
// 16 << 8
inline __m128i _v128_cvtepu8_epi16_high(const __m128i& a)
{
const __m128i z = _mm_setzero_si128();
return _mm_unpackhi_epi8(a, z);
}
inline __m128i _v128_cvtepi8_epi16_high(const __m128i& a)
{ return _mm_srai_epi16(_mm_unpackhi_epi8(a, a), 8); }
// 32 << 16
inline __m128i _v128_cvtepu16_epi32_high(const __m128i& a)
{
const __m128i z = _mm_setzero_si128();
return _mm_unpackhi_epi16(a, z);
}
inline __m128i _v128_cvtepi16_epi32_high(const __m128i& a)
{ return _mm_srai_epi32(_mm_unpackhi_epi16(a, a), 16); }
// 64 << 32
inline __m128i _v128_cvtepu32_epi64_high(const __m128i& a)
{
const __m128i z = _mm_setzero_si128();
return _mm_unpackhi_epi32(a, z);
}
inline __m128i _v128_cvtepi32_epi64_high(const __m128i& a)
{ return _mm_unpackhi_epi32(a, _mm_srai_epi32(a, 31)); }
/** Miscellaneous **/
inline __m128i _v128_packs_epu32(const __m128i& a, const __m128i& b)
{
const __m128i m = _mm_set1_epi32(65535);
__m128i am = _v128_min_epu32(a, m);
__m128i bm = _v128_min_epu32(b, m);
#if CV_SSE4_1
return _mm_packus_epi32(am, bm);
#else
const __m128i d = _mm_set1_epi32(32768), nd = _mm_set1_epi16(-32768);
am = _mm_sub_epi32(am, d);
bm = _mm_sub_epi32(bm, d);
am = _mm_packs_epi32(am, bm);
return _mm_sub_epi16(am, nd);
#endif
}
template<int i>
inline int64 _v128_extract_epi64(const __m128i& a)
{
#if defined(CV__SIMD_HAVE_mm_extract_epi64) || (CV_SSE4_1 && (defined(__x86_64__)/*GCC*/ || defined(_M_X64)/*MSVC*/))
#define CV__SIMD_NATIVE_mm_extract_epi64 1
return _mm_extract_epi64(a, i);
#else
CV_DECL_ALIGNED(16) int64 tmp[2];
_mm_store_si128((__m128i*)tmp, a);
return tmp[i];
#endif
}
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
//! @endcond
} // cv::
#endif // OPENCV_HAL_INTRIN_SSE_EM_HPP

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,146 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
// This header is not standalone. Don't include directly, use "intrin.hpp" instead.
#ifdef OPENCV_HAL_INTRIN_HPP // defined in intrin.hpp
#if CV_SIMD128 || CV_SIMD128_CPP
template<typename _T> struct Type2Vec128_Traits;
#define CV_INTRIN_DEF_TYPE2VEC128_TRAITS(type_, vec_type_) \
template<> struct Type2Vec128_Traits<type_> \
{ \
typedef vec_type_ vec_type; \
}
CV_INTRIN_DEF_TYPE2VEC128_TRAITS(uchar, v_uint8x16);
CV_INTRIN_DEF_TYPE2VEC128_TRAITS(schar, v_int8x16);
CV_INTRIN_DEF_TYPE2VEC128_TRAITS(ushort, v_uint16x8);
CV_INTRIN_DEF_TYPE2VEC128_TRAITS(short, v_int16x8);
CV_INTRIN_DEF_TYPE2VEC128_TRAITS(unsigned, v_uint32x4);
CV_INTRIN_DEF_TYPE2VEC128_TRAITS(int, v_int32x4);
CV_INTRIN_DEF_TYPE2VEC128_TRAITS(float, v_float32x4);
CV_INTRIN_DEF_TYPE2VEC128_TRAITS(uint64, v_uint64x2);
CV_INTRIN_DEF_TYPE2VEC128_TRAITS(int64, v_int64x2);
#if CV_SIMD128_64F
CV_INTRIN_DEF_TYPE2VEC128_TRAITS(double, v_float64x2);
#endif
template<typename _T> static inline
typename Type2Vec128_Traits<_T>::vec_type v_setall(const _T& a);
template<> inline Type2Vec128_Traits< uchar>::vec_type v_setall< uchar>(const uchar& a) { return v_setall_u8(a); }
template<> inline Type2Vec128_Traits< schar>::vec_type v_setall< schar>(const schar& a) { return v_setall_s8(a); }
template<> inline Type2Vec128_Traits<ushort>::vec_type v_setall<ushort>(const ushort& a) { return v_setall_u16(a); }
template<> inline Type2Vec128_Traits< short>::vec_type v_setall< short>(const short& a) { return v_setall_s16(a); }
template<> inline Type2Vec128_Traits< uint>::vec_type v_setall< uint>(const uint& a) { return v_setall_u32(a); }
template<> inline Type2Vec128_Traits< int>::vec_type v_setall< int>(const int& a) { return v_setall_s32(a); }
template<> inline Type2Vec128_Traits<uint64>::vec_type v_setall<uint64>(const uint64& a) { return v_setall_u64(a); }
template<> inline Type2Vec128_Traits< int64>::vec_type v_setall< int64>(const int64& a) { return v_setall_s64(a); }
template<> inline Type2Vec128_Traits< float>::vec_type v_setall< float>(const float& a) { return v_setall_f32(a); }
#if CV_SIMD128_64F
template<> inline Type2Vec128_Traits<double>::vec_type v_setall<double>(const double& a) { return v_setall_f64(a); }
#endif
#endif // SIMD128
#if CV_SIMD256
template<typename _T> struct Type2Vec256_Traits;
#define CV_INTRIN_DEF_TYPE2VEC256_TRAITS(type_, vec_type_) \
template<> struct Type2Vec256_Traits<type_> \
{ \
typedef vec_type_ vec_type; \
}
CV_INTRIN_DEF_TYPE2VEC256_TRAITS(uchar, v_uint8x32);
CV_INTRIN_DEF_TYPE2VEC256_TRAITS(schar, v_int8x32);
CV_INTRIN_DEF_TYPE2VEC256_TRAITS(ushort, v_uint16x16);
CV_INTRIN_DEF_TYPE2VEC256_TRAITS(short, v_int16x16);
CV_INTRIN_DEF_TYPE2VEC256_TRAITS(unsigned, v_uint32x8);
CV_INTRIN_DEF_TYPE2VEC256_TRAITS(int, v_int32x8);
CV_INTRIN_DEF_TYPE2VEC256_TRAITS(float, v_float32x8);
CV_INTRIN_DEF_TYPE2VEC256_TRAITS(uint64, v_uint64x4);
CV_INTRIN_DEF_TYPE2VEC256_TRAITS(int64, v_int64x4);
#if CV_SIMD256_64F
CV_INTRIN_DEF_TYPE2VEC256_TRAITS(double, v_float64x4);
#endif
template<typename _T> static inline
typename Type2Vec256_Traits<_T>::vec_type v256_setall(const _T& a);
template<> inline Type2Vec256_Traits< uchar>::vec_type v256_setall< uchar>(const uchar& a) { return v256_setall_u8(a); }
template<> inline Type2Vec256_Traits< schar>::vec_type v256_setall< schar>(const schar& a) { return v256_setall_s8(a); }
template<> inline Type2Vec256_Traits<ushort>::vec_type v256_setall<ushort>(const ushort& a) { return v256_setall_u16(a); }
template<> inline Type2Vec256_Traits< short>::vec_type v256_setall< short>(const short& a) { return v256_setall_s16(a); }
template<> inline Type2Vec256_Traits< uint>::vec_type v256_setall< uint>(const uint& a) { return v256_setall_u32(a); }
template<> inline Type2Vec256_Traits< int>::vec_type v256_setall< int>(const int& a) { return v256_setall_s32(a); }
template<> inline Type2Vec256_Traits<uint64>::vec_type v256_setall<uint64>(const uint64& a) { return v256_setall_u64(a); }
template<> inline Type2Vec256_Traits< int64>::vec_type v256_setall< int64>(const int64& a) { return v256_setall_s64(a); }
template<> inline Type2Vec256_Traits< float>::vec_type v256_setall< float>(const float& a) { return v256_setall_f32(a); }
#if CV_SIMD256_64F
template<> inline Type2Vec256_Traits<double>::vec_type v256_setall<double>(const double& a) { return v256_setall_f64(a); }
#endif
#endif // SIMD256
#if CV_SIMD512
template<typename _T> struct Type2Vec512_Traits;
#define CV_INTRIN_DEF_TYPE2VEC512_TRAITS(type_, vec_type_) \
template<> struct Type2Vec512_Traits<type_> \
{ \
typedef vec_type_ vec_type; \
}
CV_INTRIN_DEF_TYPE2VEC512_TRAITS(uchar, v_uint8x64);
CV_INTRIN_DEF_TYPE2VEC512_TRAITS(schar, v_int8x64);
CV_INTRIN_DEF_TYPE2VEC512_TRAITS(ushort, v_uint16x32);
CV_INTRIN_DEF_TYPE2VEC512_TRAITS(short, v_int16x32);
CV_INTRIN_DEF_TYPE2VEC512_TRAITS(unsigned, v_uint32x16);
CV_INTRIN_DEF_TYPE2VEC512_TRAITS(int, v_int32x16);
CV_INTRIN_DEF_TYPE2VEC512_TRAITS(float, v_float32x16);
CV_INTRIN_DEF_TYPE2VEC512_TRAITS(uint64, v_uint64x8);
CV_INTRIN_DEF_TYPE2VEC512_TRAITS(int64, v_int64x8);
#if CV_SIMD512_64F
CV_INTRIN_DEF_TYPE2VEC512_TRAITS(double, v_float64x8);
#endif
template<typename _T> static inline
typename Type2Vec512_Traits<_T>::vec_type v512_setall(const _T& a);
template<> inline Type2Vec512_Traits< uchar>::vec_type v512_setall< uchar>(const uchar& a) { return v512_setall_u8(a); }
template<> inline Type2Vec512_Traits< schar>::vec_type v512_setall< schar>(const schar& a) { return v512_setall_s8(a); }
template<> inline Type2Vec512_Traits<ushort>::vec_type v512_setall<ushort>(const ushort& a) { return v512_setall_u16(a); }
template<> inline Type2Vec512_Traits< short>::vec_type v512_setall< short>(const short& a) { return v512_setall_s16(a); }
template<> inline Type2Vec512_Traits< uint>::vec_type v512_setall< uint>(const uint& a) { return v512_setall_u32(a); }
template<> inline Type2Vec512_Traits< int>::vec_type v512_setall< int>(const int& a) { return v512_setall_s32(a); }
template<> inline Type2Vec512_Traits<uint64>::vec_type v512_setall<uint64>(const uint64& a) { return v512_setall_u64(a); }
template<> inline Type2Vec512_Traits< int64>::vec_type v512_setall< int64>(const int64& a) { return v512_setall_s64(a); }
template<> inline Type2Vec512_Traits< float>::vec_type v512_setall< float>(const float& a) { return v512_setall_f32(a); }
#if CV_SIMD512_64F
template<> inline Type2Vec512_Traits<double>::vec_type v512_setall<double>(const double& a) { return v512_setall_f64(a); }
#endif
#endif // SIMD512
#if CV_SIMD_WIDTH == 16
template<typename _T> static inline
typename Type2Vec128_Traits<_T>::vec_type vx_setall(const _T& a) { return v_setall(a); }
#elif CV_SIMD_WIDTH == 32
template<typename _T> static inline
typename Type2Vec256_Traits<_T>::vec_type vx_setall(const _T& a) { return v256_setall(a); }
#elif CV_SIMD_WIDTH == 64
template<typename _T> static inline
typename Type2Vec512_Traits<_T>::vec_type vx_setall(const _T& a) { return v512_setall(a); }
#else
#error "Build configuration error, unsupported CV_SIMD_WIDTH"
#endif
#endif // OPENCV_HAL_INTRIN_HPP