添加项目文件。

2025-01-20 10:30:01 +08:00
parent 77371da5d7
commit 752be79e06
1010 changed files with 610100 additions and 0 deletions
--- a/3rdparty/opencv/inc/opencv2/aruco.hpp
+++ b/3rdparty/opencv/inc/opencv2/aruco.hpp
@@ -0,0 +1,616 @@
+/*
+By downloading, copying, installing or using the software you agree to this
+license. If you do not agree to this license, do not download, install,
+copy or use the software.
+
+                          License Agreement
+               For Open Source Computer Vision Library
+                       (3-clause BSD License)
+
+Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+Third party copyrights are property of their respective owners.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+  * Redistributions of source code must retain the above copyright notice,
+    this list of conditions and the following disclaimer.
+
+  * Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+  * Neither the names of the copyright holders nor the names of the contributors
+    may be used to endorse or promote products derived from this software
+    without specific prior written permission.
+
+This software is provided by the copyright holders and contributors "as is" and
+any express or implied warranties, including, but not limited to, the implied
+warranties of merchantability and fitness for a particular purpose are
+disclaimed. In no event shall copyright holders or contributors be liable for
+any direct, indirect, incidental, special, exemplary, or consequential damages
+(including, but not limited to, procurement of substitute goods or services;
+loss of use, data, or profits; or business interruption) however caused
+and on any theory of liability, whether in contract, strict liability,
+or tort (including negligence or otherwise) arising in any way out of
+the use of this software, even if advised of the possibility of such damage.
+*/
+
+#ifndef __OPENCV_ARUCO_HPP__
+#define __OPENCV_ARUCO_HPP__
+
+#include <opencv2/core.hpp>
+#include <vector>
+#include "opencv2/aruco/dictionary.hpp"
+
+/**
+ * @defgroup aruco ArUco Marker Detection
+ * This module is dedicated to square fiducial markers (also known as Augmented Reality Markers)
+ * These markers are useful for easy, fast and robust camera pose estimation.
+ *
+ * The main functionalities are:
+ * - Detection of markers in an image
+ * - Pose estimation from a single marker or from a board/set of markers
+ * - Detection of ChArUco board for high subpixel accuracy
+ * - Camera calibration from both, ArUco boards and ChArUco boards.
+ * - Detection of ChArUco diamond markers
+ * The samples directory includes easy examples of how to use the module.
+ *
+ * The implementation is based on the ArUco Library by R. Muñoz-Salinas and S. Garrido-Jurado @cite Aruco2014.
+ *
+ * Markers can also be detected based on the AprilTag 2 @cite wang2016iros fiducial detection method.
+ *
+ * @sa S. Garrido-Jurado, R. Muñoz-Salinas, F. J. Madrid-Cuevas, and M. J. Marín-Jiménez. 2014.
+ * "Automatic generation and detection of highly reliable fiducial markers under occlusion".
+ * Pattern Recogn. 47, 6 (June 2014), 2280-2292. DOI=10.1016/j.patcog.2014.01.005
+ *
+ * @sa http://www.uco.es/investiga/grupos/ava/node/26
+ *
+ * This module has been originally developed by Sergio Garrido-Jurado as a project
+ * for Google Summer of Code 2015 (GSoC 15).
+ *
+ *
+*/
+
+namespace cv {
+namespace aruco {
+
+//! @addtogroup aruco
+//! @{
+
+enum CornerRefineMethod{ // values accepted by DetectorParameters::cornerRefinementMethod
+    CORNER_REFINE_NONE,     ///< Tag and corners detection based on the ArUco approach (no corner refinement)
+    CORNER_REFINE_SUBPIX,   ///< ArUco approach and refine the corner locations using corner subpixel accuracy
+    CORNER_REFINE_CONTOUR,  ///< ArUco approach and refine the corner locations using the contour-points line fitting
+    CORNER_REFINE_APRILTAG, ///< Tag and corners detection based on the AprilTag 2 approach @cite wang2016iros
+};
+
+/**
+ * @brief Parameters for the detectMarkers process:
+ * - adaptiveThreshWinSizeMin: minimum window size for adaptive thresholding before finding
+ *   contours (default 3).
+ * - adaptiveThreshWinSizeMax: maximum window size for adaptive thresholding before finding
+ *   contours (default 23).
+ * - adaptiveThreshWinSizeStep: increments from adaptiveThreshWinSizeMin to adaptiveThreshWinSizeMax
+ *   during the thresholding (default 10).
+ * - adaptiveThreshConstant: constant for adaptive thresholding before finding contours (default 7)
+ * - minMarkerPerimeterRate: determine minimum perimeter for marker contour to be detected. This
+ *   is defined as a rate with respect to the maximum dimension of the input image (default 0.03).
+ * - maxMarkerPerimeterRate: determine maximum perimeter for marker contour to be detected. This
+ *   is defined as a rate with respect to the maximum dimension of the input image (default 4.0).
+ * - polygonalApproxAccuracyRate: minimum accuracy during the polygonal approximation process to
+ *   determine which contours are squares. (default 0.03)
+ * - minCornerDistanceRate: minimum distance between corners for detected markers relative to its
+ *   perimeter (default 0.05)
+ * - minDistanceToBorder: minimum distance of any corner to the image border for detected markers
+ *   (in pixels) (default 3)
+ * - minMarkerDistanceRate: minimum mean distance between two marker corners to be considered
+ *   similar, so that the smaller one is removed. The rate is relative to the smaller perimeter
+ *   of the two markers (default 0.05).
+ * - cornerRefinementMethod: corner refinement method. (CORNER_REFINE_NONE, no refinement.
+ *   CORNER_REFINE_SUBPIX, do subpixel refinement. CORNER_REFINE_CONTOUR use contour-Points,
+ *   CORNER_REFINE_APRILTAG  use the AprilTag2 approach). (default CORNER_REFINE_NONE)
+ * - cornerRefinementWinSize: window size for the corner refinement process (in pixels) (default 5).
+ * - cornerRefinementMaxIterations: maximum number of iterations for stop criteria of the corner
+ *   refinement process (default 30).
+ * - cornerRefinementMinAccuracy: minimum error for the stop criteria of the corner refinement
+ *   process (default: 0.1)
+ * - markerBorderBits: number of bits of the marker border, i.e. marker border width (default 1).
+ * - perspectiveRemovePixelPerCell: number of pixels (per dimension) for each cell of the marker
+ *   when removing the perspective (default 4).
+ * - perspectiveRemoveIgnoredMarginPerCell: width of the margin of pixels on each cell not
+ *   considered for the determination of the cell bit. Represents the rate with respect to the total
+ *   size of the cell, i.e. perspectiveRemovePixelPerCell (default 0.13)
+ * - maxErroneousBitsInBorderRate: maximum number of accepted erroneous bits in the border (i.e.
+ *   number of allowed white bits in the border). Represented as a rate with respect to the total
+ *   number of bits per marker (default 0.35).
+ * - minOtsuStdDev: minimum standard deviation in pixels values during the decoding step to
+ *   apply Otsu thresholding (otherwise, all the bits are set to 0 or 1 depending on mean higher
+ *   than 128 or not) (default 5.0)
+ * - errorCorrectionRate: error correction rate with respect to the maximum error correction capability
+ *   for each dictionary. (default 0.6).
+ * - aprilTagMinClusterPixels: reject quads containing too few pixels. (default 5)
+ * - aprilTagMaxNmaxima: how many corner candidates to consider when segmenting a group of pixels into a quad. (default 10)
+ * - aprilTagCriticalRad: Reject quads where pairs of edges have angles that are close to straight or close to
+ *   180 degrees. Zero means that no quads are rejected. (In radians) (default 10*PI/180)
+ * - aprilTagMaxLineFitMse: When fitting lines to the contours, what is the maximum mean squared error
+ *   allowed?  This is useful in rejecting contours that are far from being quad shaped; rejecting
+ *   these quads "early" saves expensive decoding processing. (default 10.0)
+ * - aprilTagMinWhiteBlackDiff: When we build our model of black & white pixels, we add an extra check that
+ *   the white model must be (overall) brighter than the black model.  How much brighter? (in pixel values, [0,255]). (default 5)
+ * - aprilTagDeglitch: should the thresholded image be deglitched? Only useful for very noisy images. (default 0)
+ * - aprilTagQuadDecimate: Detection of quads can be done on a lower-resolution image, improving speed at a
+ *   cost of pose accuracy and a slight decrease in detection rate. Decoding the binary payload is still
+ *   done at full resolution. (default 0.0)
+ * - aprilTagQuadSigma: What Gaussian blur should be applied to the segmented image (used for quad detection)?
+ *   Parameter is the standard deviation in pixels.  Very noisy images benefit from non-zero values (e.g. 0.8). (default 0.0)
+ * - detectInvertedMarker: to check if there is a white marker. In order to generate a "white" marker just
+ *   invert a normal marker by using a tilde, ~markerImage. (default false)
+ */
+struct CV_EXPORTS_W DetectorParameters {
+
+    DetectorParameters();
+
+    CV_WRAP static Ptr<DetectorParameters> create();
+
+    CV_PROP_RW int adaptiveThreshWinSizeMin;
+    CV_PROP_RW int adaptiveThreshWinSizeMax;
+    CV_PROP_RW int adaptiveThreshWinSizeStep;
+    CV_PROP_RW double adaptiveThreshConstant;
+    CV_PROP_RW double minMarkerPerimeterRate;
+    CV_PROP_RW double maxMarkerPerimeterRate;
+    CV_PROP_RW double polygonalApproxAccuracyRate;
+    CV_PROP_RW double minCornerDistanceRate;
+    CV_PROP_RW int minDistanceToBorder;
+    CV_PROP_RW double minMarkerDistanceRate;
+    CV_PROP_RW int cornerRefinementMethod;
+    CV_PROP_RW int cornerRefinementWinSize;
+    CV_PROP_RW int cornerRefinementMaxIterations;
+    CV_PROP_RW double cornerRefinementMinAccuracy;
+    CV_PROP_RW int markerBorderBits;
+    CV_PROP_RW int perspectiveRemovePixelPerCell;
+    CV_PROP_RW double perspectiveRemoveIgnoredMarginPerCell;
+    CV_PROP_RW double maxErroneousBitsInBorderRate;
+    CV_PROP_RW double minOtsuStdDev;
+    CV_PROP_RW double errorCorrectionRate;
+
+    // AprilTag :: user-configurable parameters.
+    CV_PROP_RW float aprilTagQuadDecimate;
+    CV_PROP_RW float aprilTagQuadSigma;
+
+    // AprilTag :: internal variables
+    CV_PROP_RW int aprilTagMinClusterPixels;
+    CV_PROP_RW int aprilTagMaxNmaxima;
+    CV_PROP_RW float aprilTagCriticalRad;
+    CV_PROP_RW float aprilTagMaxLineFitMse;
+    CV_PROP_RW int aprilTagMinWhiteBlackDiff;
+    CV_PROP_RW int aprilTagDeglitch;
+
+    // to detect white (inverted) markers
+    CV_PROP_RW bool detectInvertedMarker;
+};
+
+
+
+/**
+ * @brief Basic marker detection
+ *
+ * @param image input image
+ * @param dictionary indicates the type of markers that will be searched
+ * @param corners vector of detected marker corners. For each marker, its four corners
+ * are provided, (e.g. std::vector<std::vector<cv::Point2f> > ). For N detected markers,
+ * the dimensions of this array are Nx4. The order of the corners is clockwise.
+ * @param ids vector of identifiers of the detected markers. The identifier is of type int
+ * (e.g. std::vector<int>). For N detected markers, the size of ids is also N.
+ * The identifiers have the same order as the markers in the corners array.
+ * @param parameters marker detection parameters
+ * @param rejectedImgPoints contains the image points of those squares whose inner code does not
+ * have a valid encoding. Useful for debugging purposes.
+ * @param cameraMatrix optional input 3x3 floating-point camera matrix
+ * \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$
+ * @param distCoeff optional vector of distortion coefficients
+ * \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f$ of 4, 5, 8 or 12 elements
+ *
+ * Performs marker detection in the input image. Only markers included in the specified dictionary
+ * are searched. For each detected marker, it returns the 2D position of its corners in the image
+ * and its corresponding identifier.
+ * Note that this function does not perform pose estimation.
+ * @sa estimatePoseSingleMarkers,  estimatePoseBoard
+ *
+ */
+CV_EXPORTS_W void detectMarkers(InputArray image, const Ptr<Dictionary> &dictionary, OutputArrayOfArrays corners,
+                                OutputArray ids, const Ptr<DetectorParameters> &parameters = DetectorParameters::create(),
+                                OutputArrayOfArrays rejectedImgPoints = noArray(), InputArray cameraMatrix= noArray(), InputArray distCoeff= noArray());
+
+
+
+/**
+ * @brief Pose estimation for single markers
+ *
+ * @param corners vector of already detected markers corners. For each marker, its four corners
+ * are provided, (e.g. std::vector<std::vector<cv::Point2f> > ). For N detected markers,
+ * the dimensions of this array should be Nx4. The order of the corners should be clockwise.
+ * @sa detectMarkers
+ * @param markerLength the length of the markers' side. The returning translation vectors will
+ * be in the same unit. Normally, unit is meters.
+ * @param cameraMatrix input 3x3 floating-point camera matrix
+ * \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$
+ * @param distCoeffs vector of distortion coefficients
+ * \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f$ of 4, 5, 8 or 12 elements
+ * @param rvecs array of output rotation vectors (@sa Rodrigues) (e.g. std::vector<cv::Vec3d>).
+ * Each element in rvecs corresponds to the specific marker in corners.
+ * @param tvecs array of output translation vectors (e.g. std::vector<cv::Vec3d>).
+ * Each element in tvecs corresponds to the specific marker in corners.
+ * @param _objPoints array of object points of all the marker corners
+ *
+ * This function receives the detected markers and returns their pose estimation with respect to
+ * the camera individually. So for each marker, one rotation and translation vector is returned.
+ * The returned transformation is the one that transforms points from each marker coordinate system
+ * to the camera coordinate system.
+ * The marker coordinate system is centered on the middle of the marker, with the Z axis
+ * perpendicular to the marker plane.
+ * The coordinates of the four corners of the marker in its own coordinate system are:
+ * (-markerLength/2, markerLength/2, 0), (markerLength/2, markerLength/2, 0),
+ * (markerLength/2, -markerLength/2, 0), (-markerLength/2, -markerLength/2, 0)
+ */
+CV_EXPORTS_W void estimatePoseSingleMarkers(InputArrayOfArrays corners, float markerLength,
+                                            InputArray cameraMatrix, InputArray distCoeffs,
+                                            OutputArray rvecs, OutputArray tvecs, OutputArray _objPoints = noArray());
+
+
+
+/**
+ * @brief Board of markers
+ *
+ * A board is a set of markers in the 3D space with a common coordinate system.
+ * The common form of a board of markers is a planar (2D) board, however any 3D layout can be used.
+ * A Board object is composed of:
+ * - The object points of the marker corners, i.e. their coordinates with respect to the board system.
+ * - The dictionary which indicates the type of markers of the board
+ * - The identifier of all the markers in the board.
+ */
+class CV_EXPORTS_W Board {
+
+    public:
+    /**
+    * @brief Provide a way to create a Board by passing the necessary data. Especially needed in Python.
+    *
+    * @param objPoints array of object points of all the marker corners in the board
+    * @param dictionary the dictionary of markers employed for this board
+    * @param ids vector of the identifiers of the markers in the board
+    *
+    */
+    CV_WRAP static Ptr<Board> create(InputArrayOfArrays objPoints, const Ptr<Dictionary> &dictionary, InputArray ids);
+
+    /**
+    * @brief Set ids vector
+    *
+    * @param ids vector of the identifiers of the markers in the board (should be the same size
+    * as objPoints)
+    *
+    * Recommended way to set ids vector, which will fail if the size of ids does not match size
+    * of objPoints.
+    */
+    CV_WRAP void setIds(InputArray ids);
+
+    /// array of object points of all the marker corners in the board
+    /// each marker includes its 4 corners in CCW order. For M markers, the size is Mx4.
+    CV_PROP std::vector< std::vector< Point3f > > objPoints;
+
+    /// the dictionary of markers employed for this board
+    CV_PROP Ptr<Dictionary> dictionary;
+
+    /// vector of the identifiers of the markers in the board (same size as objPoints)
+    /// The identifiers refer to the board dictionary
+    CV_PROP_RW std::vector< int > ids;
+};
+
+
+
+/**
+ * @brief Planar board with grid arrangement of markers
+ * More common type of board. All markers are placed in the same plane in a grid arrangement.
+ * The board can be drawn using drawPlanarBoard() function (@sa drawPlanarBoard)
+ */
+class CV_EXPORTS_W GridBoard : public Board {
+
+    public:
+    /**
+     * @brief Draw a GridBoard
+     *
+     * @param outSize size of the output image in pixels.
+     * @param img output image with the board. The size of this image will be outSize
+     * and the board will be on the center, keeping the board proportions.
+     * @param marginSize minimum margins (in pixels) of the board in the output image
+     * @param borderBits width of the marker borders.
+     *
+     * This function returns the image of the GridBoard, ready to be printed.
+     */
+    CV_WRAP void draw(Size outSize, OutputArray img, int marginSize = 0, int borderBits = 1);
+
+
+    /**
+     * @brief Create a GridBoard object
+     *
+     * @param markersX number of markers in X direction
+     * @param markersY number of markers in Y direction
+     * @param markerLength marker side length (normally in meters)
+     * @param markerSeparation separation between two markers (same unit as markerLength)
+     * @param dictionary dictionary of markers indicating the type of markers
+     * @param firstMarker id of first marker in dictionary to use on board.
+     * @return the output GridBoard object
+     *
+     * This function creates a GridBoard object given the number of markers in each direction and
+     * the marker size and marker separation.
+     */
+    CV_WRAP static Ptr<GridBoard> create(int markersX, int markersY, float markerLength,
+                                         float markerSeparation, const Ptr<Dictionary> &dictionary, int firstMarker = 0);
+
+    /**
+      * @brief Get the number of markers in X and Y directions, returned as Size(markersX, markersY)
+      */
+    CV_WRAP Size getGridSize() const { return Size(_markersX, _markersY); }
+
+    /**
+      * @brief Get the marker side length (normally in meters)
+      */
+    CV_WRAP float getMarkerLength() const { return _markerLength; }
+
+    /**
+      * @brief Get the separation between markers in the grid
+      */
+    CV_WRAP float getMarkerSeparation() const { return _markerSeparation; }
+
+
+    private:
+    // number of markers in X and Y directions
+    int _markersX, _markersY;
+
+    // marker side length (normally in meters)
+    float _markerLength;
+
+    // separation between markers in the grid
+    float _markerSeparation;
+};
+
+
+
+/**
+ * @brief Pose estimation for a board of markers
+ *
+ * @param corners vector of already detected markers corners. For each marker, its four corners
+ * are provided, (e.g. std::vector<std::vector<cv::Point2f> > ). For N detected markers, the
+ * dimensions of this array should be Nx4. The order of the corners should be clockwise.
+ * @param ids list of identifiers for each marker in corners
+ * @param board layout of markers in the board. The layout is composed of the marker identifiers
+ * and the positions of each marker corner in the board reference system.
+ * @param cameraMatrix input 3x3 floating-point camera matrix
+ * \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$
+ * @param distCoeffs vector of distortion coefficients
+ * \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f$ of 4, 5, 8 or 12 elements
+ * @param rvec Output vector (e.g. cv::Mat) corresponding to the rotation vector of the board
+ * (see cv::Rodrigues). Used as initial guess if not empty.
+ * @param tvec Output vector (e.g. cv::Mat) corresponding to the translation vector of the board.
+ * Used as initial guess if not empty.
+ * @param useExtrinsicGuess defines whether initial guess for \b rvec and \b tvec will be used or not.
+ *
+ * This function receives the detected markers and returns the pose of a marker board composed
+ * of those markers.
+ * A board of markers has a single world coordinate system which is defined by the board layout.
+ * The returned transformation is the one that transforms points from the board coordinate system
+ * to the camera coordinate system.
+ * Input markers that are not included in the board layout are ignored.
+ * The function returns the number of markers from the input employed for the board pose estimation.
+ * Note that returning a 0 means the pose has not been estimated.
+ */
+CV_EXPORTS_W int estimatePoseBoard(InputArrayOfArrays corners, InputArray ids, const Ptr<Board> &board,
+                                   InputArray cameraMatrix, InputArray distCoeffs, InputOutputArray rvec,
+                                   InputOutputArray tvec, bool useExtrinsicGuess = false);
+
+
+
+
+/**
+ * @brief Re-find markers that were not detected, based on the already detected ones and the board layout
+ *
+ * @param image input image
+ * @param board layout of markers in the board.
+ * @param detectedCorners vector of already detected marker corners.
+ * @param detectedIds vector of already detected marker identifiers.
+ * @param rejectedCorners vector of rejected candidates during the marker detection process.
+ * @param cameraMatrix optional input 3x3 floating-point camera matrix
+ * \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$
+ * @param distCoeffs optional vector of distortion coefficients
+ * \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f$ of 4, 5, 8 or 12 elements
+ * @param minRepDistance minimum distance between the corners of the rejected candidate and the
+ * reprojected marker in order to consider it as a correspondence.
+ * @param errorCorrectionRate rate of allowed erroneous bits with respect to the error correction
+ * capability of the used dictionary. -1 ignores the error correction step.
+ * @param checkAllOrders Consider the four possible corner orders in the rejectedCorners array.
+ * If it is set to false, only the provided corner order is considered (default true).
+ * @param recoveredIdxs Optional array to return the indexes of the recovered candidates in the
+ * original rejectedCorners array.
+ * @param parameters marker detection parameters
+ *
+ * This function tries to find markers that were not detected in the basic detectMarkers function.
+ * First, based on the current detected marker and the board layout, the function interpolates
+ * the position of the missing markers. Then it tries to find correspondence between the reprojected
+ * markers and the rejected candidates based on the minRepDistance and errorCorrectionRate
+ * parameters.
+ * If camera parameters and distortion coefficients are provided, missing markers are reprojected
+ * using projectPoint function. If not, missing marker projections are interpolated using global
+ * homography, and all the marker corners in the board must have the same Z coordinate.
+ */
+CV_EXPORTS_W void refineDetectedMarkers(
+    InputArray image,const  Ptr<Board> &board, InputOutputArrayOfArrays detectedCorners,
+    InputOutputArray detectedIds, InputOutputArrayOfArrays rejectedCorners,
+    InputArray cameraMatrix = noArray(), InputArray distCoeffs = noArray(),
+    float minRepDistance = 10.f, float errorCorrectionRate = 3.f, bool checkAllOrders = true,
+    OutputArray recoveredIdxs = noArray(), const Ptr<DetectorParameters> &parameters = DetectorParameters::create());
+
+
+
+/**
+ * @brief Draw detected markers in image
+ *
+ * @param image input/output image. It must have 1 or 3 channels. The number of channels is not
+ * altered.
+ * @param corners positions of marker corners on input image.
+ * (e.g. std::vector<std::vector<cv::Point2f> > ). For N detected markers, the dimensions of
+ * this array should be Nx4. The order of the corners should be clockwise.
+ * @param ids vector of identifiers for markers in corners.
+ * Optional, if not provided, ids are not painted.
+ * @param borderColor color of marker borders. Rest of colors (text color and first corner color)
+ * are calculated based on this one to improve visualization.
+ *
+ * Given an array of detected marker corners and its corresponding ids, this function draws
+ * the markers in the image. The marker borders are painted and the markers identifiers if provided.
+ * Useful for debugging purposes.
+ */
+CV_EXPORTS_W void drawDetectedMarkers(InputOutputArray image, InputArrayOfArrays corners,
+                                      InputArray ids = noArray(),
+                                      Scalar borderColor = Scalar(0, 255, 0));
+
+
+
+/**
+ * @brief Draw coordinate system axis from pose estimation
+ *
+ * @param image input/output image. It must have 1 or 3 channels. The number of channels is not
+ * altered.
+ * @param cameraMatrix input 3x3 floating-point camera matrix
+ * \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$
+ * @param distCoeffs vector of distortion coefficients
+ * \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f$ of 4, 5, 8 or 12 elements
+ * @param rvec rotation vector of the coordinate system that will be drawn. (@sa Rodrigues).
+ * @param tvec translation vector of the coordinate system that will be drawn.
+ * @param length length of the painted axis in the same unit as tvec (usually in meters)
+ *
+ * Given the pose estimation of a marker or board, this function draws the axis of the world
+ * coordinate system, i.e. the system centered on the marker/board. Useful for debugging purposes.
+ *
+ * @deprecated use cv::drawFrameAxes
+ */
+CV_EXPORTS_W void drawAxis(InputOutputArray image, InputArray cameraMatrix, InputArray distCoeffs,
+                           InputArray rvec, InputArray tvec, float length);
+
+
+
+/**
+ * @brief Draw a canonical marker image
+ *
+ * @param dictionary dictionary of markers indicating the type of markers
+ * @param id identifier of the marker that will be returned. It has to be a valid id
+ * in the specified dictionary.
+ * @param sidePixels size (side length) of the output image in pixels
+ * @param img output image with the marker
+ * @param borderBits width of the marker border.
+ *
+ * This function returns, through img, a marker image in its canonical form (i.e. ready to be printed)
+ */
+CV_EXPORTS_W void drawMarker(const Ptr<Dictionary> &dictionary, int id, int sidePixels, OutputArray img,
+                             int borderBits = 1);
+
+
+
+/**
+ * @brief Draw a planar board
+ * @sa _drawPlanarBoardImpl
+ *
+ * @param board layout of the board that will be drawn. The board should be planar,
+ * z coordinate is ignored
+ * @param outSize size of the output image in pixels.
+ * @param img output image with the board. The size of this image will be outSize
+ * and the board will be on the center, keeping the board proportions.
+ * @param marginSize minimum margins (in pixels) of the board in the output image
+ * @param borderBits width of the marker borders.
+ *
+ * This function returns the image of a planar board, ready to be printed. It assumes
+ * the Board layout specified is planar by ignoring the z coordinates of the object points.
+ */
+CV_EXPORTS_W void drawPlanarBoard(const Ptr<Board> &board, Size outSize, OutputArray img,
+                                  int marginSize = 0, int borderBits = 1);
+
+
+
+/**
+ * @brief Implementation of drawPlanarBoard that accepts a raw Board pointer (same remaining arguments and defaults).
+ */
+void _drawPlanarBoardImpl(Board *board, Size outSize, OutputArray img,
+                          int marginSize = 0, int borderBits = 1);
+
+
+
+/**
+ * @brief Calibrate a camera using aruco markers
+ *
+ * @param corners vector of detected marker corners in all frames.
+ * The corners should have the same format returned by detectMarkers (see #detectMarkers).
+ * @param ids list of identifiers for each marker in corners
+ * @param counter number of markers in each frame so that corners and ids can be split
+ * @param board Marker Board layout
+ * @param imageSize Size of the image used only to initialize the intrinsic camera matrix.
+ * @param cameraMatrix Output 3x3 floating-point camera matrix
+ * \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . If CV\_CALIB\_USE\_INTRINSIC\_GUESS
+ * and/or CV_CALIB_FIX_ASPECT_RATIO are specified, some or all of fx, fy, cx, cy must be
+ * initialized before calling the function.
+ * @param distCoeffs Output vector of distortion coefficients
+ * \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f$ of 4, 5, 8 or 12 elements
+ * @param rvecs Output vector of rotation vectors (see Rodrigues ) estimated for each board view
+ * (e.g. std::vector<cv::Mat>). That is, each k-th rotation vector together with the corresponding
+ * k-th translation vector (see the next output parameter description) brings the board pattern
+ * from the model coordinate space (in which object points are specified) to the world coordinate
+ * space, that is, a real position of the board pattern in the k-th pattern view (k=0.. *M* -1).
+ * @param tvecs Output vector of translation vectors estimated for each pattern view.
+ * @param stdDeviationsIntrinsics Output vector of standard deviations estimated for intrinsic parameters.
+ * Order of deviations values:
+ * \f$(f_x, f_y, c_x, c_y, k_1, k_2, p_1, p_2, k_3, k_4, k_5, k_6 , s_1, s_2, s_3,
+ * s_4, \tau_x, \tau_y)\f$ If one of the parameters is not estimated, its deviation is equal to zero.
+ * @param stdDeviationsExtrinsics Output vector of standard deviations estimated for extrinsic parameters.
+ * Order of deviations values: \f$(R_1, T_1, \dotsc , R_M, T_M)\f$ where M is number of pattern views,
+ * \f$R_i, T_i\f$ are concatenated 1x3 vectors.
+ * @param perViewErrors Output vector of average re-projection errors estimated for each pattern view.
+ * @param flags Different flags for the calibration process (see #calibrateCamera for details).
+ * @param criteria Termination criteria for the iterative optimization algorithm.
+ *
+ * This function calibrates a camera using an Aruco Board. The function receives a list of
+ * detected markers from several views of the Board. The process is similar to the chessboard
+ * calibration in calibrateCamera(). The function returns the final re-projection error.
+ */
+CV_EXPORTS_AS(calibrateCameraArucoExtended) double calibrateCameraAruco(
+    InputArrayOfArrays corners, InputArray ids, InputArray counter, const Ptr<Board> &board,
+    Size imageSize, InputOutputArray cameraMatrix, InputOutputArray distCoeffs,
+    OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs,
+    OutputArray stdDeviationsIntrinsics, OutputArray stdDeviationsExtrinsics,
+    OutputArray perViewErrors, int flags = 0,
+    TermCriteria criteria = TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 30, DBL_EPSILON));
+
+
+/** @brief Same function as #calibrateCameraAruco but without the calibration error estimation outputs (stdDeviationsIntrinsics, stdDeviationsExtrinsics, perViewErrors).
+ */
+CV_EXPORTS_W double calibrateCameraAruco(
+  InputArrayOfArrays corners, InputArray ids, InputArray counter, const Ptr<Board> &board,
+  Size imageSize, InputOutputArray cameraMatrix, InputOutputArray distCoeffs,
+  OutputArrayOfArrays rvecs = noArray(), OutputArrayOfArrays tvecs = noArray(), int flags = 0,
+  TermCriteria criteria = TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 30, DBL_EPSILON));
+
+
+/**
+ * @brief Given a board configuration and a set of detected markers, returns (through objPoints
+ * and imgPoints) the corresponding object points and image points to call solvePnP
+ *
+ * @param board Marker board layout.
+ * @param detectedCorners List of detected marker corners of the board.
+ * @param detectedIds List of identifiers for each marker.
+ * @param objPoints Vector of vectors of board marker points in the board coordinate space.
+ * @param imgPoints Vector of vectors of the projections of board marker corner points.
+*/
+CV_EXPORTS_W void getBoardObjectAndImagePoints(const Ptr<Board> &board, InputArrayOfArrays detectedCorners,
+  InputArray detectedIds, OutputArray objPoints, OutputArray imgPoints);
+
+
+//! @}
+}
+}
+
+#endif
--- a/3rdparty/opencv/inc/opencv2/aruco/charuco.hpp
+++ b/3rdparty/opencv/inc/opencv2/aruco/charuco.hpp
@@ -0,0 +1,353 @@
+/*
+By downloading, copying, installing or using the software you agree to this
+license. If you do not agree to this license, do not download, install,
+copy or use the software.
+
+                          License Agreement
+               For Open Source Computer Vision Library
+                       (3-clause BSD License)
+
+Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+Third party copyrights are property of their respective owners.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+  * Redistributions of source code must retain the above copyright notice,
+    this list of conditions and the following disclaimer.
+
+  * Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+  * Neither the names of the copyright holders nor the names of the contributors
+    may be used to endorse or promote products derived from this software
+    without specific prior written permission.
+
+This software is provided by the copyright holders and contributors "as is" and
+any express or implied warranties, including, but not limited to, the implied
+warranties of merchantability and fitness for a particular purpose are
+disclaimed. In no event shall copyright holders or contributors be liable for
+any direct, indirect, incidental, special, exemplary, or consequential damages
+(including, but not limited to, procurement of substitute goods or services;
+loss of use, data, or profits; or business interruption) however caused
+and on any theory of liability, whether in contract, strict liability,
+or tort (including negligence or otherwise) arising in any way out of
+the use of this software, even if advised of the possibility of such damage.
+*/
+
+#ifndef __OPENCV_CHARUCO_HPP__
+#define __OPENCV_CHARUCO_HPP__
+
+#include <opencv2/core.hpp>
+#include <vector>
+#include <opencv2/aruco.hpp>
+
+
+namespace cv {
+namespace aruco {
+
+//! @addtogroup aruco
+//! @{
+
+
+/**
+ * @brief ChArUco board
+ * Specific class for ChArUco boards. A ChArUco board is a planar board where the markers are placed
+ * inside the white squares of a chessboard. The benefit of ChArUco boards is that they provide
+ * both ArUco marker versatility and chessboard corner precision, which is important for
+ * calibration and pose estimation.
+ * This class also allows the easy creation and drawing of ChArUco boards.
+ */
+class CV_EXPORTS_W CharucoBoard : public Board {
+
+    public:
+    // precalculated vector of chessboard 3D corners
+    CV_PROP std::vector< Point3f > chessboardCorners;
+
+    // for each charuco corner, nearest marker id and nearest marker corner id of each marker
+    CV_PROP std::vector< std::vector< int > > nearestMarkerIdx;
+    CV_PROP std::vector< std::vector< int > > nearestMarkerCorners;
+
+    /**
+     * @brief Draw a ChArUco board
+     *
+     * @param outSize size of the output image in pixels.
+     * @param img output image with the board. The size of this image will be outSize
+     * and the board will be on the center, keeping the board proportions.
+     * @param marginSize minimum margins (in pixels) of the board in the output image
+     * @param borderBits width of the marker borders.
+     *
+     * This function returns the image of the ChArUco board, ready to be printed.
+     */
+    CV_WRAP void draw(Size outSize, OutputArray img, int marginSize = 0, int borderBits = 1);
+
+
+    /**
+     * @brief Create a CharucoBoard object
+     *
+     * @param squaresX number of chessboard squares in X direction
+     * @param squaresY number of chessboard squares in Y direction
+     * @param squareLength chessboard square side length (normally in meters)
+     * @param markerLength marker side length (same unit as squareLength)
+     * @param dictionary dictionary of markers indicating the type of markers.
+     * The first markers in the dictionary are used to fill the white chessboard squares.
+     * @return the output CharucoBoard object
+     *
+     * This function creates a CharucoBoard object given the number of squares in each direction
+     * and the size of the markers and chessboard squares.
+     */
+    CV_WRAP static Ptr<CharucoBoard> create(int squaresX, int squaresY, float squareLength,
+                                            float markerLength, const Ptr<Dictionary> &dictionary);
+
+    /**
+      * @brief Returns the board size in chessboard squares as Size(squaresX, squaresY).
+      */
+    CV_WRAP Size getChessboardSize() const { return Size(_squaresX, _squaresY); }
+
+    /**
+      * @brief Returns the stored chessboard square side length.
+      */
+    CV_WRAP float getSquareLength() const { return _squareLength; }
+
+    /**
+      * @brief Returns the stored marker side length.
+      */
+    CV_WRAP float getMarkerLength() const { return _markerLength; }
+
+    private:
+    void _getNearestMarkerCorners();
+
+    // number of chessboard squares in X and Y directions
+    int _squaresX, _squaresY;
+
+    // size of chessboard squares side (normally in meters)
+    float _squareLength;
+
+    // marker side length (normally in meters)
+    float _markerLength;
+};
+
+
+
+
+/**
+ * @brief Interpolate position of ChArUco board corners
+ * @param markerCorners vector of already detected markers corners. For each marker, its four
+ * corners are provided, (e.g. std::vector<std::vector<cv::Point2f> > ). For N detected markers, the
+ * dimensions of this array should be Nx4. The order of the corners should be clockwise.
+ * @param markerIds list of identifiers for each marker in markerCorners
+ * @param image input image necessary for corner refinement. Note that markers are not detected and
+ * should be sent in the markerCorners and markerIds parameters.
+ * @param board layout of ChArUco board.
+ * @param charucoCorners interpolated chessboard corners
+ * @param charucoIds interpolated chessboard corners identifiers
+ * @param cameraMatrix optional 3x3 floating-point camera matrix
+ * \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$
+ * @param distCoeffs optional vector of distortion coefficients
+ * \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f$ of 4, 5, 8 or 12 elements
+ * @param minMarkers number of adjacent markers that must be detected to return a charuco corner
+ *
+ * This function receives the detected markers and returns the 2D position of the chessboard corners
+ * from a ChArUco board using the detected Aruco markers. If camera parameters are provided,
+ * the process is based on an approximated pose estimation, else it is based on local homography.
+ * Only visible corners are returned. For each corner, its corresponding identifier is
+ * also returned in charucoIds.
+ * The function returns the number of interpolated corners.
+ */
+CV_EXPORTS_W int interpolateCornersCharuco(InputArrayOfArrays markerCorners, InputArray markerIds,
+                                           InputArray image, const Ptr<CharucoBoard> &board,
+                                           OutputArray charucoCorners, OutputArray charucoIds,
+                                           InputArray cameraMatrix = noArray(),
+                                           InputArray distCoeffs = noArray(), int minMarkers = 2);
+
+
+
+
+/**
+ * @brief Pose estimation for a ChArUco board given some of its corners
+ * @param charucoCorners vector of detected charuco corners
+ * @param charucoIds list of identifiers for each corner in charucoCorners
+ * @param board layout of ChArUco board.
+ * @param cameraMatrix input 3x3 floating-point camera matrix
+ * \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$
+ * @param distCoeffs vector of distortion coefficients
+ * \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f$ of 4, 5, 8 or 12 elements
+ * @param rvec Output vector (e.g. cv::Mat) corresponding to the rotation vector of the board
+ * (see cv::Rodrigues).
+ * @param tvec Output vector (e.g. cv::Mat) corresponding to the translation vector of the board.
+ * @param useExtrinsicGuess defines whether initial guess for \b rvec and \b tvec will be used or not.
+ *
+ * This function estimates a Charuco board pose from some detected corners.
+ * The function checks if the input corners are enough and valid to perform pose estimation.
+ * If pose estimation is valid, it returns true, else it returns false.
+ */
+CV_EXPORTS_W bool estimatePoseCharucoBoard(InputArray charucoCorners, InputArray charucoIds,
+                                           const Ptr<CharucoBoard> &board, InputArray cameraMatrix,
+                                           InputArray distCoeffs, InputOutputArray rvec,
+                                           InputOutputArray tvec, bool useExtrinsicGuess = false);
+
+
+
+
+/**
+ * @brief Draws a set of Charuco corners
+ * @param image input/output image. It must have 1 or 3 channels. The number of channels is not
+ * altered.
+ * @param charucoCorners vector of detected charuco corners
+ * @param charucoIds list of identifiers for each corner in charucoCorners
+ * @param cornerColor color of the square surrounding each corner
+ *
+ * This function draws a set of detected Charuco corners. If the identifiers vector is provided,
+ * it also draws the id of each corner.
+ */
+CV_EXPORTS_W void drawDetectedCornersCharuco(InputOutputArray image, InputArray charucoCorners,
+                                             InputArray charucoIds = noArray(),
+                                             Scalar cornerColor = Scalar(255, 0, 0));
+
+
+
+/**
+ * @brief Calibrate a camera using Charuco corners
+ *
+ * @param charucoCorners vector of detected charuco corners per frame
+ * @param charucoIds list of identifiers for each corner in charucoCorners per frame
+ * @param board Marker Board layout
+ * @param imageSize input image size
+ * @param cameraMatrix Output 3x3 floating-point camera matrix
+ * \f$A = \vecthreethree{f_x}{0}{c_x}{0}{f_y}{c_y}{0}{0}{1}\f$ . If CV\_CALIB\_USE\_INTRINSIC\_GUESS
+ * and/or CV_CALIB_FIX_ASPECT_RATIO are specified, some or all of fx, fy, cx, cy must be
+ * initialized before calling the function.
+ * @param distCoeffs Output vector of distortion coefficients
+ * \f$(k_1, k_2, p_1, p_2[, k_3[, k_4, k_5, k_6],[s_1, s_2, s_3, s_4]])\f$ of 4, 5, 8 or 12 elements
+ * @param rvecs Output vector of rotation vectors (see Rodrigues ) estimated for each board view
+ * (e.g. std::vector<cv::Mat>). That is, each k-th rotation vector together with the corresponding
+ * k-th translation vector (see the next output parameter description) brings the board pattern
+ * from the model coordinate space (in which object points are specified) to the world coordinate
+ * space, that is, a real position of the board pattern in the k-th pattern view (k=0.. *M* -1).
+ * @param tvecs Output vector of translation vectors estimated for each pattern view.
+ * @param stdDeviationsIntrinsics Output vector of standard deviations estimated for intrinsic parameters.
+ * Order of deviations values:
+ * \f$(f_x, f_y, c_x, c_y, k_1, k_2, p_1, p_2, k_3, k_4, k_5, k_6 , s_1, s_2, s_3,
+ * s_4, \tau_x, \tau_y)\f$ If one of the parameters is not estimated, its deviation is equal to zero.
+ * @param stdDeviationsExtrinsics Output vector of standard deviations estimated for extrinsic parameters.
+ * Order of deviations values: \f$(R_1, T_1, \dotsc , R_M, T_M)\f$ where M is number of pattern views,
+ * \f$R_i, T_i\f$ are concatenated 1x3 vectors.
+ * @param perViewErrors Output vector of average re-projection errors estimated for each pattern view.
+ * @param flags Different flags for the calibration process (see #calibrateCamera for details).
+ * @param criteria Termination criteria for the iterative optimization algorithm.
+ *
+ * This function calibrates a camera using a set of corners of a Charuco Board. The function
+ * receives a list of detected corners and their identifiers from several views of the Board.
+ * The function returns the final re-projection error.
+ */
+CV_EXPORTS_AS(calibrateCameraCharucoExtended) double calibrateCameraCharuco(
+    InputArrayOfArrays charucoCorners, InputArrayOfArrays charucoIds, const Ptr<CharucoBoard> &board,
+    Size imageSize, InputOutputArray cameraMatrix, InputOutputArray distCoeffs,
+    OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs,
+    OutputArray stdDeviationsIntrinsics, OutputArray stdDeviationsExtrinsics,
+    OutputArray perViewErrors, int flags = 0,
+    TermCriteria criteria = TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 30, DBL_EPSILON));
+
+/** @brief Overload of #calibrateCameraCharuco without the calibration error estimation outputs
+ * (stdDeviationsIntrinsics, stdDeviationsExtrinsics, perViewErrors); rvecs and tvecs are optional. */
+CV_EXPORTS_W double calibrateCameraCharuco(
+  InputArrayOfArrays charucoCorners, InputArrayOfArrays charucoIds, const Ptr<CharucoBoard> &board,
+  Size imageSize, InputOutputArray cameraMatrix, InputOutputArray distCoeffs,
+  OutputArrayOfArrays rvecs = noArray(), OutputArrayOfArrays tvecs = noArray(), int flags = 0,
+  TermCriteria criteria = TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 30, DBL_EPSILON));
+
+
+
+/**
+ * @brief Detect ChArUco Diamond markers
+ *
+ * @param image input image necessary for corner subpixel.
+ * @param markerCorners list of detected marker corners from detectMarkers function.
+ * @param markerIds list of marker ids in markerCorners.
+ * @param squareMarkerLengthRate rate between square and marker length:
+ * squareMarkerLengthRate = squareLength/markerLength. The real units are not necessary.
+ * @param diamondCorners output list of detected diamond corners (4 corners per diamond). The order
+ * is the same as in marker corners: top left, top right, bottom right and bottom left. Similar
+ * format to the corners returned by detectMarkers (e.g. std::vector<std::vector<cv::Point2f> > ).
+ * @param diamondIds ids of the diamonds in diamondCorners. The id of each diamond is in fact of
+ * type Vec4i, so each diamond has 4 ids, which are the ids of the aruco markers composing the
+ * diamond.
+ * @param cameraMatrix Optional camera calibration matrix.
+ * @param distCoeffs Optional camera distortion coefficients.
+ *
+ * This function detects Diamond markers from the previously detected ArUco markers. The diamonds
+ * are returned in the diamondCorners and diamondIds parameters. If camera calibration parameters
+ * are provided, the diamond search is based on reprojection. If not, diamond search is based on
+ * homography. Homography is faster than reprojection but can slightly reduce the detection rate.
+ */
+CV_EXPORTS_W void detectCharucoDiamond(InputArray image, InputArrayOfArrays markerCorners,
+                                       InputArray markerIds, float squareMarkerLengthRate,
+                                       OutputArrayOfArrays diamondCorners, OutputArray diamondIds,
+                                       InputArray cameraMatrix = noArray(),
+                                       InputArray distCoeffs = noArray());
+
+
+
+/**
+ * @brief Draw a set of detected ChArUco Diamond markers
+ *
+ * @param image input/output image. It must have 1 or 3 channels. The number of channels is not
+ * altered.
+ * @param diamondCorners positions of diamond corners in the same format returned by
+ * detectCharucoDiamond(). (e.g. std::vector<std::vector<cv::Point2f> > ). For N detected markers,
+ * the dimensions of this array should be Nx4. The order of the corners should be clockwise.
+ * @param diamondIds vector of identifiers for diamonds in diamondCorners, in the same format
+ * returned by detectCharucoDiamond() (e.g. std::vector<Vec4i>).
+ * Optional, if not provided, ids are not painted.
+ * @param borderColor color of marker borders. Rest of colors (text color and first corner color)
+ * are calculated based on this one.
+ *
+ * Given an array of detected diamonds, this function draws them in the image. The marker borders
+ * are painted, and the marker identifiers too if provided.
+ * Useful for debugging purposes.
+ */
+CV_EXPORTS_W void drawDetectedDiamonds(InputOutputArray image, InputArrayOfArrays diamondCorners,
+                                       InputArray diamondIds = noArray(),
+                                       Scalar borderColor = Scalar(0, 0, 255));
+
+
+
+
+/**
+ * @brief Draw a ChArUco Diamond marker
+ *
+ * @param dictionary dictionary of markers indicating the type of markers.
+ * @param ids list of 4 ids for each ArUco marker in the ChArUco marker.
+ * @param squareLength size of the chessboard squares in pixels.
+ * @param markerLength size of the markers in pixels.
+ * @param img output image with the marker. The size of this image will be
+ * 3*squareLength + 2*marginSize.
+ * @param marginSize minimum margins (in pixels) of the marker in the output image
+ * @param borderBits width of the marker borders.
+ *
+ * This function returns the image of a ChArUco marker, ready to be printed.
+ */
+CV_EXPORTS_W void drawCharucoDiamond(const Ptr<Dictionary> &dictionary, Vec4i ids, int squareLength,
+                                   int markerLength, OutputArray img, int marginSize = 0,
+                                   int borderBits = 1);
+
+
+/**
+ * @brief Test whether the ChArUco corners are collinear
+ *
+ * @param _board layout of ChArUco board.
+ * @param _charucoIds list of identifiers for each corner in charucoCorners per frame.
+ * @return bool value, 1 (true) if detected corners form a line, 0 (false) if they do not.
+ * solvePnP and the calibration functions will fail if the corners are collinear (true).
+ *
+ * The number of ids in _charucoIds should be <= the number of chessboard corners in the board.  This function checks whether the charuco corners are on a straight line (returns true, if so), or not (false).  Axis parallel, as well as diagonal and other straight lines are detected.  Degenerate cases: for number of ids <= 2, the function returns true.
+ */
+CV_EXPORTS_W bool testCharucoCornersCollinear(const Ptr<CharucoBoard> &_board,
+                                              InputArray _charucoIds);
+
+//! @}
+}
+}
+
+#endif
--- a/3rdparty/opencv/inc/opencv2/aruco/dictionary.hpp
+++ b/3rdparty/opencv/inc/opencv2/aruco/dictionary.hpp
@@ -0,0 +1,212 @@
+/*
+By downloading, copying, installing or using the software you agree to this
+license. If you do not agree to this license, do not download, install,
+copy or use the software.
+
+                          License Agreement
+               For Open Source Computer Vision Library
+                       (3-clause BSD License)
+
+Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+Third party copyrights are property of their respective owners.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+  * Redistributions of source code must retain the above copyright notice,
+    this list of conditions and the following disclaimer.
+
+  * Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+  * Neither the names of the copyright holders nor the names of the contributors
+    may be used to endorse or promote products derived from this software
+    without specific prior written permission.
+
+This software is provided by the copyright holders and contributors "as is" and
+any express or implied warranties, including, but not limited to, the implied
+warranties of merchantability and fitness for a particular purpose are
+disclaimed. In no event shall copyright holders or contributors be liable for
+any direct, indirect, incidental, special, exemplary, or consequential damages
+(including, but not limited to, procurement of substitute goods or services;
+loss of use, data, or profits; or business interruption) however caused
+and on any theory of liability, whether in contract, strict liability,
+or tort (including negligence or otherwise) arising in any way out of
+the use of this software, even if advised of the possibility of such damage.
+*/
+
+#ifndef __OPENCV_DICTIONARY_HPP__
+#define __OPENCV_DICTIONARY_HPP__
+
+#include <opencv2/core.hpp>
+
+namespace cv {
+namespace aruco {
+
+//! @addtogroup aruco
+//! @{
+
+
+/**
+ * @brief Dictionary/Set of markers. It contains the inner codification
+ *
+ * bytesList contains the marker codewords where
+ * - bytesList.rows is the dictionary size
+ * - each marker is encoded using `nbytes = ceil(markerSize*markerSize/8.)`
+ * - each row contains all 4 rotations of the marker, so its length is `4*nbytes`
+ *
+ * `bytesList.ptr(i)[k*nbytes + j]` is then the j-th byte of i-th marker, in its k-th rotation.
+ */
+class CV_EXPORTS_W Dictionary {
+
+    public:
+    CV_PROP_RW Mat bytesList;         // marker code information
+    CV_PROP_RW int markerSize;        // number of bits per dimension
+    CV_PROP_RW int maxCorrectionBits; // maximum number of bits that can be corrected
+
+
+    /** @brief Constructor from a byte list, a marker size and `_maxcorr` (presumably maxCorrectionBits).
+      */
+    Dictionary(const Mat &_bytesList = Mat(), int _markerSize = 0, int _maxcorr = 0);
+
+
+    /**
+    Dictionary(const Dictionary &_dictionary);
+    */
+
+
+    /** @brief Constructor taking a pointer to an existing dictionary.
+      */
+    Dictionary(const Ptr<Dictionary> &_dictionary);
+
+
+    /**
+     * @see generateCustomDictionary
+     */
+    CV_WRAP_AS(create) static Ptr<Dictionary> create(int nMarkers, int markerSize, int randomSeed=0);
+
+
+    /**
+     * @see generateCustomDictionary
+     */
+    CV_WRAP_AS(create_from) static Ptr<Dictionary> create(int nMarkers, int markerSize,
+            const Ptr<Dictionary> &baseDictionary, int randomSeed=0);
+
+    /**
+     * @see getPredefinedDictionary
+     */
+    CV_WRAP static Ptr<Dictionary> get(int dict);
+
+    /**
+     * @brief Given a matrix of bits, returns whether the marker is identified or not.
+     * The matching id (if any) and its rotation are returned by reference in idx and rotation.
+     */
+    bool identify(const Mat &onlyBits, int &idx, int &rotation, double maxCorrectionRate) const;
+
+    /**
+      * @brief Returns the distance of the input bits to the specific id. If allRotations is true,
+      * the four possible rotations of the bits are considered
+      */
+    int getDistanceToId(InputArray bits, int id, bool allRotations = true) const;
+
+
+    /**
+     * @brief Draw a canonical marker image
+     */
+    CV_WRAP void drawMarker(int id, int sidePixels, OutputArray _img, int borderBits = 1) const;
+
+
+    /**
+      * @brief Transform matrix of bits to list of bytes in the 4 rotations
+      */
+    CV_WRAP static Mat getByteListFromBits(const Mat &bits);
+
+
+    /**
+      * @brief Transform list of bytes to matrix of bits
+      */
+    CV_WRAP static Mat getBitsFromByteList(const Mat &byteList, int markerSize);
+};
+
+
+
+
+/**
+ * @brief Predefined marker dictionaries/sets
+ * Each dictionary indicates the number of bits and the number of markers contained
+ * - DICT_ARUCO_ORIGINAL: standard ArUco Library Markers. 1024 markers, 5x5 bits, 0 minimum
+ *                        distance
+ */
+enum PREDEFINED_DICTIONARY_NAME {
+    DICT_4X4_50 = 0,
+    DICT_4X4_100,
+    DICT_4X4_250,
+    DICT_4X4_1000,
+    DICT_5X5_50,
+    DICT_5X5_100,
+    DICT_5X5_250,
+    DICT_5X5_1000,
+    DICT_6X6_50,
+    DICT_6X6_100,
+    DICT_6X6_250,
+    DICT_6X6_1000,
+    DICT_7X7_50,
+    DICT_7X7_100,
+    DICT_7X7_250,
+    DICT_7X7_1000,
+    DICT_ARUCO_ORIGINAL,
+    DICT_APRILTAG_16h5,     ///< 4x4 bits, minimum hamming distance between any two codes = 5, 30 codes
+    DICT_APRILTAG_25h9,     ///< 5x5 bits, minimum hamming distance between any two codes = 9, 35 codes
+    DICT_APRILTAG_36h10,    ///< 6x6 bits, minimum hamming distance between any two codes = 10, 2320 codes
+    DICT_APRILTAG_36h11     ///< 6x6 bits, minimum hamming distance between any two codes = 11, 587 codes
+};
+
+
+/**
+  * @brief Returns the predefined dictionary selected by a PREDEFINED_DICTIONARY_NAME enumerator
+  */
+CV_EXPORTS Ptr<Dictionary> getPredefinedDictionary(PREDEFINED_DICTIONARY_NAME name);
+
+
+/**
+  * @brief Returns the predefined dictionary referenced by the integer DICT_* value `dict`.
+  */
+CV_EXPORTS_W Ptr<Dictionary> getPredefinedDictionary(int dict);
+
+
+/**
+  * @brief Overload taking no baseDictionary. @see generateCustomDictionary
+  */
+CV_EXPORTS_AS(custom_dictionary) Ptr<Dictionary> generateCustomDictionary(
+        int nMarkers,
+        int markerSize,
+        int randomSeed=0);
+
+
+/**
+  * @brief Generates a new customizable marker dictionary
+  *
+  * @param nMarkers number of markers in the dictionary
+  * @param markerSize number of bits per dimension of each marker
+  * @param baseDictionary Include the markers in this dictionary at the beginning (optional)
+  * @param randomSeed a user supplied seed for theRNG()
+  *
+  * This function creates a new dictionary composed of nMarkers markers, each marker composed
+  * of markerSize x markerSize bits. If baseDictionary is provided, its markers are directly
+  * included and the rest are generated based on them. If the size of baseDictionary is higher
+  * than nMarkers, only the first nMarkers in baseDictionary are taken and no new marker is added.
+  */
+CV_EXPORTS_AS(custom_dictionary_from) Ptr<Dictionary> generateCustomDictionary(
+        int nMarkers,
+        int markerSize,
+        const Ptr<Dictionary> &baseDictionary,
+        int randomSeed=0);
+
+
+
+//! @}
+}
+}
+
+#endif
--- a/3rdparty/opencv/inc/opencv2/barcode.hpp
+++ b/3rdparty/opencv/inc/opencv2/barcode.hpp
@@ -0,0 +1,101 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+// Copyright (c) 2020-2021 darkliang wangberlinT Certseeds
+
+#ifndef __OPENCV_BARCODE_HPP__
+#define __OPENCV_BARCODE_HPP__
+
+#include <opencv2/core.hpp>
+#include <ostream>
+
+/** @defgroup barcode Barcode detecting and decoding methods
+*/
+
+namespace cv {
+namespace barcode {
+
+//! @addtogroup barcode
+//! @{
+
+enum BarcodeType  // type tag reported for each decoded barcode (see the decoded_type parameters)
+{
+    NONE, EAN_8, EAN_13, UPC_A, UPC_E, UPC_EAN_EXTENSION
+};
+
+static inline std::ostream &operator<<(std::ostream &out, const BarcodeType &barcode_type)
+{   // Writes the enumerator's name to `out` and returns `out` so insertions can be chained.
+    switch (barcode_type)
+    {
+        case BarcodeType::EAN_8:
+            out << "EAN_8";
+            break;
+        case BarcodeType::EAN_13:
+            out << "EAN_13";
+            break;
+        case BarcodeType::UPC_E:
+            out << "UPC_E";
+            break;
+        case BarcodeType::UPC_A:
+            out << "UPC_A";
+            break;
+        case BarcodeType::UPC_EAN_EXTENSION:
+            out << "UPC_EAN_EXTENSION";
+            break;
+        default:  // NONE, plus any value that has no case above
+            out << "NONE";
+    }
+    return out;
+}
+
+class CV_EXPORTS_W BarcodeDetector
+{
+public:
+    /**
+     * @brief Initialize the BarcodeDetector.
+     * @param prototxt_path prototxt file path for the super resolution model
+     * @param model_path model file path for the super resolution model
+     */
+    CV_WRAP BarcodeDetector(const std::string &prototxt_path = "", const std::string &model_path = "");
+
+    ~BarcodeDetector();  // declared here only; the definition is not part of this header
+
+    /** @brief Detects Barcode in image and returns the rectangle(s) containing the code.
+     *
+     * @param img grayscale or color (BGR) image containing (or not) Barcode.
+     * @param points Output vector of vector of vertices of the minimum-area rotated rectangle containing the codes.
+     * For N detected barcodes, the dimensions of this array should be [N][4].
+     * Order of four points in vector<Point2f> is bottomLeft, topLeft, topRight, bottomRight.
+     */
+    CV_WRAP bool detect(InputArray img, OutputArray points) const;
+
+    /** @brief Decodes barcode in image once it's found by the detect() method.
+     *
+     * @param img grayscale or color (BGR) image containing bar code.
+     * @param points vector of rotated rectangle vertices found by detect() method (or some other algorithm).
+     * For N detected barcodes, the dimensions of this array should be [N][4].
+     * Order of four points in vector<Point2f> is bottomLeft, topLeft, topRight, bottomRight.
+     * @param decoded_info UTF8-encoded output vector of string or empty vector of string if the codes cannot be decoded.
+     * @param decoded_type vector of BarcodeType, specifies the type of these barcodes
+     */
+    CV_WRAP bool decode(InputArray img, InputArray points, CV_OUT std::vector<std::string> &decoded_info, CV_OUT
+                        std::vector<BarcodeType> &decoded_type) const;
+
+    /** @brief Both detects and decodes barcode
+     *
+     * @param img grayscale or color (BGR) image containing barcode.
+     * @param decoded_info UTF8-encoded output vector of string(s) or empty vector of string if the codes cannot be decoded.
+     * @param decoded_type vector of BarcodeType, specifies the type of these barcodes
+     * @param points optional output vector of vertices of the found barcode rectangle. Will be empty if not found.
+     */
+    CV_WRAP bool detectAndDecode(InputArray img, CV_OUT std::vector<std::string> &decoded_info, CV_OUT
+                                 std::vector<BarcodeType> &decoded_type, OutputArray points = noArray()) const;
+
+protected:
+    struct Impl;  // implementation type, only forward-declared in this header
+    Ptr<Impl> p;  // handle to the implementation object
+};
+//! @}
+}
+} // cv::barcode::
+#endif //__OPENCV_BARCODE_HPP__
--- a/3rdparty/opencv/inc/opencv2/bgsegm.hpp
+++ b/3rdparty/opencv/inc/opencv2/bgsegm.hpp
@@ -0,0 +1,380 @@
+/*
+By downloading, copying, installing or using the software you agree to this
+license. If you do not agree to this license, do not download, install,
+copy or use the software.
+
+
+                          License Agreement
+               For Open Source Computer Vision Library
+                       (3-clause BSD License)
+
+Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+Third party copyrights are property of their respective owners.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+  * Redistributions of source code must retain the above copyright notice,
+    this list of conditions and the following disclaimer.
+
+  * Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+  * Neither the names of the copyright holders nor the names of the contributors
+    may be used to endorse or promote products derived from this software
+    without specific prior written permission.
+
+This software is provided by the copyright holders and contributors "as is" and
+any express or implied warranties, including, but not limited to, the implied
+warranties of merchantability and fitness for a particular purpose are
+disclaimed. In no event shall copyright holders or contributors be liable for
+any direct, indirect, incidental, special, exemplary, or consequential damages
+(including, but not limited to, procurement of substitute goods or services;
+loss of use, data, or profits; or business interruption) however caused
+and on any theory of liability, whether in contract, strict liability,
+or tort (including negligence or otherwise) arising in any way out of
+the use of this software, even if advised of the possibility of such damage.
+*/
+
+#ifndef __OPENCV_BGSEGM_HPP__
+#define __OPENCV_BGSEGM_HPP__
+
+#include "opencv2/video.hpp"
+
+#ifdef __cplusplus
+
+/** @defgroup bgsegm Improved Background-Foreground Segmentation Methods
+*/
+
+namespace cv
+{
+namespace bgsegm
+{
+
+//! @addtogroup bgsegm
+//! @{
+
+/** @brief Gaussian Mixture-based Background/Foreground Segmentation Algorithm.
+
+The class implements the algorithm described in @cite KB2001 . All accessors declared here are pure
+virtual, so this is an interface only; instances are obtained from createBackgroundSubtractorMOG(). */
+class CV_EXPORTS_W BackgroundSubtractorMOG : public BackgroundSubtractor
+{
+public:
+    CV_WRAP virtual int getHistory() const = 0; //!< Returns the length of the history (see the `history` argument of createBackgroundSubtractorMOG()).
+    CV_WRAP virtual void setHistory(int nframes) = 0; //!< Sets the length of the history; the parameter name suggests the unit is frames (TODO confirm).
+
+    CV_WRAP virtual int getNMixtures() const = 0; //!< Returns the number of Gaussian mixtures (see `nmixtures` of createBackgroundSubtractorMOG()).
+    CV_WRAP virtual void setNMixtures(int nmix) = 0; //!< Sets the number of Gaussian mixtures.
+
+    CV_WRAP virtual double getBackgroundRatio() const = 0; //!< Returns the background ratio (see `backgroundRatio` of createBackgroundSubtractorMOG()).
+    CV_WRAP virtual void setBackgroundRatio(double backgroundRatio) = 0; //!< Sets the background ratio.
+
+    CV_WRAP virtual double getNoiseSigma() const = 0; //!< Returns the noise strength (see `noiseSigma` of createBackgroundSubtractorMOG()).
+    CV_WRAP virtual void setNoiseSigma(double noiseSigma) = 0; //!< Sets the noise strength; the factory documents 0 as "some automatic value".
+};
+
+/** @brief Creates a mixture-of-Gaussians background subtractor.
+
+@param history Length of the history.
+@param nmixtures Number of Gaussian mixtures.
+@param backgroundRatio Background ratio.
+@param noiseSigma Noise strength (standard deviation of the brightness or each color channel). 0
+means some automatic value.
+@return Ptr to the created BackgroundSubtractorMOG. Every argument has a default (200, 5, 0.7, 0). */
+CV_EXPORTS_W Ptr<BackgroundSubtractorMOG>
+    createBackgroundSubtractorMOG(int history=200, int nmixtures=5,
+                                  double backgroundRatio=0.7, double noiseSigma=0);
+
+
+/** @brief Background Subtractor module based on the algorithm given in @cite Gold2012 .
+
+ Takes a series of images and returns a sequence of mask (8UC1)
+ images of the same size, where 255 indicates Foreground and 0 represents Background.
+ This class implements an algorithm described in "Visual Tracking of Human Visitors under
+ Variable-Lighting Conditions for a Responsive Audio Art Installation," A. Godbehere,
+ A. Matsukawa, K. Goldberg, American Control Conference, Montreal, June 2012.
+ The accessors below are pure virtual; instances are created with createBackgroundSubtractorGMG(). */
+class CV_EXPORTS_W BackgroundSubtractorGMG : public BackgroundSubtractor
+{
+public:
+    /** @brief Returns the total number of distinct colors to maintain in the histogram.
+    */
+    CV_WRAP virtual int getMaxFeatures() const = 0;
+    /** @brief Sets the total number of distinct colors to maintain in the histogram.
+    */
+    CV_WRAP virtual void setMaxFeatures(int maxFeatures) = 0;
+
+    /** @brief Returns the learning rate of the algorithm.
+
+    It lies between 0.0 and 1.0. It determines how quickly features are "forgotten" from
+    histograms.
+     */
+    CV_WRAP virtual double getDefaultLearningRate() const = 0;
+    /** @brief Sets the learning rate of the algorithm.
+    */
+    CV_WRAP virtual void setDefaultLearningRate(double lr) = 0;
+
+    /** @brief Returns the number of frames used to initialize the background model.
+    */
+    CV_WRAP virtual int getNumFrames() const = 0;
+    /** @brief Sets the number of frames used to initialize the background model.
+    */
+    CV_WRAP virtual void setNumFrames(int nframes) = 0;
+
+    /** @brief Returns the parameter used for quantization of the color space.
+
+    It is the number of discrete levels in each channel to be used in histograms.
+     */
+    CV_WRAP virtual int getQuantizationLevels() const = 0;
+    /** @brief Sets the parameter used for quantization of the color space.
+    */
+    CV_WRAP virtual void setQuantizationLevels(int nlevels) = 0;
+
+    /** @brief Returns the prior probability that each individual pixel is a background pixel.
+    */
+    CV_WRAP virtual double getBackgroundPrior() const = 0;
+    /** @brief Sets the prior probability that each individual pixel is a background pixel.
+    */
+    CV_WRAP virtual void setBackgroundPrior(double bgprior) = 0;
+
+    /** @brief Returns the kernel radius used for morphological operations.
+    */
+    CV_WRAP virtual int getSmoothingRadius() const = 0;
+    /** @brief Sets the kernel radius used for morphological operations.
+    */
+    CV_WRAP virtual void setSmoothingRadius(int radius) = 0;
+
+    /** @brief Returns the value of the decision threshold.
+
+    The decision value is the value above which a pixel is determined to be foreground (FG).
+     */
+    CV_WRAP virtual double getDecisionThreshold() const = 0;
+    /** @brief Sets the value of the decision threshold.
+    */
+    CV_WRAP virtual void setDecisionThreshold(double thresh) = 0;
+
+    /** @brief Returns the status of background model update.
+    */
+    CV_WRAP virtual bool getUpdateBackgroundModel() const = 0;
+    /** @brief Sets the status of background model update.
+    */
+    CV_WRAP virtual void setUpdateBackgroundModel(bool update) = 0;
+
+    /** @brief Returns the minimum value taken on by pixels in the image sequence. Usually 0.
+    */
+    CV_WRAP virtual double getMinVal() const = 0;
+    /** @brief Sets the minimum value taken on by pixels in the image sequence.
+    */
+    CV_WRAP virtual void setMinVal(double val) = 0;
+
+    /** @brief Returns the maximum value taken on by pixels in the image sequence, e.g. 1.0 or 255.
+    */
+    CV_WRAP virtual double getMaxVal() const = 0;
+    /** @brief Sets the maximum value taken on by pixels in the image sequence.
+    */
+    CV_WRAP virtual void setMaxVal(double val) = 0;
+};
+
+/** @brief Creates a GMG Background Subtractor.
+
+@param initializationFrames number of frames used to initialize the background models.
+@param decisionThreshold Threshold value, above which it is marked foreground, else background.
+@return Ptr to the created BackgroundSubtractorGMG. Both arguments have defaults (120, 0.8). */
+CV_EXPORTS_W Ptr<BackgroundSubtractorGMG> createBackgroundSubtractorGMG(int initializationFrames=120,
+                                                                        double decisionThreshold=0.8);
+
+/** @brief Background subtraction based on counting.
+
+  About as fast as MOG2 on a high-end system.
+  More than twice as fast as MOG2 on cheap hardware (benchmarked on Raspberry Pi 3).
+
+  %Algorithm by Sagi Zeevi ( https://github.com/sagi-z/BackgroundSubtractorCNT )
+*/
+class CV_EXPORTS_W BackgroundSubtractorCNT  : public BackgroundSubtractor
+{
+public:
+    // BackgroundSubtractor interface, re-declared here as pure virtual overrides (CV_OVERRIDE)
+    CV_WRAP virtual void apply(InputArray image, OutputArray fgmask, double learningRate=-1) CV_OVERRIDE = 0;
+    CV_WRAP virtual void getBackgroundImage(OutputArray backgroundImage) const CV_OVERRIDE = 0;
+
+    /** @brief Returns the number of frames with the same pixel color needed to consider a pixel stable.
+    */
+    CV_WRAP virtual int getMinPixelStability() const = 0;
+    /** @brief Sets the number of frames with the same pixel color needed to consider a pixel stable.
+    */
+    CV_WRAP virtual void setMinPixelStability(int value) = 0;
+
+    /** @brief Returns the maximum allowed credit for a pixel in history.
+    */
+    CV_WRAP virtual int getMaxPixelStability() const = 0;
+    /** @brief Sets the maximum allowed credit for a pixel in history.
+    */
+    CV_WRAP virtual void setMaxPixelStability(int value) = 0;
+
+    /** @brief Returns whether a pixel is given credit for being stable for a long time.
+    */
+    CV_WRAP virtual bool getUseHistory() const = 0;
+    /** @brief Sets whether a pixel is given credit for being stable for a long time.
+    */
+    CV_WRAP virtual void setUseHistory(bool value) = 0;
+
+    /** @brief Returns whether the algorithm is parallelized.
+    */
+    CV_WRAP virtual bool getIsParallel() const = 0;
+    /** @brief Sets whether the algorithm is parallelized.
+    */
+    CV_WRAP virtual void setIsParallel(bool value) = 0;
+};
+
+/** @brief Creates a CNT Background Subtractor.
+
+@param minPixelStability number of frames with same pixel color to consider stable
+@param useHistory determines if we're giving a pixel credit for being stable for a long time
+@param maxPixelStability maximum allowed credit for a pixel in history
+@param isParallel determines if we're parallelizing the algorithm
+@return Ptr to the created BackgroundSubtractorCNT. Every argument has a default. */
+
+CV_EXPORTS_W Ptr<BackgroundSubtractorCNT>
+createBackgroundSubtractorCNT(int minPixelStability = 15,
+                              bool useHistory = true,
+                              int maxPixelStability = 15*60, // evaluates to 900
+                              bool isParallel = true);
+
+enum LSBPCameraMotionCompensation { // values for the `mc` argument of createBackgroundSubtractorGSOC() / createBackgroundSubtractorLSBP()
+    LSBP_CAMERA_MOTION_COMPENSATION_NONE = 0, //!< no camera motion compensation (the factories' default)
+    LSBP_CAMERA_MOTION_COMPENSATION_LK //!< value 1; NOTE(review): "LK" presumably stands for Lucas-Kanade - confirm against the implementation
+};
+
+/** @brief A different, improved background subtraction algorithm called GSOC; the name reflects that it was implemented during GSOC and did not originate from any paper.
+
+This algorithm demonstrates better performance on CDNET 2014 dataset compared to other algorithms in OpenCV.
+ Instances are created with createBackgroundSubtractorGSOC(). */
+class CV_EXPORTS_W BackgroundSubtractorGSOC : public BackgroundSubtractor
+{
+public:
+    // BackgroundSubtractor interface, re-declared here as pure virtual overrides (CV_OVERRIDE)
+    CV_WRAP virtual void apply(InputArray image, OutputArray fgmask, double learningRate=-1) CV_OVERRIDE = 0;
+
+    CV_WRAP virtual void getBackgroundImage(OutputArray backgroundImage) const CV_OVERRIDE = 0;
+};
+
+/** @brief Background Subtraction using Local SVD Binary Pattern. More details about the algorithm can be found at @cite LGuo2016 .
+ Instances are created with createBackgroundSubtractorLSBP(). */
+class CV_EXPORTS_W BackgroundSubtractorLSBP : public BackgroundSubtractor
+{
+public:
+    // BackgroundSubtractor interface, re-declared here as pure virtual overrides (CV_OVERRIDE)
+    CV_WRAP virtual void apply(InputArray image, OutputArray fgmask, double learningRate=-1) CV_OVERRIDE = 0;
+
+    CV_WRAP virtual void getBackgroundImage(OutputArray backgroundImage) const CV_OVERRIDE = 0;
+};
+
+/** @brief This is for calculation of the LSBP descriptors.
+ Only static functions are declared (no data members); none of them carries CV_WRAP. */
+class CV_EXPORTS_W BackgroundSubtractorLSBPDesc
+{
+public:
+    static void calcLocalSVDValues(OutputArray localSVDValues, const Mat& frame); // output: localSVDValues; input: frame
+
+    static void computeFromLocalSVDValues(OutputArray desc, const Mat& localSVDValues, const Point2i* LSBPSamplePoints); // NOTE(review): LSBPSamplePoints is a raw pointer with no length argument - expected point count is not visible here; confirm
+
+    static void compute(OutputArray desc, const Mat& frame, const Point2i* LSBPSamplePoints); // presumably calcLocalSVDValues + computeFromLocalSVDValues in one call - TODO confirm
+};
+
+/** @brief Creates an instance of BackgroundSubtractorGSOC algorithm.
+
+A different, improved algorithm called GSOC; the name reflects that it was implemented during GSOC and did not originate from any paper.
+
+@param mc Whether to use camera motion compensation.
+@param nSamples Number of samples to maintain at each point of the frame.
+@param replaceRate Probability of replacing the old sample - how fast the model will update itself.
+@param propagationRate Probability of propagating to neighbors.
+@param hitsThreshold How many positives the sample must get before it will be considered as a possible replacement.
+@param alpha Scale coefficient for threshold.
+@param beta Bias coefficient for threshold.
+@param blinkingSupressionDecay Blinking suppression decay factor.
+@param blinkingSupressionMultiplier Blinking suppression multiplier.
+@param noiseRemovalThresholdFacBG Strength of the noise removal for background points.
+@param noiseRemovalThresholdFacFG Strength of the noise removal for foreground points.
+@return Ptr to the created BackgroundSubtractorGSOC. `mc` takes a LSBPCameraMotionCompensation value passed as int. */
+CV_EXPORTS_W Ptr<BackgroundSubtractorGSOC> createBackgroundSubtractorGSOC(int mc = LSBP_CAMERA_MOTION_COMPENSATION_NONE, int nSamples = 20, float replaceRate = 0.003f, float propagationRate = 0.01f, int hitsThreshold = 32, float alpha = 0.01f, float beta = 0.0022f, float blinkingSupressionDecay = 0.1f, float blinkingSupressionMultiplier = 0.1f, float noiseRemovalThresholdFacBG = 0.0004f, float noiseRemovalThresholdFacFG = 0.0008f);
+
+/** @brief Creates an instance of BackgroundSubtractorLSBP algorithm.
+
+Background Subtraction using Local SVD Binary Pattern. More details about the algorithm can be found at @cite LGuo2016 .
+
+@param mc Whether to use camera motion compensation.
+@param nSamples Number of samples to maintain at each point of the frame.
+@param LSBPRadius LSBP descriptor radius.
+@param Tlower Lower bound for T-values. See @cite LGuo2016 for details.
+@param Tupper Upper bound for T-values. See @cite LGuo2016 for details.
+@param Tinc Increase step for T-values. See @cite LGuo2016 for details.
+@param Tdec Decrease step for T-values. See @cite LGuo2016 for details.
+@param Rscale Scale coefficient for threshold values.
+@param Rincdec Increase/Decrease step for threshold values.
+@param noiseRemovalThresholdFacBG Strength of the noise removal for background points.
+@param noiseRemovalThresholdFacFG Strength of the noise removal for foreground points.
+@param LSBPthreshold Threshold for LSBP binary string.
+@param minCount Minimal number of matches for sample to be considered as foreground.
+@return Ptr to the created BackgroundSubtractorLSBP. `mc` takes a LSBPCameraMotionCompensation value passed as int. */
+CV_EXPORTS_W Ptr<BackgroundSubtractorLSBP> createBackgroundSubtractorLSBP(int mc = LSBP_CAMERA_MOTION_COMPENSATION_NONE, int nSamples = 20, int LSBPRadius = 16, float Tlower = 2.0f, float Tupper = 32.0f, float Tinc = 1.0f, float Tdec = 0.05f, float Rscale = 10.0f, float Rincdec = 0.005f, float noiseRemovalThresholdFacBG = 0.0004f, float noiseRemovalThresholdFacFG = 0.0008f, int LSBPthreshold = 8, int minCount = 2);
+
+/** @brief Synthetic frame sequence generator for testing background subtraction algorithms.
+
+ It generates a moving object on top of the background.
+ It applies some distortion to the background to make the test more complex.
+ */
+class CV_EXPORTS_W SyntheticSequenceGenerator : public Algorithm
+{
+private:
+    const double amplitude; // const data members: the implicitly-declared copy assignment operator is deleted
+    const double wavelength; // the four const doubles share their names with the constructor arguments documented below
+    const double wavespeed;
+    const double objspeed;
+    unsigned timeStep; // presumably the index of the current frame - TODO confirm in the implementation
+    Point2d pos; // presumably the current object position - TODO confirm
+    Point2d dir; // presumably the current object movement direction - TODO confirm
+    Mat background;
+    Mat object;
+    RNG rng; // random number generator state; how it is seeded is not visible in this header
+
+public:
+    /** @brief Creates an instance of SyntheticSequenceGenerator.
+
+    @param background Background image for object.
+    @param object Object image which will move slowly over the background.
+    @param amplitude Amplitude of wave distortion applied to background.
+    @param wavelength Length of waves in distortion applied to background.
+    @param wavespeed How fast waves will move.
+    @param objspeed How fast object will fly over background.
+    Unlike createSyntheticSequenceGenerator(), this constructor has no default arguments. */
+    CV_WRAP SyntheticSequenceGenerator(InputArray background, InputArray object, double amplitude, double wavelength, double wavespeed, double objspeed);
+
+    /** @brief Obtain the next frame in the sequence.
+
+    @param frame Output frame.
+    @param gtMask Output ground-truth (reference) segmentation mask object/background.
+    Non-const member function, so a call may modify the generator's state. */
+    CV_WRAP void getNextFrame(OutputArray frame, OutputArray gtMask);
+};
+
+/** @brief Creates an instance of SyntheticSequenceGenerator.
+
+@param background Background image for object.
+@param object Object image which will move slowly over the background.
+@param amplitude Amplitude of wave distortion applied to background.
+@param wavelength Length of waves in distortion applied to background.
+@param wavespeed How fast waves will move.
+@param objspeed How fast object will fly over background.
+@return Ptr to the created SyntheticSequenceGenerator. The four numeric arguments default to 2.0, 20.0, 0.2 and 6.0. */
+CV_EXPORTS_W Ptr<SyntheticSequenceGenerator> createSyntheticSequenceGenerator(InputArray background, InputArray object, double amplitude = 2.0, double wavelength = 20.0, double wavespeed = 0.2, double objspeed = 6.0);
+
+//! @}
+
+}
+}
+
+#endif
+#endif
--- a/3rdparty/opencv/inc/opencv2/bioinspired.hpp
+++ b/3rdparty/opencv/inc/opencv2/bioinspired.hpp
@@ -0,0 +1,60 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_BIOINSPIRED_HPP__
+#define __OPENCV_BIOINSPIRED_HPP__
+
+#include "opencv2/core.hpp"
+#include "opencv2/bioinspired/retina.hpp"
+#include "opencv2/bioinspired/retinafasttonemapping.hpp"
+#include "opencv2/bioinspired/transientareassegmentationmodule.hpp"
+
+/** @defgroup bioinspired Biologically inspired vision models and derived tools
+
+The module provides biological visual systems models (human visual system and others). It also
+provides derived objects that take advantage of those bio-inspired models.
+
+@ref bioinspired_retina
+
+*/
+
+#endif
--- a/3rdparty/opencv/inc/opencv2/bioinspired/bioinspired.hpp
+++ b/3rdparty/opencv/inc/opencv2/bioinspired/bioinspired.hpp
@@ -0,0 +1,48 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __OPENCV_BUILD
+#error this is a compatibility header which should not be used inside the OpenCV library
+#endif
+
+#include "opencv2/bioinspired.hpp"
--- a/3rdparty/opencv/inc/opencv2/bioinspired/retina.hpp
+++ b/3rdparty/opencv/inc/opencv2/bioinspired/retina.hpp
@@ -0,0 +1,454 @@
+/*#******************************************************************************
+ ** IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ **
+ ** By downloading, copying, installing or using the software you agree to this license.
+ ** If you do not agree to this license, do not download, install,
+ ** copy or use the software.
+ **
+ **
+ ** bioinspired : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author & collaborators for computer vision applications since his thesis with Alice Caplier at Gipsa-Lab.
+ ** Use: extract still images & image sequences features, from contours details to motion spatio-temporal features, etc. for high level visual scene analysis. Also contribute to image enhancement/compression such as tone mapping.
+ **
+ ** Maintainers : Listic lab (code author current affiliation & applications) and Gipsa Lab (original research origins & applications)
+ **
+ **  Creation - enhancement process 2007-2015
+ **      Author: Alexandre Benoit (benoit.alexandre.vision@gmail.com), LISTIC lab, Annecy le vieux, France
+ **
+ ** These algorithms have been developed by Alexandre BENOIT since his thesis with Alice Caplier at Gipsa-Lab (www.gipsa-lab.inpg.fr) and the research he pursues at LISTIC Lab (www.listic.univ-savoie.fr).
+ ** Refer to the following research paper for more information:
+ ** Benoit A., Caplier A., Durette B., Herault, J., "USING HUMAN VISUAL SYSTEM MODELING FOR BIO-INSPIRED LOW LEVEL IMAGE PROCESSING", Elsevier, Computer Vision and Image Understanding 114 (2010), pp. 758-773, DOI: http://dx.doi.org/10.1016/j.cviu.2010.01.011
+ ** This work has been carried out thanks to Jeanny Herault, whose research and great discussions are the basis of all this work; please take a look at his book:
+ ** Vision: Images, Signals and Neural Networks: Models of Neural Processing in Visual Perception (Progress in Neural Processing),By: Jeanny Herault, ISBN: 9814273686. WAPI (Tower ID): 113266891.
+ **
+ ** The retina filter includes the research contributions of phd/research colleagues from which code has been redrawn by the author :
+ ** _take a look at the retinacolor.hpp module to discover Brice Chaix de Lavarene color mosaicing/demosaicing and the reference paper:
+ ** ====> B. Chaix de Lavarene, D. Alleysson, B. Durette, J. Herault (2007). "Efficient demosaicing through recursive filtering", IEEE International Conference on Image Processing ICIP 2007
+ ** _take a look at imagelogpolprojection.hpp to discover retina spatial log sampling which originates from Barthelemy Durette phd with Jeanny Herault. A Retina / V1 cortex projection is also proposed and originates from Jeanny's discussions.
+ ** ====> more information in the above-cited Jeanny Herault's book.
+ **
+ **                          License Agreement
+ **               For Open Source Computer Vision Library
+ **
+ ** Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+ ** Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
+ **
+ **               For Human Visual System tools (bioinspired)
+ ** Copyright (C) 2007-2015, LISTIC Lab, Annecy le Vieux and GIPSA Lab, Grenoble, France, all rights reserved.
+ **
+ ** Third party copyrights are property of their respective owners.
+ **
+ ** Redistribution and use in source and binary forms, with or without modification,
+ ** are permitted provided that the following conditions are met:
+ **
+ ** * Redistributions of source code must retain the above copyright notice,
+ **    this list of conditions and the following disclaimer.
+ **
+ ** * Redistributions in binary form must reproduce the above copyright notice,
+ **    this list of conditions and the following disclaimer in the documentation
+ **    and/or other materials provided with the distribution.
+ **
+ ** * The name of the copyright holders may not be used to endorse or promote products
+ **    derived from this software without specific prior written permission.
+ **
+ ** This software is provided by the copyright holders and contributors "as is" and
+ ** any express or implied warranties, including, but not limited to, the implied
+ ** warranties of merchantability and fitness for a particular purpose are disclaimed.
+ ** In no event shall the Intel Corporation or contributors be liable for any direct,
+ ** indirect, incidental, special, exemplary, or consequential damages
+ ** (including, but not limited to, procurement of substitute goods or services;
+ ** loss of use, data, or profits; or business interruption) however caused
+ ** and on any theory of liability, whether in contract, strict liability,
+ ** or tort (including negligence or otherwise) arising in any way out of
+ ** the use of this software, even if advised of the possibility of such damage.
+ *******************************************************************************/
+
+#ifndef __OPENCV_BIOINSPIRED_RETINA_HPP__
+#define __OPENCV_BIOINSPIRED_RETINA_HPP__
+
+/**
+@file
+@date Jul 19, 2011
+@author Alexandre Benoit
+*/
+
+#include "opencv2/core.hpp" // for all OpenCV core functionalities access, including cv::Exception support
+
+
+namespace cv{
+namespace bioinspired{
+
+//! @addtogroup bioinspired
+//! @{
+
+enum { // color sampling methods; unnamed enum, so the constants are plain values 0, 1, 2 in cv::bioinspired
+    RETINA_COLOR_RANDOM, //!< (0) each pixel position is randomly assigned R, G or B
+    RETINA_COLOR_DIAGONAL,//!< (1) diagonal sampling: line 1 is RGBRGBRGB..., line 2 BRGBRGBRG..., line 3 GBRGBRGBR...
+    RETINA_COLOR_BAYER//!< (2) standard Bayer sampling
+};
+
+
+/** @brief retina model parameters structure
+
+    For better clarity, check the explanations in the comments of the methods setupOPLandIPLParvoChannel and setupIPLMagnoChannel.
+
+    Here is the default configuration file of the retina module. It gives results such as the first
+    retina output shown on the top of this page.
+
+    @code{xml}
+    <?xml version="1.0"?>
+    <opencv_storage>
+    <OPLandIPLparvo>
+        <colorMode>1</colorMode>
+        <normaliseOutput>1</normaliseOutput>
+        <photoreceptorsLocalAdaptationSensitivity>7.5e-01</photoreceptorsLocalAdaptationSensitivity>
+        <photoreceptorsTemporalConstant>9.0e-01</photoreceptorsTemporalConstant>
+        <photoreceptorsSpatialConstant>5.3e-01</photoreceptorsSpatialConstant>
+        <horizontalCellsGain>0.01</horizontalCellsGain>
+        <hcellsTemporalConstant>0.5</hcellsTemporalConstant>
+        <hcellsSpatialConstant>7.</hcellsSpatialConstant>
+        <ganglionCellsSensitivity>7.5e-01</ganglionCellsSensitivity></OPLandIPLparvo>
+    <IPLmagno>
+        <normaliseOutput>1</normaliseOutput>
+        <parasolCells_beta>0.</parasolCells_beta>
+        <parasolCells_tau>0.</parasolCells_tau>
+        <parasolCells_k>7.</parasolCells_k>
+        <amacrinCellsTemporalCutFrequency>2.0e+00</amacrinCellsTemporalCutFrequency>
+        <V0CompressionParameter>9.5e-01</V0CompressionParameter>
+        <localAdaptintegration_tau>0.</localAdaptintegration_tau>
+        <localAdaptintegration_k>7.</localAdaptintegration_k></IPLmagno>
+    </opencv_storage>
+    @endcode
+
+    Here is the "realistic" setup used to obtain the second retina output shown on the top of this page.
+
+    @code{xml}
+    <?xml version="1.0"?>
+    <opencv_storage>
+    <OPLandIPLparvo>
+      <colorMode>1</colorMode>
+      <normaliseOutput>1</normaliseOutput>
+      <photoreceptorsLocalAdaptationSensitivity>8.9e-01</photoreceptorsLocalAdaptationSensitivity>
+      <photoreceptorsTemporalConstant>9.0e-01</photoreceptorsTemporalConstant>
+      <photoreceptorsSpatialConstant>5.3e-01</photoreceptorsSpatialConstant>
+      <horizontalCellsGain>0.3</horizontalCellsGain>
+      <hcellsTemporalConstant>0.5</hcellsTemporalConstant>
+      <hcellsSpatialConstant>7.</hcellsSpatialConstant>
+      <ganglionCellsSensitivity>8.9e-01</ganglionCellsSensitivity></OPLandIPLparvo>
+    <IPLmagno>
+      <normaliseOutput>1</normaliseOutput>
+      <parasolCells_beta>0.</parasolCells_beta>
+      <parasolCells_tau>0.</parasolCells_tau>
+      <parasolCells_k>7.</parasolCells_k>
+      <amacrinCellsTemporalCutFrequency>2.0e+00</amacrinCellsTemporalCutFrequency>
+      <V0CompressionParameter>9.5e-01</V0CompressionParameter>
+      <localAdaptintegration_tau>0.</localAdaptintegration_tau>
+      <localAdaptintegration_k>7.</localAdaptintegration_k></IPLmagno>
+    </opencv_storage>
+    @endcode
+      */
+    struct RetinaParameters{ // plain parameter bundle: one nested struct (plus one member of that type) per retina channel
+        //! Outer Plexiform Layer (OPL) and Inner Plexiform Layer Parvocellular (IplParvo) parameters
+        struct OPLandIplParvoParameters{
+               OPLandIplParvoParameters():colorMode(true),
+                                 normaliseOutput(true),
+                                 photoreceptorsLocalAdaptationSensitivity(0.75f),
+                                 photoreceptorsTemporalConstant(0.9f),
+                                 photoreceptorsSpatialConstant(0.53f),
+                                 horizontalCellsGain(0.01f),
+                                 hcellsTemporalConstant(0.5f),
+                                 hcellsSpatialConstant(7.f),
+                                 ganglionCellsSensitivity(0.75f) { } // default setup: same values as the <OPLandIPLparvo> section of the default XML configuration
+               bool colorMode, normaliseOutput; // members are initialized in declaration order, which matches the initializer list above
+               float photoreceptorsLocalAdaptationSensitivity, photoreceptorsTemporalConstant, photoreceptorsSpatialConstant, horizontalCellsGain, hcellsTemporalConstant, hcellsSpatialConstant, ganglionCellsSensitivity;
+        };
+        //! Inner Plexiform Layer Magnocellular channel (IplMagno)
+        struct IplMagnoParameters{
+            IplMagnoParameters():
+                          normaliseOutput(true),
+                          parasolCells_beta(0.f),
+                          parasolCells_tau(0.f),
+                          parasolCells_k(7.f),
+                          amacrinCellsTemporalCutFrequency(2.0f),
+                          V0CompressionParameter(0.95f),
+                          localAdaptintegration_tau(0.f),
+                          localAdaptintegration_k(7.f) { } // default setup: same values as the <IPLmagno> section of the default XML configuration
+            bool normaliseOutput; // members are initialized in declaration order, which matches the initializer list above
+            float parasolCells_beta, parasolCells_tau, parasolCells_k, amacrinCellsTemporalCutFrequency, V0CompressionParameter, localAdaptintegration_tau, localAdaptintegration_k;
+        };
+        OPLandIplParvoParameters OPLandIplParvo; //!< OPL / IPL parvo channel settings (default-constructed to the default setup)
+        IplMagnoParameters IplMagno; //!< IPL magno channel settings (default-constructed to the default setup)
+    };
+
+
+
+/** @brief class which allows the Gipsa/Listic Labs model to be used with OpenCV.
+
+This retina model allows spatio-temporal image processing (applied on still images, video sequences).
+As a summary, these are the retina model properties:
+- It applies a spectral whitening (mid-frequency details enhancement)
+- high frequency spatio-temporal noise reduction
+- low frequency luminance to be reduced (luminance range compression)
+- local logarithmic luminance compression allows details to be enhanced in low light conditions
+
+USE : this model can be used basically for spatio-temporal video effects but also for :
+     _using the getParvo method output matrix : texture analysis with enhanced signal to noise ratio and enhanced details robust against input images luminance ranges
+     _using the getMagno method output matrix : motion analysis also with the previously cited properties
+
+for more information, refer to the following papers :
+Benoit A., Caplier A., Durette B., Herault, J., "USING HUMAN VISUAL SYSTEM MODELING FOR BIO-INSPIRED LOW LEVEL IMAGE PROCESSING", Elsevier, Computer Vision and Image Understanding 114 (2010), pp. 758-773, DOI: http://dx.doi.org/10.1016/j.cviu.2010.01.011
+Vision: Images, Signals and Neural Networks: Models of Neural Processing in Visual Perception (Progress in Neural Processing),By: Jeanny Herault, ISBN: 9814273686. WAPI (Tower ID): 113266891.
+
+The retina filter includes the research contributions of phd/research colleagues from which code has been redrawn by the author :
+take a look at the retinacolor.hpp module to discover Brice Chaix de Lavarene color mosaicing/demosaicing and the reference paper:
+B. Chaix de Lavarene, D. Alleysson, B. Durette, J. Herault (2007). "Efficient demosaicing through recursive filtering", IEEE International Conference on Image Processing ICIP 2007
+take a look at imagelogpolprojection.hpp to discover retina spatial log sampling which originates from Barthelemy Durette phd with Jeanny Herault. A Retina / V1 cortex projection is also proposed and originates from Jeanny's discussions.
+more information in the above cited Jeanny Herault's book.
+ */
+class CV_EXPORTS_W Retina : public Algorithm {
+
+public:
+
+
+    /** @brief Retrieve retina input buffer size
+    @return the retina input buffer size
+     */
+    CV_WRAP virtual Size getInputSize()=0;
+
+    /** @brief Retrieve retina output buffer size that can be different from the input if a spatial log
+    transformation is applied
+    @return the retina output buffer size
+     */
+    CV_WRAP virtual Size getOutputSize()=0;
+
+    /** @brief Try to open an XML retina parameters file to adjust current retina instance setup
+
+    - if the xml file does not exist, then default setup is applied
+    - warning, Exceptions are thrown if read XML file is not valid
+    @param retinaParameterFile the parameters filename
+    @param applyDefaultSetupOnFailure presumably, set to true to fall back to the default setup when the parameters cannot be loaded (TODO: confirm against the implementation)
+
+    You can retrieve the current parameters structure using the method Retina::getParameters and update
+    it before running method Retina::setup.
+     */
+    CV_WRAP virtual void setup(String retinaParameterFile="", const bool applyDefaultSetupOnFailure=true)=0;
+
+    /** @overload
+    @param fs the open Filestorage which contains retina parameters
+    @param applyDefaultSetupOnFailure presumably, set to true to fall back to the default setup when the parameters cannot be loaded (TODO: confirm against the implementation)
+    */
+    virtual void setup(cv::FileStorage &fs, const bool applyDefaultSetupOnFailure=true)=0;
+
+    /** @overload
+    @param newParameters a parameters structure updated with the new target configuration.
+    */
+    virtual void setup(RetinaParameters newParameters)=0;
+
+    /**
+    @return the current parameters setup
+    */
+    virtual RetinaParameters getParameters()=0;
+
+    /** @brief Outputs a string showing the used parameters setup
+    @return a string which contains formatted parameters information
+     */
+    CV_WRAP virtual const String printSetup()=0;
+
+    /** @brief Write xml/yml formatted parameters information
+    @param fs the filename of the xml file that will be opened and written with formatted parameters
+    information
+     */
+    CV_WRAP virtual void write( String fs ) const=0;
+
+    /** @overload */
+    virtual void write( FileStorage& fs ) const CV_OVERRIDE = 0;
+
+    /** @brief Setup the OPL and IPL parvo channels (see biological model)
+
+    OPL is referred as Outer Plexiform Layer of the retina, it allows the spatio-temporal filtering
+    which whitens the spectrum and reduces spatio-temporal noise while attenuating global luminance
+    (low frequency energy) IPL parvo is the OPL next processing stage, it refers to a part of the
+    Inner Plexiform layer of the retina, it allows high contours sensitivity in foveal vision. See
+    reference papers for more information.
+    for more information, please have a look at the paper Benoit A., Caplier A., Durette B., Herault, J., "USING HUMAN VISUAL SYSTEM MODELING FOR BIO-INSPIRED LOW LEVEL IMAGE PROCESSING", Elsevier, Computer Vision and Image Understanding 114 (2010), pp. 758-773, DOI: http://dx.doi.org/10.1016/j.cviu.2010.01.011
+    @param colorMode specifies if (true) color is processed or not (false), in which case a gray
+    level image is processed
+    @param normaliseOutput specifies if (true) output is rescaled between 0 and 255 or not (false)
+    @param photoreceptorsLocalAdaptationSensitivity the photoreceptors sensitivity range is 0-1
+    (more log compression effect when value increases)
+    @param photoreceptorsTemporalConstant the time constant of the first order low pass filter of
+    the photoreceptors, use it to cut high temporal frequencies (noise or fast motion), unit is
+    frames, typical value is 1 frame
+    @param photoreceptorsSpatialConstant the spatial constant of the first order low pass filter of
+    the photoreceptors, use it to cut high spatial frequencies (noise or thick contours), unit is
+    pixels, typical value is 1 pixel
+    @param horizontalCellsGain gain of the horizontal cells network, if 0, then the mean value of
+    the output is zero, if the parameter is near 1, then, the luminance is not filtered and is
+    still reachable at the output, typical value is 0
+    @param HcellsTemporalConstant the time constant of the first order low pass filter of the
+    horizontal cells, use it to cut low temporal frequencies (local luminance variations), unit is
+    frames, typical value is 1 frame, as the photoreceptors
+    @param HcellsSpatialConstant the spatial constant of the first order low pass filter of the
+    horizontal cells, use it to cut low spatial frequencies (local luminance), unit is pixels,
+    typical value is 5 pixel, this value is also used for local contrast computing when computing
+    the local contrast adaptation at the ganglion cells level (Inner Plexiform Layer parvocellular
+    channel model)
+    @param ganglionCellsSensitivity the compression strength of the ganglion cells local adaptation
+    output, set a value between 0.6 and 1 for best results, a high value increases more the low
+    value sensitivity... and the output saturates faster, recommended value: 0.7
+     */
+    CV_WRAP virtual void setupOPLandIPLParvoChannel(const bool colorMode=true, const bool normaliseOutput = true, const float photoreceptorsLocalAdaptationSensitivity=0.7f, const float photoreceptorsTemporalConstant=0.5f, const float photoreceptorsSpatialConstant=0.53f, const float horizontalCellsGain=0.f, const float HcellsTemporalConstant=1.f, const float HcellsSpatialConstant=7.f, const float ganglionCellsSensitivity=0.7f)=0;
+
+    /** @brief Set parameters values for the Inner Plexiform Layer (IPL) magnocellular channel
+
+    this channel processes signals output from OPL processing stage in peripheral vision, it allows
+    motion information enhancement. It is decorrelated from the details channel. See reference
+    papers for more details.
+
+    @param normaliseOutput specifies if (true) output is rescaled between 0 and 255 or not (false)
+    @param parasolCells_beta the low pass filter gain used for local contrast adaptation at the
+    IPL level of the retina (for ganglion cells local adaptation), typical value is 0
+    @param parasolCells_tau the low pass filter time constant used for local contrast adaptation
+    at the IPL level of the retina (for ganglion cells local adaptation), unit is frame, typical
+    value is 0 (immediate response)
+    @param parasolCells_k the low pass filter spatial constant used for local contrast adaptation
+    at the IPL level of the retina (for ganglion cells local adaptation), unit is pixels, typical
+    value is 5
+    @param amacrinCellsTemporalCutFrequency the time constant of the first order high pass filter of
+    the magnocellular way (motion information channel), unit is frames, typical value is 1.2
+    @param V0CompressionParameter the compression strength of the ganglion cells local adaptation
+    output, set a value between 0.6 and 1 for best results, a high value increases more the low
+    value sensitivity... and the output saturates faster, recommended value: 0.95
+    @param localAdaptintegration_tau specifies the temporal constant of the low pass filter
+    involved in the computation of the local "motion mean" for the local adaptation computation
+    @param localAdaptintegration_k specifies the spatial constant of the low pass filter involved
+    in the computation of the local "motion mean" for the local adaptation computation
+     */
+    CV_WRAP virtual void setupIPLMagnoChannel(const bool normaliseOutput = true, const float parasolCells_beta=0.f, const float parasolCells_tau=0.f, const float parasolCells_k=7.f, const float amacrinCellsTemporalCutFrequency=1.2f, const float V0CompressionParameter=0.95f, const float localAdaptintegration_tau=0.f, const float localAdaptintegration_k=7.f)=0;
+
+    /** @brief Method which allows retina to be applied on an input image,
+
+    after run, encapsulated retina module is ready to deliver its outputs using dedicated
+    accessors, see getParvo and getMagno methods
+    @param inputImage the input Mat image to be processed, can be gray level or BGR coded in any
+    format (from 8bit to 16bits)
+     */
+    CV_WRAP virtual void run(InputArray inputImage)=0;
+
+    /** @brief Method which processes an image in order to correct its luminance, correct
+    backlight problems and enhance details in shadows.
+
+    This method is designed to perform High Dynamic Range image tone mapping (compress \>8bit/pixel
+    images to 8bit/pixel). This is a simplified version of the Retina Parvocellular model
+    (simplified version of the run/getParvo methods call) since it does not include the
+    spatio-temporal filter modelling the Outer Plexiform Layer of the retina that performs spectral
+    whitening and much more. However, it works great for tone mapping and in a faster way.
+
+    Check the demos and experiments section to see examples and the way to perform tone mapping
+    using the original retina model and the method.
+
+    @param inputImage the input image to process (should be coded in float format : CV_32F,
+    CV_32FC1, CV_32FC3, CV_32FC4, the 4th channel won't be considered).
+    @param outputToneMappedImage the output 8bit/channel tone mapped image (CV_8U or CV_8UC3 format).
+     */
+    CV_WRAP virtual void applyFastToneMapping(InputArray inputImage, OutputArray outputToneMappedImage)=0;
+
+    /** @brief Accessor of the details channel of the retina (models foveal vision).
+
+    Warning, getParvoRAW methods return buffers that are not rescaled within range [0;255] while
+    the non RAW method allows a normalized matrix to be retrieved.
+
+    @param retinaOutput_parvo the output buffer (reallocated if necessary), format can be :
+    -   a Mat, this output is rescaled for standard 8bits image processing use in OpenCV
+    -   RAW methods actually return a 1D matrix (encoding is R1, R2, ... Rn, G1, G2, ..., Gn, B1,
+    B2, ...Bn), this output is the original retina filter model output, without any
+    quantization or rescaling.
+    @see getParvoRAW
+     */
+    CV_WRAP virtual void getParvo(OutputArray retinaOutput_parvo)=0;
+
+    /** @brief Accessor of the details channel of the retina (models foveal vision).
+    @see getParvo
+     */
+    CV_WRAP virtual void getParvoRAW(OutputArray retinaOutput_parvo)=0;
+
+    /** @brief Accessor of the motion channel of the retina (models peripheral vision).
+
+    Warning, getMagnoRAW methods return buffers that are not rescaled within range [0;255] while
+    the non RAW method allows a normalized matrix to be retrieved.
+    @param retinaOutput_magno the output buffer (reallocated if necessary), format can be :
+    -   a Mat, this output is rescaled for standard 8bits image processing use in OpenCV
+    -   RAW methods actually return a 1D matrix (encoding is M1, M2,... Mn), this output is the
+    original retina filter model output, without any quantization or rescaling.
+    @see getMagnoRAW
+     */
+    CV_WRAP virtual void getMagno(OutputArray retinaOutput_magno)=0;
+
+    /** @brief Accessor of the motion channel of the retina (models peripheral vision).
+    @see getMagno
+     */
+    CV_WRAP virtual void getMagnoRAW(OutputArray retinaOutput_magno)=0;
+
+    /** @overload */
+    CV_WRAP virtual const Mat getMagnoRAW() const=0;
+    /** @overload */
+    CV_WRAP virtual const Mat getParvoRAW() const=0;
+
+    /** @brief Activate color saturation as the final step of the color demultiplexing process -\> this
+    saturation is a sigmoid function applied to each channel of the demultiplexed image.
+    @param saturateColors boolean that activates color saturation (if true) or deactivates it (if false)
+    @param colorSaturationValue the saturation factor : a simple factor applied on the chrominance
+    buffers
+     */
+    CV_WRAP virtual void setColorSaturation(const bool saturateColors=true, const float colorSaturationValue=4.0f)=0;
+
+    /** @brief Clears all retina buffers
+
+    (equivalent to opening the eyes after a long period of eye closure ;o) watch out for the temporal
+    transition occurring just after this method call.
+     */
+    CV_WRAP virtual void clearBuffers()=0;
+
+    /** @brief Activate/deactivate the Magnocellular pathway processing (motion information extraction), by
+    default, it is activated
+    @param activate true if Magnocellular output should be activated, false if not... if activated,
+    the Magnocellular output can be retrieved using the **getMagno** methods
+     */
+    CV_WRAP virtual void activateMovingContoursProcessing(const bool activate)=0;
+
+    /** @brief Activate/deactivate the Parvocellular pathway processing (contours information extraction), by
+    default, it is activated
+    @param activate true if Parvocellular (contours information extraction) output should be
+    activated, false if not... if activated, the Parvocellular output can be retrieved using the
+    Retina::getParvo methods
+     */
+    CV_WRAP virtual void activateContoursProcessing(const bool activate)=0;
+
+    /** @overload */
+    CV_WRAP static Ptr<Retina> create(Size inputSize);
+    /** @brief Constructors from standardized interfaces : retrieve a smart pointer to a Retina instance
+
+    @param inputSize the input frame size
+    @param colorMode the chosen processing mode : with or without color processing
+    @param colorSamplingMethod specifies which kind of color sampling will be used :
+    -   cv::bioinspired::RETINA_COLOR_RANDOM: each pixel position is either R, G or B in a random choice
+    -   cv::bioinspired::RETINA_COLOR_DIAGONAL: color sampling is RGBRGBRGB..., line 2 BRGBRGBRG..., line 3, GBRGBRGBR...
+    -   cv::bioinspired::RETINA_COLOR_BAYER: standard bayer sampling
+    @param useRetinaLogSampling activate retina log sampling, if true, the 2 following parameters can
+    be used
+    @param reductionFactor only useful if param useRetinaLogSampling=true, specifies the reduction
+    factor of the output frame (as the center (fovea) is high resolution and corners can be
+    underscaled, then a reduction of the output is allowed without precision loss)
+    @param samplingStrength only useful if param useRetinaLogSampling=true, specifies the strength of
+    the log scale that is applied
+     */
+    CV_WRAP static Ptr<Retina> create(Size inputSize, const bool colorMode,
+                                           int colorSamplingMethod=RETINA_COLOR_BAYER,
+                                           const bool useRetinaLogSampling=false,
+                                           const float reductionFactor=1.0f, const float samplingStrength=10.0f);
+};
+
+//! @}
+
+}
+}
+#endif /* __OPENCV_BIOINSPIRED_RETINA_HPP__ */
--- a/3rdparty/opencv/inc/opencv2/bioinspired/retinafasttonemapping.hpp
+++ b/3rdparty/opencv/inc/opencv2/bioinspired/retinafasttonemapping.hpp
@@ -0,0 +1,138 @@
+
+/*#******************************************************************************
+ ** IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ **
+ ** By downloading, copying, installing or using the software you agree to this license.
+ ** If you do not agree to this license, do not download, install,
+ ** copy or use the software.
+ **
+ **
+ ** bioinspired : interfaces allowing OpenCV users to integrate Human Vision System models. Presented models originate from Jeanny Herault's original research and have been reused and adapted by the author&collaborators for computed vision applications since his thesis with Alice Caplier at Gipsa-Lab.
+ **
+ ** Maintainers : Listic lab (code author current affiliation & applications) and Gipsa Lab (original research origins & applications)
+ **
+ **  Creation - enhancement process 2007-2013
+ **      Author: Alexandre Benoit (benoit.alexandre.vision@gmail.com), LISTIC lab, Annecy le vieux, France
+ **
+ ** Theses algorithm have been developped by Alexandre BENOIT since his thesis with Alice Caplier at Gipsa-Lab (www.gipsa-lab.inpg.fr) and the research he pursues at LISTIC Lab (www.listic.univ-savoie.fr).
+ ** Refer to the following research paper for more information:
+ ** Benoit A., Caplier A., Durette B., Herault, J., "USING HUMAN VISUAL SYSTEM MODELING FOR BIO-INSPIRED LOW LEVEL IMAGE PROCESSING", Elsevier, Computer Vision and Image Understanding 114 (2010), pp. 758-773, DOI: http://dx.doi.org/10.1016/j.cviu.2010.01.011
+ ** This work have been carried out thanks to Jeanny Herault who's research and great discussions are the basis of all this work, please take a look at his book:
+ ** Vision: Images, Signals and Neural Networks: Models of Neural Processing in Visual Perception (Progress in Neural Processing),By: Jeanny Herault, ISBN: 9814273686. WAPI (Tower ID): 113266891.
+ **
+ **
+ **
+ **
+ **
+ ** This class is based on image processing tools of the author and already used within the Retina class (this is the same code as method retina::applyFastToneMapping, but in an independent class, it is light from a memory requirement point of view). It implements an adaptation of the efficient tone mapping algorithm proposed by David Alleysson, Sabine Susstrunk and Laurence Meylan, please cite:
+ ** -> Meylan L., Alleysson D., and Susstrunk S., A Model of Retinal Local Adaptation for the Tone Mapping of Color Filter Array Images, Journal of Optical Society of America, A, Vol. 24, N 9, September, 1st, 2007, pp. 2807-2816
+ **
+ **
+ **                          License Agreement
+ **               For Open Source Computer Vision Library
+ **
+ ** Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+ ** Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
+ **
+ **               For Human Visual System tools (bioinspired)
+ ** Copyright (C) 2007-2011, LISTIC Lab, Annecy le Vieux and GIPSA Lab, Grenoble, France, all rights reserved.
+ **
+ ** Third party copyrights are property of their respective owners.
+ **
+ ** Redistribution and use in source and binary forms, with or without modification,
+ ** are permitted provided that the following conditions are met:
+ **
+ ** * Redistributions of source code must retain the above copyright notice,
+ **    this list of conditions and the following disclaimer.
+ **
+ ** * Redistributions in binary form must reproduce the above copyright notice,
+ **    this list of conditions and the following disclaimer in the documentation
+ **    and/or other materials provided with the distribution.
+ **
+ ** * The name of the copyright holders may not be used to endorse or promote products
+ **    derived from this software without specific prior written permission.
+ **
+ ** This software is provided by the copyright holders and contributors "as is" and
+ ** any express or implied warranties, including, but not limited to, the implied
+ ** warranties of merchantability and fitness for a particular purpose are disclaimed.
+ ** In no event shall the Intel Corporation or contributors be liable for any direct,
+ ** indirect, incidental, special, exemplary, or consequential damages
+ ** (including, but not limited to, procurement of substitute goods or services;
+ ** loss of use, data, or profits; or business interruption) however caused
+ ** and on any theory of liability, whether in contract, strict liability,
+ ** or tort (including negligence or otherwise) arising in any way out of
+ ** the use of this software, even if advised of the possibility of such damage.
+ *******************************************************************************/
+
+#ifndef __OPENCV_BIOINSPIRED_RETINAFASTTONEMAPPING_HPP__
+#define __OPENCV_BIOINSPIRED_RETINAFASTTONEMAPPING_HPP__
+
+/**
+@file
+@date May 26, 2013
+@author Alexandre Benoit
+ */
+
+#include "opencv2/core.hpp" // for all OpenCV core functionalities access, including cv::Exception support
+
+namespace cv{
+namespace bioinspired{
+
+//! @addtogroup bioinspired
+//! @{
+
+/** @brief  a wrapper class which allows the tone mapping algorithm of Meylan&al(2007) to be used with OpenCV.
+
+This algorithm is already implemented in the Retina class (retina::applyFastToneMapping) but using this class does not require all the retina model to be allocated. This allows a light memory use for low memory devices (smartphones, etc.).
+As a summary, these are the model properties:
+- 2 stages of local luminance adaptation with a different local neighborhood for each.
+- first stage models the retina photoreceptors local luminance adaptation
+- second stage models the ganglion cells local information adaptation
+- compared to the initial publication, this class uses spatio-temporal low pass filters instead of spatial only filters.
+  this can help noise robustness and temporal stability for video sequence use cases.
+
+for more information, refer to the following papers :
+Meylan L., Alleysson D., and Susstrunk S., A Model of Retinal Local Adaptation for the Tone Mapping of Color Filter Array Images, Journal of Optical Society of America, A, Vol. 24, N 9, September, 1st, 2007, pp. 2807-2816 ; Benoit A., Caplier A., Durette B., Herault, J., "USING HUMAN VISUAL SYSTEM MODELING FOR BIO-INSPIRED LOW LEVEL IMAGE PROCESSING", Elsevier, Computer Vision and Image Understanding 114 (2010), pp. 758-773, DOI: http://dx.doi.org/10.1016/j.cviu.2010.01.011
+regarding spatio-temporal filter and the bigger retina model :
+Vision: Images, Signals and Neural Networks: Models of Neural Processing in Visual Perception (Progress in Neural Processing),By: Jeanny Herault, ISBN: 9814273686. WAPI (Tower ID): 113266891.
+*/
+class CV_EXPORTS_W RetinaFastToneMapping : public Algorithm
+{
+public:
+
+    /** @brief applies a luminance correction (initially High Dynamic Range (HDR) tone mapping)
+
+    using only the 2 local adaptation stages of the retina parvocellular channel : photoreceptors
+    level and ganglion cells level. Spatio temporal filtering is applied but limited to temporal
+    smoothing and possibly high frequencies attenuation. This is a lighter method than the one
+    available using the regular retina::run method. It is then faster but it does not include
+    complete temporal filtering nor retina spectral whitening. Then, it can have a more limited
+    effect on images with a very high dynamic range. This is an adaptation of the original still
+    image HDR tone mapping algorithm of David Alleysson, Sabine Susstrunk and Laurence Meylan's
+    work, please cite: -> Meylan L., Alleysson D., and Susstrunk S., A Model of Retinal Local
+    Adaptation for the Tone Mapping of Color Filter Array Images, Journal of Optical Society of
+    America, A, Vol. 24, N 9, September, 1st, 2007, pp. 2807-2816
+
+    @param inputImage the input image to process RGB or gray levels
+    @param outputToneMappedImage the output tone mapped image
+    */
+    CV_WRAP virtual void applyFastToneMapping(InputArray inputImage, OutputArray outputToneMappedImage)=0;
+
+    /** @brief updates tone mapping behaviors by adjusting the local luminance computation area
+
+    @param photoreceptorsNeighborhoodRadius the first stage local adaptation area
+    @param ganglioncellsNeighborhoodRadius the second stage local adaptation area
+    @param meanLuminanceModulatorK the factor applied to modulate the meanLuminance information
+    (default is 1, see reference paper)
+     */
+    CV_WRAP virtual void setup(const float photoreceptorsNeighborhoodRadius=3.f, const float ganglioncellsNeighborhoodRadius=1.f, const float meanLuminanceModulatorK=1.f)=0;
+
+    CV_WRAP static Ptr<RetinaFastToneMapping> create(Size inputSize); // factory returning a smart pointer to an instance; presumably inputSize is the size of the frames to process - TODO confirm
+};
+
+
+//! @}
+
+}
+}
+#endif /* __OPENCV_BIOINSPIRED_RETINAFASTTONEMAPPING_HPP__ */
--- a/3rdparty/opencv/inc/opencv2/bioinspired/transientareassegmentationmodule.hpp
+++ b/3rdparty/opencv/inc/opencv2/bioinspired/transientareassegmentationmodule.hpp
@@ -0,0 +1,204 @@
+/*#******************************************************************************
+ ** IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ **
+ ** By downloading, copying, installing or using the software you agree to this license.
+ ** If you do not agree to this license, do not download, install,
+ ** copy or use the software.
+ **
+ **
+ ** bioinspired : interfaces allowing OpenCV users to integrate Human Vision System models.
+ ** TransientAreasSegmentationModule Use: extract areas that present spatio-temporal changes.
+ ** => It should be used at the output of the cv::bioinspired::Retina::getMagnoRAW() output that enhances spatio-temporal changes
+ **
+ ** Maintainers : Listic lab (code author current affiliation & applications)
+ **
+ **  Creation - enhancement process 2007-2015
+ **      Author: Alexandre Benoit (benoit.alexandre.vision@gmail.com), LISTIC lab, Annecy le vieux, France
+ **
+ ** These algorithms have been developed by Alexandre BENOIT since his thesis with Alice Caplier at Gipsa-Lab (www.gipsa-lab.inpg.fr) and the research he pursues at LISTIC Lab (www.listic.univ-savoie.fr).
+ ** Refer to the following research paper for more information:
+ ** Strat, S.T.; Benoit, A.; Lambert, P., "Retina enhanced bag of words descriptors for video classification," Signal Processing Conference (EUSIPCO), 2014 Proceedings of the 22nd European , vol., no., pp.1307,1311, 1-5 Sept. 2014 (http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6952461&isnumber=6951911)
+ ** Benoit A., Caplier A., Durette B., Herault, J., "USING HUMAN VISUAL SYSTEM MODELING FOR BIO-INSPIRED LOW LEVEL IMAGE PROCESSING", Elsevier, Computer Vision and Image Understanding 114 (2010), pp. 758-773, DOI: http://dx.doi.org/10.1016/j.cviu.2010.01.011
+ ** This work has been carried out thanks to Jeanny Herault, whose research and great discussions are the basis of all this work; please take a look at his book:
+ ** Vision: Images, Signals and Neural Networks: Models of Neural Processing in Visual Perception (Progress in Neural Processing),By: Jeanny Herault, ISBN: 9814273686. WAPI (Tower ID): 113266891.
+ **
+ **
+ **                          License Agreement
+ **               For Open Source Computer Vision Library
+ **
+ ** Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+ ** Copyright (C) 2008-2011, Willow Garage Inc., all rights reserved.
+ **
+ **               For Human Visual System tools (bioinspired)
+ ** Copyright (C) 2007-2015, LISTIC Lab, Annecy le Vieux and GIPSA Lab, Grenoble, France, all rights reserved.
+ **
+ ** Third party copyrights are property of their respective owners.
+ **
+ ** Redistribution and use in source and binary forms, with or without modification,
+ ** are permitted provided that the following conditions are met:
+ **
+ ** * Redistributions of source code must retain the above copyright notice,
+ **    this list of conditions and the following disclaimer.
+ **
+ ** * Redistributions in binary form must reproduce the above copyright notice,
+ **    this list of conditions and the following disclaimer in the documentation
+ **    and/or other materials provided with the distribution.
+ **
+ ** * The name of the copyright holders may not be used to endorse or promote products
+ **    derived from this software without specific prior written permission.
+ **
+ ** This software is provided by the copyright holders and contributors "as is" and
+ ** any express or implied warranties, including, but not limited to, the implied
+ ** warranties of merchantability and fitness for a particular purpose are disclaimed.
+ ** In no event shall the Intel Corporation or contributors be liable for any direct,
+ ** indirect, incidental, special, exemplary, or consequential damages
+ ** (including, but not limited to, procurement of substitute goods or services;
+ ** loss of use, data, or profits; or business interruption) however caused
+ ** and on any theory of liability, whether in contract, strict liability,
+ ** or tort (including negligence or otherwise) arising in any way out of
+ ** the use of this software, even if advised of the possibility of such damage.
+ *******************************************************************************/
+
+#ifndef SEGMENTATIONMODULE_HPP_
+#define SEGMENTATIONMODULE_HPP_
+
+/**
+@file
+@date 2007-2013
+@author Alexandre BENOIT, benoit.alexandre.vision@gmail.com
+*/
+
+#include "opencv2/core.hpp" // for all OpenCV core functionalities access, including cv::Exception support
+
+namespace cv
+{
+namespace bioinspired
+{
+//! @addtogroup bioinspired
+//! @{
+
+/** @brief parameter structure that stores the transient events detector setup parameters
+*/
+struct SegmentationParameters{ // CV_EXPORTS_W_MAP to export to python native dictionaries
+	// default constructor: initializes every field with the default value given in the list below
+	SegmentationParameters():
+	    thresholdON(100),
+	    thresholdOFF(100),
+	    localEnergy_temporalConstant(0.5),
+	    localEnergy_spatialConstant(5),
+	    neighborhoodEnergy_temporalConstant(1),
+	    neighborhoodEnergy_spatialConstant(15),
+	    contextEnergy_temporalConstant(1),
+	    contextEnergy_spatialConstant(75){};
+	// all properties list (thresholdON and thresholdOFF both default to 100; their meaning is not documented in this header)
+	float thresholdON;
+	float thresholdOFF;
+	//! the time constant of the first order low pass filter, use it to cut high temporal frequencies (noise or fast motion), unit is frames, typical value is 0.5 frame (the default)
+	float localEnergy_temporalConstant;
+	//! the spatial constant of the first order low pass filter, use it to cut high spatial frequencies (noise or thick contours), unit is pixels, typical value is 5 pixels (the default)
+	float localEnergy_spatialConstant;
+	//! local neighborhood energy filtering parameters : the aim is to get information about the energy neighborhood to perform a center surround energy analysis (defaults: temporal 1, spatial 15)
+	float neighborhoodEnergy_temporalConstant;
+	float neighborhoodEnergy_spatialConstant;
+	//! context neighborhood energy filtering parameters : the aim is to get information about the energy on a wide neighborhood area to filter out local effects (defaults: temporal 1, spatial 75)
+	float contextEnergy_temporalConstant;
+	float contextEnergy_spatialConstant;
+};
+
+/** @brief class which provides a transient/moving areas segmentation module
+
+performs a locally adapted segmentation by using the retina magno input data. Based on Alexandre
+BENOIT thesis: "Le système visuel humain au secours de la vision par ordinateur"
+
+3 spatio temporal filters are used:
+- a first one which filters the noise and local variations of the input motion energy
+- a second one (a more powerful low pass spatial filter) which gives the neighborhood motion energy; the
+segmentation consists in the comparison of these two outputs: if the local motion energy is higher
+than the neighborhood motion energy, then the area is considered as moving and is segmented
+- a stronger third low pass filter helps decision by providing a smooth information about the
+"motion context" in a wider area
+ */
+
+class CV_EXPORTS_W TransientAreasSegmentationModule: public Algorithm
+{
+public:
+
+
+    /** @brief return the size of the managed input and output images
+    */
+    CV_WRAP virtual Size getSize()=0;
+
+    /** @brief try to open an XML segmentation parameters file to adjust current segmentation instance setup
+
+    - if the xml file does not exist, then default setup is applied
+    - warning, Exceptions are thrown if read XML file is not valid
+    @param segmentationParameterFile : the parameters filename
+    @param applyDefaultSetupOnFailure : set to true if an error must be thrown on error (NOTE(review): the parameter name suggests the opposite, i.e. falling back to the default setup -- confirm against the implementation)
+     */
+    CV_WRAP virtual void setup(String segmentationParameterFile="", const bool applyDefaultSetupOnFailure=true)=0;
+
+    /** @brief read segmentation parameters from an already open cv::FileStorage to adjust current segmentation instance setup
+
+    - if the xml file does not exist, then default setup is applied
+    - warning, Exceptions are thrown if read XML file is not valid
+    @param fs : the open Filestorage which contains segmentation parameters
+    @param applyDefaultSetupOnFailure : set to true if an error must be thrown on error (NOTE(review): the parameter name suggests the opposite, i.e. falling back to the default setup -- confirm against the implementation)
+    */
+    virtual void setup(cv::FileStorage &fs, const bool applyDefaultSetupOnFailure=true)=0;
+
+    /** @brief adjust current segmentation instance setup from a parameters structure
+
+    - NOTE(review): the XML-file remarks of the other setup() overloads had been copy-pasted here;
+    - this overload takes no file argument, so they presumably do not apply -- confirm
+    @param newParameters : a parameters structures updated with the new target configuration
+     */
+    virtual void setup(SegmentationParameters newParameters)=0;
+
+    /** @brief return the current parameters setup
+    */
+    virtual SegmentationParameters getParameters()=0;
+
+    /** @brief parameters setup display method
+    @return a string which contains formatted parameters information
+    */
+    CV_WRAP virtual const String printSetup()=0;
+
+    /** @brief write xml/yml formatted parameters information
+    @param fs : the filename of the xml file that will be opened and written with formatted parameters information
+    */
+    CV_WRAP virtual void write( String fs ) const=0;
+
+    /** @brief write xml/yml formatted parameters information
+    @param fs : a cv::Filestorage object ready to be filled
+    */
+    virtual void write( cv::FileStorage& fs ) const CV_OVERRIDE = 0;
+
+    /** @brief main processing method, get the result using getSegmentationPicture()
+    @param inputToSegment : the image to process, it must match the instance buffer size !
+    @param channelIndex : the channel to process in case of multichannel images (default is 0)
+    */
+    CV_WRAP virtual void run(InputArray inputToSegment, const int channelIndex=0)=0;
+
+    /** @brief access function
+    return (through transientAreas) the last segmentation result: a boolean picture which is resampled between 0 and 255 for a display purpose
+    */
+    CV_WRAP virtual void getSegmentationPicture(OutputArray transientAreas)=0;
+
+    /** @brief cleans all the buffers of the instance
+    */
+    CV_WRAP virtual void clearAllBuffers()=0;
+
+    /** @brief allocator
+    @param inputSize : size of the images input to segment (output will be the same size)
+     */
+    CV_WRAP static Ptr<TransientAreasSegmentationModule> create(Size inputSize);
+};
+
+//! @}
+
+}} // namespaces end : cv and bioinspired
+
+
+#endif
+
+
--- a/3rdparty/opencv/inc/opencv2/calib3d.hpp
+++ b/3rdparty/opencv/inc/opencv2/calib3d.hpp
--- a/3rdparty/opencv/inc/opencv2/calib3d/calib3d.hpp
+++ b/3rdparty/opencv/inc/opencv2/calib3d/calib3d.hpp
@@ -0,0 +1,48 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __OPENCV_BUILD
+#error this is a compatibility header which should not be used inside the OpenCV library
+#endif
+
+#include "opencv2/calib3d.hpp"
--- a/3rdparty/opencv/inc/opencv2/calib3d/calib3d_c.h
+++ b/3rdparty/opencv/inc/opencv2/calib3d/calib3d_c.h
@@ -0,0 +1,150 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CALIB3D_C_H
+#define OPENCV_CALIB3D_C_H
+
+#include "opencv2/core/types_c.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Method flags used when calculating the fundamental matrix from a set of corresponding points */
+#define CV_FM_7POINT 1
+#define CV_FM_8POINT 2
+
+#define CV_LMEDS 4
+#define CV_RANSAC 8
+
+#define CV_FM_LMEDS_ONLY  CV_LMEDS   /* the four CV_FM_* names below are plain aliases of CV_LMEDS / CV_RANSAC */
+#define CV_FM_RANSAC_ONLY CV_RANSAC
+#define CV_FM_LMEDS CV_LMEDS
+#define CV_FM_RANSAC CV_RANSAC
+
+enum
+{
+    CV_ITERATIVE = 0,
+    CV_EPNP = 1, // F.Moreno-Noguer, V.Lepetit and P.Fua "EPnP: Efficient Perspective-n-Point Camera Pose Estimation"
+    CV_P3P = 2, // X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang; "Complete Solution Classification for the Perspective-Three-Point Problem"
+    CV_DLS = 3 // Joel A. Hesch and Stergios I. Roumeliotis. "A Direct Least-Squares (DLS) Method for PnP"
+};
+
+#define CV_CALIB_CB_ADAPTIVE_THRESH  1
+#define CV_CALIB_CB_NORMALIZE_IMAGE  2
+#define CV_CALIB_CB_FILTER_QUADS     4
+#define CV_CALIB_CB_FAST_CHECK       8
+
+#define CV_CALIB_USE_INTRINSIC_GUESS  1   /* each CV_CALIB_* value in this group is a distinct power of two */
+#define CV_CALIB_FIX_ASPECT_RATIO     2
+#define CV_CALIB_FIX_PRINCIPAL_POINT  4
+#define CV_CALIB_ZERO_TANGENT_DIST    8
+#define CV_CALIB_FIX_FOCAL_LENGTH 16
+#define CV_CALIB_FIX_K1  32
+#define CV_CALIB_FIX_K2  64
+#define CV_CALIB_FIX_K3  128
+#define CV_CALIB_FIX_K4  2048   /* 256, 512 and 1024 are taken by CV_CALIB_FIX_INTRINSIC, CV_CALIB_SAME_FOCAL_LENGTH and CV_CALIB_ZERO_DISPARITY below */
+#define CV_CALIB_FIX_K5  4096
+#define CV_CALIB_FIX_K6  8192
+#define CV_CALIB_RATIONAL_MODEL 16384
+#define CV_CALIB_THIN_PRISM_MODEL 32768
+#define CV_CALIB_FIX_S1_S2_S3_S4  65536
+#define CV_CALIB_TILTED_MODEL  262144
+#define CV_CALIB_FIX_TAUX_TAUY  524288
+#define CV_CALIB_FIX_TANGENT_DIST 2097152
+
+#define CV_CALIB_NINTRINSIC 18   /* not a power of two, so not a bit flag; presumably the number of intrinsic parameters -- TODO confirm */
+
+#define CV_CALIB_FIX_INTRINSIC  256
+#define CV_CALIB_SAME_FOCAL_LENGTH 512
+
+#define CV_CALIB_ZERO_DISPARITY 1024
+
+/* stereo correspondence parameters and functions */
+#define CV_STEREO_BM_NORMALIZED_RESPONSE  0
+#define CV_STEREO_BM_XSOBEL               1
+
+#ifdef __cplusplus
+} // extern "C"
+
+////////// CvLevMarq: C++-only part of this header (declared inside #ifdef __cplusplus) //////////
+class CV_EXPORTS CvLevMarq
+{
+public:
+    CvLevMarq();
+    CvLevMarq( int nparams, int nerrs, CvTermCriteria criteria=
+              cvTermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER,30,DBL_EPSILON), // default: iteration + epsilon criteria, 30 and DBL_EPSILON
+              bool completeSymmFlag=false );
+    ~CvLevMarq();
+    void init( int nparams, int nerrs, CvTermCriteria criteria=       // same parameter list and defaults as the constructor above
+              cvTermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER,30,DBL_EPSILON),
+              bool completeSymmFlag=false );
+    bool update( const CvMat*& param, CvMat*& J, CvMat*& err );
+    bool updateAlt( const CvMat*& param, CvMat*& JtJ, CvMat*& JtErr, double*& errNorm );
+
+    void clear();
+    void step();
+    enum { DONE=0, STARTED=1, CALC_J=2, CHECK_ERR=3 }; // presumably the values taken by 'state' below -- TODO confirm
+
+    cv::Ptr<CvMat> mask;
+    cv::Ptr<CvMat> prevParam;
+    cv::Ptr<CvMat> param;
+    cv::Ptr<CvMat> J;
+    cv::Ptr<CvMat> err;
+    cv::Ptr<CvMat> JtJ;
+    cv::Ptr<CvMat> JtJN;
+    cv::Ptr<CvMat> JtErr;
+    cv::Ptr<CvMat> JtJV;
+    cv::Ptr<CvMat> JtJW;
+    double prevErrNorm, errNorm;
+    int lambdaLg10; // presumably log10 of the damping factor -- TODO confirm against the implementation
+    CvTermCriteria criteria;
+    int state;
+    int iters;
+    bool completeSymmFlag;
+    int solveMethod;
+};
+
+#endif
+
+#endif /* OPENCV_CALIB3D_C_H */
--- a/3rdparty/opencv/inc/opencv2/ccalib.hpp
+++ b/3rdparty/opencv/inc/opencv2/ccalib.hpp
@@ -0,0 +1,157 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+ //
+ //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+ //
+ //  By downloading, copying, installing or using the software you agree to this license.
+ //  If you do not agree to this license, do not download, install,
+ //  copy or use the software.
+ //
+ //
+ //                           License Agreement
+ //                For Open Source Computer Vision Library
+ //
+ // Copyright (C) 2014, OpenCV Foundation, all rights reserved.
+ // Third party copyrights are property of their respective owners.
+ //
+ // Redistribution and use in source and binary forms, with or without modification,
+ // are permitted provided that the following conditions are met:
+ //
+ //   * Redistribution's of source code must retain the above copyright notice,
+ //     this list of conditions and the following disclaimer.
+ //
+ //   * Redistribution's in binary form must reproduce the above copyright notice,
+ //     this list of conditions and the following disclaimer in the documentation
+ //     and/or other materials provided with the distribution.
+ //
+ //   * The name of the copyright holders may not be used to endorse or promote products
+ //     derived from this software without specific prior written permission.
+ //
+ // This software is provided by the copyright holders and contributors "as is" and
+ // any express or implied warranties, including, but not limited to, the implied
+ // warranties of merchantability and fitness for a particular purpose are disclaimed.
+ // In no event shall the Intel Corporation or contributors be liable for any direct,
+ // indirect, incidental, special, exemplary, or consequential damages
+ // (including, but not limited to, procurement of substitute goods or services;
+ // loss of use, data, or profits; or business interruption) however caused
+ // and on any theory of liability, whether in contract, strict liability,
+ // or tort (including negligence or otherwise) arising in any way out of
+ // the use of this software, even if advised of the possibility of such damage.
+ //
+ //M*/
+
+#ifndef __OPENCV_CCALIB_HPP__
+#define __OPENCV_CCALIB_HPP__
+
+#include <opencv2/core.hpp>
+#include <opencv2/features2d.hpp>
+#include <opencv2/imgproc.hpp>
+#include <opencv2/calib3d.hpp>
+
+#include <vector>
+
+/** @defgroup ccalib Custom Calibration Pattern for 3D reconstruction
+*/
+
+namespace cv{ namespace ccalib{
+
+//! @addtogroup ccalib
+//! @{
+
+class CV_EXPORTS CustomPattern : public Algorithm
+{
+public:
+	CustomPattern();
+	virtual ~CustomPattern();
+
+	bool create(InputArray pattern, const Size2f boardSize, OutputArray output = noArray());
+
+	bool findPattern(InputArray image, OutputArray matched_features, OutputArray pattern_points, const double ratio = 0.7,
+					 const double proj_error = 8.0, const bool refine_position = false, OutputArray out = noArray(),
+					 OutputArray H = noArray(), OutputArray pattern_corners = noArray());
+
+	bool isInitialized();
+
+	void getPatternPoints(std::vector<KeyPoint>& original_points);
+    /**<
+		Returns the original points through original_points (a std::vector<KeyPoint>).
+	*/
+	double getPixelSize();
+    /**<
+		Get the pixel size of the pattern
+	*/
+
+	bool setFeatureDetector(Ptr<FeatureDetector> featureDetector);
+	bool setDescriptorExtractor(Ptr<DescriptorExtractor> extractor);
+	bool setDescriptorMatcher(Ptr<DescriptorMatcher> matcher);
+
+	Ptr<FeatureDetector> getFeatureDetector();
+	Ptr<DescriptorExtractor> getDescriptorExtractor();
+	Ptr<DescriptorMatcher> getDescriptorMatcher();
+
+	double calibrate(InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints,
+				Size imageSize, InputOutputArray cameraMatrix, InputOutputArray distCoeffs,
+				OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs, int flags = 0,
+				TermCriteria criteria = TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 30, DBL_EPSILON));
+    /**<
+		Calls the calibrateCamera function with the same inputs.
+	*/
+
+    bool findRt(InputArray objectPoints, InputArray imagePoints, InputArray cameraMatrix, InputArray distCoeffs,
+                InputOutputArray rvec, InputOutputArray tvec, bool useExtrinsicGuess = false, int flags = SOLVEPNP_ITERATIVE);
+    bool findRt(InputArray image, InputArray cameraMatrix, InputArray distCoeffs,
+                InputOutputArray rvec, InputOutputArray tvec, bool useExtrinsicGuess = false, int flags = SOLVEPNP_ITERATIVE);
+    /**<
+		Uses solvePnP to find the rotation and translation of the pattern
+		with respect to the camera frame.
+	*/
+
+    bool findRtRANSAC(InputArray objectPoints, InputArray imagePoints, InputArray cameraMatrix, InputArray distCoeffs,
+                      InputOutputArray rvec, InputOutputArray tvec, bool useExtrinsicGuess = false, int iterationsCount = 100,
+                      float reprojectionError = 8.0, int minInliersCount = 100, OutputArray inliers = noArray(), int flags = SOLVEPNP_ITERATIVE);
+    bool findRtRANSAC(InputArray image, InputArray cameraMatrix, InputArray distCoeffs,
+                      InputOutputArray rvec, InputOutputArray tvec, bool useExtrinsicGuess = false, int iterationsCount = 100,
+                      float reprojectionError = 8.0, int minInliersCount = 100, OutputArray inliers = noArray(), int flags = SOLVEPNP_ITERATIVE);
+        /**<
+		Uses solvePnPRansac()
+	*/
+
+	void drawOrientation(InputOutputArray image, InputArray tvec, InputArray rvec, InputArray cameraMatrix,
+						 InputArray distCoeffs, double axis_length = 3, int axis_width = 2);
+    /**<
+		pattern_corners -> projected over the image position of the edges of the pattern. NOTE(review): pattern_corners is a parameter of findPattern(), not of drawOrientation() -- this comment looks misplaced.
+	*/
+
+private:
+
+	Mat img_roi;
+	std::vector<Point2f> obj_corners;
+	double pxSize;
+
+	bool initialized;
+
+	Ptr<FeatureDetector> detector;
+	Ptr<DescriptorExtractor> descriptorExtractor;
+	Ptr<DescriptorMatcher> descriptorMatcher;
+
+	std::vector<KeyPoint> keypoints;
+	std::vector<Point3f> points3d;
+	Mat descriptor;
+
+	bool init(Mat& image, const float pixel_size, OutputArray output = noArray());
+	bool findPatternPass(const Mat& image, std::vector<Point2f>& matched_features, std::vector<Point3f>& pattern_points,
+						 Mat& H, std::vector<Point2f>& scene_corners, const double pratio, const double proj_error,
+						 const bool refine_position = false, const Mat& mask = Mat(), OutputArray output = noArray());
+	void scaleFoundPoints(const double squareSize, const std::vector<KeyPoint>& corners, std::vector<Point3f>& pts3d);
+	void check_matches(std::vector<Point2f>& matched, const std::vector<Point2f>& pattern, std::vector<DMatch>& good, std::vector<Point3f>& pattern_3d, const Mat& H);
+
+	void keypoints2points(const std::vector<KeyPoint>& in, std::vector<Point2f>& out);
+	void updateKeypointsPos(std::vector<KeyPoint>& in, const std::vector<Point2f>& new_pos);
+	void refinePointsPos(const Mat& img, std::vector<Point2f>& p);
+	void refineKeypointsPos(const Mat& img, std::vector<KeyPoint>& kp);
+};
+
+//! @}
+
+}} // namespace ccalib, cv
+
+#endif
--- a/3rdparty/opencv/inc/opencv2/ccalib/multicalib.hpp
+++ b/3rdparty/opencv/inc/opencv2/ccalib/multicalib.hpp
@@ -0,0 +1,212 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2015, Baisheng Lai (laibaisheng@gmail.com), Zhejiang University,
+// all rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_MULTICAMERACALIBRATION_HPP__
+#define __OPENCV_MULTICAMERACALIBRATION_HPP__
+
+#include "opencv2/ccalib/randpattern.hpp"
+#include "opencv2/ccalib/omnidir.hpp"
+#include <string>
+#include <iostream>
+
+namespace cv { namespace multicalib {
+
+//! @addtogroup ccalib
+//! @{
+
+#define HEAD -1 // NOTE(review): unprefixed macro, visible to every file that includes this header; presumably a sentinel index -- meaning not shown here
+#define INVALID -2 // NOTE(review): same remark; presumably marks an invalid index -- confirm in the implementation
+
+/** @brief Class for multiple camera calibration that supports pinhole camera and omnidirectional camera.
+For omnidirectional camera model, please refer to omnidir.hpp in ccalib module.
+It first calibrates each camera individually, then a bundle adjustment like optimization is applied to
+refine extrinsic parameters. So far, it only supports "random" pattern for calibration,
+see randpattern.hpp in ccalib module for details.
+Images that are used should be named "cameraIdx-timestamp.*"; several images with the same timestamp
+mean that they are photographs of the same pattern. cameraIdx should start from 0.
+
+For more details, please refer to paper
+    B. Li, L. Heng, K. Koser and M. Pollefeys, "A Multiple-Camera System
+    Calibration Toolbox Using A Feature Descriptor-Based Calibration
+    Pattern", in IROS 2013.
+*/
+
+class CV_EXPORTS MultiCameraCalibration
+{
+public:
+    enum {
+        PINHOLE,
+        OMNIDIRECTIONAL
+        //FISHEYE
+    };
+
+    // an edge connects a camera and pattern
+    struct edge
+    {
+        int cameraVertex;   // vertex index for camera in this edge
+        int photoVertex;    // vertex index for pattern in this edge
+        int photoIndex;     // photo index among photos for this camera
+        Mat transform;      // transform from pattern to camera
+
+        edge(int cv, int pv, int pi, Mat trans)
+        {
+            cameraVertex = cv;
+            photoVertex = pv;
+            photoIndex = pi;
+            transform = trans;
+        }
+    };
+
+    struct vertex
+    {
+        Mat pose;   // relative pose to the first camera. For camera vertex, it is the
+                    // transform from the first camera to this camera, for pattern vertex,
+                    // it is the transform from pattern to the first camera
+        int timestamp;  // timestamp of photo, only available for photo vertex
+
+        vertex(Mat po, int ts)
+        {
+            pose = po;
+            timestamp = ts;
+        }
+
+        vertex()
+        {
+            pose = Mat::eye(4, 4, CV_32F);  // default pose: 4x4 identity
+            timestamp = -1;                 // default timestamp: -1
+        }
+    };
+    /** @brief Constructor
+    @param cameraType camera type, PINHOLE or OMNIDIRECTIONAL
+    @param nCameras number of cameras
+    @param fileName filename of string list that are used for calibration, the file is generated by imagelist_creator from OpenCv samples. The first one in the list is the pattern filename.
+    @param patternWidth the physical width of pattern, in user defined unit.
+    @param patternHeight the physical height of pattern, in user defined unit.
+    @param verbose default is 0; NOTE(review): not documented originally, semantics are not visible in this header.
+    @param showExtration whether show extracted features and feature filtering.
+    @param nMiniMatches minimal number of matched features for a frame.
+	@param flags Calibration flags
+    @param criteria optimization stopping criteria.
+    @param detector feature detector that detect feature points in pattern and images.
+    @param descriptor feature descriptor.
+    @param matcher feature matcher.
+    */
+    MultiCameraCalibration(int cameraType, int nCameras, const std::string& fileName, float patternWidth,
+        float patternHeight, int verbose = 0, int showExtration = 0, int nMiniMatches = 20, int flags = 0,
+        TermCriteria criteria = TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 200, 1e-7),
+        Ptr<FeatureDetector> detector = AKAZE::create(AKAZE::DESCRIPTOR_MLDB, 0, 3, 0.006f),
+        Ptr<DescriptorExtractor> descriptor = AKAZE::create(AKAZE::DESCRIPTOR_MLDB,0, 3, 0.006f),
+        Ptr<DescriptorMatcher> matcher = DescriptorMatcher::create("BruteForce-L1"));
+
+    /** @brief load images
+    */
+    void loadImages();
+
+    /** @brief initialize multiple camera calibration. It calibrates each camera individually.
+    */
+    void initialize();
+
+    /** @brief optimize extrinsic parameters
+    */
+    double optimizeExtrinsics();
+
+    /** @brief run multi-camera camera calibration, it runs loadImages(), initialize() and optimizeExtrinsics()
+    */
+    double run();
+
+    /** @brief write camera parameters to file.
+    */
+    void writeParameters(const std::string& filename);
+
+private:
+    std::vector<std::string> readStringList();
+
+    int getPhotoVertex(int timestamp);
+
+    void graphTraverse(const Mat& G, int begin, std::vector<int>& order, std::vector<int>& pre);
+
+    void findRowNonZero(const Mat& row, Mat& idx);
+
+    void computeJacobianExtrinsic(const Mat& extrinsicParams, Mat& JTJ_inv, Mat& JTE);
+
+    void computePhotoCameraJacobian(const Mat& rvecPhoto, const Mat& tvecPhoto, const Mat& rvecCamera,
+        const Mat& tvecCamera, Mat& rvecTran, Mat& tvecTran, const Mat& objectPoints, const Mat& imagePoints, const Mat& K,
+        const Mat& distort, const Mat& xi, Mat& jacobianPhoto, Mat& jacobianCamera, Mat& E);
+
+    void compose_motion(InputArray _om1, InputArray _T1, InputArray _om2, InputArray _T2, Mat& om3, Mat& T3, Mat& dom3dom1,
+        Mat& dom3dT1, Mat& dom3dom2, Mat& dom3dT2, Mat& dT3dom1, Mat& dT3dT1, Mat& dT3dom2, Mat& dT3dT2);
+
+    void JRodriguesMatlab(const Mat& src, Mat& dst);
+    void dAB(InputArray A, InputArray B, OutputArray dABdA, OutputArray dABdB);
+
+    double computeProjectError(Mat& parameters);
+
+    void vector2parameters(const Mat& parameters, std::vector<Vec3f>& rvecVertex, std::vector<Vec3f>& tvecVertexs);
+    void parameters2vector(const std::vector<Vec3f>& rvecVertex, const std::vector<Vec3f>& tvecVertex, Mat& parameters);
+
+    int _camType; // PINHOLE or OMNIDIRECTIONAL (FISHEYE is commented out in the enum above)
+    int _nCamera;
+    int _nMiniMatches;
+    int _flags;
+	int _verbose;
+    double _error;
+    float _patternWidth, _patternHeight;
+    TermCriteria _criteria;
+    std::string _filename;
+    int _showExtraction;
+    Ptr<FeatureDetector> _detector;
+    Ptr<DescriptorExtractor> _descriptor;
+    Ptr<DescriptorMatcher> _matcher;
+
+    std::vector<edge> _edgeList;
+    std::vector<vertex> _vertexList;
+    std::vector<std::vector<cv::Mat> > _objectPointsForEachCamera;
+    std::vector<std::vector<cv::Mat> > _imagePointsForEachCamera;
+    std::vector<cv::Mat> _cameraMatrix;
+    std::vector<cv::Mat> _distortCoeffs;
+    std::vector<cv::Mat> _xi;
+    std::vector<std::vector<Mat> > _omEachCamera, _tEachCamera;
+};
+
+//! @}
+
+}} // namespace multicalib, cv
+#endif
--- a/3rdparty/opencv/inc/opencv2/ccalib/omnidir.hpp
+++ b/3rdparty/opencv/inc/opencv2/ccalib/omnidir.hpp
@@ -0,0 +1,315 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2015, Baisheng Lai (laibaisheng@gmail.com), Zhejiang University,
+// all rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_OMNIDIR_HPP__
+#define __OPENCV_OMNIDIR_HPP__
+
+#include "opencv2/core.hpp"
+#include "opencv2/core/affine.hpp"
+#include <vector>
+
+namespace cv
+{
+namespace omnidir
+{
+    //! @addtogroup ccalib
+    //! @{
+
+    enum { // Calibration option bit flags; each value is a distinct power of two. NOTE(review): presumably OR-ed into the `flags` argument of calibrate()/stereoCalibrate() -- confirm
+        CALIB_USE_GUESS             = 1,
+        CALIB_FIX_SKEW              = 2,
+        CALIB_FIX_K1                = 4,
+        CALIB_FIX_K2                = 8,
+        CALIB_FIX_P1                = 16,
+        CALIB_FIX_P2                = 32,
+        CALIB_FIX_XI                = 64,
+        CALIB_FIX_GAMMA             = 128,
+        CALIB_FIX_CENTER            = 256
+    };
+
+    enum{ // Rectification types named by the flags/flag parameter docs of initUndistortRectifyMap(), undistortImage() and stereoReconstruct()
+        RECTIFY_PERSPECTIVE         = 1,
+        RECTIFY_CYLINDRICAL         = 2,
+        RECTIFY_LONGLATI            = 3,
+        RECTIFY_STEREOGRAPHIC       = 4
+    };
+
+    enum{ // Point cloud types for the pointType argument of stereoReconstruct()
+        XYZRGB  = 1,
+        XYZ     = 2
+    };
+/**
+ * This module was accepted as a GSoC 2015 project for OpenCV, authored by
+ * Baisheng Lai, mentored by Bo Li.
+ */
+
+    /** @brief Projects points for omnidirectional camera using CMei's model
+
+    @param objectPoints Object points in world coordinate, vector of vector of Vec3f or Mat of
+    1xN/Nx1 3-channel of type CV_32F and N is the number of points. 64F is also acceptable.
+    @param imagePoints Output array of image points, vector of vector of Vec2f or
+    1xN/Nx1 2-channel of type CV_32F. 64F is also acceptable.
+    @param rvec vector of rotation between world coordinate and camera coordinate, i.e., om
+    @param tvec vector of translation between pattern coordinate and camera coordinate
+    @param K Camera matrix \f$K = \vecthreethree{f_x}{s}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$.
+    @param D Input vector of distortion coefficients \f$(k_1, k_2, p_1, p_2)\f$.
+    @param xi The parameter xi for CMei's model
+    @param jacobian Optional output 2Nx16 of type CV_64F jacobian matrix, contains the derivatives of
+    image pixel points wrt parameters including \f$om, T, f_x, f_y, s, c_x, c_y, xi, k_1, k_2, p_1, p_2\f$.
+    This matrix will be used in calibration by optimization.
+
+    The function projects 3D object points in world coordinates to image pixels, parameterized by the intrinsic
+    and extrinsic parameters. It also optionally computes a by-product: the jacobian matrix containing
+    the derivatives of the image pixel points wrt the intrinsic and extrinsic parameters.
+     */
+    CV_EXPORTS_W void projectPoints(InputArray objectPoints, OutputArray imagePoints, InputArray rvec, InputArray tvec,
+                       InputArray K, double xi, InputArray D, OutputArray jacobian = noArray());
+
+    /** @overload */
+    CV_EXPORTS void projectPoints(InputArray objectPoints, OutputArray imagePoints, const Affine3d& affine,
+                        InputArray K, double xi, InputArray D, OutputArray jacobian = noArray());
+
+    /** @brief Undistort 2D image points for omnidirectional camera using CMei's model
+
+    @param distorted Array of distorted image points, vector of Vec2f
+    or 1xN/Nx1 2-channel Mat of type CV_32F, 64F depth is also acceptable
+    @param K Camera matrix \f$K = \vecthreethree{f_x}{s}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$.
+    @param D Distortion coefficients \f$(k_1, k_2, p_1, p_2)\f$.
+    @param xi The parameter xi for CMei's model
+    @param R Rotation transform between the original and object space : 3x3 1-channel, or vector: 3x1/1x3
+    1-channel or 1x1 3-channel
+    @param undistorted Array of normalized object points, vector of Vec2f/Vec2d or 1xN/Nx1 2-channel Mat with the same
+    depth as the distorted points.
+     */
+    CV_EXPORTS_W void undistortPoints(InputArray distorted, OutputArray undistorted, InputArray K, InputArray D, InputArray xi, InputArray R);
+
+    /** @brief Computes undistortion and rectification maps for omnidirectional camera image transform by a rotation R.
+    It outputs two maps that are used for cv::remap(). If D is empty then zero distortion is used,
+    if R or P is empty then identity matrices are used.
+
+    @param K Camera matrix \f$K = \vecthreethree{f_x}{s}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$, with depth CV_32F or CV_64F
+    @param D Input vector of distortion coefficients \f$(k_1, k_2, p_1, p_2)\f$, with depth CV_32F or CV_64F
+    @param xi The parameter xi for CMei's model
+    @param R Rotation transform between the original and object space : 3x3 1-channel, or vector: 3x1/1x3, with depth CV_32F or CV_64F
+    @param P New camera matrix (3x3) or new projection matrix (3x4)
+    @param size Undistorted image size.
+    @param m1type Type of the first output map that can be CV_32FC1 or CV_16SC2 . See convertMaps()
+    for details.
+    @param map1 The first output map.
+    @param map2 The second output map.
+    @param flags Flag indicating the rectification type; RECTIFY_PERSPECTIVE, RECTIFY_CYLINDRICAL, RECTIFY_LONGLATI and RECTIFY_STEREOGRAPHIC
+    are supported.
+     */
+    CV_EXPORTS_W void initUndistortRectifyMap(InputArray K, InputArray D, InputArray xi, InputArray R, InputArray P, const cv::Size& size,
+        int m1type, OutputArray map1, OutputArray map2, int flags);
+
+    /** @brief Undistort omnidirectional images to perspective images
+
+    @param distorted The input omnidirectional image.
+    @param undistorted The output undistorted image.
+    @param K Camera matrix \f$K = \vecthreethree{f_x}{s}{c_x}{0}{f_y}{c_y}{0}{0}{_1}\f$.
+    @param D Input vector of distortion coefficients \f$(k_1, k_2, p_1, p_2)\f$.
+    @param xi The parameter xi for CMei's model.
+    @param flags Flag indicating the rectification type: RECTIFY_PERSPECTIVE, RECTIFY_CYLINDRICAL, RECTIFY_LONGLATI or RECTIFY_STEREOGRAPHIC.
+    @param Knew New camera matrix of the undistorted (output) image. If it is not assigned, it is just K.
+    @param new_size The new image size. By default, it is the size of distorted.
+    @param R Rotation matrix between the input and output images. By default, it is identity matrix.
+    */
+    CV_EXPORTS_W void undistortImage(InputArray distorted, OutputArray undistorted, InputArray K, InputArray D, InputArray xi, int flags,
+        InputArray Knew = cv::noArray(), const Size& new_size = Size(), InputArray R = Mat::eye(3, 3, CV_64F));
+
+    /** @brief Perform omnidirectional camera calibration, the default depth of outputs is CV_64F.
+
+    @param objectPoints Vector of vector of Vec3f object points in world (pattern) coordinate.
+    It also can be vector of Mat with size 1xN/Nx1 and type CV_32FC3. Data with depth of CV_64F is also acceptable.
+    @param imagePoints Vector of vector of Vec2f corresponding image points of objectPoints. It must be the same
+    size and the same type with objectPoints.
+    @param size Image size of calibration images.
+    @param K Output calibrated camera matrix.
+    @param xi Output parameter xi for CMei's model
+    @param D Output distortion parameters \f$(k_1, k_2, p_1, p_2)\f$
+    @param rvecs Output rotations for each calibration image
+    @param tvecs Output translations for each calibration image
+    @param flags The flags that control calibrate
+    @param criteria Termination criteria for optimization
+    @param idx Indices of images that pass initialization, which are really used in calibration. So the size of rvecs is the
+    same as idx.total().
+    */
+    CV_EXPORTS_W double calibrate(InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints, Size size,
+        InputOutputArray K, InputOutputArray xi, InputOutputArray D, OutputArrayOfArrays rvecs, OutputArrayOfArrays tvecs,
+        int flags, TermCriteria criteria, OutputArray idx=noArray());
+
+    /** @brief Stereo calibration for omnidirectional camera model. It computes the intrinsic parameters for two
+    cameras and the extrinsic parameters between two cameras. The default depth of outputs is CV_64F.
+
+    @param objectPoints Object points in world (pattern) coordinate. Its type is vector<vector<Vec3f> >.
+    It also can be vector of Mat with size 1xN/Nx1 and type CV_32FC3. Data with depth of CV_64F is also acceptable.
+    @param imagePoints1 The corresponding image points of the first camera, with type vector<vector<Vec2f> >.
+    It must be the same size and the same type as objectPoints.
+    @param imagePoints2 The corresponding image points of the second camera, with type vector<vector<Vec2f> >.
+    It must be the same size and the same type as objectPoints.
+    @param imageSize1 Image size of calibration images of the first camera.
+    @param imageSize2 Image size of calibration images of the second camera.
+    @param K1 Output camera matrix for the first camera.
+    @param xi1 Output parameter xi of CMei's model for the first camera
+    @param D1 Output distortion parameters \f$(k_1, k_2, p_1, p_2)\f$ for the first camera
+    @param K2 Output camera matrix for the second camera.
+    @param xi2 Output parameter xi of CMei's model for the second camera
+    @param D2 Output distortion parameters \f$(k_1, k_2, p_1, p_2)\f$ for the second camera
+    @param rvec Output rotation between the first and second camera
+    @param tvec Output translation between the first and second camera
+    @param rvecsL Output rotation for each image of the first camera
+    @param tvecsL Output translation for each image of the first camera
+    @param flags The flags that control stereoCalibrate
+    @param criteria Termination criteria for optimization
+    @param idx Indices of image pairs that pass initialization, which are really used in calibration. So the size of rvecsL is the
+    same as idx.total().
+
+    */
+    CV_EXPORTS_W double stereoCalibrate(InputOutputArrayOfArrays objectPoints, InputOutputArrayOfArrays imagePoints1, InputOutputArrayOfArrays imagePoints2,
+        const Size& imageSize1, const Size& imageSize2, InputOutputArray K1, InputOutputArray xi1, InputOutputArray D1, InputOutputArray K2, InputOutputArray xi2,
+        InputOutputArray D2, OutputArray rvec, OutputArray tvec, OutputArrayOfArrays rvecsL, OutputArrayOfArrays tvecsL, int flags, TermCriteria criteria, OutputArray idx=noArray());
+
+    /** @brief Stereo rectification for omnidirectional camera model. It computes the rectification rotations for the two cameras.
+
+    @param R Rotation between the first and second camera
+    @param T Translation between the first and second camera
+    @param R1 Output 3x3 rotation matrix for the first camera
+    @param R2 Output 3x3 rotation matrix for the second camera
+    */
+    CV_EXPORTS_W void stereoRectify(InputArray R, InputArray T, OutputArray R1, OutputArray R2);
+
+    /** @brief Stereo 3D reconstruction from a pair of images
+
+    @param image1 The first input image
+    @param image2 The second input image
+    @param K1 Input camera matrix of the first camera
+    @param D1 Input distortion parameters \f$(k_1, k_2, p_1, p_2)\f$ for the first camera
+    @param xi1 Input parameter xi for the first camera for CMei's model
+    @param K2 Input camera matrix of the second camera
+    @param D2 Input distortion parameters \f$(k_1, k_2, p_1, p_2)\f$ for the second camera
+    @param xi2 Input parameter xi for the second camera for CMei's model
+    @param R Rotation between the first and second camera
+    @param T Translation between the first and second camera
+    @param flag Flag of rectification type, RECTIFY_PERSPECTIVE or RECTIFY_LONGLATI
+    @param numDisparities The parameter 'numDisparities' in StereoSGBM, see StereoSGBM for details.
+    @param SADWindowSize The parameter 'SADWindowSize' in StereoSGBM, see StereoSGBM for details.
+    @param disparity Disparity map generated by stereo matching
+    @param image1Rec Rectified image of the first image
+    @param image2Rec Rectified image of the second image
+    @param newSize Image size of the rectified image, see omnidir::undistortImage
+    @param Knew New camera matrix of the rectified image, see omnidir::undistortImage
+    @param pointCloud Point cloud of 3D reconstruction, with type CV_64FC3
+    @param pointType Point cloud type, it can be XYZRGB or XYZ
+    */
+    CV_EXPORTS_W void stereoReconstruct(InputArray image1, InputArray image2, InputArray K1, InputArray D1, InputArray xi1,
+        InputArray K2, InputArray D2, InputArray xi2, InputArray R, InputArray T, int flag, int numDisparities, int SADWindowSize,
+        OutputArray disparity, OutputArray image1Rec, OutputArray image2Rec, const Size& newSize = Size(), InputArray Knew = cv::noArray(),
+        OutputArray pointCloud = cv::noArray(), int pointType = XYZRGB);
+
+namespace internal
+{
+    void initializeCalibration(InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints, Size size, OutputArrayOfArrays omAll,
+        OutputArrayOfArrays tAll, OutputArray K, double& xi, OutputArray idx = noArray());
+
+    void initializeStereoCalibration(InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints1, InputArrayOfArrays imagePoints2,
+        const Size& size1, const Size& size2, OutputArray om, OutputArray T, OutputArrayOfArrays omL, OutputArrayOfArrays tL, OutputArray K1, OutputArray D1, OutputArray K2, OutputArray D2,
+        double &xi1, double &xi2, int flags, OutputArray idx);
+
+    void computeJacobian(InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints, InputArray parameters, Mat& JTJ_inv, Mat& JTE, int flags,
+							double epsilon);
+
+    void computeJacobianStereo(InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints1, InputArrayOfArrays imagePoints2,
+        InputArray parameters, Mat& JTJ_inv, Mat& JTE, int flags, double epsilon);
+
+    void encodeParameters(InputArray K, InputArrayOfArrays omAll, InputArrayOfArrays tAll, InputArray distoaration, double xi, OutputArray parameters); // NOTE(review): "distoaration" looks like a misspelling of "distortion" -- presumably the distortion coefficients; confirm
+
+    void encodeParametersStereo(InputArray K1, InputArray K2, InputArray om, InputArray T, InputArrayOfArrays omL, InputArrayOfArrays tL,
+        InputArray D1, InputArray D2, double xi1, double xi2, OutputArray parameters);
+
+    void decodeParameters(InputArray paramsters, OutputArray K, OutputArrayOfArrays omAll, OutputArrayOfArrays tAll, OutputArray distoration, double& xi); // NOTE(review): "paramsters"/"distoration" look like misspellings of "parameters"/"distortion"; presumably the inverse of encodeParameters -- confirm
+
+    void decodeParametersStereo(InputArray parameters, OutputArray K1, OutputArray K2, OutputArray om, OutputArray T, OutputArrayOfArrays omL,
+        OutputArrayOfArrays tL, OutputArray D1, OutputArray D2, double& xi1, double& xi2);
+
+    void estimateUncertainties(InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints, InputArray parameters, Mat& errors, Vec2d& std_error, double& rms, int flags);
+
+    void estimateUncertaintiesStereo(InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints1, InputArrayOfArrays imagePoints2, InputArray parameters, Mat& errors,
+        Vec2d& std_error, double& rms, int flags);
+
+    double computeMeanReproErr(InputArrayOfArrays imagePoints, InputArrayOfArrays proImagePoints);
+
+    double computeMeanReproErr(InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints, InputArray K, InputArray D, double xi, InputArrayOfArrays omAll,
+        InputArrayOfArrays tAll);
+
+    double computeMeanReproErrStereo(InputArrayOfArrays objectPoints, InputArrayOfArrays imagePoints1, InputArrayOfArrays imagePoints2, InputArray K1, InputArray K2,
+        InputArray D1, InputArray D2, double xi1, double xi2, InputArray om, InputArray T, InputArrayOfArrays omL, InputArrayOfArrays TL);
+
+    void subMatrix(const Mat& src, Mat& dst, const std::vector<int>& cols, const std::vector<int>& rows);
+
+    void flags2idx(int flags, std::vector<int>& idx, int n);
+
+    void flags2idxStereo(int flags, std::vector<int>& idx, int n);
+
+    void fillFixed(Mat&G, int flags, int n);
+
+    void fillFixedStereo(Mat& G, int flags, int n);
+
+    double findMedian(const Mat& row);
+
+    Vec3d findMedian3(InputArray mat);
+
+    void getInterset(InputArray idx1, InputArray idx2, OutputArray inter1, OutputArray inter2, OutputArray inter_ori); // NOTE(review): name presumably means "getIntersect" (intersection of idx1 and idx2) -- confirm against the implementation
+
+    void compose_motion(InputArray _om1, InputArray _T1, InputArray _om2, InputArray _T2, Mat& om3, Mat& T3, Mat& dom3dom1,
+        Mat& dom3dT1, Mat& dom3dom2, Mat& dom3dT2, Mat& dT3dom1, Mat& dT3dT1, Mat& dT3dom2, Mat& dT3dT2);
+
+    //void JRodriguesMatlab(const Mat& src, Mat& dst);
+
+    //void dAB(InputArray A, InputArray B, OutputArray dABdA, OutputArray dABdB);
+} // internal
+
+//! @}
+
+} // omnidir
+
+} //cv
+#endif
--- a/3rdparty/opencv/inc/opencv2/ccalib/randpattern.hpp
+++ b/3rdparty/opencv/inc/opencv2/ccalib/randpattern.hpp
@@ -0,0 +1,184 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2015, Baisheng Lai (laibaisheng@gmail.com), Zhejiang University,
+// all rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_RANDOMPATTERN_HPP__
+#define __OPENCV_RANDOMPATTERN_HPP__
+
+#include "opencv2/features2d.hpp"
+#include "opencv2/highgui.hpp"
+
+namespace cv { namespace randpattern {
+
+
+//! @addtogroup ccalib
+//! @{
+
+/** @brief Class for finding feature points and their corresponding 3D points in world coordinates of
+a "random" pattern, which can then be used in calibration. It is useful when the pattern is
+partly occluded or only a part of the pattern can be observed in multiple-camera calibration.
+The pattern can be generated by RandomPatternGenerator class described in this file.
+
+Please refer to paper
+    B. Li, L. Heng, K. Kevin  and M. Pollefeys, "A Multiple-Camera System
+    Calibration Toolbox Using A Feature Descriptor-Based Calibration
+    Pattern", in IROS 2013.
+*/
+
+class CV_EXPORTS RandomPatternCornerFinder
+{
+public:
+
+    /* @brief Construct RandomPatternCornerFinder object
+
+    @param patternWidth the real width of "random" pattern in a user defined unit.
+    @param patternHeight the real height of "random" pattern in a user defined unit.
+    @param nminiMatch number of minimal matches, otherwise that image is abandoned
+    @param depth depth of output objectPoints and imagePoints, set it to be CV_32F or CV_64F.
+    @param showExtraction whether to show feature extraction, 0 for no and 1 for yes.
+    @param detector feature detector to detect feature points in pattern and images.
+    @param descriptor feature descriptor.
+    @param matcher feature matcher.
+    */
+    RandomPatternCornerFinder(float patternWidth, float patternHeight,
+        int nminiMatch = 20, int depth = CV_32F, int verbose = 0, int showExtraction = 0, // NOTE(review): "verbose" has no entry in the parameter list above
+        Ptr<FeatureDetector> detector = AKAZE::create(AKAZE::DESCRIPTOR_MLDB, 0, 3, 0.005f),
+        Ptr<DescriptorExtractor> descriptor = AKAZE::create(AKAZE::DESCRIPTOR_MLDB,0, 3, 0.005f),
+        Ptr<DescriptorMatcher> matcher = DescriptorMatcher::create("BruteForce-L1"));
+
+    /* @brief Load pattern image and compute features for pattern
+    @param patternImage image for "random" pattern generated by RandomPatternGenerator, run it first.
+    */
+    void loadPattern(const cv::Mat &patternImage);
+
+    /* @brief Load pattern and features
+    @param patternImage image for "random" pattern generated by RandomPatternGenerator, run it first.
+    @param patternKeyPoints keyPoints created from a FeatureDetector.
+    @param patternDescriptors descriptors created from a DescriptorExtractor.
+    */
+    void loadPattern(const cv::Mat &patternImage, const std::vector<cv::KeyPoint> &patternKeyPoints, const cv::Mat &patternDescriptors);
+
+    /* @brief Compute matched object points and image points which are used for calibration
+    The objectPoints (3D) and imagePoints (2D) are stored inside the class. Run getObjectPoints()
+    and getImagePoints() to get them.
+
+    @param inputImages vector of 8-bit grayscale images containing "random" pattern
+    that are used for calibration.
+    */
+    void computeObjectImagePoints(std::vector<cv::Mat> inputImages);
+
+    //void computeObjectImagePoints2(std::vector<cv::Mat> inputImages);
+
+    /* @brief Compute object and image points for a single image. It returns a vector<Mat> whose
+    first element stores the imagePoints and whose second element stores the objectPoints.
+
+    @param inputImage single input image for calibration
+    */
+    std::vector<cv::Mat> computeObjectImagePointsForSingle(cv::Mat inputImage);
+
+    /* @brief Get object(3D) points
+    */
+    const std::vector<cv::Mat> &getObjectPoints();
+
+    /* @brief Get image(2D) points
+    */
+    const std::vector<cv::Mat> &getImagePoints();
+
+private:
+
+    std::vector<cv::Mat> _objectPonits, _imagePoints; // NOTE(review): "_objectPonits" is a misspelling of "_objectPoints"; left as-is because the implementation file is outside this view
+    float _patternWidth, _patternHeight;
+    cv::Size _patternImageSize;
+    int _nminiMatch;
+    int _depth;
+	int _verbose;
+
+    Ptr<FeatureDetector> _detector;
+    Ptr<DescriptorExtractor> _descriptor;
+    Ptr<DescriptorMatcher> _matcher;
+    Mat _descriptorPattern;
+    std::vector<cv::KeyPoint> _keypointsPattern;
+    Mat _patternImage;
+    int _showExtraction;
+
+    void keyPoints2MatchedLocation(const std::vector<cv::KeyPoint>& imageKeypoints,
+        const std::vector<cv::KeyPoint>& patternKeypoints, const std::vector<cv::DMatch> matchces,
+        cv::Mat& matchedImagelocation, cv::Mat& matchedPatternLocation);
+    void getFilteredLocation(cv::Mat& imageKeypoints, cv::Mat& patternKeypoints, const cv::Mat mask);
+    void getObjectImagePoints(const cv::Mat& imageKeypoints, const cv::Mat& patternKeypoints);
+    void crossCheckMatching( cv::Ptr<DescriptorMatcher>& descriptorMatcher,
+        const Mat& descriptors1, const Mat& descriptors2,
+        std::vector<DMatch>& filteredMatches12, int knn=1 );
+    void drawCorrespondence(const Mat& image1, const std::vector<cv::KeyPoint> keypoint1,
+        const Mat& image2, const std::vector<cv::KeyPoint> keypoint2, const std::vector<cv::DMatch> matchces,
+        const Mat& mask1, const Mat& mask2, const int step);
+};
+
+/* @brief Class to generate the "random" pattern image that is used by RandomPatternCornerFinder.
+Please refer to the paper
+B. Li, L. Heng, K. Kevin  and M. Pollefeys, "A Multiple-Camera System
+Calibration Toolbox Using A Feature Descriptor-Based Calibration
+Pattern", in IROS 2013.
+*/
+class CV_EXPORTS RandomPatternGenerator
+{
+public:
+    /* @brief Construct RandomPatternGenerator
+
+    @param imageWidth image width of the generated pattern image
+    @param imageHeight image height of the generated pattern image
+    */
+    RandomPatternGenerator(int imageWidth, int imageHeight);
+
+    /* @brief Generate the pattern
+    */
+    void generatePattern();
+    /* @brief Get the pattern
+    */
+    cv::Mat getPattern();
+private:
+    cv::Mat _pattern;
+    int _imageWidth, _imageHeight;
+};
+
+//! @}
+
+}} //namespace randpattern, cv
+#endif
--- a/3rdparty/opencv/inc/opencv2/core.hpp
+++ b/3rdparty/opencv/inc/opencv2/core.hpp
--- a/3rdparty/opencv/inc/opencv2/core/affine.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/affine.hpp
@@ -0,0 +1,678 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_AFFINE3_HPP
+#define OPENCV_CORE_AFFINE3_HPP
+
+#ifdef __cplusplus
+
+#include <opencv2/core.hpp>
+
+namespace cv
+{
+
+//! @addtogroup core
+//! @{
+
+    /** @brief Affine transform
+     *
+     * It represents a 4x4 homogeneous transformation matrix \f$T\f$
+     *
+     *  \f[T =
+     *  \begin{bmatrix}
+     *  R & t\\
+     *  0 & 1\\
+     *  \end{bmatrix}
+     *  \f]
+     *
+     *  where \f$R\f$ is a 3x3 rotation matrix and \f$t\f$ is a 3x1 translation vector.
+     *
+     *  You can specify \f$R\f$ either by a 3x3 rotation matrix or by a 3x1 rotation vector,
+     *  which is converted to a 3x3 rotation matrix by the Rodrigues formula.
+     *
+     *  To construct a matrix \f$T\f$ representing first rotation around the axis \f$r\f$ with rotation
+     *  angle \f$|r|\f$ in radian (right hand rule) and then translation by the vector \f$t\f$, you can use
+     *
+     *  @code
+     *  cv::Vec3f r, t;
+     *  cv::Affine3f T(r, t);
+     *  @endcode
+     *
+     *  If you already have the rotation matrix \f$R\f$, then you can use
+     *
+     *  @code
+     *  cv::Matx33f R;
+     *  cv::Affine3f T(R, t);
+     *  @endcode
+     *
+     *  To extract the rotation matrix \f$R\f$ from \f$T\f$, use
+     *
+     *  @code
+     *  cv::Matx33f R = T.rotation();
+     *  @endcode
+     *
+     *  To extract the translation vector \f$t\f$ from \f$T\f$, use
+     *
+     *  @code
+     *  cv::Vec3f t = T.translation();
+     *  @endcode
+     *
+     *  To extract the rotation vector \f$r\f$ from \f$T\f$, use
+     *
+     *  @code
+     *  cv::Vec3f r = T.rvec();
+     *  @endcode
+     *
+     *  Note that since the mapping from rotation vectors to rotation matrices
+     *  is many to one, the returned rotation vector is not necessarily the one
+     *  you used before to set the matrix.
+     *
+     *  If you have two transformations \f$T = T_1 * T_2\f$, use
+     *
+     *  @code
+     *  cv::Affine3f T, T1, T2;
+     *  T = T2.concatenate(T1);
+     *  @endcode
+     *
+     *  To get the inverse transform of \f$T\f$, use
+     *
+     *  @code
+     *  cv::Affine3f T, T_inv;
+     *  T_inv = T.inv();
+     *  @endcode
+     *
+     */
+    template<typename T>
+    class Affine3
+    {
+    public:
+        typedef T float_type;
+        typedef Matx<float_type, 3, 3> Mat3;
+        typedef Matx<float_type, 4, 4> Mat4;
+        typedef Vec<float_type, 3> Vec3;
+
+        //! Default constructor. It represents a 4x4 identity matrix.
+        Affine3();
+
+        //! Augmented affine matrix
+        Affine3(const Mat4& affine);
+
+        /**
+         *  The resulting 4x4 matrix is
+         *
+         *  \f[
+         *  \begin{bmatrix}
+         *  R & t\\
+         *  0 & 1\\
+         *  \end{bmatrix}
+         *  \f]
+         *
+         * @param R 3x3 rotation matrix.
+         * @param t 3x1 translation vector.
+         */
+        Affine3(const Mat3& R, const Vec3& t = Vec3::all(0));
+
+        /**
+         * Rodrigues vector.
+         *
+         * The last row of the current matrix is set to [0,0,0,1].
+         *
+         * @param rvec 3x1 rotation vector. Its direction indicates the rotation axis and its length
+         *             indicates the rotation angle in radian (using right hand rule).
+         * @param t 3x1 translation vector.
+         */
+        Affine3(const Vec3& rvec, const Vec3& t = Vec3::all(0));
+
+        /**
+         * Combines all constructors above. Supports 4x4, 3x4, 3x3, 1x3, 3x1 sizes of data matrix.
+         *
+         * The last row of the current matrix is set to [0,0,0,1] when data is not 4x4.
+         *
+         * @param data 1-channel matrix.
+         *             when it is 4x4, it is copied to the current matrix and t is not used.
+         *             When it is 3x4, it is copied to the upper part 3x4 of the current matrix and t is not used.
+         *             When it is 3x3, it is copied to the upper left 3x3 part of the current matrix.
+         *             When it is 3x1 or 1x3, it is treated as a rotation vector and the Rodrigues formula is used
+         *                             to compute a 3x3 rotation matrix.
+         * @param t 3x1 translation vector. It is used only when data is neither 4x4 nor 3x4.
+         */
+        explicit Affine3(const Mat& data, const Vec3& t = Vec3::all(0));
+
+        //! From 16-element array
+        explicit Affine3(const float_type* vals);
+
+        //! Create a 4x4 identity transform
+        static Affine3 Identity();
+
+        /**
+         * Rotation matrix.
+         *
+         * Copy the rotation matrix to the upper left 3x3 part of the current matrix.
+         * The remaining elements of the current matrix are not changed.
+         *
+         * @param R 3x3 rotation matrix.
+         *
+         */
+        void rotation(const Mat3& R);
+
+        /**
+         * Rodrigues vector.
+         *
+         * It sets the upper left 3x3 part of the matrix. The remaining part is unaffected.
+         *
+         * @param rvec 3x1 rotation vector. The direction indicates the rotation axis and
+         *             its length indicates the rotation angle in radian (using the right thumb convention).
+         */
+        void rotation(const Vec3& rvec);
+
+        /**
+         * Combines rotation methods above. Supports 3x3, 1x3, 3x1 sizes of data matrix.
+         *
+         * It sets the upper left 3x3 part of the matrix. The remaining part is unaffected.
+         *
+         * @param data 1-channel matrix.
+         *             When it is a 3x3 matrix, it sets the upper left 3x3 part of the current matrix.
+         *             When it is a 1x3 or 3x1 matrix, it is used as a rotation vector. The Rodrigues formula
+         *             is used to compute the rotation matrix and sets the upper left 3x3 part of the current matrix.
+         */
+        void rotation(const Mat& data);
+
+        /**
+         * Copy the 3x3 matrix L to the upper left part of the current matrix
+         *
+         * It sets the upper left 3x3 part of the matrix. The remaining part is unaffected.
+         *
+         * @param L 3x3 matrix.
+         */
+        void linear(const Mat3& L);
+
+        /**
+         * Copy t to the first three elements of the last column of the current matrix
+         *
+         * It sets the upper right 3x1 part of the matrix. The remaining part is unaffected.
+         *
+         * @param t 3x1 translation vector.
+         */
+        void translation(const Vec3& t);
+
+        //! @return the upper left 3x3 part
+        Mat3 rotation() const;
+
+        //! @return the upper left 3x3 part
+        Mat3 linear() const;
+
+        //! @return the upper right 3x1 part
+        Vec3 translation() const;
+
+        //! Rodrigues vector.
+        //! @return a vector representing the upper left 3x3 rotation matrix of the current matrix.
+        //! @warning  Since the mapping between rotation vectors and rotation matrices is many to one,
+        //!           this function returns only one rotation vector that represents the current rotation matrix,
+        //!           which is not necessarily the same one set by `rotation(const Vec3& rvec)`.
+        Vec3 rvec() const;
+
+        //! @return the inverse of the current matrix.
+        Affine3 inv(int method = cv::DECOMP_SVD) const;
+
+        //! a.rotate(R) is equivalent to Affine(R, 0) * a;
+        Affine3 rotate(const Mat3& R) const;
+
+        //! a.rotate(rvec) is equivalent to Affine(rvec, 0) * a;
+        Affine3 rotate(const Vec3& rvec) const;
+
+        //! a.translate(t) is equivalent to Affine(E, t) * a, where E is an identity matrix
+        Affine3 translate(const Vec3& t) const;
+
+        //! a.concatenate(affine) is equivalent to affine * a;
+        Affine3 concatenate(const Affine3& affine) const;
+
+        //! Implicit conversion to an Affine3 with another element type Y.
+        template <typename Y> operator Affine3<Y>() const;
+
+        //! Explicitly named conversion to an Affine3 with another element type Y.
+        template <typename Y> Affine3<Y> cast() const;
+
+        //! The 4x4 matrix holding the transform. It is a public member; the methods of this class
+        //! address it through matrix.val, with the translation at val[3], val[7], val[11] and the
+        //! last row at val[12..15].
+        Mat4 matrix;
+
+        // Conversions from/to Eigen::Transform. They are declared only when both Eigen macros
+        // below are already defined at this point of the translation unit.
+#if defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H
+        Affine3(const Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>& affine);
+        Affine3(const Eigen::Transform<T, 3, Eigen::Affine>& affine);
+        operator Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>() const;
+        operator Eigen::Transform<T, 3, Eigen::Affine>() const;
+#endif
+    };
+
+    //! Composition of two transforms; implemented as affine2.concatenate(affine1).
+    template<typename T> static
+    Affine3<T> operator*(const Affine3<T>& affine1, const Affine3<T>& affine2);
+
+    //! Applies the transform (upper left 3x3 part plus translation) to a 3D point.
+    //! V is a 3-element vector with member fields x, y and z
+    template<typename T, typename V> static
+    V operator*(const Affine3<T>& affine, const V& vector);
+
+    //! Single and double precision shorthands.
+    typedef Affine3<float> Affine3f;
+    typedef Affine3<double> Affine3d;
+
+    //! Same as the generic point overload above, for cv::Vec3f / cv::Vec3d, which are
+    //! indexed with operator[] in the implementation instead of x, y, z fields.
+    static Vec3f operator*(const Affine3f& affine, const Vec3f& vector);
+    static Vec3d operator*(const Affine3d& affine, const Vec3d& vector);
+
+    //! DataType specialization describing Affine3<_Tp> as a 16-channel element whose
+    //! channel type is _Tp (the 16 channels match the 4x4 matrix member).
+    template<typename _Tp> class DataType< Affine3<_Tp> >
+    {
+    public:
+        typedef Affine3<_Tp>                               value_type;
+        typedef Affine3<typename DataType<_Tp>::work_type> work_type;
+        typedef _Tp                                        channel_type;
+
+        // depth and type are only provided when OPENCV_TRAITS_ENABLE_DEPRECATED is defined;
+        // otherwise use traits::Depth / traits::Type specialized for Affine3 in this header.
+        enum { generic_type = 0,
+               channels     = 16,
+               fmt          = traits::SafeFmt<channel_type>::fmt + ((channels - 1) << 8)
+#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
+               ,depth        = DataType<channel_type>::depth
+               ,type         = CV_MAKETYPE(depth, channels)
+#endif
+             };
+
+        typedef Vec<channel_type, channels> vec_type;
+    };
+
+    // Depth/Type traits for Affine3<_Tp>: the depth is that of _Tp and the type is that
+    // depth combined with 16 channels.
+    namespace traits {
+    template<typename _Tp>
+    struct Depth< Affine3<_Tp> > { enum { value = Depth<_Tp>::value }; };
+    template<typename _Tp>
+    struct Type< Affine3<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 16) }; };
+    } // namespace
+
+//! @} core
+
+}
+
+//! @cond IGNORED
+
+///////////////////////////////////////////////////////////////////////////////////
+// Implementation
+
+// Default transform: the 4x4 identity matrix.
+template<typename T>
+inline cv::Affine3<T>::Affine3() : matrix(Mat4::eye()) {}
+
+// Wraps an existing 4x4 matrix as-is; no element is modified.
+template<typename T>
+inline cv::Affine3<T>::Affine3(const Mat4& affine) : matrix(affine) {}
+
+// Builds [R t; 0 0 0 1] from a 3x3 matrix and a translation vector.
+template<typename T>
+inline cv::Affine3<T>::Affine3(const Mat3& R, const Vec3& t)
+{
+    // Upper 3x4 part.
+    rotation(R);
+    translation(t);
+
+    // Last row.
+    matrix.val[12] = 0;
+    matrix.val[13] = 0;
+    matrix.val[14] = 0;
+    matrix.val[15] = 1;
+}
+
+// Builds [R(rvec) t; 0 0 0 1] from a rotation vector and a translation vector.
+template<typename T>
+inline cv::Affine3<T>::Affine3(const Vec3& _rvec, const Vec3& t)
+{
+    // Upper 3x4 part; the rotation vector is converted by rotation(const Vec3&).
+    rotation(_rvec);
+    translation(t);
+
+    // Last row.
+    matrix.val[12] = 0;
+    matrix.val[13] = 0;
+    matrix.val[14] = 0;
+    matrix.val[15] = 1;
+}
+
+// Builds the transform from a 1-channel cv::Mat of size 4x4, 3x4, 3x3, 1x3 or 3x1.
+// t is used only when data carries no translation column (i.e. it is neither 4x4 nor 3x4).
+template<typename T>
+inline cv::Affine3<T>::Affine3(const cv::Mat& data, const Vec3& t)
+{
+    CV_Assert(data.type() == cv::traits::Type<T>::value);
+    CV_Assert(data.channels() == 1);
+
+    const bool hasFourCols = (data.cols == 4);
+
+    // Full 4x4 input: taken verbatim, including its last row.
+    if (hasFourCols && data.rows == 4)
+    {
+        data.copyTo(matrix);
+        return;
+    }
+
+    if (hasFourCols && data.rows == 3)
+    {
+        // 3x4 input: left 3x3 block is the rotation part, last column the translation.
+        rotation(data(Rect(0, 0, 3, 3)));
+        translation(data(Rect(3, 0, 1, 3)));
+    }
+    else
+    {
+        // Any other size is handled (and validated) by rotation(const Mat&).
+        rotation(data);
+        translation(t);
+    }
+
+    // Last row.
+    matrix.val[12] = matrix.val[13] = matrix.val[14] = 0;
+    matrix.val[15] = 1;
+}
+
+// Initializes the 4x4 matrix directly from the array pointed to by vals.
+template<typename T>
+inline cv::Affine3<T>::Affine3(const float_type* vals)
+    : matrix(vals)
+{
+}
+
+// Returns a transform whose matrix is the 4x4 identity.
+template<typename T>
+inline cv::Affine3<T> cv::Affine3<T>::Identity()
+{
+    const typename cv::Affine3<T>::Mat4 identity4x4 = cv::Affine3<T>::Mat4::eye();
+    return Affine3<T>(identity4x4);
+}
+
+// Setting the rotation from a 3x3 matrix is the same operation as linear(R).
+template<typename T>
+inline void cv::Affine3<T>::rotation(const Mat3& R)
+{
+    this->linear(R);
+}
+
+// Sets the upper left 3x3 part from a rotation vector (axis * angle) using the
+// Rodrigues formula. A vector shorter than DBL_EPSILON yields the identity rotation.
+template<typename T>
+inline void cv::Affine3<T>::rotation(const Vec3& _rvec)
+{
+    const double angle = norm(_rvec);
+
+    if (angle < DBL_EPSILON)
+    {
+        rotation(Mat3::eye());
+        return;
+    }
+
+    const double cosA = std::cos(angle);
+    const double sinA = std::sin(angle);
+    const double oneMinusCos = 1. - cosA;
+    const double invAngle = (angle != 0) ? 1./angle : 0.;
+
+    // Unit rotation axis.
+    Point3_<T> axis = _rvec*invAngle;
+
+    // Outer product axis * axis^T.
+    Mat3 outer( axis.x*axis.x, axis.x*axis.y, axis.x*axis.z,
+                axis.x*axis.y, axis.y*axis.y, axis.y*axis.z,
+                axis.x*axis.z, axis.y*axis.z, axis.z*axis.z );
+    // Cross-product matrix [0 -z y; z 0 -x; -y x 0].
+    Mat3 cross( 0, -axis.z, axis.y,
+                axis.z, 0, -axis.x,
+                -axis.y, axis.x, 0 );
+
+    // R = cos(angle)*I + (1 - cos(angle))*axis*axis^T + sin(angle)*[axis]_x
+    Mat3 R = cosA*Mat3::eye() + oneMinusCos*outer + sinA*cross;
+
+    rotation(R);
+}
+
+// Sets the rotation from a 1-channel cv::Mat: a 3x3 matrix is used directly, a 1x3 or
+// 3x1 matrix is interpreted as a rotation vector. Any other size raises an error.
+template<typename T>
+inline void cv::Affine3<T>::rotation(const cv::Mat& data)
+{
+    CV_Assert(data.type() == cv::traits::Type<T>::value);
+    CV_Assert(data.channels() == 1);
+
+    const bool isMatrix3x3 = (data.cols == 3 && data.rows == 3);
+    const bool isVector3   = (data.cols == 3 && data.rows == 1) || (data.cols == 1 && data.rows == 3);
+
+    if (isMatrix3x3)
+    {
+        Mat3 rotMat;
+        data.copyTo(rotMat);
+        rotation(rotMat);
+    }
+    else if (isVector3)
+    {
+        // Bring both layouts to 3 rows before copying into the fixed-size vector.
+        Vec3 rotVec;
+        data.reshape(1, 3).copyTo(rotVec);
+        rotation(rotVec);
+    }
+    else
+    {
+        CV_Error(Error::StsError, "Input matrix can only be 3x3, 1x3 or 3x1");
+    }
+}
+
+// Copies the 3x3 matrix L into the upper left 3x3 part of the 4x4 matrix;
+// the last column and the last row are left untouched.
+template<typename T>
+inline void cv::Affine3<T>::linear(const Mat3& L)
+{
+    for (int row = 0; row < 3; ++row)
+    {
+        for (int col = 0; col < 3; ++col)
+            matrix.val[row * 4 + col] = L.val[row * 3 + col];
+    }
+}
+
+// Writes t into the first three elements of the last column.
+template<typename T>
+inline void cv::Affine3<T>::translation(const Vec3& t)
+{
+    matrix.val[ 3] = t[0];
+    matrix.val[ 7] = t[1];
+    matrix.val[11] = t[2];
+}
+
+// The rotation getter returns the same upper left 3x3 part as linear().
+template<typename T>
+inline typename cv::Affine3<T>::Mat3 cv::Affine3<T>::rotation() const
+{
+    return this->linear();
+}
+
+// Extracts the upper left 3x3 part of the 4x4 matrix.
+template<typename T>
+inline typename cv::Affine3<T>::Mat3 cv::Affine3<T>::linear() const
+{
+    typename cv::Affine3<T>::Mat3 upperLeft;
+    for (int row = 0; row < 3; ++row)
+    {
+        for (int col = 0; col < 3; ++col)
+            upperLeft.val[row * 3 + col] = matrix.val[row * 4 + col];
+    }
+    return upperLeft;
+}
+
+// Reads the translation from the first three elements of the last column.
+template<typename T>
+inline typename cv::Affine3<T>::Vec3 cv::Affine3<T>::translation() const
+{
+    const float_type tx = matrix.val[3];
+    const float_type ty = matrix.val[7];
+    const float_type tz = matrix.val[11];
+    return Vec3(tx, ty, tz);
+}
+
+// Converts the upper left 3x3 part to a rotation vector (axis scaled by the angle in radians).
+// All intermediate computations are done in double precision; the result is converted to Vec3.
+template<typename T> inline
+typename cv::Affine3<T>::Vec3 cv::Affine3<T>::rvec() const
+{
+    cv::Vec3d w;
+    cv::Matx33d u, vt, R = rotation();
+    // Replace R by u*vt from its SVD, i.e. drop the singular values
+    // (presumably to clean up a 3x3 part that is not exactly orthonormal).
+    cv::SVD::compute(R, w, u, vt, cv::SVD::FULL_UV + cv::SVD::MODIFY_A);
+    R = u * vt;
+
+    // Differences of the off-diagonal pairs; for a rotation matrix this is 2*sin(theta)*axis.
+    double rx = R.val[7] - R.val[5];
+    double ry = R.val[2] - R.val[6];
+    double rz = R.val[3] - R.val[1];
+
+    // s: half the length of (rx, ry, rz); c: (trace - 1)/2, clamped so that acos is defined.
+    double s = std::sqrt((rx*rx + ry*ry + rz*rz)*0.25);
+    double c = (R.val[0] + R.val[4] + R.val[8] - 1) * 0.5;
+    c = c > 1.0 ? 1.0 : c < -1.0 ? -1.0 : c;
+    double theta = std::acos(c);
+
+    if( s < 1e-5 )
+    {
+        // The off-diagonal differences are too small to give the axis.
+        if( c > 0 )
+            // c > 0 here means theta is close to 0: report no rotation.
+            rx = ry = rz = 0;
+        else
+        {
+            // Otherwise theta is close to pi: take the axis magnitudes from the diagonal
+            // and the signs of ry, rz from the first-row off-diagonal elements.
+            double t;
+            t = (R.val[0] + 1) * 0.5;
+            rx = std::sqrt(std::max(t, 0.0));
+            t = (R.val[4] + 1) * 0.5;
+            ry = std::sqrt(std::max(t, 0.0)) * (R.val[1] < 0 ? -1.0 : 1.0);
+            t = (R.val[8] + 1) * 0.5;
+            rz = std::sqrt(std::max(t, 0.0)) * (R.val[2] < 0 ? -1.0 : 1.0);
+
+            // When rx is the smallest component, the relative sign of ry and rz is
+            // checked against R.val[5] and rz is flipped if they disagree.
+            if( fabs(rx) < fabs(ry) && fabs(rx) < fabs(rz) && (R.val[5] > 0) != (ry*rz > 0) )
+                rz = -rz;
+            // Rescale the axis so that its length equals theta.
+            theta /= std::sqrt(rx*rx + ry*ry + rz*rz);
+            rx *= theta;
+            ry *= theta;
+            rz *= theta;
+        }
+    }
+    else
+    {
+        // General case: (rx, ry, rz) has length 2*s, so scaling by theta/(2*s)
+        // gives a vector of length theta.
+        double vth = 1/(2*s);
+        vth *= theta;
+        rx *= vth; ry *= vth; rz *= vth;
+    }
+
+    return cv::Vec3d(rx, ry, rz);
+}
+
+// Inverts the full 4x4 matrix with the given decomposition method and wraps the result.
+template<typename T>
+inline cv::Affine3<T> cv::Affine3<T>::inv(int method) const
+{
+    const Mat4 inverted = matrix.inv(method);
+    return inverted;
+}
+
+// Returns the transform obtained by applying R after this one: the new linear part is
+// R * linear() and the new translation is R * translation(). The last row is [0,0,0,1].
+template<typename T>
+inline cv::Affine3<T> cv::Affine3<T>::rotate(const Mat3& R) const
+{
+    const Mat3 curLinear = linear();
+    const Vec3 curTranslation = translation();
+
+    Mat4 out;
+    for(int row = 0; row < 3; ++row)
+    {
+        // Linear part: row of R times each column of the current linear part.
+        for(int col = 0; col < 3; ++col)
+        {
+            float_type acc = 0;
+            for(int k = 0; k < 3; ++k)
+                acc += R(row, k) * curLinear(k, col);
+            out(row, col) = acc;
+        }
+
+        // Translation part: row of R times the current translation.
+        out(row, 3) = R.row(row).dot(curTranslation.t());
+    }
+
+    // Last row.
+    out.val[12] = out.val[13] = out.val[14] = 0;
+    out.val[15] = 1;
+    return out;
+}
+
+// a.rotate(rvec) is equivalent to Affine(rvec, 0) * a.
+// The rotation matrix is built with this transform's own element type T rather than
+// through Affine3f, so that Affine3d keeps double precision for the rotation.
+template<typename T> inline
+cv::Affine3<T> cv::Affine3<T>::rotate(const Vec3& _rvec) const
+{
+    return rotate(Affine3<T>(_rvec).rotation());
+}
+
+// Returns a copy of this transform with t added to its translation column.
+template<typename T>
+inline cv::Affine3<T> cv::Affine3<T>::translate(const Vec3& t) const
+{
+    Mat4 shifted = matrix;
+    // Translation lives at val[3], val[7], val[11].
+    for (int i = 0; i < 3; ++i)
+        shifted.val[4 * i + 3] += t[i];
+    return shifted;
+}
+
+// a.concatenate(affine): first rotate a by affine's 3x3 part, then add affine's translation.
+template<typename T>
+inline cv::Affine3<T> cv::Affine3<T>::concatenate(const Affine3<T>& affine) const
+{
+    const Affine3<T> rotated = this->rotate(affine.rotation());
+    return rotated.translate(affine.translation());
+}
+
+// Implicit conversion to an Affine3 with element type Y, built from the same 4x4 matrix.
+template<typename T>
+template <typename Y>
+inline cv::Affine3<T>::operator Affine3<Y>() const
+{
+    Affine3<Y> converted(matrix);
+    return converted;
+}
+
+// Named conversion to an Affine3 with element type Y, built from the same 4x4 matrix.
+template<typename T>
+template <typename Y>
+inline cv::Affine3<Y> cv::Affine3<T>::cast() const
+{
+    Affine3<Y> converted(matrix);
+    return converted;
+}
+
+// affine1 * affine2 is expressed through concatenate(): affine2 followed by affine1.
+template<typename T>
+inline cv::Affine3<T> cv::operator*(const cv::Affine3<T>& affine1, const cv::Affine3<T>& affine2)
+{
+    const cv::Affine3<T> product = affine2.concatenate(affine1);
+    return product;
+}
+
+// Transforms a point with x, y, z fields: upper left 3x3 part times v, plus the translation.
+// The last row of the matrix is not used.
+template<typename T, typename V>
+inline V cv::operator*(const cv::Affine3<T>& affine, const V& v)
+{
+    const T* const a = affine.matrix.val;
+
+    V out;
+    out.x = a[0] * v.x + a[1] * v.y + a[ 2] * v.z + a[ 3];
+    out.y = a[4] * v.x + a[5] * v.y + a[ 6] * v.z + a[ 7];
+    out.z = a[8] * v.x + a[9] * v.y + a[10] * v.z + a[11];
+    return out;
+}
+
+// Transforms a cv::Vec3f: upper left 3x3 part times v, plus the translation.
+static inline
+cv::Vec3f cv::operator*(const cv::Affine3f& affine, const cv::Vec3f& v)
+{
+    const cv::Matx44f& m = affine.matrix;
+    cv::Vec3f out;
+    for (int row = 0; row < 3; ++row)
+    {
+        // Row `row` of the 4x4 matrix: three linear coefficients, then the translation.
+        const float* a = m.val + 4 * row;
+        out.val[row] = a[0] * v[0] + a[1] * v[1] + a[2] * v[2] + a[3];
+    }
+    return out;
+}
+
+// Transforms a cv::Vec3d: upper left 3x3 part times v, plus the translation.
+static inline
+cv::Vec3d cv::operator*(const cv::Affine3d& affine, const cv::Vec3d& v)
+{
+    const cv::Matx44d& m = affine.matrix;
+    cv::Vec3d out;
+    for (int row = 0; row < 3; ++row)
+    {
+        // Row `row` of the 4x4 matrix: three linear coefficients, then the translation.
+        const double* a = m.val + 4 * row;
+        out.val[row] = a[0] * v[0] + a[1] * v[1] + a[2] * v[2] + a[3];
+    }
+    return out;
+}
+
+
+
+#if defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H
+
+// From a row-major Eigen transform: a 4x4 cv::Mat is built over affine.matrix().data()
+// and copied into matrix.
+template<typename T> inline
+cv::Affine3<T>::Affine3(const Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>& affine)
+{
+    cv::Mat(4, 4, cv::traits::Type<T>::value, affine.matrix().data()).copyTo(matrix);
+}
+
+// From an Eigen transform with default storage options: it is first converted to a
+// row-major transform, whose data is then copied into matrix through a 4x4 cv::Mat.
+template<typename T> inline
+cv::Affine3<T>::Affine3(const Eigen::Transform<T, 3, Eigen::Affine>& affine)
+{
+    Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)> a = affine;
+    cv::Mat(4, 4, cv::traits::Type<T>::value, a.matrix().data()).copyTo(matrix);
+}
+
+// To a row-major Eigen transform: matrix is copied into the storage of the result r
+// through a 4x4 cv::Mat built over r.matrix().data().
+template<typename T> inline
+cv::Affine3<T>::operator Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>() const
+{
+    Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)> r;
+    cv::Mat hdr(4, 4, cv::traits::Type<T>::value, r.matrix().data());
+    // NOTE(review): the `false` argument presumably asks cv::Mat not to copy matrix's data -- confirm.
+    cv::Mat(matrix, false).copyTo(hdr);
+    return r;
+}
+
+// To an Eigen transform with default storage options: delegates to the row-major
+// conversion operator and converts its result to the requested return type.
+template<typename T> inline
+cv::Affine3<T>::operator Eigen::Transform<T, 3, Eigen::Affine>() const
+{
+    return this->operator Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>();
+}
+
+#endif /* defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H */
+
+//! @endcond
+
+#endif /* __cplusplus */
+
+#endif /* OPENCV_CORE_AFFINE3_HPP */
--- a/3rdparty/opencv/inc/opencv2/core/async.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/async.hpp
@@ -0,0 +1,105 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_ASYNC_HPP
+#define OPENCV_CORE_ASYNC_HPP
+
+#include <opencv2/core/mat.hpp>
+
+#ifdef CV_CXX11
+//#include <future>
+#include <chrono>
+#endif
+
+namespace cv {
+
+/** @addtogroup core_async
+
+@{
+*/
+
+
+/** @brief Returns result of asynchronous operations
+
+Object has attached asynchronous state.
+Assignment operator doesn't clone asynchronous state (it is shared between all instances).
+
+Result can be fetched via get() method only once.
+
+*/
+class CV_EXPORTS_W AsyncArray
+{
+public:
+    ~AsyncArray() CV_NOEXCEPT;
+    CV_WRAP AsyncArray() CV_NOEXCEPT;
+    AsyncArray(const AsyncArray& o) CV_NOEXCEPT;
+    AsyncArray& operator=(const AsyncArray& o) CV_NOEXCEPT;
+    CV_WRAP void release() CV_NOEXCEPT;
+
+    /** Fetch the result.
+    @param[out] dst destination array
+
+    Waits for result until container has valid result.
+    Throws exception if exception was stored as a result.
+
+    Throws exception on invalid container state.
+
+    @note Result or stored exception can be fetched only once.
+    */
+    CV_WRAP void get(OutputArray dst) const;
+
+    /** Retrieving the result with timeout
+    @param[out] dst destination array
+    @param[in] timeoutNs timeout in nanoseconds, -1 for infinite wait
+
+    @returns true if result is ready, false if the timeout has expired
+
+    @note Result or stored exception can be fetched only once.
+    */
+    bool get(OutputArray dst, int64 timeoutNs) const;
+
+    /** @overload
+    The timeout is converted to int64 and forwarded to get(OutputArray, int64).
+    */
+    CV_WRAP inline
+    bool get(OutputArray dst, double timeoutNs) const { return get(dst, (int64)timeoutNs); }
+
+    /** Takes the timeout as its only argument; unlike get() there is no destination array.
+    @param[in] timeoutNs timeout (presumably in nanoseconds as for get() -- the implementation
+                         is not part of this header)
+    */
+    bool wait_for(int64 timeoutNs) const;
+
+    /** @overload
+    The timeout is converted to int64 and forwarded to wait_for(int64).
+    */
+    CV_WRAP inline
+    bool wait_for(double timeoutNs) const { return wait_for((int64)timeoutNs); }
+
+    CV_WRAP bool valid() const CV_NOEXCEPT;
+
+#ifdef CV_CXX11
+    //! Move constructor: takes over the implementation pointer and leaves `o` without one.
+    //! It only copies and resets a pointer, hence it is marked CV_NOEXCEPT like the move assignment.
+    inline AsyncArray(AsyncArray&& o) CV_NOEXCEPT { p = o.p; o.p = nullptr; }
+    //! Move assignment: exchanges the implementation pointers of the two objects.
+    inline AsyncArray& operator=(AsyncArray&& o) CV_NOEXCEPT { std::swap(p, o.p); return *this; }
+
+    //! get() with a std::chrono timeout; the duration is converted to nanoseconds.
+    //! Declared const like the other get() overloads it forwards to.
+    template<typename _Rep, typename _Period>
+    inline bool get(OutputArray dst, const std::chrono::duration<_Rep, _Period>& timeout) const
+    {
+        return get(dst, (int64)(std::chrono::nanoseconds(timeout).count()));
+    }
+
+    //! wait_for() with a std::chrono timeout; the duration is converted to nanoseconds.
+    //! Declared const like the other wait_for() overloads it forwards to.
+    template<typename _Rep, typename _Period>
+    inline bool wait_for(const std::chrono::duration<_Rep, _Period>& timeout) const
+    {
+        return wait_for((int64)(std::chrono::nanoseconds(timeout).count()));
+    }
+
+#if 0
+    std::future<Mat> getFutureMat() const;
+    std::future<UMat> getFutureUMat() const;
+#endif
+#endif
+
+
+    // PImpl
+    struct Impl; friend struct Impl;
+    //! Returns the raw implementation pointer as void*.
+    inline void* _getImpl() const CV_NOEXCEPT { return p; }
+protected:
+    Impl* p;
+};
+
+
+//! @}
+} // namespace
+#endif // OPENCV_CORE_ASYNC_HPP
--- a/3rdparty/opencv/inc/opencv2/core/base.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/base.hpp
@@ -0,0 +1,664 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2014, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_BASE_HPP
+#define OPENCV_CORE_BASE_HPP
+
+#ifndef __cplusplus
+#  error base.hpp header must be compiled as C++
+#endif
+
+#include "opencv2/opencv_modules.hpp"
+
+#include <climits>
+#include <algorithm>
+
+#include "opencv2/core/cvdef.h"
+#include "opencv2/core/cvstd.hpp"
+
+namespace cv
+{
+
+//! @addtogroup core_utils
+//! @{
+
+namespace Error {
+//! error codes
+enum Code {
+    StsOk=                       0,  //!< everything is ok
+    StsBackTrace=               -1,  //!< pseudo error for back trace
+    StsError=                   -2,  //!< unknown /unspecified error
+    StsInternal=                -3,  //!< internal error (bad state)
+    StsNoMem=                   -4,  //!< insufficient memory
+    StsBadArg=                  -5,  //!< function arg/param is bad
+    StsBadFunc=                 -6,  //!< unsupported function
+    StsNoConv=                  -7,  //!< iteration didn't converge
+    StsAutoTrace=               -8,  //!< tracing
+    HeaderIsNull=               -9,  //!< image header is NULL
+    BadImageSize=              -10,  //!< image size is invalid
+    BadOffset=                 -11,  //!< offset is invalid
+    BadDataPtr=                -12,  //!<
+    BadStep=                   -13,  //!< image step is wrong, this may happen for a non-continuous matrix.
+    BadModelOrChSeq=           -14,  //!<
+    BadNumChannels=            -15,  //!< bad number of channels, for example, some functions accept only single channel matrices.
+    BadNumChannel1U=           -16,  //!<
+    BadDepth=                  -17,  //!< input image depth is not supported by the function
+    BadAlphaChannel=           -18,  //!<
+    BadOrder=                  -19,  //!< number of dimensions is out of range
+    BadOrigin=                 -20,  //!< incorrect input origin
+    BadAlign=                  -21,  //!< incorrect input align
+    BadCallBack=               -22,  //!<
+    BadTileSize=               -23,  //!<
+    BadCOI=                    -24,  //!< input COI is not supported
+    BadROISize=                -25,  //!< incorrect input roi
+    MaskIsTiled=               -26,  //!<
+    StsNullPtr=                -27,  //!< null pointer
+    StsVecLengthErr=           -28,  //!< incorrect vector length
+    StsFilterStructContentErr= -29,  //!< incorrect filter structure content
+    StsKernelStructContentErr= -30,  //!< incorrect transform kernel content
+    StsFilterOffsetErr=        -31,  //!< incorrect filter offset value
+    StsBadSize=                -201, //!< the input/output structure size is incorrect
+    StsDivByZero=              -202, //!< division by zero
+    StsInplaceNotSupported=    -203, //!< in-place operation is not supported
+    StsObjectNotFound=         -204, //!< request can't be completed
+    StsUnmatchedFormats=       -205, //!< formats of input/output arrays differ
+    StsBadFlag=                -206, //!< flag is wrong or not supported
+    StsBadPoint=               -207, //!< bad CvPoint
+    StsBadMask=                -208, //!< bad format of mask (neither 8uC1 nor 8sC1)
+    StsUnmatchedSizes=         -209, //!< sizes of input/output structures do not match
+    StsUnsupportedFormat=      -210, //!< the data format/type is not supported by the function
+    StsOutOfRange=             -211, //!< some of parameters are out of range
+    StsParseError=             -212, //!< invalid syntax/structure of the parsed file
+    StsNotImplemented=         -213, //!< the requested function/feature is not implemented
+    StsBadMemBlock=            -214, //!< an allocated block has been corrupted
+    StsAssert=                 -215, //!< assertion failed
+    GpuNotSupported=           -216, //!< no CUDA support
+    GpuApiCallError=           -217, //!< GPU API call error
+    OpenGlNotSupported=        -218, //!< no OpenGL support
+    OpenGlApiCallError=        -219, //!< OpenGL API call error
+    OpenCLApiCallError=        -220, //!< OpenCL API call error
+    OpenCLDoubleNotSupported=  -221,
+    OpenCLInitError=           -222, //!< OpenCL initialization error
+    OpenCLNoAMDBlasFft=        -223
+};
+} //Error
+
+//! @} core_utils
+
+//! @addtogroup core_array
+//! @{
+
+//! matrix decomposition types
+enum DecompTypes {
+    /** Gaussian elimination with the optimal pivot element chosen. */
+    DECOMP_LU       = 0,
+    /** singular value decomposition (SVD) method; the system can be over-defined and/or the matrix
+    src1 can be singular */
+    DECOMP_SVD      = 1,
+    /** eigenvalue decomposition; the matrix src1 must be symmetrical */
+    DECOMP_EIG      = 2,
+    /** Cholesky \f$LL^T\f$ factorization; the matrix src1 must be symmetrical and positively
+    defined */
+    DECOMP_CHOLESKY = 3,
+    /** QR factorization; the system can be over-defined and/or the matrix src1 can be singular */
+    DECOMP_QR       = 4,
+    /** while all the previous flags are mutually exclusive, this flag can be used together with
+    any of the previous; it means that the normal equations
+    \f$\texttt{src1}^T\cdot\texttt{src1}\cdot\texttt{dst}=\texttt{src1}^T\texttt{src2}\f$ are
+    solved instead of the original system
+    \f$\texttt{src1}\cdot\texttt{dst}=\texttt{src2}\f$ */
+    DECOMP_NORMAL   = 16
+};
+
+/** norm types
+
+src1 and src2 denote input arrays.
+*/
+
+enum NormTypes {
+                /**
+                \f[
+                norm =  \forkthree
+                {\|\texttt{src1}\|_{L_{\infty}} =  \max _I | \texttt{src1} (I)|}{if  \(\texttt{normType} = \texttt{NORM_INF}\) }
+                {\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} =  \max _I | \texttt{src1} (I) -  \texttt{src2} (I)|}{if  \(\texttt{normType} = \texttt{NORM_INF}\) }
+                {\frac{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}}    }{\|\texttt{src2}\|_{L_{\infty}} }}{if  \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_INF}\) }
+                \f]
+                */
+                NORM_INF       = 1,
+                /**
+                \f[
+                norm =  \forkthree
+                {\| \texttt{src1} \| _{L_1} =  \sum _I | \texttt{src1} (I)|}{if  \(\texttt{normType} = \texttt{NORM_L1}\)}
+                { \| \texttt{src1} - \texttt{src2} \| _{L_1} =  \sum _I | \texttt{src1} (I) -  \texttt{src2} (I)|}{if  \(\texttt{normType} = \texttt{NORM_L1}\) }
+                { \frac{\|\texttt{src1}-\texttt{src2}\|_{L_1} }{\|\texttt{src2}\|_{L_1}} }{if  \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L1}\) }
+                \f]*/
+                 NORM_L1        = 2,
+                 /**
+                 \f[
+                 norm =  \forkthree
+                 { \| \texttt{src1} \| _{L_2} =  \sqrt{\sum_I \texttt{src1}(I)^2} }{if  \(\texttt{normType} = \texttt{NORM_L2}\) }
+                 { \| \texttt{src1} - \texttt{src2} \| _{L_2} =  \sqrt{\sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2} }{if  \(\texttt{normType} = \texttt{NORM_L2}\) }
+                 { \frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}} }{if  \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L2}\) }
+                 \f]
+                 */
+                 NORM_L2        = 4,
+                 /**
+                 \f[
+                 norm =  \forkthree
+                 { \| \texttt{src1} \| _{L_2} ^{2} = \sum_I \texttt{src1}(I)^2} {if  \(\texttt{normType} = \texttt{NORM_L2SQR}\)}
+                 { \| \texttt{src1} - \texttt{src2} \| _{L_2} ^{2} =  \sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2 }{if  \(\texttt{normType} = \texttt{NORM_L2SQR}\) }
+                 { \left(\frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}}\right)^2 }{if  \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L2SQR}\) }
+                 \f]
+                 */
+                 NORM_L2SQR     = 5,
+                 /**
+                 In the case of one input array, calculates the Hamming distance of the array from zero.
+                 In the case of two input arrays, calculates the Hamming distance between the arrays.
+                 */
+                 NORM_HAMMING   = 6,
+                 /**
+                 Similar to NORM_HAMMING, but in the calculation, each two bits of the input sequence will
+                 be added and treated as a single bit to be used in the same calculation as NORM_HAMMING.
+                 */
+                 NORM_HAMMING2  = 7,
+                 NORM_TYPE_MASK = 7, //!< bit-mask which can be used to separate norm type from norm flags
+                 NORM_RELATIVE  = 8, //!< flag
+                 NORM_MINMAX    = 32 //!< flag
+               };
+
+//! comparison types
+enum CmpTypes { CMP_EQ = 0, //!< src1 is equal to src2.
+                CMP_GT = 1, //!< src1 is greater than src2.
+                CMP_GE = 2, //!< src1 is greater than or equal to src2.
+                CMP_LT = 3, //!< src1 is less than src2.
+                CMP_LE = 4, //!< src1 is less than or equal to src2.
+                CMP_NE = 5  //!< src1 is unequal to src2.
+              };
+
+//! generalized matrix multiplication flags
+enum GemmFlags { GEMM_1_T = 1, //!< transposes src1
+                 GEMM_2_T = 2, //!< transposes src2
+                 GEMM_3_T = 4 //!< transposes src3
+               };
+
+enum DftFlags {
+    /** performs an inverse 1D or 2D transform instead of the default forward
+        transform. */
+    DFT_INVERSE        = 1,
+    /** scales the result: divide it by the number of array elements. Normally, it is
+        combined with DFT_INVERSE. */
+    DFT_SCALE          = 2,
+    /** performs a forward or inverse transform of every individual row of the input
+        matrix; this flag enables you to transform multiple vectors simultaneously and can be used to
+        decrease the overhead (which is sometimes several times larger than the processing itself) to
+        perform 3D and higher-dimensional transformations and so forth.*/
+    DFT_ROWS           = 4,
+    /** performs a forward transformation of 1D or 2D real array; the result,
+        though being a complex array, has complex-conjugate symmetry (*CCS*, see the function
+        description below for details), and such an array can be packed into a real array of the same
+        size as input, which is the fastest option and which is what the function does by default;
+        however, you may wish to get a full complex array (for simpler spectrum analysis, and so on) -
+        pass the flag to enable the function to produce a full-size complex output array. */
+    DFT_COMPLEX_OUTPUT = 16,
+    /** performs an inverse transformation of a 1D or 2D complex array; the
+        result is normally a complex array of the same size, however, if the input array has
+        conjugate-complex symmetry (for example, it is a result of forward transformation with
+        DFT_COMPLEX_OUTPUT flag), the output is a real array; while the function itself does not
+        check whether the input is symmetrical or not, you can pass the flag and then the function
+        will assume the symmetry and produce the real output array (note that when the input is packed
+        into a real array and inverse transformation is executed, the function treats the input as a
+        packed complex-conjugate symmetrical array, and the output will also be a real array). */
+    DFT_REAL_OUTPUT    = 32,
+    /** specifies that input is complex input. If this flag is set, the input must have 2 channels.
+        On the other hand, for backwards compatibility reason, if input has 2 channels, input is
+        already considered complex. */
+    DFT_COMPLEX_INPUT  = 64,
+    /** performs an inverse 1D or 2D transform instead of the default forward transform. */
+    DCT_INVERSE        = DFT_INVERSE,
+    /** performs a forward or inverse transform of every individual row of the input
+        matrix. This flag enables you to transform multiple vectors simultaneously and can be used to
+        decrease the overhead (which is sometimes several times larger than the processing itself) to
+        perform 3D and higher-dimensional transforms and so forth.*/
+    DCT_ROWS           = DFT_ROWS
+};
+
+//! Various border types, image boundaries are denoted with `|`
+//! @see borderInterpolate, copyMakeBorder
+enum BorderTypes {
+    BORDER_CONSTANT    = 0, //!< `iiiiii|abcdefgh|iiiiiii`  with some specified `i`
+    BORDER_REPLICATE   = 1, //!< `aaaaaa|abcdefgh|hhhhhhh`
+    BORDER_REFLECT     = 2, //!< `fedcba|abcdefgh|hgfedcb`
+    BORDER_WRAP        = 3, //!< `cdefgh|abcdefgh|abcdefg`
+    BORDER_REFLECT_101 = 4, //!< `gfedcb|abcdefgh|gfedcba`
+    BORDER_TRANSPARENT = 5, //!< `uvwxyz|abcdefgh|ijklmno`
+
+    BORDER_REFLECT101  = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101
+    BORDER_DEFAULT     = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101
+    BORDER_ISOLATED    = 16 //!< do not look outside of ROI
+};
+
+//! @} core_array
+
+//! @addtogroup core_utils
+//! @{
+
+/*! @brief Signals an error and raises the exception.
+
+By default the function prints information about the error to stderr,
+then it either stops if setBreakOnError() had been called before or raises the exception.
+It is possible to alternate error processing by using redirectError().
+@param _code - error code (Error::Code)
+@param _err - error description
+@param _func - function name. Available only when the compiler supports getting it
+@param _file - source file name where the error has occurred
+@param _line - line number in the source file where the error has occurred
+@see CV_Error, CV_Error_, CV_Assert, CV_DbgAssert
+ */
+CV_EXPORTS CV_NORETURN void error(int _code, const String& _err, const char* _func, const char* _file, int _line);
+
+#ifdef CV_STATIC_ANALYSIS
+
+// In practice, some macros are not processed correctly (noreturn is not detected).
+// We need to use simplified definitions for them.
+#define CV_Error(code, msg) do { (void)(code); (void)(msg); abort(); } while (0)
+#define CV_Error_(code, args) do { (void)(code); (void)(cv::format args); abort(); } while (0)
+#define CV_Assert( expr ) do { if (!(expr)) abort(); } while (0)
+
+#else // CV_STATIC_ANALYSIS
+
+/** @brief Call the error handler.
+
+Currently, the error handler prints the error code and the error message to the standard
+error stream `stderr`. In the Debug configuration, it then provokes memory access violation, so that
+the execution stack and all the parameters can be analyzed by the debugger. In the Release
+configuration, the exception is thrown.
+
+@param code one of Error::Code
+@param msg error message
+*/
+#define CV_Error( code, msg ) cv::error( code, msg, CV_Func, __FILE__, __LINE__ )
+
+/**  @brief Call the error handler.
+
+This macro can be used to construct an error message on-fly to include some dynamic information,
+for example:
+@code
+    // note the extra parentheses around the formatted text message
+    CV_Error_(Error::StsOutOfRange,
+    ("the value at (%d, %d)=%g is out of range", badPt.x, badPt.y, badValue));
+@endcode
+@param code one of Error::Code
+@param args printf-like formatted error message in parentheses
+*/
+#define CV_Error_( code, args ) cv::error( code, cv::format args, CV_Func, __FILE__, __LINE__ )
+
+/** @brief Checks a condition at runtime and throws exception if it fails
+
+The macros CV_Assert (and CV_DbgAssert(expr)) evaluate the specified expression. If it is 0, the macros
+raise an error (see cv::error). The macro CV_Assert checks the condition in both Debug and Release
+configurations while CV_DbgAssert is only retained in the Debug configuration.
+*/
+#define CV_Assert( expr ) do { if(!!(expr)) ; else cv::error( cv::Error::StsAssert, #expr, CV_Func, __FILE__, __LINE__ ); } while(0)
+
+#endif // CV_STATIC_ANALYSIS
+
+//! @cond IGNORED
+#if !defined(__OPENCV_BUILD)  // TODO: backward compatibility only
+#ifndef CV_ErrorNoReturn
+#define CV_ErrorNoReturn CV_Error
+#endif
+#ifndef CV_ErrorNoReturn_
+#define CV_ErrorNoReturn_ CV_Error_
+#endif
+#endif
+
+#define CV_Assert_1 CV_Assert
+#define CV_Assert_2( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_1( __VA_ARGS__ ))
+#define CV_Assert_3( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_2( __VA_ARGS__ ))
+#define CV_Assert_4( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_3( __VA_ARGS__ ))
+#define CV_Assert_5( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_4( __VA_ARGS__ ))
+#define CV_Assert_6( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_5( __VA_ARGS__ ))
+#define CV_Assert_7( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_6( __VA_ARGS__ ))
+#define CV_Assert_8( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_7( __VA_ARGS__ ))
+#define CV_Assert_9( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_8( __VA_ARGS__ ))
+#define CV_Assert_10( expr, ... ) CV_Assert_1(expr); __CV_EXPAND(CV_Assert_9( __VA_ARGS__ ))
+
+#define CV_Assert_N(...) do { __CV_EXPAND(__CV_CAT(CV_Assert_, __CV_VA_NUM_ARGS(__VA_ARGS__)) (__VA_ARGS__)); } while(0)
+
+//! @endcond
+
+#if defined _DEBUG || defined CV_STATIC_ANALYSIS
+#  define CV_DbgAssert(expr) CV_Assert(expr)
+#else
+/** replaced with CV_Assert(expr) in Debug configuration */
+#  define CV_DbgAssert(expr)
+#endif
+
+/*
+ * Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor;
+ * the result is the bit count of A XOR B
+ */
+struct CV_EXPORTS Hamming
+{
+    static const NormTypes normType = NORM_HAMMING;  // norm type constant associated with this functor
+    typedef unsigned char ValueType;  // element type of the two input arrays
+    typedef int ResultType;  // type of the returned distance
+
+    /** this will count the bits in a ^ b (size is presumably the length of both arrays in bytes - TODO confirm)
+     */
+    ResultType operator()( const unsigned char* a, const unsigned char* b, int size ) const;
+};
+
+typedef Hamming HammingLUT;
+
+/////////////////////////////////// inline norms ////////////////////////////////////
+
+template<typename _Tp> inline _Tp cv_abs(_Tp x) { return std::abs(x); }  // generic case: forwards to std::abs, result converted back to _Tp
+inline int cv_abs(uchar x) { return x; }  // unsigned input: value is returned as-is, as int
+inline int cv_abs(schar x) { return std::abs(x); }  // signed 8-bit input: absolute value returned as int
+inline int cv_abs(ushort x) { return x; }  // unsigned input: value is returned as-is, as int
+inline int cv_abs(short x) { return std::abs(x); }  // signed 16-bit input: absolute value returned as int
+
+template<typename _Tp, typename _AccTp> static inline  // returns the sum of a[i]*a[i] over the n elements of a, accumulated in _AccTp
+_AccTp normL2Sqr(const _Tp* a, int n)
+{
+    _AccTp s = 0;
+    int i=0;
+#if CV_ENABLE_UNROLLED
+    for( ; i <= n - 4; i += 4 )  // optional path: four elements per iteration
+    {
+        _AccTp v0 = a[i], v1 = a[i+1], v2 = a[i+2], v3 = a[i+3];
+        s += v0*v0 + v1*v1 + v2*v2 + v3*v3;
+    }
+#endif
+    for( ; i < n; i++ )  // remaining elements (all of them when the unrolled path is disabled)
+    {
+        _AccTp v = a[i];  // converted to the accumulator type before squaring
+        s += v*v;
+    }
+    return s;
+}
+
+template<typename _Tp, typename _AccTp> static inline  // returns the sum of cv_abs(a[i]) over the n elements of a, accumulated in _AccTp
+_AccTp normL1(const _Tp* a, int n)
+{
+    _AccTp s = 0;
+    int i = 0;
+#if CV_ENABLE_UNROLLED
+    for(; i <= n - 4; i += 4 )  // optional path: four elements per iteration
+    {
+        s += (_AccTp)cv_abs(a[i]) + (_AccTp)cv_abs(a[i+1]) +
+            (_AccTp)cv_abs(a[i+2]) + (_AccTp)cv_abs(a[i+3]);
+    }
+#endif
+    for( ; i < n; i++ )  // remaining elements (all of them when the unrolled path is disabled)
+        s += cv_abs(a[i]);
+    return s;
+}
+
+template<typename _Tp, typename _AccTp> static inline  // returns the largest cv_abs(a[i]) over the n elements of a (0 when n <= 0)
+_AccTp normInf(const _Tp* a, int n)
+{
+    _AccTp s = 0;  // running maximum, starts at 0
+    for( int i = 0; i < n; i++ )
+        s = std::max(s, (_AccTp)cv_abs(a[i]));
+    return s;
+}
+
+template<typename _Tp, typename _AccTp> static inline  // returns the sum of (a[i]-b[i])^2 over n element pairs, accumulated in _AccTp
+_AccTp normL2Sqr(const _Tp* a, const _Tp* b, int n)
+{
+    _AccTp s = 0;
+    int i= 0;
+#if CV_ENABLE_UNROLLED
+    for(; i <= n - 4; i += 4 )  // optional path: four element pairs per iteration
+    {
+        _AccTp v0 = _AccTp(a[i] - b[i]), v1 = _AccTp(a[i+1] - b[i+1]), v2 = _AccTp(a[i+2] - b[i+2]), v3 = _AccTp(a[i+3] - b[i+3]);
+        s += v0*v0 + v1*v1 + v2*v2 + v3*v3;
+    }
+#endif
+    for( ; i < n; i++ )  // remaining pairs (all of them when the unrolled path is disabled)
+    {
+        _AccTp v = _AccTp(a[i] - b[i]);  // the difference is formed first, then converted to _AccTp
+        s += v*v;
+    }
+    return s;
+}
+
+static inline float normL2Sqr(const float* a, const float* b, int n)  // non-template float overload: sum of (a[i]-b[i])^2 over n pairs
+{
+    float s = 0.f;
+    for( int i = 0; i < n; i++ )  // plain loop, no unrolled variant here
+    {
+        float v = a[i] - b[i];
+        s += v*v;
+    }
+    return s;
+}
+
+template<typename _Tp, typename _AccTp> static inline  // returns the sum of |a[i]-b[i]| over n element pairs, accumulated in _AccTp
+_AccTp normL1(const _Tp* a, const _Tp* b, int n)
+{
+    _AccTp s = 0;
+    int i= 0;
+#if CV_ENABLE_UNROLLED
+    for(; i <= n - 4; i += 4 )  // optional path: four element pairs per iteration
+    {
+        _AccTp v0 = _AccTp(a[i] - b[i]), v1 = _AccTp(a[i+1] - b[i+1]), v2 = _AccTp(a[i+2] - b[i+2]), v3 = _AccTp(a[i+3] - b[i+3]);
+        s += std::abs(v0) + std::abs(v1) + std::abs(v2) + std::abs(v3);
+    }
+#endif
+    for( ; i < n; i++ )  // remaining pairs (all of them when the unrolled path is disabled)
+    {
+        _AccTp v = _AccTp(a[i] - b[i]);  // the difference is formed first, then converted to _AccTp
+        s += std::abs(v);
+    }
+    return s;
+}
+
+inline float normL1(const float* a, const float* b, int n)  // non-template float overload: sum of |a[i]-b[i]| over n pairs
+{
+    float s = 0.f;
+    for( int i = 0; i < n; i++ )
+    {
+        s += std::abs(a[i] - b[i]);
+    }
+    return s;
+}
+
+inline int normL1(const uchar* a, const uchar* b, int n)  // non-template uchar overload: sum of |a[i]-b[i]| over n pairs, returned as int
+{
+    int s = 0;
+    for( int i = 0; i < n; i++ )
+    {
+        s += std::abs(a[i] - b[i]);  // operands are promoted to int, so the difference can be negative before std::abs
+    }
+    return s;
+}
+
+template<typename _Tp, typename _AccTp> static inline  // returns the largest |a[i]-b[i]| over n element pairs (0 when n <= 0)
+_AccTp normInf(const _Tp* a, const _Tp* b, int n)
+{
+    _AccTp s = 0;  // running maximum, starts at 0
+    for( int i = 0; i < n; i++ )
+    {
+        _AccTp v0 = a[i] - b[i];
+        s = std::max(s, std::abs(v0));
+    }
+    return s;
+}
+
+/** @brief Computes the cube root of an argument.
+
+ The function cubeRoot computes \f$\sqrt[3]{\texttt{val}}\f$. Negative arguments are handled correctly.
+ NaN and Inf are not handled. The accuracy approaches the maximum possible accuracy for
+ single-precision data.
+ @param val A function argument.
+ */
+CV_EXPORTS_W float cubeRoot(float val);
+
+/** @overload
+
+cubeRoot with argument of `double` type calls `std::cbrt(double)`
+*/
+static inline
+double cubeRoot(double val)
+{
+    return std::cbrt(val);
+}
+
+/** @brief Calculates the angle of a 2D vector in degrees.
+
+ The function fastAtan2 calculates the full-range angle of an input 2D vector. The angle is measured
+ in degrees and varies from 0 to 360 degrees. The accuracy is about 0.3 degrees.
+ @param x x-coordinate of the vector.
+ @param y y-coordinate of the vector.
+ */
+CV_EXPORTS_W float fastAtan2(float y, float x);
+
+/** proxy for hal::LU */
+CV_EXPORTS int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n);
+/** proxy for hal::LU */
+CV_EXPORTS int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n);
+/** proxy for hal::Cholesky */
+CV_EXPORTS bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n);
+/** proxy for hal::Cholesky */
+CV_EXPORTS bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n);
+
+////////////////// forward declarations for important OpenCV types //////////////////
+
+//! @cond IGNORED
+
+template<typename _Tp, int cn> class Vec;
+template<typename _Tp, int m, int n> class Matx;
+
+template<typename _Tp> class Complex;
+template<typename _Tp> class Point_;
+template<typename _Tp> class Point3_;
+template<typename _Tp> class Size_;
+template<typename _Tp> class Rect_;
+template<typename _Tp> class Scalar_;
+
+class CV_EXPORTS RotatedRect;
+class CV_EXPORTS Range;
+class CV_EXPORTS TermCriteria;
+class CV_EXPORTS KeyPoint;
+class CV_EXPORTS DMatch;
+class CV_EXPORTS RNG;
+
+class CV_EXPORTS Mat;
+class CV_EXPORTS MatExpr;
+
+class CV_EXPORTS UMat;
+
+class CV_EXPORTS SparseMat;
+typedef Mat MatND;
+
+template<typename _Tp> class Mat_;
+template<typename _Tp> class SparseMat_;
+
+class CV_EXPORTS MatConstIterator;
+class CV_EXPORTS SparseMatIterator;
+class CV_EXPORTS SparseMatConstIterator;
+template<typename _Tp> class MatIterator_;
+template<typename _Tp> class MatConstIterator_;
+template<typename _Tp> class SparseMatIterator_;
+template<typename _Tp> class SparseMatConstIterator_;
+
+namespace ogl
+{
+    class CV_EXPORTS Buffer;
+    class CV_EXPORTS Texture2D;
+    class CV_EXPORTS Arrays;
+}
+
+namespace cuda
+{
+    class CV_EXPORTS GpuMat;
+    class CV_EXPORTS HostMem;
+    class CV_EXPORTS Stream;
+    class CV_EXPORTS Event;
+}
+
+namespace cudev
+{
+    template <typename _Tp> class GpuMat_;
+}
+
+namespace ipp
+{
+CV_EXPORTS   unsigned long long getIppFeatures();
+CV_EXPORTS   void setIppStatus(int status, const char * const funcname = NULL, const char * const filename = NULL,
+                             int line = 0);
+CV_EXPORTS   int getIppStatus();
+CV_EXPORTS   String getIppErrorLocation();
+CV_EXPORTS_W bool   useIPP();
+CV_EXPORTS_W void   setUseIPP(bool flag);
+CV_EXPORTS_W String getIppVersion();
+
+// IPP Not-Exact mode. This function may force use of IPP when both IPP and OpenCV provide proper results
+// but have internal accuracy differences which have too much direct or indirect impact on accuracy tests.
+CV_EXPORTS_W bool useIPP_NotExact();
+CV_EXPORTS_W void setUseIPP_NotExact(bool flag);
+#ifndef DISABLE_OPENCV_3_COMPATIBILITY
+static inline bool useIPP_NE() { return useIPP_NotExact(); }  // short-named alias of useIPP_NotExact(), compiled out when DISABLE_OPENCV_3_COMPATIBILITY is defined
+static inline void setUseIPP_NE(bool flag) { setUseIPP_NotExact(flag); }  // short-named alias of setUseIPP_NotExact(flag), same guard
+#endif
+
+} // ipp
+
+//! @endcond
+
+//! @} core_utils
+
+
+
+
+} // cv
+
+#include "opencv2/core/neon_utils.hpp"
+#include "opencv2/core/vsx_utils.hpp"
+#include "opencv2/core/check.hpp"
+
+#endif //OPENCV_CORE_BASE_HPP
--- a/3rdparty/opencv/inc/opencv2/core/bindings_utils.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/bindings_utils.hpp
@@ -0,0 +1,233 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_BINDINGS_UTILS_HPP
+#define OPENCV_CORE_BINDINGS_UTILS_HPP
+
+#include <opencv2/core/async.hpp>
+#include <opencv2/core/detail/async_promise.hpp>
+#include <opencv2/core/utils/logger.hpp>
+
+#include <stdexcept>
+
+namespace cv { namespace utils {
+//! @addtogroup core_utils
+//! @{
+
+CV_EXPORTS_W String dumpInputArray(InputArray argument);
+
+CV_EXPORTS_W String dumpInputArrayOfArrays(InputArrayOfArrays argument);
+
+CV_EXPORTS_W String dumpInputOutputArray(InputOutputArray argument);
+
+CV_EXPORTS_W String dumpInputOutputArrayOfArrays(InputOutputArrayOfArrays argument);
+
+CV_WRAP static inline  // returns "Bool: True" or "Bool: False" depending on argument
+String dumpBool(bool argument)
+{
+    return (argument) ? String("Bool: True") : String("Bool: False");
+}
+
+CV_WRAP static inline  // returns "Int: " followed by argument formatted with %d
+String dumpInt(int argument)
+{
+    return cv::format("Int: %d", argument);
+}
+
+CV_WRAP static inline  // returns "size_t: " followed by argument written through a string stream
+String dumpSizeT(size_t argument)
+{
+    std::ostringstream oss("size_t: ", std::ios::ate);  // std::ios::ate: output is appended after the initial prefix
+    oss << argument;
+    return oss.str();
+}
+
+CV_WRAP static inline  // returns "Float: " followed by argument formatted with %.2f
+String dumpFloat(float argument)
+{
+    return cv::format("Float: %.2f", argument);
+}
+
+CV_WRAP static inline  // returns "Double: " followed by argument formatted with %.2f
+String dumpDouble(double argument)
+{
+    return cv::format("Double: %.2f", argument);
+}
+
+CV_WRAP static inline  // returns "String: " followed by the C string argument
+String dumpCString(const char* argument)
+{
+    return cv::format("String: %s", argument);  // NOTE(review): argument goes straight to %s with no null check - callers presumably must pass a non-null pointer
+}
+
+CV_WRAP static inline  // returns "String: " followed by the contents of argument
+String dumpString(const String& argument)
+{
+    return cv::format("String: %s", argument.c_str());
+}
+
+CV_WRAP static inline  // (int, Point) overload: the returned text names this overload and echoes value and point; point defaults to (42, 24)
+String testOverloadResolution(int value, const Point& point = Point(42, 24))
+{
+    return format("overload (int=%d, point=(x=%d, y=%d))", value, point.x,
+                  point.y);
+}
+
+CV_WRAP static inline  // Rect overload: the returned text names this overload and echoes x, y, width and height
+String testOverloadResolution(const Rect& rect)
+{
+    return format("overload (rect=(x=%d, y=%d, w=%d, h=%d))", rect.x, rect.y,
+                  rect.width, rect.height);
+}
+
+CV_WRAP static inline  // returns "rect: (x=.., y=.., w=.., h=..)" built from the fields of argument
+String dumpRect(const Rect& argument)
+{
+    return format("rect: (x=%d, y=%d, w=%d, h=%d)", argument.x, argument.y,
+                  argument.width, argument.height);
+}
+
+CV_WRAP static inline  // returns "term_criteria: (type=.., max_count=.., epsilon=..)" built from the fields of argument
+String dumpTermCriteria(const TermCriteria& argument)
+{
+    return format("term_criteria: (type=%d, max_count=%d, epsilon=%lf)",  // parentheses balanced, matching dumpRect / dumpRotatedRect
+                  argument.type, argument.maxCount, argument.epsilon);
+}
+
+CV_WRAP static inline  // returns "rotated_rect: (c_x=.., c_y=.., w=.., h=.., a=..)" from center, size and angle of argument
+String dumpRotatedRect(const RotatedRect& argument)
+{
+    return format("rotated_rect: (c_x=%f, c_y=%f, w=%f, h=%f, a=%f)",
+                  argument.center.x, argument.center.y, argument.size.width,
+                  argument.size.height, argument.angle);
+}
+
+CV_WRAP static inline  // returns "range: all" for Range::all(), otherwise "range: (s=.., e=..)"
+String dumpRange(const Range& argument)
+{
+    if (argument == Range::all())  // the "all" range gets its own text instead of its start/end values
+    {
+        return "range: all";
+    }
+    else
+    {
+        return format("range: (s=%d, e=%d)", argument.start, argument.end);
+    }
+}
+
+CV_WRAP static inline  // returns argument unchanged
+int testOverwriteNativeMethod(int argument)
+{
+    return argument;
+}
+
+CV_WRAP static inline  // echoes its three arguments; presumably exercises binding generators on parameter names (lambda, from) that are keywords in a target language - confirm
+String testReservedKeywordConversion(int positional_argument, int lambda = 2, int from = 3)
+{
+    return format("arg=%d, lambda=%d, from=%d", positional_argument, lambda, from);
+}
+
+CV_EXPORTS_W String dumpVectorOfInt(const std::vector<int>& vec);
+
+CV_EXPORTS_W String dumpVectorOfDouble(const std::vector<double>& vec);
+
+CV_EXPORTS_W String dumpVectorOfRect(const std::vector<Rect>& vec);
+
+CV_WRAP static inline  // resizes vec to len and, when len > 0, fills it from a randomly filled CV_32SC4 matrix
+void generateVectorOfRect(size_t len, CV_OUT std::vector<Rect>& vec)
+{
+    vec.resize(len);
+    if (len > 0)
+    {
+        RNG rng(12345);  // fixed seed
+        Mat tmp(static_cast<int>(len), 1, CV_32SC4);  // len rows x 1 column, four 32-bit ints per element; len is narrowed to int
+        rng.fill(tmp, RNG::UNIFORM, 10, 20);  // uniform distribution with bounds 10 and 20
+        tmp.copyTo(vec);
+    }
+}
+
+CV_WRAP static inline  // resizes vec to len and, when len > 0, fills it from a randomly filled CV_32SC1 matrix
+void generateVectorOfInt(size_t len, CV_OUT std::vector<int>& vec)
+{
+    vec.resize(len);
+    if (len > 0)
+    {
+        RNG rng(554433);  // fixed seed
+        Mat tmp(static_cast<int>(len), 1, CV_32SC1);  // len rows x 1 column of 32-bit ints; len is narrowed to int
+        rng.fill(tmp, RNG::UNIFORM, -10, 10);  // uniform distribution with bounds -10 and 10
+        tmp.copyTo(vec);
+    }
+}
+
+CV_WRAP static inline  // resizes vec to len and makes each element a rows x cols matrix of type dtype with random content
+void generateVectorOfMat(size_t len, int rows, int cols, int dtype, CV_OUT std::vector<Mat>& vec)
+{
+    vec.resize(len);
+    if (len > 0)
+    {
+        RNG rng(65431);  // fixed seed, one generator shared by all matrices
+        for (size_t i = 0; i < len; ++i)
+        {
+            vec[i].create(rows, cols, dtype);
+            rng.fill(vec[i], RNG::UNIFORM, 0, 10);  // uniform distribution with bounds 0 and 10
+        }
+    }
+}
+
+CV_WRAP static inline  // always throws std::runtime_error("exception text"), i.e. an exception that is not a cv::Exception
+void testRaiseGeneralException()
+{
+    throw std::runtime_error("exception text");
+}
+
+CV_WRAP static inline  // stores argument as the value of a local AsyncPromise and returns that promise's AsyncArray
+AsyncArray testAsyncArray(InputArray argument)
+{
+    AsyncPromise p;
+    p.setValue(argument);
+    return p.getArrayResult();
+}
+
+CV_WRAP static inline  // returns an AsyncArray whose promise holds a stored cv::Exception instead of a value
+AsyncArray testAsyncException()
+{
+    AsyncPromise p;
+    try
+    {
+        CV_Error(Error::StsOk, "Test: Generated async error");  // raises the error that the handler below captures
+    }
+    catch (const cv::Exception& e)
+    {
+        p.setException(e);  // the caught exception becomes the promise's result
+    }
+    return p.getArrayResult();
+}
+
+namespace fs {
+    CV_EXPORTS_W cv::String getCacheDirectoryForDownloads();
+} // namespace fs
+
+//! @}  // core_utils
+}  // namespace cv::utils
+
+//! @cond IGNORED
+
+CV_WRAP static inline  // int-based wrapper: casts level to LogLevel, forwards to cv::utils::logging::setLogLevel and returns its result as int
+int setLogLevel(int level)
+{
+    // NB: Binding generators don't work with enums properly yet, so we define a separate overload here
+    return cv::utils::logging::setLogLevel((cv::utils::logging::LogLevel)level);
+}
+
+CV_WRAP static inline  // int-based wrapper: returns the result of cv::utils::logging::getLogLevel() as int
+int getLogLevel()
+{
+    return cv::utils::logging::getLogLevel();
+}
+
+//! @endcond IGNORED
+
+} // namespace cv
+
+#endif // OPENCV_CORE_BINDINGS_UTILS_HPP
--- a/3rdparty/opencv/inc/opencv2/core/bufferpool.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/bufferpool.hpp
@@ -0,0 +1,40 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
+
+#ifndef OPENCV_CORE_BUFFER_POOL_HPP
+#define OPENCV_CORE_BUFFER_POOL_HPP
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable: 4265)
+#endif
+
+namespace cv
+{
+
+//! @addtogroup core
+//! @{
+
+class BufferPoolController // abstract interface: every public member below is pure virtual
+{
+protected:
+    ~BufferPoolController() { } // protected and non-virtual: code outside the hierarchy cannot destroy an object through this interface
+public:
+    virtual size_t getReservedSize() const = 0;
+    virtual size_t getMaxReservedSize() const = 0;
+    virtual void setMaxReservedSize(size_t size) = 0;
+    virtual void freeAllReservedBuffers() = 0;
+};
+
+//! @}
+
+}
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+#endif // OPENCV_CORE_BUFFER_POOL_HPP
--- a/3rdparty/opencv/inc/opencv2/core/check.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/check.hpp
@@ -0,0 +1,160 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_CHECK_HPP
+#define OPENCV_CORE_CHECK_HPP
+
+#include <opencv2/core/base.hpp>
+
+namespace cv {
+
+/** Returns string of cv::Mat depth value: CV_8U -> "CV_8U" or "<invalid depth>" */
+CV_EXPORTS const char* depthToString(int depth);
+
+/** Returns string of cv::Mat type value: CV_8UC3 -> "CV_8UC3" or "<invalid type>" */
+CV_EXPORTS const String typeToString(int type);
+
+
+//! @cond IGNORED
+namespace detail {
+
+/** Returns string of cv::Mat depth value: CV_8U -> "CV_8U" or NULL */
+CV_EXPORTS const char* depthToString_(int depth);
+
+/** Returns string of cv::Mat type value: CV_8UC3 -> "CV_8UC3" or cv::String() */
+CV_EXPORTS const cv::String typeToString_(int type);
+
+enum TestOp { // kind of test recorded in CheckContext::testOp
+  TEST_CUSTOM = 0, // passed by CV__CHECK_CUSTOM_TEST (free-form test expression)
+  TEST_EQ = 1, // CV__TEST_EQ: ==
+  TEST_NE = 2, // CV__TEST_NE: !=
+  TEST_LE = 3, // CV__TEST_LE: <=
+  TEST_LT = 4, // CV__TEST_LT: <
+  TEST_GE = 5, // CV__TEST_GE: >=
+  TEST_GT = 6, // CV__TEST_GT: >
+  CV__LAST_TEST_OP // implicit value 7, one past TEST_GT
+};
+
+struct CheckContext { // aggregate describing one check site; filled in field order by CV__DEFINE_CHECK_CONTEXT
+    const char* func; // CV__CHECK_FUNCTION at the check site
+    const char* file; // CV__CHECK_FILENAME at the check site
+    int line; // __LINE__ at the check site
+    enum TestOp testOp; // which comparison (or TEST_CUSTOM) was performed
+    const char* message; // user-supplied message literal
+    const char* p1_str; // stringified first operand (or checked value)
+    const char* p2_str; // stringified second operand (or custom test expression)
+};
+
+#ifndef CV__CHECK_FILENAME // honours an earlier definition; defaults to __FILE__
+# define CV__CHECK_FILENAME __FILE__
+#endif
+
+#ifndef CV__CHECK_FUNCTION // honours an earlier definition; otherwise picks the compiler's function-signature macro
+# if defined _MSC_VER
+#   define CV__CHECK_FUNCTION __FUNCSIG__
+# elif defined __GNUC__
+#   define CV__CHECK_FUNCTION __PRETTY_FUNCTION__
+# else
+#   define CV__CHECK_FUNCTION "<unknown>"
+# endif
+#endif
+
+#define CV__CHECK_LOCATION_VARNAME(id) CVAUX_CONCAT(CVAUX_CONCAT(__cv_check_, id), __LINE__) // identifier built from __cv_check_, id and the current line number
+#define CV__DEFINE_CHECK_CONTEXT(id, message, testOp, p1_str, p2_str) \
+    static const cv::detail::CheckContext CV__CHECK_LOCATION_VARNAME(id) = \
+            { CV__CHECK_FUNCTION, CV__CHECK_FILENAME, __LINE__, testOp, "" message, "" p1_str, "" p2_str } // the "" prefixes only compile when message/p1_str/p2_str are string literals
+
+CV_EXPORTS void CV_NORETURN check_failed_auto(const int v1, const int v2, const CheckContext& ctx); // two-operand failure handlers (used by CV__CHECK); all are declared CV_NORETURN
+CV_EXPORTS void CV_NORETURN check_failed_auto(const size_t v1, const size_t v2, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_auto(const float v1, const float v2, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_auto(const double v1, const double v2, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_auto(const Size_<int> v1, const Size_<int> v2, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_MatDepth(const int v1, const int v2, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_MatType(const int v1, const int v2, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_MatChannels(const int v1, const int v2, const CheckContext& ctx);
+
+CV_EXPORTS void CV_NORETURN check_failed_auto(const int v, const CheckContext& ctx); // single-value failure handlers (used by CV__CHECK_CUSTOM_TEST)
+CV_EXPORTS void CV_NORETURN check_failed_auto(const size_t v, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_auto(const float v, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_auto(const double v, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_auto(const Size_<int> v, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_auto(const std::string& v1, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_MatDepth(const int v, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_MatType(const int v, const CheckContext& ctx);
+CV_EXPORTS void CV_NORETURN check_failed_MatChannels(const int v, const CheckContext& ctx);
+
+
+#define CV__TEST_EQ(v1, v2) ((v1) == (v2)) // CV__TEST_<op> predicates; CV__CHECK selects one by pasting CV__TEST_##op
+#define CV__TEST_NE(v1, v2) ((v1) != (v2))
+#define CV__TEST_LE(v1, v2) ((v1) <= (v2))
+#define CV__TEST_LT(v1, v2) ((v1) < (v2))
+#define CV__TEST_GE(v1, v2) ((v1) >= (v2))
+#define CV__TEST_GT(v1, v2) ((v1) > (v2))
+
+#define CV__CHECK(id, op, type, v1, v2, v1_str, v2_str, msg_str) do { \
+    if(CV__TEST_##op((v1), (v2))) ; else { \
+        CV__DEFINE_CHECK_CONTEXT(id, msg_str, cv::detail::TEST_ ## op, v1_str, v2_str); \
+        cv::detail::check_failed_ ## type((v1), (v2), CV__CHECK_LOCATION_VARNAME(id)); \
+    } \
+} while (0) // v1/v2 are expanded in both the test and the failure call, so they are evaluated a second time when the check fails
+
+#define CV__CHECK_CUSTOM_TEST(id, type, v, test_expr, v_str, test_expr_str, msg_str) do { \
+    if(!!(test_expr)) ; else { \
+        CV__DEFINE_CHECK_CONTEXT(id, msg_str, cv::detail::TEST_CUSTOM, v_str, test_expr_str); \
+        cv::detail::check_failed_ ## type((v), CV__CHECK_LOCATION_VARNAME(id)); \
+    } \
+} while (0) // v is only evaluated on failure; test_expr is evaluated exactly once
+
+} // namespace
+//! @endcond
+
+
+/// Supported value types: int, size_t, float, double, cv::Size_<int> (one cv::detail::check_failed_auto overload per type)
+#define CV_CheckEQ(v1, v2, msg)  CV__CHECK(_, EQ, auto, v1, v2, #v1, #v2, msg)
+#define CV_CheckNE(v1, v2, msg)  CV__CHECK(_, NE, auto, v1, v2, #v1, #v2, msg)
+#define CV_CheckLE(v1, v2, msg)  CV__CHECK(_, LE, auto, v1, v2, #v1, #v2, msg)
+#define CV_CheckLT(v1, v2, msg)  CV__CHECK(_, LT, auto, v1, v2, #v1, #v2, msg)
+#define CV_CheckGE(v1, v2, msg)  CV__CHECK(_, GE, auto, v1, v2, #v1, #v2, msg)
+#define CV_CheckGT(v1, v2, msg)  CV__CHECK(_, GT, auto, v1, v2, #v1, #v2, msg)
+
+/// Check with additional "decoding" of type values in error message
+#define CV_CheckTypeEQ(t1, t2, msg)  CV__CHECK(_, EQ, MatType, t1, t2, #t1, #t2, msg)
+/// Check with additional "decoding" of depth values in error message
+#define CV_CheckDepthEQ(d1, d2, msg)  CV__CHECK(_, EQ, MatDepth, d1, d2, #d1, #d2, msg)
+
+#define CV_CheckChannelsEQ(c1, c2, msg)  CV__CHECK(_, EQ, MatChannels, c1, c2, #c1, #c2, msg) // equality check reported through check_failed_MatChannels
+
+/// Example: type == CV_8UC1 || type == CV_8UC3
+#define CV_CheckType(t, test_expr, msg)  CV__CHECK_CUSTOM_TEST(_, MatType, t, (test_expr), #t, #test_expr, msg)
+
+/// Example: depth == CV_32F || depth == CV_64F
+#define CV_CheckDepth(t, test_expr, msg)  CV__CHECK_CUSTOM_TEST(_, MatDepth, t, (test_expr), #t, #test_expr, msg)
+
+/// Example: v == A || v == B
+#define CV_Check(v, test_expr, msg)  CV__CHECK_CUSTOM_TEST(_, auto, v, (test_expr), #v, #test_expr, msg)
+
+/// Example of a complex condition: CV_Check(src2, src2.empty() || (src2.type() == src1.type() && src2.size() == src1.size()), "src2 should have same size/type as src1")
+// TODO define pretty-printers
+
+#ifndef NDEBUG // debug builds: CV_DbgCheck* expand exactly like their CV_Check* counterparts
+#define CV_DbgCheck(v, test_expr, msg)  CV__CHECK_CUSTOM_TEST(_, auto, v, (test_expr), #v, #test_expr, msg)
+#define CV_DbgCheckEQ(v1, v2, msg)  CV__CHECK(_, EQ, auto, v1, v2, #v1, #v2, msg)
+#define CV_DbgCheckNE(v1, v2, msg)  CV__CHECK(_, NE, auto, v1, v2, #v1, #v2, msg)
+#define CV_DbgCheckLE(v1, v2, msg)  CV__CHECK(_, LE, auto, v1, v2, #v1, #v2, msg)
+#define CV_DbgCheckLT(v1, v2, msg)  CV__CHECK(_, LT, auto, v1, v2, #v1, #v2, msg)
+#define CV_DbgCheckGE(v1, v2, msg)  CV__CHECK(_, GE, auto, v1, v2, #v1, #v2, msg)
+#define CV_DbgCheckGT(v1, v2, msg)  CV__CHECK(_, GT, auto, v1, v2, #v1, #v2, msg)
+#else // NDEBUG builds: empty statements, the macro arguments are not evaluated at all
+#define CV_DbgCheck(v, test_expr, msg)  do { } while (0)
+#define CV_DbgCheckEQ(v1, v2, msg)  do { } while (0)
+#define CV_DbgCheckNE(v1, v2, msg)  do { } while (0)
+#define CV_DbgCheckLE(v1, v2, msg)  do { } while (0)
+#define CV_DbgCheckLT(v1, v2, msg)  do { } while (0)
+#define CV_DbgCheckGE(v1, v2, msg)  do { } while (0)
+#define CV_DbgCheckGT(v1, v2, msg)  do { } while (0)
+#endif
+
+} // namespace
+
+#endif // OPENCV_CORE_CHECK_HPP
--- a/3rdparty/opencv/inc/opencv2/core/core.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/core.hpp
@@ -0,0 +1,48 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifdef __OPENCV_BUILD
+#error this is a compatibility header which should not be used inside the OpenCV library
+#endif
+
+#include "opencv2/core.hpp"
--- a/3rdparty/opencv/inc/opencv2/core/core_c.h
+++ b/3rdparty/opencv/inc/opencv2/core/core_c.h
--- a/3rdparty/opencv/inc/opencv2/core/cuda.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda.hpp
--- a/3rdparty/opencv/inc/opencv2/core/cuda.inl.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda.inl.hpp
@@ -0,0 +1,723 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_CUDAINL_HPP
+#define OPENCV_CORE_CUDAINL_HPP
+
+#include "opencv2/core/cuda.hpp"
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda {
+
+//===================================================================================
+// GpuMat
+//===================================================================================
+
+// Empty header: every field is zeroed and only the allocator is remembered.
+inline
+GpuMat::GpuMat(Allocator* allocator_)
+    : flags(0), rows(0), cols(0), step(0),
+      data(0), refcount(0), datastart(0), dataend(0),
+      allocator(allocator_)
+{
+}
+
+// Starts as an empty header; create() is called only when both dimensions are positive.
+inline
+GpuMat::GpuMat(int rows_, int cols_, int type_, Allocator* allocator_)
+    : flags(0), rows(0), cols(0), step(0),
+      data(0), refcount(0), datastart(0), dataend(0),
+      allocator(allocator_)
+{
+    const bool hasArea = rows_ > 0 && cols_ > 0;
+    if (hasArea)
+        create(rows_, cols_, type_);
+}
+
+// Size-based variant of the constructor above (height -> rows, width -> cols).
+inline
+GpuMat::GpuMat(Size size_, int type_, Allocator* allocator_)
+    : flags(0), rows(0), cols(0), step(0),
+      data(0), refcount(0), datastart(0), dataend(0),
+      allocator(allocator_)
+{
+    const bool hasArea = size_.height > 0 && size_.width > 0;
+    if (hasArea)
+        create(size_.height, size_.width, type_);
+}
+
+// Creates the matrix and sets it to `s_`; does nothing beyond the empty header
+// when either dimension is non-positive.
+inline
+GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_, Allocator* allocator_)
+    : flags(0), rows(0), cols(0), step(0),
+      data(0), refcount(0), datastart(0), dataend(0),
+      allocator(allocator_)
+{
+    if (rows_ <= 0 || cols_ <= 0)
+        return;
+
+    create(rows_, cols_, type_);
+    setTo(s_);
+}
+
+// Size-based variant of the create-and-set constructor above.
+inline
+GpuMat::GpuMat(Size size_, int type_, Scalar s_, Allocator* allocator_)
+    : flags(0), rows(0), cols(0), step(0),
+      data(0), refcount(0), datastart(0), dataend(0),
+      allocator(allocator_)
+{
+    if (size_.height <= 0 || size_.width <= 0)
+        return;
+
+    create(size_.height, size_.width, type_);
+    setTo(s_);
+}
+
+// Copy constructor: copies every header field from `m` and, when a reference counter
+// is present, increments it with CV_XADD. No pixel data is copied here.
+inline
+GpuMat::GpuMat(const GpuMat& m)
+    : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step),
+      data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend),
+      allocator(m.allocator)
+{
+    if (refcount != 0)
+        CV_XADD(refcount, 1);
+}
+
+// Starts as an empty header bound to `allocator_`, then fills it via upload(arr).
+inline
+GpuMat::GpuMat(InputArray arr, Allocator* allocator_)
+    : flags(0), rows(0), cols(0), step(0),
+      data(0), refcount(0), datastart(0), dataend(0),
+      allocator(allocator_)
+{
+    upload(arr);
+}
+
+// Destructor: delegates all cleanup to release().
+inline
+GpuMat::~GpuMat()
+{
+    release();
+}
+
+// Copy assignment via copy-and-swap: a temporary copy of `m` is swapped into *this,
+// and the previous contents are released when the temporary is destroyed.
+inline
+GpuMat& GpuMat::operator =(const GpuMat& m)
+{
+    if (this == &m)
+        return *this;   // self-assignment: nothing to do
+
+    GpuMat replacement(m);
+    swap(replacement);
+    return *this;
+}
+
+// Size-based overload: forwards to create(rows, cols, type) with height as rows and width as cols.
+inline
+void GpuMat::create(Size size_, int type_)
+{
+    const int rowCount = size_.height;
+    const int colCount = size_.width;
+    create(rowCount, colCount, type_);
+}
+
+// Exchanges every header field (including refcount and allocator) with `b`.
+inline
+void GpuMat::swap(GpuMat& b)
+{
+    // geometry / type
+    std::swap(flags, b.flags);
+    std::swap(rows, b.rows);
+    std::swap(cols, b.cols);
+    std::swap(step, b.step);
+
+    // data pointers
+    std::swap(data, b.data);
+    std::swap(datastart, b.datastart);
+    std::swap(dataend, b.dataend);
+
+    // ownership bookkeeping
+    std::swap(refcount, b.refcount);
+    std::swap(allocator, b.allocator);
+}
+
+// Returns a new GpuMat filled through copyTo() from *this.
+inline
+GpuMat GpuMat::clone() const
+{
+    GpuMat duplicate;
+    copyTo(duplicate);
+    return duplicate;
+}
+
+inline
+void GpuMat::copyTo(OutputArray dst, InputArray mask) const // forwards to the stream-taking overload with Stream::Null()
+{
+    copyTo(dst, mask, Stream::Null());
+}
+
+inline
+GpuMat& GpuMat::setTo(Scalar s) // forwards to setTo(s, stream) with Stream::Null()
+{
+    return setTo(s, Stream::Null());
+}
+
+inline
+GpuMat& GpuMat::setTo(Scalar s, InputArray mask) // forwards to setTo(s, mask, stream) with Stream::Null()
+{
+    return setTo(s, mask, Stream::Null());
+}
+
+inline
+void GpuMat::convertTo(OutputArray dst, int rtype) const // forwards to convertTo(dst, rtype, stream) with Stream::Null()
+{
+    convertTo(dst, rtype, Stream::Null());
+}
+
+inline
+void GpuMat::convertTo(OutputArray dst, int rtype, double alpha, double beta) const // forwards with Stream::Null() appended
+{
+    convertTo(dst, rtype, alpha, beta, Stream::Null());
+}
+
+inline
+void GpuMat::convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const // forwards to the alpha/beta overload with beta fixed to 0.0
+{
+    convertTo(dst, rtype, alpha, 0.0, stream);
+}
+
+// A negative `_type` assigns *this to `m` as-is; any other value converts into `m`
+// with convertTo(m, _type).
+inline
+void GpuMat::assignTo(GpuMat& m, int _type) const
+{
+    if (_type >= 0)
+    {
+        convertTo(m, _type);
+        return;
+    }
+
+    m = *this;
+}
+
+// Returns the address of row `y` (data + step * y). The row index is range-checked
+// only through CV_DbgAssert.
+inline
+uchar* GpuMat::ptr(int y)
+{
+    CV_DbgAssert( (unsigned)y < (unsigned)rows );
+    uchar* const rowStart = data + step * y;
+    return rowStart;
+}
+
+// Const overload of ptr(int).
+inline
+const uchar* GpuMat::ptr(int y) const
+{
+    CV_DbgAssert( (unsigned)y < (unsigned)rows );
+    const uchar* const rowStart = data + step * y;
+    return rowStart;
+}
+
+// Typed overload: the byte pointer from ptr(y) cast to _Tp*.
+template<typename _Tp> inline
+_Tp* GpuMat::ptr(int y)
+{
+    uchar* const rowBytes = ptr(y);
+    return (_Tp*)rowBytes;
+}
+
+// Typed const overload: the byte pointer from ptr(y) cast to const _Tp*.
+template<typename _Tp> inline
+const _Tp* GpuMat::ptr(int y) const
+{
+    const uchar* const rowBytes = ptr(y);
+    return (const _Tp*)rowBytes;
+}
+
+// Conversion to PtrStepSz<T>: passes rows, cols, the data pointer cast to T*, and step.
+template <class T> inline
+GpuMat::operator PtrStepSz<T>() const
+{
+    T* const typedData = (T*)data;
+    return PtrStepSz<T>(rows, cols, typedData, step);
+}
+
+// Conversion to PtrStep<T>: passes the data pointer cast to T* and step (no dimensions).
+template <class T> inline
+GpuMat::operator PtrStep<T>() const
+{
+    T* const typedData = (T*)data;
+    return PtrStep<T>(typedData, step);
+}
+
+// Header for the single row `y`, i.e. the row range [y, y+1) over all columns.
+inline
+GpuMat GpuMat::row(int y) const
+{
+    const Range singleRow(y, y+1);
+    return GpuMat(*this, singleRow, Range::all());
+}
+
+// Header for the single column `x`, i.e. the column range [x, x+1) over all rows.
+inline
+GpuMat GpuMat::col(int x) const
+{
+    const Range singleCol(x, x+1);
+    return GpuMat(*this, Range::all(), singleCol);
+}
+
+// Header for Range(startrow, endrow) over all columns.
+inline
+GpuMat GpuMat::rowRange(int startrow, int endrow) const
+{
+    const Range selectedRows(startrow, endrow);
+    return GpuMat(*this, selectedRows, Range::all());
+}
+
+// Range-object overload of rowRange().
+inline
+GpuMat GpuMat::rowRange(Range r) const
+{
+    return GpuMat(*this, r, Range::all());
+}
+
+// Header for Range(startcol, endcol) over all rows.
+inline
+GpuMat GpuMat::colRange(int startcol, int endcol) const
+{
+    const Range selectedCols(startcol, endcol);
+    return GpuMat(*this, Range::all(), selectedCols);
+}
+
+// Range-object overload of colRange().
+inline
+GpuMat GpuMat::colRange(Range r) const
+{
+    return GpuMat(*this, Range::all(), r);
+}
+
+// Sub-matrix selected by a row range and a column range.
+inline
+GpuMat GpuMat::operator ()(Range rowRange_, Range colRange_) const
+{
+    return GpuMat(*this, rowRange_, colRange_);
+}
+
+// Sub-matrix selected by a rectangle.
+inline
+GpuMat GpuMat::operator ()(Rect roi) const
+{
+    return GpuMat(*this, roi);
+}
+
+// True when Mat::CONTINUOUS_FLAG is set in `flags`.
+inline
+bool GpuMat::isContinuous() const
+{
+    return 0 != (flags & Mat::CONTINUOUS_FLAG);
+}
+
+// CV_ELEM_SIZE(flags)
+inline
+size_t GpuMat::elemSize() const
+{
+    const size_t elementBytes = CV_ELEM_SIZE(flags);
+    return elementBytes;
+}
+
+// CV_ELEM_SIZE1(flags)
+inline
+size_t GpuMat::elemSize1() const
+{
+    const size_t channelBytes = CV_ELEM_SIZE1(flags);
+    return channelBytes;
+}
+
+// CV_MAT_TYPE(flags)
+inline
+int GpuMat::type() const
+{
+    const int matType = CV_MAT_TYPE(flags);
+    return matType;
+}
+
+// CV_MAT_DEPTH(flags)
+inline
+int GpuMat::depth() const
+{
+    const int matDepth = CV_MAT_DEPTH(flags);
+    return matDepth;
+}
+
+// CV_MAT_CN(flags)
+inline
+int GpuMat::channels() const
+{
+    const int channelCount = CV_MAT_CN(flags);
+    return channelCount;
+}
+
+// `step` divided by elemSize1().
+inline
+size_t GpuMat::step1() const
+{
+    const size_t channelBytes = elemSize1();
+    return step / channelBytes;
+}
+
+// Size(cols, rows) -- note the width-first argument order.
+inline
+Size GpuMat::size() const
+{
+    const Size extent(cols, rows);
+    return extent;
+}
+
+// True when the data pointer is null.
+inline
+bool GpuMat::empty() const
+{
+    return !data;
+}
+
+// The data pointer as an untyped pointer.
+inline
+void* GpuMat::cudaPtr() const
+{
+    void* const devicePtr = data;
+    return devicePtr;
+}
+
+// Value-returning convenience: fills a local GpuMat through the four-argument
+// createContinuous() and returns it.
+static inline
+GpuMat createContinuous(int rows, int cols, int type)
+{
+    GpuMat result;
+    createContinuous(rows, cols, type, result);
+    return result;
+}
+
+// Size-based overload: height is passed as rows, width as cols.
+static inline
+void createContinuous(Size size, int type, OutputArray arr)
+{
+    const int rowCount = size.height;
+    const int colCount = size.width;
+    createContinuous(rowCount, colCount, type, arr);
+}
+
+// Value-returning, Size-based convenience overload.
+static inline
+GpuMat createContinuous(Size size, int type)
+{
+    GpuMat result;
+    createContinuous(size, type, result);
+    return result;
+}
+
+// Size-based overload: height is passed as rows, width as cols.
+static inline
+void ensureSizeIsEnough(Size size, int type, OutputArray arr)
+{
+    const int rowCount = size.height;
+    const int colCount = size.width;
+    ensureSizeIsEnough(rowCount, colCount, type, arr);
+}
+
+// Free-function swap that delegates to GpuMat::swap().
+static inline
+void swap(GpuMat& a, GpuMat& b)
+{
+    a.swap(b);
+}
+
+//===================================================================================
+// GpuMatND
+//===================================================================================
+
+// Empty object: zero flags/dims/offset and a null data pointer.
+inline
+GpuMatND::GpuMatND()
+    : flags(0),
+      dims(0),
+      data(nullptr),
+      offset(0)
+{
+}
+
+// Starts empty, then calls create() with the size array moved in.
+inline
+GpuMatND::GpuMatND(SizeArray _size, int _type)
+    : flags(0),
+      dims(0),
+      data(nullptr),
+      offset(0)
+{
+    create(std::move(_size), _type);
+}
+
+inline
+void GpuMatND::swap(GpuMatND& m) noexcept // exchanges the complete state of *this and m
+{
+    std::swap(*this, m); // must stay std::-qualified: an unqualified swap here would name this member; NOTE(review): presumably relies on GpuMatND's move operations -- confirm in cuda.hpp
+}
+
+// True when Mat::CONTINUOUS_FLAG is set in `flags`.
+inline
+bool GpuMatND::isContinuous() const
+{
+    return 0 != (flags & Mat::CONTINUOUS_FLAG);
+}
+
+// True when Mat::SUBMATRIX_FLAG is set in `flags`.
+inline
+bool GpuMatND::isSubmatrix() const
+{
+    return 0 != (flags & Mat::SUBMATRIX_FLAG);
+}
+
+// CV_ELEM_SIZE(flags)
+inline
+size_t GpuMatND::elemSize() const
+{
+    const size_t elementBytes = CV_ELEM_SIZE(flags);
+    return elementBytes;
+}
+
+// CV_ELEM_SIZE1(flags)
+inline
+size_t GpuMatND::elemSize1() const
+{
+    const size_t channelBytes = CV_ELEM_SIZE1(flags);
+    return channelBytes;
+}
+
+// True when the data pointer is null.
+inline
+bool GpuMatND::empty() const
+{
+    return nullptr == data;
+}
+
+// True for a non-empty object whose data_ holder reports a use count of zero.
+inline
+bool GpuMatND::external() const
+{
+    if (empty())
+        return false;
+
+    return data_.use_count() == 0;
+}
+
+// The data pointer advanced by `offset`.
+inline
+uchar* GpuMatND::getDevicePtr() const
+{
+    uchar* const start = data + offset;
+    return start;
+}
+
+// Product of all entries of `size`; the running product starts at 1.
+inline
+size_t GpuMatND::total() const
+{
+    size_t product = 1;
+    for (const auto extent : size)
+    {
+        product *= extent;
+    }
+    return product;
+}
+
+// Returns size[0] * step[0], or 0 when the object has no dimensions.
+inline
+size_t GpuMatND::totalMemSize() const
+{
+    // The default constructor leaves `size` and `step` default-initialised, so indexing
+    // element 0 unconditionally would read out of bounds on such an object.
+    // (Assumes SizeArray/StepArray are standard containers providing empty().)
+    if (size.empty() || step.empty())
+        return 0;
+
+    return size[0] * step[0];
+}
+
+// CV_MAT_TYPE(flags)
+inline
+int GpuMatND::type() const
+{
+    const int matType = CV_MAT_TYPE(flags);
+    return matType;
+}
+
+//===================================================================================
+// HostMem
+//===================================================================================
+
+// Empty header that only remembers the allocation type.
+inline
+HostMem::HostMem(AllocType alloc_type_)
+    : flags(0), rows(0), cols(0), step(0),
+      data(0), refcount(0), datastart(0), dataend(0),
+      alloc_type(alloc_type_)
+{
+}
+
+// Copy constructor: copies every header field from `m` and, when a reference counter
+// is present, increments it with CV_XADD.
+inline
+HostMem::HostMem(const HostMem& m)
+    : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step),
+      data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend),
+      alloc_type(m.alloc_type)
+{
+    if (refcount != 0)
+        CV_XADD(refcount, 1);
+}
+
+// Starts empty; create() is called only when both dimensions are positive.
+inline
+HostMem::HostMem(int rows_, int cols_, int type_, AllocType alloc_type_)
+    : flags(0), rows(0), cols(0), step(0),
+      data(0), refcount(0), datastart(0), dataend(0),
+      alloc_type(alloc_type_)
+{
+    const bool hasArea = rows_ > 0 && cols_ > 0;
+    if (hasArea)
+        create(rows_, cols_, type_);
+}
+
+// Size-based variant of the constructor above (height -> rows, width -> cols).
+inline
+HostMem::HostMem(Size size_, int type_, AllocType alloc_type_)
+    : flags(0), rows(0), cols(0), step(0),
+      data(0), refcount(0), datastart(0), dataend(0),
+      alloc_type(alloc_type_)
+{
+    const bool hasArea = size_.height > 0 && size_.width > 0;
+    if (hasArea)
+        create(size_.height, size_.width, type_);
+}
+
+// Starts empty, then copies the Mat obtained from `arr` into *this.
+inline
+HostMem::HostMem(InputArray arr, AllocType alloc_type_)
+    : flags(0), rows(0), cols(0), step(0),
+      data(0), refcount(0), datastart(0), dataend(0),
+      alloc_type(alloc_type_)
+{
+    const Mat source = arr.getMat();
+    source.copyTo(*this);
+}
+
+// Destructor: delegates all cleanup to release().
+inline
+HostMem::~HostMem()
+{
+    release();
+}
+
+// Copy assignment via copy-and-swap: a temporary copy of `m` is swapped into *this,
+// and the previous contents are released when the temporary is destroyed.
+inline
+HostMem& HostMem::operator =(const HostMem& m)
+{
+    if (this == &m)
+        return *this;   // self-assignment: nothing to do
+
+    HostMem replacement(m);
+    swap(replacement);
+    return *this;
+}
+
+// Exchanges every header field (including refcount and alloc_type) with `b`.
+inline
+void HostMem::swap(HostMem& b)
+{
+    // geometry / type
+    std::swap(flags, b.flags);
+    std::swap(rows, b.rows);
+    std::swap(cols, b.cols);
+    std::swap(step, b.step);
+
+    // data pointers
+    std::swap(data, b.data);
+    std::swap(datastart, b.datastart);
+    std::swap(dataend, b.dataend);
+
+    // ownership bookkeeping
+    std::swap(refcount, b.refcount);
+    std::swap(alloc_type, b.alloc_type);
+}
+
+// Constructs a HostMem with the same size, type and allocation type, then copies the
+// contents into it through a Mat header over this object's data.
+inline
+HostMem HostMem::clone() const
+{
+    HostMem duplicate(size(), type(), alloc_type);
+    const Mat view = createMatHeader();
+    view.copyTo(duplicate);
+    return duplicate;
+}
+
+// Size-based overload: forwards to create(rows, cols, type) with height as rows and width as cols.
+inline
+void HostMem::create(Size size_, int type_)
+{
+    const int rowCount = size_.height;
+    const int colCount = size_.width;
+    create(rowCount, colCount, type_);
+}
+
+// Builds a Mat from this object's size, type, data pointer and step.
+inline
+Mat HostMem::createMatHeader() const
+{
+    const Size extent = size();
+    const int matType = type();
+    return Mat(extent, matType, data, step);
+}
+
+// True when Mat::CONTINUOUS_FLAG is set in `flags`.
+inline
+bool HostMem::isContinuous() const
+{
+    return 0 != (flags & Mat::CONTINUOUS_FLAG);
+}
+
+// CV_ELEM_SIZE(flags)
+inline
+size_t HostMem::elemSize() const
+{
+    const size_t elementBytes = CV_ELEM_SIZE(flags);
+    return elementBytes;
+}
+
+// CV_ELEM_SIZE1(flags)
+inline
+size_t HostMem::elemSize1() const
+{
+    const size_t channelBytes = CV_ELEM_SIZE1(flags);
+    return channelBytes;
+}
+
+// CV_MAT_TYPE(flags)
+inline
+int HostMem::type() const
+{
+    const int matType = CV_MAT_TYPE(flags);
+    return matType;
+}
+
+// CV_MAT_DEPTH(flags)
+inline
+int HostMem::depth() const
+{
+    const int matDepth = CV_MAT_DEPTH(flags);
+    return matDepth;
+}
+
+// CV_MAT_CN(flags)
+inline
+int HostMem::channels() const
+{
+    const int channelCount = CV_MAT_CN(flags);
+    return channelCount;
+}
+
+// `step` divided by elemSize1().
+inline
+size_t HostMem::step1() const
+{
+    const size_t channelBytes = elemSize1();
+    return step / channelBytes;
+}
+
+// Size(cols, rows) -- note the width-first argument order.
+inline
+Size HostMem::size() const
+{
+    const Size extent(cols, rows);
+    return extent;
+}
+
+// True when the data pointer is null.
+inline
+bool HostMem::empty() const
+{
+    return !data;
+}
+
+// Free-function swap that delegates to HostMem::swap().
+static inline
+void swap(HostMem& a, HostMem& b)
+{
+    a.swap(b);
+}
+
+//===================================================================================
+// Stream
+//===================================================================================
+
+inline
+Stream::Stream(const Ptr<Impl>& impl) // wraps an existing implementation pointer
+    : impl_(impl)
+{
+}
+
+//===================================================================================
+// Event
+//===================================================================================
+
+inline
+Event::Event(const Ptr<Impl>& impl) // wraps an existing implementation pointer
+    : impl_(impl)
+{
+}
+
+//===================================================================================
+// Initialization & Info
+//===================================================================================
+
+// True when either hasPtx() or hasBin() reports the given version; hasBin() is
+// consulted only if hasPtx() returned false.
+inline
+bool TargetArchs::has(int major, int minor)
+{
+    if (hasPtx(major, minor))
+        return true;
+
+    return hasBin(major, minor);
+}
+
+// True when either hasEqualOrGreaterPtx() or hasEqualOrGreaterBin() reports the given
+// version; the Bin query runs only if the Ptx query returned false.
+inline
+bool TargetArchs::hasEqualOrGreater(int major, int minor)
+{
+    if (hasEqualOrGreaterPtx(major, minor))
+        return true;
+
+    return hasEqualOrGreaterBin(major, minor);
+}
+
+// Binds the object to the device id returned by getDevice().
+inline
+DeviceInfo::DeviceInfo()
+{
+    const int currentDevice = getDevice();
+    device_id_ = currentDevice;
+}
+
+// Binds the object to `device_id`, which must lie in [0, getCudaEnabledDeviceCount());
+// the range is enforced with CV_Assert before the id is stored.
+inline
+DeviceInfo::DeviceInfo(int device_id)
+{
+    CV_Assert( device_id >= 0 && device_id < getCudaEnabledDeviceCount() );
+    device_id_ = device_id;
+}
+
+// Returns the stored device id.
+inline
+int DeviceInfo::deviceID() const
+{
+    const int id = device_id_;
+    return id;
+}
+
+// Runs queryMemory() and returns the free-memory figure it reports.
+inline
+size_t DeviceInfo::freeMemory() const
+{
+    size_t totalBytes = 0;
+    size_t freeBytes = 0;
+    queryMemory(totalBytes, freeBytes);
+    return freeBytes;
+}
+
+// Runs queryMemory() and returns the total-memory figure it reports.
+inline
+size_t DeviceInfo::totalMemory() const
+{
+    size_t totalBytes = 0;
+    size_t freeBytes = 0;
+    queryMemory(totalBytes, freeBytes);
+    return totalBytes;
+}
+
+// Encodes the device version as majorVersion() * 10 + minorVersion() and reports
+// whether it is at least `feature_set`.
+inline
+bool DeviceInfo::supports(FeatureSet feature_set) const
+{
+    const int version = majorVersion() * 10 + minorVersion();
+    if (version >= feature_set)
+        return true;
+    return false;
+}
+
+
+}} // namespace cv { namespace cuda {
+
+//===================================================================================
+// Mat
+//===================================================================================
+
+namespace cv {
+
+// Builds an empty Mat header (with `size` pointing at `rows`) and then fills it by
+// calling m.download(*this).
+inline
+Mat::Mat(const cuda::GpuMat& m)
+    : flags(0), dims(0), rows(0), cols(0),
+      data(0), datastart(0), dataend(0), datalimit(0),
+      allocator(0), u(0), size(&rows)
+{
+    m.download(*this);
+}
+
+}
+
+//! @endcond
+
+#endif // OPENCV_CORE_CUDAINL_HPP
--- a/3rdparty/opencv/inc/opencv2/core/cuda/block.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda/block.hpp
@@ -0,0 +1,211 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_DEVICE_BLOCK_HPP
+#define OPENCV_CUDA_DEVICE_BLOCK_HPP
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    struct Block // static device-side helpers in which all threads of one block cooperate on a range or buffer
+    {
+        static __device__ __forceinline__ unsigned int id() // block index along x only (blockIdx.y/z are not used)
+        {
+            return blockIdx.x;
+        }
+
+        static __device__ __forceinline__ unsigned int stride() // number of threads in the block
+        {
+            return blockDim.x * blockDim.y * blockDim.z;
+        }
+
+        static __device__ __forceinline__ void sync() // block-wide barrier
+        {
+            __syncthreads();
+        }
+
+        static __device__ __forceinline__ int flattenedThreadId() // linear thread index within the block, x varying fastest
+        {
+            return threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x;
+        }
+
+        template<typename It, typename T>
+        static __device__ __forceinline__ void fill(It beg, It end, const T& value) // assigns value to [beg, end); thread k handles elements k, k+stride, ...
+        {
+            int STRIDE = stride();
+            It t = beg + flattenedThreadId();
+
+            for(; t < end; t += STRIDE)
+                *t = value;
+        }
+
+        template<typename OutIt, typename T>
+        static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value) // writes value + i to element i of [beg, end)
+        {
+            int STRIDE = stride();
+            int tid = flattenedThreadId();
+            value += tid;
+
+            for(OutIt t = beg + tid; t < end; t += STRIDE, value += STRIDE)
+                *t = value;
+        }
+
+        template<typename InIt, typename OutIt>
+        static __device__ __forceinline__ void copy(InIt beg, InIt end, OutIt out) // copies [beg, end) to out, element i to out + i
+        {
+            int STRIDE = stride();
+            InIt  t = beg + flattenedThreadId();
+            OutIt o = out + (t - beg);
+
+            for(; t < end; t += STRIDE, o += STRIDE)
+                *o = *t;
+        }
+
+        template<typename InIt, typename OutIt, class UnOp>
+        static __device__ __forceinline__ void transform(InIt beg, InIt end, OutIt out, UnOp op) // out[i] = op(beg[i]) over [beg, end)
+        {
+            int STRIDE = stride();
+            InIt  t = beg + flattenedThreadId();
+            OutIt o = out + (t - beg);
+
+            for(; t < end; t += STRIDE, o += STRIDE)
+                *o = op(*t);
+        }
+
+        template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
+        static __device__ __forceinline__ void transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op) // out[i] = op(beg1[i], beg2[i]); the length is taken from [beg1, end1)
+        {
+            int STRIDE = stride();
+            InIt1 t1 = beg1 + flattenedThreadId();
+            InIt2 t2 = beg2 + flattenedThreadId();
+            OutIt o  = out + (t1 - beg1);
+
+            for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, o += STRIDE)
+                *o = op(*t1, *t2);
+        }
+
+        template<int CTA_SIZE, typename T, class BinOp>
+        static __device__ __forceinline__ void reduce(volatile T* buffer, BinOp op) // in-place tree reduction of buffer; presumably CTA_SIZE is the block's thread count -- TODO confirm
+        {
+            int tid = flattenedThreadId();
+            T val =  buffer[tid]; // no barrier precedes this read, so buffer is expected to be populated and synchronised by the caller
+
+            if (CTA_SIZE >= 1024) { if (tid < 512) buffer[tid] = val = op(val, buffer[tid + 512]); __syncthreads(); }
+            if (CTA_SIZE >=  512) { if (tid < 256) buffer[tid] = val = op(val, buffer[tid + 256]); __syncthreads(); }
+            if (CTA_SIZE >=  256) { if (tid < 128) buffer[tid] = val = op(val, buffer[tid + 128]); __syncthreads(); }
+            if (CTA_SIZE >=  128) { if (tid <  64) buffer[tid] = val = op(val, buffer[tid +  64]); __syncthreads(); }
+
+            if (tid < 32) // final steps run without barriers and depend on the volatile buffer; NOTE(review): no __syncwarp() between steps -- confirm this is safe on the targeted GPU architectures
+            {
+                if (CTA_SIZE >=   64) { buffer[tid] = val = op(val, buffer[tid +  32]); }
+                if (CTA_SIZE >=   32) { buffer[tid] = val = op(val, buffer[tid +  16]); }
+                if (CTA_SIZE >=   16) { buffer[tid] = val = op(val, buffer[tid +   8]); }
+                if (CTA_SIZE >=    8) { buffer[tid] = val = op(val, buffer[tid +   4]); }
+                if (CTA_SIZE >=    4) { buffer[tid] = val = op(val, buffer[tid +   2]); }
+                if (CTA_SIZE >=    2) { buffer[tid] = val = op(val, buffer[tid +   1]); }
+            }
+        }
+
+        template<int CTA_SIZE, typename T, class BinOp>
+        static __device__ __forceinline__ T reduce(volatile T* buffer, T init, BinOp op) // each thread contributes init; every thread returns buffer[0] after the reduction
+        {
+            int tid = flattenedThreadId();
+            T val =  buffer[tid] = init; // publish this thread's contribution before the barrier below
+            __syncthreads();
+
+            if (CTA_SIZE >= 1024) { if (tid < 512) buffer[tid] = val = op(val, buffer[tid + 512]); __syncthreads(); }
+            if (CTA_SIZE >=  512) { if (tid < 256) buffer[tid] = val = op(val, buffer[tid + 256]); __syncthreads(); }
+            if (CTA_SIZE >=  256) { if (tid < 128) buffer[tid] = val = op(val, buffer[tid + 128]); __syncthreads(); }
+            if (CTA_SIZE >=  128) { if (tid <  64) buffer[tid] = val = op(val, buffer[tid +  64]); __syncthreads(); }
+
+            if (tid < 32) // same barrier-free tail as the overload above; NOTE(review): no __syncwarp() between steps
+            {
+                if (CTA_SIZE >=   64) { buffer[tid] = val = op(val, buffer[tid +  32]); }
+                if (CTA_SIZE >=   32) { buffer[tid] = val = op(val, buffer[tid +  16]); }
+                if (CTA_SIZE >=   16) { buffer[tid] = val = op(val, buffer[tid +   8]); }
+                if (CTA_SIZE >=    8) { buffer[tid] = val = op(val, buffer[tid +   4]); }
+                if (CTA_SIZE >=    4) { buffer[tid] = val = op(val, buffer[tid +   2]); }
+                if (CTA_SIZE >=    2) { buffer[tid] = val = op(val, buffer[tid +   1]); }
+            }
+            __syncthreads(); // make buffer[0] visible to every thread before it is read
+            return buffer[0];
+        }
+
+        template <typename T, class BinOp>
+        static __device__ __forceinline__ void reduce_n(T* data, unsigned int n, BinOp op) // in-place reduction of data[0..n) for arbitrary n; the result ends up in data[0]
+        {
+            int ftid = flattenedThreadId();
+            int sft = stride();
+
+            if (sft < n) // more elements than threads: first fold the excess into the leading sft slots
+            {
+                for (unsigned int i = sft + ftid; i < n; i += sft)
+                    data[ftid] = op(data[ftid], data[i]);
+
+                __syncthreads();
+
+                n = sft;
+            }
+
+            while (n > 1) // each pass pairs element ftid with its mirror n - ftid - 1
+            {
+                unsigned int half = n/2;
+
+                if (ftid < half)
+                    data[ftid] = op(data[ftid], data[n - ftid - 1]);
+
+                __syncthreads();
+
+                n = n - half; // for odd n the unpaired middle element is kept for the next pass
+            }
+        }
+    };
+}}}
+
+//! @endcond
+
+#endif /* OPENCV_CUDA_DEVICE_BLOCK_HPP */
--- a/3rdparty/opencv/inc/opencv2/core/cuda/border_interpolate.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda/border_interpolate.hpp
@@ -0,0 +1,722 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_BORDER_INTERPOLATE_HPP
+#define OPENCV_CUDA_BORDER_INTERPOLATE_HPP
+
+#include "saturate_cast.hpp"
+#include "vec_traits.hpp"
+#include "vec_math.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    //////////////////////////////////////////////////////////////
+    // BrdConstant
+
+    // Row accessor with a constant border: positions outside the row yield the stored
+    // fill value `val` instead of touching `data`.
+    template <typename D> struct BrdRowConstant
+    {
+        typedef D result_type;
+
+        // width_ is the row length; val_ is the fill value (VecTraits<D>::all(0) by default).
+        explicit __host__ __device__ __forceinline__ BrdRowConstant(int width_, const D& val_ = VecTraits<D>::all(0)) : width(width_), val(val_) {}
+
+        // Guards the lower end only: x is compared against 0, not against width.
+        template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
+        { return x < 0 ? val : saturate_cast<D>(data[x]); }
+
+        // Guards the upper end only: x is compared against width, not against 0.
+        template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
+        { return x >= width ? val : saturate_cast<D>(data[x]); }
+
+        // Guards both ends of the row.
+        template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
+        { return (x < 0 || x >= width) ? val : saturate_cast<D>(data[x]); }
+
+        int width;  // row length used by the upper-bound checks
+        D val;      // value returned for out-of-range positions
+    };
+
+    // Column accessor with a constant border: rows outside the column yield the stored
+    // fill value `val`. `step` is the distance in bytes between consecutive rows.
+    template <typename D> struct BrdColConstant
+    {
+        typedef D result_type;
+
+        // height_ is the column length; val_ is the fill value (VecTraits<D>::all(0) by default).
+        explicit __host__ __device__ __forceinline__ BrdColConstant(int height_, const D& val_ = VecTraits<D>::all(0)) : height(height_), val(val_) {}
+
+        // Guards the lower end only: y is compared against 0, not against height.
+        template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
+        { return y < 0 ? val : saturate_cast<D>(*reinterpret_cast<const T*>(reinterpret_cast<const char*>(data) + y * step)); }
+
+        // Guards the upper end only: y is compared against height, not against 0.
+        template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
+        { return y >= height ? val : saturate_cast<D>(*reinterpret_cast<const T*>(reinterpret_cast<const char*>(data) + y * step)); }
+
+        // Guards both ends of the column.
+        template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
+        { return (y < 0 || y >= height) ? val : saturate_cast<D>(*reinterpret_cast<const T*>(reinterpret_cast<const char*>(data) + y * step)); }
+
+        int height;  // column length used by the upper-bound checks
+        D val;       // value returned for out-of-range rows
+    };
+
+    // Two-dimensional constant border: any (y, x) outside the height x width rectangle
+    // yields the stored fill value `val`.
+    template <typename D> struct BrdConstant
+    {
+        typedef D result_type;
+
+        // val_ is the fill value; it defaults to VecTraits<D>::all(0).
+        __host__ __device__ __forceinline__ BrdConstant(int height_, int width_, const D& val_ = VecTraits<D>::all(0)) : height(height_), width(width_), val(val_) {}
+
+        // Raw-pointer overload: `step` is the distance in bytes between consecutive rows of `data`.
+        template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
+        { return (x < 0 || x >= width || y < 0 || y >= height) ? val : saturate_cast<D>(reinterpret_cast<const T*>(reinterpret_cast<const uchar*>(data) + y * step)[x]); }
+
+        // Accessor-object overload: src(y, x) is only invoked for in-range coordinates.
+        // The test stays in its positive form because index_type need not be an integer type.
+        template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
+        { return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(src(y, x)) : val; }
+
+        int height;  // number of valid rows
+        int width;   // number of valid columns
+        D val;       // value returned for out-of-range coordinates
+    };
+
+    //////////////////////////////////////////////////////////////
+    // BrdReplicate
+
+    // Row accessor with a replicated border: an out-of-range column index is clamped to
+    // the nearest valid column, so the edge element is repeated.
+    template <typename D> struct BrdRowReplicate
+    {
+        typedef D result_type;
+
+        // Only the index of the last column is stored. The two-argument overload ignores
+        // its second argument (presumably kept so all border types share a constructor shape).
+        explicit __host__ __device__ __forceinline__ BrdRowReplicate(int width) : last_col(width - 1) {}
+        template <typename U> __host__ __device__ __forceinline__ BrdRowReplicate(int width, U) : last_col(width - 1) {}
+
+        // Clamps from below only (x is not compared against last_col).
+        __device__ __forceinline__ int idx_col_low(int x) const
+        { return ::max(x, 0); }
+
+        // Clamps from above only (x is not compared against 0), so a negative x is
+        // returned unchanged.
+        __device__ __forceinline__ int idx_col_high(int x) const
+        { return ::min(x, last_col); }
+
+        // Clamps on both sides: the upper clamp is applied first, then the lower one,
+        // which is what idx_col_low(idx_col_high(x)) evaluates to.
+        __device__ __forceinline__ int idx_col(int x) const
+        { return ::max(::min(x, last_col), 0); }
+
+        // Reads the element at the lower-clamped index and converts it to D.
+        template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
+        { const int col = idx_col_low(x); return saturate_cast<D>(data[col]); }
+
+        // Reads the element at the upper-clamped index and converts it to D.
+        template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
+        { const int col = idx_col_high(x); return saturate_cast<D>(data[col]); }
+
+        // Reads the element at the fully clamped index and converts it to D.
+        template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
+        { const int col = idx_col(x); return saturate_cast<D>(data[col]); }
+
+        int last_col;  // index of the last valid column (width - 1)
+    };
+
+    // Column accessor with a replicated border: an out-of-range row index is clamped to
+    // the nearest valid row. `step` is the distance in bytes between consecutive rows.
+    template <typename D> struct BrdColReplicate
+    {
+        typedef D result_type;
+
+        // Only the index of the last row is stored. The two-argument overload ignores
+        // its second argument (presumably kept so all border types share a constructor shape).
+        explicit __host__ __device__ __forceinline__ BrdColReplicate(int height) : last_row(height - 1) {}
+        template <typename U> __host__ __device__ __forceinline__ BrdColReplicate(int height, U) : last_row(height - 1) {}
+
+        // Clamps from below only (y is not compared against last_row).
+        __device__ __forceinline__ int idx_row_low(int y) const
+        { return ::max(y, 0); }
+
+        // Clamps from above only (y is not compared against 0), so a negative y is
+        // returned unchanged.
+        __device__ __forceinline__ int idx_row_high(int y) const
+        { return ::min(y, last_row); }
+
+        // Clamps on both sides: the upper clamp is applied first, then the lower one,
+        // which is what idx_row_low(idx_row_high(y)) evaluates to.
+        __device__ __forceinline__ int idx_row(int y) const
+        { return ::max(::min(y, last_row), 0); }
+
+        // Reads the element of the lower-clamped row and converts it to D.
+        template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
+        { const char* row = reinterpret_cast<const char*>(data) + idx_row_low(y) * step; return saturate_cast<D>(*reinterpret_cast<const T*>(row)); }
+
+        // Reads the element of the upper-clamped row and converts it to D.
+        template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
+        { const char* row = reinterpret_cast<const char*>(data) + idx_row_high(y) * step; return saturate_cast<D>(*reinterpret_cast<const T*>(row)); }
+
+        // Reads the element of the fully clamped row and converts it to D.
+        template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
+        { const char* row = reinterpret_cast<const char*>(data) + idx_row(y) * step; return saturate_cast<D>(*reinterpret_cast<const T*>(row)); }
+
+        int last_row;  // index of the last valid row (height - 1)
+    };
+
+    // Two-dimensional replicated border: out-of-range row and column indices are clamped
+    // to the nearest valid row / column before the element is read.
+    template <typename D> struct BrdReplicate
+    {
+        typedef D result_type;
+
+        // Only the last row / column indices are stored. The three-argument overload ignores
+        // its last argument (presumably kept so all border types share a constructor shape).
+        __host__ __device__ __forceinline__ BrdReplicate(int height, int width) : last_row(height - 1), last_col(width - 1) {}
+        template <typename U> __host__ __device__ __forceinline__ BrdReplicate(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}
+
+        // Clamps the row index from below only.
+        __device__ __forceinline__ int idx_row_low(int y) const
+        { return ::max(y, 0); }
+
+        // Clamps the row index from above only, so a negative y is returned
+        // unchanged.
+        __device__ __forceinline__ int idx_row_high(int y) const
+        { return ::min(y, last_row); }
+
+        // Clamps the row index on both sides: upper clamp first, then lower clamp,
+        // which is what idx_row_low(idx_row_high(y)) evaluates to.
+        __device__ __forceinline__ int idx_row(int y) const
+        { return ::max(::min(y, last_row), 0); }
+
+        // Clamps the column index from below only.
+        __device__ __forceinline__ int idx_col_low(int x) const
+        { return ::max(x, 0); }
+
+        // Clamps the column index from above only.
+        __device__ __forceinline__ int idx_col_high(int x) const
+        { return ::min(x, last_col); }
+
+        // Clamps the column index on both sides: upper clamp first, then lower clamp,
+        // which is what idx_col_low(idx_col_high(x)) evaluates to.
+        __device__ __forceinline__ int idx_col(int x) const
+        { return ::max(::min(x, last_col), 0); }
+
+        // Raw-pointer overload: `step` is the distance in bytes between consecutive rows.
+        // The clamped row is located first, then indexed by the clamped column.
+        template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
+        { const T* row = reinterpret_cast<const T*>(reinterpret_cast<const char*>(data) + idx_row(y) * step); return saturate_cast<D>(row[idx_col(x)]); }
+
+        // Accessor-object overload: src is called with the clamped (row, column) pair.
+        template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
+        { return saturate_cast<D>(src(idx_row(y), idx_col(x))); }
+
+        int last_row;  // index of the last valid row (height - 1)
+        int last_col;  // index of the last valid column (width - 1)
+    };
+
+    //////////////////////////////////////////////////////////////
+    // BrdReflect101
+
+    // Row accessor with a mirrored border that does not repeat the edge element
+    // (column -1 maps to column 1, column last_col + 1 maps to column last_col - 1).
+    template <typename D> struct BrdRowReflect101
+    {
+        typedef D result_type;
+
+        // Only the index of the last column is stored. The two-argument overload ignores
+        // its second argument (presumably kept so all border types share a constructor shape).
+        explicit __host__ __device__ __forceinline__ BrdRowReflect101(int width) : last_col(width - 1) {}
+        template <typename U> __host__ __device__ __forceinline__ BrdRowReflect101(int width, U) : last_col(width - 1) {}
+
+        // Mirrors a negative x about column 0; the modulo keeps the result below the width.
+        __device__ __forceinline__ int idx_col_low(int x) const
+        { const int width = last_col + 1; return ::abs(x) % width; }
+
+        // Mirrors an x beyond last_col about the last column; the outer ::abs and the
+        // modulo keep the result inside [0, last_col].
+        __device__ __forceinline__ int idx_col_high(int x) const
+        { const int mirrored = last_col - ::abs(last_col - x); return ::abs(mirrored) % (last_col + 1); }
+
+        // Handles both sides: the upper mirror is applied first and its result is then
+        // passed through the lower mirror.
+        __device__ __forceinline__ int idx_col(int x) const
+        { const int upper = idx_col_high(x); return idx_col_low(upper); }
+
+        // Reads the element at idx_col_low(x) and converts it to D.
+        template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
+        { const int col = idx_col_low(x); return saturate_cast<D>(data[col]); }
+
+        // Reads the element at idx_col_high(x) and converts it to D.
+        template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
+        { const int col = idx_col_high(x); return saturate_cast<D>(data[col]); }
+
+        // Reads the element at idx_col(x) and converts it to D.
+        template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
+        { const int col = idx_col(x); return saturate_cast<D>(data[col]); }
+
+        int last_col;  // index of the last valid column (width - 1)
+    };
+
+    // Column accessor with a mirrored border that does not repeat the edge row (row -1
+    // maps to row 1). `step` is the distance in bytes between consecutive rows.
+    template <typename D> struct BrdColReflect101
+    {
+        typedef D result_type;
+
+        // Only the index of the last row is stored. The two-argument overload ignores
+        // its second argument (presumably kept so all border types share a constructor shape).
+        explicit __host__ __device__ __forceinline__ BrdColReflect101(int height) : last_row(height - 1) {}
+        template <typename U> __host__ __device__ __forceinline__ BrdColReflect101(int height, U) : last_row(height - 1) {}
+
+        // Mirrors a negative y about row 0; the modulo keeps the result below the height.
+        __device__ __forceinline__ int idx_row_low(int y) const
+        { return ::abs(y) % (last_row + 1); }
+
+        // Mirrors a y beyond last_row about the last row; the outer ::abs and the
+        // modulo keep the result inside [0, last_row].
+        __device__ __forceinline__ int idx_row_high(int y) const
+        { return ::abs(last_row - ::abs(last_row - y)) % (last_row + 1); }
+
+        // Handles both sides: the upper mirror is applied first and its result is then
+        // passed through the lower mirror.
+        __device__ __forceinline__ int idx_row(int y) const
+        { return idx_row_low(idx_row_high(y)); }
+
+        // The element is read as T (the buffer's own element type) and only then converted to D.
+        template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
+        { return saturate_cast<D>(*reinterpret_cast<const T*>(reinterpret_cast<const char*>(data) + idx_row_low(y) * step)); }
+
+        // Same read as at_low, using the upper-side row mapping.
+        template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
+        { return saturate_cast<D>(*reinterpret_cast<const T*>(reinterpret_cast<const char*>(data) + idx_row_high(y) * step)); }
+
+        // Same read as at_low, using the two-sided row mapping.
+        template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
+        { return saturate_cast<D>(*reinterpret_cast<const T*>(reinterpret_cast<const char*>(data) + idx_row(y) * step)); }
+
+        int last_row;  // index of the last valid row (height - 1)
+    };
+
+    // Two-dimensional mirrored border that does not repeat the edge row / column
+    // (index -1 maps to index 1 on either axis).
+    template <typename D> struct BrdReflect101
+    {
+        typedef D result_type;
+
+        // Only the last row / column indices are stored. The three-argument overload ignores
+        // its last argument (presumably kept so all border types share a constructor shape).
+        __host__ __device__ __forceinline__ BrdReflect101(int height, int width) : last_row(height - 1), last_col(width - 1) {}
+        template <typename U> __host__ __device__ __forceinline__ BrdReflect101(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}
+
+        // Mirrors a negative y about row 0; the modulo keeps the result below the height.
+        __device__ __forceinline__ int idx_row_low(int y) const
+        { const int height = last_row + 1; return ::abs(y) % height; }
+
+        // Mirrors a y beyond last_row about the last row; the outer ::abs and the
+        // modulo keep the result inside [0, last_row].
+        __device__ __forceinline__ int idx_row_high(int y) const
+        { const int mirrored = last_row - ::abs(last_row - y); return ::abs(mirrored) % (last_row + 1); }
+
+        // Handles both sides: the upper mirror is applied first and its result is then
+        // passed through the lower mirror.
+        __device__ __forceinline__ int idx_row(int y) const
+        { const int upper = idx_row_high(y); return idx_row_low(upper); }
+
+        // Mirrors a negative x about column 0; the modulo keeps the result below the width.
+        __device__ __forceinline__ int idx_col_low(int x) const
+        { const int width = last_col + 1; return ::abs(x) % width; }
+
+        // Mirrors an x beyond last_col about the last column.
+        __device__ __forceinline__ int idx_col_high(int x) const
+        { const int mirrored = last_col - ::abs(last_col - x); return ::abs(mirrored) % (last_col + 1); }
+
+        // Handles both sides: the upper mirror is applied first and its result is then
+        // passed through the lower mirror.
+        __device__ __forceinline__ int idx_col(int x) const
+        { const int upper = idx_col_high(x); return idx_col_low(upper); }
+
+        // Raw-pointer overload: `step` is the distance in bytes between consecutive rows.
+        // The mapped row is located first, then indexed by the mapped column.
+        template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
+        { const T* row = reinterpret_cast<const T*>(reinterpret_cast<const char*>(data) + idx_row(y) * step); return saturate_cast<D>(row[idx_col(x)]); }
+
+        // Accessor-object overload: src is called with the mapped (row, column) pair.
+        template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
+        { return saturate_cast<D>(src(idx_row(y), idx_col(x))); }
+
+        int last_row;  // index of the last valid row (height - 1)
+        int last_col;  // index of the last valid column (width - 1)
+    };
+
+    //////////////////////////////////////////////////////////////
+    // BrdReflect
+
+    // Row accessor with a mirrored border that repeats the edge element
+    // (column -1 maps to column 0, column last_col + 1 maps to column last_col).
+    template <typename D> struct BrdRowReflect
+    {
+        typedef D result_type;
+
+        // Only the index of the last column is stored. The two-argument overload ignores
+        // its second argument (presumably kept so all border types share a constructor shape).
+        explicit __host__ __device__ __forceinline__ BrdRowReflect(int width) : last_col(width - 1) {}
+        template <typename U> __host__ __device__ __forceinline__ BrdRowReflect(int width, U) : last_col(width - 1) {}
+
+        // Mirrors a negative x; the (x < 0) term shifts the mirror so that -1 lands on 0.
+        __device__ __forceinline__ int idx_col_low(int x) const
+        { const int mirrored = ::abs(x) - (x < 0); return mirrored % (last_col + 1); }
+
+        // Mirrors an x beyond last_col; the (x > last_col) term shifts the mirror so
+        // that last_col + 1 lands on last_col.
+        __device__ __forceinline__ int idx_col_high(int x) const
+        { const int mirrored = last_col - ::abs(last_col - x) + (x > last_col); return ::abs(mirrored) % (last_col + 1); }
+
+        // Handles both sides: the lower mirror (without its modulo) is applied first and
+        // its result is then passed through idx_col_high.
+        __device__ __forceinline__ int idx_col(int x) const
+        { const int lower = ::abs(x) - (x < 0); return idx_col_high(lower); }
+
+        // Reads the element at idx_col_low(x) and converts it to D.
+        template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
+        { const int col = idx_col_low(x); return saturate_cast<D>(data[col]); }
+
+        // Reads the element at idx_col_high(x) and converts it to D.
+        template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
+        { const int col = idx_col_high(x); return saturate_cast<D>(data[col]); }
+
+        // Reads the element at idx_col(x) and converts it to D.
+        template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
+        { const int col = idx_col(x); return saturate_cast<D>(data[col]); }
+
+        int last_col;  // index of the last valid column (width - 1)
+    };
+
+    // Column accessor with a mirrored border that repeats the edge row (row -1 maps to
+    // row 0). `step` is the distance in bytes between consecutive rows.
+    template <typename D> struct BrdColReflect
+    {
+        typedef D result_type;
+
+        // Only the index of the last row is stored. The two-argument overload ignores
+        // its second argument (presumably kept so all border types share a constructor shape).
+        explicit __host__ __device__ __forceinline__ BrdColReflect(int height) : last_row(height - 1) {}
+        template <typename U> __host__ __device__ __forceinline__ BrdColReflect(int height, U) : last_row(height - 1) {}
+
+        // Mirrors a negative y; the (y < 0) term shifts the mirror so that -1 lands on 0.
+        __device__ __forceinline__ int idx_row_low(int y) const
+        { return (::abs(y) - (y < 0)) % (last_row + 1); }
+
+        // Mirrors a y beyond last_row; the (y > last_row) term shifts the mirror so
+        // that last_row + 1 lands on last_row.
+        __device__ __forceinline__ int idx_row_high(int y) const
+        { return ::abs(last_row - ::abs(last_row - y) + (y > last_row)) % (last_row + 1); }
+
+        // Handles both sides: the lower mirror (without its modulo) is applied first and
+        // its result is then passed through idx_row_high.
+        __device__ __forceinline__ int idx_row(int y) const
+        { return idx_row_high(::abs(y) - (y < 0)); }
+
+        // The element is read as T (the buffer's own element type) and only then converted to D.
+        template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
+        { return saturate_cast<D>(*reinterpret_cast<const T*>(reinterpret_cast<const char*>(data) + idx_row_low(y) * step)); }
+
+        // Same read as at_low, using the upper-side row mapping.
+        template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
+        { return saturate_cast<D>(*reinterpret_cast<const T*>(reinterpret_cast<const char*>(data) + idx_row_high(y) * step)); }
+
+        // Same read as at_low, using the two-sided row mapping.
+        template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
+        { return saturate_cast<D>(*reinterpret_cast<const T*>(reinterpret_cast<const char*>(data) + idx_row(y) * step)); }
+
+        int last_row;  // index of the last valid row (height - 1)
+    };
+
+    // Two-dimensional mirrored border that repeats the edge row / column
+    // (index -1 maps to index 0 on either axis).
+    template <typename D> struct BrdReflect
+    {
+        typedef D result_type;
+
+        // Only the last row / column indices are stored. The three-argument overload ignores
+        // its last argument (presumably kept so all border types share a constructor shape).
+        __host__ __device__ __forceinline__ BrdReflect(int height, int width) : last_row(height - 1), last_col(width - 1) {}
+        template <typename U> __host__ __device__ __forceinline__ BrdReflect(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}
+
+        // Mirrors a negative y; the (y < 0) term shifts the mirror so that -1 lands on 0.
+        __device__ __forceinline__ int idx_row_low(int y) const
+        { const int mirrored = ::abs(y) - (y < 0); return mirrored % (last_row + 1); }
+
+        // Mirrors a y beyond last_row. No ::abs or modulo is applied to the result here,
+        // so the value can be negative; idx_row feeds it through idx_row_low afterwards.
+        __device__ __forceinline__ int idx_row_high(int y) const
+        { return last_row - ::abs(last_row - y) + (y > last_row); }
+
+        // Handles both sides: the upper mirror is applied first and its result is then
+        // passed through the lower mirror.
+        __device__ __forceinline__ int idx_row(int y) const
+        { const int upper = idx_row_high(y); return idx_row_low(upper); }
+
+        // Mirrors a negative x; the (x < 0) term shifts the mirror so that -1 lands on 0.
+        __device__ __forceinline__ int idx_col_low(int x) const
+        { const int mirrored = ::abs(x) - (x < 0); return mirrored % (last_col + 1); }
+
+        // Mirrors an x beyond last_col; like idx_row_high, without ::abs or modulo.
+        __device__ __forceinline__ int idx_col_high(int x) const
+        { return last_col - ::abs(last_col - x) + (x > last_col); }
+
+        // Handles both sides: the upper mirror is applied first and its result is then
+        // passed through the lower mirror.
+        __device__ __forceinline__ int idx_col(int x) const
+        { const int upper = idx_col_high(x); return idx_col_low(upper); }
+
+        // Raw-pointer overload: `step` is the distance in bytes between consecutive rows.
+        // The mapped row is located first, then indexed by the mapped column.
+        template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
+        { const T* row = reinterpret_cast<const T*>(reinterpret_cast<const char*>(data) + idx_row(y) * step); return saturate_cast<D>(row[idx_col(x)]); }
+
+        // Accessor-object overload: src is called with the mapped (row, column) pair.
+        template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
+        { return saturate_cast<D>(src(idx_row(y), idx_col(x))); }
+
+        int last_row;  // index of the last valid row (height - 1)
+        int last_col;  // index of the last valid column (width - 1)
+    };
+
+    //////////////////////////////////////////////////////////////
+    // BrdWrap
+
+    // Row accessor with a wrap-around border: an out-of-range column index is shifted
+    // by a multiple of the width so that it falls back inside the row.
+    template <typename D> struct BrdRowWrap
+    {
+        typedef D result_type;
+
+        // Stores the row width. The two-argument overload ignores its second argument
+        // (presumably kept so all border types share a constructor shape).
+        explicit __host__ __device__ __forceinline__ BrdRowWrap(int width_) : width(width_) {}
+        template <typename U> __host__ __device__ __forceinline__ BrdRowWrap(int width_, U) : width(width_) {}
+
+        // Branch-free: a negative x is moved into [0, width); any other x is returned unchanged.
+        __device__ __forceinline__ int idx_col_low(int x) const
+        { const int wrapped = x - ((x - width + 1) / width) * width; return (x >= 0) * x + (x < 0) * wrapped; }
+
+        // Branch-free: an x of at least width is reduced modulo width; any smaller x
+        // (negative values included) is returned unchanged.
+        __device__ __forceinline__ int idx_col_high(int x) const
+        { const int folded = x % width; return (x < width) * x + (x >= width) * folded; }
+
+        // Handles both sides: the lower wrap is applied first and its result is then
+        // passed through the upper wrap.
+        __device__ __forceinline__ int idx_col(int x) const
+        { const int lower = idx_col_low(x); return idx_col_high(lower); }
+
+        // Reads the element at idx_col_low(x) and converts it to D.
+        template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
+        { const int col = idx_col_low(x); return saturate_cast<D>(data[col]); }
+
+        // Reads the element at idx_col_high(x) and converts it to D.
+        template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
+        { const int col = idx_col_high(x); return saturate_cast<D>(data[col]); }
+
+        // Reads the element at idx_col(x) and converts it to D.
+        template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
+        { const int col = idx_col(x); return saturate_cast<D>(data[col]); }
+
+        int width;  // row length used as the wrap period
+    };
+
+    // Column accessor with a wrap-around border: an out-of-range row index is shifted by a
+    // multiple of the height. `step` is the distance in bytes between consecutive rows.
+    template <typename D> struct BrdColWrap
+    {
+        typedef D result_type;
+
+        // Stores the column height. The two-argument overload ignores its second argument
+        // (presumably kept so all border types share a constructor shape).
+        explicit __host__ __device__ __forceinline__ BrdColWrap(int height_) : height(height_) {}
+        template <typename U> __host__ __device__ __forceinline__ BrdColWrap(int height_, U) : height(height_) {}
+
+        // Branch-free: a negative y is moved into [0, height); any other y is returned unchanged.
+        __device__ __forceinline__ int idx_row_low(int y) const
+        { return (y >= 0) * y + (y < 0) * (y - ((y - height + 1) / height) * height); }
+
+        // Branch-free: a y of at least height is reduced modulo height; any smaller y
+        // (negative values included) is returned unchanged.
+        __device__ __forceinline__ int idx_row_high(int y) const
+        { return (y < height) * y + (y >= height) * (y % height); }
+
+        // Handles both sides: the lower wrap is applied first and its result is then
+        // passed through the upper wrap.
+        __device__ __forceinline__ int idx_row(int y) const
+        { return idx_row_high(idx_row_low(y)); }
+
+        // The element is read as T (the buffer's own element type) and only then converted to D.
+        template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
+        { return saturate_cast<D>(*reinterpret_cast<const T*>(reinterpret_cast<const char*>(data) + idx_row_low(y) * step)); }
+
+        // Same read as at_low, using the upper-side row mapping.
+        template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
+        { return saturate_cast<D>(*reinterpret_cast<const T*>(reinterpret_cast<const char*>(data) + idx_row_high(y) * step)); }
+
+        // Same read as at_low, using the two-sided row mapping.
+        template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
+        { return saturate_cast<D>(*reinterpret_cast<const T*>(reinterpret_cast<const char*>(data) + idx_row(y) * step)); }
+
+        int height;  // column length used as the wrap period
+    };
+
+    // Two-dimensional wrap-around border: out-of-range row and column indices are
+    // shifted by a multiple of the height / width so that they fall back inside the
+    // height x width rectangle.
+    template <typename D> struct BrdWrap
+    {
+        typedef D result_type;
+
+        // Stores both extents; they act as the wrap periods for the row and
+        // column index helpers below.
+        __host__ __device__ __forceinline__ BrdWrap(int height_, int width_)
+            : height(height_), width(width_) {}
+        // Same as above; the unnamed third argument is ignored.
+        template <typename U>
+        __host__ __device__ __forceinline__ BrdWrap(int height_, int width_, U)
+            : height(height_), width(width_) {}
+
+        // A non-negative y is returned unchanged (it is not reduced modulo height);
+        // a negative y is moved into [0, height).
+        __device__ __forceinline__ int idx_row_low(int y) const
+        { if (y >= 0) return y; return y - ((y - height + 1) / height) * height; }
+
+        // A y below height (negative values included) is returned unchanged;
+        // a larger y is reduced modulo height.
+        __device__ __forceinline__ int idx_row_high(int y) const
+        { if (y < height) return y; return y % height; }
+
+        // Handles both sides: the lower wrap is applied first and its result is then
+        // passed through the upper wrap.
+        __device__ __forceinline__ int idx_row(int y) const
+        { const int lower = idx_row_low(y); return idx_row_high(lower); }
+
+        // Column counterpart of idx_row_low.
+        __device__ __forceinline__ int idx_col_low(int x) const
+        { if (x >= 0) return x; return x - ((x - width + 1) / width) * width; }
+
+        // Column counterpart of idx_row_high.
+        __device__ __forceinline__ int idx_col_high(int x) const
+        { if (x < width) return x; return x % width; }
+
+        // Handles both sides: the lower wrap is applied first and its result is then
+        // passed through the upper wrap.
+        __device__ __forceinline__ int idx_col(int x) const
+        { const int lower = idx_col_low(x); return idx_col_high(lower); }
+
+        // Raw-pointer overload: `step` is the distance in bytes between consecutive rows.
+        // The wrapped row is located first, then indexed by the wrapped column.
+        template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
+        { const T* row = reinterpret_cast<const T*>(reinterpret_cast<const char*>(data) + idx_row(y) * step); return saturate_cast<D>(row[idx_col(x)]); }
+
+        // Accessor-object overload: src is called with the wrapped (row, column) pair
+        // and its result is converted to D.
+        template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
+        { return saturate_cast<D>(src(idx_row(y), idx_col(x))); }
+
+        int height;  // number of rows, used as the row wrap period
+        int width;   // number of columns, used as the column wrap period
+    };
+
+    //////////////////////////////////////////////////////////////
+    // BorderReader
+
+    // Pairs a 2D accessor with a border policy B; every read is delegated to B::at.
+    template <typename Ptr2D, typename B> struct BorderReader
+    {
+        typedef typename B::result_type elem_type;
+        typedef typename Ptr2D::index_type index_type;
+
+        __host__ __device__ __forceinline__ BorderReader(const Ptr2D& ptr_, const B& b_) : ptr(ptr_), b(b_) {}
+
+        // Forwards (y, x) together with the stored accessor to the border policy.
+        __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const
+        { return b.at(y, x, ptr); }
+
+        Ptr2D ptr;
+        B b;
+    };
+
+    // Under win32 there is a bug with templated types that are passed as kernel parameters;
+    // with this specialization everything works fine.
+    // Specialization for the constant border: the policy's height, width and fill value
+    // are copied into plain members instead of storing a BrdConstant<D> object.
+    template <typename Ptr2D, typename D> struct BorderReader< Ptr2D, BrdConstant<D> >
+    {
+        typedef typename BrdConstant<D>::result_type elem_type;
+        typedef typename Ptr2D::index_type index_type;
+
+        __host__ __device__ __forceinline__ BorderReader(const Ptr2D& src_, const BrdConstant<D>& b)
+            : src(src_), height(b.height), width(b.width), val(b.val) {}
+
+        // src(y, x) is only invoked for in-range coordinates; anything else yields val.
+        // The test stays in its positive form because index_type need not be an integer type.
+        __device__ __forceinline__ D operator ()(index_type y, index_type x) const
+        { return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(src(y, x)) : val; }
+
+        Ptr2D src;
+        int height;
+        int width;
+        D val;
+    };
+}}} // namespace cv { namespace cuda { namespace device
+
+//! @endcond
+
+#endif // OPENCV_CUDA_BORDER_INTERPOLATE_HPP
--- a/3rdparty/opencv/inc/opencv2/core/cuda/color.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda/color.hpp
@@ -0,0 +1,309 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_COLOR_HPP
+#define OPENCV_CUDA_COLOR_HPP
+
+#include "detail/color_detail.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    // Each OPENCV_CUDA_IMPLEMENT_*_TRAITS(ColorSpace1_to_ColorSpace2, ...) macro below defines
+    // template <typename T> class ColorSpace1_to_ColorSpace2_traits
+    // {
+    //     typedef ... functor_type;
+    //     static __host__ __device__ functor_type create_functor();
+    // };
+    // Each macro is #undef'ed after its last use. NOTE(review): the macros are not defined in this file (presumably in detail/color_detail.hpp); confirm the meaning of the numeric/bool arguments there.
+    OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgb, 3, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_bgra, 3, 4, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgba, 3, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_bgr, 4, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgb, 4, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgba, 4, 4, 2)
+
+    #undef OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS
+    // RGB/BGR(A) -> BGR555 / BGR565
+    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr555, 3, 0, 5)
+    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr565, 3, 0, 6)
+    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr555, 3, 2, 5)
+    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr565, 3, 2, 6)
+    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr555, 4, 0, 5)
+    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr565, 4, 0, 6)
+    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr555, 4, 2, 5)
+    OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr565, 4, 2, 6)
+
+    #undef OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS
+    // BGR555 / BGR565 -> RGB/BGR(A)
+    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgb, 3, 2, 5)
+    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgb, 3, 2, 6)
+    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgr, 3, 0, 5)
+    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgr, 3, 0, 6)
+    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgba, 4, 2, 5)
+    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgba, 4, 2, 6)
+    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgra, 4, 0, 5)
+    OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgra, 4, 0, 6)
+
+    #undef OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS
+    // Gray -> BGR(A)
+    OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgr, 3)
+    OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgra, 4)
+
+    #undef OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS
+    // Gray -> BGR555 / BGR565
+    OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr555, 5)
+    OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr565, 6)
+
+    #undef OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS
+    // BGR555 / BGR565 -> gray
+    OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr555_to_gray, 5)
+    OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr565_to_gray, 6)
+
+    #undef OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS
+    // RGB/BGR(A) -> gray
+    OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(rgb_to_gray, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(bgr_to_gray, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(rgba_to_gray, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(bgra_to_gray, 4, 0)
+
+    #undef OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS
+    // RGB/BGR(A) -> YUV
+    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv, 3, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv, 4, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv4, 3, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv4, 4, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv, 3, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv, 4, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv4, 3, 4, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv4, 4, 4, 0)
+
+    #undef OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS
+    // YUV -> RGB/BGR(A)
+    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgb, 3, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgba, 3, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgb, 4, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgba, 4, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgr, 3, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgra, 3, 4, 0)
+    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgr, 4, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgra, 4, 4, 0)
+
+    #undef OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS
+    // RGB/BGR(A) -> YCrCb
+    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb, 3, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb, 4, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb4, 3, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb4, 4, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb, 3, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb, 4, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb4, 3, 4, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb4, 4, 4, 0)
+
+    #undef OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS
+    // YCrCb -> RGB/BGR(A)
+    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgb, 3, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgba, 3, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgb, 4, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgba, 4, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgr, 3, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgra, 3, 4, 0)
+    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgr, 4, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgra, 4, 4, 0)
+
+    #undef OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS
+    // RGB/BGR(A) -> XYZ
+    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz, 3, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz, 4, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz4, 3, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz4, 4, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz, 3, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz, 4, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz4, 3, 4, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz4, 4, 4, 0)
+
+    #undef OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS
+    // XYZ -> RGB/BGR(A)
+    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgb, 3, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgb, 4, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgba, 3, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgba, 4, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgr, 3, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgr, 4, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgra, 3, 4, 0)
+    OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgra, 4, 4, 0)
+
+    #undef OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS
+    // RGB/BGR(A) -> HSV
+    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv, 3, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv, 4, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv4, 3, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv4, 4, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv, 3, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv, 4, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv4, 3, 4, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv4, 4, 4, 0)
+
+    #undef OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS
+    // HSV -> RGB/BGR(A)
+    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgb, 3, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgba, 3, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgb, 4, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgba, 4, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgr, 3, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgra, 3, 4, 0)
+    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgr, 4, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgra, 4, 4, 0)
+
+    #undef OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS
+    // RGB/BGR(A) -> HLS
+    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls, 3, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls, 4, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls4, 3, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls4, 4, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls, 3, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls, 4, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls4, 3, 4, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls4, 4, 4, 0)
+
+    #undef OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS
+    // HLS -> RGB/BGR(A)
+    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgb, 3, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgba, 3, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgb, 4, 3, 2)
+    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgba, 4, 4, 2)
+    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgr, 3, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgra, 3, 4, 0)
+    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgr, 4, 3, 0)
+    OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgra, 4, 4, 0)
+
+    #undef OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS
+    // RGB/BGR(A) -> Lab
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgb_to_lab, 3, 3, true, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgba_to_lab, 4, 3, true, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgb_to_lab4, 3, 4, true, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgba_to_lab4, 4, 4, true, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgr_to_lab, 3, 3, true, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgra_to_lab, 4, 3, true, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgr_to_lab4, 3, 4, true, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgra_to_lab4, 4, 4, true, 0)
+    // NOTE(review): in the Lab/Luv tables the 'l'-prefixed names pass false where the others pass true - presumably linear vs. sRGB data; confirm.
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgb_to_lab, 3, 3, false, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgba_to_lab, 4, 3, false, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgb_to_lab4, 3, 4, false, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgba_to_lab4, 4, 4, false, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgr_to_lab, 3, 3, false, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgra_to_lab, 4, 3, false, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgr_to_lab4, 3, 4, false, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgra_to_lab4, 4, 4, false, 0)
+
+    #undef OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS
+    // Lab -> RGB/BGR(A)
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_rgb, 3, 3, true, 2)
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_rgb, 4, 3, true, 2)
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_rgba, 3, 4, true, 2)
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_rgba, 4, 4, true, 2)
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_bgr, 3, 3, true, 0)
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_bgr, 4, 3, true, 0)
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_bgra, 3, 4, true, 0)
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_bgra, 4, 4, true, 0)
+
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lrgb, 3, 3, false, 2)
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lrgb, 4, 3, false, 2)
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lrgba, 3, 4, false, 2)
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lrgba, 4, 4, false, 2)
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lbgr, 3, 3, false, 0)
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lbgr, 4, 3, false, 0)
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lbgra, 3, 4, false, 0)
+    OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lbgra, 4, 4, false, 0)
+
+    #undef OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS
+    // RGB/BGR(A) -> Luv
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgb_to_luv, 3, 3, true, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgba_to_luv, 4, 3, true, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgb_to_luv4, 3, 4, true, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgba_to_luv4, 4, 4, true, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgr_to_luv, 3, 3, true, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgra_to_luv, 4, 3, true, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgr_to_luv4, 3, 4, true, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgra_to_luv4, 4, 4, true, 0)
+
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgb_to_luv, 3, 3, false, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgba_to_luv, 4, 3, false, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgb_to_luv4, 3, 4, false, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgba_to_luv4, 4, 4, false, 2)
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgr_to_luv, 3, 3, false, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgra_to_luv, 4, 3, false, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgr_to_luv4, 3, 4, false, 0)
+    OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgra_to_luv4, 4, 4, false, 0)
+
+    #undef OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS
+    // Luv -> RGB/BGR(A)
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_rgb, 3, 3, true, 2)
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_rgb, 4, 3, true, 2)
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_rgba, 3, 4, true, 2)
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_rgba, 4, 4, true, 2)
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_bgr, 3, 3, true, 0)
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_bgr, 4, 3, true, 0)
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_bgra, 3, 4, true, 0)
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_bgra, 4, 4, true, 0)
+
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lrgb, 3, 3, false, 2)
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lrgb, 4, 3, false, 2)
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lrgba, 3, 4, false, 2)
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lrgba, 4, 4, false, 2)
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lbgr, 3, 3, false, 0)
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lbgr, 4, 3, false, 0)
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lbgra, 3, 4, false, 0)
+    OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lbgra, 4, 4, false, 0)
+
+    #undef OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS
+}}} // namespace cv { namespace cuda { namespace device
+
+//! @endcond
+
+#endif // OPENCV_CUDA_COLOR_HPP
--- a/3rdparty/opencv/inc/opencv2/core/cuda/common.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda/common.hpp
@@ -0,0 +1,123 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_COMMON_HPP
+#define OPENCV_CUDA_COMMON_HPP
+
+#include <cuda_runtime.h>
+#include "opencv2/core/cuda_types.hpp"
+#include "opencv2/core/cvdef.h"
+#include "opencv2/core/base.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+#ifndef CV_PI_F  // float pi constant; a definition made earlier is left untouched
+    #ifndef CV_PI  // no CV_PI available: fall back to a float literal
+        #define CV_PI_F 3.14159265f
+    #else  // CV_PI exists: reuse it, cast to float
+        #define CV_PI_F ((float)CV_PI)
+    #endif
+#endif
+
+namespace cv { namespace cuda {
+    static inline void checkCudaError(cudaError_t err, const char* file, const int line, const char* func)
+    {   // Does nothing for cudaSuccess; any other status is handed to cv::error as GpuApiCallError.
+        if (cudaSuccess != err)
+            cv::error(cv::Error::GpuApiCallError, cudaGetErrorString(err), func, file, line);  // message is the CUDA error string; note the (func, file, line) order
+    }
+}}
+// cudaSafeCall(expr): passes the value of expr to checkCudaError together with __FILE__, __LINE__ and CV_Func of the call site.
+#ifndef cudaSafeCall  // a cudaSafeCall defined earlier takes precedence
+    #define cudaSafeCall(expr)  cv::cuda::checkCudaError(expr, __FILE__, __LINE__, CV_Func)
+#endif
+
+namespace cv { namespace cuda
+{
+    template <typename T> static inline bool isAligned(const T* ptr, size_t size)  // true when the address held by ptr is a multiple of size
+    {
+        return reinterpret_cast<size_t>(ptr) % size == 0;  // size == 0 would be a modulo by zero; callers must pass a non-zero size
+    }
+    // Overload for a plain value: true when step is a multiple of size (same non-zero requirement on size).
+    static inline bool isAligned(size_t step, size_t size)
+    {
+        return step % size == 0;
+    }
+}}
+
+namespace cv { namespace cuda
+{
+    namespace device
+    {
+        __host__ __device__ __forceinline__ int divUp(int total, int grain)  // NOTE(review): presumably meant for total >= 0 and grain > 0 - confirm
+        {
+            return (total + grain - 1) / grain;  // int division after adding grain - 1
+        }
+        // Binds img (pointer, cols, rows, step) to the texture reference tex, using the channel descriptor created for T.
+        template<class T> inline void bindTexture(const textureReference* tex, const PtrStepSz<T>& img)
+        {
+            cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
+            cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );  // NOTE(review): texture-reference API; check that the targeted CUDA toolkit still provides it
+        }
+        // Creates a texture object in *tex for img, described as a cudaResourceTypePitch2D resource; texDesc is forwarded unchanged.
+        template<class T> inline void createTextureObjectPitch2D(cudaTextureObject_t* tex, PtrStepSz<T>& img, const cudaTextureDesc& texDesc)
+        {
+            cudaResourceDesc resDesc;
+            memset(&resDesc, 0, sizeof(resDesc));  // zero every field before the pitch2D members are filled in
+            resDesc.resType = cudaResourceTypePitch2D;
+            resDesc.res.pitch2D.devPtr = static_cast<void*>(img.ptr());
+            resDesc.res.pitch2D.height = img.rows;
+            resDesc.res.pitch2D.width = img.cols;
+            resDesc.res.pitch2D.pitchInBytes = img.step;
+            resDesc.res.pitch2D.desc = cudaCreateChannelDesc<T>();
+
+            cudaSafeCall( cudaCreateTextureObject(tex, &resDesc, &texDesc, NULL) );  // a failing status goes through cudaSafeCall
+        }
+    }
+}}
+
+//! @endcond
+
+#endif // OPENCV_CUDA_COMMON_HPP
--- a/3rdparty/opencv/inc/opencv2/core/cuda/datamov_utils.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda/datamov_utils.hpp
@@ -0,0 +1,113 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_DATAMOV_UTILS_HPP
+#define OPENCV_CUDA_DATAMOV_UTILS_HPP
+
+#include "common.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 200
+        // ForceGlob<T>::Load(ptr, offset, val) stores ptr[offset] into val.
+        // On Fermi the memory space is detected automatically, so an ordinary indexed read is used.
+        template <typename T> struct ForceGlob
+        {
+            __device__ __forceinline__ static void Load(const T* ptr, int offset, T& val)  { val = ptr[offset];  }
+        };
+
+    #else // !(defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 200)
+        // Here Load is written as an explicit "ld.global" instruction; the address operand is ptr + offset.
+        #if defined(_WIN64) || defined(__LP64__)
+            // 64-bit register modifier for inlined asm
+            #define OPENCV_CUDA_ASM_PTR "l"
+        #else
+            // 32-bit register modifier for inlined asm
+            #define OPENCV_CUDA_ASM_PTR "r"
+        #endif
+        // The primary template is only declared; the macros below generate one explicit specialization per listed type.
+        template<class T> struct ForceGlob;
+        // Variant whose asm output operand is val itself, using the register constraint given as reg_mod.
+        #define OPENCV_CUDA_DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \
+            template <> struct ForceGlob<base_type> \
+            { \
+                __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
+                { \
+                    asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \
+                } \
+            };
+        // Variant used for uchar/schar/char: the output operand is *reinterpret_cast<uint*>(&val) with "=r". NOTE(review): uint is presumably wider than these types - confirm the store is safe.
+        #define OPENCV_CUDA_DEFINE_FORCE_GLOB_B(base_type, ptx_type) \
+            template <> struct ForceGlob<base_type> \
+            { \
+                __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
+                { \
+                    asm("ld.global."#ptx_type" %0, [%1];" : "=r"(*reinterpret_cast<uint*>(&val)) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \
+                } \
+            };
+
+            OPENCV_CUDA_DEFINE_FORCE_GLOB_B(uchar,  u8)
+            OPENCV_CUDA_DEFINE_FORCE_GLOB_B(schar,  s8)
+            OPENCV_CUDA_DEFINE_FORCE_GLOB_B(char,   b8)
+            OPENCV_CUDA_DEFINE_FORCE_GLOB  (ushort, u16, h)
+            OPENCV_CUDA_DEFINE_FORCE_GLOB  (short,  s16, h)
+            OPENCV_CUDA_DEFINE_FORCE_GLOB  (uint,   u32, r)
+            OPENCV_CUDA_DEFINE_FORCE_GLOB  (int,    s32, r)
+            OPENCV_CUDA_DEFINE_FORCE_GLOB  (float,  f32, f)
+            OPENCV_CUDA_DEFINE_FORCE_GLOB  (double, f64, d)
+
+        #undef OPENCV_CUDA_DEFINE_FORCE_GLOB
+        #undef OPENCV_CUDA_DEFINE_FORCE_GLOB_B
+        #undef OPENCV_CUDA_ASM_PTR
+
+    #endif // __CUDA_ARCH__ >= 200
+}}} // namespace cv { namespace cuda { namespace device
+
+//! @endcond
+
+#endif // OPENCV_CUDA_DATAMOV_UTILS_HPP
--- a/3rdparty/opencv/inc/opencv2/core/cuda/detail/color_detail.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda/detail/color_detail.hpp
--- a/3rdparty/opencv/inc/opencv2/core/cuda/detail/reduce.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda/detail/reduce.hpp
@@ -0,0 +1,365 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_REDUCE_DETAIL_HPP
+#define OPENCV_CUDA_REDUCE_DETAIL_HPP
+
+#include <thrust/tuple.h>
+#include "../warp.hpp"
+#include "../warp_shuffle.hpp"
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    namespace reduce_detail
+    {
+        // Maps an element type of a pointer tuple or reference tuple to its plain value
+        // type: T&, T* and volatile T* all yield T. The primary template is left
+        // undefined, so asking for the type of anything else fails to compile.
+        template <typename T> struct GetType;
+        template <typename T> struct GetType<T&>          { typedef T type; };
+        template <typename T> struct GetType<T*>          { typedef T type; };
+        template <typename T> struct GetType<volatile T*> { typedef T type; };
+
+        // Compile-time loop over tuple elements I .. N-1: every member processes
+        // element I and then recurses into For<I + 1, N>. The For<N, N> specialization
+        // terminates the recursion.
+        template <unsigned int I, unsigned int N>
+        struct For
+        {
+            // Element I: smem[tid] <- val.
+            template <class PointerTuple, class ValTuple>
+            static __device__ void loadToSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid)
+            {
+                thrust::get<I>(smem)[tid] = thrust::get<I>(val);
+                For<I + 1, N>::loadToSmem(smem, val, tid);
+            }
+
+            // Element I: val <- smem[tid].
+            template <class PointerTuple, class ValTuple>
+            static __device__ void loadFromSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid)
+            {
+                thrust::get<I>(val) = thrust::get<I>(smem)[tid];
+                For<I + 1, N>::loadFromSmem(smem, val, tid);
+            }
+
+            // Element I: combine val with smem[tid + delta] through op, then store the
+            // result in val and in smem[tid].
+            template <class PointerTuple, class ValTuple, class OpTuple>
+            static __device__ void merge(const PointerTuple& smem, const ValTuple& val, unsigned int tid, unsigned int delta, const OpTuple& op)
+            {
+                typedef typename GetType<typename thrust::tuple_element<I, PointerTuple>::type>::type value_type;
+
+                value_type other = thrust::get<I>(smem)[tid + delta];
+                thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), other);
+                thrust::get<I>(smem)[tid] = thrust::get<I>(val);
+
+                For<I + 1, N>::merge(smem, val, tid, delta, op);
+            }
+
+            // Element I: combine val with the value obtained from shfl_down(val, delta, width).
+            template <class ValTuple, class OpTuple>
+            static __device__ void mergeShfl(const ValTuple& val, unsigned int delta, unsigned int width, const OpTuple& op)
+            {
+                typedef typename GetType<typename thrust::tuple_element<I, ValTuple>::type>::type value_type;
+
+                value_type other = shfl_down(thrust::get<I>(val), delta, width);
+                thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), other);
+
+                For<I + 1, N>::mergeShfl(val, delta, width, op);
+            }
+        };
+        // Recursion terminator for For<I, N>: once I reaches N there is no element
+        // left, so every operation is a no-op.
+        template <unsigned int N>
+        struct For<N, N>
+        {
+            template <class PointerTuple, class ValTuple>
+            static __device__ void loadToSmem(const PointerTuple&, const ValTuple&, unsigned int) {}
+
+            template <class PointerTuple, class ValTuple>
+            static __device__ void loadFromSmem(const PointerTuple&, const ValTuple&, unsigned int) {}
+
+            template <class PointerTuple, class ValTuple, class OpTuple>
+            static __device__ void merge(const PointerTuple&, const ValTuple&, unsigned int, unsigned int, const OpTuple&) {}
+
+            template <class ValTuple, class OpTuple>
+            static __device__ void mergeShfl(const ValTuple&, unsigned int, unsigned int, const OpTuple&) {}
+        };
+
+        // Single-value form: stores val into slot tid of the volatile buffer smem.
+        template <typename T>
+        __device__ __forceinline__ void loadToSmem(volatile T* smem, T& val, unsigned int tid)
+        {
+            *(smem + tid) = val;
+        }
+        // Single-value form: reads slot tid of the volatile buffer smem into val.
+        template <typename T>
+        __device__ __forceinline__ void loadFromSmem(volatile T* smem, T& val, unsigned int tid)
+        {
+            val = *(smem + tid);
+        }
+        // Tuple form: stores every element of val into slot tid of the matching buffer
+        // in smem. The element count is taken from the pointer tuple.
+        template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
+                  typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9>
+        __device__ __forceinline__ void loadToSmem(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
+                                                   const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
+                                                   unsigned int tid)
+        {
+            typedef thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> PointerTuple;
+
+            For<0, thrust::tuple_size<PointerTuple>::value>::loadToSmem(smem, val, tid);
+        }
+        // Tuple form: reads slot tid of every buffer in smem into the matching element
+        // of val. The element count is taken from the pointer tuple.
+        template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
+                  typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9>
+        __device__ __forceinline__ void loadFromSmem(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
+                                                     const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
+                                                     unsigned int tid)
+        {
+            typedef thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> PointerTuple;
+
+            For<0, thrust::tuple_size<PointerTuple>::value>::loadFromSmem(smem, val, tid);
+        }
+
+        // Single-value form: combines val with smem[tid + delta] through op and writes
+        // the result to both val and smem[tid].
+        template <typename T, class Op>
+        __device__ __forceinline__ void merge(volatile T* smem, T& val, unsigned int tid, unsigned int delta, const Op& op)
+        {
+            T other = smem[tid + delta];
+            val = op(val, other);
+            smem[tid] = val;
+        }
+        // Single-value form: combines val with the value returned by
+        // shfl_down(val, delta, width) through op and stores the result in val.
+        template <typename T, class Op>
+        __device__ __forceinline__ void mergeShfl(T& val, unsigned int delta, unsigned int width, const Op& op)
+        {
+            T shifted = shfl_down(val, delta, width);
+            val = op(val, shifted);
+        }
+        // Tuple form: applies the element-wise merge (op[i] on val[i] and
+        // smem[i][tid + delta]) to every element. The element count is taken from the
+        // pointer tuple.
+        template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
+                  typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
+                  class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
+        __device__ __forceinline__ void merge(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
+                                              const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
+                                              unsigned int tid,
+                                              unsigned int delta,
+                                              const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
+        {
+            typedef thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> PointerTuple;
+
+            For<0, thrust::tuple_size<PointerTuple>::value>::merge(smem, val, tid, delta, op);
+        }
+        // Tuple form: applies the element-wise shuffle merge to every element of val.
+        // The element count is taken from the reference tuple.
+        template <typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
+                  class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
+        __device__ __forceinline__ void mergeShfl(const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
+                                                  unsigned int delta,
+                                                  unsigned int width,
+                                                  const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
+        {
+            typedef thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9> ValTuple;
+
+            For<0, thrust::tuple_size<ValTuple>::value>::mergeShfl(val, delta, width, op);
+        }
+
+        // Fallback reduction selected by Dispatcher when N does not qualify for
+        // WarpOptimized or GenericOptimized32.
+        //
+        // reduce() first stores val at smem[tid]. Then, for each delta in
+        // 1024, 512, ..., 32 that is enabled by the compile-time size N, threads with
+        // tid < delta merge their value with smem[tid + delta] through op (the result
+        // goes to both val and smem[tid]). Finally threads with tid < 16 run the
+        // deltas 16, 8, 4, 2, 1 back to back.
+        template <unsigned int N> struct Generic
+        {
+            template <typename Pointer, typename Reference, class Op>
+            static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
+            {
+                loadToSmem(smem, val, tid);
+                // The barrier after the initial store is skipped when N < 32.
+                if (N >= 32)
+                    __syncthreads();
+
+                // The N comparisons below are compile-time constants, so stages that do
+                // not apply to this N contain no reachable code. Each stage down to
+                // delta == 64 ends with a block-wide barrier.
+                if (N >= 2048)
+                {
+                    if (tid < 1024)
+                        merge(smem, val, tid, 1024, op);
+
+                    __syncthreads();
+                }
+                if (N >= 1024)
+                {
+                    if (tid < 512)
+                        merge(smem, val, tid, 512, op);
+
+                    __syncthreads();
+                }
+                if (N >= 512)
+                {
+                    if (tid < 256)
+                        merge(smem, val, tid, 256, op);
+
+                    __syncthreads();
+                }
+                if (N >= 256)
+                {
+                    if (tid < 128)
+                        merge(smem, val, tid, 128, op);
+
+                    __syncthreads();
+                }
+                if (N >= 128)
+                {
+                    if (tid < 64)
+                        merge(smem, val, tid, 64, op);
+
+                    __syncthreads();
+                }
+                // NOTE(review): from here on no __syncthreads() is issued between the
+                // steps; this presumably relies on the remaining threads belonging to
+                // one warp and on smem being volatile -- confirm for the targeted
+                // architectures.
+                if (N >= 64)
+                {
+                    if (tid < 32)
+                        merge(smem, val, tid, 32, op);
+                }
+
+                // NOTE(review): this tail runs for every N and reads smem[tid + 16],
+                // i.e. indices up to 31 -- presumably callers provide at least 32 valid
+                // entries in smem; confirm.
+                if (tid < 16)
+                {
+                    merge(smem, val, tid, 16, op);
+                    merge(smem, val, tid, 8, op);
+                    merge(smem, val, tid, 4, op);
+                    merge(smem, val, tid, 2, op);
+                    merge(smem, val, tid, 1, op);
+                }
+            }
+        };
+
+        // Compile-time unrolled sequence of merge steps with deltas I, I/2, I/4, ..., 1.
+        // Each member performs the step for delta == I and hands over to
+        // Unroll<I / 2, ...>; the Unroll<0, ...> specialization stops the recursion.
+        template <unsigned int I, typename Pointer, typename Reference, class Op>
+        struct Unroll
+        {
+        private:
+            typedef Unroll<I / 2, Pointer, Reference, Op> Next;
+
+        public:
+            // Shuffle-based variant; N is forwarded as the width argument of mergeShfl.
+            static __device__ void loopShfl(Reference val, Op op, unsigned int N)
+            {
+                mergeShfl(val, I, N, op);
+                Next::loopShfl(val, op, N);
+            }
+
+            // Variant that goes through the smem buffer.
+            static __device__ void loop(Pointer smem, Reference val, unsigned int tid, Op op)
+            {
+                merge(smem, val, tid, I, op);
+                Next::loop(smem, val, tid, op);
+            }
+        };
+        // Recursion terminator for Unroll: a delta of 0 means there is nothing left to
+        // merge, so both loops are no-ops.
+        template <typename Pointer, typename Reference, class Op>
+        struct Unroll<0, Pointer, Reference, Op>
+        {
+            static __device__ void loopShfl(Reference, Op, unsigned int) {}
+
+            static __device__ void loop(Pointer, Reference, unsigned int, Op) {}
+        };
+
+        // Reduction selected by Dispatcher for power-of-two N not larger than 32.
+        // When compiled for __CUDA_ARCH__ >= 300 it only uses shuffle-based merge steps
+        // (smem and tid are unused); otherwise it stores val to smem[tid] and lets
+        // threads with tid < N / 2 run the merge steps through smem. No barrier is
+        // issued on either path.
+        template <unsigned int N> struct WarpOptimized
+        {
+            template <typename Pointer, typename Reference, class Op>
+            static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
+            {
+                // Merge steps with deltas N/2, N/4, ..., 1.
+                typedef Unroll<N / 2, Pointer, Reference, Op> Steps;
+
+            #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+                CV_UNUSED(smem);
+                CV_UNUSED(tid);
+
+                Steps::loopShfl(val, op, N);
+            #else
+                loadToSmem(smem, val, tid);
+
+                if (tid < N / 2)
+                    Steps::loop(smem, val, tid, op);
+            #endif
+            }
+        };
+
+        // Two-level reduction selected by Dispatcher for power-of-two N in (32, 1024].
+        // Level 1 reduces each group of 32 consecutive threads and leaves the group's
+        // result in smem[tid / 32]; level 2 lets threads with tid < 32 reduce those
+        // M = N / 32 partial results.
+        template <unsigned int N> struct GenericOptimized32
+        {
+            // Number of 32-thread groups, i.e. number of partial results after level 1.
+            enum { M = N / 32 };
+
+            template <typename Pointer, typename Reference, class Op>
+            static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
+            {
+                // Presumably the thread's index within its warp (see Warp::laneId()).
+                const unsigned int laneId = Warp::laneId();
+
+            #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+                // Level 1 via shuffles (deltas 16..1, width warpSize); smem is only
+                // touched by lane 0 to publish the group's result.
+                Unroll<16, Pointer, Reference, Op>::loopShfl(val, op, warpSize);
+
+                if (laneId == 0)
+                    loadToSmem(smem, val, tid / 32);
+            #else
+                // Level 1 through smem: every thread stores its value, lanes 0..15 run
+                // the merge steps with deltas 16..1.
+                loadToSmem(smem, val, tid);
+
+                if (laneId < 16)
+                    Unroll<16, Pointer, Reference, Op>::loop(smem, val, tid, op);
+
+                // All groups must be done with smem[tid] before the low slots are
+                // reused for the per-group results.
+                __syncthreads();
+
+                if (laneId == 0)
+                    loadToSmem(smem, val, tid / 32);
+            #endif
+
+                // Make the per-group results visible to the threads that read them below.
+                __syncthreads();
+
+                // Every thread reloads val from smem[tid]; only the first M slots hold
+                // partial results written above.
+                loadFromSmem(smem, val, tid);
+
+                // Level 2: merge steps with deltas M/2..1 over the partial results.
+                if (tid < 32)
+                {
+                #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+                    Unroll<M / 2, Pointer, Reference, Op>::loopShfl(val, op, M);
+                #else
+                    Unroll<M / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
+                #endif
+                }
+            }
+        };
+
+        // Compile-time type selection: StaticIf<cond, T1, T2>::type is T1 when cond is
+        // true and T2 otherwise.
+        template <bool val, class T1, class T2> struct StaticIf;
+        template <class T1, class T2> struct StaticIf<false, T1, T2> { typedef T2 type; };
+        template <class T1, class T2> struct StaticIf<true,  T1, T2> { typedef T1 type; };
+
+        // value is 1 when N is a non-zero power of two (exactly one bit set), else 0.
+        template <unsigned int N> struct IsPowerOf2
+        {
+            enum { value = ((N != 0) && ((N & (N - 1)) == 0)) };
+        };
+
+        // Picks the reduction implementation for a compile-time size N:
+        //   - power of two, N <= 32    -> WarpOptimized<N>
+        //   - power of two, N <= 1024  -> GenericOptimized32<N>
+        //   - anything else            -> Generic<N>
+        template <unsigned int N> struct Dispatcher
+        {
+        private:
+            // Choice used when the size does not qualify for WarpOptimized.
+            typedef typename StaticIf<
+                (N <= 1024) && IsPowerOf2<N>::value,
+                GenericOptimized32<N>,
+                Generic<N>
+            >::type block_reductor;
+
+        public:
+            typedef typename StaticIf<
+                (N <= 32) && IsPowerOf2<N>::value,
+                WarpOptimized<N>,
+                block_reductor
+            >::type reductor;
+        };
+    }
+}}}
+
+//! @endcond
+
+#endif // OPENCV_CUDA_REDUCE_DETAIL_HPP
--- a/3rdparty/opencv/inc/opencv2/core/cuda/detail/reduce_key_val.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda/detail/reduce_key_val.hpp
@@ -0,0 +1,502 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP
+#define OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP
+
+#include <thrust/tuple.h>
+#include "../warp.hpp"
+#include "../warp_shuffle.hpp"
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    namespace reduce_key_val_detail
+    {
+        // Maps an element type of a pointer tuple or reference tuple to its plain value
+        // type: T&, T* and volatile T* all yield T. The primary template is left
+        // undefined, so asking for the type of anything else fails to compile.
+        template <typename T> struct GetType;
+        template <typename T> struct GetType<T&>          { typedef T type; };
+        template <typename T> struct GetType<T*>          { typedef T type; };
+        template <typename T> struct GetType<volatile T*> { typedef T type; };
+
+        // Compile-time loop over tuple elements I .. N-1: every member processes
+        // element I and then recurses into For<I + 1, N>. The For<N, N> specialization
+        // terminates the recursion.
+        template <unsigned int I, unsigned int N>
+        struct For
+        {
+            // Element I: smem[tid] <- data.
+            template <class PointerTuple, class ReferenceTuple>
+            static __device__ void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid)
+            {
+                thrust::get<I>(smem)[tid] = thrust::get<I>(data);
+
+                For<I + 1, N>::loadToSmem(smem, data, tid);
+            }
+            // Element I: data <- smem[tid].
+            template <class PointerTuple, class ReferenceTuple>
+            static __device__ void loadFromSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid)
+            {
+                thrust::get<I>(data) = thrust::get<I>(smem)[tid];
+
+                For<I + 1, N>::loadFromSmem(smem, data, tid);
+            }
+
+            // Element I: val <- shfl_down(val, delta, width).
+            template <class ReferenceTuple>
+            static __device__ void copyShfl(const ReferenceTuple& val, unsigned int delta, int width)
+            {
+                thrust::get<I>(val) = shfl_down(thrust::get<I>(val), delta, width);
+
+                For<I + 1, N>::copyShfl(val, delta, width);
+            }
+            // Element I: val and svals[tid] <- svals[tid + delta].
+            template <class PointerTuple, class ReferenceTuple>
+            static __device__ void copy(const PointerTuple& svals, const ReferenceTuple& val, unsigned int tid, unsigned int delta)
+            {
+                thrust::get<I>(svals)[tid] = thrust::get<I>(val) = thrust::get<I>(svals)[tid + delta];
+
+                For<I + 1, N>::copy(svals, val, tid, delta);
+            }
+
+            // Element I: fetch the key and value held delta lanes further down; if
+            // cmp[I](fetched key, own key) holds, adopt both.
+            template <class KeyReferenceTuple, class ValReferenceTuple, class CmpTuple>
+            static __device__ void mergeShfl(const KeyReferenceTuple& key, const ValReferenceTuple& val, const CmpTuple& cmp, unsigned int delta, int width)
+            {
+                // Both shuffles are issued before branching. A shuffle executed inside
+                // the comparison branch would read from lanes that may not have taken
+                // the branch, and the value read from a non-participating lane is
+                // undefined.
+                typename GetType<typename thrust::tuple_element<I, KeyReferenceTuple>::type>::type reg = shfl_down(thrust::get<I>(key), delta, width);
+                typename GetType<typename thrust::tuple_element<I, ValReferenceTuple>::type>::type regVal = shfl_down(thrust::get<I>(val), delta, width);
+
+                if (thrust::get<I>(cmp)(reg, thrust::get<I>(key)))
+                {
+                    thrust::get<I>(key) = reg;
+                    thrust::get<I>(val) = regVal;
+                }
+
+                For<I + 1, N>::mergeShfl(key, val, cmp, delta, width);
+            }
+            // Element I: read the key at skeys[tid + delta]; if cmp[I](that key, own
+            // key) holds, adopt it together with svals[tid + delta] and write both to
+            // slot tid.
+            template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
+            static __device__ void merge(const KeyPointerTuple& skeys, const KeyReferenceTuple& key,
+                                         const ValPointerTuple& svals, const ValReferenceTuple& val,
+                                         const CmpTuple& cmp,
+                                         unsigned int tid, unsigned int delta)
+            {
+                typename GetType<typename thrust::tuple_element<I, KeyPointerTuple>::type>::type reg = thrust::get<I>(skeys)[tid + delta];
+
+                if (thrust::get<I>(cmp)(reg, thrust::get<I>(key)))
+                {
+                    thrust::get<I>(skeys)[tid] = thrust::get<I>(key) = reg;
+                    thrust::get<I>(svals)[tid] = thrust::get<I>(val) = thrust::get<I>(svals)[tid + delta];
+                }
+
+                For<I + 1, N>::merge(skeys, key, svals, val, cmp, tid, delta);
+            }
+        };
+        // Recursion terminator for For<I, N>: once I reaches N there is no element
+        // left, so every operation is a no-op.
+        template <unsigned int N>
+        struct For<N, N>
+        {
+            template <class PointerTuple, class ReferenceTuple>
+            static __device__ void loadToSmem(const PointerTuple&, const ReferenceTuple&, unsigned int) {}
+
+            template <class PointerTuple, class ReferenceTuple>
+            static __device__ void loadFromSmem(const PointerTuple&, const ReferenceTuple&, unsigned int) {}
+
+            template <class ReferenceTuple>
+            static __device__ void copyShfl(const ReferenceTuple&, unsigned int, int) {}
+
+            template <class PointerTuple, class ReferenceTuple>
+            static __device__ void copy(const PointerTuple&, const ReferenceTuple&, unsigned int, unsigned int) {}
+
+            template <class KeyReferenceTuple, class ValReferenceTuple, class CmpTuple>
+            static __device__ void mergeShfl(const KeyReferenceTuple&, const ValReferenceTuple&, const CmpTuple&, unsigned int, int) {}
+
+            template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
+            static __device__ void merge(const KeyPointerTuple&, const KeyReferenceTuple&,
+                                         const ValPointerTuple&, const ValReferenceTuple&,
+                                         const CmpTuple&,
+                                         unsigned int, unsigned int) {}
+        };
+
+        //////////////////////////////////////////////////////
+        // loadToSmem
+
+        // Single-value form: stores data into slot tid of the volatile buffer smem.
+        template <typename T>
+        __device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, unsigned int tid)
+        {
+            *(smem + tid) = data;
+        }
+        // Single-value form: reads slot tid of the volatile buffer smem into data.
+        template <typename T>
+        __device__ __forceinline__ void loadFromSmem(volatile T* smem, T& data, unsigned int tid)
+        {
+            data = *(smem + tid);
+        }
+        // Tuple form: stores every element of data into slot tid of the matching
+        // buffer in smem. The element count is taken from the pointer tuple.
+        template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
+        __device__ __forceinline__ void loadToSmem(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
+                                                   const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& data,
+                                                   unsigned int tid)
+        {
+            typedef thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> PointerTuple;
+
+            For<0, thrust::tuple_size<PointerTuple>::value>::loadToSmem(smem, data, tid);
+        }
+        // Tuple form: reads slot tid of every buffer in smem into the matching element
+        // of data. The element count is taken from the pointer tuple.
+        template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
+        __device__ __forceinline__ void loadFromSmem(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
+                                                     const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& data,
+                                                     unsigned int tid)
+        {
+            typedef thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> PointerTuple;
+
+            For<0, thrust::tuple_size<PointerTuple>::value>::loadFromSmem(smem, data, tid);
+        }
+
+        //////////////////////////////////////////////////////
+        // copyVals
+
+        // Single-value form: replaces val with the result of shfl_down(val, delta, width).
+        template <typename V>
+        __device__ __forceinline__ void copyValsShfl(V& val, unsigned int delta, int width)
+        {
+            V shifted = shfl_down(val, delta, width);
+            val = shifted;
+        }
+        // Single-value form: copies svals[tid + delta] into val and into svals[tid].
+        template <typename V>
+        __device__ __forceinline__ void copyVals(volatile V* svals, V& val, unsigned int tid, unsigned int delta)
+        {
+            val = svals[tid + delta];
+            svals[tid] = val;
+        }
+        // Tuple form: replaces every element of val with the result of shfl_down on
+        // that element. The element count is taken from the reference tuple.
+        template <typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
+        __device__ __forceinline__ void copyValsShfl(const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                                     unsigned int delta,
+                                                     int width)
+        {
+            typedef thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9> ReferenceTuple;
+
+            For<0, thrust::tuple_size<ReferenceTuple>::value>::copyShfl(val, delta, width);
+        }
+        // Tuple form: for every element, copies svals[tid + delta] into val and into
+        // svals[tid]. The element count is taken from the pointer tuple.
+        template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
+        __device__ __forceinline__ void copyVals(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
+                                                 const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                                 unsigned int tid, unsigned int delta)
+        {
+            typedef thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> PointerTuple;
+
+            For<0, thrust::tuple_size<PointerTuple>::value>::copy(svals, val, tid, delta);
+        }
+
+        //////////////////////////////////////////////////////
+        // merge
+
+        // Single key / single value form: fetches the key and value held delta lanes
+        // further down (within groups of `width` lanes) and adopts both when
+        // cmp(fetched key, own key) holds.
+        template <typename K, typename V, class Cmp>
+        __device__ __forceinline__ void mergeShfl(K& key, V& val, const Cmp& cmp, unsigned int delta, int width)
+        {
+            // Both shuffles are issued before branching. A shuffle executed inside the
+            // comparison branch would read from lanes that may not have taken the
+            // branch, and the value read from a non-participating lane is undefined.
+            K reg = shfl_down(key, delta, width);
+            V regVal = shfl_down(val, delta, width);
+
+            if (cmp(reg, key))
+            {
+                key = reg;
+                val = regVal;
+            }
+        }
+        // Single key / single value form: reads the key at skeys[tid + delta]; when
+        // cmp(that key, own key) holds, the key is adopted (key and skeys[tid]) and the
+        // matching value is copied from svals[tid + delta].
+        template <typename K, typename V, class Cmp>
+        __device__ __forceinline__ void merge(volatile K* skeys, K& key, volatile V* svals, V& val, const Cmp& cmp, unsigned int tid, unsigned int delta)
+        {
+            K candidate = skeys[tid + delta];
+
+            if (cmp(candidate, key))
+            {
+                key = candidate;
+                skeys[tid] = key;
+
+                copyVals(svals, val, tid, delta);
+            }
+        }
+        // Private helper of the tuple-valued mergeShfl below: for tuple elements
+        // I .. N-1 it always performs the shuffle and only assigns the fetched value
+        // when `take` is set. Keeping the shuffle outside of any data-dependent branch
+        // guarantees that the lane being read from executes it as well.
+        template <unsigned int I, unsigned int N>
+        struct ShflSelect
+        {
+            template <class ReferenceTuple>
+            static __device__ __forceinline__ void apply(const ReferenceTuple& val, unsigned int delta, int width, bool take)
+            {
+                typename GetType<typename thrust::tuple_element<I, ReferenceTuple>::type>::type shifted = shfl_down(thrust::get<I>(val), delta, width);
+
+                if (take)
+                    thrust::get<I>(val) = shifted;
+
+                ShflSelect<I + 1, N>::apply(val, delta, width, take);
+            }
+        };
+        template <unsigned int N>
+        struct ShflSelect<N, N>
+        {
+            template <class ReferenceTuple>
+            static __device__ __forceinline__ void apply(const ReferenceTuple&, unsigned int, int, bool)
+            {
+            }
+        };
+
+        // Single key / tuple of values form: fetches the key held delta lanes further
+        // down and, when cmp(fetched key, own key) holds, adopts that key together with
+        // every value of that lane.
+        template <typename K,
+                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
+                  class Cmp>
+        __device__ __forceinline__ void mergeShfl(K& key,
+                                                  const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                                  const Cmp& cmp,
+                                                  unsigned int delta, int width)
+        {
+            K reg = shfl_down(key, delta, width);
+
+            bool take = false;
+            if (cmp(reg, key))
+            {
+                take = true;
+                key = reg;
+            }
+
+            // The value shuffles run on every lane; see ShflSelect.
+            ShflSelect<0, thrust::tuple_size<thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9> >::value>::apply(val, delta, width, take);
+        }
+        // Single key / tuple of values form: reads the key at skeys[tid + delta]; when
+        // cmp(that key, own key) holds, the key is adopted (key and skeys[tid]) and
+        // every value is copied from slot tid + delta of its buffer.
+        template <typename K,
+                  typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
+                  class Cmp>
+        __device__ __forceinline__ void merge(volatile K* skeys, K& key,
+                                              const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
+                                              const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                              const Cmp& cmp, unsigned int tid, unsigned int delta)
+        {
+            K candidate = skeys[tid + delta];
+
+            if (cmp(candidate, key))
+            {
+                key = candidate;
+                skeys[tid] = key;
+
+                copyVals(svals, val, tid, delta);
+            }
+        }
+        // Tuple of keys / tuple of values form: runs the element-wise shuffle merge for
+        // every (key[i], val[i], cmp[i]) triple. The element count is taken from the
+        // key reference tuple.
+        template <typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
+                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
+                  class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
+        __device__ __forceinline__ void mergeShfl(const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
+                                                  const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                                  const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp,
+                                                  unsigned int delta, int width)
+        {
+            typedef thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9> KeyReferenceTuple;
+
+            For<0, thrust::tuple_size<KeyReferenceTuple>::value>::mergeShfl(key, val, cmp, delta, width);
+        }
+        // Tuple of keys / tuple of values form: runs the element-wise merge for every
+        // (skeys[i], key[i], svals[i], val[i], cmp[i]) group. The element count is
+        // taken from the value pointer tuple.
+        // NOTE(review): the key tuples are presumably expected to have the same number
+        // of elements as the value tuples -- confirm against callers.
+        template <typename KP0, typename KP1, typename KP2, typename KP3, typename KP4, typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
+                  typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
+                  typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
+                  class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
+        __device__ __forceinline__ void merge(const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
+                                              const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
+                                              const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
+                                              const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                              const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp,
+                                              unsigned int tid, unsigned int delta)
+        {
+            typedef thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> ValPointerTuple;
+
+            For<0, thrust::tuple_size<ValPointerTuple>::value>::merge(skeys, key, svals, val, cmp, tid, delta);
+        }
+
+        //////////////////////////////////////////////////////
+        // Generic
+
+        template <unsigned int N> struct Generic  // Reduction strategy usable for any N: stage every thread's entry, then repeatedly fold the upper half onto the lower half with shrinking deltas.
+        {
+            template <class KP, class KR, class VP, class VR, class Cmp>
+            static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)  // key/val: this thread's candidate (passed on to merge); skeys/svals: staging storage indexed by tid (shared memory, judging by the helper names)
+            {
+                loadToSmem(skeys, key, tid);  // store this thread's key at slot tid
+                loadValsToSmem(svals, val, tid);  // store the accompanying value(s) at slot tid
+                if (N >= 32)
+                    __syncthreads();  // barrier so that all slots are written before any merge reads them
+
+                if (N >= 2048)  // N is a compile-time constant; each stage below only applies when N is large enough
+                {
+                    if (tid < 1024)
+                        merge(skeys, key, svals, val, cmp, tid, 1024);  // merge with slot tid + 1024 (the scalar merge in this file adopts that entry when cmp(other, key) is true)
+
+                    __syncthreads();  // finish this stage before the next one reads the updated slots
+                }
+                if (N >= 1024)
+                {
+                    if (tid < 512)
+                        merge(skeys, key, svals, val, cmp, tid, 512);  // merge with slot tid + 512
+
+                    __syncthreads();
+                }
+                if (N >= 512)
+                {
+                    if (tid < 256)
+                        merge(skeys, key, svals, val, cmp, tid, 256);  // merge with slot tid + 256
+
+                    __syncthreads();
+                }
+                if (N >= 256)
+                {
+                    if (tid < 128)
+                        merge(skeys, key, svals, val, cmp, tid, 128);  // merge with slot tid + 128
+
+                    __syncthreads();
+                }
+                if (N >= 128)
+                {
+                    if (tid < 64)
+                        merge(skeys, key, svals, val, cmp, tid, 64);  // merge with slot tid + 64
+
+                    __syncthreads();
+                }
+                if (N >= 64)
+                {
+                    if (tid < 32)
+                        merge(skeys, key, svals, val, cmp, tid, 32);  // merge with slot tid + 32
+                }  // NOTE(review): no barrier from here on; the remaining steps appear to assume threads 0..31 execute in lockstep - confirm for the target architecture
+
+                if (tid < 16)
+                {
+                    merge(skeys, key, svals, val, cmp, tid, 16);  // last five steps: deltas 16, 8, 4, 2, 1
+                    merge(skeys, key, svals, val, cmp, tid, 8);
+                    merge(skeys, key, svals, val, cmp, tid, 4);
+                    merge(skeys, key, svals, val, cmp, tid, 2);
+                    merge(skeys, key, svals, val, cmp, tid, 1);  // the reduction tree ends at tid 0 / slot 0
+                }
+            }
+        };
+
+        template <unsigned int I, class KP, class KR, class VP, class VR, class Cmp>
+        struct Unroll  // Compile-time recursion that issues merge steps with deltas I, I/2, I/4, ... (integer halving) until the delta reaches 0.
+        {
+            static __device__ void loopShfl(KR key, VR val, Cmp cmp, unsigned int N)  // variant built on mergeShfl; N is handed to mergeShfl as its last (width) argument
+            {
+                mergeShfl(key, val, cmp, I, N);  // step with delta I first ...
+                Unroll<I / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, N);  // ... then the steps for I/2 and below
+            }
+            static __device__ void loop(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)  // variant built on merge, working through the skeys/svals staging storage
+            {
+                merge(skeys, key, svals, val, cmp, tid, I);  // step with delta I first ...
+                Unroll<I / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);  // ... then the steps for I/2 and below
+            }
+        };
+        template <class KP, class KR, class VP, class VR, class Cmp>
+        struct Unroll<0, KP, KR, VP, VR, Cmp>  // I == 0 terminates the recursion: both functions are intentionally empty.
+        {
+            static __device__ void loopShfl(KR, VR, Cmp, unsigned int)
+            {
+            }
+            static __device__ void loop(KP, KR, VP, VR, unsigned int, Cmp)
+            {
+            }
+        };
+
+        template <unsigned int N> struct WarpOptimized  // Strategy that the Dispatcher in this file selects for power-of-two N <= 32.
+        {
+            template <class KP, class KR, class VP, class VR, class Cmp>
+            static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)  // same parameter list as Generic<N>::reduce
+            {
+            #if 0 // __CUDA_ARCH__ >= 300
+                CV_UNUSED(skeys);  // this mergeShfl-based branch is compiled out by the "#if 0" above
+                CV_UNUSED(svals);
+                CV_UNUSED(tid);
+
+                Unroll<N / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, N);
+            #else
+                loadToSmem(skeys, key, tid);  // active branch: stage this thread's key at slot tid ...
+                loadToSmem(svals, val, tid);  // ... and its value(s)
+
+                if (tid < N / 2)  // NOTE(review): no barrier between the stores above and the merges; presumably relies on all N threads sharing one warp - confirm
+                    Unroll<N / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);  // merge steps with deltas N/2, N/4, ..., 1
+            #endif
+            }
+        };
+
+        template <unsigned int N> struct GenericOptimized32  // Two-stage strategy that the Dispatcher in this file selects for power-of-two N in (32, 1024].
+        {
+            enum { M = N / 32 };  // number of 32-element groups, i.e. number of partial results after stage one
+
+            template <class KP, class KR, class VP, class VR, class Cmp>
+            static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)  // same parameter list as Generic<N>::reduce
+            {
+                const unsigned int laneId = Warp::laneId();  // position within the warp (Warp is declared outside this excerpt)
+
+            #if 0 // __CUDA_ARCH__ >= 300
+                Unroll<16, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, warpSize);  // this mergeShfl-based branch is compiled out by the "#if 0" above
+
+                if (laneId == 0)
+                {
+                    loadToSmem(skeys, key, tid / 32);
+                    loadToSmem(svals, val, tid / 32);
+                }
+            #else
+                loadToSmem(skeys, key, tid);  // active branch, stage one: every thread stages its key at slot tid ...
+                loadToSmem(svals, val, tid);  // ... and its value(s)
+
+                if (laneId < 16)  // NOTE(review): no barrier between the stores above and these merges; presumably relies on warp-lockstep execution - confirm
+                    Unroll<16, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);  // merge steps with deltas 16, 8, 4, 2, 1 inside each group of 32 slots
+
+                __syncthreads();  // all groups must be finished before the staging slots are reused below
+
+                if (laneId == 0)
+                {
+                    loadToSmem(skeys, key, tid / 32);  // lane 0 writes its group's result to slot tid / 32, packing the partials at the front
+                    loadToSmem(svals, val, tid / 32);
+                }
+            #endif
+
+                __syncthreads();  // make the packed partial results visible to the threads that combine them
+
+                loadFromSmem(skeys, key, tid);  // every thread reloads its key from slot tid; values are not reloaded in the active branch
+
+                if (tid < 32)  // stage two: the first 32 threads combine the M partial results
+                {
+                #if 0 // __CUDA_ARCH__ >= 300
+                    loadFromSmem(svals, val, tid);
+
+                    Unroll<M / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, M);
+                #else
+                    Unroll<M / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);  // merge steps with deltas M/2, M/4, ..., 1
+                #endif
+                }
+            }
+        };
+
+        template <bool val, class T1, class T2> struct StaticIf;  // Compile-time type selection: StaticIf<cond, T1, T2>::type is T1 when cond is true, otherwise T2.
+        template <class T1, class T2> struct StaticIf<true, T1, T2>
+        {
+            typedef T1 type;  // condition true: first alternative
+        };
+        template <class T1, class T2> struct StaticIf<false, T1, T2>
+        {
+            typedef T2 type;  // condition false: second alternative
+        };
+
+        template <unsigned int N> struct IsPowerOf2  // Compile-time test: value is non-zero exactly when N has a single bit set.
+        {
+            enum { value = ((N != 0) && !(N & (N - 1))) };  // N & (N - 1) clears the lowest set bit; zero is excluded explicitly
+        };
+
+        template <unsigned int N> struct Dispatcher  // Picks the reduction strategy type for a compile-time size N and exposes it as `reductor`.
+        {
+            typedef typename StaticIf<
+                (N <= 32) && IsPowerOf2<N>::value,  // power of two up to 32 ...
+                WarpOptimized<N>,  // ... uses WarpOptimized
+                typename StaticIf<
+                    (N <= 1024) && IsPowerOf2<N>::value,  // otherwise, power of two up to 1024 ...
+                    GenericOptimized32<N>,  // ... uses GenericOptimized32
+                    Generic<N>  // everything else (non power of two, or larger than 1024) uses Generic
+                >::type
+            >::type reductor;
+        };
+    }
+}}}
+
+//! @endcond
+
+#endif // OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP
--- a/3rdparty/opencv/inc/opencv2/core/cuda/detail/transform_detail.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda/detail/transform_detail.hpp
@@ -0,0 +1,392 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_TRANSFORM_DETAIL_HPP
+#define OPENCV_CUDA_TRANSFORM_DETAIL_HPP
+
+#include "../common.hpp"
+#include "../vec_traits.hpp"
+#include "../functional.hpp"
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    namespace transform_detail
+    {
+        //! Read Write Traits
+
+        template <typename T, typename D, int shift> struct UnaryReadWriteTraits  // Names the vector types a unary kernel uses when it handles `shift` elements per thread.
+        {
+            typedef typename TypeVec<T, shift>::vec_type read_type;  // source-side type; TypeVec is declared outside this excerpt (presumably the `shift`-component vector of T - confirm)
+            typedef typename TypeVec<D, shift>::vec_type write_type;  // destination-side type, built the same way from D
+        };
+
+        template <typename T1, typename T2, typename D, int shift> struct BinaryReadWriteTraits  // Same as UnaryReadWriteTraits, for kernels with two sources.
+        {
+            typedef typename TypeVec<T1, shift>::vec_type read_type1;  // type used for the first source
+            typedef typename TypeVec<T2, shift>::vec_type read_type2;  // type used for the second source
+            typedef typename TypeVec<D, shift>::vec_type write_type;  // type used for the destination
+        };
+
+        //! Transform kernels
+
+        template <int shift> struct OpUnroller;  // Applies a functor component by component to a `shift`-component value; specialised in this file for 1, 2, 3, 4 and 8.
+        template <> struct OpUnroller<1>  // one component: .x
+        {
+            template <typename T, typename D, typename UnOp, typename Mask>
+            static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)  // unary form. NOTE(review): op is a non-const reference here, unlike OpUnroller<3/4/8>; the kernels in this file pass a const functor, so UnOp deduces as const-qualified
+            {
+                if (mask(y, x_shifted))  // dst.x is written only when the mask accepts (row y, column x_shifted)
+                    dst.x = op(src.x);
+            }
+
+            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
+            static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)  // binary form: op receives the matching components of both sources
+            {
+                if (mask(y, x_shifted))
+                    dst.x = op(src1.x, src2.x);
+            }
+        };
+        template <> struct OpUnroller<2>  // two components: .x and .y, mapped to columns x_shifted and x_shifted + 1
+        {
+            template <typename T, typename D, typename UnOp, typename Mask>
+            static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)  // unary form. NOTE(review): op is a non-const reference here, unlike OpUnroller<3/4/8>
+            {
+                if (mask(y, x_shifted))  // each component has its own mask test, so components may be written independently
+                    dst.x = op(src.x);
+                if (mask(y, x_shifted + 1))
+                    dst.y = op(src.y);
+            }
+
+            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
+            static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)  // binary form: op receives the matching components of both sources
+            {
+                if (mask(y, x_shifted))
+                    dst.x = op(src1.x, src2.x);
+                if (mask(y, x_shifted + 1))
+                    dst.y = op(src1.y, src2.y);
+            }
+        };
+        template <> struct OpUnroller<3>  // three components: .x, .y, .z, mapped to columns x_shifted .. x_shifted + 2
+        {
+            template <typename T, typename D, typename UnOp, typename Mask>
+            static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)  // unary form
+            {
+                if (mask(y, x_shifted))  // each component has its own mask test, so components may be written independently
+                    dst.x = op(src.x);
+                if (mask(y, x_shifted + 1))
+                    dst.y = op(src.y);
+                if (mask(y, x_shifted + 2))
+                    dst.z = op(src.z);
+            }
+
+            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
+            static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)  // binary form: op receives the matching components of both sources
+            {
+                if (mask(y, x_shifted))
+                    dst.x = op(src1.x, src2.x);
+                if (mask(y, x_shifted + 1))
+                    dst.y = op(src1.y, src2.y);
+                if (mask(y, x_shifted + 2))
+                    dst.z = op(src1.z, src2.z);
+            }
+        };
+        template <> struct OpUnroller<4>  // four components: .x, .y, .z, .w, mapped to columns x_shifted .. x_shifted + 3
+        {
+            template <typename T, typename D, typename UnOp, typename Mask>
+            static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)  // unary form
+            {
+                if (mask(y, x_shifted))  // each component has its own mask test, so components may be written independently
+                    dst.x = op(src.x);
+                if (mask(y, x_shifted + 1))
+                    dst.y = op(src.y);
+                if (mask(y, x_shifted + 2))
+                    dst.z = op(src.z);
+                if (mask(y, x_shifted + 3))
+                    dst.w = op(src.w);
+            }
+
+            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
+            static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)  // binary form: op receives the matching components of both sources
+            {
+                if (mask(y, x_shifted))
+                    dst.x = op(src1.x, src2.x);
+                if (mask(y, x_shifted + 1))
+                    dst.y = op(src1.y, src2.y);
+                if (mask(y, x_shifted + 2))
+                    dst.z = op(src1.z, src2.z);
+                if (mask(y, x_shifted + 3))
+                    dst.w = op(src1.w, src2.w);
+            }
+        };
+        template <> struct OpUnroller<8>  // eight components: .a0 .. .a7, mapped to columns x_shifted .. x_shifted + 7
+        {
+            template <typename T, typename D, typename UnOp, typename Mask>
+            static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)  // unary form
+            {
+                if (mask(y, x_shifted))  // each component has its own mask test, so components may be written independently
+                    dst.a0 = op(src.a0);
+                if (mask(y, x_shifted + 1))
+                    dst.a1 = op(src.a1);
+                if (mask(y, x_shifted + 2))
+                    dst.a2 = op(src.a2);
+                if (mask(y, x_shifted + 3))
+                    dst.a3 = op(src.a3);
+                if (mask(y, x_shifted + 4))
+                    dst.a4 = op(src.a4);
+                if (mask(y, x_shifted + 5))
+                    dst.a5 = op(src.a5);
+                if (mask(y, x_shifted + 6))
+                    dst.a6 = op(src.a6);
+                if (mask(y, x_shifted + 7))
+                    dst.a7 = op(src.a7);
+            }
+
+            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
+            static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)  // binary form: op receives the matching components of both sources
+            {
+                if (mask(y, x_shifted))
+                    dst.a0 = op(src1.a0, src2.a0);
+                if (mask(y, x_shifted + 1))
+                    dst.a1 = op(src1.a1, src2.a1);
+                if (mask(y, x_shifted + 2))
+                    dst.a2 = op(src1.a2, src2.a2);
+                if (mask(y, x_shifted + 3))
+                    dst.a3 = op(src1.a3, src2.a3);
+                if (mask(y, x_shifted + 4))
+                    dst.a4 = op(src1.a4, src2.a4);
+                if (mask(y, x_shifted + 5))
+                    dst.a5 = op(src1.a5, src2.a5);
+                if (mask(y, x_shifted + 6))
+                    dst.a6 = op(src1.a6, src2.a6);
+                if (mask(y, x_shifted + 7))
+                    dst.a7 = op(src1.a7, src2.a7);
+            }
+        };
+
+        template <typename T, typename D, typename UnOp, typename Mask>
+        static __global__ void transformSmart(const PtrStepSz<T> src_, PtrStep<D> dst_, const Mask mask, const UnOp op)  // Unary transform kernel in which each thread handles ft::smart_shift consecutive elements of one row.
+        {
+            typedef TransformFunctorTraits<UnOp> ft;  // per-functor tuning constants (declared outside this excerpt)
+            typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::read_type read_type;  // type used to read a whole element group from the source
+            typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::write_type write_type;  // type used to address a whole element group in the destination
+
+            const int x = threadIdx.x + blockIdx.x * blockDim.x;  // index of this thread's element group within the row
+            const int y = threadIdx.y + blockIdx.y * blockDim.y;  // row index
+            const int x_shifted = x * ft::smart_shift;  // column of the first element in the group
+
+            if (y < src_.rows)
+            {
+                const T* src = src_.ptr(y);
+                D* dst = dst_.ptr(y);
+
+                if (x_shifted + ft::smart_shift - 1 < src_.cols)  // the whole group lies inside the row: grouped path
+                {
+                    const read_type src_n_el = ((const read_type*)src)[x];  // reads the group through a reinterpreted pointer; TransformDispatcher<true> in this file checks data/step alignment before launching
+                    OpUnroller<ft::smart_shift>::unroll(src_n_el, ((write_type*)dst)[x], mask, op, x_shifted, y);  // per-component op with per-component mask test, written straight into the destination group
+                }
+                else  // the group crosses the right edge, or starts beyond it (then the loop body never runs)
+                {
+                    for (int real_x = x_shifted; real_x < src_.cols; ++real_x)  // remaining columns are handled one element at a time
+                    {
+                        if (mask(y, real_x))
+                            dst[real_x] = op(src[real_x]);
+                    }
+                }
+            }
+        }
+
+        // Unary element-wise kernel, one thread per element. The thread mapped to (row, col)
+        // stores op(src(row, col)) into dst(row, col) when the coordinate lies inside the source
+        // extent and mask(row, col) is true; in every other case it writes nothing.
+        template <typename T, typename D, typename UnOp, typename Mask>
+        __global__ static void transformSimple(const PtrStepSz<T> src, PtrStep<D> dst, const Mask mask, const UnOp op)
+        {
+            const int col = blockDim.x * blockIdx.x + threadIdx.x;
+            const int row = blockDim.y * blockIdx.y + threadIdx.y;
+
+            // Threads of partial border blocks fall outside the image and have nothing to do.
+            if (col >= src.cols || row >= src.rows)
+                return;
+
+            // The mask is consulted only for in-range coordinates.
+            if (!mask(row, col))
+                return;
+
+            dst.ptr(row)[col] = op(src.ptr(row)[col]);
+        }
+
+        template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
+        static __global__ void transformSmart(const PtrStepSz<T1> src1_, const PtrStep<T2> src2_, PtrStep<D> dst_,  // Binary transform kernel in which each thread handles ft::smart_shift consecutive elements of one row.
+            const Mask mask, const BinOp op)  // only src1_ carries a size; its rows/cols bound all accesses below
+        {
+            typedef TransformFunctorTraits<BinOp> ft;  // per-functor tuning constants (declared outside this excerpt)
+            typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::read_type1 read_type1;  // type used to read a whole element group from the first source
+            typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::read_type2 read_type2;  // same for the second source
+            typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::write_type write_type;  // type used to address a whole element group in the destination
+
+            const int x = threadIdx.x + blockIdx.x * blockDim.x;  // index of this thread's element group within the row
+            const int y = threadIdx.y + blockIdx.y * blockDim.y;  // row index
+            const int x_shifted = x * ft::smart_shift;  // column of the first element in the group
+
+            if (y < src1_.rows)
+            {
+                const T1* src1 = src1_.ptr(y);
+                const T2* src2 = src2_.ptr(y);
+                D* dst = dst_.ptr(y);
+
+                if (x_shifted + ft::smart_shift - 1 < src1_.cols)  // the whole group lies inside the row: grouped path
+                {
+                    const read_type1 src1_n_el = ((const read_type1*)src1)[x];  // reads through reinterpreted pointers; TransformDispatcher<true> in this file checks data/step alignment before launching
+                    const read_type2 src2_n_el = ((const read_type2*)src2)[x];
+
+                    OpUnroller<ft::smart_shift>::unroll(src1_n_el, src2_n_el, ((write_type*)dst)[x], mask, op, x_shifted, y);  // per-component op with per-component mask test
+                }
+                else  // the group crosses the right edge, or starts beyond it (then the loop body never runs)
+                {
+                    for (int real_x = x_shifted; real_x < src1_.cols; ++real_x)  // remaining columns are handled one element at a time
+                    {
+                        if (mask(y, real_x))
+                            dst[real_x] = op(src1[real_x], src2[real_x]);
+                    }
+                }
+            }
+        }
+
+        // Binary element-wise kernel, one thread per element. The thread mapped to (row, col)
+        // stores op(src1(row, col), src2(row, col)) into dst(row, col) when the coordinate lies
+        // inside the extent of src1 and mask(row, col) is true; otherwise it writes nothing.
+        template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
+        static __global__ void transformSimple(const PtrStepSz<T1> src1, const PtrStep<T2> src2, PtrStep<D> dst,
+            const Mask mask, const BinOp op)
+        {
+            const int col = blockDim.x * blockIdx.x + threadIdx.x;
+            const int row = blockDim.y * blockIdx.y + threadIdx.y;
+
+            // Only src1 carries a size, so it defines the valid coordinate range.
+            if (col >= src1.cols || row >= src1.rows)
+                return;
+
+            // The mask is consulted only for in-range coordinates.
+            if (!mask(row, col))
+                return;
+
+            // Fetch both operands into locals before applying the functor.
+            const T1 lhs = src1.ptr(row)[col];
+            const T2 rhs = src2.ptr(row)[col];
+            dst.ptr(row)[col] = op(lhs, rhs);
+        }
+
+        // Host-side launcher selected at compile time by UseSmart. The <false> specialisation
+        // always launches the one-element-per-thread transformSimple kernels.
+        template <bool UseSmart> struct TransformDispatcher;
+        template<> struct TransformDispatcher<false>
+        {
+            // Unary transform: dst(y, x) = op(src(y, x)) wherever mask(y, x) holds.
+            // The launch grid covers src.cols x src.rows; errors are reported through cudaSafeCall.
+            template <typename T, typename D, typename UnOp, typename Mask>
+            static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
+            {
+                typedef TransformFunctorTraits<UnOp> ft;
+
+                // Block shape comes from the functor's traits; the grid is rounded up to cover the image.
+                const dim3 block(ft::simple_block_dim_x, ft::simple_block_dim_y, 1);
+                const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y), 1);
+
+                transformSimple<T, D><<<grid, block, 0, stream>>>(src, dst, mask, op);
+                cudaSafeCall( cudaGetLastError() );
+
+                // With the null stream the call also waits for the device before returning.
+                if (stream == 0)
+                {
+                    cudaSafeCall( cudaDeviceSynchronize() );
+                }
+            }
+
+            // Binary transform: dst(y, x) = op(src1(y, x), src2(y, x)) wherever mask(y, x) holds.
+            // The launch grid covers src1.cols x src1.rows.
+            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
+            static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
+            {
+                typedef TransformFunctorTraits<BinOp> ft;
+
+                const dim3 block(ft::simple_block_dim_x, ft::simple_block_dim_y, 1);
+                const dim3 grid(divUp(src1.cols, block.x), divUp(src1.rows, block.y), 1);
+
+                transformSimple<T1, T2, D><<<grid, block, 0, stream>>>(src1, src2, dst, mask, op);
+                cudaSafeCall( cudaGetLastError() );
+
+                // With the null stream the call also waits for the device before returning.
+                if (stream == 0)
+                {
+                    cudaSafeCall( cudaDeviceSynchronize() );
+                }
+            }
+        };
+        // Host-side launcher for functors whose traits request more than one element per thread
+        // (smart_shift != 1, enforced at compile time). Launches transformSmart when every buffer
+        // pointer and row step is aligned to a whole element group; otherwise it defers to
+        // TransformDispatcher<false>.
+        template<> struct TransformDispatcher<true>
+        {
+            // Unary transform: dst(y, x) = op(src(y, x)) wherever mask(y, x) holds.
+            template <typename T, typename D, typename UnOp, typename Mask>
+            static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
+            {
+                typedef TransformFunctorTraits<UnOp> ft;
+
+                CV_StaticAssert(ft::smart_shift != 1, "");
+
+                // Checked in the same order as before: source data, source step, destination data, destination step.
+                const bool groupAligned =
+                    isAligned(src.data, ft::smart_shift * sizeof(T)) && isAligned(src.step, ft::smart_shift * sizeof(T)) &&
+                    isAligned(dst.data, ft::smart_shift * sizeof(D)) && isAligned(dst.step, ft::smart_shift * sizeof(D));
+
+                if (groupAligned)
+                {
+                    // Each thread covers smart_shift columns, so the grid width shrinks accordingly.
+                    const dim3 block(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
+                    const dim3 grid(divUp(src.cols, block.x * ft::smart_shift), divUp(src.rows, block.y), 1);
+
+                    transformSmart<T, D><<<grid, block, 0, stream>>>(src, dst, mask, op);
+                    cudaSafeCall( cudaGetLastError() );
+
+                    // With the null stream the call also waits for the device before returning.
+                    if (stream == 0)
+                    {
+                        cudaSafeCall( cudaDeviceSynchronize() );
+                    }
+                }
+                else
+                {
+                    // Grouped access is not possible for these buffers: use the per-element kernel.
+                    TransformDispatcher<false>::call(src, dst, op, mask, stream);
+                }
+            }
+
+            // Binary transform: dst(y, x) = op(src1(y, x), src2(y, x)) wherever mask(y, x) holds.
+            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
+            static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
+            {
+                typedef TransformFunctorTraits<BinOp> ft;
+
+                CV_StaticAssert(ft::smart_shift != 1, "");
+
+                // Checked in the same order as before: first source, second source, destination (data, then step).
+                const bool groupAligned =
+                    isAligned(src1.data, ft::smart_shift * sizeof(T1)) && isAligned(src1.step, ft::smart_shift * sizeof(T1)) &&
+                    isAligned(src2.data, ft::smart_shift * sizeof(T2)) && isAligned(src2.step, ft::smart_shift * sizeof(T2)) &&
+                    isAligned(dst.data, ft::smart_shift * sizeof(D)) && isAligned(dst.step, ft::smart_shift * sizeof(D));
+
+                if (groupAligned)
+                {
+                    // Each thread covers smart_shift columns, so the grid width shrinks accordingly.
+                    const dim3 block(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
+                    const dim3 grid(divUp(src1.cols, block.x * ft::smart_shift), divUp(src1.rows, block.y), 1);
+
+                    transformSmart<T1, T2, D><<<grid, block, 0, stream>>>(src1, src2, dst, mask, op);
+                    cudaSafeCall( cudaGetLastError() );
+
+                    // With the null stream the call also waits for the device before returning.
+                    if (stream == 0)
+                    {
+                        cudaSafeCall( cudaDeviceSynchronize() );
+                    }
+                }
+                else
+                {
+                    // Grouped access is not possible for these buffers: use the per-element kernel.
+                    TransformDispatcher<false>::call(src1, src2, dst, op, mask, stream);
+                }
+            }
+        };
+    } // namespace transform_detail
+}}} // namespace cv { namespace cuda { namespace device
+
+//! @endcond
+
+#endif // OPENCV_CUDA_TRANSFORM_DETAIL_HPP
--- a/3rdparty/opencv/inc/opencv2/core/cuda/detail/type_traits_detail.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda/detail/type_traits_detail.hpp
@@ -0,0 +1,191 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP
+#define OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP
+
+#include "../common.hpp"
+#include "../vec_traits.hpp"
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    namespace type_traits_detail
+    {
+        template <bool, typename T1, typename T2> struct Select { typedef T1 type; };  // Compile-time type selection: the primary template (condition true) yields T1 ...
+        template <typename T1, typename T2> struct Select<false, T1, T2> { typedef T2 type; };  // ... and the false specialisation yields T2.
+
+        template <typename T> struct IsSignedIntergral { enum {value = 0}; };  // value is 1 only for the types listed below; the name is spelled "Intergral" (sic) and is referenced under that spelling by IsIntegral
+        template <> struct IsSignedIntergral<schar> { enum {value = 1}; };  // plain char is not listed here; IsIntegral handles it separately
+        template <> struct IsSignedIntergral<char1> { enum {value = 1}; };  // the one-component vector types are listed alongside the scalars
+        template <> struct IsSignedIntergral<short> { enum {value = 1}; };
+        template <> struct IsSignedIntergral<short1> { enum {value = 1}; };
+        template <> struct IsSignedIntergral<int> { enum {value = 1}; };
+        template <> struct IsSignedIntergral<int1> { enum {value = 1}; };
+
+        template <typename T> struct IsUnsignedIntegral { enum {value = 0}; };  // value is 1 only for the types listed below
+        template <> struct IsUnsignedIntegral<uchar> { enum {value = 1}; };
+        template <> struct IsUnsignedIntegral<uchar1> { enum {value = 1}; };  // the one-component vector types are listed alongside the scalars
+        template <> struct IsUnsignedIntegral<ushort> { enum {value = 1}; };
+        template <> struct IsUnsignedIntegral<ushort1> { enum {value = 1}; };
+        template <> struct IsUnsignedIntegral<uint> { enum {value = 1}; };
+        template <> struct IsUnsignedIntegral<uint1> { enum {value = 1}; };
+
+        template <typename T> struct IsIntegral { enum {value = IsSignedIntergral<T>::value || IsUnsignedIntegral<T>::value}; };  // union of the signed and unsigned lists ...
+        template <> struct IsIntegral<char> { enum {value = 1}; };  // ... plus plain char ...
+        template <> struct IsIntegral<bool> { enum {value = 1}; };  // ... and bool, which appear in neither list
+
+        template <typename T> struct IsFloat { enum {value = 0}; };  // value is 1 only for float and double
+        template <> struct IsFloat<float> { enum {value = 1}; };
+        template <> struct IsFloat<double> { enum {value = 1}; };
+
+        template <typename T> struct IsVec { enum {value = 0}; };  // value is 1 for the 1-, 2-, 3-, 4- and 8-component vector types of the eight element types listed below, 0 otherwise
+        template <> struct IsVec<uchar1> { enum {value = 1}; };  // uchar vectors
+        template <> struct IsVec<uchar2> { enum {value = 1}; };
+        template <> struct IsVec<uchar3> { enum {value = 1}; };
+        template <> struct IsVec<uchar4> { enum {value = 1}; };
+        template <> struct IsVec<uchar8> { enum {value = 1}; };
+        template <> struct IsVec<char1> { enum {value = 1}; };  // char vectors
+        template <> struct IsVec<char2> { enum {value = 1}; };
+        template <> struct IsVec<char3> { enum {value = 1}; };
+        template <> struct IsVec<char4> { enum {value = 1}; };
+        template <> struct IsVec<char8> { enum {value = 1}; };
+        template <> struct IsVec<ushort1> { enum {value = 1}; };  // ushort vectors
+        template <> struct IsVec<ushort2> { enum {value = 1}; };
+        template <> struct IsVec<ushort3> { enum {value = 1}; };
+        template <> struct IsVec<ushort4> { enum {value = 1}; };
+        template <> struct IsVec<ushort8> { enum {value = 1}; };
+        template <> struct IsVec<short1> { enum {value = 1}; };  // short vectors
+        template <> struct IsVec<short2> { enum {value = 1}; };
+        template <> struct IsVec<short3> { enum {value = 1}; };
+        template <> struct IsVec<short4> { enum {value = 1}; };
+        template <> struct IsVec<short8> { enum {value = 1}; };
+        template <> struct IsVec<uint1> { enum {value = 1}; };  // uint vectors
+        template <> struct IsVec<uint2> { enum {value = 1}; };
+        template <> struct IsVec<uint3> { enum {value = 1}; };
+        template <> struct IsVec<uint4> { enum {value = 1}; };
+        template <> struct IsVec<uint8> { enum {value = 1}; };
+        template <> struct IsVec<int1> { enum {value = 1}; };  // int vectors
+        template <> struct IsVec<int2> { enum {value = 1}; };
+        template <> struct IsVec<int3> { enum {value = 1}; };
+        template <> struct IsVec<int4> { enum {value = 1}; };
+        template <> struct IsVec<int8> { enum {value = 1}; };
+        template <> struct IsVec<float1> { enum {value = 1}; };  // float vectors
+        template <> struct IsVec<float2> { enum {value = 1}; };
+        template <> struct IsVec<float3> { enum {value = 1}; };
+        template <> struct IsVec<float4> { enum {value = 1}; };
+        template <> struct IsVec<float8> { enum {value = 1}; };
+        template <> struct IsVec<double1> { enum {value = 1}; };  // double vectors
+        template <> struct IsVec<double2> { enum {value = 1}; };
+        template <> struct IsVec<double3> { enum {value = 1}; };
+        template <> struct IsVec<double4> { enum {value = 1}; };
+        template <> struct IsVec<double8> { enum {value = 1}; };
+
+        template <class U> struct AddParameterType { typedef const U& type; };  // Maps a type to a parameter-passing form: a non-reference U becomes const U& ...
+        template <class U> struct AddParameterType<U&> { typedef U& type; };  // ... a reference type is kept as is ...
+        template <> struct AddParameterType<void> { typedef void type; };  // ... and void stays void (no reference to void can be formed)
+
+        template <class U> struct ReferenceTraits  // Reports whether a type is a reference (value) and names the type without the reference (type).
+        {
+            enum { value = false };  // primary template: not a reference
+            typedef U type;
+        };
+        template <class U> struct ReferenceTraits<U&>  // matches reference types U&
+        {
+            enum { value = true };
+            typedef U type;  // the referred-to type
+        };
+
+        template <class U> struct PointerTraits  // Reports whether a type is a pointer (value) and names the pointee (type).
+        {
+            enum { value = false };  // primary template: not a pointer
+            typedef void type;  // no pointee available, so type is void
+        };
+        template <class U> struct PointerTraits<U*>  // matches pointer types U*
+        {
+            enum { value = true };
+            typedef U type;  // the pointee type
+        };
+        template <class U> struct PointerTraits<U*&>  // a reference to a pointer is treated like the pointer itself
+        {
+            enum { value = true };
+            typedef U type;
+        };
+
+        template <class U> struct UnConst  // Removes a const qualifier (type) and reports whether one was present (value).
+        {
+            typedef U type;  // primary template: nothing to remove
+            enum { value = 0 };
+        };
+        template <class U> struct UnConst<const U>  // matches const-qualified types
+        {
+            typedef U type;
+            enum { value = 1 };
+        };
+        template <class U> struct UnConst<const U&>  // matches references to const: the const is dropped, the reference is kept
+        {
+            typedef U& type;
+            enum { value = 1 };
+        };
+
+        template <class U> struct UnVolatile  // Removes a volatile qualifier (type) and reports whether one was present (value).
+        {
+            typedef U type;  // primary template: nothing to remove
+            enum { value = 0 };
+        };
+        template <class U> struct UnVolatile<volatile U>  // matches volatile-qualified types
+        {
+            typedef U type;
+            enum { value = 1 };
+        };
+        template <class U> struct UnVolatile<volatile U&>  // matches references to volatile: the volatile is dropped, the reference is kept
+        {
+            typedef U& type;
+            enum { value = 1 };
+        };
+    } // namespace type_traits_detail
+}}} // namespace cv { namespace cuda { namespace device
+
+//! @endcond
+
+#endif // OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP
--- a/3rdparty/opencv/inc/opencv2/core/cuda/detail/vec_distance_detail.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda/detail/vec_distance_detail.hpp
@@ -0,0 +1,121 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP
+#define OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP
+
+#include "../datamov_utils.hpp"
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    namespace vec_distance_detail
+    {
+        template <int THREAD_DIM, int N> struct UnrollVecDiffCached // compile-time unrolled loop: each level handles one element pair, then defers to <THREAD_DIM, N - 1>
+        {
+            template <typename Dist, typename T1, typename T2>
+            static __device__ void calcCheck(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int ind)
+            {
+                if (ind < len) // bounds-checked variant: a failed check also skips every remaining level
+                {
+                    T1 val1 = *vecCached++;
+
+                    T2 val2;
+                    ForceGlob<T2>::Load(vecGlob, ind, val2); // presumably loads vecGlob[ind] into val2; ForceGlob is not defined in this file
+
+                    dist.reduceIter(val1, val2);
+
+                    UnrollVecDiffCached<THREAD_DIM, N - 1>::calcCheck(vecCached, vecGlob, len, dist, ind + THREAD_DIM); // next index is THREAD_DIM further on
+                }
+            }
+
+            template <typename Dist, typename T1, typename T2>
+            static __device__ void calcWithoutCheck(const T1* vecCached, const T2* vecGlob, Dist& dist)
+            {
+                T1 val1 = *vecCached++;
+
+                T2 val2;
+                ForceGlob<T2>::Load(vecGlob, 0, val2); // unchecked variant: always loads at offset 0 ...
+                vecGlob += THREAD_DIM; // ... and advances the pointer itself by THREAD_DIM instead of an index
+
+                dist.reduceIter(val1, val2);
+
+                UnrollVecDiffCached<THREAD_DIM, N - 1>::calcWithoutCheck(vecCached, vecGlob, dist);
+            }
+        };
+        template <int THREAD_DIM> struct UnrollVecDiffCached<THREAD_DIM, 0> // N == 0: terminates the template recursion
+        {
+            template <typename Dist, typename T1, typename T2>
+            static __device__ __forceinline__ void calcCheck(const T1*, const T2*, int, Dist&, int)
+            {
+            } // intentionally empty: no elements left to process
+
+            template <typename Dist, typename T1, typename T2>
+            static __device__ __forceinline__ void calcWithoutCheck(const T1*, const T2*, Dist&)
+            {
+            } // intentionally empty: no elements left to process
+        };
+
+        template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN> struct VecDiffCachedCalculator; // only the two bool specialisations below are defined
+        template <int THREAD_DIM, int MAX_LEN> struct VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, false> // LEN_EQ_MAX_LEN == false: use the bounds-checked unroll
+        {
+            template <typename Dist, typename T1, typename T2>
+            static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int tid)
+            {
+                UnrollVecDiffCached<THREAD_DIM, MAX_LEN / THREAD_DIM>::calcCheck(vecCached, vecGlob, len, dist, tid); // MAX_LEN / THREAD_DIM levels, first index is tid
+            }
+        };
+        template <int THREAD_DIM, int MAX_LEN> struct VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, true> // LEN_EQ_MAX_LEN == true: use the unchecked unroll
+        {
+            template <typename Dist, typename T1, typename T2>
+            static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int tid)
+            {
+                UnrollVecDiffCached<THREAD_DIM, MAX_LEN / THREAD_DIM>::calcWithoutCheck(vecCached, vecGlob + tid, dist); // `len` is not used here; the start offset tid is folded into the pointer
+            }
+        };
+    } // namespace vec_distance_detail
+}}} // namespace cv { namespace cuda { namespace device
+
+//! @endcond
+
+#endif // OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP
--- a/3rdparty/opencv/inc/opencv2/core/cuda/dynamic_smem.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda/dynamic_smem.hpp
@@ -0,0 +1,88 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_DYNAMIC_SMEM_HPP
+#define OPENCV_CUDA_DYNAMIC_SMEM_HPP
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    template<class T> struct DynamicSharedMem // stateless helper: converts to a T* that aliases the unsized extern __shared__ array
+    {
+        __device__ __forceinline__ operator T*()
+        {
+            extern __shared__ int __smem[]; // declared as int and reinterpreted as T below
+            return (T*)__smem;
+        }
+
+        __device__ __forceinline__ operator const T*() const
+        {
+            extern __shared__ int __smem[]; // same array as in the non-const conversion
+            return (T*)__smem;
+        }
+    };
+
+    // Specialization for double: declares the shared array as double[] (not int[]) to avoid unaligned memory access compile errors.
+    template<> struct DynamicSharedMem<double>
+    {
+        __device__ __forceinline__ operator double*()
+        {
+            extern __shared__ double __smem_d[]; // distinct name from the int-typed array used by the primary template
+            return (double*)__smem_d;
+        }
+
+        __device__ __forceinline__ operator const double*() const
+        {
+            extern __shared__ double __smem_d[];
+            return (double*)__smem_d;
+        }
+    };
+}}}
+
+//! @endcond
+
+#endif // OPENCV_CUDA_DYNAMIC_SMEM_HPP
--- a/3rdparty/opencv/inc/opencv2/core/cuda/emulation.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda/emulation.hpp
@@ -0,0 +1,269 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_EMULATION_HPP_
+#define OPENCV_CUDA_EMULATION_HPP_
+
+#include "common.hpp"
+#include "warp_reduce.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    struct Emulation
+    {
+
+        static __device__ __forceinline__ int syncthreadsOr(int pred) // forwards to __syncthreads_or(pred) unless __CUDA_ARCH__ < 200
+        {
+#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 200)
+                // just a compilation stub: pred is ignored and 0 is always returned
+                return 0;
+#else
+                return __syncthreads_or(pred);
+#endif
+        }
+
+        template<int CTA_SIZE> // CTA_SIZE sizes the shared buffer of the fallback path; it is indexed by threadIdx.x
+        static __forceinline__ __device__ int Ballot(int predicate)
+        {
+#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
+            return __ballot(predicate); // NOTE(review): newer CUDA toolkits deprecate __ballot in favour of __ballot_sync -- confirm against the toolkit in use
+#else
+            __shared__ volatile int cta_buffer[CTA_SIZE];
+
+            int tid = threadIdx.x;
+            cta_buffer[tid] = predicate ? (1 << (tid & 31)) : 0; // bit (tid mod 32) when predicate is non-zero, otherwise 0
+            return warp_reduce(cta_buffer); // warp_reduce is defined elsewhere (presumably warp_reduce.hpp); how it combines the entries is not visible here
+#endif
+        }
+
+        struct smem // atomic helpers (presumably for shared-memory addresses); emulated in software when __CUDA_ARCH__ < 120
+        {
+            enum { TAG_MASK = (1U << ( (sizeof(unsigned int) << 3) - 5U)) - 1U }; // every bit of an unsigned int except the top 5, which hold the writer's tag
+
+            template<typename T>
+            static __device__ __forceinline__ T atomicInc(T* address, T val)
+            {
+#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
+                T count;
+                unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U); // thread index shifted into the bits above TAG_MASK
+                do
+                {
+                    count = *address & TAG_MASK; // current counter without any tag
+                    count = tag | (count + 1);
+                    *address = count;
+                } while (*address != count); // retry until the value read back is the one this thread wrote
+
+                return (count & TAG_MASK) - 1; // counter value before the increment; `val` is not used on this path
+#else
+                return ::atomicInc(address, val);
+#endif
+            }
+
+            template<typename T>
+            static __device__ __forceinline__ T atomicAdd(T* address, T val)
+            {
+#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
+                T count;
+                unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U); // thread index shifted into the bits above TAG_MASK
+                do
+                {
+                    count = *address & TAG_MASK; // current counter without any tag
+                    count = tag | (count + val);
+                    *address = count;
+                } while (*address != count); // retry until the value read back is the one this thread wrote
+
+                return (count & TAG_MASK) - val; // counter value before the addition
+#else
+                return ::atomicAdd(address, val);
+#endif
+            }
+
+            template<typename T>
+            static __device__ __forceinline__ T atomicMin(T* address, T val)
+            {
+#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
+                T count = ::min(*address, val);
+                do
+                {
+                    *address = count;
+                } while (*address > count); // rewrite while the stored value is still larger than the candidate
+
+                return count; // returns the computed minimum, whereas the #else path returns whatever ::atomicMin returns
+#else
+                return ::atomicMin(address, val);
+#endif
+            }
+        }; // struct smem
+
+        struct glob // atomic helpers (presumably for global-memory addresses); float/double variants fall back to atomicCAS loops or stubs depending on __CUDA_ARCH__
+        {
+            static __device__ __forceinline__ int atomicAdd(int* address, int val)
+            {
+                return ::atomicAdd(address, val);
+            }
+            static __device__ __forceinline__ unsigned int atomicAdd(unsigned int* address, unsigned int val)
+            {
+                return ::atomicAdd(address, val);
+            }
+            static __device__ __forceinline__ float atomicAdd(float* address, float val)
+            {
+            #if __CUDA_ARCH__ >= 200
+                return ::atomicAdd(address, val);
+            #else
+                int* address_as_i = (int*) address; // operate on the float's bit pattern through an int pointer
+                int old = *address_as_i, assumed;
+                do {
+                    assumed = old;
+                    old = ::atomicCAS(address_as_i, assumed,
+                        __float_as_int(val + __int_as_float(assumed)));
+                } while (assumed != old); // retry when the value changed between the read and the compare-and-swap
+                return __int_as_float(old); // value stored before the addition
+            #endif
+            }
+            static __device__ __forceinline__ double atomicAdd(double* address, double val)
+            {
+            #if __CUDA_ARCH__ >= 130
+                unsigned long long int* address_as_ull = (unsigned long long int*) address; // 64-bit view of the double's bit pattern
+                unsigned long long int old = *address_as_ull, assumed;
+                do {
+                    assumed = old;
+                    old = ::atomicCAS(address_as_ull, assumed,
+                        __double_as_longlong(val + __longlong_as_double(assumed)));
+                } while (assumed != old); // retry when the value changed between the read and the compare-and-swap
+                return __longlong_as_double(old); // value stored before the addition
+            #else
+                CV_UNUSED(address);
+                CV_UNUSED(val);
+                return 0.0; // stub: nothing is added on this path
+            #endif
+            }
+
+            static __device__ __forceinline__ int atomicMin(int* address, int val)
+            {
+                return ::atomicMin(address, val);
+            }
+            static __device__ __forceinline__ float atomicMin(float* address, float val)
+            {
+            #if __CUDA_ARCH__ >= 120
+                int* address_as_i = (int*) address; // operate on the float's bit pattern through an int pointer
+                int old = *address_as_i, assumed;
+                do {
+                    assumed = old;
+                    old = ::atomicCAS(address_as_i, assumed,
+                        __float_as_int(::fminf(val, __int_as_float(assumed))));
+                } while (assumed != old); // retry when the value changed between the read and the compare-and-swap
+                return __int_as_float(old); // value stored before the update
+            #else
+                CV_UNUSED(address);
+                CV_UNUSED(val);
+                return 0.0f; // stub: memory is left untouched on this path
+            #endif
+            }
+            static __device__ __forceinline__ double atomicMin(double* address, double val)
+            {
+            #if __CUDA_ARCH__ >= 130
+                unsigned long long int* address_as_ull = (unsigned long long int*) address; // 64-bit view of the double's bit pattern
+                unsigned long long int old = *address_as_ull, assumed;
+                do {
+                    assumed = old;
+                    old = ::atomicCAS(address_as_ull, assumed,
+                        __double_as_longlong(::fmin(val, __longlong_as_double(assumed))));
+                } while (assumed != old); // retry when the value changed between the read and the compare-and-swap
+                return __longlong_as_double(old); // value stored before the update
+            #else
+                CV_UNUSED(address);
+                CV_UNUSED(val);
+                return 0.0; // stub: memory is left untouched on this path
+            #endif
+            }
+
+            static __device__ __forceinline__ int atomicMax(int* address, int val)
+            {
+                return ::atomicMax(address, val);
+            }
+            static __device__ __forceinline__ float atomicMax(float* address, float val)
+            {
+            #if __CUDA_ARCH__ >= 120
+                int* address_as_i = (int*) address; // operate on the float's bit pattern through an int pointer
+                int old = *address_as_i, assumed;
+                do {
+                    assumed = old;
+                    old = ::atomicCAS(address_as_i, assumed,
+                        __float_as_int(::fmaxf(val, __int_as_float(assumed))));
+                } while (assumed != old); // retry when the value changed between the read and the compare-and-swap
+                return __int_as_float(old); // value stored before the update
+            #else
+                CV_UNUSED(address);
+                CV_UNUSED(val);
+                return 0.0f; // stub: memory is left untouched on this path
+            #endif
+            }
+            static __device__ __forceinline__ double atomicMax(double* address, double val)
+            {
+            #if __CUDA_ARCH__ >= 130
+                unsigned long long int* address_as_ull = (unsigned long long int*) address; // 64-bit view of the double's bit pattern
+                unsigned long long int old = *address_as_ull, assumed;
+                do {
+                    assumed = old;
+                    old = ::atomicCAS(address_as_ull, assumed,
+                        __double_as_longlong(::fmax(val, __longlong_as_double(assumed))));
+                } while (assumed != old); // retry when the value changed between the read and the compare-and-swap
+                return __longlong_as_double(old); // value stored before the update
+            #else
+                CV_UNUSED(address);
+                CV_UNUSED(val);
+                return 0.0; // stub: memory is left untouched on this path
+            #endif
+            }
+        };
+    }; //struct Emulation
+}}} // namespace cv { namespace cuda { namespace device
+
+//! @endcond
+
+#endif /* OPENCV_CUDA_EMULATION_HPP_ */
--- a/3rdparty/opencv/inc/opencv2/core/cuda/filters.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda/filters.hpp
@@ -0,0 +1,293 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_FILTERS_HPP
+#define OPENCV_CUDA_FILTERS_HPP
+
+#include "saturate_cast.hpp"
+#include "vec_traits.hpp"
+#include "vec_math.hpp"
+#include "type_traits.hpp"
+#include "nppdefs.h"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    template <typename Ptr2D> struct PointFilter // returns the single source sample at the truncated coordinates; no interpolation
+    {
+        typedef typename Ptr2D::elem_type elem_type;
+        typedef float index_type;
+
+        explicit __host__ __device__ __forceinline__ PointFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
+        : src(src_)
+        {
+            CV_UNUSED(fx); // fx and fy are accepted but not used by this filter
+            CV_UNUSED(fy);
+        }
+
+        __device__ __forceinline__ elem_type operator ()(float y, float x) const
+        {
+            return src(__float2int_rz(y), __float2int_rz(x)); // _rz: float-to-int conversion rounding toward zero
+        }
+
+        Ptr2D src; // copy of the source accessor passed to the constructor
+    };
+
+    template <typename Ptr2D> struct LinearFilter // bilinear blend of the four samples surrounding (y, x)
+    {
+        typedef typename Ptr2D::elem_type elem_type;
+        typedef float index_type;
+
+        explicit __host__ __device__ __forceinline__ LinearFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
+        : src(src_)
+        {
+            CV_UNUSED(fx); // fx and fy are accepted but not used by this filter
+            CV_UNUSED(fy);
+        }
+        __device__ __forceinline__ elem_type operator ()(float y, float x) const
+        {
+            typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type; // float vector with elem_type's channel count, used for accumulation
+
+            work_type out = VecTraits<work_type>::all(0);
+
+            const int x1 = __float2int_rd(x); // _rd: round down
+            const int y1 = __float2int_rd(y);
+            if (x1 <= NPP_MIN_32S || x1 >= NPP_MAX_32S || y1 <= NPP_MIN_32S || y1 >= NPP_MAX_32S) // coordinate sits at a 32-bit limit; presumably guards x1 + 1 / y1 + 1 against overflow
+            {
+                elem_type src_reg = src(y1, x1);
+                out = out + src_reg * 1.0f;
+                return saturate_cast<elem_type>(out); // single sample, no interpolation
+            }
+            const int x2 = x1 + 1;
+            const int y2 = y1 + 1;
+
+            elem_type src_reg = src(y1, x1); // each sample below is weighted by its distance to the opposite corner of the cell
+            out = out + src_reg * ((x2 - x) * (y2 - y));
+
+            src_reg = src(y1, x2);
+            out = out + src_reg * ((x - x1) * (y2 - y));
+
+            src_reg = src(y2, x1);
+            out = out + src_reg * ((x2 - x) * (y - y1));
+
+            src_reg = src(y2, x2);
+            out = out + src_reg * ((x - x1) * (y - y1));
+
+            return saturate_cast<elem_type>(out);
+        }
+
+        Ptr2D src; // copy of the source accessor passed to the constructor
+    };
+
+    template <typename Ptr2D> struct CubicFilter // weighted average of the samples within distance 2 of (y, x), using a piecewise-cubic kernel
+    {
+        typedef typename Ptr2D::elem_type elem_type;
+        typedef float index_type;
+        typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type; // float vector with elem_type's channel count, used for accumulation
+
+        explicit __host__ __device__ __forceinline__ CubicFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
+        : src(src_)
+        {
+            CV_UNUSED(fx); // fx and fy are accepted but not used by this filter
+            CV_UNUSED(fy);
+        }
+
+        static __device__ __forceinline__ float bicubicCoeff(float x_) // kernel weight for a sample at signed distance x_
+        {
+            float x = fabsf(x_);
+            if (x <= 1.0f)
+            {
+                return x * x * (1.5f * x - 2.5f) + 1.0f; // 1.5x^3 - 2.5x^2 + 1
+            }
+            else if (x < 2.0f)
+            {
+                return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f; // -0.5x^3 + 2.5x^2 - 4x + 2
+            }
+            else
+            {
+                return 0.0f; // no contribution at distance 2 or more
+            }
+        }
+
+        __device__ elem_type operator ()(float y, float x) const
+        {
+            const float xmin = ::ceilf(x - 2.0f); // integer-valued sample window [xmin, xmax] x [ymin, ymax]
+            const float xmax = ::floorf(x + 2.0f);
+
+            const float ymin = ::ceilf(y - 2.0f);
+            const float ymax = ::floorf(y + 2.0f);
+
+            work_type sum = VecTraits<work_type>::all(0);
+            float wsum = 0.0f;
+
+            for (float cy = ymin; cy <= ymax; cy += 1.0f)
+            {
+                for (float cx = xmin; cx <= xmax; cx += 1.0f)
+                {
+                    const float w = bicubicCoeff(x - cx) * bicubicCoeff(y - cy); // separable weight: product of the x and y kernel values
+                    sum = sum + w * src(__float2int_rd(cy), __float2int_rd(cx));
+                    wsum += w;
+                }
+            }
+
+            work_type res = (!wsum)? VecTraits<work_type>::all(0) : sum / wsum; // normalise by the total weight; all zeros when that weight is 0
+
+            return saturate_cast<elem_type>(res);
+        }
+
+        Ptr2D src; // copy of the source accessor passed to the constructor
+    };
+    // For integer scaling: sums the whole source pixels covered by one destination pixel, each weighted by 1 / (scale_x * scale_y).
+    template <typename Ptr2D> struct IntegerAreaFilter
+    {
+        typedef typename Ptr2D::elem_type elem_type;
+        typedef float index_type;
+
+        explicit __host__ __device__ __forceinline__ IntegerAreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
+            : src(src_), scale_x(scale_x_), scale_y(scale_y_), scale(1.f / (scale_x * scale_y)) {} // scale reads the members scale_x / scale_y, which are declared (hence initialised) before it
+
+        __device__ __forceinline__ elem_type operator ()(float y, float x) const
+        {
+            float fsx1 = x * scale_x; // horizontal extent [fsx1, fsx2) in source coordinates
+            float fsx2 = fsx1 + scale_x;
+
+            int sx1 = __float2int_ru(fsx1); // _ru: round up, _rd: round down -> whole columns sx1 .. sx2 - 1
+            int sx2 = __float2int_rd(fsx2);
+
+            float fsy1 = y * scale_y; // vertical extent [fsy1, fsy2) in source coordinates
+            float fsy2 = fsy1 + scale_y;
+
+            int sy1 = __float2int_ru(fsy1); // whole rows sy1 .. sy2 - 1
+            int sy2 = __float2int_rd(fsy2);
+
+            typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type; // float vector with elem_type's channel count, used for accumulation
+            work_type out = VecTraits<work_type>::all(0.f);
+
+            for(int dy = sy1; dy < sy2; ++dy)
+                for(int dx = sx1; dx < sx2; ++dx)
+                {
+                    out = out + src(dy, dx) * scale;
+                }
+
+            return saturate_cast<elem_type>(out);
+        }
+
+        Ptr2D src; // copy of the source accessor passed to the constructor
+        float scale_x, scale_y ,scale; // declaration order matters: scale is initialised from scale_x and scale_y
+    };
+
+    template <typename Ptr2D> struct AreaFilter // area average for non-integer scales: whole pixels plus fractionally weighted edge and corner pixels
+    {
+        typedef typename Ptr2D::elem_type elem_type;
+        typedef float index_type;
+
+        explicit __host__ __device__ __forceinline__ AreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
+            : src(src_), scale_x(scale_x_), scale_y(scale_y_){}
+
+        __device__ __forceinline__ elem_type operator ()(float y, float x) const
+        {
+            float fsx1 = x * scale_x; // horizontal extent [fsx1, fsx2) in source coordinates
+            float fsx2 = fsx1 + scale_x;
+
+            int sx1 = __float2int_ru(fsx1); // _ru: round up, _rd: round down -> fully covered columns sx1 .. sx2 - 1
+            int sx2 = __float2int_rd(fsx2);
+
+            float fsy1 = y * scale_y; // vertical extent [fsy1, fsy2) in source coordinates
+            float fsy2 = fsy1 + scale_y;
+
+            int sy1 = __float2int_ru(fsy1); // fully covered rows sy1 .. sy2 - 1
+            int sy2 = __float2int_rd(fsy2);
+
+            float scale = 1.f / (fminf(scale_x, src.width - fsx1) * fminf(scale_y, src.height - fsy1)); // reciprocal of the footprint area, clipped against src.width / src.height
+
+            typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type; // float vector with elem_type's channel count, used for accumulation
+            work_type out = VecTraits<work_type>::all(0.f);
+
+            for (int dy = sy1; dy < sy2; ++dy)
+            {
+                for (int dx = sx1; dx < sx2; ++dx)
+                    out = out + src(dy, dx) * scale; // fully covered pixels
+
+                if (sx1 > fsx1)
+                    out = out + src(dy, (sx1 -1) ) * ((sx1 - fsx1) * scale); // partially covered column on the low-x side
+
+                if (sx2 < fsx2)
+                    out = out + src(dy, sx2) * ((fsx2 -sx2) * scale); // partially covered column on the high-x side
+            }
+
+            if (sy1 > fsy1)
+                for (int dx = sx1; dx < sx2; ++dx)
+                    out = out + src( (sy1 - 1) , dx) * ((sy1 -fsy1) * scale); // partially covered row on the low-y side
+
+            if (sy2 < fsy2)
+                for (int dx = sx1; dx < sx2; ++dx)
+                    out = out + src(sy2, dx) * ((fsy2 -sy2) * scale); // partially covered row on the high-y side
+
+            if ((sy1 > fsy1) &&  (sx1 > fsx1))
+                out = out + src( (sy1 - 1) , (sx1 - 1)) * ((sy1 -fsy1) * (sx1 -fsx1) * scale); // corner: low y, low x
+
+            if ((sy1 > fsy1) &&  (sx2 < fsx2))
+                out = out + src( (sy1 - 1) , sx2) * ((sy1 -fsy1) * (fsx2 -sx2) * scale); // corner: low y, high x
+
+            if ((sy2 < fsy2) &&  (sx2 < fsx2))
+                out = out + src(sy2, sx2) * ((fsy2 -sy2) * (fsx2 -sx2) * scale); // corner: high y, high x
+
+            if ((sy2 < fsy2) &&  (sx1 > fsx1))
+                out = out + src(sy2, (sx1 - 1)) * ((fsy2 -sy2) * (sx1 -fsx1) * scale); // corner: high y, low x
+
+            return saturate_cast<elem_type>(out);
+        }
+
+        Ptr2D src; // copy of the source accessor passed to the constructor
+        float scale_x, scale_y;
+        int width, haight; // NOTE(review): never initialised or read in this struct (operator() uses src.width / src.height); "haight" looks like a typo for "height"
+    };
+}}} // namespace cv { namespace cuda { namespace device
+
+//! @endcond
+
+#endif // OPENCV_CUDA_FILTERS_HPP
--- a/3rdparty/opencv/inc/opencv2/core/cuda/funcattrib.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda/funcattrib.hpp
@@ -0,0 +1,79 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP
+#define OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP
+
+#include <cstdio>
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    // Queries the CUDA function attributes of `func` via cudaFuncGetAttributes
+    // and prints them to stdout, flushing stdout before returning.
+    //
+    // func: passed unchanged as the second argument of cudaFuncGetAttributes.
+    //
+    // If the query does not return cudaSuccess the error string is printed in
+    // place of the attribute values, since `attrs` is not filled in that case.
+    template<class Func>
+    void printFuncAttrib(Func& func)
+    {
+        cudaFuncAttributes attrs;
+        const cudaError_t status = cudaFuncGetAttributes(&attrs, func);
+
+        printf("=== Function stats ===\n");
+        printf("Name: \n");
+
+        if (status != cudaSuccess)
+        {
+            printf("cudaFuncGetAttributes failed: %s\n", cudaGetErrorString(status));
+            printf("\n");
+            fflush(stdout);
+            return;
+        }
+
+        // The three *SizeBytes fields are size_t in cudaFuncAttributes, so they
+        // are printed with %zu; %d would be a format/argument type mismatch.
+        printf("sharedSizeBytes    = %zu\n", attrs.sharedSizeBytes);
+        printf("constSizeBytes     = %zu\n", attrs.constSizeBytes);
+        printf("localSizeBytes     = %zu\n", attrs.localSizeBytes);
+        printf("maxThreadsPerBlock = %d\n", attrs.maxThreadsPerBlock);
+        printf("numRegs            = %d\n", attrs.numRegs);
+        printf("ptxVersion         = %d\n", attrs.ptxVersion);
+        printf("binaryVersion      = %d\n", attrs.binaryVersion);
+        printf("\n");
+        fflush(stdout);
+    }
+}}} // namespace cv { namespace cuda { namespace device
+
+//! @endcond
+
+#endif  /* OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP */
--- a/3rdparty/opencv/inc/opencv2/core/cuda/functional.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda/functional.hpp
@@ -0,0 +1,805 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_FUNCTIONAL_HPP
+#define OPENCV_CUDA_FUNCTIONAL_HPP
+
+#include <functional>
+#include "saturate_cast.hpp"
+#include "vec_traits.hpp"
+#include "type_traits.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    // Function Objects
+    // Base for one-argument functors. It only publishes the argument and
+    // result types as nested typedefs; it has no data or member functions.
+    template <typename Argument, typename Result>
+    struct unary_function
+    {
+        typedef Result   result_type;
+        typedef Argument argument_type;
+    };
+    // Base for two-argument functors. It only publishes both argument types
+    // and the result type as nested typedefs.
+    template <typename Argument1, typename Argument2, typename Result>
+    struct binary_function
+    {
+        typedef Result    result_type;
+        typedef Argument1 first_argument_type;
+        typedef Argument2 second_argument_type;
+    };
+
+    // Arithmetic Operations
+    // Stateless functor whose device-side call operator returns lhs + rhs.
+    template <typename T> struct plus : binary_function<T, T, T>
+    {
+        // Empty constructors, usable from both host and device code.
+        __host__ __device__ __forceinline__ plus() {}
+        __host__ __device__ __forceinline__ plus(const plus&) {}
+
+        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType lhs,
+                                                 typename TypeTraits<T>::ParameterType rhs) const
+        {
+            return lhs + rhs;
+        }
+    };
+
+    // Stateless functor whose device-side call operator returns lhs - rhs.
+    template <typename T> struct minus : binary_function<T, T, T>
+    {
+        // Empty constructors, usable from both host and device code.
+        __host__ __device__ __forceinline__ minus() {}
+        __host__ __device__ __forceinline__ minus(const minus&) {}
+
+        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType lhs,
+                                                 typename TypeTraits<T>::ParameterType rhs) const
+        {
+            return lhs - rhs;
+        }
+    };
+
+    // Stateless functor whose device-side call operator returns lhs * rhs.
+    template <typename T> struct multiplies : binary_function<T, T, T>
+    {
+        // Empty constructors, usable from both host and device code.
+        __host__ __device__ __forceinline__ multiplies() {}
+        __host__ __device__ __forceinline__ multiplies(const multiplies&) {}
+
+        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType lhs,
+                                                 typename TypeTraits<T>::ParameterType rhs) const
+        {
+            return lhs * rhs;
+        }
+    };
+
+    // Stateless functor whose device-side call operator returns lhs / rhs.
+    // No check is made on the divisor.
+    template <typename T> struct divides : binary_function<T, T, T>
+    {
+        // Empty constructors, usable from both host and device code.
+        __host__ __device__ __forceinline__ divides() {}
+        __host__ __device__ __forceinline__ divides(const divides&) {}
+
+        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType lhs,
+                                                 typename TypeTraits<T>::ParameterType rhs) const
+        {
+            return lhs / rhs;
+        }
+    };
+
+    // Stateless functor whose device-side call operator returns lhs % rhs.
+    // No check is made on the divisor.
+    template <typename T> struct modulus : binary_function<T, T, T>
+    {
+        // Empty constructors, usable from both host and device code.
+        __host__ __device__ __forceinline__ modulus() {}
+        __host__ __device__ __forceinline__ modulus(const modulus&) {}
+
+        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType lhs,
+                                                 typename TypeTraits<T>::ParameterType rhs) const
+        {
+            return lhs % rhs;
+        }
+    };
+
+    // Stateless functor whose device-side call operator returns -value.
+    template <typename T> struct negate : unary_function<T, T>
+    {
+        // Empty constructors, usable from both host and device code.
+        __host__ __device__ __forceinline__ negate() {}
+        __host__ __device__ __forceinline__ negate(const negate&) {}
+
+        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType value) const
+        {
+            return -value;
+        }
+    };
+
+    // Comparison Operations
+    // Stateless predicate: the device-side call operator returns lhs == rhs.
+    template <typename T> struct equal_to : binary_function<T, T, bool>
+    {
+        // Empty constructors, usable from both host and device code.
+        __host__ __device__ __forceinline__ equal_to() {}
+        __host__ __device__ __forceinline__ equal_to(const equal_to&) {}
+
+        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType lhs,
+                                                    typename TypeTraits<T>::ParameterType rhs) const
+        {
+            return lhs == rhs;
+        }
+    };
+
+    // Stateless predicate: the device-side call operator returns lhs != rhs.
+    template <typename T> struct not_equal_to : binary_function<T, T, bool>
+    {
+        // Empty constructors, usable from both host and device code.
+        __host__ __device__ __forceinline__ not_equal_to() {}
+        __host__ __device__ __forceinline__ not_equal_to(const not_equal_to&) {}
+
+        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType lhs,
+                                                    typename TypeTraits<T>::ParameterType rhs) const
+        {
+            return lhs != rhs;
+        }
+    };
+
+    // Stateless predicate: the device-side call operator returns lhs > rhs.
+    template <typename T> struct greater : binary_function<T, T, bool>
+    {
+        // Empty constructors, usable from both host and device code.
+        __host__ __device__ __forceinline__ greater() {}
+        __host__ __device__ __forceinline__ greater(const greater&) {}
+
+        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType lhs,
+                                                    typename TypeTraits<T>::ParameterType rhs) const
+        {
+            return lhs > rhs;
+        }
+    };
+
+    // Stateless predicate: the device-side call operator returns lhs < rhs.
+    template <typename T> struct less : binary_function<T, T, bool>
+    {
+        // Empty constructors, usable from both host and device code.
+        __host__ __device__ __forceinline__ less() {}
+        __host__ __device__ __forceinline__ less(const less&) {}
+
+        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType lhs,
+                                                    typename TypeTraits<T>::ParameterType rhs) const
+        {
+            return lhs < rhs;
+        }
+    };
+
+    // Stateless predicate: the device-side call operator returns lhs >= rhs.
+    template <typename T> struct greater_equal : binary_function<T, T, bool>
+    {
+        // Empty constructors, usable from both host and device code.
+        __host__ __device__ __forceinline__ greater_equal() {}
+        __host__ __device__ __forceinline__ greater_equal(const greater_equal&) {}
+
+        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType lhs,
+                                                    typename TypeTraits<T>::ParameterType rhs) const
+        {
+            return lhs >= rhs;
+        }
+    };
+
+    // Stateless predicate: the device-side call operator returns lhs <= rhs.
+    template <typename T> struct less_equal : binary_function<T, T, bool>
+    {
+        // Empty constructors, usable from both host and device code.
+        __host__ __device__ __forceinline__ less_equal() {}
+        __host__ __device__ __forceinline__ less_equal(const less_equal&) {}
+
+        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType lhs,
+                                                    typename TypeTraits<T>::ParameterType rhs) const
+        {
+            return lhs <= rhs;
+        }
+    };
+
+    // Logical Operations
+    // Stateless predicate: the device-side call operator returns lhs && rhs.
+    template <typename T> struct logical_and : binary_function<T, T, bool>
+    {
+        // Empty constructors, usable from both host and device code.
+        __host__ __device__ __forceinline__ logical_and() {}
+        __host__ __device__ __forceinline__ logical_and(const logical_and&) {}
+
+        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType lhs,
+                                                    typename TypeTraits<T>::ParameterType rhs) const
+        {
+            return lhs && rhs;
+        }
+    };
+
+    // Stateless predicate: the device-side call operator returns lhs || rhs.
+    template <typename T> struct logical_or : binary_function<T, T, bool>
+    {
+        // Empty constructors, usable from both host and device code.
+        __host__ __device__ __forceinline__ logical_or() {}
+        __host__ __device__ __forceinline__ logical_or(const logical_or&) {}
+
+        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType lhs,
+                                                    typename TypeTraits<T>::ParameterType rhs) const
+        {
+            return lhs || rhs;
+        }
+    };
+
+    // Stateless predicate: the device-side call operator returns !value.
+    template <typename T> struct logical_not : unary_function<T, bool>
+    {
+        // Empty constructors, usable from both host and device code.
+        __host__ __device__ __forceinline__ logical_not() {}
+        __host__ __device__ __forceinline__ logical_not(const logical_not&) {}
+
+        __device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType value) const
+        {
+            return !value;
+        }
+    };
+
+    // Bitwise Operations
+    // Stateless functor whose device-side call operator returns lhs & rhs.
+    template <typename T> struct bit_and : binary_function<T, T, T>
+    {
+        // Empty constructors, usable from both host and device code.
+        __host__ __device__ __forceinline__ bit_and() {}
+        __host__ __device__ __forceinline__ bit_and(const bit_and&) {}
+
+        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType lhs,
+                                                 typename TypeTraits<T>::ParameterType rhs) const
+        {
+            return lhs & rhs;
+        }
+    };
+
+    // Stateless functor whose device-side call operator returns lhs | rhs.
+    template <typename T> struct bit_or : binary_function<T, T, T>
+    {
+        // Empty constructors, usable from both host and device code.
+        __host__ __device__ __forceinline__ bit_or() {}
+        __host__ __device__ __forceinline__ bit_or(const bit_or&) {}
+
+        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType lhs,
+                                                 typename TypeTraits<T>::ParameterType rhs) const
+        {
+            return lhs | rhs;
+        }
+    };
+
+    // Stateless functor whose device-side call operator returns lhs ^ rhs.
+    template <typename T> struct bit_xor : binary_function<T, T, T>
+    {
+        // Empty constructors, usable from both host and device code.
+        __host__ __device__ __forceinline__ bit_xor() {}
+        __host__ __device__ __forceinline__ bit_xor(const bit_xor&) {}
+
+        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType lhs,
+                                                 typename TypeTraits<T>::ParameterType rhs) const
+        {
+            return lhs ^ rhs;
+        }
+    };
+
+    // Stateless functor whose device-side call operator returns ~value.
+    template <typename T> struct bit_not : unary_function<T, T>
+    {
+        // Empty constructors, usable from both host and device code.
+        __host__ __device__ __forceinline__ bit_not() {}
+        __host__ __device__ __forceinline__ bit_not(const bit_not&) {}
+
+        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType value) const
+        {
+            return ~value;
+        }
+    };
+
+    // Generalized Identity Operations
+    // Stateless functor whose device-side call operator hands its argument
+    // back unchanged, typed as TypeTraits<T>::ParameterType.
+    template <typename T> struct identity : unary_function<T, T>
+    {
+        // Empty constructors, usable from both host and device code.
+        __host__ __device__ __forceinline__ identity() {}
+        __host__ __device__ __forceinline__ identity(const identity&) {}
+
+        __device__ __forceinline__ typename TypeTraits<T>::ParameterType
+        operator()(typename TypeTraits<T>::ParameterType value) const
+        {
+            return value;
+        }
+    };
+
+    // Stateless binary functor that returns its first argument and ignores
+    // the second one.
+    template <typename T1, typename T2> struct project1st : binary_function<T1, T2, T1>
+    {
+        // Empty constructors, usable from both host and device code.
+        __host__ __device__ __forceinline__ project1st() {}
+        __host__ __device__ __forceinline__ project1st(const project1st&) {}
+
+        // The second parameter is left unnamed because it is never read.
+        __device__ __forceinline__ typename TypeTraits<T1>::ParameterType
+        operator()(typename TypeTraits<T1>::ParameterType first,
+                   typename TypeTraits<T2>::ParameterType) const
+        {
+            return first;
+        }
+    };
+
+    // Stateless binary functor that returns its second argument and ignores
+    // the first one.
+    template <typename T1, typename T2> struct project2nd : binary_function<T1, T2, T2>
+    {
+        // Empty constructors, usable from both host and device code.
+        __host__ __device__ __forceinline__ project2nd() {}
+        __host__ __device__ __forceinline__ project2nd(const project2nd&) {}
+
+        // The first parameter is left unnamed because it is never read.
+        __device__ __forceinline__ typename TypeTraits<T2>::ParameterType
+        operator()(typename TypeTraits<T1>::ParameterType,
+                   typename TypeTraits<T2>::ParameterType second) const
+        {
+            return second;
+        }
+    };
+
+    // Min/Max Operations
+
+// Emits an explicit specialization `name<type>` of a min/max functor template.
+//   name - functor template to specialize (it must already be declared)
+//   type - scalar type the specialization is for
+//   op   - callable invoked as op(lhs, rhs); its result is returned as `type`
+// The specialization takes both operands by value and has the same empty
+// __host__ __device__ constructors as the other functors in this header.
+#define OPENCV_CUDA_IMPLEMENT_MINMAX(name, type, op) \
+    template <> struct name<type> : binary_function<type, type, type> \
+    { \
+        __device__ __forceinline__ type operator()(type lhs, type rhs) const {return op(lhs, rhs);} \
+        __host__ __device__ __forceinline__ name() {}\
+        __host__ __device__ __forceinline__ name(const name&) {}\
+    };
+
+    // Generic maximum functor: the device-side call operator returns
+    // max(a, b), using whichever `max` unqualified lookup selects for T.
+    template <typename T> struct maximum : binary_function<T, T, T>
+    {
+        // Empty constructors, usable from both host and device code.
+        __host__ __device__ __forceinline__ maximum() {}
+        __host__ __device__ __forceinline__ maximum(const maximum&) {}
+
+        __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType a,
+                                                typename TypeTraits<T>::ParameterType b) const
+        {
+            return max(a, b);
+        }
+    };
+
+    // Explicit specializations for scalar types: integer types call ::max,
+    // float and double call ::fmax.
+    OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, uchar, ::max)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, schar, ::max)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, char, ::max)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, ushort, ::max)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, short, ::max)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, int, ::max)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, uint, ::max)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, float, ::fmax)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, double, ::fmax)
+
+    // Generic minimum functor: the device-side call operator returns
+    // min(a, b), using whichever `min` unqualified lookup selects for T.
+    template <typename T> struct minimum : binary_function<T, T, T>
+    {
+        // Empty constructors, usable from both host and device code.
+        __host__ __device__ __forceinline__ minimum() {}
+        __host__ __device__ __forceinline__ minimum(const minimum&) {}
+
+        __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType a,
+                                                typename TypeTraits<T>::ParameterType b) const
+        {
+            return min(a, b);
+        }
+    };
+
+    // Explicit specializations for scalar types: integer types call ::min,
+    // float and double call ::fmin.
+    OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, uchar, ::min)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, schar, ::min)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, char, ::min)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, ushort, ::min)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, short, ::min)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, int, ::min)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, uint, ::min)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, float, ::fmin)
+    OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, double, ::fmin)
+
+    // The helper macro is only needed for the specializations above.
+#undef OPENCV_CUDA_IMPLEMENT_MINMAX
+
+    // Math functions
+
+    // Absolute-value functor. The primary template calls unqualified abs(x);
+    // the explicit specializations below pick a specific routine per type.
+    template <typename T> struct abs_func : unary_function<T, T>
+    {
+        __host__ __device__ __forceinline__ abs_func() {}
+        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
+
+        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType value) const
+        {
+            return abs(value);
+        }
+    };
+
+    // unsigned char: the value is returned as is.
+    template <> struct abs_func<unsigned char> : unary_function<unsigned char, unsigned char>
+    {
+        __host__ __device__ __forceinline__ abs_func() {}
+        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
+
+        __device__ __forceinline__ unsigned char operator ()(unsigned char value) const
+        {
+            return value;
+        }
+    };
+
+    // signed char: widened to int for ::abs, result converted back on return.
+    template <> struct abs_func<signed char> : unary_function<signed char, signed char>
+    {
+        __host__ __device__ __forceinline__ abs_func() {}
+        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
+
+        __device__ __forceinline__ signed char operator ()(signed char value) const
+        {
+            return ::abs(static_cast<int>(value));
+        }
+    };
+
+    // char: widened to int for ::abs, result converted back on return.
+    template <> struct abs_func<char> : unary_function<char, char>
+    {
+        __host__ __device__ __forceinline__ abs_func() {}
+        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
+
+        __device__ __forceinline__ char operator ()(char value) const
+        {
+            return ::abs(static_cast<int>(value));
+        }
+    };
+
+    // unsigned short: the value is returned as is.
+    template <> struct abs_func<unsigned short> : unary_function<unsigned short, unsigned short>
+    {
+        __host__ __device__ __forceinline__ abs_func() {}
+        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
+
+        __device__ __forceinline__ unsigned short operator ()(unsigned short value) const
+        {
+            return value;
+        }
+    };
+
+    // short: widened to int for ::abs, result converted back on return.
+    template <> struct abs_func<short> : unary_function<short, short>
+    {
+        __host__ __device__ __forceinline__ abs_func() {}
+        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
+
+        __device__ __forceinline__ short operator ()(short value) const
+        {
+            return ::abs(static_cast<int>(value));
+        }
+    };
+
+    // unsigned int: the value is returned as is.
+    template <> struct abs_func<unsigned int> : unary_function<unsigned int, unsigned int>
+    {
+        __host__ __device__ __forceinline__ abs_func() {}
+        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
+
+        __device__ __forceinline__ unsigned int operator ()(unsigned int value) const
+        {
+            return value;
+        }
+    };
+
+    // int: forwards to ::abs.
+    template <> struct abs_func<int> : unary_function<int, int>
+    {
+        __host__ __device__ __forceinline__ abs_func() {}
+        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
+
+        __device__ __forceinline__ int operator ()(int value) const
+        {
+            return ::abs(value);
+        }
+    };
+
+    // float: forwards to ::fabsf.
+    template <> struct abs_func<float> : unary_function<float, float>
+    {
+        __host__ __device__ __forceinline__ abs_func() {}
+        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
+
+        __device__ __forceinline__ float operator ()(float value) const
+        {
+            return ::fabsf(value);
+        }
+    };
+
+    // double: forwards to ::fabs.
+    template <> struct abs_func<double> : unary_function<double, double>
+    {
+        __host__ __device__ __forceinline__ abs_func() {}
+        __host__ __device__ __forceinline__ abs_func(const abs_func&) {}
+
+        __device__ __forceinline__ double operator ()(double value) const
+        {
+            return ::fabs(value);
+        }
+    };
+
+// Generates a unary math functor template `<name>_func` plus its `double`
+// specialization.
+//   name - prefix of the generated struct (e.g. sqrt -> sqrt_func)
+//   func - routine called for double; the generic template calls the
+//          identifier formed by appending `f` to it (func ## f) and returns float
+#define OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(name, func) \
+    template <typename T> struct name ## _func : unary_function<T, float> \
+    { \
+        __device__ __forceinline__ float operator ()(typename TypeTraits<T>::ParameterType v) const \
+        { \
+            return func ## f(v); \
+        } \
+        __host__ __device__ __forceinline__ name ## _func() {} \
+        __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
+    }; \
+    template <> struct name ## _func<double> : unary_function<double, double> \
+    { \
+        __device__ __forceinline__ double operator ()(double v) const \
+        { \
+            return func(v); \
+        } \
+        __host__ __device__ __forceinline__ name ## _func() {} \
+        __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
+    };
+
+// Generates a binary math functor template `<name>_func` plus its `double`
+// specialization.
+//   name - prefix of the generated struct (e.g. pow -> pow_func)
+//   func - two-argument routine called for double; the generic template calls
+//          the identifier formed by appending `f` to it (func ## f) and returns float
+#define OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(name, func) \
+    template <typename T> struct name ## _func : binary_function<T, T, float> \
+    { \
+        __device__ __forceinline__ float operator ()(typename TypeTraits<T>::ParameterType v1, typename TypeTraits<T>::ParameterType v2) const \
+        { \
+            return func ## f(v1, v2); \
+        } \
+        __host__ __device__ __forceinline__ name ## _func() {} \
+        __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
+    }; \
+    template <> struct name ## _func<double> : binary_function<double, double, double> \
+    { \
+        __device__ __forceinline__ double operator ()(double v1, double v2) const \
+        { \
+            return func(v1, v2); \
+        } \
+        __host__ __device__ __forceinline__ name ## _func() {} \
+        __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
+    };
+
+    // Unary math functors: each line defines <name>_func (float result via the
+    // `f`-suffixed routine) and <name>_func<double> (double result).
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sqrt, ::sqrt)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp, ::exp)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp2, ::exp2)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp10, ::exp10)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log, ::log)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log2, ::log2)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log10, ::log10)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sin, ::sin)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(cos, ::cos)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(tan, ::tan)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(asin, ::asin)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(acos, ::acos)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(atan, ::atan)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sinh, ::sinh)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(cosh, ::cosh)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(tanh, ::tanh)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(asinh, ::asinh)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(acosh, ::acosh)
+    OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(atanh, ::atanh)
+
+    // Binary math functors, generated the same way with two operands.
+    OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(hypot, ::hypot)
+    OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(atan2, ::atan2)
+    OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(pow, ::pow)
+
+    // The generator macros are not needed past this point.
+    // NOTE(review): no #define of OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR_NO_DOUBLE is
+    // visible in this part of the header; that #undef looks like a harmless
+    // leftover - confirm before removing.
+    #undef OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR
+    #undef OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR_NO_DOUBLE
+    #undef OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR
+
+    // Sum-of-squares functor: the device-side call operator returns
+    // a * a + b * b, i.e. the squared hypotenuse without a square root.
+    // NOTE(review): the base class advertises result_type = float while the
+    // call operator returns T - confirm which one callers rely on.
+    template<typename T> struct hypot_sqr_func : binary_function<T, T, float>
+    {
+        // Empty constructors, usable from both host and device code.
+        __host__ __device__ __forceinline__ hypot_sqr_func() {}
+        __host__ __device__ __forceinline__ hypot_sqr_func(const hypot_sqr_func&) {}
+
+        __device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
+                                                 typename TypeTraits<T>::ParameterType b) const
+        {
+            return a * a + b * b;
+        }
+    };
+
+    // Saturate Cast Functor
+    // Conversion functor: the device-side call operator forwards its argument
+    // to saturate_cast<D> and returns the result.
+    template <typename T, typename D> struct saturate_cast_func : unary_function<T, D>
+    {
+        // Empty constructors, usable from both host and device code.
+        __host__ __device__ __forceinline__ saturate_cast_func() {}
+        __host__ __device__ __forceinline__ saturate_cast_func(const saturate_cast_func&) {}
+
+        __device__ __forceinline__ D operator ()(typename TypeTraits<T>::ParameterType value) const
+        {
+            return saturate_cast<D>(value);
+        }
+    };
+
+    // Threshold Functors
+    // Binary threshold: the call operator returns (value > thresh) * maxVal,
+    // i.e. the comparison result multiplied by maxVal.
+    template <typename T> struct thresh_binary_func : unary_function<T, T>
+    {
+        // The default constructor does not initialise thresh / maxVal itself.
+        __host__ __device__ __forceinline__ thresh_binary_func() {}
+
+        __host__ __device__ __forceinline__ thresh_binary_func(T thresh_, T maxVal_)
+            : thresh(thresh_), maxVal(maxVal_) {}
+
+        __host__ __device__ __forceinline__ thresh_binary_func(const thresh_binary_func& other)
+            : thresh(other.thresh), maxVal(other.maxVal) {}
+
+        __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType value) const
+        {
+            return (value > thresh) * maxVal;
+        }
+
+        T thresh;   // comparison threshold
+        T maxVal;   // factor applied to the comparison result
+    };
+
+    // Inverted binary threshold: the call operator returns
+    // (value <= thresh) * maxVal.
+    template <typename T> struct thresh_binary_inv_func : unary_function<T, T>
+    {
+        // The default constructor does not initialise thresh / maxVal itself.
+        __host__ __device__ __forceinline__ thresh_binary_inv_func() {}
+
+        __host__ __device__ __forceinline__ thresh_binary_inv_func(T thresh_, T maxVal_)
+            : thresh(thresh_), maxVal(maxVal_) {}
+
+        __host__ __device__ __forceinline__ thresh_binary_inv_func(const thresh_binary_inv_func& other)
+            : thresh(other.thresh), maxVal(other.maxVal) {}
+
+        __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType value) const
+        {
+            return (value <= thresh) * maxVal;
+        }
+
+        T thresh;   // comparison threshold
+        T maxVal;   // factor applied to the comparison result
+    };
+
+    // Truncating threshold: the call operator returns
+    // minimum<T>()(value, thresh).
+    template <typename T> struct thresh_trunc_func : unary_function<T, T>
+    {
+        // The default constructor does not initialise thresh itself.
+        __host__ __device__ __forceinline__ thresh_trunc_func() {}
+
+        // maxVal_ is accepted but not stored; it is only passed to CV_UNUSED.
+        explicit __host__ __device__ __forceinline__ thresh_trunc_func(T thresh_, T maxVal_ = 0)
+            : thresh(thresh_)
+        {
+            CV_UNUSED(maxVal_);
+        }
+
+        __host__ __device__ __forceinline__ thresh_trunc_func(const thresh_trunc_func& other)
+            : thresh(other.thresh) {}
+
+        __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType value) const
+        {
+            minimum<T> smaller_of;
+            return smaller_of(value, thresh);
+        }
+
+        T thresh;   // upper bound applied to the input
+    };
+
+    // To-zero threshold: the call operator returns (value > thresh) * value.
+    template <typename T> struct thresh_to_zero_func : unary_function<T, T>
+    {
+        // The default constructor does not initialise thresh itself.
+        __host__ __device__ __forceinline__ thresh_to_zero_func() {}
+
+        // maxVal_ is accepted but not stored; it is only passed to CV_UNUSED.
+        explicit __host__ __device__ __forceinline__ thresh_to_zero_func(T thresh_, T maxVal_ = 0)
+            : thresh(thresh_)
+        {
+            CV_UNUSED(maxVal_);
+        }
+
+        __host__ __device__ __forceinline__ thresh_to_zero_func(const thresh_to_zero_func& other)
+            : thresh(other.thresh) {}
+
+        __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType value) const
+        {
+            return (value > thresh) * value;
+        }
+
+        T thresh;   // comparison threshold
+    };
+
+    // Inverted to-zero threshold: the call operator returns
+    // (value <= thresh) * value.
+    template <typename T> struct thresh_to_zero_inv_func : unary_function<T, T>
+    {
+        // The default constructor does not initialise thresh itself.
+        __host__ __device__ __forceinline__ thresh_to_zero_inv_func() {}
+
+        // maxVal_ is accepted but not stored; it is only passed to CV_UNUSED.
+        explicit __host__ __device__ __forceinline__ thresh_to_zero_inv_func(T thresh_, T maxVal_ = 0)
+            : thresh(thresh_)
+        {
+            CV_UNUSED(maxVal_);
+        }
+
+        __host__ __device__ __forceinline__ thresh_to_zero_inv_func(const thresh_to_zero_inv_func& other)
+            : thresh(other.thresh) {}
+
+        __device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType value) const
+        {
+            return (value <= thresh) * value;
+        }
+
+        T thresh;   // comparison threshold
+    };
+
+    // Function Object Adaptors
+    // Adaptor holding a copy of a unary predicate; the call operator returns
+    // the logical negation of that predicate's result.
+    template <typename Predicate> struct unary_negate : unary_function<typename Predicate::argument_type, bool>
+    {
+        __host__ __device__ __forceinline__ unary_negate() {}
+
+        explicit __host__ __device__ __forceinline__ unary_negate(const Predicate& p) : pred(p) {}
+
+        __host__ __device__ __forceinline__ unary_negate(const unary_negate& other) : pred(other.pred) {}
+
+        __device__ __forceinline__ bool operator()(typename TypeTraits<typename Predicate::argument_type>::ParameterType arg) const
+        {
+            return !pred(arg);
+        }
+
+        Predicate pred;   // wrapped predicate
+    };
+
+    // Convenience factory: wraps `pred` in a unary_negate<Predicate>.
+    template <typename Predicate>
+    __host__ __device__ __forceinline__ unary_negate<Predicate> not1(const Predicate& pred)
+    {
+        typedef unary_negate<Predicate> negated_type;
+        return negated_type(pred);
+    }
+
+    // Adaptor holding a copy of a binary predicate; the call operator returns
+    // the logical negation of that predicate's result.
+    template <typename Predicate> struct binary_negate : binary_function<typename Predicate::first_argument_type, typename Predicate::second_argument_type, bool>
+    {
+        __host__ __device__ __forceinline__ binary_negate() {}
+
+        explicit __host__ __device__ __forceinline__ binary_negate(const Predicate& p) : pred(p) {}
+
+        __host__ __device__ __forceinline__ binary_negate(const binary_negate& other) : pred(other.pred) {}
+
+        __device__ __forceinline__ bool operator()(typename TypeTraits<typename Predicate::first_argument_type>::ParameterType lhs,
+                                                   typename TypeTraits<typename Predicate::second_argument_type>::ParameterType rhs) const
+        {
+            return !pred(lhs, rhs);
+        }
+
+        Predicate pred;   // wrapped predicate
+    };
+
+    // Convenience factory: wraps `pred` in a binary_negate<BinaryPredicate>.
+    template <typename BinaryPredicate>
+    __host__ __device__ __forceinline__ binary_negate<BinaryPredicate> not2(const BinaryPredicate& pred)
+    {
+        typedef binary_negate<BinaryPredicate> negated_type;
+        return negated_type(pred);
+    }
+
+    // Adaptor that turns a binary functor into a unary one by fixing its
+    // first argument: the call operator returns op(arg1, value).
+    template <typename Op> struct binder1st : unary_function<typename Op::second_argument_type, typename Op::result_type>
+    {
+        // The default constructor does not initialise op / arg1 itself.
+        __host__ __device__ __forceinline__ binder1st() {}
+
+        __host__ __device__ __forceinline__ binder1st(const Op& op_, const typename Op::first_argument_type& arg1_)
+            : op(op_), arg1(arg1_) {}
+
+        __host__ __device__ __forceinline__ binder1st(const binder1st& other)
+            : op(other.op), arg1(other.arg1) {}
+
+        __device__ __forceinline__ typename Op::result_type
+        operator ()(typename TypeTraits<typename Op::second_argument_type>::ParameterType value) const
+        {
+            return op(arg1, value);
+        }
+
+        Op op;                                   // wrapped binary functor
+        typename Op::first_argument_type arg1;   // bound first argument
+    };
+
+    template <typename Op, typename T> __host__ __device__ __forceinline__ binder1st<Op> bind1st(const Op& op, const T& x)
+    {   // Factory helper: explicitly converts x to Op::first_argument_type, then binds it as op's first argument.
+        return binder1st<Op>(op, typename Op::first_argument_type(x));
+    }
+
+    template <typename Op> struct binder2nd : unary_function<typename Op::first_argument_type, typename Op::result_type>
+    {   // Adapter that fixes the second argument of binary functor Op, yielding a unary functor of Op's first argument.
+        __host__ __device__ __forceinline__ binder2nd(const Op& op_, const typename Op::second_argument_type& arg2_) : op(op_), arg2(arg2_) {}
+
+        __forceinline__ __device__ typename Op::result_type operator ()(typename TypeTraits<typename Op::first_argument_type>::ParameterType a) const
+        {   // Declared __device__ only; evaluates op with the bound value in the second position.
+            return op(a, arg2);
+        }
+
+        __host__ __device__ __forceinline__ binder2nd() {} // op and arg2 are default-initialized (indeterminate for built-in arg2 types)
+        __host__ __device__ __forceinline__ binder2nd(const binder2nd& other) : op(other.op), arg2(other.arg2) {}
+
+        Op op; // wrapped binary functor, held by value
+        typename Op::second_argument_type arg2; // bound second argument
+    };
+
+    template <typename Op, typename T> __host__ __device__ __forceinline__ binder2nd<Op> bind2nd(const Op& op, const T& x)
+    {   // Factory helper: explicitly converts x to Op::second_argument_type, then binds it as op's second argument.
+        return binder2nd<Op>(op, typename Op::second_argument_type(x));
+    }
+
+    // Functor Traits
+    template <typename F> struct IsUnaryFunction
+    {   // Compile-time trait: value is 1 when an F object can be passed as some unary_function<T, D> (e.g. F derives from it), else 0.
+        typedef char Yes;
+        struct No {Yes a[2];}; // sizeof(No) differs from sizeof(Yes), which is what the test below distinguishes
+
+        template <typename T, typename D> static Yes check(unary_function<T, D>);
+        static No check(...); // fallback chosen when the overload above is not viable
+
+        static F makeF(); // declaration only: used inside sizeof, so it is never called
+
+        enum { value = (sizeof(check(makeF())) == sizeof(Yes)) };
+    };
+
+    template <typename F> struct IsBinaryFunction
+    {   // Compile-time trait: value is 1 when an F object can be passed as some binary_function<T1, T2, D> (e.g. F derives from it), else 0.
+        typedef char Yes;
+        struct No {Yes a[2];}; // sizeof(No) differs from sizeof(Yes), which is what the test below distinguishes
+
+        template <typename T1, typename T2, typename D> static Yes check(binary_function<T1, T2, D>);
+        static No check(...); // fallback chosen when the overload above is not viable
+
+        static F makeF(); // declaration only: used inside sizeof, so it is never called
+
+        enum { value = (sizeof(check(makeF())) == sizeof(Yes)) };
+    };
+
+    namespace functional_detail
+    {   // Helpers that pick a default "shift" constant for a functor from the byte sizes of its argument and result types.
+        template <size_t src_elem_size, size_t dst_elem_size> struct UnOpShift { enum { shift = 1 }; }; // generic case
+        template <size_t src_elem_size> struct UnOpShift<src_elem_size, 1> { enum { shift = 4 }; }; // 1-byte result
+        template <size_t src_elem_size> struct UnOpShift<src_elem_size, 2> { enum { shift = 2 }; }; // 2-byte result
+
+        template <typename T, typename D> struct DefaultUnaryShift
+        {   // Shift for a unary op T -> D; per the UnOpShift specializations only sizeof(D) changes the outcome.
+            enum { shift = UnOpShift<sizeof(T), sizeof(D)>::shift };
+        };
+
+        template <size_t src_elem_size1, size_t src_elem_size2, size_t dst_elem_size> struct BinOpShift { enum { shift = 1 }; }; // generic case
+        template <size_t src_elem_size1, size_t src_elem_size2> struct BinOpShift<src_elem_size1, src_elem_size2, 1> { enum { shift = 4 }; }; // 1-byte result
+        template <size_t src_elem_size1, size_t src_elem_size2> struct BinOpShift<src_elem_size1, src_elem_size2, 2> { enum { shift = 2 }; }; // 2-byte result
+
+        template <typename T1, typename T2, typename D> struct DefaultBinaryShift
+        {   // Shift for a binary op (T1, T2) -> D; per the BinOpShift specializations only sizeof(D) changes the outcome.
+            enum { shift = BinOpShift<sizeof(T1), sizeof(T2), sizeof(D)>::shift };
+        };
+
+        template <typename Func, bool unary = IsUnaryFunction<Func>::value> struct ShiftDispatcher; // selects the unary or binary rule
+        template <typename Func> struct ShiftDispatcher<Func, true>
+        {   // Func was detected as a unary_function: use its argument_type / result_type.
+            enum { shift = DefaultUnaryShift<typename Func::argument_type, typename Func::result_type>::shift };
+        };
+        template <typename Func> struct ShiftDispatcher<Func, false>
+        {   // Anything not detected as unary is treated as binary, so Func must expose first/second_argument_type and result_type.
+            enum { shift = DefaultBinaryShift<typename Func::first_argument_type, typename Func::second_argument_type, typename Func::result_type>::shift };
+        };
+    }
+
+    template <typename Func> struct DefaultTransformShift
+    {   // Public entry point for the default shift of Func; delegates to functional_detail::ShiftDispatcher.
+        enum { shift = functional_detail::ShiftDispatcher<Func>::shift };
+    };
+
+    template <typename Func> struct DefaultTransformFunctorTraits
+    {   // Default tuning constants for Func. NOTE(review): presumably consumed as launch parameters by the transform kernels - confirm at the use site.
+        enum { simple_block_dim_x = 16 };
+        enum { simple_block_dim_y = 16 };
+
+        enum { smart_block_dim_x = 16 };
+        enum { smart_block_dim_y = 16 };
+        enum { smart_shift = DefaultTransformShift<Func>::shift }; // size-derived default, see DefaultTransformShift
+    };
+
+    template <typename Func> struct TransformFunctorTraits : DefaultTransformFunctorTraits<Func> {}; // customization point: inherits every default unless specialized
+    // The macro below expands to the head of an explicit specialization only; the user supplies the body after it.
+#define OPENCV_CUDA_TRANSFORM_FUNCTOR_TRAITS(type) \
+    template <> struct TransformFunctorTraits< type > : DefaultTransformFunctorTraits< type >
+}}} // namespace cv { namespace cuda { namespace cudev
+
+//! @endcond
+
+#endif // OPENCV_CUDA_FUNCTIONAL_HPP
--- a/3rdparty/opencv/inc/opencv2/core/cuda/limits.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda/limits.hpp
@@ -0,0 +1,128 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_LIMITS_HPP
+#define OPENCV_CUDA_LIMITS_HPP
+
+#include <limits.h>
+#include <float.h>
+#include "common.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+template <class T> struct numeric_limits; // primary template is declared only; just the specializations below are usable
+
+template <> struct numeric_limits<bool>
+{   // All accessors in these specializations are __device__ only; values come from <limits.h> / <float.h>.
+    __device__ __forceinline__ static bool min() { return false; }
+    __device__ __forceinline__ static bool max() { return true;  }
+    static const bool is_signed = false;
+};
+
+template <> struct numeric_limits<signed char>
+{   // Range [SCHAR_MIN, SCHAR_MAX].
+    __device__ __forceinline__ static signed char min() { return SCHAR_MIN; }
+    __device__ __forceinline__ static signed char max() { return SCHAR_MAX; }
+    static const bool is_signed = true;
+};
+
+template <> struct numeric_limits<unsigned char>
+{   // Range [0, UCHAR_MAX].
+    __device__ __forceinline__ static unsigned char min() { return 0; }
+    __device__ __forceinline__ static unsigned char max() { return UCHAR_MAX; }
+    static const bool is_signed = false;
+};
+
+template <> struct numeric_limits<short>
+{   // Range [SHRT_MIN, SHRT_MAX].
+    __device__ __forceinline__ static short min() { return SHRT_MIN; }
+    __device__ __forceinline__ static short max() { return SHRT_MAX; }
+    static const bool is_signed = true;
+};
+
+template <> struct numeric_limits<unsigned short>
+{   // Range [0, USHRT_MAX].
+    __device__ __forceinline__ static unsigned short min() { return 0; }
+    __device__ __forceinline__ static unsigned short max() { return USHRT_MAX; }
+    static const bool is_signed = false;
+};
+
+template <> struct numeric_limits<int>
+{   // Range [INT_MIN, INT_MAX].
+    __device__ __forceinline__ static int min() { return INT_MIN; }
+    __device__ __forceinline__ static int max() { return INT_MAX; }
+    static const bool is_signed = true;
+};
+
+template <> struct numeric_limits<unsigned int>
+{   // Range [0, UINT_MAX].
+    __device__ __forceinline__ static unsigned int min() { return 0; }
+    __device__ __forceinline__ static unsigned int max() { return UINT_MAX; }
+    static const bool is_signed = false;
+};
+
+template <> struct numeric_limits<float>
+{   // Floating-point specializations additionally provide epsilon().
+    __device__ __forceinline__ static float min() { return FLT_MIN; } // smallest positive normalized value, not the most negative one
+    __device__ __forceinline__ static float max() { return FLT_MAX; }
+    __device__ __forceinline__ static float epsilon() { return FLT_EPSILON; }
+    static const bool is_signed = true;
+};
+
+template <> struct numeric_limits<double>
+{   // Same layout as the float specialization, using the DBL_* constants.
+    __device__ __forceinline__ static double min() { return DBL_MIN; } // smallest positive normalized value, not the most negative one
+    __device__ __forceinline__ static double max() { return DBL_MAX; }
+    __device__ __forceinline__ static double epsilon() { return DBL_EPSILON; }
+    static const bool is_signed = true;
+};
+}}} // namespace cv { namespace cuda { namespace device {
+
+//! @endcond
+
+#endif // OPENCV_CUDA_LIMITS_HPP
--- a/3rdparty/opencv/inc/opencv2/core/cuda/reduce.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda/reduce.hpp
@@ -0,0 +1,209 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_REDUCE_HPP
+#define OPENCV_CUDA_REDUCE_HPP
+
+#ifndef THRUST_DEBUG // eliminate -Wundef warning
+#define THRUST_DEBUG 0
+#endif
+
+#include <thrust/tuple.h>
+#include "detail/reduce.hpp"
+#include "detail/reduce_key_val.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    template <int N, typename T, class Op>
+    __device__ __forceinline__ void reduce(volatile T* smem, T& val, unsigned int tid, const Op& op)
+    {   // Thin forwarder to reduce_detail::Dispatcher<N>::reductor; the explicit template arguments make the callee take val by reference and op by const reference.
+        reduce_detail::Dispatcher<N>::reductor::template reduce<volatile T*, T&, const Op&>(smem, val, tid, op);
+    }
+    template <int N,
+              typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
+              typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
+              class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
+    __device__ __forceinline__ void reduce(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
+                                           const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
+                                           unsigned int tid,
+                                           const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
+    {   // Tuple form: smem, val and op are passed as parallel thrust::tuples and forwarded to the same dispatcher by const reference.
+        reduce_detail::Dispatcher<N>::reductor::template reduce<
+                const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>&,
+                const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>&,
+                const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>&>(smem, val, tid, op);
+    }
+
+    template <unsigned int N, typename K, typename V, class Cmp>
+    __device__ __forceinline__ void reduceKeyVal(volatile K* skeys, K& key, volatile V* svals, V& val, unsigned int tid, const Cmp& cmp)
+    {   // Thin forwarder to reduce_key_val_detail::Dispatcher<N>::reductor; key and val are passed on by reference. Note N is unsigned here but int in reduce().
+        reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<volatile K*, K&, volatile V*, V&, const Cmp&>(skeys, key, svals, val, tid, cmp);
+    }
+    template <unsigned int N,
+              typename K,
+              typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+              typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
+              class Cmp>
+    __device__ __forceinline__ void reduceKeyVal(volatile K* skeys, K& key,
+                                                 const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
+                                                 const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                                 unsigned int tid, const Cmp& cmp)
+    {   // Single key with one comparator, but the values travel as thrust::tuples; forwarded unchanged to the dispatcher.
+        reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<volatile K*, K&,
+                const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>&,
+                const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>&,
+                const Cmp&>(skeys, key, svals, val, tid, cmp);
+    }
+    template <unsigned int N,
+              typename KP0, typename KP1, typename KP2, typename KP3, typename KP4, typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
+              typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
+              typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
+              typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
+              class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
+    __device__ __forceinline__ void reduceKeyVal(const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
+                                                 const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
+                                                 const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
+                                                 const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
+                                                 unsigned int tid,
+                                                 const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp)
+    {   // Fully tupled form: keys, values and comparators all travel as thrust::tuples; forwarded unchanged to the dispatcher.
+        reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<
+                const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>&,
+                const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>&,
+                const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>&,
+                const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>&,
+                const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>&
+                >(skeys, key, svals, val, tid, cmp);
+    }
+
+    // smem_tuple
+
+    template <typename T0>
+    __device__ __forceinline__
+    thrust::tuple<volatile T0*>
+    smem_tuple(T0* t0)
+    {   // Packs the given pointers into a thrust::tuple, casting each to a volatile-qualified pointer. Overloads cover 1 to 10 pointers.
+        return thrust::make_tuple((volatile T0*) t0);
+    }
+
+    template <typename T0, typename T1>
+    __device__ __forceinline__
+    thrust::tuple<volatile T0*, volatile T1*>
+    smem_tuple(T0* t0, T1* t1)
+    {   // 2-pointer overload.
+        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1);
+    }
+
+    template <typename T0, typename T1, typename T2>
+    __device__ __forceinline__
+    thrust::tuple<volatile T0*, volatile T1*, volatile T2*>
+    smem_tuple(T0* t0, T1* t1, T2* t2)
+    {   // 3-pointer overload.
+        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2);
+    }
+
+    template <typename T0, typename T1, typename T2, typename T3>
+    __device__ __forceinline__
+    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*>
+    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3)
+    {   // 4-pointer overload.
+        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3);
+    }
+
+    template <typename T0, typename T1, typename T2, typename T3, typename T4>
+    __device__ __forceinline__
+    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*>
+    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4)
+    {   // 5-pointer overload.
+        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4);
+    }
+
+    template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5>
+    __device__ __forceinline__
+    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*>
+    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5)
+    {   // 6-pointer overload.
+        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5);
+    }
+
+    template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
+    __device__ __forceinline__
+    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*>
+    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6)
+    {   // 7-pointer overload.
+        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6);
+    }
+
+    template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7>
+    __device__ __forceinline__
+    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*>
+    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7)
+    {   // 8-pointer overload.
+        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7);
+    }
+
+    template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8>
+    __device__ __forceinline__
+    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*, volatile T8*>
+    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8)
+    {   // 9-pointer overload.
+        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7, (volatile T8*) t8);
+    }
+
+    template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9>
+    __device__ __forceinline__
+    thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*, volatile T8*, volatile T9*>
+    smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8, T9* t9)
+    {   // 10-pointer overload, matching the 10-slot tuples accepted by the tuple forms of reduce / reduceKeyVal.
+        return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7, (volatile T8*) t8, (volatile T9*) t9);
+    }
+}}}
+
+//! @endcond
+
+#endif // OPENCV_CUDA_REDUCE_HPP
--- a/3rdparty/opencv/inc/opencv2/core/cuda/saturate_cast.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda/saturate_cast.hpp
@@ -0,0 +1,292 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_SATURATE_CAST_HPP
+#define OPENCV_CUDA_SATURATE_CAST_HPP
+
+#include "common.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uchar v) { return _Tp(v); } // primary templates: plain conversion, no clamping
+    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(schar v) { return _Tp(v); }
+    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(ushort v) { return _Tp(v); }
+    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(short v) { return _Tp(v); }
+    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uint v) { return _Tp(v); }
+    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(int v) { return _Tp(v); }
+    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(float v) { return _Tp(v); }
+    template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(double v) { return _Tp(v); } // the explicit specializations that follow override selected pairs
+
+    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(schar v)
+    {   // uchar results: each specialization issues an inline PTX cvt with the .sat modifier to clamp into the u8 range.
+        uint res = 0;
+        int vi = v; // widened to int because the asm operand uses the "r" (32-bit register) constraint
+        asm("cvt.sat.u8.s8 %0, %1;" : "=r"(res) : "r"(vi));
+        return res;
+    }
+    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(short v)
+    {   // 16-bit source is passed with the "h" constraint.
+        uint res = 0;
+        asm("cvt.sat.u8.s16 %0, %1;" : "=r"(res) : "h"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(ushort v)
+    {   // Unsigned 16-bit source.
+        uint res = 0;
+        asm("cvt.sat.u8.u16 %0, %1;" : "=r"(res) : "h"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(int v)
+    {   // Signed 32-bit source.
+        uint res = 0;
+        asm("cvt.sat.u8.s32 %0, %1;" : "=r"(res) : "r"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(uint v)
+    {   // Unsigned 32-bit source.
+        uint res = 0;
+        asm("cvt.sat.u8.u32 %0, %1;" : "=r"(res) : "r"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(float v)
+    {   // .rni: round to nearest integer before saturating (see the PTX ISA cvt rounding modifiers).
+        uint res = 0;
+        asm("cvt.rni.sat.u8.f32 %0, %1;" : "=r"(res) : "f"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ uchar saturate_cast<uchar>(double v)
+    {   // f64 conversion is used only when __CUDA_ARCH__ >= 130; otherwise v is narrowed to float first.
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
+        uint res = 0;
+        asm("cvt.rni.sat.u8.f64 %0, %1;" : "=r"(res) : "d"(v));
+        return res;
+    #else
+        return saturate_cast<uchar>((float)v);
+    #endif
+    }
+
+    template<> __device__ __forceinline__ schar saturate_cast<schar>(uchar v)
+    {   // schar results: each specialization issues an inline PTX cvt with the .sat modifier to clamp into the s8 range.
+        uint res = 0;
+        uint vi = v; // widened to uint because the asm operand uses the "r" (32-bit register) constraint
+        asm("cvt.sat.s8.u8 %0, %1;" : "=r"(res) : "r"(vi));
+        return res;
+    }
+    template<> __device__ __forceinline__ schar saturate_cast<schar>(short v)
+    {   // Signed 16-bit source, passed with the "h" constraint.
+        uint res = 0;
+        asm("cvt.sat.s8.s16 %0, %1;" : "=r"(res) : "h"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ schar saturate_cast<schar>(ushort v)
+    {   // Unsigned 16-bit source.
+        uint res = 0;
+        asm("cvt.sat.s8.u16 %0, %1;" : "=r"(res) : "h"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ schar saturate_cast<schar>(int v)
+    {   // Signed 32-bit source.
+        uint res = 0;
+        asm("cvt.sat.s8.s32 %0, %1;" : "=r"(res) : "r"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ schar saturate_cast<schar>(uint v)
+    {   // Unsigned 32-bit source.
+        uint res = 0;
+        asm("cvt.sat.s8.u32 %0, %1;" : "=r"(res) : "r"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ schar saturate_cast<schar>(float v)
+    {   // .rni: round to nearest integer before saturating (see the PTX ISA cvt rounding modifiers).
+        uint res = 0;
+        asm("cvt.rni.sat.s8.f32 %0, %1;" : "=r"(res) : "f"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ schar saturate_cast<schar>(double v)
+    {   // f64 conversion is used only when __CUDA_ARCH__ >= 130; otherwise v is narrowed to float first.
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
+        uint res = 0;
+        asm("cvt.rni.sat.s8.f64 %0, %1;" : "=r"(res) : "d"(v));
+        return res;
+    #else
+        return saturate_cast<schar>((float)v);
+    #endif
+    }
+
+    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(schar v)
+    {   // ushort results: inline PTX cvt with .sat clamps into the u16 range; the result uses the 16-bit "h" constraint.
+        ushort res = 0;
+        int vi = v; // widened to int because the asm operand uses the "r" (32-bit register) constraint
+        asm("cvt.sat.u16.s8 %0, %1;" : "=h"(res) : "r"(vi));
+        return res;
+    }
+    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(short v)
+    {   // Signed 16-bit source.
+        ushort res = 0;
+        asm("cvt.sat.u16.s16 %0, %1;" : "=h"(res) : "h"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(int v)
+    {   // Signed 32-bit source.
+        ushort res = 0;
+        asm("cvt.sat.u16.s32 %0, %1;" : "=h"(res) : "r"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(uint v)
+    {   // Unsigned 32-bit source.
+        ushort res = 0;
+        asm("cvt.sat.u16.u32 %0, %1;" : "=h"(res) : "r"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(float v)
+    {   // .rni: round to nearest integer before saturating (see the PTX ISA cvt rounding modifiers).
+        ushort res = 0;
+        asm("cvt.rni.sat.u16.f32 %0, %1;" : "=h"(res) : "f"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ ushort saturate_cast<ushort>(double v)
+    {   // f64 conversion is used only when __CUDA_ARCH__ >= 130; otherwise v is narrowed to float first.
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
+        ushort res = 0;
+        asm("cvt.rni.sat.u16.f64 %0, %1;" : "=h"(res) : "d"(v));
+        return res;
+    #else
+        return saturate_cast<ushort>((float)v);
+    #endif
+    }
+
+    template<> __device__ __forceinline__ short saturate_cast<short>(ushort v)
+    {   // short results: inline PTX cvt with .sat clamps into the s16 range; the result uses the 16-bit "h" constraint.
+        short res = 0;
+        asm("cvt.sat.s16.u16 %0, %1;" : "=h"(res) : "h"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ short saturate_cast<short>(int v)
+    {   // Signed 32-bit source.
+        short res = 0;
+        asm("cvt.sat.s16.s32 %0, %1;" : "=h"(res) : "r"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ short saturate_cast<short>(uint v)
+    {   // Unsigned 32-bit source.
+        short res = 0;
+        asm("cvt.sat.s16.u32 %0, %1;" : "=h"(res) : "r"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ short saturate_cast<short>(float v)
+    {   // .rni: round to nearest integer before saturating (see the PTX ISA cvt rounding modifiers).
+        short res = 0;
+        asm("cvt.rni.sat.s16.f32 %0, %1;" : "=h"(res) : "f"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ short saturate_cast<short>(double v)
+    {   // f64 conversion is used only when __CUDA_ARCH__ >= 130; otherwise v is narrowed to float first.
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
+        short res = 0;
+        asm("cvt.rni.sat.s16.f64 %0, %1;" : "=h"(res) : "d"(v));
+        return res;
+    #else
+        return saturate_cast<short>((float)v);
+    #endif
+    }
+
+    template<> __device__ __forceinline__ int saturate_cast<int>(uint v)
+    {   // int results: the uint source goes through PTX cvt.sat; the floating-point sources below use CUDA intrinsics instead.
+        int res = 0;
+        asm("cvt.sat.s32.u32 %0, %1;" : "=r"(res) : "r"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ int saturate_cast<int>(float v)
+    {   // NOTE(review): no explicit .sat here - presumably relies on the intrinsic's own out-of-range behaviour; confirm in the CUDA Math API docs.
+        return __float2int_rn(v);
+    }
+    template<> __device__ __forceinline__ int saturate_cast<int>(double v)
+    {   // The double intrinsic is used only when __CUDA_ARCH__ >= 130; otherwise v is narrowed to float first.
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
+        return __double2int_rn(v);
+    #else
+        return saturate_cast<int>((float)v);
+    #endif
+    }
+
+    template<> __device__ __forceinline__ uint saturate_cast<uint>(schar v)
+    {   // uint results: signed integer sources go through PTX cvt.sat; the floating-point sources below use CUDA intrinsics instead.
+        uint res = 0;
+        int vi = v; // widened to int because the asm operand uses the "r" (32-bit register) constraint
+        asm("cvt.sat.u32.s8 %0, %1;" : "=r"(res) : "r"(vi));
+        return res;
+    }
+    template<> __device__ __forceinline__ uint saturate_cast<uint>(short v)
+    {   // Signed 16-bit source, passed with the "h" constraint.
+        uint res = 0;
+        asm("cvt.sat.u32.s16 %0, %1;" : "=r"(res) : "h"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ uint saturate_cast<uint>(int v)
+    {   // Signed 32-bit source.
+        uint res = 0;
+        asm("cvt.sat.u32.s32 %0, %1;" : "=r"(res) : "r"(v));
+        return res;
+    }
+    template<> __device__ __forceinline__ uint saturate_cast<uint>(float v)
+    {   // NOTE(review): no explicit .sat here - presumably relies on the intrinsic's own out-of-range behaviour; confirm in the CUDA Math API docs.
+        return __float2uint_rn(v);
+    }
+    template<> __device__ __forceinline__ uint saturate_cast<uint>(double v)
+    {   // The double intrinsic is used only when __CUDA_ARCH__ >= 130; otherwise v is narrowed to float first.
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
+        return __double2uint_rn(v);
+    #else
+        return saturate_cast<uint>((float)v);
+    #endif
+    }
+}}}
+
+//! @endcond
+
+#endif /* OPENCV_CUDA_SATURATE_CAST_HPP */
--- a/3rdparty/opencv/inc/opencv2/core/cuda/scan.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda/scan.hpp
@@ -0,0 +1,258 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_SCAN_HPP
+#define OPENCV_CUDA_SCAN_HPP
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/cuda/utility.hpp"
+#include "opencv2/core/cuda/warp.hpp"
+#include "opencv2/core/cuda/warp_shuffle.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    // Selects the scan flavour: EXCLUSIVE is 0, INCLUSIVE is 1.
+    enum ScanKind { EXCLUSIVE, INCLUSIVE };
+
+    // Warp-level scan over a volatile buffer, combining elements with functor F.
+    // Every step is guarded by the lane index (idx & 31).
+    template <ScanKind Kind, typename T, typename F> struct WarpScan
+    {
+        __device__ __forceinline__ WarpScan() {}
+        __device__ __forceinline__ WarpScan(const WarpScan& other) { CV_UNUSED(other); }
+
+        // Scans ptr in place around slot idx. Returns ptr[idx] for INCLUSIVE; otherwise
+        // the neighbouring slot ptr[idx - 1], or 0 when the lane index is 0.
+        __device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx)
+        {
+            const unsigned int laneId = idx & 31;
+            F combine;
+
+            // The offset doubles each step; a lane only takes part while laneId >= offset.
+            if (laneId >=  1) ptr[idx] = combine(ptr[idx -  1], ptr[idx]);
+            if (laneId >=  2) ptr[idx] = combine(ptr[idx -  2], ptr[idx]);
+            if (laneId >=  4) ptr[idx] = combine(ptr[idx -  4], ptr[idx]);
+            if (laneId >=  8) ptr[idx] = combine(ptr[idx -  8], ptr[idx]);
+            if (laneId >= 16) ptr[idx] = combine(ptr[idx - 16], ptr[idx]);
+
+            if (Kind != INCLUSIVE)
+                return (laneId > 0) ? ptr[idx - 1] : 0;
+
+            return ptr[idx];
+        }
+
+        // Buffer slot used by thread tid: the identity mapping.
+        __device__ __forceinline__ unsigned int index(const unsigned int tid)
+        {
+            return tid;
+        }
+
+        // Intentionally empty for this layout.
+        __device__ __forceinline__ void init(volatile T *ptr){}
+
+        // Offset at which BlockScan stores the per-warp totals.
+        static const int warp_offset      = 0;
+
+        // Scan type used to merge the per-warp totals (always inclusive).
+        typedef WarpScan<INCLUSIVE, T, F>  merge;
+    };
+
+    // Warp-level scan without per-step lane guards: every lane unconditionally combines its
+    // slot with the slots 1, 2, 4, 8 and 16 below it. index() places each warp's data 16
+    // slots into a region of warp_smem_stride slots.
+    // NOTE(review): presumably the slots below each warp's data must hold a neutral value
+    // for the unguarded reads to be harmless - confirm against the callers.
+    template <ScanKind Kind , typename T, typename F> struct WarpScanNoComp
+    {
+        __device__ __forceinline__ WarpScanNoComp() {}
+        __device__ __forceinline__ WarpScanNoComp(const WarpScanNoComp& other) { CV_UNUSED(other); }
+
+        // Scans ptr in place around slot idx. Returns ptr[idx] for INCLUSIVE; otherwise
+        // ptr[idx - 1], or 0 for the first lane of the warp (lane taken from threadIdx.x).
+        __device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx)
+        {
+            const unsigned int laneId = threadIdx.x & 31;
+            F combine;
+
+            ptr[idx] = combine(ptr[idx -  1], ptr[idx]);
+            ptr[idx] = combine(ptr[idx -  2], ptr[idx]);
+            ptr[idx] = combine(ptr[idx -  4], ptr[idx]);
+            ptr[idx] = combine(ptr[idx -  8], ptr[idx]);
+            ptr[idx] = combine(ptr[idx - 16], ptr[idx]);
+
+            if (Kind != INCLUSIVE)
+                return (laneId > 0) ? ptr[idx - 1] : 0;
+
+            return ptr[idx];
+        }
+
+        // Buffer slot of thread tid: warp number times the padded stride, plus 16, plus the lane.
+        __device__ __forceinline__ unsigned int index(const unsigned int tid)
+        {
+            const unsigned int warpId = tid >> warp_log;
+            const unsigned int laneId = tid & warp_mask;
+            return warpId * warp_smem_stride + 16 + laneId;
+        }
+
+        // Zeroes the slot whose index equals the calling thread's threadIdx.x.
+        __device__ __forceinline__ void init(volatile T *ptr)
+        {
+            ptr[threadIdx.x] = 0;
+        }
+
+        static const int warp_smem_stride = 32 + 16 + 1; // slots reserved per warp
+        static const int warp_offset      = 16;          // offset at which BlockScan stores the per-warp totals
+        static const int warp_log         = 5;           // log2(32)
+        static const int warp_mask        = 31;          // lane mask
+
+        // Scan type used to merge the per-warp totals (always inclusive).
+        typedef WarpScanNoComp<INCLUSIVE, T, F> merge;
+    };
+
+    // Block-wide scan built from a per-warp scan (Sc) followed by a scan of the per-warp
+    // totals (Sc::merge).
+    // NOTE(review): F is not referenced inside this struct; the per-warp prefixes are applied
+    // with operator+ below.
+    template <ScanKind Kind , typename T, typename Sc, typename F> struct BlockScan
+    {
+        __device__ __forceinline__ BlockScan() {}
+        __device__ __forceinline__ BlockScan(const BlockScan& other) { CV_UNUSED(other); }
+
+        // Scans ptr in place and returns the calling thread's result. The body contains
+        // __syncthreads() barriers, so it has to be reached by all threads of the block.
+        __device__ __forceinline__ T operator()(volatile T *ptr)
+        {
+            const unsigned int tid    = threadIdx.x;
+            const unsigned int laneId = tid & warp_mask;
+            const unsigned int warpId = tid >> warp_log;
+
+            Sc warpScan;
+            typename Sc::merge mergeScan;
+            const unsigned int slot = warpScan.index(tid);
+
+            // 1. Independent scan inside every warp.
+            T val = warpScan(ptr, slot);
+            __syncthreads();
+
+            // 2. The first warp runs the scan's init() before the totals are stored.
+            if (warpId == 0)
+                warpScan.init(ptr);
+            __syncthreads();
+
+            // 3. The last lane of each warp stores its value at warp_offset + warpId.
+            if (laneId == 31)
+                ptr[warpScan.warp_offset + warpId] = (Kind == INCLUSIVE) ? val : ptr[slot];
+            __syncthreads();
+
+            // 4. The first warp scans the stored values.
+            if (warpId == 0)
+                mergeScan(ptr, slot);
+            __syncthreads();
+
+            // 5. Every warp except the first adds the scanned value of the preceding warp.
+            if (warpId > 0)
+                val = ptr[warpScan.warp_offset + warpId - 1] + val;
+            __syncthreads();
+
+            // 6. Write the final value back into the thread's slot.
+            ptr[slot] = val;
+            __syncthreads();
+
+            return val;
+        }
+
+        static const int warp_log  = 5;  // log2(32)
+        static const int warp_mask = 31; // lane mask
+    };
+
+    // Inclusive warp scan (running sum) of idata.
+    // For __CUDA_ARCH__ >= 300 the scan is done with shfl_up and s_Data / tid are not used.
+    // Otherwise it runs through s_Data: each thread zeroes one slot and works on the slot
+    // OPENCV_CUDA_WARP_SIZE positions above it.
+    template <typename T>
+    __device__ T warpScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
+    {
+    #if __CUDA_ARCH__ >= 300
+        const unsigned int lane = cv::cuda::device::Warp::laneId();
+
+        // scan built on shuffle functions
+        #pragma unroll
+        for (int offset = 1; offset <= (OPENCV_CUDA_WARP_SIZE / 2); offset <<= 1)
+        {
+            const T fromBelow = cv::cuda::device::shfl_up(idata, offset);
+            if (lane >= offset)
+                  idata += fromBelow;
+        }
+
+        return idata;
+    #else
+        const unsigned int zeroPos = 2 * tid - (tid & (OPENCV_CUDA_WARP_SIZE - 1));
+        s_Data[zeroPos] = 0;
+        const unsigned int pos = zeroPos + OPENCV_CUDA_WARP_SIZE;
+        s_Data[pos] = idata;
+
+        // Accumulate the slots 1, 2, 4, 8 and 16 below, in this order.
+        s_Data[pos] += s_Data[pos - 1];
+        s_Data[pos] += s_Data[pos - 2];
+        s_Data[pos] += s_Data[pos - 4];
+        s_Data[pos] += s_Data[pos - 8];
+        s_Data[pos] += s_Data[pos - 16];
+
+        return s_Data[pos];
+    #endif
+    }
+
+    // Exclusive warp scan: the inclusive result with the thread's own input subtracted.
+    template <typename T>
+    __device__ __forceinline__ T warpScanExclusive(T idata, volatile T* s_Data, unsigned int tid)
+    {
+        const T inclusive = warpScanInclusive(idata, s_Data, tid);
+        return inclusive - idata;
+    }
+
+    // Inclusive scan (running sum) over tiNumScanThreads threads, using s_Data as scratch.
+    // The multi-warp path contains __syncthreads() barriers.
+    template <int tiNumScanThreads, typename T>
+    __device__ T blockScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
+    {
+        // Up to one warp of threads: the warp scan is already the full answer.
+        if (tiNumScanThreads <= OPENCV_CUDA_WARP_SIZE)
+            return warpScanInclusive(idata, s_Data, tid);
+
+        const unsigned int warpIdx = tid >> OPENCV_CUDA_LOG_WARP_SIZE;
+
+        // Bottom-level inclusive warp scan
+        T warpResult = warpScanInclusive(idata, s_Data, tid);
+
+        // Save the top element of each warp for the exclusive warp scan;
+        // sync first because s_Data is about to be overwritten.
+        __syncthreads();
+        if ((tid & (OPENCV_CUDA_WARP_SIZE - 1)) == (OPENCV_CUDA_WARP_SIZE - 1))
+        {
+            s_Data[warpIdx] = warpResult;
+        }
+
+        // wait until every warp has stored its top element
+        __syncthreads();
+
+        if (tid < (tiNumScanThreads / OPENCV_CUDA_WARP_SIZE) )
+        {
+            // grab the top warp elements
+            T top = s_Data[tid];
+            // calculate the exclusive scan and write it back to shared memory
+            s_Data[tid] = warpScanExclusive(top, s_Data, tid);
+        }
+
+        // combine each warp's own scan with the exclusive scan of the warp tops
+        __syncthreads();
+
+        return warpResult + s_Data[warpIdx];
+    }
+}}}
+
+//! @endcond
+
+#endif // OPENCV_CUDA_SCAN_HPP
--- a/3rdparty/opencv/inc/opencv2/core/cuda/simd_functions.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda/simd_functions.hpp
@@ -0,0 +1,869 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+/*
+ * Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ *
+ *   Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ *
+ *   Neither the name of NVIDIA Corporation nor the names of its contributors
+ *   may be used to endorse or promote products derived from this software
+ *   without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef OPENCV_CUDA_SIMD_FUNCTIONS_HPP
+#define OPENCV_CUDA_SIMD_FUNCTIONS_HPP
+
+#include "common.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    // 2
+
+    // Adds the two packed 16-bit halfwords of a and b.
+    // The PTX paths carry the .sat modifier; the generic fallback wraps each halfword.
+    static __device__ __forceinline__ unsigned int vadd2(unsigned int a, unsigned int b)
+    {
+        unsigned int sum = 0; // also passed as the last input operand of the asm statements
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vadd2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(sum) : "r"(a), "r"(b), "r"(sum));
+    #elif __CUDA_ARCH__ >= 200
+        asm("vadd.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(sum) : "r"(a), "r"(b), "r"(sum));
+        asm("vadd.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(sum) : "r"(a), "r"(b), "r"(sum));
+    #else
+        sum = a + b;                                 // plain 32-bit sum
+        const unsigned int carryIns = (a ^ b) ^ sum; // bit i is set when a carry entered bit i
+        sum -= carryIns & 0x00010000;                // remove the carry from the low into the high halfword
+    #endif
+
+        return sum;
+    }
+
+    // Subtracts the two packed 16-bit halfwords of b from those of a.
+    // The PTX paths carry the .sat modifier; the generic fallback wraps each halfword.
+    static __device__ __forceinline__ unsigned int vsub2(unsigned int a, unsigned int b)
+    {
+        unsigned int diff = 0; // also passed as the last input operand of the asm statements
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vsub2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(diff) : "r"(a), "r"(b), "r"(diff));
+    #elif __CUDA_ARCH__ >= 200
+        asm("vsub.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(diff) : "r"(a), "r"(b), "r"(diff));
+        asm("vsub.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(diff) : "r"(a), "r"(b), "r"(diff));
+    #else
+        diff = a - b;                                  // plain 32-bit difference
+        const unsigned int borrowIns = (a ^ b) ^ diff; // bit i is set when a borrow entered bit i
+        diff += borrowIns & 0x00010000;                // give back the borrow taken from the high halfword
+    #endif
+
+        return diff;
+    }
+
+    // Per-halfword absolute difference |a - b| of the two packed unsigned 16-bit halfwords.
+    static __device__ __forceinline__ unsigned int vabsdiff2(unsigned int a, unsigned int b)
+    {
+        unsigned int absDiff = 0; // also passed as the last input operand of the asm statements
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vabsdiff2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(absDiff) : "r"(a), "r"(b), "r"(absDiff));
+    #elif __CUDA_ARCH__ >= 200
+        asm("vabsdiff.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(absDiff) : "r"(a), "r"(b), "r"(absDiff));
+        asm("vabsdiff.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(absDiff) : "r"(a), "r"(b), "r"(absDiff));
+    #else
+        const unsigned int aLow  = a & 0x0000ffff; // low halfwords
+        const unsigned int bLow  = b & 0x0000ffff;
+        const unsigned int aHigh = a & 0xffff0000; // high halfwords, left in place
+        const unsigned int bHigh = b & 0xffff0000;
+
+        const unsigned int maxBoth = ::max(bLow, aLow) | ::max(bHigh, aHigh); // maximum of both halfwords
+        const unsigned int minBoth = ::min(bLow, aLow) | ::min(bHigh, aHigh); // minimum of both halfwords
+
+        absDiff = maxBoth - minBoth;               // |a - b| = max(a,b) - min(a,b)
+    #endif
+
+        return absDiff;
+    }
+
+    // Per-halfword unsigned average of a and b, rounded down.
+    static __device__ __forceinline__ unsigned int vavg2(unsigned int a, unsigned int b)
+    {
+        // HAKMEM #23: a + b = 2 * (a & b) + (a ^ b) ==>
+        // (a + b) / 2 = (a & b) + ((a ^ b) >> 1)
+        const unsigned int common    = a & b;
+        const unsigned int halfOfXor = ((a ^ b) & 0xfffefffe) >> 1; // mask keeps the shift inside each halfword
+
+        return common + halfOfXor;
+    }
+
+    // Per-halfword unsigned average of a and b, rounded up: (a + b + 1) / 2.
+    static __device__ __forceinline__ unsigned int vavrg2(unsigned int a, unsigned int b)
+    {
+        unsigned int avg = 0; // also passed as the last input operand of the asm statement
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vavrg2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(avg) : "r"(a), "r"(b), "r"(avg));
+    #else
+        // HAKMEM #23: a + b = 2 * (a | b) - (a ^ b) ==>
+        // (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1)
+        const unsigned int halfOfXor = ((a ^ b) & 0xfffefffe) >> 1; // mask keeps the shift inside each halfword
+        avg = (a | b) - halfOfXor;
+    #endif
+
+        return avg;
+    }
+
+    // Per-halfword a == b; each 16-bit halfword of the result is 1 (equal) or 0.
+    static __device__ __forceinline__ unsigned int vseteq2(unsigned int a, unsigned int b)
+    {
+        unsigned int flags = 0; // also passed as the last input operand of the asm statement
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vset2.u32.u32.eq %0, %1, %2, %3;" : "=r"(flags) : "r"(a), "r"(b), "r"(flags));
+    #else
+        // inspired by Alan Mycroft's null-byte detection algorithm:
+        // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
+        const unsigned int diff     = a ^ b;               // 0x0000 if a == b
+        const unsigned int withMsb  = diff | 0x80008000;   // set msbs, to catch carry out
+        const unsigned int msbClear = diff ^ withMsb;      // msb = 1 if diff < 0x8000
+        const unsigned int borrowed = withMsb - 0x00010001; // msb = 0, if diff was 0x0000 or 0x8000
+        const unsigned int hits     = msbClear & ~borrowed; // msb = 1, if diff was 0x0000
+        flags = hits >> 15;                                // convert to bool
+    #endif
+
+        return flags;
+    }
+
+    // Per-halfword a == b; each 16-bit halfword of the result is 0xffff (equal) or 0x0000.
+    static __device__ __forceinline__ unsigned int vcmpeq2(unsigned int a, unsigned int b)
+    {
+        unsigned int mask;
+
+    #if __CUDA_ARCH__ >= 300
+        const unsigned int flags = vseteq2(a, b);
+        mask = (flags << 16) - flags;                      // convert bool into mask
+    #else
+        // inspired by Alan Mycroft's null-byte detection algorithm:
+        // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
+        const unsigned int diff     = a ^ b;               // 0x0000 if a == b
+        const unsigned int withMsb  = diff | 0x80008000;   // set msbs, to catch carry out
+        const unsigned int msbClear = diff ^ withMsb;      // msb = 1 if diff < 0x8000
+        const unsigned int borrowed = withMsb - 0x00010001; // msb = 0, if diff was 0x0000 or 0x8000
+        const unsigned int msbs     = msbClear & ~borrowed; // msb = 1, if diff was 0x0000
+        mask = msbs | (msbs - (msbs >> 15));               // convert msbs to mask
+    #endif
+
+        return mask;
+    }
+
+    // Per-halfword unsigned a >= b; each 16-bit halfword of the result is 1 or 0.
+    static __device__ __forceinline__ unsigned int vsetge2(unsigned int a, unsigned int b)
+    {
+        unsigned int flags = 0; // also passed as the last input operand of the asm statement
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vset2.u32.u32.ge %0, %1, %2, %3;" : "=r"(flags) : "r"(a), "r"(b), "r"(flags));
+    #else
+        asm("not.b32 %0, %0;" : "+r"(b));             // b = ~b
+        const unsigned int halfDiff = vavrg2(a, b);   // (a + ~b + 1) / 2 = (a - b) / 2
+        flags = (halfDiff & 0x80008000) >> 15;        // msb = carry-outs, converted to bool
+    #endif
+
+        return flags;
+    }
+
+    // Per-halfword unsigned a >= b; each 16-bit halfword of the result is 0xffff or 0x0000.
+    static __device__ __forceinline__ unsigned int vcmpge2(unsigned int a, unsigned int b)
+    {
+        unsigned int mask;
+
+    #if __CUDA_ARCH__ >= 300
+        const unsigned int flags = vsetge2(a, b);
+        mask = (flags << 16) - flags;                        // convert bool into mask
+    #else
+        asm("not.b32 %0, %0;" : "+r"(b));                    // b = ~b
+        const unsigned int msbs = vavrg2(a, b) & 0x80008000; // (a + ~b + 1) / 2 = (a - b) / 2; msb = carry-outs
+        mask = msbs | (msbs - (msbs >> 15));                 // convert msbs to mask
+    #endif
+
+        return mask;
+    }
+
+    // Per-halfword unsigned a > b; each 16-bit halfword of the result is 1 or 0.
+    static __device__ __forceinline__ unsigned int vsetgt2(unsigned int a, unsigned int b)
+    {
+        unsigned int flags = 0; // also passed as the last input operand of the asm statement
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vset2.u32.u32.gt %0, %1, %2, %3;" : "=r"(flags) : "r"(a), "r"(b), "r"(flags));
+    #else
+        asm("not.b32 %0, %0;" : "+r"(b));             // b = ~b
+        const unsigned int halfDiff = vavg2(a, b);    // (a + ~b) / 2 = (a - b) / 2 [rounded down]
+        flags = (halfDiff & 0x80008000) >> 15;        // msbs = carry-outs, converted to bool
+    #endif
+
+        return flags;
+    }
+
+    // Per-halfword unsigned a > b; each 16-bit halfword of the result is 0xffff or 0x0000.
+    static __device__ __forceinline__ unsigned int vcmpgt2(unsigned int a, unsigned int b)
+    {
+        unsigned int mask;
+
+    #if __CUDA_ARCH__ >= 300
+        const unsigned int flags = vsetgt2(a, b);
+        mask = (flags << 16) - flags;                       // convert bool into mask
+    #else
+        asm("not.b32 %0, %0;" : "+r"(b));                   // b = ~b
+        const unsigned int msbs = vavg2(a, b) & 0x80008000; // (a + ~b) / 2 = (a - b) / 2 [rounded down]; msbs = carry-outs
+        mask = msbs | (msbs - (msbs >> 15));                // convert msbs to mask
+    #endif
+
+        return mask;
+    }
+
+    // Per-halfword unsigned a <= b; each 16-bit halfword of the result is 1 or 0.
+    static __device__ __forceinline__ unsigned int vsetle2(unsigned int a, unsigned int b)
+    {
+        unsigned int flags = 0; // also passed as the last input operand of the asm statement
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vset2.u32.u32.le %0, %1, %2, %3;" : "=r"(flags) : "r"(a), "r"(b), "r"(flags));
+    #else
+        asm("not.b32 %0, %0;" : "+r"(a));             // a = ~a
+        const unsigned int halfDiff = vavrg2(a, b);   // (b + ~a + 1) / 2 = (b - a) / 2
+        flags = (halfDiff & 0x80008000) >> 15;        // msb = carry-outs, converted to bool
+    #endif
+
+        return flags;
+    }
+
+    // Per-halfword unsigned a <= b; each 16-bit halfword of the result is 0xffff or 0x0000.
+    static __device__ __forceinline__ unsigned int vcmple2(unsigned int a, unsigned int b)
+    {
+        unsigned int mask;
+
+    #if __CUDA_ARCH__ >= 300
+        const unsigned int flags = vsetle2(a, b);
+        mask = (flags << 16) - flags;                        // convert bool into mask
+    #else
+        asm("not.b32 %0, %0;" : "+r"(a));                    // a = ~a
+        const unsigned int msbs = vavrg2(a, b) & 0x80008000; // (b + ~a + 1) / 2 = (b - a) / 2; msb = carry-outs
+        mask = msbs | (msbs - (msbs >> 15));                 // convert msbs to mask
+    #endif
+
+        return mask;
+    }
+
+    // Per-halfword unsigned a < b; each 16-bit halfword of the result is 1 or 0.
+    static __device__ __forceinline__ unsigned int vsetlt2(unsigned int a, unsigned int b)
+    {
+        unsigned int flags = 0; // also passed as the last input operand of the asm statement
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vset2.u32.u32.lt %0, %1, %2, %3;" : "=r"(flags) : "r"(a), "r"(b), "r"(flags));
+    #else
+        asm("not.b32 %0, %0;" : "+r"(a));             // a = ~a
+        const unsigned int halfDiff = vavg2(a, b);    // (b + ~a) / 2 = (b - a) / 2 [rounded down]
+        flags = (halfDiff & 0x80008000) >> 15;        // msb = carry-outs, converted to bool
+    #endif
+
+        return flags;
+    }
+
+    // Per-halfword unsigned a < b; each 16-bit halfword of the result is 0xffff or 0x0000.
+    static __device__ __forceinline__ unsigned int vcmplt2(unsigned int a, unsigned int b)
+    {
+        unsigned int mask;
+
+    #if __CUDA_ARCH__ >= 300
+        const unsigned int flags = vsetlt2(a, b);
+        mask = (flags << 16) - flags;                       // convert bool into mask
+    #else
+        asm("not.b32 %0, %0;" : "+r"(a));                   // a = ~a
+        const unsigned int msbs = vavg2(a, b) & 0x80008000; // (b + ~a) / 2 = (b - a) / 2 [rounded down]; msb = carry-outs
+        mask = msbs | (msbs - (msbs >> 15));                // convert msbs to mask
+    #endif
+
+        return mask;
+    }
+
+    // Per-halfword a != b; each 16-bit halfword of the result is 1 (different) or 0.
+    static __device__ __forceinline__ unsigned int vsetne2(unsigned int a, unsigned int b)
+    {
+        unsigned int flags = 0; // also passed as the last input operand of the asm statement
+
+    #if __CUDA_ARCH__ >= 300
+        asm ("vset2.u32.u32.ne %0, %1, %2, %3;" : "=r"(flags) : "r"(a), "r"(b), "r"(flags));
+    #else
+        // inspired by Alan Mycroft's null-byte detection algorithm:
+        // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
+        const unsigned int diff     = a ^ b;                          // 0x0000 if a == b
+        const unsigned int borrowed = (diff | 0x80008000) - 0x00010001; // msb = 0, if diff was 0x0000 or 0x8000
+        const unsigned int msbs     = (diff | borrowed) & 0x80008000; // msb = 1, if diff was not 0x0000
+        flags = msbs >> 15;                                           // convert to bool
+    #endif
+
+        return flags;
+    }
+
+    // Per-halfword a != b; each 16-bit halfword of the result is 0xffff (different) or 0x0000.
+    static __device__ __forceinline__ unsigned int vcmpne2(unsigned int a, unsigned int b)
+    {
+        unsigned int mask;
+
+    #if __CUDA_ARCH__ >= 300
+        const unsigned int flags = vsetne2(a, b);
+        mask = (flags << 16) - flags;                                 // convert bool into mask
+    #else
+        // inspired by Alan Mycroft's null-byte detection algorithm:
+        // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
+        const unsigned int diff     = a ^ b;                          // 0x0000 if a == b
+        const unsigned int borrowed = (diff | 0x80008000) - 0x00010001; // msb = 0, if diff was 0x0000 or 0x8000
+        const unsigned int msbs     = (diff | borrowed) & 0x80008000; // msb = 1, if diff was not 0x0000
+        mask = msbs | (msbs - (msbs >> 15));                          // convert msbs to mask
+    #endif
+
+        return mask;
+    }
+
+    // Per-halfword unsigned maximum of the two packed 16-bit halfwords of a and b.
+    static __device__ __forceinline__ unsigned int vmax2(unsigned int a, unsigned int b)
+    {
+        unsigned int result = 0; // also passed as the last input operand of the asm statements
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vmax2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
+    #elif __CUDA_ARCH__ >= 200
+        asm("vmax.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
+        asm("vmax.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
+    #else
+        const unsigned int lowMax  = ::max(a & 0x0000ffff, b & 0x0000ffff); // maximum of low halfwords
+        const unsigned int highMax = ::max(a & 0xffff0000, b & 0xffff0000); // maximum of high halfwords
+        result = lowMax | highMax;                                          // combine halfword maximums
+    #endif
+
+        return result;
+    }
+
+    // Per-halfword unsigned minimum of the two packed 16-bit halfwords of a and b.
+    static __device__ __forceinline__ unsigned int vmin2(unsigned int a, unsigned int b)
+    {
+        unsigned int result = 0; // also passed as the last input operand of the asm statements
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vmin2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
+    #elif __CUDA_ARCH__ >= 200
+        asm("vmin.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
+        asm("vmin.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(result) : "r"(a), "r"(b), "r"(result));
+    #else
+        const unsigned int lowMin  = ::min(a & 0x0000ffff, b & 0x0000ffff); // minimum of low halfwords
+        const unsigned int highMin = ::min(a & 0xffff0000, b & 0xffff0000); // minimum of high halfwords
+        result = lowMin | highMin;                                          // combine halfword minimums
+    #endif
+
+        return result;
+    }
+
+    // 4
+
+    // Adds the four packed bytes of a and b.
+    // The PTX paths carry the .sat modifier; the generic fallback wraps each byte.
+    static __device__ __forceinline__ unsigned int vadd4(unsigned int a, unsigned int b)
+    {
+        unsigned int sum = 0; // also passed as the last input operand of the asm statements
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vadd4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(sum) : "r"(a), "r"(b), "r"(sum));
+    #elif __CUDA_ARCH__ >= 200
+        asm("vadd.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(sum) : "r"(a), "r"(b), "r"(sum));
+        asm("vadd.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(sum) : "r"(a), "r"(b), "r"(sum));
+        asm("vadd.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(sum) : "r"(a), "r"(b), "r"(sum));
+        asm("vadd.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(sum) : "r"(a), "r"(b), "r"(sum));
+    #else
+        const unsigned int msbSum  = (a ^ b) & 0x80808080;                // msb sum bits
+        const unsigned int lowBits = (a & 0x7f7f7f7f) + (b & 0x7f7f7f7f); // add without msbs, record carry-out in msbs
+        sum = lowBits ^ msbSum;                                           // sum of msb sum and carry-in bits, w/o carry-out
+    #endif /* __CUDA_ARCH__ >= 300 */
+
+        return sum;
+    }
+
+    // Subtracts the four packed bytes of b from those of a.
+    // The PTX paths carry the .sat modifier; the generic fallback wraps each byte.
+    static __device__ __forceinline__ unsigned int vsub4(unsigned int a, unsigned int b)
+    {
+        unsigned int diff = 0; // also passed as the last input operand of the asm statements
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vsub4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(diff) : "r"(a), "r"(b), "r"(diff));
+    #elif __CUDA_ARCH__ >= 200
+        asm("vsub.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(diff) : "r"(a), "r"(b), "r"(diff));
+        asm("vsub.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(diff) : "r"(a), "r"(b), "r"(diff));
+        asm("vsub.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(diff) : "r"(a), "r"(b), "r"(diff));
+        asm("vsub.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(diff) : "r"(a), "r"(b), "r"(diff));
+    #else
+        const unsigned int invMsbSum = (a ^ ~b) & 0x80808080;               // inverted msb sum bits
+        const unsigned int lowBits   = (a | 0x80808080) - (b & 0x7f7f7f7f); // subtract w/o msbs, record inverted borrows in msb
+        diff = lowBits ^ invMsbSum;                                         // combine inverted msb sum bits and borrows
+    #endif
+
+        return diff;
+    }
+
+    // Per-byte unsigned average of a and b, rounded down.
+    static __device__ __forceinline__ unsigned int vavg4(unsigned int a, unsigned int b)
+    {
+        // HAKMEM #23: a + b = 2 * (a & b) + (a ^ b) ==>
+        // (a + b) / 2 = (a & b) + ((a ^ b) >> 1)
+        const unsigned int common    = a & b;
+        const unsigned int halfOfXor = ((a ^ b) & 0xfefefefe) >> 1; // mask keeps the shift inside each byte
+
+        return common + halfOfXor;
+    }
+
+    // Per-byte unsigned average of a and b, rounded up: (a + b + 1) / 2.
+    static __device__ __forceinline__ unsigned int vavrg4(unsigned int a, unsigned int b)
+    {
+        unsigned int avg = 0; // also passed as the last input operand of the asm statement
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vavrg4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(avg) : "r"(a), "r"(b), "r"(avg));
+    #else
+        // HAKMEM #23: a + b = 2 * (a | b) - (a ^ b) ==>
+        // (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1)
+        const unsigned int halfOfXor = ((a ^ b) & 0xfefefefe) >> 1; // mask keeps the shift inside each byte
+        avg = (a | b) - halfOfXor;
+    #endif
+
+        return avg;
+    }
+
+    // Per-byte a == b; each byte of the result is 1 (equal) or 0.
+    static __device__ __forceinline__ unsigned int vseteq4(unsigned int a, unsigned int b)
+    {
+        unsigned int flags = 0; // also passed as the last input operand of the asm statement
+
+    #if __CUDA_ARCH__ >= 300
+        asm("vset4.u32.u32.eq %0, %1, %2, %3;" : "=r"(flags) : "r"(a), "r"(b), "r"(flags));
+    #else
+        // inspired by Alan Mycroft's null-byte detection algorithm:
+        // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
+        const unsigned int diff     = a ^ b;                // 0x00 if a == b
+        const unsigned int withMsb  = diff | 0x80808080;    // set msbs, to catch carry out
+        const unsigned int msbClear = diff ^ withMsb;       // msb = 1 if diff < 0x80
+        const unsigned int borrowed = withMsb - 0x01010101; // msb = 0, if diff was 0x00 or 0x80
+        const unsigned int hits     = msbClear & ~borrowed; // msb = 1, if diff was 0x00
+        flags = hits >> 7;                                  // convert to bool
+    #endif
+
+        return flags;
+    }
+
+    // Per-byte a == b; each byte of the result is 0xff (equal) or 0x00.
+    static __device__ __forceinline__ unsigned int vcmpeq4(unsigned int a, unsigned int b)
+    {
+        unsigned int mask;
+
+    #if __CUDA_ARCH__ >= 300
+        const unsigned int flags = vseteq4(a, b);
+        mask = (flags << 8) - flags;                        // convert bool to mask
+    #else
+        // inspired by Alan Mycroft's null-byte detection algorithm:
+        // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
+        const unsigned int diff     = a ^ b;                // 0x00 if a == b
+        const unsigned int withMsb  = diff | 0x80808080;    // set msbs, to catch carry out
+        const unsigned int msbClear = diff ^ withMsb;       // msb = 1 if diff < 0x80
+        const unsigned int borrowed = withMsb - 0x01010101; // msb = 0, if diff was 0x00 or 0x80
+        const unsigned int msbs     = msbClear & ~borrowed; // msb = 1, if diff was 0x00
+        mask = (msbs - (msbs >> 7)) | msbs;                 // build mask from msbs
+    #endif
+
+        return mask;
+    }
+
+    // Byte-wise unsigned "a <= b" test on two words that each pack four bytes.
+    // Every result byte is 0x01 where the byte of a is <= the byte of b, else 0x00.
+    static __device__ __forceinline__ unsigned int vsetle4(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        // one PTX SIMD-in-a-word compare; r (still 0) is supplied as the fourth source operand
+        asm("vset4.u32.u32.le %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        unsigned int c;
+        asm("not.b32 %0, %0;" : "+r"(a)); // a = ~a (per byte: 255 - a)
+        c = vavrg4(a, b);   // (b + ~a + 1) / 2 = (b - a) / 2
+        c = c & 0x80808080; // msb = carry-outs: set exactly when b - a did not borrow, i.e. a <= b
+        r = c >> 7;         // convert to bool
+    #endif
+
+        return r;
+    }
+
+    // Byte-wise unsigned "a <= b" mask on two words that each pack four bytes.
+    // Every result byte is 0xff where the byte of a is <= the byte of b, else 0x00.
+    static __device__ __forceinline__ unsigned int vcmple4(unsigned int a, unsigned int b)
+    {
+        unsigned int r, c;
+
+    #if __CUDA_ARCH__ >= 300
+        r = vsetle4(a, b);  // 0x01 in every byte where a <= b
+        c = r << 8;         // convert bool
+        r = c - r;          //  to mask: (r << 8) - r == r * 255, so 0x01 -> 0xff
+    #else
+        asm("not.b32 %0, %0;" : "+r"(a)); // a = ~a (per byte: 255 - a)
+        c = vavrg4(a, b);   // (b + ~a + 1) / 2 = (b - a) / 2
+        c = c & 0x80808080; // msbs = carry-outs
+        r = c >> 7;         // convert: 0x01 per selected byte
+        r = c - r;          //  msbs to: 0x80 - 0x01 = 0x7f
+        r = c | r;          //   mask: 0x80 | 0x7f = 0xff
+    #endif
+
+        return r;
+    }
+
+    // Byte-wise unsigned "a < b" test on two words that each pack four bytes.
+    // Every result byte is 0x01 where the byte of a is < the byte of b, else 0x00.
+    static __device__ __forceinline__ unsigned int vsetlt4(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        // one PTX SIMD-in-a-word compare; r (still 0) is supplied as the fourth source operand
+        asm("vset4.u32.u32.lt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        unsigned int c;
+        asm("not.b32 %0, %0;" : "+r"(a)); // a = ~a (per byte: 255 - a)
+        // vavg4 is defined earlier in this header (outside this excerpt); the
+        // existing comment treats it as the rounded-down byte-wise average
+        c = vavg4(a, b);    // (b + ~a) / 2 = (b - a) / 2 [rounded down]
+        c = c & 0x80808080; // msb = carry-outs
+        r = c >> 7;         // convert to bool
+    #endif
+
+        return r;
+    }
+
+    // Byte-wise unsigned "a < b" mask on two words that each pack four bytes.
+    // Every result byte is 0xff where the byte of a is < the byte of b, else 0x00.
+    static __device__ __forceinline__ unsigned int vcmplt4(unsigned int a, unsigned int b)
+    {
+        unsigned int r, c;
+
+    #if __CUDA_ARCH__ >= 300
+        r = vsetlt4(a, b);  // 0x01 in every byte where a < b
+        c = r << 8;         // convert bool
+        r = c - r;          //  to mask: (r << 8) - r == r * 255, so 0x01 -> 0xff
+    #else
+        asm("not.b32 %0, %0;" : "+r"(a)); // a = ~a (per byte: 255 - a)
+        // vavg4 is defined earlier in this header (outside this excerpt)
+        c = vavg4(a, b);    // (b + ~a) / 2 = (b - a) / 2 [rounded down]
+        c = c & 0x80808080; // msbs = carry-outs
+        r = c >> 7;         // convert: 0x01 per selected byte
+        r = c - r;          //  msbs to: 0x80 - 0x01 = 0x7f
+        r = c | r;          //   mask: 0x80 | 0x7f = 0xff
+    #endif
+
+        return r;
+    }
+
+    // Byte-wise unsigned "a >= b" test on two words that each pack four bytes.
+    // Every result byte is 0x01 where the byte of a is >= the byte of b, else 0x00.
+    static __device__ __forceinline__ unsigned int vsetge4(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        // one PTX SIMD-in-a-word compare; r (still 0) is supplied as the fourth source operand
+        asm("vset4.u32.u32.ge %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        unsigned int c;
+        asm("not.b32 %0, %0;" : "+r"(b)); // b = ~b (per byte: 255 - b)
+        c = vavrg4(a, b);   // (a + ~b + 1) / 2 = (a - b) / 2
+        c = c & 0x80808080; // msb = carry-outs: set exactly when a - b did not borrow, i.e. a >= b
+        r = c >> 7;         // convert to bool
+    #endif
+
+        return r;
+    }
+
+    // Byte-wise unsigned "a >= b" mask on two words that each pack four bytes.
+    // Every result byte is 0xff where the byte of a is >= the byte of b, else 0x00.
+    // The fallback paths of vabsdiff4, vmax4 and vmin4 in this header build on it.
+    static __device__ __forceinline__ unsigned int vcmpge4(unsigned int a, unsigned int b)
+    {
+        unsigned int r, s;
+
+    #if __CUDA_ARCH__ >= 300
+        r = vsetge4(a, b);  // 0x01 in every byte where a >= b
+        s = r << 8;         // convert bool
+        r = s - r;          //  to mask: (r << 8) - r == r * 255, so 0x01 -> 0xff
+    #else
+        asm ("not.b32 %0,%0;" : "+r"(b)); // b = ~b (per byte: 255 - b)
+        r = vavrg4 (a, b);  // (a + ~b + 1) / 2 = (a - b) / 2
+        r = r & 0x80808080; // msb = carry-outs
+        s = r >> 7;         // build mask: 0x01 per selected byte
+        s = r - s;          //  from: 0x80 - 0x01 = 0x7f
+        r = s | r;          //   msbs: 0x7f | 0x80 = 0xff
+    #endif
+
+        return r;
+    }
+
+    // Byte-wise unsigned "a > b" test on two words that each pack four bytes.
+    // Every result byte is 0x01 where the byte of a is > the byte of b, else 0x00.
+    static __device__ __forceinline__ unsigned int vsetgt4(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        // one PTX SIMD-in-a-word compare; r (still 0) is supplied as the fourth source operand
+        asm("vset4.u32.u32.gt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        unsigned int c;
+        asm("not.b32 %0, %0;" : "+r"(b)); // b = ~b (per byte: 255 - b)
+        // vavg4 is defined earlier in this header (outside this excerpt); the
+        // existing comment treats it as the rounded-down byte-wise average
+        c = vavg4(a, b);    // (a + ~b) / 2 = (a - b) / 2 [rounded down]
+        c = c & 0x80808080; // msb = carry-outs
+        r = c >> 7;         // convert to bool
+    #endif
+
+        return r;
+    }
+
+    // Byte-wise unsigned "a > b" mask on two words that each pack four bytes.
+    // Every result byte is 0xff where the byte of a is > the byte of b, else 0x00.
+    static __device__ __forceinline__ unsigned int vcmpgt4(unsigned int a, unsigned int b)
+    {
+        unsigned int r, c;
+
+    #if __CUDA_ARCH__ >= 300
+        r = vsetgt4(a, b);  // 0x01 in every byte where a > b
+        c = r << 8;         // convert bool
+        r = c - r;          //  to mask: (r << 8) - r == r * 255, so 0x01 -> 0xff
+    #else
+        asm("not.b32 %0, %0;" : "+r"(b)); // b = ~b (per byte: 255 - b)
+        // vavg4 is defined earlier in this header (outside this excerpt)
+        c = vavg4(a, b);    // (a + ~b) / 2 = (a - b) / 2 [rounded down]
+        c = c & 0x80808080; // msb = carry-outs
+        r = c >> 7;         // convert: 0x01 per selected byte
+        r = c - r;          //  msbs to: 0x80 - 0x01 = 0x7f
+        r = c | r;          //   mask: 0x80 | 0x7f = 0xff
+    #endif
+
+        return r;
+    }
+
+    // Byte-wise inequality test on two words that each pack four unsigned bytes.
+    // Every result byte is 0x01 where the corresponding bytes of a and b
+    // differ and 0x00 where they are equal.
+    static __device__ __forceinline__ unsigned int vsetne4(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        // one PTX SIMD-in-a-word compare; r (still 0) is supplied as the fourth source operand
+        asm("vset4.u32.u32.ne %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        // inspired by Alan Mycroft's null-byte detection algorithm:
+        // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
+        unsigned int c;
+        r = a ^ b;          // 0x00 if a == b
+        c = r | 0x80808080; // set msbs, to catch carry out
+        c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
+        c = r | c;          // msb = 1, if r was not 0x00 (r's own msb covers the 0x80 case)
+        c = c & 0x80808080; // extract msbs
+        r = c >> 7;         // convert to bool
+    #endif
+
+        return r;
+    }
+
+    // Byte-wise inequality mask on two words that each pack four unsigned bytes.
+    // Every result byte is 0xff where the corresponding bytes of a and b
+    // differ and 0x00 where they are equal.
+    static __device__ __forceinline__ unsigned int vcmpne4(unsigned int a, unsigned int b)
+    {
+        unsigned int r, c;
+
+    #if __CUDA_ARCH__ >= 300
+        r = vsetne4(a, b);  // 0x01 in every differing byte
+        c = r << 8;         // convert bool
+        r = c - r;          //  to mask: (r << 8) - r == r * 255, so 0x01 -> 0xff
+    #else
+        // inspired by Alan Mycroft's null-byte detection algorithm:
+        // null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
+        r = a ^ b;          // 0x00 if a == b
+        c = r | 0x80808080; // set msbs, to catch carry out
+        c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
+        c = r | c;          // msb = 1, if r was not 0x00
+        c = c & 0x80808080; // extract msbs
+        r = c >> 7;         // convert: 0x01 per differing byte
+        r = c - r;          //  msbs to: 0x80 - 0x01 = 0x7f
+        r = c | r;          //   mask: 0x80 | 0x7f = 0xff
+    #endif
+
+        return r;
+    }
+
+    // Byte-wise unsigned absolute difference |a - b| on two words that each
+    // pack four bytes. Three implementations are selected by __CUDA_ARCH__.
+    static __device__ __forceinline__ unsigned int vabsdiff4(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        // one PTX SIMD-in-a-word instruction handles all four bytes
+        asm("vabsdiff4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #elif __CUDA_ARCH__ >= 200
+        // one scalar video instruction per byte lane (.b0 .. .b3); each one
+        // receives the r produced so far as its last operand
+        asm("vabsdiff.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+        asm("vabsdiff.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+        asm("vabsdiff.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+        asm("vabsdiff.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        unsigned int s;
+        s = vcmpge4(a, b);  // mask = 0xff if a >= b
+        r = a ^ b;          // per-byte difference bits, used to switch between a and b
+        s = (r &  s) ^ b;   // select a when a >= b, else select b => max(a,b)
+        r = s ^ r;          // select a when b >= a, else select b => min(a,b)
+        r = s - r;          // |a - b| = max(a,b) - min(a,b); no byte borrows because max >= min in every byte
+    #endif
+
+        return r;
+    }
+
+    // Byte-wise unsigned maximum of two words that each pack four bytes.
+    // Three implementations are selected by __CUDA_ARCH__.
+    static __device__ __forceinline__ unsigned int vmax4(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        // one PTX SIMD-in-a-word instruction handles all four bytes
+        asm("vmax4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #elif __CUDA_ARCH__ >= 200
+        // one scalar video instruction per byte lane (.b0 .. .b3); each one
+        // receives the r produced so far as its last operand
+        asm("vmax.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+        asm("vmax.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+        asm("vmax.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+        asm("vmax.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        unsigned int s;
+        s = vcmpge4(a, b);  // mask = 0xff if a >= b
+        r = a & s;          // select a when a >= b
+        s = b & ~s;         // select b when a < b
+        r = r | s;          // combine byte selections
+    #endif
+
+        return r;           // byte-wise unsigned maximum
+    }
+
+    // Byte-wise unsigned minimum of two words that each pack four bytes.
+    // Three implementations are selected by __CUDA_ARCH__.
+    static __device__ __forceinline__ unsigned int vmin4(unsigned int a, unsigned int b)
+    {
+        unsigned int r = 0;
+
+    #if __CUDA_ARCH__ >= 300
+        // one PTX SIMD-in-a-word instruction handles all four bytes
+        asm("vmin4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #elif __CUDA_ARCH__ >= 200
+        // one scalar video instruction per byte lane (.b0 .. .b3); each one
+        // receives the r produced so far as its last operand
+        asm("vmin.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+        asm("vmin.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+        asm("vmin.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+        asm("vmin.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
+    #else
+        unsigned int s;
+        s = vcmpge4(b, a);  // mask = 0xff if b >= a (note the swapped arguments)
+        r = a & s;          // select a when b >= a
+        s = b & ~s;         // select b when b < a
+        r = r | s;          // combine byte selections
+    #endif
+
+        return r;           // byte-wise unsigned minimum
+    }
+}}}
+
+//! @endcond
+
+#endif // OPENCV_CUDA_SIMD_FUNCTIONS_HPP
--- a/3rdparty/opencv/inc/opencv2/core/cuda/transform.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda/transform.hpp
@@ -0,0 +1,75 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_TRANSFORM_HPP
+#define OPENCV_CUDA_TRANSFORM_HPP
+
+#include "common.hpp"
+#include "utility.hpp"
+#include "detail/transform_detail.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    // Unary transform entry point: forwards src, dst, op, mask and stream to
+    // transform_detail::TransformDispatcher<flag>::call. The compile-time flag
+    // is true only when both T and D are single-channel (VecTraits<>::cn == 1)
+    // and the functor's TransformFunctorTraits::smart_shift is not 1.
+    template <typename T, typename D, typename UnOp, typename Mask>
+    static inline void transform(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, const Mask& mask, cudaStream_t stream)
+    {
+        typedef TransformFunctorTraits<UnOp> OpTraits;
+        typedef transform_detail::TransformDispatcher<
+            VecTraits<T>::cn == 1 &&
+            VecTraits<D>::cn == 1 &&
+            OpTraits::smart_shift != 1> Dispatcher;
+
+        Dispatcher::call(src, dst, op, mask, stream);
+    }
+
+    // Binary transform entry point: forwards src1, src2, dst, op, mask and
+    // stream to transform_detail::TransformDispatcher<flag>::call. The
+    // compile-time flag is true only when T1, T2 and D are all single-channel
+    // (VecTraits<>::cn == 1) and the functor's
+    // TransformFunctorTraits::smart_shift is not 1.
+    template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
+    static inline void transform(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, const Mask& mask, cudaStream_t stream)
+    {
+        typedef TransformFunctorTraits<BinOp> OpTraits;
+        typedef transform_detail::TransformDispatcher<
+            VecTraits<T1>::cn == 1 &&
+            VecTraits<T2>::cn == 1 &&
+            VecTraits<D>::cn == 1 &&
+            OpTraits::smart_shift != 1> Dispatcher;
+
+        Dispatcher::call(src1, src2, dst, op, mask, stream);
+    }
+}}}
+
+//! @endcond
+
+#endif // OPENCV_CUDA_TRANSFORM_HPP
--- a/3rdparty/opencv/inc/opencv2/core/cuda/type_traits.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda/type_traits.hpp
@@ -0,0 +1,90 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_TYPE_TRAITS_HPP
+#define OPENCV_CUDA_TYPE_TRAITS_HPP
+
+#include "detail/type_traits_detail.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    // Compile-time predicate used by TypeTraits::ParameterType below.
+    // value is non-zero when any of these detail traits reports true:
+    //   - IsIntegral<T>
+    //   - IsFloat<T>
+    //   - PointerTraits<U>, where U is T passed through ReferenceTraits<T>::type
+    // The detail traits come from detail/type_traits_detail.hpp; presumably the
+    // last case means "pointer, possibly behind a reference" (confirm there).
+    template <typename T> struct IsSimpleParameter
+    {
+        enum {value = type_traits_detail::IsIntegral<T>::value || type_traits_detail::IsFloat<T>::value ||
+            type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<T>::type>::value};
+    };
+
+    // Bundle of compile-time facts about T, assembled from the helpers in
+    // type_traits_detail (detail/type_traits_detail.hpp). It exposes derived
+    // types as typedefs and classification flags as anonymous enum constants.
+    template <typename T> struct TypeTraits
+    {
+        // T run through UnConst, UnVolatile, or both (UnConst applied first)
+        typedef typename type_traits_detail::UnConst<T>::type                                                NonConstType;
+        typedef typename type_traits_detail::UnVolatile<T>::type                                             NonVolatileType;
+        typedef typename type_traits_detail::UnVolatile<typename type_traits_detail::UnConst<T>::type>::type UnqualifiedType;
+        // PointerTraits is applied to the unqualified type, ReferenceTraits to T itself
+        typedef typename type_traits_detail::PointerTraits<UnqualifiedType>::type                            PointeeType;
+        typedef typename type_traits_detail::ReferenceTraits<T>::type                                        ReferredType;
+
+        // qualifier flags, taken from the ::value of the same helpers
+        enum { isConst          = type_traits_detail::UnConst<T>::value };
+        enum { isVolatile       = type_traits_detail::UnVolatile<T>::value };
+
+        // isPointer looks through ReferenceTraits before consulting PointerTraits
+        enum { isReference      = type_traits_detail::ReferenceTraits<UnqualifiedType>::value };
+        enum { isPointer        = type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<UnqualifiedType>::type>::value };
+
+        // category flags, all evaluated on the unqualified type
+        // ("IsSignedIntergral" is the helper's actual spelling)
+        enum { isUnsignedInt    = type_traits_detail::IsUnsignedIntegral<UnqualifiedType>::value };
+        enum { isSignedInt      = type_traits_detail::IsSignedIntergral<UnqualifiedType>::value };
+        enum { isIntegral       = type_traits_detail::IsIntegral<UnqualifiedType>::value };
+        enum { isFloat          = type_traits_detail::IsFloat<UnqualifiedType>::value };
+        enum { isArith          = isIntegral || isFloat };
+        enum { isVec            = type_traits_detail::IsVec<UnqualifiedType>::value };
+
+        // T itself when IsSimpleParameter holds for the unqualified type,
+        // otherwise whatever AddParameterType<T>::type yields
+        typedef typename type_traits_detail::Select<IsSimpleParameter<UnqualifiedType>::value,
+            T, typename type_traits_detail::AddParameterType<T>::type>::type ParameterType;
+    };
+}}}
+
+//! @endcond
+
+#endif // OPENCV_CUDA_TYPE_TRAITS_HPP
--- a/3rdparty/opencv/inc/opencv2/core/cuda/utility.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda/utility.hpp
@@ -0,0 +1,230 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_UTILITY_HPP
+#define OPENCV_CUDA_UTILITY_HPP
+
+#include "saturate_cast.hpp"
+#include "datamov_utils.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    // Abstract byte allocator interface (value_type is uchar). Concrete
+    // allocators override allocate/deallocate; one instance is reachable
+    // through the static getAllocator/setAllocator pair, whose definitions are
+    // not in this header. Presumably used for Thrust temporary storage, as the
+    // name suggests (confirm at the call sites).
+    struct CV_EXPORTS ThrustAllocator
+    {
+        typedef uchar value_type;
+        virtual ~ThrustAllocator();
+        // returns storage for numBytes bytes
+        virtual __device__ __host__ uchar* allocate(size_t numBytes) = 0;
+        // releases ptr; numBytes is the size passed alongside it
+        virtual __device__ __host__ void deallocate(uchar* ptr, size_t numBytes) = 0;
+        // accessors for the shared allocator instance
+        static ThrustAllocator& getAllocator();
+        static void setAllocator(ThrustAllocator* allocator);
+    };
+    // Warp and shared-memory bank constants, each given as a log2 value and
+    // the matching power of two (warp size: 1 << 5 = 32).
+    // NOTE(review): OPENCV_CUDA_LOG_MEM_BANKS uses __CUDA_ARCH__ inside a C++
+    // expression rather than in an #if, so it only expands to valid code where
+    // __CUDA_ARCH__ is defined - presumably device compilation only; confirm.
+    #define OPENCV_CUDA_LOG_WARP_SIZE        (5)
+    #define OPENCV_CUDA_WARP_SIZE            (1 << OPENCV_CUDA_LOG_WARP_SIZE)
+    #define OPENCV_CUDA_LOG_MEM_BANKS        ((__CUDA_ARCH__ >= 200) ? 5 : 4) // 32 banks on fermi, 16 on tesla
+    #define OPENCV_CUDA_MEM_BANKS            (1 << OPENCV_CUDA_LOG_MEM_BANKS)
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // swap
+
+    // Exchanges the values of a and b through one temporary copy.
+    // Callable from both host and device code.
+    template <typename T> void __device__ __host__ __forceinline__ swap(T& a, T& b)
+    {
+        const T saved = b;
+        b = a;
+        a = saved;
+    }
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // Mask Reader
+
+    // Mask functor over a single byte mask: element (y, x) passes when the
+    // mask byte at row y, column x is non-zero.
+    struct SingleMask
+    {
+        PtrStepb mask;
+
+        explicit __host__ __device__ __forceinline__ SingleMask(PtrStepb mask_) : mask(mask_) {}
+        __host__ __device__ __forceinline__ SingleMask(const SingleMask& mask_): mask(mask_.mask){}
+
+        // true when the mask byte at (y, x) is set
+        __device__ __forceinline__ bool operator()(int y, int x) const
+        {
+            const uchar* maskRow = mask.ptr(y);
+            return maskRow[x] != 0;
+        }
+    };
+
+    // Mask functor over a single byte mask where one mask element covers
+    // `channels` consecutive x positions: element (y, x) passes when the mask
+    // byte at row y, column x / channels is non-zero.
+    struct SingleMaskChannels
+    {
+        PtrStepb mask;
+        int channels;
+
+        __host__ __device__ __forceinline__ SingleMaskChannels(PtrStepb mask_, int channels_)
+            : mask(mask_), channels(channels_) {}
+        __host__ __device__ __forceinline__ SingleMaskChannels(const SingleMaskChannels& mask_)
+            : mask(mask_.mask), channels(mask_.channels) {}
+
+        // true when the mask byte covering column x of row y is set
+        __device__ __forceinline__ bool operator()(int y, int x) const
+        {
+            const int maskColumn = x / channels;
+            return mask.ptr(y)[maskColumn] != 0;
+        }
+    };
+
+    // Mask functor that walks an array of byte masks. One mask at a time is
+    // "current" (curMask); next() and setMask() choose it, and operator()
+    // tests it. A current mask with a null data pointer lets every element pass.
+    struct MaskCollection
+    {
+        const PtrStepb* maskCollection;
+        PtrStepb curMask;
+
+        // curMask is not set here; call next() or setMask() before testing
+        explicit __host__ __device__ __forceinline__ MaskCollection(PtrStepb* maskCollection_)
+            : maskCollection(maskCollection_) {}
+
+        __device__ __forceinline__ MaskCollection(const MaskCollection& masks_)
+            : maskCollection(masks_.maskCollection), curMask(masks_.curMask) {}
+
+        // make the mask under the cursor current, then advance the cursor
+        __device__ __forceinline__ void next()
+        {
+            curMask = *maskCollection;
+            ++maskCollection;
+        }
+
+        // make the z-th mask (relative to the cursor) current
+        __device__ __forceinline__ void setMask(int z)
+        {
+            curMask = maskCollection[z];
+        }
+
+        // true when there is no current mask, or its byte at (y, x) is non-zero
+        __device__ __forceinline__ bool operator()(int y, int x) const
+        {
+            if (curMask.data == 0)
+                return true;
+
+            uchar maskValue;
+            ForceGlob<uchar>::Load(curMask.ptr(y), x, maskValue);
+            return maskValue != 0;
+        }
+    };
+
+    // "No mask" functor: offers the same members as the mask functors in this
+    // header but accepts every element and ignores every argument.
+    struct WithOutMask
+    {
+        __host__ __device__ __forceinline__ WithOutMask(){}
+        __host__ __device__ __forceinline__ WithOutMask(const WithOutMask&){}
+
+        // static checks (two- and three-argument forms): always pass
+        static __device__ __forceinline__ bool check(int, int) { return true; }
+        static __device__ __forceinline__ bool check(int, int, int) { return true; }
+
+        // functor checks (two- and three-argument forms): always pass
+        __device__ __forceinline__ bool operator()(int, int) const { return true; }
+        __device__ __forceinline__ bool operator()(int, int, int) const { return true; }
+
+        // mask selection is a no-op because there is no mask
+        __device__ __forceinline__ void next() const {}
+        __device__ __forceinline__ void setMask(int) const {}
+    };
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // Solve linear system
+
+    // Solves the 2x2 linear system A * x = b by Cramer's rule.
+    // Returns false and leaves x untouched when the determinant of A compares
+    // equal to zero; otherwise writes both unknowns through saturate_cast<T>
+    // and returns true.
+    template <typename T> __device__ __forceinline__ bool solve2x2(const T A[2][2], const T b[2], T x[2])
+    {
+        const T det = A[0][0] * A[1][1] - A[1][0] * A[0][1];
+
+        // singular matrix: nothing to solve
+        if (det == 0)
+            return false;
+
+        const double invdet = 1.0 / det;
+
+        // each unknown is the determinant of A with one column replaced by b, scaled by 1/det
+        x[0] = saturate_cast<T>(invdet * (b[0] * A[1][1] - b[1] * A[0][1]));
+        x[1] = saturate_cast<T>(invdet * (A[0][0] * b[1] - A[1][0] * b[0]));
+
+        return true;
+    }
+
+    // Solves the 3x3 linear system A * x = b by Cramer's rule.
+    // Returns false and leaves x untouched when the determinant of A compares
+    // equal to zero; otherwise writes all three unknowns through
+    // saturate_cast<T> and returns true.
+    template <typename T> __device__ __forceinline__ bool solve3x3(const T A[3][3], const T b[3], T x[3])
+    {
+        // cofactor expansion along the first row
+        const T det = A[0][0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1])
+                    - A[0][1] * (A[1][0] * A[2][2] - A[1][2] * A[2][0])
+                    + A[0][2] * (A[1][0] * A[2][1] - A[1][1] * A[2][0]);
+
+        // singular matrix: nothing to solve
+        if (det == 0)
+            return false;
+
+        const double invdet = 1.0 / det;
+
+        // column 0 of A replaced by b
+        x[0] = saturate_cast<T>(invdet *
+            (b[0]    * (A[1][1] * A[2][2] - A[1][2] * A[2][1]) -
+             A[0][1] * (b[1]    * A[2][2] - A[1][2] * b[2]   ) +
+             A[0][2] * (b[1]    * A[2][1] - A[1][1] * b[2]   )));
+
+        // column 1 of A replaced by b
+        x[1] = saturate_cast<T>(invdet *
+            (A[0][0] * (b[1]    * A[2][2] - A[1][2] * b[2]   ) -
+             b[0]    * (A[1][0] * A[2][2] - A[1][2] * A[2][0]) +
+             A[0][2] * (A[1][0] * b[2]    - b[1]    * A[2][0])));
+
+        // column 2 of A replaced by b
+        x[2] = saturate_cast<T>(invdet *
+            (A[0][0] * (A[1][1] * b[2]    - b[1]    * A[2][1]) -
+             A[0][1] * (A[1][0] * b[2]    - b[1]    * A[2][0]) +
+             b[0]    * (A[1][0] * A[2][1] - A[1][1] * A[2][0])));
+
+        return true;
+    }
+}}} // namespace cv { namespace cuda { namespace device
+
+//! @endcond
+
+#endif // OPENCV_CUDA_UTILITY_HPP
--- a/3rdparty/opencv/inc/opencv2/core/cuda/vec_distance.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda/vec_distance.hpp
@@ -0,0 +1,232 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_VEC_DISTANCE_HPP
+#define OPENCV_CUDA_VEC_DISTANCE_HPP
+
+#include "reduce.hpp"
+#include "functional.hpp"
+#include "detail/vec_distance_detail.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    // L1 (sum of absolute differences) accumulator, generic version.
+    // It works on int values whatever T is; float has its own specialization.
+    template <typename T> struct L1Dist
+    {
+        typedef int value_type;
+        typedef int result_type;
+
+        // running per-thread sum
+        int mySum;
+
+        __device__ __forceinline__ L1Dist() : mySum(0) {}
+
+        // accumulated distance so far
+        __device__ __forceinline__ operator int() const
+        {
+            return mySum;
+        }
+
+        // folds one pair of elements into the sum via the __sad intrinsic
+        __device__ __forceinline__ void reduceIter(int val1, int val2)
+        {
+            mySum = __sad(val1, val2, mySum);
+        }
+
+        // hands the partial sum to reduce<THREAD_DIM> with a plus<int> functor
+        template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
+        {
+            reduce<THREAD_DIM>(smem, mySum, tid, plus<int>());
+        }
+    };
+    // L1 (sum of absolute differences) accumulator specialized for float.
+    template <> struct L1Dist<float>
+    {
+        typedef float value_type;
+        typedef float result_type;
+
+        // running per-thread sum
+        float mySum;
+
+        __device__ __forceinline__ L1Dist() : mySum(0.0f) {}
+
+        // accumulated distance so far
+        __device__ __forceinline__ operator float() const
+        {
+            return mySum;
+        }
+
+        // folds |val1 - val2| into the sum
+        __device__ __forceinline__ void reduceIter(float val1, float val2)
+        {
+            const float delta = val1 - val2;
+            mySum += ::fabs(delta);
+        }
+
+        // hands the partial sum to reduce<THREAD_DIM> with a plus<float> functor
+        template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
+        {
+            reduce<THREAD_DIM>(smem, mySum, tid, plus<float>());
+        }
+    };
+
+    // L2 (Euclidean) distance accumulator: keeps a sum of squared differences
+    // and applies the square root only when converted to float.
+    struct L2Dist
+    {
+        typedef float value_type;
+        typedef float result_type;
+
+        // running per-thread sum of squared differences
+        float mySum;
+
+        __device__ __forceinline__ L2Dist() : mySum(0.0f) {}
+
+        // square root of the accumulated sum of squares
+        __device__ __forceinline__ operator float() const
+        {
+            return sqrtf(mySum);
+        }
+
+        // folds (val1 - val2)^2 into the sum
+        __device__ __forceinline__ void reduceIter(float val1, float val2)
+        {
+            const float delta = val1 - val2;
+            mySum += delta * delta;
+        }
+
+        // hands the partial sum to reduce<THREAD_DIM> with a plus<float> functor
+        template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
+        {
+            reduce<THREAD_DIM>(smem, mySum, tid, plus<float>());
+        }
+    };
+
+    // Hamming distance accumulator: counts the bits that differ between
+    // pairs of int values.
+    struct HammingDist
+    {
+        typedef int value_type;
+        typedef int result_type;
+
+        // running per-thread count of differing bits
+        int mySum;
+
+        __device__ __forceinline__ HammingDist() : mySum(0) {}
+
+        // accumulated distance so far
+        __device__ __forceinline__ operator int() const
+        {
+            return mySum;
+        }
+
+        // XOR marks the differing bits, __popc counts them
+        __device__ __forceinline__ void reduceIter(int val1, int val2)
+        {
+            const int differingBits = __popc(val1 ^ val2);
+            mySum += differingBits;
+        }
+
+        // hands the partial count to reduce<THREAD_DIM> with a plus<int> functor
+        template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
+        {
+            reduce<THREAD_DIM>(smem, mySum, tid, plus<int>());
+        }
+    };
+
+    // calc distance between two vectors in global memory
+    //
+    // Each thread visits elements tid, tid + THREAD_DIM, tid + 2 * THREAD_DIM, ...
+    // below len, loads one element from each vector through ForceGlob and
+    // feeds the pair to dist.reduceIter. Afterwards dist.reduceAll<THREAD_DIM>
+    // is called with smem and tid to combine the per-thread partial results.
+    //
+    //   vec1, vec2 - input vectors, len elements each
+    //   dist       - distance accumulator (L1Dist, L2Dist, HammingDist, ...), updated in place
+    //   smem       - scratch buffer handed to dist.reduceAll
+    //   tid        - index of the calling thread within its group of THREAD_DIM threads
+    template <int THREAD_DIM, typename Dist, typename T1, typename T2>
+    __device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid)
+    {
+        for (int i = tid; i < len; i += THREAD_DIM)
+        {
+            T1 val1;
+            ForceGlob<T1>::Load(vec1, i, val1);
+
+            T2 val2;
+            ForceGlob<T2>::Load(vec2, i, val2);
+
+            dist.reduceIter(val1, val2);
+        }
+
+        // Dist is a template parameter, so reduceAll is a dependent member
+        // template; the 'template' keyword is required for '<' to be parsed as
+        // the start of a template argument list by conforming compilers.
+        dist.template reduceAll<THREAD_DIM>(smem, tid);
+    }
+
+    // calc distance between two vectors, first vector is cached in register or shared memory, second vector is in global memory
+    //
+    // The per-element accumulation is delegated to
+    // vec_distance_detail::VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>::calc;
+    // afterwards dist.reduceAll<THREAD_DIM> is called with smem and tid to
+    // combine the per-thread partial results.
+    template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename Dist, typename T1, typename T2>
+    __device__ __forceinline__ void calcVecDiffCached(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, typename Dist::result_type* smem, int tid)
+    {
+        vec_distance_detail::VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>::calc(vecCached, vecGlob, len, dist, tid);
+
+        // Dist is a template parameter, so reduceAll is a dependent member
+        // template; the 'template' keyword is required for '<' to be parsed as
+        // the start of a template argument list by conforming compilers.
+        dist.template reduceAll<THREAD_DIM>(smem, tid);
+    }
+
+    // calc distance between two vectors in global memory
+    //
+    // Holds the pointer to the first vector. The constructor accepts four
+    // extra, ignored arguments so it can be called with the same argument list
+    // as VecDiffCachedRegister.
+    template <int THREAD_DIM, typename T1> struct VecDiffGlobal
+    {
+        const T1* vec1;
+
+        explicit __device__ __forceinline__ VecDiffGlobal(const T1* vec1_, int = 0, void* = 0, int = 0, int = 0)
+            : vec1(vec1_)
+        {
+        }
+
+        // accumulates the distance between the stored vector and vec2 into dist
+        template <typename T2, typename Dist>
+        __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
+        {
+            calcVecDiffGlobal<THREAD_DIM>(vec1, vec2, len, dist, smem, tid);
+        }
+    };
+
+    // calc distance between two vectors, first vector is cached in register memory, second vector is in global memory
+    //
+    // The constructor stages vec1 through smem and then copies this thread's
+    // share (elements tid, tid + THREAD_DIM, ...) into the per-thread array
+    // vec1Vals. calc() then forwards that array to calcVecDiffCached.
+    template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename U> struct VecDiffCachedRegister
+    {
+        template <typename T1> __device__ __forceinline__ VecDiffCachedRegister(const T1* vec1, int len, U* smem, int glob_tid, int tid)
+        {
+            // threads with glob_tid < len each publish one element of vec1 into smem
+            if (glob_tid < len)
+                smem[glob_tid] = vec1[glob_tid];
+            __syncthreads(); // wait until all staged elements are visible
+
+            U* vec1ValsPtr = vec1Vals;
+
+            // NOTE(review): this reads smem[i] for every i < MAX_LEN regardless of
+            // len, and vec1Vals has MAX_LEN / THREAD_DIM slots - presumably callers
+            // guarantee smem holds MAX_LEN elements and MAX_LEN is a multiple of
+            // THREAD_DIM; confirm at the call sites.
+            #pragma unroll
+            for (int i = tid; i < MAX_LEN; i += THREAD_DIM)
+                *vec1ValsPtr++ = smem[i];
+
+            __syncthreads(); // all reads of smem are finished once this barrier is passed
+        }
+
+        // accumulates the distance between the cached vector and vec2 into dist
+        template <typename T2, typename Dist>
+        __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
+        {
+            calcVecDiffCached<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>(vec1Vals, vec2, len, dist, smem, tid);
+        }
+
+        // this thread's strided slice of the first vector
+        U vec1Vals[MAX_LEN / THREAD_DIM];
+    };
+}}} // namespace cv { namespace cuda { namespace device
+
+//! @endcond
+
+#endif // OPENCV_CUDA_VEC_DISTANCE_HPP
--- a/3rdparty/opencv/inc/opencv2/core/cuda/vec_math.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda/vec_math.hpp
@@ -0,0 +1,923 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_VECMATH_HPP
+#define OPENCV_CUDA_VECMATH_HPP
+
+#include "vec_traits.hpp"
+#include "saturate_cast.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+
+// saturate_cast: component-wise saturate_cast between CUDA vector types
+// vec_math_detail: helpers that pick the conversion by the destination's VecTraits<VecD>::cn value
+namespace vec_math_detail
+{
+    template <int cn, typename VecD> struct SatCastHelper; // primary template is declared only; cn = 1, 2, 3 and 4 are specialized below
+    template <typename VecD> struct SatCastHelper<1, VecD> // cn == 1: converts v.x
+    {
+        template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
+        {
+            typedef typename VecTraits<VecD>::elem_type D; // element type each component is saturated to
+            return VecTraits<VecD>::make(saturate_cast<D>(v.x));
+        }
+    };
+    template <typename VecD> struct SatCastHelper<2, VecD> // cn == 2: converts v.x and v.y
+    {
+        template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
+        {
+            typedef typename VecTraits<VecD>::elem_type D;
+            return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y));
+        }
+    };
+    template <typename VecD> struct SatCastHelper<3, VecD> // cn == 3: converts v.x, v.y and v.z
+    {
+        template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
+        {
+            typedef typename VecTraits<VecD>::elem_type D;
+            return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z));
+        }
+    };
+    template <typename VecD> struct SatCastHelper<4, VecD> // cn == 4: converts v.x, v.y, v.z and v.w
+    {
+        template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
+        {
+            typedef typename VecTraits<VecD>::elem_type D;
+            return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z), saturate_cast<D>(v.w));
+        }
+    };
+    // Entry point used by the public overloads: selects SatCastHelper<VecTraits<VecD>::cn, VecD> and applies its cast() to v.
+    template <typename VecD, typename VecS> static __device__ __forceinline__ VecD saturate_cast_helper(const VecS& v)
+    {
+        return SatCastHelper<VecTraits<VecD>::cn, VecD>::cast(v);
+    }
+}
+// Public overloads, one per source vector type (1-, 2-, 3-, then 4-component); T appears only in the return type, so callers must name it explicitly.
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const char1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const short1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uint1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const int1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const float1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const double1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+// two-component sources
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const char2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const short2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uint2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const int2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const float2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const double2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+// three-component sources
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const char3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const short3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uint3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const int3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const float3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const double3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+// four-component sources
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const char4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const short4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const uint4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const int4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const float4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+template<typename T> static __device__ __forceinline__ T saturate_cast(const double4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
+
+// unary operators: CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(op, input_type, output_type) defines operator op for input_type1 .. input_type4,
+// applying op to each component (x, y, z, w as present) and building the output_typeN result with VecTraits<output_typeN>::make.
+#define CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(op, input_type, output_type) \
+    __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a) \
+    { \
+        return VecTraits<output_type ## 1>::make(op (a.x)); \
+    } \
+    __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a) \
+    { \
+        return VecTraits<output_type ## 2>::make(op (a.x), op (a.y)); \
+    } \
+    __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a) \
+    { \
+        return VecTraits<output_type ## 3>::make(op (a.x), op (a.y), op (a.z)); \
+    } \
+    __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a) \
+    { \
+        return VecTraits<output_type ## 4>::make(op (a.x), op (a.y), op (a.z), op (a.w)); \
+    }
+// unary minus: char, short, int, float and double vectors only; the result keeps the input's element type
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, char, char)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, short, short)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, int, int)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, double, double)
+// logical not: defined for all eight element types; the result is always ucharN
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, double, uchar)
+// bitwise not: integer vectors only; the result keeps the input's element type
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, char, char)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, short, short)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, int, int)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uint, uint)
+// the generator macro is removed once all unary operators are defined
+#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_OP
+
+// unary functions: CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(func_name, func, input_type, output_type) defines func_name for input_type1 .. input_type4,
+// calling func on each component (x, y, z, w as present) and building the output_typeN result with VecTraits<output_typeN>::make.
+#define CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(func_name, func, input_type, output_type) \
+    __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a) \
+    { \
+        return VecTraits<output_type ## 1>::make(func (a.x)); \
+    } \
+    __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a) \
+    { \
+        return VecTraits<output_type ## 2>::make(func (a.x), func (a.y)); \
+    } \
+    __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a) \
+    { \
+        return VecTraits<output_type ## 3>::make(func (a.x), func (a.y), func (a.z)); \
+    } \
+    __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a) \
+    { \
+        return VecTraits<output_type ## 4>::make(func (a.x), func (a.y), func (a.z), func (a.w)); \
+    }
+// abs: this macro list generates it for float vectors only, via ::fabsf
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabsf, float, float)
+// sqrt: uchar .. uint and float vectors go through ::sqrtf and return floatN; double vectors use ::sqrt and return doubleN
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrt, double, double)
+// exp: same type mapping as sqrt (::expf -> floatN, ::exp -> doubleN)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::exp, double, double)
+// exp2: ::exp2f -> floatN, ::exp2 -> doubleN
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2, double, double)
+// exp10: ::exp10f -> floatN, ::exp10 -> doubleN
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10, double, double)
+// log: uchar .. uint and float vectors go through ::logf and return floatN; double vectors use ::log and return doubleN
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::log, double, double)
+// log2: ::log2f -> floatN, ::log2 -> doubleN
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2, double, double)
+// log10: ::log10f -> floatN, ::log10 -> doubleN
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10, double, double)
+// sin: uchar .. uint and float vectors go through ::sinf and return floatN; double vectors use ::sin and return doubleN
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sin, double, double)
+// cos: ::cosf -> floatN, ::cos -> doubleN
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cos, double, double)
+// tan: ::tanf -> floatN, ::tan -> doubleN
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tan, double, double)
+// asin: ::asinf -> floatN, ::asin -> doubleN
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asin, double, double)
+// acos: ::acosf -> floatN, ::acos -> doubleN
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acos, double, double)
+// atan: ::atanf -> floatN, ::atan -> doubleN
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atan, double, double)
+// sinh: uchar .. uint and float vectors go through ::sinhf and return floatN; double vectors use ::sinh and return doubleN
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinh, double, double)
+// cosh: ::coshf -> floatN, ::cosh -> doubleN
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::cosh, double, double)
+// tanh: ::tanhf -> floatN, ::tanh -> doubleN
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanh, double, double)
+// asinh: ::asinhf -> floatN, ::asinh -> doubleN
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinh, double, double)
+// acosh: ::acoshf -> floatN, ::acosh -> doubleN
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acosh, double, double)
+// atanh: ::atanhf -> floatN, ::atanh -> doubleN
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanh, double, double)
+// the generator macro is removed once all unary functions are defined
+#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC
+
+// binary operators (vec & vec): CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(op, input_type, output_type) defines operator op for two operands of the same
+// input_typeN (N = 1 .. 4), combining matching components with op and building the output_typeN result with VecTraits<output_typeN>::make.
+#define CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(op, input_type, output_type) \
+    __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, const input_type ## 1 & b) \
+    { \
+        return VecTraits<output_type ## 1>::make(a.x op b.x); \
+    } \
+    __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, const input_type ## 2 & b) \
+    { \
+        return VecTraits<output_type ## 2>::make(a.x op b.x, a.y op b.y); \
+    } \
+    __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, const input_type ## 3 & b) \
+    { \
+        return VecTraits<output_type ## 3>::make(a.x op b.x, a.y op b.y, a.z op b.z); \
+    } \
+    __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, const input_type ## 4 & b) \
+    { \
+        return VecTraits<output_type ## 4>::make(a.x op b.x, a.y op b.y, a.z op b.z, a.w op b.w); \
+    }
+// addition: uchar, char, ushort and short vectors produce intN; int, uint, float and double vectors keep their element type
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uchar, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, char, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, ushort, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, short, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, double, double)
+// subtraction: same input-to-output type mapping as addition
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uchar, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, char, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, ushort, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, short, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, double, double)
+// multiplication: same input-to-output type mapping as addition
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uchar, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, char, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, ushort, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, short, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, double, double)
+// division: same input-to-output type mapping as addition; the macro body adds no zero-divisor check
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uchar, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, char, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, ushort, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, short, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, double, double)
+// ==: defined for all eight element types; the per-component comparison result is returned as ucharN
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, double, uchar)
+// !=: all element types, ucharN result
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, double, uchar)
+// >: all element types, ucharN result
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, double, uchar)
+// <: all element types, ucharN result
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, double, uchar)
+// >=: all element types, ucharN result
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, double, uchar)
+// <=: all element types, ucharN result
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, double, uchar)
+// logical and: all element types, ucharN result; both vector operands are ordinary function arguments, so both are always evaluated
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, double, uchar)
+// logical or: all element types, ucharN result
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, char, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, ushort, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, short, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, int, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uint, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, float, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, double, uchar)
+// bitwise and: integer vectors only; the result keeps the input's element type
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, char, char)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, short, short)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uint, uint)
+// bitwise or: integer vectors only; the result keeps the input's element type
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, char, char)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, short, short)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uint, uint)
+// bitwise xor: integer vectors only; the result keeps the input's element type
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, char, char)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, short, short)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uint, uint)
+// the generator macro is removed once all vec & vec operators are defined
+#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_OP
+
+// binary operators (vec & scalar): CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(op, input_type, scalar_type, output_type) defines, for N = 1 .. 4, both
+// operator op(input_typeN, scalar_type) and operator op(scalar_type, input_typeN); s is combined with every component, keeping operand order.
+#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(op, input_type, scalar_type, output_type) \
+    __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 1>::make(a.x op s); \
+    } \
+    __device__ __forceinline__ output_type ## 1 operator op(scalar_type s, const input_type ## 1 & b) \
+    { \
+        return VecTraits<output_type ## 1>::make(s op b.x); \
+    } \
+    __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 2>::make(a.x op s, a.y op s); \
+    } \
+    __device__ __forceinline__ output_type ## 2 operator op(scalar_type s, const input_type ## 2 & b) \
+    { \
+        return VecTraits<output_type ## 2>::make(s op b.x, s op b.y); \
+    } \
+    __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 3>::make(a.x op s, a.y op s, a.z op s); \
+    } \
+    __device__ __forceinline__ output_type ## 3 operator op(scalar_type s, const input_type ## 3 & b) \
+    { \
+        return VecTraits<output_type ## 3>::make(s op b.x, s op b.y, s op b.z); \
+    } \
+    __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, scalar_type s) \
+    { \
+        return VecTraits<output_type ## 4>::make(a.x op s, a.y op s, a.z op s, a.w op s); \
+    } \
+    __device__ __forceinline__ output_type ## 4 operator op(scalar_type s, const input_type ## 4 & b) \
+    { \
+        return VecTraits<output_type ## 4>::make(s op b.x, s op b.y, s op b.z, s op b.w); \
+    }
+// addition with a scalar: in every row the output element type equals the scalar type (int, uint, float or double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, double, double, double)
+// subtraction with a scalar: same (vector, scalar, output) combinations as addition
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, double, double, double)
+// multiplication with a scalar: same (vector, scalar, output) combinations as addition
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, double, double, double)
+
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, double, double, double)
+
+// Instantiations of CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP for the comparison operators
+// ==, !=, >, <, >= and <=.
+// In every line the second and third arguments are the same type and the fourth is uchar.
+// NOTE(review): the macro is defined earlier in this header, outside this excerpt; which values
+// the uchar result components take is decided there - confirm against the definition.
+
+// operator ==
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, double, double, uchar)
+
+// operator !=
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, double, double, uchar)
+
+// operator >
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, double, double, uchar)
+
+// operator <
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, double, double, uchar)
+
+// operator >=
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, double, double, uchar)
+
+// operator <=
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, double, double, uchar)
+
+// Instantiations of CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP for the logical operators && and ||.
+// As with the comparison operators, the second and third arguments match and the fourth is
+// always uchar; float and double element types are included.
+// NOTE(review): the macro definition is earlier in this header, outside this excerpt.
+
+// operator &&
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, double, double, uchar)
+
+// operator ||
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, char, char, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, ushort, ushort, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, short, short, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, int, int, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uint, uint, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, float, float, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, double, double, uchar)
+
+// Instantiations of CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP for the bitwise operators &, | and ^.
+// Only integer element types are listed (no float/double), and all three type arguments are
+// the same in every line.
+// NOTE(review): the macro definition is earlier in this header, outside this excerpt.
+
+// operator &
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, char, char, char)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, ushort, ushort, ushort)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, short, short, short)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uint, uint, uint)
+
+// operator |
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, char, char, char)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, ushort, ushort, ushort)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, short, short, short)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uint, uint, uint)
+
+// operator ^
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, char, char, char)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, ushort, ushort, ushort)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, short, short, short)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uint, uint, uint)
+
+// Last use of the generator macro; remove it so it does not leak to includers.
+#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP
+
+// binary function (vec & vec)
+
+// Generates the 1-, 2-, 3- and 4-component device overloads of func_name(vec, vec).
+// Every overload applies `func` to the matching components of both operands and packs the
+// results into output_type ## N through VecTraits<output_type ## N>::make.
+#define CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(func_name, func, input_type, output_type) \
+    __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & lhs, const input_type ## 1 & rhs) \
+    { \
+        typedef VecTraits<output_type ## 1> result_traits; \
+        return result_traits::make(func (lhs.x, rhs.x)); \
+    } \
+    __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & lhs, const input_type ## 2 & rhs) \
+    { \
+        typedef VecTraits<output_type ## 2> result_traits; \
+        return result_traits::make(func (lhs.x, rhs.x), func (lhs.y, rhs.y)); \
+    } \
+    __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & lhs, const input_type ## 3 & rhs) \
+    { \
+        typedef VecTraits<output_type ## 3> result_traits; \
+        return result_traits::make(func (lhs.x, rhs.x), func (lhs.y, rhs.y), func (lhs.z, rhs.z)); \
+    } \
+    __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & lhs, const input_type ## 4 & rhs) \
+    { \
+        typedef VecTraits<output_type ## 4> result_traits; \
+        return result_traits::make(func (lhs.x, rhs.x), func (lhs.y, rhs.y), func (lhs.z, rhs.z), func (lhs.w, rhs.w)); \
+    }
+
+// max(vec, vec): integer element types use ::max, float uses ::fmaxf and double uses ::fmax.
+// The result vector has the same element type as the operands.
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, char, char)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, short, short)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmaxf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmax, double, double)
+
+// min(vec, vec): same split as max (::min / ::fminf / ::fmin).
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uchar, uchar)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, char, char)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, ushort, ushort)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, short, short)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uint, uint)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, int, int)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fminf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fmin, double, double)
+
+// hypot(vec, vec): every element type except double is evaluated with ::hypotf and yields a
+// float vector; double vectors use ::hypot and yield a double vector.
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, char, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, short, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, int, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypot, double, double)
+
+// atan2(vec, vec): same type mapping as hypot (::atan2f -> float, ::atan2 -> double).
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uchar, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, char, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, ushort, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, short, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uint, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, int, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, float, float)
+CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2, double, double)
+
+// The generator macro is only needed for the instantiations above.
+#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC
+
+// binary function (vec & scalar)
+
+// Generates the device overloads func_name(vec, scalar) and func_name(scalar, vec) for
+// 1- to 4-component vectors. Each vector component and the scalar are converted to
+// output_type before `func` is applied; the results are packed into output_type ## N
+// through VecTraits<output_type ## N>::make.
+#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(func_name, func, input_type, scalar_type, output_type) \
+    __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & vec, scalar_type scalar) \
+    { \
+        return VecTraits<output_type ## 1>::make(func (static_cast<output_type>(vec.x), static_cast<output_type>(scalar))); \
+    } \
+    __device__ __forceinline__ output_type ## 1 func_name(scalar_type scalar, const input_type ## 1 & vec) \
+    { \
+        return VecTraits<output_type ## 1>::make(func (static_cast<output_type>(scalar), static_cast<output_type>(vec.x))); \
+    } \
+    __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & vec, scalar_type scalar) \
+    { \
+        return VecTraits<output_type ## 2>::make(func (static_cast<output_type>(vec.x), static_cast<output_type>(scalar)), func (static_cast<output_type>(vec.y), static_cast<output_type>(scalar))); \
+    } \
+    __device__ __forceinline__ output_type ## 2 func_name(scalar_type scalar, const input_type ## 2 & vec) \
+    { \
+        return VecTraits<output_type ## 2>::make(func (static_cast<output_type>(scalar), static_cast<output_type>(vec.x)), func (static_cast<output_type>(scalar), static_cast<output_type>(vec.y))); \
+    } \
+    __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & vec, scalar_type scalar) \
+    { \
+        return VecTraits<output_type ## 3>::make(func (static_cast<output_type>(vec.x), static_cast<output_type>(scalar)), func (static_cast<output_type>(vec.y), static_cast<output_type>(scalar)), func (static_cast<output_type>(vec.z), static_cast<output_type>(scalar))); \
+    } \
+    __device__ __forceinline__ output_type ## 3 func_name(scalar_type scalar, const input_type ## 3 & vec) \
+    { \
+        return VecTraits<output_type ## 3>::make(func (static_cast<output_type>(scalar), static_cast<output_type>(vec.x)), func (static_cast<output_type>(scalar), static_cast<output_type>(vec.y)), func (static_cast<output_type>(scalar), static_cast<output_type>(vec.z))); \
+    } \
+    __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & vec, scalar_type scalar) \
+    { \
+        return VecTraits<output_type ## 4>::make(func (static_cast<output_type>(vec.x), static_cast<output_type>(scalar)), func (static_cast<output_type>(vec.y), static_cast<output_type>(scalar)), func (static_cast<output_type>(vec.z), static_cast<output_type>(scalar)), func (static_cast<output_type>(vec.w), static_cast<output_type>(scalar))); \
+    } \
+    __device__ __forceinline__ output_type ## 4 func_name(scalar_type scalar, const input_type ## 4 & vec) \
+    { \
+        return VecTraits<output_type ## 4>::make(func (static_cast<output_type>(scalar), static_cast<output_type>(vec.x)), func (static_cast<output_type>(scalar), static_cast<output_type>(vec.y)), func (static_cast<output_type>(scalar), static_cast<output_type>(vec.z)), func (static_cast<output_type>(scalar), static_cast<output_type>(vec.w))); \
+    }
+
+// max(vec, scalar) / max(scalar, vec).
+// Integer vectors get three variants: a scalar of the same integer type (::max, result keeps
+// the element type), a float scalar (::fmaxf, float result) and a double scalar (::fmax, double
+// result). float vectors get the float and double variants, double vectors only the double one.
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, char, char, char)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, ushort, ushort, ushort)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, short, short, short)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, double, double, double)
+
+// min(vec, scalar) / min(scalar, vec): same type combinations as max (::min / ::fminf / ::fmin).
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uchar, uchar, uchar)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, char, char, char)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, ushort, ushort, ushort)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, short, short, short)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uint, uint, uint)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, int, int, int)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, double, double, double)
+
+// hypot(vec, scalar) / hypot(scalar, vec): only float and double scalars are provided.
+// A float scalar selects ::hypotf with a float result, a double scalar selects ::hypot with a
+// double result; double vectors have the double variant only.
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, double, double, double)
+
+// atan2(vec, scalar) / atan2(scalar, vec): same type combinations as hypot
+// (::atan2f -> float, ::atan2 -> double).
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uchar, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uchar, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, char, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, char, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, ushort, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, ushort, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, short, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, short, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uint, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uint, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, int, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, int, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, float, float, float)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, float, double, double)
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, double, double, double)
+
+// The generator macro is only needed for the instantiations above.
+#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC
+
+}}} // namespace cv { namespace cuda { namespace device
+
+//! @endcond
+
+#endif // OPENCV_CUDA_VECMATH_HPP
--- a/3rdparty/opencv/inc/opencv2/core/cuda/vec_traits.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda/vec_traits.hpp
@@ -0,0 +1,288 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_VEC_TRAITS_HPP
+#define OPENCV_CUDA_VEC_TRAITS_HPP
+
+#include "common.hpp"
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    // Maps a type T and a channel count N to a vector type, exposed as TypeVec<T, N>::vec_type.
+    // The primary template is only declared here; the usable explicit specializations follow
+    // further down in this namespace.
+    template<typename T, int N> struct TypeVec;
+
+    // Eight uchar fields (a0..a7), declared with __align__(8).
+    struct __align__(8) uchar8
+    {
+        uchar a0, a1, a2, a3;
+        uchar a4, a5, a6, a7;
+    };
+    // Builds a uchar8 whose fields a0..a7 take the arguments in order.
+    static __host__ __device__ __forceinline__ uchar8 make_uchar8(uchar a0, uchar a1, uchar a2, uchar a3, uchar a4, uchar a5, uchar a6, uchar a7)
+    {
+        uchar8 packed;
+        packed.a0 = a0; packed.a1 = a1; packed.a2 = a2; packed.a3 = a3;
+        packed.a4 = a4; packed.a5 = a5; packed.a6 = a6; packed.a7 = a7;
+        return packed;
+    }
+    // Eight schar fields (a0..a7), declared with __align__(8).
+    struct __align__(8) char8
+    {
+        schar a0, a1, a2, a3;
+        schar a4, a5, a6, a7;
+    };
+    // Builds a char8 whose fields a0..a7 take the arguments in order.
+    static __host__ __device__ __forceinline__ char8 make_char8(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7)
+    {
+        char8 packed;
+        packed.a0 = a0; packed.a1 = a1; packed.a2 = a2; packed.a3 = a3;
+        packed.a4 = a4; packed.a5 = a5; packed.a6 = a6; packed.a7 = a7;
+        return packed;
+    }
+    // Eight ushort fields (a0..a7), declared with __align__(16).
+    struct __align__(16) ushort8
+    {
+        ushort a0, a1, a2, a3;
+        ushort a4, a5, a6, a7;
+    };
+    // Builds a ushort8 whose fields a0..a7 take the arguments in order.
+    static __host__ __device__ __forceinline__ ushort8 make_ushort8(ushort a0, ushort a1, ushort a2, ushort a3, ushort a4, ushort a5, ushort a6, ushort a7)
+    {
+        ushort8 packed;
+        packed.a0 = a0; packed.a1 = a1; packed.a2 = a2; packed.a3 = a3;
+        packed.a4 = a4; packed.a5 = a5; packed.a6 = a6; packed.a7 = a7;
+        return packed;
+    }
+    // Eight short fields (a0..a7), declared with __align__(16).
+    struct __align__(16) short8
+    {
+        short a0, a1, a2, a3;
+        short a4, a5, a6, a7;
+    };
+    // Builds a short8 whose fields a0..a7 take the arguments in order.
+    static __host__ __device__ __forceinline__ short8 make_short8(short a0, short a1, short a2, short a3, short a4, short a5, short a6, short a7)
+    {
+        short8 packed;
+        packed.a0 = a0; packed.a1 = a1; packed.a2 = a2; packed.a3 = a3;
+        packed.a4 = a4; packed.a5 = a5; packed.a6 = a6; packed.a7 = a7;
+        return packed;
+    }
+    // Eight uint fields (a0..a7), declared with __align__(32).
+    struct __align__(32) uint8
+    {
+        uint a0, a1, a2, a3;
+        uint a4, a5, a6, a7;
+    };
+    // Builds a uint8 whose fields a0..a7 take the arguments in order.
+    static __host__ __device__ __forceinline__ uint8 make_uint8(uint a0, uint a1, uint a2, uint a3, uint a4, uint a5, uint a6, uint a7)
+    {
+        uint8 packed;
+        packed.a0 = a0; packed.a1 = a1; packed.a2 = a2; packed.a3 = a3;
+        packed.a4 = a4; packed.a5 = a5; packed.a6 = a6; packed.a7 = a7;
+        return packed;
+    }
+    // Eight int fields (a0..a7), declared with __align__(32).
+    struct __align__(32) int8
+    {
+        int a0, a1, a2, a3;
+        int a4, a5, a6, a7;
+    };
+    // Builds an int8 whose fields a0..a7 take the arguments in order.
+    static __host__ __device__ __forceinline__ int8 make_int8(int a0, int a1, int a2, int a3, int a4, int a5, int a6, int a7)
+    {
+        int8 packed;
+        packed.a0 = a0; packed.a1 = a1; packed.a2 = a2; packed.a3 = a3;
+        packed.a4 = a4; packed.a5 = a5; packed.a6 = a6; packed.a7 = a7;
+        return packed;
+    }
+    // Eight float fields (a0..a7), declared with __align__(32).
+    struct __align__(32) float8
+    {
+        float a0, a1, a2, a3;
+        float a4, a5, a6, a7;
+    };
+    // Builds a float8 whose fields a0..a7 take the arguments in order.
+    static __host__ __device__ __forceinline__ float8 make_float8(float a0, float a1, float a2, float a3, float a4, float a5, float a6, float a7)
+    {
+        float8 packed;
+        packed.a0 = a0; packed.a1 = a1; packed.a2 = a2; packed.a3 = a3;
+        packed.a4 = a4; packed.a5 = a5; packed.a6 = a6; packed.a7 = a7;
+        return packed;
+    }
+    // Eight double fields (a0..a7). Unlike the other 8-element types in this header,
+    // this struct carries no explicit __align__ attribute.
+    struct double8
+    {
+        double a0, a1, a2, a3;
+        double a4, a5, a6, a7;
+    };
+    // Builds a double8 whose fields a0..a7 take the arguments in order.
+    static __host__ __device__ __forceinline__ double8 make_double8(double a0, double a1, double a2, double a3, double a4, double a5, double a6, double a7)
+    {
+        double8 packed;
+        packed.a0 = a0; packed.a1 = a1; packed.a2 = a2; packed.a3 = a3;
+        packed.a4 = a4; packed.a5 = a5; packed.a6 = a6; packed.a7 = a7;
+        return packed;
+    }
+
+// For one element type, specializes TypeVec for the channel counts 1, 2, 3, 4 and 8.
+// Each count N gets two specializations: one keyed on the scalar `type` and one keyed on the
+// N-component vector type `type ## N` itself. Both yield `type ## N`, with one exception:
+// TypeVec<type, 1> yields the plain scalar `type` rather than `type ## 1`.
+#define OPENCV_CUDA_IMPLEMENT_TYPE_VEC(type) \
+    template<> struct TypeVec<type, 1> { typedef type vec_type; }; \
+    template<> struct TypeVec<type ## 1, 1> { typedef type ## 1 vec_type; }; \
+    template<> struct TypeVec<type, 2> { typedef type ## 2 vec_type; }; \
+    template<> struct TypeVec<type ## 2, 2> { typedef type ## 2 vec_type; }; \
+    template<> struct TypeVec<type, 3> { typedef type ## 3 vec_type; }; \
+    template<> struct TypeVec<type ## 3, 3> { typedef type ## 3 vec_type; }; \
+    template<> struct TypeVec<type, 4> { typedef type ## 4 vec_type; }; \
+    template<> struct TypeVec<type ## 4, 4> { typedef type ## 4 vec_type; }; \
+    template<> struct TypeVec<type, 8> { typedef type ## 8 vec_type; }; \
+    template<> struct TypeVec<type ## 8, 8> { typedef type ## 8 vec_type; };
+
+    // One instantiation per supported element type.
+    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(uchar)
+    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(char)
+    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(ushort)
+    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(short)
+    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(int)
+    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(uint)
+    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(float)
+    OPENCV_CUDA_IMPLEMENT_TYPE_VEC(double)
+
+    // The generator macro is only needed for the instantiations above.
+    #undef OPENCV_CUDA_IMPLEMENT_TYPE_VEC
+
+    // schar element type: a single channel stays schar, wider counts map onto the char vectors.
+    template<>
+    struct TypeVec<schar, 1> { typedef schar vec_type; };
+    template<>
+    struct TypeVec<schar, 2> { typedef char2 vec_type; };
+    template<>
+    struct TypeVec<schar, 3> { typedef char3 vec_type; };
+    template<>
+    struct TypeVec<schar, 4> { typedef char4 vec_type; };
+    template<>
+    struct TypeVec<schar, 8> { typedef char8 vec_type; };
+
+    // bool element type: mapped onto uchar and the uchar vectors.
+    template<>
+    struct TypeVec<bool, 1> { typedef uchar vec_type; };
+    template<>
+    struct TypeVec<bool, 2> { typedef uchar2 vec_type; };
+    template<>
+    struct TypeVec<bool, 3> { typedef uchar3 vec_type; };
+    template<>
+    struct TypeVec<bool, 4> { typedef uchar4 vec_type; };
+    template<>
+    struct TypeVec<bool, 8> { typedef uchar8 vec_type; };
+
+    // Per-type traits for scalars and vectors. The primary template is only declared;
+    // all usable forms are the explicit specializations that follow.
+    template<typename T> struct VecTraits;
+
+// Generates VecTraits specializations for the scalar `type` and for type ## 1, 2, 3, 4 and 8.
+// Every specialization provides:
+//   elem_type      - the scalar element type (`type`)
+//   cn             - the channel count (1 for the scalar and for type ## 1)
+//   all(v)         - a value with every component set to v (the scalar form returns v itself)
+//   make(x, ...)   - a value built from cn individual components via make_<type><N>
+//   make(const type* v) - a value built from v[0] .. v[cn - 1]
+#define OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(type) \
+    template<> struct VecTraits<type> \
+    { \
+        typedef type elem_type; \
+        enum {cn=1}; \
+        static __device__ __host__ __forceinline__ type all(type v) {return v;} \
+        static __device__ __host__ __forceinline__ type make(type x) {return x;} \
+        static __device__ __host__ __forceinline__ type make(const type* v) {return *v;} \
+    }; \
+    template<> struct VecTraits<type ## 1> \
+    { \
+        typedef type elem_type; \
+        enum {cn=1}; \
+        static __device__ __host__ __forceinline__ type ## 1 all(type v) {return make_ ## type ## 1(v);} \
+        static __device__ __host__ __forceinline__ type ## 1 make(type x) {return make_ ## type ## 1(x);} \
+        static __device__ __host__ __forceinline__ type ## 1 make(const type* v) {return make_ ## type ## 1(*v);} \
+    }; \
+    template<> struct VecTraits<type ## 2> \
+    { \
+        typedef type elem_type; \
+        enum {cn=2}; \
+        static __device__ __host__ __forceinline__ type ## 2 all(type v) {return make_ ## type ## 2(v, v);} \
+        static __device__ __host__ __forceinline__ type ## 2 make(type x, type y) {return make_ ## type ## 2(x, y);} \
+        static __device__ __host__ __forceinline__ type ## 2 make(const type* v) {return make_ ## type ## 2(v[0], v[1]);} \
+    }; \
+    template<> struct VecTraits<type ## 3> \
+    { \
+        typedef type elem_type; \
+        enum {cn=3}; \
+        static __device__ __host__ __forceinline__ type ## 3 all(type v) {return make_ ## type ## 3(v, v, v);} \
+        static __device__ __host__ __forceinline__ type ## 3 make(type x, type y, type z) {return make_ ## type ## 3(x, y, z);} \
+        static __device__ __host__ __forceinline__ type ## 3 make(const type* v) {return make_ ## type ## 3(v[0], v[1], v[2]);} \
+    }; \
+    template<> struct VecTraits<type ## 4> \
+    { \
+        typedef type elem_type; \
+        enum {cn=4}; \
+        static __device__ __host__ __forceinline__ type ## 4 all(type v) {return make_ ## type ## 4(v, v, v, v);} \
+        static __device__ __host__ __forceinline__ type ## 4 make(type x, type y, type z, type w) {return make_ ## type ## 4(x, y, z, w);} \
+        static __device__ __host__ __forceinline__ type ## 4 make(const type* v) {return make_ ## type ## 4(v[0], v[1], v[2], v[3]);} \
+    }; \
+    template<> struct VecTraits<type ## 8> \
+    { \
+        typedef type elem_type; \
+        enum {cn=8}; \
+        static __device__ __host__ __forceinline__ type ## 8 all(type v) {return make_ ## type ## 8(v, v, v, v, v, v, v, v);} \
+        static __device__ __host__ __forceinline__ type ## 8 make(type a0, type a1, type a2, type a3, type a4, type a5, type a6, type a7) {return make_ ## type ## 8(a0, a1, a2, a3, a4, a5, a6, a7);} \
+        static __device__ __host__ __forceinline__ type ## 8 make(const type* v) {return make_ ## type ## 8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);} \
+    };
+
+    // char is deliberately absent from this list: the char/schar family is specialized by hand
+    // after the #undef, with schar as the element type of the char vectors.
+    OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(uchar)
+    OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(ushort)
+    OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(short)
+    OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(int)
+    OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(uint)
+    OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(float)
+    OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(double)
+
+    // The generator macro is only needed for the instantiations above.
+    #undef OPENCV_CUDA_IMPLEMENT_VEC_TRAITS
+
+    // Traits of a plain char treated as a single-channel value.
+    template<> struct VecTraits<char>
+    {
+        typedef char elem_type;
+        enum { cn = 1 };
+
+        // A scalar has one component, so the value is returned unchanged.
+        static __device__ __host__ __forceinline__ char all(char value)
+        {
+            return value;
+        }
+        // Identity: returns the single component.
+        static __device__ __host__ __forceinline__ char make(char value)
+        {
+            return value;
+        }
+        // Reads the single component through the pointer.
+        static __device__ __host__ __forceinline__ char make(const char* src)
+        {
+            return *src;
+        }
+    };
+    // Traits of an schar treated as a single-channel value.
+    template<> struct VecTraits<schar>
+    {
+        typedef schar elem_type;
+        enum { cn = 1 };
+
+        // A scalar has one component, so the value is returned unchanged.
+        static __device__ __host__ __forceinline__ schar all(schar value)
+        {
+            return value;
+        }
+        // Identity: returns the single component.
+        static __device__ __host__ __forceinline__ schar make(schar value)
+        {
+            return value;
+        }
+        // Reads the single component through the pointer.
+        static __device__ __host__ __forceinline__ schar make(const schar* src)
+        {
+            return *src;
+        }
+    };
+    // Traits of char1: one schar component.
+    template<> struct VecTraits<char1>
+    {
+        typedef schar elem_type;
+        enum { cn = 1 };
+
+        // Wraps the value into a char1.
+        static __device__ __host__ __forceinline__ char1 all(schar value)
+        {
+            return make_char1(value);
+        }
+        // Builds a char1 from its single component.
+        static __device__ __host__ __forceinline__ char1 make(schar x)
+        {
+            return make_char1(x);
+        }
+        // Builds a char1 from src[0].
+        static __device__ __host__ __forceinline__ char1 make(const schar* src)
+        {
+            return make_char1(src[0]);
+        }
+    };
+    // Traits of char2: two schar components.
+    template<> struct VecTraits<char2>
+    {
+        typedef schar elem_type;
+        enum { cn = 2 };
+
+        // Replicates the value into both components.
+        static __device__ __host__ __forceinline__ char2 all(schar value)
+        {
+            return make_char2(value, value);
+        }
+        // Builds a char2 from individual components.
+        static __device__ __host__ __forceinline__ char2 make(schar x, schar y)
+        {
+            return make_char2(x, y);
+        }
+        // Builds a char2 from src[0] and src[1].
+        static __device__ __host__ __forceinline__ char2 make(const schar* src)
+        {
+            return make_char2(src[0], src[1]);
+        }
+    };
+    // Traits of char3: three schar components.
+    template<> struct VecTraits<char3>
+    {
+        typedef schar elem_type;
+        enum { cn = 3 };
+
+        // Replicates the value into all three components.
+        static __device__ __host__ __forceinline__ char3 all(schar value)
+        {
+            return make_char3(value, value, value);
+        }
+        // Builds a char3 from individual components.
+        static __device__ __host__ __forceinline__ char3 make(schar x, schar y, schar z)
+        {
+            return make_char3(x, y, z);
+        }
+        // Builds a char3 from src[0] .. src[2].
+        static __device__ __host__ __forceinline__ char3 make(const schar* src)
+        {
+            return make_char3(src[0], src[1], src[2]);
+        }
+    };
+    // Traits of char4: four schar components.
+    template<> struct VecTraits<char4>
+    {
+        typedef schar elem_type;
+        enum { cn = 4 };
+
+        // Replicates the value into all four components.
+        static __device__ __host__ __forceinline__ char4 all(schar value)
+        {
+            return make_char4(value, value, value, value);
+        }
+        // Builds a char4 from individual components.
+        static __device__ __host__ __forceinline__ char4 make(schar x, schar y, schar z, schar w)
+        {
+            return make_char4(x, y, z, w);
+        }
+        // Builds a char4 from src[0] .. src[3].
+        static __device__ __host__ __forceinline__ char4 make(const schar* src)
+        {
+            return make_char4(src[0], src[1], src[2], src[3]);
+        }
+    };
+    // Traits of char8: eight schar components.
+    template<> struct VecTraits<char8>
+    {
+        typedef schar elem_type;
+        enum { cn = 8 };
+
+        // Replicates the value into all eight components.
+        static __device__ __host__ __forceinline__ char8 all(schar value)
+        {
+            return make_char8(value, value, value, value, value, value, value, value);
+        }
+        // Builds a char8 from individual components.
+        static __device__ __host__ __forceinline__ char8 make(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7)
+        {
+            return make_char8(a0, a1, a2, a3, a4, a5, a6, a7);
+        }
+        // Builds a char8 from src[0] .. src[7].
+        static __device__ __host__ __forceinline__ char8 make(const schar* src)
+        {
+            return make_char8(src[0], src[1], src[2], src[3], src[4], src[5], src[6], src[7]);
+        }
+    };
+}}} // namespace cv { namespace cuda { namespace device
+
+//! @endcond
+
+#endif // OPENCV_CUDA_VEC_TRAITS_HPP
--- a/3rdparty/opencv/inc/opencv2/core/cuda/warp.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda/warp.hpp
@@ -0,0 +1,139 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_DEVICE_WARP_HPP
+#define OPENCV_CUDA_DEVICE_WARP_HPP
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    struct Warp // static helpers in which each lane of a warp handles its own strided share of a range
+    {
+        enum
+        {
+            LOG_WARP_SIZE = 5, // log2 of WARP_SIZE
+            WARP_SIZE     = 1 << LOG_WARP_SIZE, // evaluates to 32
+            STRIDE        = WARP_SIZE // step each lane advances by in the loops below
+        };
+
+        /** \brief Returns the warp lane ID of the calling thread. */
+        static __device__ __forceinline__ unsigned int laneId()
+        {
+            unsigned int ret;
+            asm("mov.u32 %0, %%laneid;" : "=r"(ret) ); // copies the PTX %laneid register into ret
+            return ret;
+        }
+
+        template<typename It, typename T>
+        static __device__ __forceinline__ void fill(It beg, It end, const T& value) // assigns value to the elements of [beg, end)
+        {
+            for(It t = beg + laneId(); t < end; t += STRIDE) // this lane touches beg + lane, beg + lane + STRIDE, ...
+                *t = value;
+        }
+
+        template<typename InIt, typename OutIt>
+        static __device__ __forceinline__ OutIt copy(InIt beg, InIt end, OutIt out) // copies from [beg, end) to out; returns the advanced out
+        {
+            for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE) // NOTE(review): t starts at beg + laneId() but out is not offset by the lane -- confirm callers pass a per-lane out
+                *out = *t;
+            return out;
+        }
+
+        template<typename InIt, typename OutIt, class UnOp>
+        static __device__ __forceinline__ OutIt transform(InIt beg, InIt end, OutIt out, UnOp op) // writes op(element) for [beg, end) to out; returns the advanced out
+        {
+            for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE) // NOTE(review): as in copy(), out is not offset by the lane -- confirm
+                *out = op(*t);
+            return out;
+        }
+
+        template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
+        static __device__ __forceinline__ OutIt transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op) // binary form: writes op(a, b) for paired elements of the two inputs
+        {
+            unsigned int lane = laneId();
+
+            InIt1 t1 = beg1 + lane; // both inputs are offset by the lane ...
+            InIt2 t2 = beg2 + lane;
+            for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, out += STRIDE) // ... NOTE(review): out is not -- confirm callers pass a per-lane out
+                *out = op(*t1, *t2);
+            return out;
+        }
+
+        template <class T, class BinOp>
+        static __device__ __forceinline__ T reduce(volatile T *ptr, BinOp op) // combines ptr[0..31] with op in five halving steps and returns *ptr
+        {
+            const unsigned int lane = laneId();
+
+            if (lane < 16) // only the lower 16 lanes combine values
+            {
+                T partial = ptr[lane];
+
+                ptr[lane] = partial = op(partial, ptr[lane + 16]); // NOTE(review): no barrier between steps -- presumably relies on lockstep execution of the warp; confirm for the target architecture
+                ptr[lane] = partial = op(partial, ptr[lane + 8]);
+                ptr[lane] = partial = op(partial, ptr[lane + 4]);
+                ptr[lane] = partial = op(partial, ptr[lane + 2]);
+                ptr[lane] = partial = op(partial, ptr[lane + 1]);
+            }
+
+            return *ptr; // every lane, including lanes 16..31, returns ptr[0]
+        }
+
+        template<typename OutIt, typename T>
+        static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value) // writes value, value + 1, value + 2, ... over [beg, end)
+        {
+            unsigned int lane = laneId();
+            value += lane; // first value written by this lane
+
+            for(OutIt t = beg + lane; t < end; t += STRIDE, value += STRIDE)
+                *t = value;
+        }
+    };
+}}} // namespace cv { namespace cuda { namespace device
+
+//! @endcond
+
+#endif /* OPENCV_CUDA_DEVICE_WARP_HPP */
--- a/3rdparty/opencv/inc/opencv2/core/cuda/warp_reduce.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda/warp_reduce.hpp
@@ -0,0 +1,76 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_WARP_REDUCE_HPP__
+#define OPENCV_CUDA_WARP_REDUCE_HPP__
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+    template <class T>
+    __device__ __forceinline__ T warp_reduce(volatile T *ptr , const unsigned int tid = threadIdx.x)
+    {
+        const unsigned int laneIdx = tid & 31; // position of this thread inside its warp (0..31)
+
+        // Only the lower half-warp accumulates; the offsets visited are 16, 8, 4, 2, 1.
+        // NOTE(review): there is no barrier between steps -- presumably this relies on the
+        // lanes of a warp running in lockstep; confirm for the target architecture.
+        if (laneIdx < 16)
+        {
+            T acc = ptr[tid];
+            // Every step writes the running sum back through the volatile pointer.
+            for (unsigned int offset = 16; offset != 0; offset >>= 1)
+                ptr[tid] = acc = acc + ptr[tid + offset];
+        }
+
+        return ptr[tid - laneIdx]; // first slot of the 32-wide group that tid falls into
+    }
+}}} // namespace cv { namespace cuda { namespace device
+
+//! @endcond
+
+#endif /* OPENCV_CUDA_WARP_REDUCE_HPP__ */
--- a/3rdparty/opencv/inc/opencv2/core/cuda/warp_shuffle.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda/warp_shuffle.hpp
@@ -0,0 +1,162 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CUDA_WARP_SHUFFLE_HPP
+#define OPENCV_CUDA_WARP_SHUFFLE_HPP
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+namespace cv { namespace cuda { namespace device
+{
+#if __CUDACC_VER_MAJOR__ >= 9 // from compiler major version 9 on, route the legacy names to the *_sync intrinsics
+#  define __shfl(x, y, z) __shfl_sync(0xFFFFFFFFU, x, y, z) // first argument is a fixed mask with all 32 bits set
+#  define __shfl_up(x, y, z) __shfl_up_sync(0xFFFFFFFFU, x, y, z)
+#  define __shfl_down(x, y, z) __shfl_down_sync(0xFFFFFFFFU, x, y, z)
+#endif
+    template <typename T>
+    __device__ __forceinline__ T shfl(T val, int srcLane, int width = warpSize) // generic overload: forwards val to __shfl
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 // device compilation with __CUDA_ARCH__ of 300 or more
+        return __shfl(val, srcLane, width);
+    #else // any other compilation pass
+        return T(); // value-initialized placeholder; no shuffle is performed
+    #endif
+    }
+    __device__ __forceinline__ unsigned int shfl(unsigned int val, int srcLane, int width = warpSize) // unsigned overload: shuffles the value as int
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+        return (unsigned int) __shfl((int) val, srcLane, width);
+    #else
+        return 0;
+    #endif
+    }
+    __device__ __forceinline__ double shfl(double val, int srcLane, int width = warpSize) // double overload: shuffles two int halves
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+        int lo = __double2loint(val); // the double is split into two ints ...
+        int hi = __double2hiint(val);
+
+        lo = __shfl(lo, srcLane, width); // ... each half is shuffled on its own ...
+        hi = __shfl(hi, srcLane, width);
+
+        return __hiloint2double(hi, lo); // ... and the result is rebuilt from the shuffled halves
+    #else
+        return 0.0;
+    #endif
+    }
+
+    template <typename T>
+    __device__ __forceinline__ T shfl_down(T val, unsigned int delta, int width = warpSize) // generic overload: forwards val to __shfl_down
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 // device compilation with __CUDA_ARCH__ of 300 or more
+        return __shfl_down(val, delta, width);
+    #else // any other compilation pass
+        return T(); // value-initialized placeholder; no shuffle is performed
+    #endif
+    }
+    __device__ __forceinline__ unsigned int shfl_down(unsigned int val, unsigned int delta, int width = warpSize) // unsigned overload: shuffles the value as int
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+        return (unsigned int) __shfl_down((int) val, delta, width);
+    #else
+        return 0;
+    #endif
+    }
+    __device__ __forceinline__ double shfl_down(double val, unsigned int delta, int width = warpSize) // double overload: shuffles two int halves
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+        int lo = __double2loint(val); // the double is split into two ints ...
+        int hi = __double2hiint(val);
+
+        lo = __shfl_down(lo, delta, width); // ... each half is shuffled on its own ...
+        hi = __shfl_down(hi, delta, width);
+
+        return __hiloint2double(hi, lo); // ... and the result is rebuilt from the shuffled halves
+    #else
+        return 0.0;
+    #endif
+    }
+
+    template <typename T>
+    __device__ __forceinline__ T shfl_up(T val, unsigned int delta, int width = warpSize) // generic overload: forwards val to __shfl_up
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 // device compilation with __CUDA_ARCH__ of 300 or more
+        return __shfl_up(val, delta, width);
+    #else // any other compilation pass
+        return T(); // value-initialized placeholder; no shuffle is performed
+    #endif
+    }
+    __device__ __forceinline__ unsigned int shfl_up(unsigned int val, unsigned int delta, int width = warpSize) // unsigned overload: shuffles the value as int
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+        return (unsigned int) __shfl_up((int) val, delta, width);
+    #else
+        return 0;
+    #endif
+    }
+    __device__ __forceinline__ double shfl_up(double val, unsigned int delta, int width = warpSize) // double overload: shuffles two int halves
+    {
+    #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
+        int lo = __double2loint(val); // the double is split into two ints ...
+        int hi = __double2hiint(val);
+
+        lo = __shfl_up(lo, delta, width); // ... each half is shuffled on its own ...
+        hi = __shfl_up(hi, delta, width);
+
+        return __hiloint2double(hi, lo); // ... and the result is rebuilt from the shuffled halves
+    #else
+        return 0.0;
+    #endif
+    }
+}}}
+
+#  undef __shfl // remove the forwarding macros again; a no-op when they were never defined
+#  undef __shfl_up
+#  undef __shfl_down
+
+//! @endcond
+
+#endif // OPENCV_CUDA_WARP_SHUFFLE_HPP
--- a/3rdparty/opencv/inc/opencv2/core/cuda_stream_accessor.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda_stream_accessor.hpp
@@ -0,0 +1,86 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP
+#define OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP
+
+#ifndef __cplusplus
+#  error cuda_stream_accessor.hpp header must be compiled as C++
+#endif
+
+/** @file cuda_stream_accessor.hpp
+ * This is the only header file that depends on the CUDA Runtime API. All other headers are independent.
+ */
+
+#include <cuda_runtime.h>
+#include "opencv2/core/cuda.hpp"
+
+namespace cv
+{
+    namespace cuda
+    {
+
+//! @addtogroup cudacore_struct
+//! @{
+
+        /** @brief Class that enables getting cudaStream_t from cuda::Stream
+         */
+        struct StreamAccessor
+        {
+            CV_EXPORTS static cudaStream_t getStream(const Stream& stream); // cuda::Stream -> cudaStream_t
+            CV_EXPORTS static Stream wrapStream(cudaStream_t stream); // cudaStream_t -> cuda::Stream; NOTE(review): ownership of the handle is not visible here -- confirm
+        };
+
+        /** @brief Class that enables getting cudaEvent_t from cuda::Event
+         */
+        struct EventAccessor
+        {
+            CV_EXPORTS static cudaEvent_t getEvent(const Event& event); // cuda::Event -> cudaEvent_t
+            CV_EXPORTS static Event wrapEvent(cudaEvent_t event); // cudaEvent_t -> cuda::Event; NOTE(review): ownership of the handle is not visible here -- confirm
+        };
+
+//! @}
+
+    }
+}
+
+#endif /* OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP */
--- a/3rdparty/opencv/inc/opencv2/core/cuda_types.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cuda_types.hpp
@@ -0,0 +1,144 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_CUDA_TYPES_HPP
+#define OPENCV_CORE_CUDA_TYPES_HPP
+
+#ifndef __cplusplus
+#  error cuda_types.hpp header must be compiled as C++
+#endif
+
+#if defined(__OPENCV_BUILD) && defined(__clang__)
+#pragma clang diagnostic ignored "-Winconsistent-missing-override"
+#endif
+#if defined(__OPENCV_BUILD) && defined(__GNUC__) && __GNUC__ >= 5
+#pragma GCC diagnostic ignored "-Wsuggest-override"
+#endif
+
+/** @file
+ * @deprecated Use @ref cudev instead.
+ */
+
+//! @cond IGNORED
+
+#ifdef __CUDACC__
+    #define __CV_CUDA_HOST_DEVICE__ __host__ __device__ __forceinline__
+#else
+    #define __CV_CUDA_HOST_DEVICE__
+#endif
+
+namespace cv
+{
+    namespace cuda
+    {
+
+        // Simple lightweight structures that encapsulate information about an image on the device.
+        // They are intended to be passed to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile.
+
+        template <typename T> struct DevPtr // bare pointer wrapper; base class of PtrSz and PtrStep
+        {
+            typedef T elem_type;
+            typedef int index_type;
+
+            enum { elem_size = sizeof(elem_type) }; // size of one element, as reported by sizeof
+
+            T* data; // wrapped pointer; this struct declares no destructor, so it never frees it
+
+            __CV_CUDA_HOST_DEVICE__ DevPtr() : data(0) {} // null pointer by default
+            __CV_CUDA_HOST_DEVICE__ DevPtr(T* data_) : data(data_) {} // non-explicit: a raw T* converts implicitly
+
+            __CV_CUDA_HOST_DEVICE__ size_t elemSize() const { return elem_size; }
+            __CV_CUDA_HOST_DEVICE__ operator       T*()       { return data; } // implicit conversion back to the raw pointer
+            __CV_CUDA_HOST_DEVICE__ operator const T*() const { return data; }
+        };
+
+        template <typename T> struct PtrSz : public DevPtr<T> // DevPtr plus a size field
+        {
+            __CV_CUDA_HOST_DEVICE__ PtrSz() : size(0) {} // null pointer, size 0
+            __CV_CUDA_HOST_DEVICE__ PtrSz(T* data_, size_t size_) : DevPtr<T>(data_), size(size_) {}
+
+            size_t size; // presumably the number of elements behind data -- TODO confirm (could be bytes)
+        };
+
+        template <typename T> struct PtrStep : public DevPtr<T> // pointer plus the distance between consecutive rows
+        {
+            size_t step; // added to a char pointer once per row in ptr(), so it is measured in bytes
+
+            __CV_CUDA_HOST_DEVICE__ PtrStep() : step(0) {}
+            __CV_CUDA_HOST_DEVICE__ PtrStep(T* data_, size_t step_) : DevPtr<T>(data_), step(step_) {}
+            // Start of row y: the offset y * step is applied on a char pointer, then cast back to T*.
+            __CV_CUDA_HOST_DEVICE__       T* ptr(int y = 0)       { return (      T*)((      char*)(this->data) + y * step); }
+            __CV_CUDA_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)((const char*)(this->data) + y * step); }
+            // Element x of row y; no bounds are checked.
+            __CV_CUDA_HOST_DEVICE__       T& operator ()(int y, int x)       { return *(ptr(y) + x); }
+            __CV_CUDA_HOST_DEVICE__ const T& operator ()(int y, int x) const { return *(ptr(y) + x); }
+        };
+
+        template <typename T> struct PtrStepSz : public PtrStep<T> // PtrStep plus the dimensions cols and rows
+        {
+            __CV_CUDA_HOST_DEVICE__ PtrStepSz() : cols(0), rows(0) {} // empty view: zero columns and rows
+            __CV_CUDA_HOST_DEVICE__ PtrStepSz(int rows_, int cols_, T* data_, size_t step_) // note the argument order: rows first, then cols
+                : PtrStep<T>(data_, step_), cols(cols_), rows(rows_) {}
+
+            template <typename U> // reinterprets a PtrStepSz<U>: the pointer is cast to T*, while step, cols and rows are copied unchanged
+            __CV_CUDA_HOST_DEVICE__ explicit PtrStepSz(const PtrStepSz<U>& d) : PtrStep<T>((T*)d.data, d.step), cols(d.cols), rows(d.rows){} // annotated like the other constructors so it is also usable from device code
+
+            int cols; // declared before rows, so it is initialized first
+            int rows;
+        };
+
+        typedef PtrStepSz<unsigned char> PtrStepSzb; // alias suffixes: b = unsigned char, us = unsigned short, f = float, i = int
+        typedef PtrStepSz<unsigned short> PtrStepSzus;
+        typedef PtrStepSz<float> PtrStepSzf;
+        typedef PtrStepSz<int> PtrStepSzi;
+
+        typedef PtrStep<unsigned char> PtrStepb; // same suffixes for the variants without cols/rows
+        typedef PtrStep<unsigned short> PtrStepus;
+        typedef PtrStep<float> PtrStepf;
+        typedef PtrStep<int> PtrStepi;
+
+    }
+}
+
+//! @endcond
+
+#endif /* OPENCV_CORE_CUDA_TYPES_HPP */
--- a/3rdparty/opencv/inc/opencv2/core/cv_cpu_dispatch.h
+++ b/3rdparty/opencv/inc/opencv2/core/cv_cpu_dispatch.h
@@ -0,0 +1,368 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#if defined __OPENCV_BUILD \
+
+#include "cv_cpu_config.h"
+#include "cv_cpu_helper.h"
+
+#ifdef CV_CPU_DISPATCH_MODE // a dispatch mode is set: the namespace name is built by __CV_CAT from opt_ and the mode
+#define CV_CPU_OPTIMIZATION_NAMESPACE __CV_CAT(opt_, CV_CPU_DISPATCH_MODE)
+#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace __CV_CAT(opt_, CV_CPU_DISPATCH_MODE) {
+#define CV_CPU_OPTIMIZATION_NAMESPACE_END }
+#else // no dispatch mode: the cpu_baseline namespace is used
+#define CV_CPU_OPTIMIZATION_NAMESPACE cpu_baseline
+#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace cpu_baseline {
+#define CV_CPU_OPTIMIZATION_NAMESPACE_END }
+#define CV_CPU_BASELINE_MODE 1 // only defined on this branch
+#endif
+
+
+#define __CV_CPU_DISPATCH_CHAIN_END(fn, args, mode, ...)  /* done: END terminates the chain and expands to nothing */
+#define __CV_CPU_DISPATCH(fn, args, mode, ...) __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) // selects the chain macro named after the first mode
+#define __CV_CPU_DISPATCH_EXPAND(fn, args, ...) __CV_EXPAND(__CV_CPU_DISPATCH(fn, args, __VA_ARGS__))
+#define CV_CPU_DISPATCH(fn, args, ...) __CV_CPU_DISPATCH_EXPAND(fn, args, __VA_ARGS__, END) // expand macros; END is appended so the chain stops
+
+
+#if defined CV_ENABLE_INTRINSICS \
+    && !defined CV_DISABLE_OPTIMIZATION \
+    && !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */ \
+
+#ifdef CV_CPU_COMPILE_SSE2
+#  include <emmintrin.h>
+#  define CV_MMX 1
+#  define CV_SSE 1
+#  define CV_SSE2 1
+#endif
+#ifdef CV_CPU_COMPILE_SSE3
+#  include <pmmintrin.h>
+#  define CV_SSE3 1
+#endif
+#ifdef CV_CPU_COMPILE_SSSE3
+#  include <tmmintrin.h>
+#  define CV_SSSE3 1
+#endif
+#ifdef CV_CPU_COMPILE_SSE4_1
+#  include <smmintrin.h>
+#  define CV_SSE4_1 1
+#endif
+#ifdef CV_CPU_COMPILE_SSE4_2
+#  include <nmmintrin.h>
+#  define CV_SSE4_2 1
+#endif
+#ifdef CV_CPU_COMPILE_POPCNT
+#  ifdef _MSC_VER
+#    include <nmmintrin.h>
+#    if defined(_M_X64)
+#      define CV_POPCNT_U64 (int)_mm_popcnt_u64
+#    endif
+#    define CV_POPCNT_U32 _mm_popcnt_u32
+#  else
+#    include <popcntintrin.h>
+#    if defined(__x86_64__)
+#      define CV_POPCNT_U64 __builtin_popcountll
+#    endif
+#    define CV_POPCNT_U32 __builtin_popcount
+#  endif
+#  define CV_POPCNT 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX
+#  include <immintrin.h>
+#  define CV_AVX 1
+#endif
+#ifdef CV_CPU_COMPILE_FP16
+#  if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64)
+#    include <arm_neon.h>
+#  else
+#    include <immintrin.h>
+#  endif
+#  define CV_FP16 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX2
+#  include <immintrin.h>
+#  define CV_AVX2 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX_512F
+#  include <immintrin.h>
+#  define CV_AVX_512F 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX512_COMMON
+#  define CV_AVX512_COMMON 1
+#  define CV_AVX_512CD 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX512_KNL
+#  define CV_AVX512_KNL 1
+#  define CV_AVX_512ER 1
+#  define CV_AVX_512PF 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX512_KNM
+#  define CV_AVX512_KNM 1
+#  define CV_AVX_5124FMAPS 1
+#  define CV_AVX_5124VNNIW 1
+#  define CV_AVX_512VPOPCNTDQ 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX512_SKX
+#  define CV_AVX512_SKX 1
+#  define CV_AVX_512VL 1
+#  define CV_AVX_512BW 1
+#  define CV_AVX_512DQ 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX512_CNL
+#  define CV_AVX512_CNL 1
+#  define CV_AVX_512IFMA 1
+#  define CV_AVX_512VBMI 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX512_CLX
+#  define CV_AVX512_CLX 1
+#  define CV_AVX_512VNNI 1
+#endif
+#ifdef CV_CPU_COMPILE_AVX512_ICL
+#  define CV_AVX512_ICL 1
+#  undef CV_AVX_512IFMA
+#  define CV_AVX_512IFMA 1
+#  undef CV_AVX_512VBMI
+#  define CV_AVX_512VBMI 1
+#  undef CV_AVX_512VNNI
+#  define CV_AVX_512VNNI 1
+#  define CV_AVX_512VBMI2 1
+#  define CV_AVX_512BITALG 1
+#  define CV_AVX_512VPOPCNTDQ 1
+#endif
+#ifdef CV_CPU_COMPILE_FMA3
+#  define CV_FMA3 1
+#endif
+
+#if defined _WIN32 && (defined(_M_ARM) || defined(_M_ARM64)) && (defined(CV_CPU_COMPILE_NEON) || !defined(_MSC_VER))
+# include <Intrin.h>
+# include <arm_neon.h>
+# define CV_NEON 1
+#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
+#  include <arm_neon.h>
+#  define CV_NEON 1
+#endif
+
+#if defined(__riscv) && defined(__riscv_vector) && defined(__riscv_vector_071)
+# include<riscv-vector.h>
+# define CV_RVV071 1
+#endif
+
+#if defined(__ARM_NEON__) || defined(__aarch64__)
+#  include <arm_neon.h>
+#endif
+
+#ifdef CV_CPU_COMPILE_VSX
+#  include <altivec.h>
+#  undef vector
+#  undef pixel
+#  undef bool
+#  define CV_VSX 1
+#endif
+
+#ifdef CV_CPU_COMPILE_VSX3
+#  define CV_VSX3 1
+#endif
+
+#ifdef CV_CPU_COMPILE_MSA
+#  include "hal/msa_macros.h"
+#  define CV_MSA 1
+#endif
+
+#ifdef __EMSCRIPTEN__
+#  define CV_WASM_SIMD 1
+#  include <wasm_simd128.h>
+#endif
+
+#if defined CV_CPU_COMPILE_RVV
+#  define CV_RVV 1
+#  include <riscv_vector.h>
+#endif
+
+#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__
+
+#if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX // presumably: AVX is compiled in but is not part of the baseline
+struct VZeroUpperGuard { // scope guard: calls _mm256_zeroupper() on construction and again on destruction
+#ifdef __GNUC__
+    __attribute__((always_inline))
+#endif
+    inline VZeroUpperGuard() { _mm256_zeroupper(); }
+#ifdef __GNUC__
+    __attribute__((always_inline))
+#endif
+    inline ~VZeroUpperGuard() { _mm256_zeroupper(); }
+};
+#define __CV_AVX_GUARD VZeroUpperGuard __vzeroupper_guard; CV_UNUSED(__vzeroupper_guard); // declares a guard object in the current scope
+#endif
+
+#ifdef __CV_AVX_GUARD // public name for the guard macro
+#define CV_AVX_GUARD __CV_AVX_GUARD
+#else
+#define CV_AVX_GUARD // expands to nothing when __CV_AVX_GUARD is not defined
+#endif
+
+#endif // __OPENCV_BUILD
+
+
+
+#if !defined __OPENCV_BUILD /* Compatibility code */ \
+    && !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */
+#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2)
+#  include <emmintrin.h>
+#  define CV_MMX 1
+#  define CV_SSE 1
+#  define CV_SSE2 1
+#elif defined _WIN32 && (defined(_M_ARM) || defined(_M_ARM64)) && (defined(CV_CPU_COMPILE_NEON) || !defined(_MSC_VER))
+# include <Intrin.h>
+# include <arm_neon.h>
+# define CV_NEON 1
+#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
+#  include <arm_neon.h>
+#  define CV_NEON 1
+#elif defined(__VSX__) && defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
+#  include <altivec.h>
+#  undef vector
+#  undef pixel
+#  undef bool
+#  define CV_VSX 1
+#endif
+
+#ifdef __F16C__
+#  include <immintrin.h>
+#  define CV_FP16 1
+#endif
+
+#endif // !__OPENCV_BUILD && !__CUDACC__ (Compatibility code)
+
+
+
+#ifndef CV_MMX
+#  define CV_MMX 0
+#endif
+#ifndef CV_SSE
+#  define CV_SSE 0
+#endif
+#ifndef CV_SSE2
+#  define CV_SSE2 0
+#endif
+#ifndef CV_SSE3
+#  define CV_SSE3 0
+#endif
+#ifndef CV_SSSE3
+#  define CV_SSSE3 0
+#endif
+#ifndef CV_SSE4_1
+#  define CV_SSE4_1 0
+#endif
+#ifndef CV_SSE4_2
+#  define CV_SSE4_2 0
+#endif
+#ifndef CV_POPCNT
+#  define CV_POPCNT 0
+#endif
+#ifndef CV_AVX
+#  define CV_AVX 0
+#endif
+#ifndef CV_FP16
+#  define CV_FP16 0
+#endif
+#ifndef CV_AVX2
+#  define CV_AVX2 0
+#endif
+#ifndef CV_FMA3
+#  define CV_FMA3 0
+#endif
+#ifndef CV_AVX_512F
+#  define CV_AVX_512F 0
+#endif
+#ifndef CV_AVX_512BW
+#  define CV_AVX_512BW 0
+#endif
+#ifndef CV_AVX_512CD
+#  define CV_AVX_512CD 0
+#endif
+#ifndef CV_AVX_512DQ
+#  define CV_AVX_512DQ 0
+#endif
+#ifndef CV_AVX_512ER
+#  define CV_AVX_512ER 0
+#endif
+#ifndef CV_AVX_512IFMA
+#  define CV_AVX_512IFMA 0
+#endif
+#define CV_AVX_512IFMA512 CV_AVX_512IFMA // deprecated
+#ifndef CV_AVX_512PF
+#  define CV_AVX_512PF 0
+#endif
+#ifndef CV_AVX_512VBMI
+#  define CV_AVX_512VBMI 0
+#endif
+#ifndef CV_AVX_512VL
+#  define CV_AVX_512VL 0
+#endif
+#ifndef CV_AVX_5124FMAPS
+#  define CV_AVX_5124FMAPS 0
+#endif
+#ifndef CV_AVX_5124VNNIW
+#  define CV_AVX_5124VNNIW 0
+#endif
+#ifndef CV_AVX_512VPOPCNTDQ
+#  define CV_AVX_512VPOPCNTDQ 0
+#endif
+#ifndef CV_AVX_512VNNI
+#  define CV_AVX_512VNNI 0
+#endif
+#ifndef CV_AVX_512VBMI2
+#  define CV_AVX_512VBMI2 0
+#endif
+#ifndef CV_AVX_512BITALG
+#  define CV_AVX_512BITALG 0
+#endif
+#ifndef CV_AVX512_COMMON
+#  define CV_AVX512_COMMON 0
+#endif
+#ifndef CV_AVX512_KNL
+#  define CV_AVX512_KNL 0
+#endif
+#ifndef CV_AVX512_KNM
+#  define CV_AVX512_KNM 0
+#endif
+#ifndef CV_AVX512_SKX
+#  define CV_AVX512_SKX 0
+#endif
+#ifndef CV_AVX512_CNL
+#  define CV_AVX512_CNL 0
+#endif
+#ifndef CV_AVX512_CLX
+#  define CV_AVX512_CLX 0
+#endif
+#ifndef CV_AVX512_ICL
+#  define CV_AVX512_ICL 0
+#endif
+
+#ifndef CV_NEON
+#  define CV_NEON 0
+#endif
+
+#ifndef CV_RVV071
+#  define CV_RVV071 0
+#endif
+
+#ifndef CV_VSX
+#  define CV_VSX 0
+#endif
+
+#ifndef CV_VSX3
+#  define CV_VSX3 0
+#endif
+
+#ifndef CV_MSA
+#  define CV_MSA 0
+#endif
+
+#ifndef CV_WASM_SIMD
+#  define CV_WASM_SIMD 0
+#endif
+
+#ifndef CV_RVV
+#  define CV_RVV 0
+#endif
--- a/3rdparty/opencv/inc/opencv2/core/cv_cpu_helper.h
+++ b/3rdparty/opencv/inc/opencv2/core/cv_cpu_helper.h
@@ -0,0 +1,508 @@
+// AUTOGENERATED, DO NOT EDIT
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE // SSE helper macros; this branch: CV_CPU_COMPILE_SSE is defined, so support is the constant 1
+#  define CV_TRY_SSE 1
+#  define CV_CPU_FORCE_SSE 1
+#  define CV_CPU_HAS_SUPPORT_SSE 1
+#  define CV_CPU_CALL_SSE(fn, args) return (cpu_baseline::fn args) // unconditional return via cpu_baseline::
+#  define CV_CPU_CALL_SSE_(fn, args) return (opt_SSE::fn args) // unconditional return via opt_SSE::
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE // dispatch branch: support comes from cv::checkHardwareSupport() wherever the macro is expanded
+#  define CV_TRY_SSE 1
+#  define CV_CPU_FORCE_SSE 0
+#  define CV_CPU_HAS_SUPPORT_SSE (cv::checkHardwareSupport(CV_CPU_SSE))
+#  define CV_CPU_CALL_SSE(fn, args) if (CV_CPU_HAS_SUPPORT_SSE) return (opt_SSE::fn args) // expands to a bare `if`; control falls through when the check is false
+#  define CV_CPU_CALL_SSE_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE) return (opt_SSE::fn args)
+#else // neither condition above held: flags are 0 and both call macros expand to nothing
+#  define CV_TRY_SSE 0
+#  define CV_CPU_FORCE_SSE 0
+#  define CV_CPU_HAS_SUPPORT_SSE 0
+#  define CV_CPU_CALL_SSE(fn, args)
+#  define CV_CPU_CALL_SSE_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_SSE(fn, args, mode, ...)  CV_CPU_CALL_SSE(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) // emit the SSE call, then paste `mode` to pick the next chain link (__CV_EXPAND is not defined in this chunk)
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE2 // SSE2 helper macros; this branch: CV_CPU_COMPILE_SSE2 is defined, so support is the constant 1
+#  define CV_TRY_SSE2 1
+#  define CV_CPU_FORCE_SSE2 1
+#  define CV_CPU_HAS_SUPPORT_SSE2 1
+#  define CV_CPU_CALL_SSE2(fn, args) return (cpu_baseline::fn args) // unconditional return via cpu_baseline::
+#  define CV_CPU_CALL_SSE2_(fn, args) return (opt_SSE2::fn args) // unconditional return via opt_SSE2::
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE2 // dispatch branch: support comes from cv::checkHardwareSupport() wherever the macro is expanded
+#  define CV_TRY_SSE2 1
+#  define CV_CPU_FORCE_SSE2 0
+#  define CV_CPU_HAS_SUPPORT_SSE2 (cv::checkHardwareSupport(CV_CPU_SSE2))
+#  define CV_CPU_CALL_SSE2(fn, args) if (CV_CPU_HAS_SUPPORT_SSE2) return (opt_SSE2::fn args) // expands to a bare `if`; control falls through when the check is false
+#  define CV_CPU_CALL_SSE2_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE2) return (opt_SSE2::fn args)
+#else // neither condition above held: flags are 0 and both call macros expand to nothing
+#  define CV_TRY_SSE2 0
+#  define CV_CPU_FORCE_SSE2 0
+#  define CV_CPU_HAS_SUPPORT_SSE2 0
+#  define CV_CPU_CALL_SSE2(fn, args)
+#  define CV_CPU_CALL_SSE2_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_SSE2(fn, args, mode, ...)  CV_CPU_CALL_SSE2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) // emit the SSE2 call, then paste `mode` to pick the next chain link (__CV_EXPAND is not defined in this chunk)
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE3 // SSE3 helper macros; this branch: CV_CPU_COMPILE_SSE3 is defined, so support is the constant 1
+#  define CV_TRY_SSE3 1
+#  define CV_CPU_FORCE_SSE3 1
+#  define CV_CPU_HAS_SUPPORT_SSE3 1
+#  define CV_CPU_CALL_SSE3(fn, args) return (cpu_baseline::fn args) // unconditional return via cpu_baseline::
+#  define CV_CPU_CALL_SSE3_(fn, args) return (opt_SSE3::fn args) // unconditional return via opt_SSE3::
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE3 // dispatch branch: support comes from cv::checkHardwareSupport() wherever the macro is expanded
+#  define CV_TRY_SSE3 1
+#  define CV_CPU_FORCE_SSE3 0
+#  define CV_CPU_HAS_SUPPORT_SSE3 (cv::checkHardwareSupport(CV_CPU_SSE3))
+#  define CV_CPU_CALL_SSE3(fn, args) if (CV_CPU_HAS_SUPPORT_SSE3) return (opt_SSE3::fn args) // expands to a bare `if`; control falls through when the check is false
+#  define CV_CPU_CALL_SSE3_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE3) return (opt_SSE3::fn args)
+#else // neither condition above held: flags are 0 and both call macros expand to nothing
+#  define CV_TRY_SSE3 0
+#  define CV_CPU_FORCE_SSE3 0
+#  define CV_CPU_HAS_SUPPORT_SSE3 0
+#  define CV_CPU_CALL_SSE3(fn, args)
+#  define CV_CPU_CALL_SSE3_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_SSE3(fn, args, mode, ...)  CV_CPU_CALL_SSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) // emit the SSE3 call, then paste `mode` to pick the next chain link (__CV_EXPAND is not defined in this chunk)
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSSE3 // SSSE3 helper macros; this branch: CV_CPU_COMPILE_SSSE3 is defined, so support is the constant 1
+#  define CV_TRY_SSSE3 1
+#  define CV_CPU_FORCE_SSSE3 1
+#  define CV_CPU_HAS_SUPPORT_SSSE3 1
+#  define CV_CPU_CALL_SSSE3(fn, args) return (cpu_baseline::fn args) // unconditional return via cpu_baseline::
+#  define CV_CPU_CALL_SSSE3_(fn, args) return (opt_SSSE3::fn args) // unconditional return via opt_SSSE3::
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSSE3 // dispatch branch: support comes from cv::checkHardwareSupport() wherever the macro is expanded
+#  define CV_TRY_SSSE3 1
+#  define CV_CPU_FORCE_SSSE3 0
+#  define CV_CPU_HAS_SUPPORT_SSSE3 (cv::checkHardwareSupport(CV_CPU_SSSE3))
+#  define CV_CPU_CALL_SSSE3(fn, args) if (CV_CPU_HAS_SUPPORT_SSSE3) return (opt_SSSE3::fn args) // expands to a bare `if`; control falls through when the check is false
+#  define CV_CPU_CALL_SSSE3_(fn, args) if (CV_CPU_HAS_SUPPORT_SSSE3) return (opt_SSSE3::fn args)
+#else // neither condition above held: flags are 0 and both call macros expand to nothing
+#  define CV_TRY_SSSE3 0
+#  define CV_CPU_FORCE_SSSE3 0
+#  define CV_CPU_HAS_SUPPORT_SSSE3 0
+#  define CV_CPU_CALL_SSSE3(fn, args)
+#  define CV_CPU_CALL_SSSE3_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_SSSE3(fn, args, mode, ...)  CV_CPU_CALL_SSSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) // emit the SSSE3 call, then paste `mode` to pick the next chain link (__CV_EXPAND is not defined in this chunk)
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_1 // SSE4_1 helper macros; this branch: CV_CPU_COMPILE_SSE4_1 is defined, so support is the constant 1
+#  define CV_TRY_SSE4_1 1
+#  define CV_CPU_FORCE_SSE4_1 1
+#  define CV_CPU_HAS_SUPPORT_SSE4_1 1
+#  define CV_CPU_CALL_SSE4_1(fn, args) return (cpu_baseline::fn args) // unconditional return via cpu_baseline::
+#  define CV_CPU_CALL_SSE4_1_(fn, args) return (opt_SSE4_1::fn args) // unconditional return via opt_SSE4_1::
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_1 // dispatch branch: support comes from cv::checkHardwareSupport() wherever the macro is expanded
+#  define CV_TRY_SSE4_1 1
+#  define CV_CPU_FORCE_SSE4_1 0
+#  define CV_CPU_HAS_SUPPORT_SSE4_1 (cv::checkHardwareSupport(CV_CPU_SSE4_1))
+#  define CV_CPU_CALL_SSE4_1(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_1) return (opt_SSE4_1::fn args) // expands to a bare `if`; control falls through when the check is false
+#  define CV_CPU_CALL_SSE4_1_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_1) return (opt_SSE4_1::fn args)
+#else // neither condition above held: flags are 0 and both call macros expand to nothing
+#  define CV_TRY_SSE4_1 0
+#  define CV_CPU_FORCE_SSE4_1 0
+#  define CV_CPU_HAS_SUPPORT_SSE4_1 0
+#  define CV_CPU_CALL_SSE4_1(fn, args)
+#  define CV_CPU_CALL_SSE4_1_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_SSE4_1(fn, args, mode, ...)  CV_CPU_CALL_SSE4_1(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) // emit the SSE4_1 call, then paste `mode` to pick the next chain link (__CV_EXPAND is not defined in this chunk)
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_2 // SSE4_2 helper macros; this branch: CV_CPU_COMPILE_SSE4_2 is defined, so support is the constant 1
+#  define CV_TRY_SSE4_2 1
+#  define CV_CPU_FORCE_SSE4_2 1
+#  define CV_CPU_HAS_SUPPORT_SSE4_2 1
+#  define CV_CPU_CALL_SSE4_2(fn, args) return (cpu_baseline::fn args) // unconditional return via cpu_baseline::
+#  define CV_CPU_CALL_SSE4_2_(fn, args) return (opt_SSE4_2::fn args) // unconditional return via opt_SSE4_2::
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_2 // dispatch branch: support comes from cv::checkHardwareSupport() wherever the macro is expanded
+#  define CV_TRY_SSE4_2 1
+#  define CV_CPU_FORCE_SSE4_2 0
+#  define CV_CPU_HAS_SUPPORT_SSE4_2 (cv::checkHardwareSupport(CV_CPU_SSE4_2))
+#  define CV_CPU_CALL_SSE4_2(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_2) return (opt_SSE4_2::fn args) // expands to a bare `if`; control falls through when the check is false
+#  define CV_CPU_CALL_SSE4_2_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_2) return (opt_SSE4_2::fn args)
+#else // neither condition above held: flags are 0 and both call macros expand to nothing
+#  define CV_TRY_SSE4_2 0
+#  define CV_CPU_FORCE_SSE4_2 0
+#  define CV_CPU_HAS_SUPPORT_SSE4_2 0
+#  define CV_CPU_CALL_SSE4_2(fn, args)
+#  define CV_CPU_CALL_SSE4_2_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_SSE4_2(fn, args, mode, ...)  CV_CPU_CALL_SSE4_2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) // emit the SSE4_2 call, then paste `mode` to pick the next chain link (__CV_EXPAND is not defined in this chunk)
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_POPCNT // POPCNT helper macros; this branch: CV_CPU_COMPILE_POPCNT is defined, so support is the constant 1
+#  define CV_TRY_POPCNT 1
+#  define CV_CPU_FORCE_POPCNT 1
+#  define CV_CPU_HAS_SUPPORT_POPCNT 1
+#  define CV_CPU_CALL_POPCNT(fn, args) return (cpu_baseline::fn args) // unconditional return via cpu_baseline::
+#  define CV_CPU_CALL_POPCNT_(fn, args) return (opt_POPCNT::fn args) // unconditional return via opt_POPCNT::
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_POPCNT // dispatch branch: support comes from cv::checkHardwareSupport() wherever the macro is expanded
+#  define CV_TRY_POPCNT 1
+#  define CV_CPU_FORCE_POPCNT 0
+#  define CV_CPU_HAS_SUPPORT_POPCNT (cv::checkHardwareSupport(CV_CPU_POPCNT))
+#  define CV_CPU_CALL_POPCNT(fn, args) if (CV_CPU_HAS_SUPPORT_POPCNT) return (opt_POPCNT::fn args) // expands to a bare `if`; control falls through when the check is false
+#  define CV_CPU_CALL_POPCNT_(fn, args) if (CV_CPU_HAS_SUPPORT_POPCNT) return (opt_POPCNT::fn args)
+#else // neither condition above held: flags are 0 and both call macros expand to nothing
+#  define CV_TRY_POPCNT 0
+#  define CV_CPU_FORCE_POPCNT 0
+#  define CV_CPU_HAS_SUPPORT_POPCNT 0
+#  define CV_CPU_CALL_POPCNT(fn, args)
+#  define CV_CPU_CALL_POPCNT_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_POPCNT(fn, args, mode, ...)  CV_CPU_CALL_POPCNT(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) // emit the POPCNT call, then paste `mode` to pick the next chain link (__CV_EXPAND is not defined in this chunk)
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX // AVX helper macros; this branch: CV_CPU_COMPILE_AVX is defined, so support is the constant 1
+#  define CV_TRY_AVX 1
+#  define CV_CPU_FORCE_AVX 1
+#  define CV_CPU_HAS_SUPPORT_AVX 1
+#  define CV_CPU_CALL_AVX(fn, args) return (cpu_baseline::fn args) // unconditional return via cpu_baseline::
+#  define CV_CPU_CALL_AVX_(fn, args) return (opt_AVX::fn args) // unconditional return via opt_AVX::
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX // dispatch branch: support comes from cv::checkHardwareSupport() wherever the macro is expanded
+#  define CV_TRY_AVX 1
+#  define CV_CPU_FORCE_AVX 0
+#  define CV_CPU_HAS_SUPPORT_AVX (cv::checkHardwareSupport(CV_CPU_AVX))
+#  define CV_CPU_CALL_AVX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX) return (opt_AVX::fn args) // expands to a bare `if`; control falls through when the check is false
+#  define CV_CPU_CALL_AVX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX) return (opt_AVX::fn args)
+#else // neither condition above held: flags are 0 and both call macros expand to nothing
+#  define CV_TRY_AVX 0
+#  define CV_CPU_FORCE_AVX 0
+#  define CV_CPU_HAS_SUPPORT_AVX 0
+#  define CV_CPU_CALL_AVX(fn, args)
+#  define CV_CPU_CALL_AVX_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_AVX(fn, args, mode, ...)  CV_CPU_CALL_AVX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) // emit the AVX call, then paste `mode` to pick the next chain link (__CV_EXPAND is not defined in this chunk)
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FP16 // FP16 helper macros; this branch: CV_CPU_COMPILE_FP16 is defined, so support is the constant 1
+#  define CV_TRY_FP16 1
+#  define CV_CPU_FORCE_FP16 1
+#  define CV_CPU_HAS_SUPPORT_FP16 1
+#  define CV_CPU_CALL_FP16(fn, args) return (cpu_baseline::fn args) // unconditional return via cpu_baseline::
+#  define CV_CPU_CALL_FP16_(fn, args) return (opt_FP16::fn args) // unconditional return via opt_FP16::
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FP16 // dispatch branch: support comes from cv::checkHardwareSupport() wherever the macro is expanded
+#  define CV_TRY_FP16 1
+#  define CV_CPU_FORCE_FP16 0
+#  define CV_CPU_HAS_SUPPORT_FP16 (cv::checkHardwareSupport(CV_CPU_FP16))
+#  define CV_CPU_CALL_FP16(fn, args) if (CV_CPU_HAS_SUPPORT_FP16) return (opt_FP16::fn args) // expands to a bare `if`; control falls through when the check is false
+#  define CV_CPU_CALL_FP16_(fn, args) if (CV_CPU_HAS_SUPPORT_FP16) return (opt_FP16::fn args)
+#else // neither condition above held: flags are 0 and both call macros expand to nothing
+#  define CV_TRY_FP16 0
+#  define CV_CPU_FORCE_FP16 0
+#  define CV_CPU_HAS_SUPPORT_FP16 0
+#  define CV_CPU_CALL_FP16(fn, args)
+#  define CV_CPU_CALL_FP16_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_FP16(fn, args, mode, ...)  CV_CPU_CALL_FP16(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) // emit the FP16 call, then paste `mode` to pick the next chain link (__CV_EXPAND is not defined in this chunk)
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX2 // AVX2 helper macros; this branch: CV_CPU_COMPILE_AVX2 is defined, so support is the constant 1
+#  define CV_TRY_AVX2 1
+#  define CV_CPU_FORCE_AVX2 1
+#  define CV_CPU_HAS_SUPPORT_AVX2 1
+#  define CV_CPU_CALL_AVX2(fn, args) return (cpu_baseline::fn args) // unconditional return via cpu_baseline::
+#  define CV_CPU_CALL_AVX2_(fn, args) return (opt_AVX2::fn args) // unconditional return via opt_AVX2::
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX2 // dispatch branch: support comes from cv::checkHardwareSupport() wherever the macro is expanded
+#  define CV_TRY_AVX2 1
+#  define CV_CPU_FORCE_AVX2 0
+#  define CV_CPU_HAS_SUPPORT_AVX2 (cv::checkHardwareSupport(CV_CPU_AVX2))
+#  define CV_CPU_CALL_AVX2(fn, args) if (CV_CPU_HAS_SUPPORT_AVX2) return (opt_AVX2::fn args) // expands to a bare `if`; control falls through when the check is false
+#  define CV_CPU_CALL_AVX2_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX2) return (opt_AVX2::fn args)
+#else // neither condition above held: flags are 0 and both call macros expand to nothing
+#  define CV_TRY_AVX2 0
+#  define CV_CPU_FORCE_AVX2 0
+#  define CV_CPU_HAS_SUPPORT_AVX2 0
+#  define CV_CPU_CALL_AVX2(fn, args)
+#  define CV_CPU_CALL_AVX2_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_AVX2(fn, args, mode, ...)  CV_CPU_CALL_AVX2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) // emit the AVX2 call, then paste `mode` to pick the next chain link (__CV_EXPAND is not defined in this chunk)
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FMA3 // FMA3 helper macros; this branch: CV_CPU_COMPILE_FMA3 is defined, so support is the constant 1
+#  define CV_TRY_FMA3 1
+#  define CV_CPU_FORCE_FMA3 1
+#  define CV_CPU_HAS_SUPPORT_FMA3 1
+#  define CV_CPU_CALL_FMA3(fn, args) return (cpu_baseline::fn args) // unconditional return via cpu_baseline::
+#  define CV_CPU_CALL_FMA3_(fn, args) return (opt_FMA3::fn args) // unconditional return via opt_FMA3::
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FMA3 // dispatch branch: support comes from cv::checkHardwareSupport() wherever the macro is expanded
+#  define CV_TRY_FMA3 1
+#  define CV_CPU_FORCE_FMA3 0
+#  define CV_CPU_HAS_SUPPORT_FMA3 (cv::checkHardwareSupport(CV_CPU_FMA3))
+#  define CV_CPU_CALL_FMA3(fn, args) if (CV_CPU_HAS_SUPPORT_FMA3) return (opt_FMA3::fn args) // expands to a bare `if`; control falls through when the check is false
+#  define CV_CPU_CALL_FMA3_(fn, args) if (CV_CPU_HAS_SUPPORT_FMA3) return (opt_FMA3::fn args)
+#else // neither condition above held: flags are 0 and both call macros expand to nothing
+#  define CV_TRY_FMA3 0
+#  define CV_CPU_FORCE_FMA3 0
+#  define CV_CPU_HAS_SUPPORT_FMA3 0
+#  define CV_CPU_CALL_FMA3(fn, args)
+#  define CV_CPU_CALL_FMA3_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_FMA3(fn, args, mode, ...)  CV_CPU_CALL_FMA3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) // emit the FMA3 call, then paste `mode` to pick the next chain link (__CV_EXPAND is not defined in this chunk)
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX_512F // AVX_512F helper macros; this branch: CV_CPU_COMPILE_AVX_512F is defined, so support is the constant 1
+#  define CV_TRY_AVX_512F 1
+#  define CV_CPU_FORCE_AVX_512F 1
+#  define CV_CPU_HAS_SUPPORT_AVX_512F 1
+#  define CV_CPU_CALL_AVX_512F(fn, args) return (cpu_baseline::fn args) // unconditional return via cpu_baseline::
+#  define CV_CPU_CALL_AVX_512F_(fn, args) return (opt_AVX_512F::fn args) // unconditional return via opt_AVX_512F::
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX_512F // dispatch branch: support comes from cv::checkHardwareSupport() wherever the macro is expanded
+#  define CV_TRY_AVX_512F 1
+#  define CV_CPU_FORCE_AVX_512F 0
+#  define CV_CPU_HAS_SUPPORT_AVX_512F (cv::checkHardwareSupport(CV_CPU_AVX_512F))
+#  define CV_CPU_CALL_AVX_512F(fn, args) if (CV_CPU_HAS_SUPPORT_AVX_512F) return (opt_AVX_512F::fn args) // expands to a bare `if`; control falls through when the check is false
+#  define CV_CPU_CALL_AVX_512F_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX_512F) return (opt_AVX_512F::fn args)
+#else // neither condition above held: flags are 0 and both call macros expand to nothing
+#  define CV_TRY_AVX_512F 0
+#  define CV_CPU_FORCE_AVX_512F 0
+#  define CV_CPU_HAS_SUPPORT_AVX_512F 0
+#  define CV_CPU_CALL_AVX_512F(fn, args)
+#  define CV_CPU_CALL_AVX_512F_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_AVX_512F(fn, args, mode, ...)  CV_CPU_CALL_AVX_512F(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) // emit the AVX_512F call, then paste `mode` to pick the next chain link (__CV_EXPAND is not defined in this chunk)
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_COMMON // AVX512_COMMON helper macros; this branch: CV_CPU_COMPILE_AVX512_COMMON is defined, so support is the constant 1
+#  define CV_TRY_AVX512_COMMON 1
+#  define CV_CPU_FORCE_AVX512_COMMON 1
+#  define CV_CPU_HAS_SUPPORT_AVX512_COMMON 1
+#  define CV_CPU_CALL_AVX512_COMMON(fn, args) return (cpu_baseline::fn args) // unconditional return via cpu_baseline::
+#  define CV_CPU_CALL_AVX512_COMMON_(fn, args) return (opt_AVX512_COMMON::fn args) // unconditional return via opt_AVX512_COMMON::
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_COMMON // dispatch branch: support comes from cv::checkHardwareSupport() wherever the macro is expanded
+#  define CV_TRY_AVX512_COMMON 1
+#  define CV_CPU_FORCE_AVX512_COMMON 0
+#  define CV_CPU_HAS_SUPPORT_AVX512_COMMON (cv::checkHardwareSupport(CV_CPU_AVX512_COMMON))
+#  define CV_CPU_CALL_AVX512_COMMON(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_COMMON) return (opt_AVX512_COMMON::fn args) // expands to a bare `if`; control falls through when the check is false
+#  define CV_CPU_CALL_AVX512_COMMON_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_COMMON) return (opt_AVX512_COMMON::fn args)
+#else // neither condition above held: flags are 0 and both call macros expand to nothing
+#  define CV_TRY_AVX512_COMMON 0
+#  define CV_CPU_FORCE_AVX512_COMMON 0
+#  define CV_CPU_HAS_SUPPORT_AVX512_COMMON 0
+#  define CV_CPU_CALL_AVX512_COMMON(fn, args)
+#  define CV_CPU_CALL_AVX512_COMMON_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_AVX512_COMMON(fn, args, mode, ...)  CV_CPU_CALL_AVX512_COMMON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) // emit the AVX512_COMMON call, then paste `mode` to pick the next chain link (__CV_EXPAND is not defined in this chunk)
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_KNL // AVX512_KNL helper macros; this branch: CV_CPU_COMPILE_AVX512_KNL is defined, so support is the constant 1
+#  define CV_TRY_AVX512_KNL 1
+#  define CV_CPU_FORCE_AVX512_KNL 1
+#  define CV_CPU_HAS_SUPPORT_AVX512_KNL 1
+#  define CV_CPU_CALL_AVX512_KNL(fn, args) return (cpu_baseline::fn args) // unconditional return via cpu_baseline::
+#  define CV_CPU_CALL_AVX512_KNL_(fn, args) return (opt_AVX512_KNL::fn args) // unconditional return via opt_AVX512_KNL::
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_KNL // dispatch branch: support comes from cv::checkHardwareSupport() wherever the macro is expanded
+#  define CV_TRY_AVX512_KNL 1
+#  define CV_CPU_FORCE_AVX512_KNL 0
+#  define CV_CPU_HAS_SUPPORT_AVX512_KNL (cv::checkHardwareSupport(CV_CPU_AVX512_KNL))
+#  define CV_CPU_CALL_AVX512_KNL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNL) return (opt_AVX512_KNL::fn args) // expands to a bare `if`; control falls through when the check is false
+#  define CV_CPU_CALL_AVX512_KNL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNL) return (opt_AVX512_KNL::fn args)
+#else // neither condition above held: flags are 0 and both call macros expand to nothing
+#  define CV_TRY_AVX512_KNL 0
+#  define CV_CPU_FORCE_AVX512_KNL 0
+#  define CV_CPU_HAS_SUPPORT_AVX512_KNL 0
+#  define CV_CPU_CALL_AVX512_KNL(fn, args)
+#  define CV_CPU_CALL_AVX512_KNL_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_AVX512_KNL(fn, args, mode, ...)  CV_CPU_CALL_AVX512_KNL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) // emit the AVX512_KNL call, then paste `mode` to pick the next chain link (__CV_EXPAND is not defined in this chunk)
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_KNM // AVX512_KNM helper macros; this branch: CV_CPU_COMPILE_AVX512_KNM is defined, so support is the constant 1
+#  define CV_TRY_AVX512_KNM 1
+#  define CV_CPU_FORCE_AVX512_KNM 1
+#  define CV_CPU_HAS_SUPPORT_AVX512_KNM 1
+#  define CV_CPU_CALL_AVX512_KNM(fn, args) return (cpu_baseline::fn args) // unconditional return via cpu_baseline::
+#  define CV_CPU_CALL_AVX512_KNM_(fn, args) return (opt_AVX512_KNM::fn args) // unconditional return via opt_AVX512_KNM::
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_KNM // dispatch branch: support comes from cv::checkHardwareSupport() wherever the macro is expanded
+#  define CV_TRY_AVX512_KNM 1
+#  define CV_CPU_FORCE_AVX512_KNM 0
+#  define CV_CPU_HAS_SUPPORT_AVX512_KNM (cv::checkHardwareSupport(CV_CPU_AVX512_KNM))
+#  define CV_CPU_CALL_AVX512_KNM(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNM) return (opt_AVX512_KNM::fn args) // expands to a bare `if`; control falls through when the check is false
+#  define CV_CPU_CALL_AVX512_KNM_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_KNM) return (opt_AVX512_KNM::fn args)
+#else // neither condition above held: flags are 0 and both call macros expand to nothing
+#  define CV_TRY_AVX512_KNM 0
+#  define CV_CPU_FORCE_AVX512_KNM 0
+#  define CV_CPU_HAS_SUPPORT_AVX512_KNM 0
+#  define CV_CPU_CALL_AVX512_KNM(fn, args)
+#  define CV_CPU_CALL_AVX512_KNM_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_AVX512_KNM(fn, args, mode, ...)  CV_CPU_CALL_AVX512_KNM(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) // emit the AVX512_KNM call, then paste `mode` to pick the next chain link (__CV_EXPAND is not defined in this chunk)
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_SKX // AVX512_SKX helper macros; this branch: CV_CPU_COMPILE_AVX512_SKX is defined, so support is the constant 1
+#  define CV_TRY_AVX512_SKX 1
+#  define CV_CPU_FORCE_AVX512_SKX 1
+#  define CV_CPU_HAS_SUPPORT_AVX512_SKX 1
+#  define CV_CPU_CALL_AVX512_SKX(fn, args) return (cpu_baseline::fn args) // unconditional return via cpu_baseline::
+#  define CV_CPU_CALL_AVX512_SKX_(fn, args) return (opt_AVX512_SKX::fn args) // unconditional return via opt_AVX512_SKX::
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_SKX // dispatch branch: support comes from cv::checkHardwareSupport() wherever the macro is expanded
+#  define CV_TRY_AVX512_SKX 1
+#  define CV_CPU_FORCE_AVX512_SKX 0
+#  define CV_CPU_HAS_SUPPORT_AVX512_SKX (cv::checkHardwareSupport(CV_CPU_AVX512_SKX))
+#  define CV_CPU_CALL_AVX512_SKX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_SKX) return (opt_AVX512_SKX::fn args) // expands to a bare `if`; control falls through when the check is false
+#  define CV_CPU_CALL_AVX512_SKX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_SKX) return (opt_AVX512_SKX::fn args)
+#else // neither condition above held: flags are 0 and both call macros expand to nothing
+#  define CV_TRY_AVX512_SKX 0
+#  define CV_CPU_FORCE_AVX512_SKX 0
+#  define CV_CPU_HAS_SUPPORT_AVX512_SKX 0
+#  define CV_CPU_CALL_AVX512_SKX(fn, args)
+#  define CV_CPU_CALL_AVX512_SKX_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_AVX512_SKX(fn, args, mode, ...)  CV_CPU_CALL_AVX512_SKX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) // emit the AVX512_SKX call, then paste `mode` to pick the next chain link (__CV_EXPAND is not defined in this chunk)
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_CNL // AVX512_CNL helper macros; this branch: CV_CPU_COMPILE_AVX512_CNL is defined, so support is the constant 1
+#  define CV_TRY_AVX512_CNL 1
+#  define CV_CPU_FORCE_AVX512_CNL 1
+#  define CV_CPU_HAS_SUPPORT_AVX512_CNL 1
+#  define CV_CPU_CALL_AVX512_CNL(fn, args) return (cpu_baseline::fn args) // unconditional return via cpu_baseline::
+#  define CV_CPU_CALL_AVX512_CNL_(fn, args) return (opt_AVX512_CNL::fn args) // unconditional return via opt_AVX512_CNL::
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_CNL // dispatch branch: support comes from cv::checkHardwareSupport() wherever the macro is expanded
+#  define CV_TRY_AVX512_CNL 1
+#  define CV_CPU_FORCE_AVX512_CNL 0
+#  define CV_CPU_HAS_SUPPORT_AVX512_CNL (cv::checkHardwareSupport(CV_CPU_AVX512_CNL))
+#  define CV_CPU_CALL_AVX512_CNL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CNL) return (opt_AVX512_CNL::fn args) // expands to a bare `if`; control falls through when the check is false
+#  define CV_CPU_CALL_AVX512_CNL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CNL) return (opt_AVX512_CNL::fn args)
+#else // neither condition above held: flags are 0 and both call macros expand to nothing
+#  define CV_TRY_AVX512_CNL 0
+#  define CV_CPU_FORCE_AVX512_CNL 0
+#  define CV_CPU_HAS_SUPPORT_AVX512_CNL 0
+#  define CV_CPU_CALL_AVX512_CNL(fn, args)
+#  define CV_CPU_CALL_AVX512_CNL_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_AVX512_CNL(fn, args, mode, ...)  CV_CPU_CALL_AVX512_CNL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) // emit the AVX512_CNL call, then paste `mode` to pick the next chain link (__CV_EXPAND is not defined in this chunk)
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_CLX // AVX512_CLX helper macros; this branch: CV_CPU_COMPILE_AVX512_CLX is defined, so support is the constant 1
+#  define CV_TRY_AVX512_CLX 1
+#  define CV_CPU_FORCE_AVX512_CLX 1
+#  define CV_CPU_HAS_SUPPORT_AVX512_CLX 1
+#  define CV_CPU_CALL_AVX512_CLX(fn, args) return (cpu_baseline::fn args) // unconditional return via cpu_baseline::
+#  define CV_CPU_CALL_AVX512_CLX_(fn, args) return (opt_AVX512_CLX::fn args) // unconditional return via opt_AVX512_CLX::
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_CLX // dispatch branch: support comes from cv::checkHardwareSupport() wherever the macro is expanded
+#  define CV_TRY_AVX512_CLX 1
+#  define CV_CPU_FORCE_AVX512_CLX 0
+#  define CV_CPU_HAS_SUPPORT_AVX512_CLX (cv::checkHardwareSupport(CV_CPU_AVX512_CLX))
+#  define CV_CPU_CALL_AVX512_CLX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CLX) return (opt_AVX512_CLX::fn args) // expands to a bare `if`; control falls through when the check is false
+#  define CV_CPU_CALL_AVX512_CLX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_CLX) return (opt_AVX512_CLX::fn args)
+#else // neither condition above held: flags are 0 and both call macros expand to nothing
+#  define CV_TRY_AVX512_CLX 0
+#  define CV_CPU_FORCE_AVX512_CLX 0
+#  define CV_CPU_HAS_SUPPORT_AVX512_CLX 0
+#  define CV_CPU_CALL_AVX512_CLX(fn, args)
+#  define CV_CPU_CALL_AVX512_CLX_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_AVX512_CLX(fn, args, mode, ...)  CV_CPU_CALL_AVX512_CLX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) // emit the AVX512_CLX call, then paste `mode` to pick the next chain link (__CV_EXPAND is not defined in this chunk)
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_ICL // AVX512_ICL helper macros; this branch: CV_CPU_COMPILE_AVX512_ICL is defined, so support is the constant 1
+#  define CV_TRY_AVX512_ICL 1
+#  define CV_CPU_FORCE_AVX512_ICL 1
+#  define CV_CPU_HAS_SUPPORT_AVX512_ICL 1
+#  define CV_CPU_CALL_AVX512_ICL(fn, args) return (cpu_baseline::fn args) // unconditional return via cpu_baseline::
+#  define CV_CPU_CALL_AVX512_ICL_(fn, args) return (opt_AVX512_ICL::fn args) // unconditional return via opt_AVX512_ICL::
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_ICL // dispatch branch: support comes from cv::checkHardwareSupport() wherever the macro is expanded
+#  define CV_TRY_AVX512_ICL 1
+#  define CV_CPU_FORCE_AVX512_ICL 0
+#  define CV_CPU_HAS_SUPPORT_AVX512_ICL (cv::checkHardwareSupport(CV_CPU_AVX512_ICL))
+#  define CV_CPU_CALL_AVX512_ICL(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_ICL) return (opt_AVX512_ICL::fn args) // expands to a bare `if`; control falls through when the check is false
+#  define CV_CPU_CALL_AVX512_ICL_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_ICL) return (opt_AVX512_ICL::fn args)
+#else // neither condition above held: flags are 0 and both call macros expand to nothing
+#  define CV_TRY_AVX512_ICL 0
+#  define CV_CPU_FORCE_AVX512_ICL 0
+#  define CV_CPU_HAS_SUPPORT_AVX512_ICL 0
+#  define CV_CPU_CALL_AVX512_ICL(fn, args)
+#  define CV_CPU_CALL_AVX512_ICL_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_AVX512_ICL(fn, args, mode, ...)  CV_CPU_CALL_AVX512_ICL(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) // emit the AVX512_ICL call, then paste `mode` to pick the next chain link (__CV_EXPAND is not defined in this chunk)
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON // NEON helper macros; this branch: CV_CPU_COMPILE_NEON is defined, so support is the constant 1
+#  define CV_TRY_NEON 1
+#  define CV_CPU_FORCE_NEON 1
+#  define CV_CPU_HAS_SUPPORT_NEON 1
+#  define CV_CPU_CALL_NEON(fn, args) return (cpu_baseline::fn args) // unconditional return via cpu_baseline::
+#  define CV_CPU_CALL_NEON_(fn, args) return (opt_NEON::fn args) // unconditional return via opt_NEON::
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_NEON // dispatch branch: support comes from cv::checkHardwareSupport() wherever the macro is expanded
+#  define CV_TRY_NEON 1
+#  define CV_CPU_FORCE_NEON 0
+#  define CV_CPU_HAS_SUPPORT_NEON (cv::checkHardwareSupport(CV_CPU_NEON))
+#  define CV_CPU_CALL_NEON(fn, args) if (CV_CPU_HAS_SUPPORT_NEON) return (opt_NEON::fn args) // expands to a bare `if`; control falls through when the check is false
+#  define CV_CPU_CALL_NEON_(fn, args) if (CV_CPU_HAS_SUPPORT_NEON) return (opt_NEON::fn args)
+#else // neither condition above held: flags are 0 and both call macros expand to nothing
+#  define CV_TRY_NEON 0
+#  define CV_CPU_FORCE_NEON 0
+#  define CV_CPU_HAS_SUPPORT_NEON 0
+#  define CV_CPU_CALL_NEON(fn, args)
+#  define CV_CPU_CALL_NEON_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_NEON(fn, args, mode, ...)  CV_CPU_CALL_NEON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) // emit the NEON call, then paste `mode` to pick the next chain link (__CV_EXPAND is not defined in this chunk)
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_MSA // MSA helper macros; this branch: CV_CPU_COMPILE_MSA is defined, so support is the constant 1
+#  define CV_TRY_MSA 1
+#  define CV_CPU_FORCE_MSA 1
+#  define CV_CPU_HAS_SUPPORT_MSA 1
+#  define CV_CPU_CALL_MSA(fn, args) return (cpu_baseline::fn args) // unconditional return via cpu_baseline::
+#  define CV_CPU_CALL_MSA_(fn, args) return (opt_MSA::fn args) // unconditional return via opt_MSA::
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_MSA // dispatch branch: support comes from cv::checkHardwareSupport() wherever the macro is expanded
+#  define CV_TRY_MSA 1
+#  define CV_CPU_FORCE_MSA 0
+#  define CV_CPU_HAS_SUPPORT_MSA (cv::checkHardwareSupport(CV_CPU_MSA))
+#  define CV_CPU_CALL_MSA(fn, args) if (CV_CPU_HAS_SUPPORT_MSA) return (opt_MSA::fn args) // expands to a bare `if`; control falls through when the check is false
+#  define CV_CPU_CALL_MSA_(fn, args) if (CV_CPU_HAS_SUPPORT_MSA) return (opt_MSA::fn args)
+#else // neither condition above held: flags are 0 and both call macros expand to nothing
+#  define CV_TRY_MSA 0
+#  define CV_CPU_FORCE_MSA 0
+#  define CV_CPU_HAS_SUPPORT_MSA 0
+#  define CV_CPU_CALL_MSA(fn, args)
+#  define CV_CPU_CALL_MSA_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_MSA(fn, args, mode, ...)  CV_CPU_CALL_MSA(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) // emit the MSA call, then paste `mode` to pick the next chain link (__CV_EXPAND is not defined in this chunk)
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_VSX // VSX helper macros; this branch: CV_CPU_COMPILE_VSX is defined, so support is the constant 1
+#  define CV_TRY_VSX 1
+#  define CV_CPU_FORCE_VSX 1
+#  define CV_CPU_HAS_SUPPORT_VSX 1
+#  define CV_CPU_CALL_VSX(fn, args) return (cpu_baseline::fn args) // unconditional return via cpu_baseline::
+#  define CV_CPU_CALL_VSX_(fn, args) return (opt_VSX::fn args) // unconditional return via opt_VSX::
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_VSX // dispatch branch: support comes from cv::checkHardwareSupport() wherever the macro is expanded
+#  define CV_TRY_VSX 1
+#  define CV_CPU_FORCE_VSX 0
+#  define CV_CPU_HAS_SUPPORT_VSX (cv::checkHardwareSupport(CV_CPU_VSX))
+#  define CV_CPU_CALL_VSX(fn, args) if (CV_CPU_HAS_SUPPORT_VSX) return (opt_VSX::fn args) // expands to a bare `if`; control falls through when the check is false
+#  define CV_CPU_CALL_VSX_(fn, args) if (CV_CPU_HAS_SUPPORT_VSX) return (opt_VSX::fn args)
+#else // neither condition above held: flags are 0 and both call macros expand to nothing
+#  define CV_TRY_VSX 0
+#  define CV_CPU_FORCE_VSX 0
+#  define CV_CPU_HAS_SUPPORT_VSX 0
+#  define CV_CPU_CALL_VSX(fn, args)
+#  define CV_CPU_CALL_VSX_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_VSX(fn, args, mode, ...)  CV_CPU_CALL_VSX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) // emit the VSX call, then paste `mode` to pick the next chain link (__CV_EXPAND is not defined in this chunk)
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_VSX3 // VSX3 helper macros; this branch: CV_CPU_COMPILE_VSX3 is defined, so support is the constant 1
+#  define CV_TRY_VSX3 1
+#  define CV_CPU_FORCE_VSX3 1
+#  define CV_CPU_HAS_SUPPORT_VSX3 1
+#  define CV_CPU_CALL_VSX3(fn, args) return (cpu_baseline::fn args) // unconditional return via cpu_baseline::
+#  define CV_CPU_CALL_VSX3_(fn, args) return (opt_VSX3::fn args) // unconditional return via opt_VSX3::
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_VSX3 // dispatch branch: support comes from cv::checkHardwareSupport() wherever the macro is expanded
+#  define CV_TRY_VSX3 1
+#  define CV_CPU_FORCE_VSX3 0
+#  define CV_CPU_HAS_SUPPORT_VSX3 (cv::checkHardwareSupport(CV_CPU_VSX3))
+#  define CV_CPU_CALL_VSX3(fn, args) if (CV_CPU_HAS_SUPPORT_VSX3) return (opt_VSX3::fn args) // expands to a bare `if`; control falls through when the check is false
+#  define CV_CPU_CALL_VSX3_(fn, args) if (CV_CPU_HAS_SUPPORT_VSX3) return (opt_VSX3::fn args)
+#else // neither condition above held: flags are 0 and both call macros expand to nothing
+#  define CV_TRY_VSX3 0
+#  define CV_CPU_FORCE_VSX3 0
+#  define CV_CPU_HAS_SUPPORT_VSX3 0
+#  define CV_CPU_CALL_VSX3(fn, args)
+#  define CV_CPU_CALL_VSX3_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_VSX3(fn, args, mode, ...)  CV_CPU_CALL_VSX3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) // emit the VSX3 call, then paste `mode` to pick the next chain link (__CV_EXPAND is not defined in this chunk)
+
+#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_RVV // RVV helper macros; this branch: CV_CPU_COMPILE_RVV is defined, so support is the constant 1
+#  define CV_TRY_RVV 1
+#  define CV_CPU_FORCE_RVV 1
+#  define CV_CPU_HAS_SUPPORT_RVV 1
+#  define CV_CPU_CALL_RVV(fn, args) return (cpu_baseline::fn args) // unconditional return via cpu_baseline::
+#  define CV_CPU_CALL_RVV_(fn, args) return (opt_RVV::fn args) // unconditional return via opt_RVV::
+#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_RVV // dispatch branch: support comes from cv::checkHardwareSupport() wherever the macro is expanded
+#  define CV_TRY_RVV 1
+#  define CV_CPU_FORCE_RVV 0
+#  define CV_CPU_HAS_SUPPORT_RVV (cv::checkHardwareSupport(CV_CPU_RVV))
+#  define CV_CPU_CALL_RVV(fn, args) if (CV_CPU_HAS_SUPPORT_RVV) return (opt_RVV::fn args) // expands to a bare `if`; control falls through when the check is false
+#  define CV_CPU_CALL_RVV_(fn, args) if (CV_CPU_HAS_SUPPORT_RVV) return (opt_RVV::fn args)
+#else // neither condition above held: flags are 0 and both call macros expand to nothing
+#  define CV_TRY_RVV 0
+#  define CV_CPU_FORCE_RVV 0
+#  define CV_CPU_HAS_SUPPORT_RVV 0
+#  define CV_CPU_CALL_RVV(fn, args)
+#  define CV_CPU_CALL_RVV_(fn, args)
+#endif
+#define __CV_CPU_DISPATCH_CHAIN_RVV(fn, args, mode, ...)  CV_CPU_CALL_RVV(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__)) // emit the RVV call, then paste `mode` to pick the next chain link (__CV_EXPAND is not defined in this chunk)
+
+#define CV_CPU_CALL_BASELINE(fn, args) return (cpu_baseline::fn args) // always returns via cpu_baseline::, with no condition and no #if guard
+#define __CV_CPU_DISPATCH_CHAIN_BASELINE(fn, args, mode, ...)  CV_CPU_CALL_BASELINE(fn, args) /* last in sequence: ignores `mode` and the variadic tail, so no further chain link is expanded */
--- a/3rdparty/opencv/inc/opencv2/core/cvdef.h
+++ b/3rdparty/opencv/inc/opencv2/core/cvdef.h
@@ -0,0 +1,967 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_CVDEF_H
+#define OPENCV_CORE_CVDEF_H
+
+#include "opencv2/core/version.hpp"
+
+//! @addtogroup core_utils
+//! @{
+
+#ifdef OPENCV_INCLUDE_PORT_FILE  // User-provided header file with custom platform configuration
+#include OPENCV_INCLUDE_PORT_FILE
+#endif
+
+#if !defined CV_DOXYGEN && !defined CV_IGNORE_DEBUG_BUILD_GUARD
+#if (defined(_MSC_VER) && (defined(DEBUG) || defined(_DEBUG))) || \
+    (defined(_GLIBCXX_DEBUG) || defined(_GLIBCXX_DEBUG_PEDANTIC))
+// Guard to prevent using of binary incompatible binaries / runtimes
+// https://github.com/opencv/opencv/pull/9161
+#define CV__DEBUG_NS_BEGIN namespace debug_build_guard {
+#define CV__DEBUG_NS_END }
+namespace cv { namespace debug_build_guard { } using namespace debug_build_guard; }
+#endif
+#endif
+
+#ifndef CV__DEBUG_NS_BEGIN
+#define CV__DEBUG_NS_BEGIN
+#define CV__DEBUG_NS_END
+#endif
+
+
+#ifdef __OPENCV_BUILD
+#include "cvconfig.h"
+#endif
+
+#ifndef __CV_EXPAND
+#define __CV_EXPAND(x) x
+#endif
+
+#ifndef __CV_CAT
+#define __CV_CAT__(x, y) x ## y
+#define __CV_CAT_(x, y) __CV_CAT__(x, y)
+#define __CV_CAT(x, y) __CV_CAT_(x, y)
+#endif
+
+#define __CV_VA_NUM_ARGS_HELPER(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, N, ...) N  // yields its 11th argument
+#define __CV_VA_NUM_ARGS(...) __CV_EXPAND(__CV_VA_NUM_ARGS_HELPER(__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0))  // number of arguments passed; correct for 1 to 10 arguments
+
+#ifdef CV_Func
+// CV_Func expands to the enclosing function's name; a value already set (e.g. through the OpenCV port file) is kept
+#elif defined __GNUC__ || (defined (__cplusplus) && (__cplusplus >= 201103))  // C++11 and later provide __func__
+#define CV_Func __func__
+#elif defined __clang__ && (__clang_major__ * 100 + __clang_minor__ >= 305)  // version encoded as major*100 + minor
+#define CV_Func __func__
+#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)  // C99 and later provide __func__
+#define CV_Func __func__
+#elif defined _MSC_VER
+#define CV_Func __FUNCTION__
+#elif defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 600)
+#define CV_Func __FUNCTION__
+#elif defined __IBMCPP__ && __IBMCPP__ >=500
+#define CV_Func __FUNCTION__
+#elif defined __BORLANDC__ && (__BORLANDC__ >= 0x550)
+#define CV_Func __FUNC__
+#else
+#define CV_Func "<unknown>"  // no known spelling for this compiler
+#endif
+
+//! @cond IGNORED
+
+//////////////// static assert /////////////////
+#define CVAUX_CONCAT_EXP(a, b) a##b
+#define CVAUX_CONCAT(a, b) CVAUX_CONCAT_EXP(a,b)
+
+#if defined(__clang__)  // CV_StaticAssert(condition, reason): compile-time check; message is reason plus the stringized condition
+#  ifndef __has_extension
+#    define __has_extension __has_feature /* compatibility, for older versions of clang */
+#  endif
+#  if __has_extension(cxx_static_assert)
+#    define CV_StaticAssert(condition, reason)    static_assert((condition), reason " " #condition)
+#  elif __has_extension(c_static_assert)
+#    define CV_StaticAssert(condition, reason)    _Static_assert((condition), reason " " #condition)
+#  endif
+#elif defined(__GNUC__)
+#  if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L)
+#    define CV_StaticAssert(condition, reason)    static_assert((condition), reason " " #condition)
+#  endif
+#elif defined(_MSC_VER)
+#  if _MSC_VER >= 1600 /* MSVC 10 */
+#    define CV_StaticAssert(condition, reason)    static_assert((condition), reason " " #condition)
+#  endif
+#endif
+#ifndef CV_StaticAssert  // none of the branches above selected static_assert / _Static_assert
+#  if !defined(__clang__) && defined(__GNUC__) && (__GNUC__*100 + __GNUC_MINOR__ > 302)
+#    define CV_StaticAssert(condition, reason) ({ extern int __attribute__((error("CV_StaticAssert: " reason " " #condition))) CV_StaticAssert(); ((condition) ? 0 : CV_StaticAssert()); })
+#  else
+namespace cv {
+     template <bool x> struct CV_StaticAssert_failed;  // declared only: the <false> case stays incomplete, so sizeof() on it does not compile
+     template <> struct CV_StaticAssert_failed<true> { enum { val = 1 }; };
+     template<int x> struct CV_StaticAssert_test {};
+}
+#    define CV_StaticAssert(condition, reason)\
+       typedef cv::CV_StaticAssert_test< sizeof(cv::CV_StaticAssert_failed< static_cast<bool>(condition) >) > CVAUX_CONCAT(CV_StaticAssert_failed_at_, __LINE__)
+#  endif
+#endif
+
+// Suppress warning "-Wdeprecated-declarations" / C4996
+#if defined(_MSC_VER)
+    #define CV_DO_PRAGMA(x) __pragma(x)
+#elif defined(__GNUC__)
+    #define CV_DO_PRAGMA(x) _Pragma (#x)
+#else
+    #define CV_DO_PRAGMA(x)
+#endif
+
+#ifdef _MSC_VER
+#define CV_SUPPRESS_DEPRECATED_START \
+    CV_DO_PRAGMA(warning(push)) \
+    CV_DO_PRAGMA(warning(disable: 4996))
+#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(warning(pop))
+#elif defined (__clang__) || ((__GNUC__)  && (__GNUC__*100 + __GNUC_MINOR__ > 405))
+#define CV_SUPPRESS_DEPRECATED_START \
+    CV_DO_PRAGMA(GCC diagnostic push) \
+    CV_DO_PRAGMA(GCC diagnostic ignored "-Wdeprecated-declarations")
+#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(GCC diagnostic pop)
+#else
+#define CV_SUPPRESS_DEPRECATED_START
+#define CV_SUPPRESS_DEPRECATED_END
+#endif
+
+#define CV_UNUSED(name) (void)name
+
+//! @endcond
+
+// undef problematic defines sometimes defined by system headers (windows.h in particular)
+#undef small
+#undef min
+#undef max
+#undef abs
+#undef Complex
+
+#if defined __cplusplus
+#include <limits>
+#else
+#include <limits.h>
+#endif
+
+#include "opencv2/core/hal/interface.h"
+
+#if defined __ICL
+#  define CV_ICC   __ICL
+#elif defined __ICC
+#  define CV_ICC   __ICC
+#elif defined __ECL
+#  define CV_ICC   __ECL
+#elif defined __ECC
+#  define CV_ICC   __ECC
+#elif defined __INTEL_COMPILER
+#  define CV_ICC   __INTEL_COMPILER
+#endif
+
+#ifndef CV_INLINE
+#  if defined __cplusplus
+#    define CV_INLINE static inline
+#  elif defined _MSC_VER
+#    define CV_INLINE __inline
+#  else
+#    define CV_INLINE static
+#  endif
+#endif
+
+#ifndef CV_ALWAYS_INLINE
+#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
+#define CV_ALWAYS_INLINE inline __attribute__((always_inline))
+#elif defined(_MSC_VER)
+#define CV_ALWAYS_INLINE __forceinline
+#else
+#define CV_ALWAYS_INLINE inline
+#endif
+#endif
+
+#if defined CV_DISABLE_OPTIMIZATION || (defined CV_ICC && !defined CV_ENABLE_UNROLLED)
+#  define CV_ENABLE_UNROLLED 0
+#else
+#  define CV_ENABLE_UNROLLED 1
+#endif
+
+#ifdef __GNUC__
+#  define CV_DECL_ALIGNED(x) __attribute__ ((aligned (x)))
+#elif defined _MSC_VER
+#  define CV_DECL_ALIGNED(x) __declspec(align(x))
+#else
+#  define CV_DECL_ALIGNED(x)
+#endif
+
+/* CPU features and intrinsics support */
+#define CV_CPU_NONE             0
+#define CV_CPU_MMX              1
+#define CV_CPU_SSE              2
+#define CV_CPU_SSE2             3
+#define CV_CPU_SSE3             4
+#define CV_CPU_SSSE3            5
+#define CV_CPU_SSE4_1           6
+#define CV_CPU_SSE4_2           7
+#define CV_CPU_POPCNT           8
+#define CV_CPU_FP16             9
+#define CV_CPU_AVX              10
+#define CV_CPU_AVX2             11
+#define CV_CPU_FMA3             12
+
+#define CV_CPU_AVX_512F         13
+#define CV_CPU_AVX_512BW        14
+#define CV_CPU_AVX_512CD        15
+#define CV_CPU_AVX_512DQ        16
+#define CV_CPU_AVX_512ER        17
+#define CV_CPU_AVX_512IFMA512   18 // deprecated
+#define CV_CPU_AVX_512IFMA      18
+#define CV_CPU_AVX_512PF        19
+#define CV_CPU_AVX_512VBMI      20
+#define CV_CPU_AVX_512VL        21
+#define CV_CPU_AVX_512VBMI2     22
+#define CV_CPU_AVX_512VNNI      23
+#define CV_CPU_AVX_512BITALG    24
+#define CV_CPU_AVX_512VPOPCNTDQ 25
+#define CV_CPU_AVX_5124VNNIW    26
+#define CV_CPU_AVX_5124FMAPS    27
+
+#define CV_CPU_NEON             100
+
+#define CV_CPU_MSA              150
+
+#define CV_CPU_RISCVV           170
+
+#define CV_CPU_VSX              200
+#define CV_CPU_VSX3             201
+
+#define CV_CPU_RVV              210
+
+// CPU features groups
+#define CV_CPU_AVX512_SKX       256
+#define CV_CPU_AVX512_COMMON    257
+#define CV_CPU_AVX512_KNL       258
+#define CV_CPU_AVX512_KNM       259
+#define CV_CPU_AVX512_CNL       260
+#define CV_CPU_AVX512_CLX       261
+#define CV_CPU_AVX512_ICL       262
+
+// when adding to this list remember to update the following enum
+#define CV_HARDWARE_MAX_FEATURE 512
+
+/** @brief Available CPU features. Every CPU_x enumerator has the same value as the CV_CPU_x macro
+defined above; CV_CPU_NONE (0) has no enumerator and CPU_MAX_FEATURE equals CV_HARDWARE_MAX_FEATURE. */
+enum CpuFeatures {
+    CPU_MMX             = 1,
+    CPU_SSE             = 2,
+    CPU_SSE2            = 3,
+    CPU_SSE3            = 4,
+    CPU_SSSE3           = 5,
+    CPU_SSE4_1          = 6,
+    CPU_SSE4_2          = 7,
+    CPU_POPCNT          = 8,
+    CPU_FP16            = 9,
+    CPU_AVX             = 10,
+    CPU_AVX2            = 11,
+    CPU_FMA3            = 12,
+
+    CPU_AVX_512F        = 13,
+    CPU_AVX_512BW       = 14,
+    CPU_AVX_512CD       = 15,
+    CPU_AVX_512DQ       = 16,
+    CPU_AVX_512ER       = 17,
+    CPU_AVX_512IFMA512  = 18, // deprecated alias of CPU_AVX_512IFMA (same value)
+    CPU_AVX_512IFMA     = 18,
+    CPU_AVX_512PF       = 19,
+    CPU_AVX_512VBMI     = 20,
+    CPU_AVX_512VL       = 21,
+    CPU_AVX_512VBMI2    = 22,
+    CPU_AVX_512VNNI     = 23,
+    CPU_AVX_512BITALG   = 24,
+    CPU_AVX_512VPOPCNTDQ= 25,
+    CPU_AVX_5124VNNIW   = 26,
+    CPU_AVX_5124FMAPS   = 27,
+
+    CPU_NEON            = 100,
+
+    CPU_MSA             = 150,
+
+    CPU_RISCVV          = 170,
+
+    CPU_VSX             = 200,
+    CPU_VSX3            = 201,
+
+    CPU_RVV             = 210,
+
+    CPU_AVX512_SKX      = 256, //!< Skylake-X with AVX-512F/CD/BW/DQ/VL
+    CPU_AVX512_COMMON   = 257, //!< Common instructions AVX-512F/CD for all CPUs that support AVX-512
+    CPU_AVX512_KNL      = 258, //!< Knights Landing with AVX-512F/CD/ER/PF
+    CPU_AVX512_KNM      = 259, //!< Knights Mill with AVX-512F/CD/ER/PF/4FMAPS/4VNNIW/VPOPCNTDQ
+    CPU_AVX512_CNL      = 260, //!< Cannon Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI
+    CPU_AVX512_CLX      = 261, //!< Cascade Lake with AVX-512F/CD/BW/DQ/VL/VNNI
+    CPU_AVX512_ICL      = 262, //!< Ice Lake with AVX-512F/CD/BW/DQ/VL/IFMA/VBMI/VNNI/VBMI2/BITALG/VPOPCNTDQ
+
+    CPU_MAX_FEATURE     = 512  // see CV_HARDWARE_MAX_FEATURE
+};
+
+
+#include "cv_cpu_dispatch.h"
+
+#if !defined(CV_STRONG_ALIGNMENT) && defined(__arm__) && !(defined(__aarch64__) || defined(_M_ARM64))
+// int*, int64* should be properly aligned pointers on ARMv7
+#define CV_STRONG_ALIGNMENT 1
+#endif
+#if !defined(CV_STRONG_ALIGNMENT)
+#define CV_STRONG_ALIGNMENT 0  // default when neither the user nor the ARM check above defined it
+#endif
+
+/* fundamental constants */
+#define CV_PI   3.1415926535897932384626433832795
+#define CV_2PI  6.283185307179586476925286766559
+#define CV_LOG2 0.69314718055994530941723212145818
+
+#if defined __ARM_FP16_FORMAT_IEEE \
+    && !defined __CUDACC__
+#  define CV_FP16_TYPE 1
+#else
+#  define CV_FP16_TYPE 0
+#endif
+
+typedef union Cv16suf  // members overlay the same storage: short, ushort and (when available) __fp16
+{
+    short i;
+    ushort u;
+#if CV_FP16_TYPE
+    __fp16 h;  // member exists only when CV_FP16_TYPE is non-zero
+#endif
+}
+Cv16suf;
+
+typedef union Cv32suf  // members overlay the same storage: int, unsigned and float
+{
+    int i;
+    unsigned u;
+    float f;
+}
+Cv32suf;
+
+typedef union Cv64suf  // members overlay the same storage: int64, uint64 and double
+{
+    int64 i;
+    uint64 u;
+    double f;
+}
+Cv64suf;
+
+#ifndef OPENCV_ABI_COMPATIBILITY
+#define OPENCV_ABI_COMPATIBILITY 400
+#endif
+
+#ifdef __OPENCV_BUILD
+#  define DISABLE_OPENCV_3_COMPATIBILITY
+#  define OPENCV_DISABLE_DEPRECATED_COMPATIBILITY
+#endif
+
+#ifndef CV_EXPORTS
+# if (defined _WIN32 || defined WINCE || defined __CYGWIN__) && defined(CVAPI_EXPORTS)
+#   define CV_EXPORTS __declspec(dllexport)
+# elif defined __GNUC__ && __GNUC__ >= 4 && (defined(CVAPI_EXPORTS) || defined(__APPLE__))
+#   define CV_EXPORTS __attribute__ ((visibility ("default")))
+# endif
+#endif
+
+#ifndef CV_EXPORTS
+# define CV_EXPORTS
+#endif
+
+#ifdef _MSC_VER
+#   define CV_EXPORTS_TEMPLATE
+#else
+#   define CV_EXPORTS_TEMPLATE CV_EXPORTS
+#endif
+
+#ifndef CV_DEPRECATED
+#  if defined(__GNUC__)
+#    define CV_DEPRECATED __attribute__ ((deprecated))
+#  elif defined(_MSC_VER)
+#    define CV_DEPRECATED __declspec(deprecated)
+#  else
+#    define CV_DEPRECATED
+#  endif
+#endif
+
+#ifndef CV_DEPRECATED_EXTERNAL
+#  if defined(__OPENCV_BUILD)
+#    define CV_DEPRECATED_EXTERNAL /* nothing */
+#  else
+#    define CV_DEPRECATED_EXTERNAL CV_DEPRECATED
+#  endif
+#endif
+
+
+#ifndef CV_EXTERN_C
+#  ifdef __cplusplus
+#    define CV_EXTERN_C extern "C"
+#  else
+#    define CV_EXTERN_C
+#  endif
+#endif
+
+/* special informative macros for wrapper generators */
+#define CV_EXPORTS_W CV_EXPORTS
+#define CV_EXPORTS_W_SIMPLE CV_EXPORTS
+#define CV_EXPORTS_AS(synonym) CV_EXPORTS
+#define CV_EXPORTS_W_MAP CV_EXPORTS
+#define CV_IN_OUT
+#define CV_OUT
+#define CV_PROP
+#define CV_PROP_RW
+#define CV_WRAP
+#define CV_WRAP_AS(synonym)
+#define CV_WRAP_MAPPABLE(mappable)
+#define CV_WRAP_PHANTOM(phantom_header)
+#define CV_WRAP_DEFAULT(val)
+
+/****************************************************************************************\
+*                                  Matrix type (Mat)                                     *
+\****************************************************************************************/
+
+#define CV_MAT_CN_MASK          ((CV_CN_MAX - 1) << CV_CN_SHIFT)
+#define CV_MAT_CN(flags)        ((((flags) & CV_MAT_CN_MASK) >> CV_CN_SHIFT) + 1)
+#define CV_MAT_TYPE_MASK        (CV_DEPTH_MAX*CV_CN_MAX - 1)
+#define CV_MAT_TYPE(flags)      ((flags) & CV_MAT_TYPE_MASK)
+#define CV_MAT_CONT_FLAG_SHIFT  14
+#define CV_MAT_CONT_FLAG        (1 << CV_MAT_CONT_FLAG_SHIFT)
+#define CV_IS_MAT_CONT(flags)   ((flags) & CV_MAT_CONT_FLAG)
+#define CV_IS_CONT_MAT          CV_IS_MAT_CONT
+#define CV_SUBMAT_FLAG_SHIFT    15                            // bit position of the submatrix flag
+#define CV_SUBMAT_FLAG          (1 << CV_SUBMAT_FLAG_SHIFT)   // mask with only that bit set
+#define CV_IS_SUBMAT(flags)     ((flags) & CV_SUBMAT_FLAG)    // non-zero when the submatrix bit is set in flags
+
+/** Size of each channel item,
+   0x28442211 = 0010 1000 0100 0100 0010 0010 0001 0001 ~ array of sizeof(arr_type_elem) */
+#define CV_ELEM_SIZE1(type) ((0x28442211 >> CV_MAT_DEPTH(type)*4) & 15)
+
+#define CV_ELEM_SIZE(type) (CV_MAT_CN(type)*CV_ELEM_SIZE1(type))
+
+#ifndef MIN
+#  define MIN(a,b)  ((a) > (b) ? (b) : (a))
+#endif
+
+#ifndef MAX
+#  define MAX(a,b)  ((a) < (b) ? (b) : (a))
+#endif
+
+///////////////////////////////////////// Enum operators ///////////////////////////////////////
+
+/**
+
+Provides compatibility operators for both classical and C++11 enum classes,
+as well as exposing the C++11 enum class members for backwards compatibility
+
+@code
+    // Provides operators required for flag enums
+    CV_ENUM_FLAGS(AccessFlag)
+
+    // Exposes the listed members of the enum class AccessFlag to the current namespace
+    CV_ENUM_CLASS_EXPOSE(AccessFlag, ACCESS_READ [, ACCESS_WRITE [, ...] ]);
+@endcode
+*/
+
+#define __CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST)                                              \
+static const EnumType MEMBER_CONST = EnumType::MEMBER_CONST;                                          \
+
+#define __CV_ENUM_CLASS_EXPOSE_2(EnumType, MEMBER_CONST, ...)                                         \
+__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST);                                                     \
+__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_1(EnumType, __VA_ARGS__));                                         \
+
+#define __CV_ENUM_CLASS_EXPOSE_3(EnumType, MEMBER_CONST, ...)                                         \
+__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST);                                                     \
+__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_2(EnumType, __VA_ARGS__));                                         \
+
+#define __CV_ENUM_CLASS_EXPOSE_4(EnumType, MEMBER_CONST, ...)                                         \
+__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST);                                                     \
+__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_3(EnumType, __VA_ARGS__));                                         \
+
+#define __CV_ENUM_CLASS_EXPOSE_5(EnumType, MEMBER_CONST, ...)                                         \
+__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST);                                                     \
+__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_4(EnumType, __VA_ARGS__));                                         \
+
+#define __CV_ENUM_CLASS_EXPOSE_6(EnumType, MEMBER_CONST, ...)                                         \
+__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST);                                                     \
+__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_5(EnumType, __VA_ARGS__));                                         \
+
+#define __CV_ENUM_CLASS_EXPOSE_7(EnumType, MEMBER_CONST, ...)                                         \
+__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST);                                                     \
+__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_6(EnumType, __VA_ARGS__));                                         \
+
+#define __CV_ENUM_CLASS_EXPOSE_8(EnumType, MEMBER_CONST, ...)                                         \
+__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST);                                                     \
+__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_7(EnumType, __VA_ARGS__));                                         \
+
+#define __CV_ENUM_CLASS_EXPOSE_9(EnumType, MEMBER_CONST, ...)                                         \
+__CV_ENUM_CLASS_EXPOSE_1(EnumType, MEMBER_CONST);                                                     \
+__CV_EXPAND(__CV_ENUM_CLASS_EXPOSE_8(EnumType, __VA_ARGS__));                                         \
+
+#define __CV_ENUM_FLAGS_LOGICAL_NOT(EnumType)                                                         \
+static inline bool operator!(const EnumType& val)                                                     \
+{                                                                                                     \
+    typedef std::underlying_type<EnumType>::type UnderlyingType;                                      \
+    return !static_cast<UnderlyingType>(val);                                                         \
+}                                                                                                     \
+
+#define __CV_ENUM_FLAGS_LOGICAL_NOT_EQ(Arg1Type, Arg2Type)                                            \
+static inline bool operator!=(const Arg1Type& a, const Arg2Type& b)                                   \
+{                                                                                                     \
+    return static_cast<int>(a) != static_cast<int>(b);                                                \
+}                                                                                                     \
+
+#define __CV_ENUM_FLAGS_LOGICAL_EQ(Arg1Type, Arg2Type)                                                \
+static inline bool operator==(const Arg1Type& a, const Arg2Type& b)                                   \
+{                                                                                                     \
+    return static_cast<int>(a) == static_cast<int>(b);                                                \
+}                                                                                                     \
+
+#define __CV_ENUM_FLAGS_BITWISE_NOT(EnumType)                                                         \
+static inline EnumType operator~(const EnumType& val)                                                 \
+{                                                                                                     \
+    typedef std::underlying_type<EnumType>::type UnderlyingType;                                      \
+    return static_cast<EnumType>(~static_cast<UnderlyingType>(val));                                  \
+}                                                                                                     \
+
+#define __CV_ENUM_FLAGS_BITWISE_OR(EnumType, Arg1Type, Arg2Type)                                      \
+static inline EnumType operator|(const Arg1Type& a, const Arg2Type& b)                                \
+{                                                                                                     \
+    typedef std::underlying_type<EnumType>::type UnderlyingType;                                      \
+    return static_cast<EnumType>(static_cast<UnderlyingType>(a) | static_cast<UnderlyingType>(b));    \
+}                                                                                                     \
+
+#define __CV_ENUM_FLAGS_BITWISE_AND(EnumType, Arg1Type, Arg2Type)                                     \
+static inline EnumType operator&(const Arg1Type& a, const Arg2Type& b)                                \
+{                                                                                                     \
+    typedef std::underlying_type<EnumType>::type UnderlyingType;                                      \
+    return static_cast<EnumType>(static_cast<UnderlyingType>(a) & static_cast<UnderlyingType>(b));    \
+}                                                                                                     \
+
+#define __CV_ENUM_FLAGS_BITWISE_XOR(EnumType, Arg1Type, Arg2Type)                                     \
+static inline EnumType operator^(const Arg1Type& a, const Arg2Type& b)                                \
+{                                                                                                     \
+    typedef std::underlying_type<EnumType>::type UnderlyingType;                                      \
+    return static_cast<EnumType>(static_cast<UnderlyingType>(a) ^ static_cast<UnderlyingType>(b));    \
+}                                                                                                     \
+
+#define __CV_ENUM_FLAGS_BITWISE_OR_EQ(EnumType, Arg1Type)                                             \
+static inline EnumType& operator|=(EnumType& _this, const Arg1Type& val)                              \
+{   /* underlying type instead of int: no truncation for enums wider than int */                      \
+    typedef std::underlying_type<EnumType>::type UnderlyingType;                                      \
+    return _this = static_cast<EnumType>(static_cast<UnderlyingType>(_this) | static_cast<UnderlyingType>(val)); \
+}                                                                                                     \
+
+#define __CV_ENUM_FLAGS_BITWISE_AND_EQ(EnumType, Arg1Type)                                            \
+static inline EnumType& operator&=(EnumType& _this, const Arg1Type& val)                              \
+{   /* underlying type instead of int: no truncation for enums wider than int */                      \
+    typedef std::underlying_type<EnumType>::type UnderlyingType;                                      \
+    return _this = static_cast<EnumType>(static_cast<UnderlyingType>(_this) & static_cast<UnderlyingType>(val)); \
+}                                                                                                     \
+
+#define __CV_ENUM_FLAGS_BITWISE_XOR_EQ(EnumType, Arg1Type)                                            \
+static inline EnumType& operator^=(EnumType& _this, const Arg1Type& val)                              \
+{   /* underlying type instead of int: no truncation for enums wider than int */                      \
+    typedef std::underlying_type<EnumType>::type UnderlyingType;                                      \
+    return _this = static_cast<EnumType>(static_cast<UnderlyingType>(_this) ^ static_cast<UnderlyingType>(val)); \
+}                                                                                                     \
+
+#define CV_ENUM_CLASS_EXPOSE(EnumType, ...)                                                           \
+__CV_EXPAND(__CV_CAT(__CV_ENUM_CLASS_EXPOSE_, __CV_VA_NUM_ARGS(__VA_ARGS__))(EnumType, __VA_ARGS__)); \
+
+#define CV_ENUM_FLAGS(EnumType)                                                                       \
+__CV_ENUM_FLAGS_LOGICAL_NOT      (EnumType)                                                           \
+__CV_ENUM_FLAGS_LOGICAL_EQ       (EnumType, int)                                                      \
+__CV_ENUM_FLAGS_LOGICAL_NOT_EQ   (EnumType, int)                                                      \
+                                                                                                      \
+__CV_ENUM_FLAGS_BITWISE_NOT      (EnumType)                                                           \
+__CV_ENUM_FLAGS_BITWISE_OR       (EnumType, EnumType, EnumType)                                       \
+__CV_ENUM_FLAGS_BITWISE_AND      (EnumType, EnumType, EnumType)                                       \
+__CV_ENUM_FLAGS_BITWISE_XOR      (EnumType, EnumType, EnumType)                                       \
+                                                                                                      \
+__CV_ENUM_FLAGS_BITWISE_OR_EQ    (EnumType, EnumType)                                                 \
+__CV_ENUM_FLAGS_BITWISE_AND_EQ   (EnumType, EnumType)                                                 \
+__CV_ENUM_FLAGS_BITWISE_XOR_EQ   (EnumType, EnumType)                                                 \
+
+/****************************************************************************************\
+*                                    static analysis                                     *
+\****************************************************************************************/
+
+// In practice, some macro are not processed correctly (noreturn is not detected).
+// We need to use simplified definition for them.
+#ifndef CV_STATIC_ANALYSIS
+# if defined(__KLOCWORK__) || defined(__clang_analyzer__) || defined(__COVERITY__)
+#   define CV_STATIC_ANALYSIS 1
+# endif
+#else
+# if defined(CV_STATIC_ANALYSIS) && !(__CV_CAT(1, CV_STATIC_ANALYSIS) == 1)  // defined and not empty
+#   if 0 == CV_STATIC_ANALYSIS
+#     undef CV_STATIC_ANALYSIS
+#   endif
+# endif
+#endif
+
+/****************************************************************************************\
+*                                    Thread sanitizer                                    *
+\****************************************************************************************/
+#ifndef CV_THREAD_SANITIZER
+# if defined(__has_feature)
+#   if __has_feature(thread_sanitizer)
+#     define CV_THREAD_SANITIZER
+#   endif
+# endif
+#endif
+
+/****************************************************************************************\
+*          exchange-add operation for atomic operations on reference counters            *
+\****************************************************************************************/
+
+#ifdef CV_XADD
+  // allow to use user-defined macro
+#elif defined __GNUC__ || defined __clang__
+#  if defined __clang__ && __clang_major__ >= 3 && !defined __ANDROID__ && !defined __EMSCRIPTEN__ && !defined(__CUDACC__)  && !defined __INTEL_COMPILER
+#    ifdef __ATOMIC_ACQ_REL
+#      define CV_XADD(addr, delta) __c11_atomic_fetch_add((_Atomic(int)*)(addr), delta, __ATOMIC_ACQ_REL)
+#    else
+#      define CV_XADD(addr, delta) __atomic_fetch_add((_Atomic(int)*)(addr), delta, 4)
+#    endif
+#  else
+#    if defined __ATOMIC_ACQ_REL && !defined __clang__
+       // version for gcc >= 4.7
+#      define CV_XADD(addr, delta) (int)__atomic_fetch_add((unsigned*)(addr), (unsigned)(delta), __ATOMIC_ACQ_REL)
+#    else
+#      define CV_XADD(addr, delta) (int)__sync_fetch_and_add((unsigned*)(addr), (unsigned)(delta))
+#    endif
+#  endif
+#elif defined _MSC_VER && !defined RC_INVOKED
+#  include <intrin.h>
+#  define CV_XADD(addr, delta) (int)_InterlockedExchangeAdd((long volatile*)addr, delta)
+#else
+  #ifdef OPENCV_FORCE_UNSAFE_XADD
+    CV_INLINE int CV_XADD(int* addr, int delta) { int tmp = *addr; *addr += delta; return tmp; }
+  #else
+    #error "OpenCV: can't define safe CV_XADD macro for current platform (unsupported). Define CV_XADD macro through custom port header (see OPENCV_INCLUDE_PORT_FILE)"
+  #endif
+#endif
+
+
+/****************************************************************************************\
+*                                  CV_NORETURN attribute                                 *
+\****************************************************************************************/
+
+#ifndef CV_NORETURN
+#  if defined(__GNUC__)
+#    define CV_NORETURN __attribute__((__noreturn__))
+#  elif defined(_MSC_VER) && (_MSC_VER >= 1300)
+#    define CV_NORETURN __declspec(noreturn)
+#  else
+#    define CV_NORETURN /* nothing by default */
+#  endif
+#endif
+
+/****************************************************************************************\
+*                       CV_NODISCARD_STD attribute (C++17)                               *
+* encourages the compiler to issue a warning if the return value is discarded            *
+\****************************************************************************************/
+#ifndef CV_NODISCARD_STD
+#  ifndef __has_cpp_attribute
+//   workaround preprocessor non-compliance https://reviews.llvm.org/D57851
+#    define __has_cpp_attribute(__x) 0
+#  endif
+#  if __has_cpp_attribute(nodiscard)
+#    define CV_NODISCARD_STD [[nodiscard]]
+#  elif __cplusplus >= 201703L
+//   available when compiler is C++17 compliant
+#    define CV_NODISCARD_STD [[nodiscard]]
+#  elif defined(__INTEL_COMPILER)
+     // see above, available when C++17 is enabled
+#  elif defined(_MSC_VER) && _MSC_VER >= 1911 && _MSVC_LANG >= 201703L
+//   available with VS2017 v15.3+ with /std:c++17 or higher; works on functions and classes
+#    define CV_NODISCARD_STD [[nodiscard]]
+#  elif defined(__GNUC__) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 700) && (__cplusplus >= 201103L)
+//   available with GCC 7.0+; works on functions, works or silently fails on classes
+#    define CV_NODISCARD_STD [[nodiscard]]
+#  elif defined(__GNUC__) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 408) && (__cplusplus >= 201103L)
+//   available with GCC 4.8+ but it usually does nothing and can fail noisily -- therefore not used
+//   define CV_NODISCARD_STD [[gnu::warn_unused_result]]
+#  endif
+#endif
+#ifndef CV_NODISCARD_STD
+#  define CV_NODISCARD_STD /* nothing by default */
+#endif
+
+
+/****************************************************************************************\
+*                      CV_NODISCARD attribute (deprecated, GCC only)                     *
+* DONT USE: use instead the standard CV_NODISCARD_STD macro above                        *
+*           this legacy method silently fails to issue warning until some version        *
+*           after gcc 6.3.0. Yet with gcc 7+ you can use the above standard method       *
+*           which makes this method useless. Don't use it.                               *
+* @deprecated use instead CV_NODISCARD_STD                                               *
+\****************************************************************************************/
+#ifndef CV_NODISCARD
+#  if defined(__GNUC__)
+#    define CV_NODISCARD __attribute__((__warn_unused_result__))
+#  elif defined(__clang__) && defined(__has_attribute)
+#    if __has_attribute(__warn_unused_result__)
+#      define CV_NODISCARD __attribute__((__warn_unused_result__))
+#    endif
+#  endif
+#endif
+#ifndef CV_NODISCARD
+#  define CV_NODISCARD /* nothing by default */
+#endif
+
+
+/****************************************************************************************\
+*                                    C++ 11                                              *
+\****************************************************************************************/
+#ifndef CV_CXX11
+#  if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1800)
+#    define CV_CXX11 1
+#  endif
+#else
+#  if CV_CXX11 == 0
+#    undef CV_CXX11
+#  endif
+#endif
+#ifndef CV_CXX11
+#  error "OpenCV 4.x+ requires enabled C++11 support"
+#endif
+
+#define CV_CXX_MOVE_SEMANTICS 1
+#define CV_CXX_MOVE(x) std::move(x)
+#define CV_CXX_STD_ARRAY 1
+#include <array>
+#ifndef CV_OVERRIDE
+#  define CV_OVERRIDE override
+#endif
+#ifndef CV_FINAL
+#  define CV_FINAL final
+#endif
+
+#ifndef CV_NOEXCEPT
+#  if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/)
+#    define CV_NOEXCEPT noexcept
+#  endif
+#endif
+#ifndef CV_NOEXCEPT
+#  define CV_NOEXCEPT
+#endif
+
+#ifndef CV_CONSTEXPR
+#  if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/)
+#    define CV_CONSTEXPR constexpr
+#  endif
+#endif
+#ifndef CV_CONSTEXPR
+#  define CV_CONSTEXPR
+#endif
+
+// Integer types portability
+#ifdef OPENCV_STDINT_HEADER
+#include OPENCV_STDINT_HEADER
+#elif defined(__cplusplus)
+#if defined(_MSC_VER) && _MSC_VER < 1600 /* MSVS 2010 */
+namespace cv {
+typedef signed char int8_t;
+typedef unsigned char uint8_t;
+typedef signed short int16_t;
+typedef unsigned short uint16_t;
+typedef signed int int32_t;
+typedef unsigned int uint32_t;
+typedef signed __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+}
+#elif defined(_MSC_VER) || __cplusplus >= 201103L
+#include <cstdint>
+namespace cv {
+using std::int8_t;
+using std::uint8_t;
+using std::int16_t;
+using std::uint16_t;
+using std::int32_t;
+using std::uint32_t;
+using std::int64_t;
+using std::uint64_t;
+}
+#else
+#include <stdint.h>
+namespace cv {
+typedef ::int8_t int8_t;
+typedef ::uint8_t uint8_t;
+typedef ::int16_t int16_t;
+typedef ::uint16_t uint16_t;
+typedef ::int32_t int32_t;
+typedef ::uint32_t uint32_t;
+typedef ::int64_t int64_t;
+typedef ::uint64_t uint64_t;
+}
+#endif
+#else // pure C
+#include <stdint.h>
+#endif
+
+#ifdef __cplusplus
+namespace cv
+{
+
+// 16-bit floating-point value with an explicit constructor from float and an
+// implicit conversion back to float.
+// Two storage layouts are selected at compile time:
+//   - CV_FP16_TYPE non-zero: the value lives in a native __fp16 member `h`
+//     and the compiler performs the conversions;
+//   - otherwise: the raw 16-bit pattern lives in a ushort member `w` and the
+//     conversions are done with intrinsics (CV_FP16) or with integer bit math.
+class float16_t
+{
+public:
+#if CV_FP16_TYPE
+
+    // Default value is zero.
+    float16_t() : h(0) {}
+    // Converts from float via a cast to __fp16.
+    explicit float16_t(float x) { h = (__fp16)x; }
+    // Converts back to float via a cast from __fp16.
+    operator float() const { return (float)h; }
+    // Builds a value from a raw 16-bit pattern; the bits are reinterpreted
+    // through the Cv16suf union rather than numerically converted.
+    static float16_t fromBits(ushort w)
+    {
+        Cv16suf u;
+        u.u = w;
+        float16_t result;
+        result.h = u.h;
+        return result;
+    }
+    // Returns a value holding zero.
+    static float16_t zero()
+    {
+        float16_t result;
+        result.h = (__fp16)0;
+        return result;
+    }
+    // Returns the raw 16-bit pattern of the stored value (inverse of fromBits).
+    ushort bits() const
+    {
+        Cv16suf u;
+        u.h = h;
+        return u.u;
+    }
+protected:
+    __fp16 h;
+
+#else
+    // Default value has an all-zero bit pattern.
+    float16_t() : w(0) {}
+    // Converts a float to the 16-bit pattern stored in `w`.
+    explicit float16_t(float x)
+    {
+    #if CV_FP16
+        // Conversion through the _mm_cvtps_ph intrinsic; the low 16 bits of the
+        // result are kept. NOTE(review): the immediate 0 is the rounding-control
+        // argument -- presumably round-to-nearest-even; confirm in the intrinsic docs.
+        __m128 v = _mm_load_ss(&x);
+        w = (ushort)_mm_cvtsi128_si32(_mm_cvtps_ph(v, 0));
+    #else
+        // Software path: operate on the float's bit pattern through Cv32suf.
+        Cv32suf in;
+        in.f = x;
+        // Split off the sign bit so the rest of the code works on |x|.
+        unsigned sign = in.u & 0x80000000;
+        in.u ^= sign;
+
+        // 0x47800000 is the bit pattern of 65536.0f: anything at or above it
+        // (including float infinity and NaN) does not fit the 16-bit range.
+        // Patterns above 0x7f800000 (float infinity) are NaNs and map to 0x7e00;
+        // everything else maps to 0x7c00 (exponent bits all set, mantissa zero).
+        if( in.u >= 0x47800000 )
+            w = (ushort)(in.u > 0x7f800000 ? 0x7e00 : 0x7c00);
+        else
+        {
+            // 0x38800000 is the bit pattern of 2^-14; smaller magnitudes take
+            // the branch below (presumably the half-precision subnormal range).
+            if (in.u < 0x38800000)
+            {
+                // Adding 0.5f lets the FPU align and round the mantissa;
+                // subtracting the bit pattern of 0.5f (0x3f000000) then leaves
+                // only the resulting low bits.
+                in.f += 0.5f;
+                w = (ushort)(in.u - 0x3f000000);
+            }
+            else
+            {
+                // 0xc8000fff == -0x38000000 + 0xfff (mod 2^32): subtracts the
+                // exponent bias difference (112 << 23) and adds a rounding term.
+                // Adding bit 13 of the input (the lowest bit that survives the
+                // shift) breaks ties towards an even result before dropping the
+                // 13 low mantissa bits.
+                unsigned t = in.u + 0xc8000fff;
+                w = (ushort)((t + ((in.u >> 13) & 1)) >> 13);
+            }
+        }
+
+        // Re-attach the sign: bit 31 of the float becomes bit 15 of the result.
+        w = (ushort)(w | (sign >> 16));
+    #endif
+    }
+
+    // Converts the stored 16-bit pattern back to float.
+    operator float() const
+    {
+    #if CV_FP16
+        // Conversion through the _mm_cvtph_ps intrinsic.
+        float f;
+        _mm_store_ss(&f, _mm_cvtph_ps(_mm_cvtsi32_si128(w)));
+        return f;
+    #else
+        Cv32suf out;
+
+        // t: exponent+mantissa bits moved to float positions, with the
+        //    exponent bias difference (0x38000000 == 112 << 23) added.
+        // sign: bit 15 moved to bit 31.
+        // e: the 5 exponent bits of the stored value, still in place.
+        unsigned t = ((w & 0x7fff) << 13) + 0x38000000;
+        unsigned sign = (w & 0x8000) << 16;
+        unsigned e = w & 0x7c00;
+
+        // Three cases, selected on the stored exponent bits:
+        //   all ones -> add the bias difference once more so the float
+        //               exponent is all ones as well (infinity / NaN);
+        //   zero     -> start from t + (1 << 23) and subtract 2^-14
+        //               (6.103515625e-05f) in floating point, then reuse the
+        //               resulting bits;
+        //   other    -> t is already the answer.
+        // out.u is pre-loaded because the zero-exponent case reads it via out.f.
+        out.u = t + (1 << 23);
+        out.u = (e >= 0x7c00 ? t + 0x38000000 :
+                 e == 0 ? (static_cast<void>(out.f -= 6.103515625e-05f), out.u) : t) | sign;
+        return out.f;
+    #endif
+    }
+
+    // Builds a value directly from a raw 16-bit pattern (no numeric conversion).
+    static float16_t fromBits(ushort b)
+    {
+        float16_t result;
+        result.w = b;
+        return result;
+    }
+    // Returns a value with an all-zero bit pattern.
+    static float16_t zero()
+    {
+        float16_t result;
+        result.w = (ushort)0;
+        return result;
+    }
+    // Returns the raw 16-bit pattern.
+    ushort bits() const { return w; }
+protected:
+    ushort w;
+
+#endif
+};
+
+}
+#endif
+
+//! @}
+
+#ifndef __cplusplus
+#include "opencv2/core/fast_math.hpp" // define cvRound(double)
+#endif
+
+#endif // OPENCV_CORE_CVDEF_H
--- a/3rdparty/opencv/inc/opencv2/core/cvstd.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cvstd.hpp
@@ -0,0 +1,190 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_CVSTD_HPP
+#define OPENCV_CORE_CVSTD_HPP
+
+#ifndef __cplusplus
+#  error cvstd.hpp header must be compiled as C++
+#endif
+
+#include "opencv2/core/cvdef.h"
+#include <cstddef>
+#include <cstring>
+#include <cctype>
+
+#include <string>
+
+// import useful primitives from stl
+#  include <algorithm>
+#  include <utility>
+#  include <cstdlib> //for abs(int)
+#  include <cmath>
+
+namespace cv
+{
+    // abs() overloads for unsigned types: the argument is returned unchanged.
+    static inline uchar abs(uchar a) { return a; }
+    static inline ushort abs(ushort a) { return a; }
+    static inline unsigned abs(unsigned a) { return a; }
+    static inline uint64 abs(uint64 a) { return a; }
+
+    // Make these standard-library functions visible as cv::min, cv::max, cv::abs, ...
+    // (std::abs joins the unsigned overloads declared above in one overload set).
+    using std::min;
+    using std::max;
+    using std::abs;
+    using std::swap;
+    using std::sqrt;
+    using std::exp;
+    using std::pow;
+    using std::log;
+}
+
+#include "cvstd_wrapper.hpp"
+
+namespace cv {
+
+//! @addtogroup core_utils
+//! @{
+
+//////////////////////////// memory management functions ////////////////////////////
+
+/** @brief Allocates an aligned memory buffer.
+
+The function allocates the buffer of the specified size and returns it. When the buffer size is 16
+bytes or more, the returned buffer is aligned to 16 bytes.
+@param bufSize Allocated buffer size.
+ */
+CV_EXPORTS void* fastMalloc(size_t bufSize);
+
+/** @brief Deallocates a memory buffer.
+
+The function deallocates the buffer allocated with fastMalloc. If a NULL pointer is passed, the
+function does nothing. C version of the function clears the pointer *pptr* to avoid problems with
+double memory deallocation.
+@param ptr Pointer to the allocated buffer.
+ */
+CV_EXPORTS void fastFree(void* ptr);
+
+/*!
+  The STL-compliant memory Allocator based on cv::fastMalloc() and cv::fastFree()
+
+  The class has no data members: every instance forwards allocation requests to
+  the same pair of functions.
+*/
+template<typename _Tp> class Allocator
+{
+public:
+    typedef _Tp value_type;
+    typedef value_type* pointer;
+    typedef const value_type* const_pointer;
+    typedef value_type& reference;
+    typedef const value_type& const_reference;
+    typedef size_t size_type;
+    typedef ptrdiff_t difference_type;
+    // Declared as a struct so that `other` is publicly accessible: containers
+    // name `Allocator<T>::rebind<U>::other` to obtain an allocator for their
+    // internal node types.
+    template<typename U> struct rebind { typedef Allocator<U> other; };
+
+    Allocator() {}
+    ~Allocator() {}
+    // Copy and converting constructors are deliberately not `explicit`:
+    // allocator requirements include copy-initialization (`Allocator<T> b = a;`).
+    Allocator(Allocator const&) {}
+    template<typename U>
+    Allocator(Allocator<U> const&) {}
+
+    // address: const members so they can be called on a const allocator
+    pointer address(reference r) const { return &r; }
+    const_pointer address(const_reference r) const { return &r; }
+
+    // Allocates raw storage for `count` objects through fastMalloc(); the hint
+    // argument is ignored. Objects are not constructed.
+    pointer allocate(size_type count, const void* =0) { return reinterpret_cast<pointer>(fastMalloc(count * sizeof (_Tp))); }
+    // Releases storage obtained from allocate(); the element count is ignored.
+    void deallocate(pointer p, size_type) { fastFree(p); }
+
+    // Copy-constructs a _Tp in place at p.
+    void construct(pointer p, const _Tp& v) { new(static_cast<void*>(p)) _Tp(v); }
+    // Runs the destructor of the object at p without freeing its storage.
+    void destroy(pointer p) { p->~_Tp(); }
+
+    // Largest element count whose byte size still fits in size_type (at least 1).
+    // The computation is done in size_type: casting -1 to _Tp is ill-formed for
+    // non-arithmetic element types and gives the wrong magnitude for signed ones.
+    size_type max_size() const { return cv::max<size_type>(static_cast<size_type>(-1) / sizeof(_Tp), 1); }
+};
+
+//! @} core_utils
+
+//! @endcond
+
+//! @addtogroup core_basic
+//! @{
+
+//////////////////////////////// string class ////////////////////////////////
+
+class CV_EXPORTS FileNode; //for string constructor from FileNode
+
+typedef std::string String;
+
+#ifndef OPENCV_DISABLE_STRING_LOWER_UPPER_CONVERSIONS
+
+//! @cond IGNORED
+namespace details {
+// Lower-cases one character with std::tolower.
+// std::tolower is int->int and requires a value representable as unsigned char
+// (or EOF); plain char may be negative, so convert through unsigned char first.
+static inline char char_tolower(char ch)
+{
+    return (char)std::tolower(static_cast<unsigned char>(ch));
+}
+// Upper-cases one character with std::toupper.
+// std::toupper is int->int with the same argument requirement as std::tolower.
+static inline char char_toupper(char ch)
+{
+    return (char)std::toupper(static_cast<unsigned char>(ch));
+}
+} // namespace details
+//! @endcond
+
+// Returns a copy of str with every character passed through details::char_tolower.
+static inline std::string toLowerCase(const std::string& str)
+{
+    std::string lowered;
+    lowered.resize(str.size());
+    std::transform(str.begin(), str.end(), lowered.begin(), details::char_tolower);
+    return lowered;
+}
+
+// Returns a copy of str with every character passed through details::char_toupper.
+static inline std::string toUpperCase(const std::string& str)
+{
+    std::string raised;
+    raised.resize(str.size());
+    std::transform(str.begin(), str.end(), raised.begin(), details::char_toupper);
+    return raised;
+}
+
+#endif // OPENCV_DISABLE_STRING_LOWER_UPPER_CONVERSIONS
+
+//! @} core_basic
+} // cv
+
+#endif //OPENCV_CORE_CVSTD_HPP
--- a/3rdparty/opencv/inc/opencv2/core/cvstd.inl.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cvstd.inl.hpp
@@ -0,0 +1,197 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_CVSTDINL_HPP
+#define OPENCV_CORE_CVSTDINL_HPP
+
+#include <complex>
+#include <ostream>
+#include <sstream>
+
+//! @cond IGNORED
+
+#ifdef _MSC_VER
+#pragma warning( push )
+#pragma warning( disable: 4127 )
+#endif
+
+namespace cv
+{
+
+// DataType specialization for std::complex<_Tp>: a complex number is described
+// as a 2-channel element whose channel type is _Tp.
+template<typename _Tp> class DataType< std::complex<_Tp> >
+{
+public:
+    typedef std::complex<_Tp>  value_type;
+    typedef value_type         work_type;
+    typedef _Tp                channel_type;
+
+    // depth and the base of fmt are taken from DataType<channel_type>;
+    // fmt additionally stores (channels - 1) shifted left by 8 bits, and
+    // type is CV_MAKETYPE(depth, channels).
+    enum { generic_type = 0,
+           depth        = DataType<channel_type>::depth,
+           channels     = 2,
+           fmt          = DataType<channel_type>::fmt + ((channels - 1) << 8),
+           type         = CV_MAKETYPE(depth, channels) };
+
+    // Vec with the same channel type and channel count.
+    typedef Vec<channel_type, channels> vec_type;
+};
+
+// Streams a formatted object: rewinds it with reset(), then writes every
+// string returned by next() until next() returns a null pointer.
+static inline
+std::ostream& operator << (std::ostream& out, Ptr<Formatted> fmtd)
+{
+    fmtd->reset();
+    const char* chunk = fmtd->next();
+    while (chunk)
+    {
+        out << chunk;
+        chunk = fmtd->next();
+    }
+    return out;
+}
+
+// Streams a Mat using the formatter returned by Formatter::get().
+static inline
+std::ostream& operator << (std::ostream& out, const Mat& mtx)
+{
+    out << Formatter::get()->format(mtx);
+    return out;
+}
+
+// Streams a UMat by obtaining a read-access Mat from it and streaming that.
+static inline
+std::ostream& operator << (std::ostream& out, const UMat& m)
+{
+    out << m.getMat(ACCESS_READ);
+    return out;
+}
+
+// Streams a Complex value as "(re,im)".
+template<typename _Tp> static inline
+std::ostream& operator << (std::ostream& out, const Complex<_Tp>& c)
+{
+    out << "(" << c.re;
+    out << "," << c.im;
+    out << ")";
+    return out;
+}
+
+// Streams a vector of 2D points by wrapping it in a Mat and formatting that
+// with the formatter returned by Formatter::get().
+template<typename _Tp> static inline
+std::ostream& operator << (std::ostream& out, const std::vector<Point_<_Tp> >& vec)
+{
+    out << Formatter::get()->format(Mat(vec));
+    return out;
+}
+
+
+// Streams a vector of 3D points by wrapping it in a Mat and formatting that
+// with the formatter returned by Formatter::get().
+template<typename _Tp> static inline
+std::ostream& operator << (std::ostream& out, const std::vector<Point3_<_Tp> >& vec)
+{
+    out << Formatter::get()->format(Mat(vec));
+    return out;
+}
+
+
+// Streams a fixed-size Matx by wrapping it in a Mat and formatting that
+// with the formatter returned by Formatter::get().
+template<typename _Tp, int m, int n> static inline
+std::ostream& operator << (std::ostream& out, const Matx<_Tp, m, n>& matx)
+{
+    out << Formatter::get()->format(Mat(matx));
+    return out;
+}
+
+// Streams a 2D point as "[x, y]".
+template<typename _Tp> static inline
+std::ostream& operator << (std::ostream& out, const Point_<_Tp>& p)
+{
+    return out << "[" << p.x << ", " << p.y << "]";
+}
+
+// Streams a 3D point as "[x, y, z]".
+template<typename _Tp> static inline
+std::ostream& operator << (std::ostream& out, const Point3_<_Tp>& p)
+{
+    return out << "[" << p.x << ", " << p.y << ", " << p.z << "]";
+}
+
+// Streams a Vec as "[v0, v1, ..., vN-1]".
+// Elements whose depth is CV_32S or lower are cast to int before printing
+// (so 8-bit elements appear as numbers rather than characters); other element
+// types are streamed as they are.
+template<typename _Tp, int n> static inline
+std::ostream& operator << (std::ostream& out, const Vec<_Tp, n>& vec)
+{
+    const bool printAsInt = cv::traits::Depth<_Tp>::value <= CV_32S;
+    const int last = n - 1;
+
+    out << "[";
+    for (int idx = 0; idx < last; ++idx)
+    {
+        if (printAsInt)
+            out << (int)vec[idx];
+        else
+            out << vec[idx];
+        out << ", ";
+    }
+    // The final element is written without a trailing separator.
+    if (printAsInt)
+        out << (int)vec[last];
+    else
+        out << vec[last];
+    out << "]";
+
+    return out;
+}
+
+// Streams a size as "[width x height]".
+template<typename _Tp> static inline
+std::ostream& operator << (std::ostream& out, const Size_<_Tp>& size)
+{
+    out << "[" << size.width;
+    out << " x " << size.height;
+    out << "]";
+    return out;
+}
+
+// Streams a rectangle as "[width x height from (x, y)]".
+template<typename _Tp> static inline
+std::ostream& operator << (std::ostream& out, const Rect_<_Tp>& rect)
+{
+    out << "[" << rect.width << " x " << rect.height;
+    out << " from (" << rect.x << ", " << rect.y << ")]";
+    return out;
+}
+
+// Streams every dimension of a MatSize separated by " x " (e.g. "3 x 4 x 5").
+static inline std::ostream& operator << (std::ostream& out, const MatSize& msize)
+{
+    const int ndims = msize.dims();
+    for (int d = 0; d < ndims; ++d)
+    {
+        // The separator goes between dimensions, never before the first one.
+        if (d > 0)
+            out << " x ";
+        out << msize[d];
+    }
+    return out;
+}
+
+// Streams a range as the half-open interval "[start : end)".
+// The range is taken by const reference so that const objects and temporaries
+// can be streamed as well; the function only reads r.start and r.end.
+static inline std::ostream &operator<< (std::ostream &s, const cv::Range &r)
+{
+    return s << "[" << r.start << " : " << r.end << ")";
+}
+
+} // cv
+
+#ifdef _MSC_VER
+#pragma warning( pop )
+#endif
+
+//! @endcond
+
+#endif // OPENCV_CORE_CVSTDINL_HPP
--- a/3rdparty/opencv/inc/opencv2/core/cvstd_wrapper.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/cvstd_wrapper.hpp
@@ -0,0 +1,154 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_CVSTD_WRAPPER_HPP
+#define OPENCV_CORE_CVSTD_WRAPPER_HPP
+
+#include "opencv2/core/cvdef.h"
+
+#include <string>
+#include <memory>  // std::shared_ptr
+#include <type_traits>  // std::enable_if
+
+namespace cv {
+
+using std::nullptr_t;
+
+//! @addtogroup core_basic
+//! @{
+
+#ifdef CV_DOXYGEN
+
+template <typename _Tp> using Ptr = std::shared_ptr<_Tp>;  // In ideal world it should look like this, but we need some compatibility workarounds below
+
+template<typename _Tp, typename ... A1> static inline
+Ptr<_Tp> makePtr(const A1&... a1) { return std::make_shared<_Tp>(a1...); }
+
+#else  // cv::Ptr with compatibility workarounds
+
+// It should be defined for C-API types only.
+// C++ types should use regular "delete" operator.
+template<typename Y> struct DefaultDeleter;
+#if 0
+{
+    void operator()(Y* p) const;
+};
+#endif
+
+namespace sfinae {
+// Compile-time check whether an object of type C can be called as
+// `c.operator()(Args...)`. The result is exposed as the static member `value`.
+template<typename C, typename Ret, typename... Args>
+struct has_parenthesis_operator
+{
+private:
+    // Candidate 1: only participates in overload resolution when the call
+    // expression inside decltype is well-formed for T (SFINAE).
+    // NOTE(review): the std::is_same<..., Ret> result only picks the pointee
+    // type of the parameter (true_type* vs false_type*); this overload returns
+    // std::true_type either way, so a mismatching return type does not appear
+    // to make the check fail -- confirm whether that is intended.
+    template<typename T>
+    static CV_CONSTEXPR std::true_type has_parenthesis_operator_check(typename std::is_same<typename std::decay<decltype(std::declval<T>().operator()(std::declval<Args>()...))>::type, Ret>::type*);
+
+    // Candidate 2: variadic fallback, chosen only when candidate 1 is not viable.
+    template<typename> static CV_CONSTEXPR std::false_type has_parenthesis_operator_check(...);
+
+    // Passing the literal 0 matches the pointer parameter of candidate 1 when
+    // it is viable, otherwise the ellipsis of candidate 2.
+    typedef decltype(has_parenthesis_operator_check<C>(0)) type;
+
+public:
+#if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/)
+    static CV_CONSTEXPR bool value = type::value;
+#else
+    // support MSVS 2013
+    static const int value = type::value;
+#endif
+};
+} // namespace sfinae
+
+// Trait telling whether DefaultDeleter<T> provides an operator() taking a T*.
+// Primary template: false.
+template <typename T, typename = void>
+struct has_custom_delete
+        : public std::false_type {};
+
+// Force has_custom_delete to std::false_type when NVCC is compiling CUDA source files
+#ifndef __CUDACC__
+// Partial specialization: selected (and true) only when
+// sfinae::has_parenthesis_operator reports that DefaultDeleter<T> can be called with a T*.
+template <typename T>
+struct has_custom_delete<T, typename std::enable_if< sfinae::has_parenthesis_operator<DefaultDeleter<T>, void, T*>::value >::type >
+        : public std::true_type {};
+#endif
+
+// Smart pointer derived from std::shared_ptr<T>.
+// On top of the shared_ptr interface it
+//   - attaches DefaultDeleter<Y> automatically when a raw pointer is adopted
+//     and has_custom_delete<Y> is true,
+//   - keeps the OpenCV 3.x helpers release(), empty(), the implicit conversion
+//     to T* and the staticCast/constCast/dynamicCast members.
+template<typename T>
+struct Ptr : public std::shared_ptr<T>
+{
+#if 0
+    using std::shared_ptr<T>::shared_ptr;  // GCC 5.x can't handle this
+#else
+    // Hand-written forwarding constructors replacing the inherited ones above.
+    inline Ptr() CV_NOEXCEPT : std::shared_ptr<T>() {}
+    inline Ptr(nullptr_t) CV_NOEXCEPT : std::shared_ptr<T>(nullptr) {}
+    // Adopt a pointer (or a null pointer) together with an explicit deleter.
+    template<typename Y, typename D> inline Ptr(Y* p, D d) : std::shared_ptr<T>(p, d) {}
+    template<typename D> inline Ptr(nullptr_t, D d) : std::shared_ptr<T>(nullptr, d) {}
+
+    // Forwards to the two-argument shared_ptr constructor taking an owner `r` and a pointer `ptr`.
+    template<typename Y> inline Ptr(const Ptr<Y>& r, T* ptr) CV_NOEXCEPT : std::shared_ptr<T>(r, ptr) {}
+
+    inline Ptr(const Ptr<T>& o) CV_NOEXCEPT : std::shared_ptr<T>(o) {}
+    inline Ptr(Ptr<T>&& o) CV_NOEXCEPT : std::shared_ptr<T>(std::move(o)) {}
+
+    // Converting copy/move from Ptr<Y>.
+    template<typename Y> inline Ptr(const Ptr<Y>& o) CV_NOEXCEPT : std::shared_ptr<T>(o) {}
+    template<typename Y> inline Ptr(Ptr<Y>&& o) CV_NOEXCEPT : std::shared_ptr<T>(std::move(o)) {}
+#endif
+    // Construction from a plain std::shared_ptr<T>.
+    inline Ptr(const std::shared_ptr<T>& o) CV_NOEXCEPT : std::shared_ptr<T>(o) {}
+    inline Ptr(std::shared_ptr<T>&& o) CV_NOEXCEPT : std::shared_ptr<T>(std::move(o)) {}
+
+    // The next two constructors are tag-dispatch targets of Ptr(Y*) below.
+    // Overload with custom DefaultDeleter: Ptr<IplImage>(...)
+    template<typename Y>
+    inline Ptr(const std::true_type&, Y* ptr) : std::shared_ptr<T>(ptr, DefaultDeleter<Y>()) {}
+
+    // Overload without custom deleter: Ptr<std::string>(...);
+    template<typename Y>
+    inline Ptr(const std::false_type&, Y* ptr) : std::shared_ptr<T>(ptr) {}
+
+    // Adopts a raw pointer; has_custom_delete<Y> selects one of the two
+    // tagged constructors above.
+    template<typename Y = T>
+    inline Ptr(Y* ptr) : Ptr(has_custom_delete<Y>(), ptr) {}
+
+    // reset() follows the same tag-dispatch scheme as the raw-pointer constructor.
+    // Overload with custom DefaultDeleter: Ptr<IplImage>(...)
+    template<typename Y>
+    inline void reset(const std::true_type&, Y* ptr) { std::shared_ptr<T>::reset(ptr, DefaultDeleter<Y>()); }
+
+    // Overload without custom deleter: Ptr<std::string>(...);
+    template<typename Y>
+    inline void reset(const std::false_type&, Y* ptr) { std::shared_ptr<T>::reset(ptr); }
+
+    // Replaces the managed pointer, attaching DefaultDeleter<Y> when has_custom_delete<Y> is true.
+    template<typename Y>
+    inline void reset(Y* ptr) { Ptr<T>::reset(has_custom_delete<Y>(), ptr); }
+
+    // Replaces the managed pointer using an explicit deleter.
+    template<class Y, class Deleter>
+    void reset(Y* ptr, Deleter d) { std::shared_ptr<T>::reset(ptr, d); }
+
+    // Calls the argument-less shared_ptr::reset().
+    void reset() CV_NOEXCEPT { std::shared_ptr<T>::reset(); }
+
+    // Copy assignment, from the same type and from Ptr<Y>.
+    Ptr& operator=(const Ptr& o) { std::shared_ptr<T>::operator =(o); return *this; }
+    template<typename Y> inline Ptr& operator=(const Ptr<Y>& o) { std::shared_ptr<T>::operator =(o); return *this; }
+
+    // Both accessors go through get(); no null check is performed here.
+    T* operator->() const CV_NOEXCEPT { return std::shared_ptr<T>::get();}
+    typename std::add_lvalue_reference<T>::type operator*() const CV_NOEXCEPT { return *std::shared_ptr<T>::get(); }
+
+    // OpenCV 3.x methods (not a part of standard C++ library)
+    // release(): same effect as reset() without arguments.
+    inline void release() { std::shared_ptr<T>::reset(); }
+    // Implicit conversion to the raw pointer returned by get().
+    inline operator T* () const { return std::shared_ptr<T>::get(); }
+    // True when get() returns a null pointer.
+    inline bool empty() const { return std::shared_ptr<T>::get() == nullptr; }
+
+    // Pointer casts implemented with the corresponding std::*_pointer_cast.
+    template<typename Y> inline
+    Ptr<Y> staticCast() const CV_NOEXCEPT { return std::static_pointer_cast<Y>(*this); }
+
+    template<typename Y> inline
+    Ptr<Y> constCast() const CV_NOEXCEPT { return std::const_pointer_cast<Y>(*this); }
+
+    template<typename Y> inline
+    Ptr<Y> dynamicCast() const CV_NOEXCEPT { return std::dynamic_pointer_cast<Y>(*this); }
+};
+
+// Creates a _Tp with std::make_shared, passing the arguments by const
+// reference, and wraps the result in a Ptr. Types for which has_custom_delete
+// is true are rejected at compile time.
+template<typename _Tp, typename ... A1> static inline
+Ptr<_Tp> makePtr(const A1&... a1)
+{
+    static_assert( !has_custom_delete<_Tp>::value, "Can't use this makePtr with custom DefaultDeleter");
+    return Ptr<_Tp>(std::make_shared<_Tp>(a1...));
+}
+
+#endif // CV_DOXYGEN
+
+//! @} core_basic
+} // cv
+
+#endif //OPENCV_CORE_CVSTD_WRAPPER_HPP
--- a/3rdparty/opencv/inc/opencv2/core/detail/async_promise.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/detail/async_promise.hpp
@@ -0,0 +1,71 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_ASYNC_PROMISE_HPP
+#define OPENCV_CORE_ASYNC_PROMISE_HPP
+
+#include "../async.hpp"
+
+#include "exception_ptr.hpp"
+
+namespace cv {
+
+/** @addtogroup core_async
+@{
+*/
+
+
+/** @brief Provides result of asynchronous operations
+
+*/
+class CV_EXPORTS AsyncPromise
+{
+public:
+    ~AsyncPromise() CV_NOEXCEPT;
+    AsyncPromise() CV_NOEXCEPT;
+    explicit AsyncPromise(const AsyncPromise& o) CV_NOEXCEPT;
+    AsyncPromise& operator=(const AsyncPromise& o) CV_NOEXCEPT;
+    void release() CV_NOEXCEPT;
+
+    /** Returns associated AsyncArray
+    @note Can be called once
+    */
+    AsyncArray getArrayResult();
+
+    /** Stores asynchronous result.
+    @param[in] value result
+    */
+    void setValue(InputArray value);
+
+    // TODO "move" setters
+
+#if CV__EXCEPTION_PTR
+    /** Stores exception.
+    @param[in] exception exception to be raised in AsyncArray
+    */
+    void setException(std::exception_ptr exception);
+#endif
+
+    /** Stores exception.
+    @param[in] exception exception to be raised in AsyncArray
+    */
+    void setException(const cv::Exception& exception);
+
+#ifdef CV_CXX11
+    /** Move constructor: takes over the implementation pointer of @p o and leaves @p o empty.
+    Marked CV_NOEXCEPT like the other special members (it only copies and clears a pointer),
+    so that standard containers can move instead of copy when they relocate elements.
+    */
+    explicit AsyncPromise(AsyncPromise&& o) CV_NOEXCEPT : p(o.p) { o.p = nullptr; }
+    /** Move assignment: exchanges the implementation pointers of the two objects. */
+    AsyncPromise& operator=(AsyncPromise&& o) CV_NOEXCEPT { std::swap(p, o.p); return *this; }
+#endif
+
+
+    // PImpl
+    typedef struct AsyncArray::Impl Impl; friend struct AsyncArray::Impl;
+    /** Returns the raw implementation pointer. */
+    inline void* _getImpl() const CV_NOEXCEPT { return p; }
+protected:
+    Impl* p;
+};
+
+
+//! @}
+} // namespace
+#endif // OPENCV_CORE_ASYNC_PROMISE_HPP
--- a/3rdparty/opencv/inc/opencv2/core/detail/dispatch_helper.impl.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/detail/dispatch_helper.impl.hpp
@@ -0,0 +1,49 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_DETAIL_DISPATCH_HELPER_IMPL_HPP
+#define OPENCV_CORE_DETAIL_DISPATCH_HELPER_IMPL_HPP
+
+//! @cond IGNORED
+
+namespace cv {
+namespace detail {
+
+// Runs Functor<T>{}(args...) where T is the element type matching the given
+// matrix depth constant. CV_16F and any unrecognised depth raise
+// cv::Error::BadDepth through CV_Error.
+template<template<typename> class Functor, typename... Args>
+static inline void depthDispatch(const int depth, Args&&... args)
+{
+    switch (depth)
+    {
+        case CV_8U:  Functor<uint8_t>{}(std::forward<Args>(args)...);  return;
+        case CV_8S:  Functor<int8_t>{}(std::forward<Args>(args)...);   return;
+        case CV_16U: Functor<uint16_t>{}(std::forward<Args>(args)...); return;
+        case CV_16S: Functor<int16_t>{}(std::forward<Args>(args)...);  return;
+        case CV_32S: Functor<int32_t>{}(std::forward<Args>(args)...);  return;
+        case CV_32F: Functor<float>{}(std::forward<Args>(args)...);    return;
+        case CV_64F: Functor<double>{}(std::forward<Args>(args)...);   return;
+        case CV_16F:
+        default:
+            CV_Error(cv::Error::BadDepth, "Unsupported matrix type.");
+    }
+}
+
+}}
+
+//! @endcond
+
+#endif //OPENCV_CORE_DETAIL_DISPATCH_HELPER_IMPL_HPP
--- a/3rdparty/opencv/inc/opencv2/core/detail/exception_ptr.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/detail/exception_ptr.hpp
@@ -0,0 +1,27 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#ifndef OPENCV_CORE_DETAILS_EXCEPTION_PTR_H
+#define OPENCV_CORE_DETAILS_EXCEPTION_PTR_H
+
+#ifndef CV__EXCEPTION_PTR
+#  if defined(__ANDROID__) && defined(ATOMIC_INT_LOCK_FREE) && ATOMIC_INT_LOCK_FREE < 2
+#    define CV__EXCEPTION_PTR 0  // Not supported, details: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58938
+#  elif defined(CV_CXX11)
+#    define CV__EXCEPTION_PTR 1
+#  elif defined(_MSC_VER)
+#    define CV__EXCEPTION_PTR (_MSC_VER >= 1600)
+#  elif defined(__clang__)
+#    define CV__EXCEPTION_PTR 0  // C++11 only (see above)
+#  elif defined(__GNUC__) && defined(__GXX_EXPERIMENTAL_CXX0X__)
+#    define CV__EXCEPTION_PTR (__GXX_EXPERIMENTAL_CXX0X__ > 0)
+#  endif
+#endif
+#ifndef CV__EXCEPTION_PTR
+#  define CV__EXCEPTION_PTR 0
+#elif CV__EXCEPTION_PTR
+#  include <exception>  // std::exception_ptr
+#endif
+
+#endif // OPENCV_CORE_DETAILS_EXCEPTION_PTR_H
--- a/3rdparty/opencv/inc/opencv2/core/directx.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/directx.hpp
@@ -0,0 +1,184 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the copyright holders or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_DIRECTX_HPP
+#define OPENCV_CORE_DIRECTX_HPP
+
+#include "mat.hpp"
+#include "ocl.hpp"
+
+#if !defined(__d3d11_h__)
+struct ID3D11Device;
+struct ID3D11Texture2D;
+#endif
+
+#if !defined(__d3d10_h__)
+struct ID3D10Device;
+struct ID3D10Texture2D;
+#endif
+
+#if !defined(_D3D9_H_)
+struct IDirect3DDevice9;
+struct IDirect3DDevice9Ex;
+struct IDirect3DSurface9;
+#endif
+
+
+namespace cv { namespace directx {
+
+namespace ocl {
+using namespace cv::ocl;
+
+//! @addtogroup core_directx
+// This section describes OpenCL and DirectX interoperability.
+//
+// To enable DirectX support, configure OpenCV using CMake with WITH_DIRECTX=ON . Note, DirectX is
+// supported only on Windows.
+//
+// To use OpenCL functionality you should first initialize OpenCL context from DirectX resource.
+//
+//! @{
+
+// TODO static functions in the Context class
+//! @brief Creates OpenCL context from D3D11 device
+//
+//! @param pD3D11Device - pointer to D3D11 device
+//! @return Returns reference to OpenCL Context
+CV_EXPORTS Context& initializeContextFromD3D11Device(ID3D11Device* pD3D11Device);
+
+//! @brief Creates OpenCL context from D3D10 device
+//
+//! @param pD3D10Device - pointer to D3D10 device
+//! @return Returns reference to OpenCL Context
+CV_EXPORTS Context& initializeContextFromD3D10Device(ID3D10Device* pD3D10Device);
+
+//! @brief Creates OpenCL context from Direct3DDevice9Ex device
+//
+//! @param pDirect3DDevice9Ex - pointer to Direct3DDevice9Ex device
+//! @return Returns reference to OpenCL Context
+CV_EXPORTS Context& initializeContextFromDirect3DDevice9Ex(IDirect3DDevice9Ex* pDirect3DDevice9Ex);
+
+//! @brief Creates OpenCL context from Direct3DDevice9 device
+//
+//! @param pDirect3DDevice9 - pointer to Direct3DDevice9 device
+//! @return Returns reference to OpenCL Context
+CV_EXPORTS Context& initializeContextFromDirect3DDevice9(IDirect3DDevice9* pDirect3DDevice9);
+
+//! @}
+
+} // namespace cv::directx::ocl
+
+//! @addtogroup core_directx
+//! @{
+
+//! @brief Converts InputArray to ID3D11Texture2D. If destination texture format is DXGI_FORMAT_NV12 then
+//!        input UMat expected to be in BGR format and data will be downsampled and color-converted to NV12.
+//
+//! @note Note: Destination texture must be allocated by application. Function does memory copy from src to
+//!             pD3D11Texture2D
+//
+//! @param src - source InputArray
+//! @param pD3D11Texture2D - destination D3D11 texture
+CV_EXPORTS void convertToD3D11Texture2D(InputArray src, ID3D11Texture2D* pD3D11Texture2D);
+
+//! @brief Converts ID3D11Texture2D to OutputArray. If input texture format is DXGI_FORMAT_NV12 then
+//!        data will be upsampled and color-converted to BGR format.
+//
+//! @note Note: Destination matrix will be re-allocated if it has not enough memory to match texture size.
+//!             function does memory copy from pD3D11Texture2D to dst
+//
+//! @param pD3D11Texture2D - source D3D11 texture
+//! @param dst             - destination OutputArray
+CV_EXPORTS void convertFromD3D11Texture2D(ID3D11Texture2D* pD3D11Texture2D, OutputArray dst);
+
+//! @brief Converts InputArray to ID3D10Texture2D
+//
+//! @note Note: function does memory copy from src to
+//!             pD3D10Texture2D
+//
+//! @param src             - source InputArray
+//! @param pD3D10Texture2D - destination D3D10 texture
+CV_EXPORTS void convertToD3D10Texture2D(InputArray src, ID3D10Texture2D* pD3D10Texture2D);
+
+//! @brief Converts ID3D10Texture2D to OutputArray
+//
+//! @note Note: function does memory copy from pD3D10Texture2D
+//!             to dst
+//
+//! @param pD3D10Texture2D - source D3D10 texture
+//! @param dst             - destination OutputArray
+CV_EXPORTS void convertFromD3D10Texture2D(ID3D10Texture2D* pD3D10Texture2D, OutputArray dst);
+
+//! @brief Converts InputArray to IDirect3DSurface9
+//
+//! @note Note: function does memory copy from src to
+//!             pDirect3DSurface9
+//
+//! @param src                 - source InputArray
+//! @param pDirect3DSurface9   - destination Direct3D9 surface
+//! @param surfaceSharedHandle - shared handle
+CV_EXPORTS void convertToDirect3DSurface9(InputArray src, IDirect3DSurface9* pDirect3DSurface9, void* surfaceSharedHandle = NULL);
+
+//! @brief Converts IDirect3DSurface9 to OutputArray
+//
+//! @note Note: function does memory copy from pDirect3DSurface9
+//!             to dst
+//
+//! @param pDirect3DSurface9   - source Direct3D9 surface
+//! @param dst                 - destination OutputArray
+//! @param surfaceSharedHandle - shared handle
+CV_EXPORTS void convertFromDirect3DSurface9(IDirect3DSurface9* pDirect3DSurface9, OutputArray dst, void* surfaceSharedHandle = NULL);
+
+//! @brief Get OpenCV type from DirectX type
+//! @param iDXGI_FORMAT - enum DXGI_FORMAT for D3D10/D3D11
+//! @return OpenCV type or -1 if there is no equivalent
+CV_EXPORTS int getTypeFromDXGI_FORMAT(const int iDXGI_FORMAT); // enum DXGI_FORMAT for D3D10/D3D11
+
+//! @brief Get OpenCV type from DirectX type
+//! @param iD3DFORMAT - enum D3DFORMAT for D3D9
+//! @return OpenCV type or -1 if there is no equivalent
+CV_EXPORTS int getTypeFromD3DFORMAT(const int iD3DFORMAT); // enum D3DTYPE for D3D9
+
+//! @}
+
+} } // namespace cv::directx
+
+#endif // OPENCV_CORE_DIRECTX_HPP
--- a/3rdparty/opencv/inc/opencv2/core/dualquaternion.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/dualquaternion.hpp
@@ -0,0 +1,979 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2020, Huawei Technologies Co., Ltd. All rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//       http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: Liangqian Kong <kongliangqian@huawei.com>
+//         Longbu Wang <wanglongbu@huawei.com>
+#ifndef OPENCV_CORE_DUALQUATERNION_HPP
+#define OPENCV_CORE_DUALQUATERNION_HPP
+
+#include <opencv2/core/quaternion.hpp>
+#include <opencv2/core/affine.hpp>
+
+namespace cv{
+//! @addtogroup core
+//! @{
+
+template <typename _Tp> class DualQuat;
+template <typename _Tp> std::ostream& operator<<(std::ostream&, const DualQuat<_Tp>&);
+
+/**
+ * Dual quaternions were introduced to describe rotation together with translation while ordinary
+ * quaternions can only describe rotation. It can be used for shortest path pose interpolation,
+ * local pose optimization or volumetric deformation. More details can be found
+ * - https://en.wikipedia.org/wiki/Dual_quaternion
+ * - ["A beginners guide to dual-quaternions: what they are, how they work, and how to use them for 3D character hierarchies", Ben Kenwright, 2012](https://borodust.org/public/shared/beginner_dual_quats.pdf)
+ * - ["Dual Quaternions", Yan-Bin Jia, 2013](http://web.cs.iastate.edu/~cs577/handouts/dual-quaternion.pdf)
+ * - ["Geometric Skinning with Approximate Dual Quaternion Blending", Kavan, 2008](https://www.cs.utah.edu/~ladislav/kavan08geometric/kavan08geometric)
+ * - http://rodolphe-vaillant.fr/?e=29
+ *
+ * A unit dual quaternion can be classically represented as:
+ * \f[
+ * \begin{equation}
+ * \begin{split}
+ * \sigma &= \left(r+\frac{\epsilon}{2}tr\right)\\
+ * &= [w, x, y, z, w\_, x\_, y\_, z\_]
+ * \end{split}
+ * \end{equation}
+ * \f]
+ * where \f$r, t\f$ represents the rotation (ordinary unit quaternion) and translation (pure ordinary quaternion) respectively.
+ *
+ * A general dual quaternions which consist of two quaternions is usually represented in form of:
+ * \f[
+ * \sigma = p + \epsilon q
+ * \f]
+ * where the introduced dual unit \f$\epsilon\f$ satisfies \f$\epsilon^2 = \epsilon^3 =...=0\f$, and \f$p, q\f$ are quaternions.
+ *
+ * Alternatively, dual quaternions can also be interpreted as four components which are all [dual numbers](https://www.cs.utah.edu/~ladislav/kavan08geometric/kavan08geometric):
+ * \f[
+ * \sigma = \hat{q}_w + \hat{q}_xi + \hat{q}_yj + \hat{q}_zk
+ * \f]
+ * If we set \f$\hat{q}_x, \hat{q}_y\f$ and \f$\hat{q}_z\f$ equal to 0, a dual quaternion is transformed to a dual number. see normalize().
+ *
+ * If you want to create a dual quaternion, you can use:
+ *
+ * ```
+ * using namespace cv;
+ * double angle = CV_PI;
+ *
+ * // create from eight number
+ * DualQuatd dq1(1, 2, 3, 4, 5, 6, 7, 8); //p = [1,2,3,4]. q=[5,6,7,8]
+ *
+ * // create from Vec
+ * Vec<double, 8> v{1,2,3,4,5,6,7,8};
+ * DualQuatd dq_v{v};
+ *
+ * // create from two quaternion
+ * Quatd p(1, 2, 3, 4);
+ * Quatd q(5, 6, 7, 8);
+ * DualQuatd dq2 = DualQuatd::createFromQuat(p, q);
+ *
+ * // create from an angle, an axis and a translation
+ * Vec3d axis{0, 0, 1};
+ * Vec3d trans{3, 4, 5};
+ * DualQuatd dq3 = DualQuatd::createFromAngleAxisTrans(angle, axis, trans);
+ *
+ * // If you already have an instance of class Affine3, then you can use
+ * Affine3d R = dq3.toAffine3();
+ * DualQuatd dq4 = DualQuatd::createFromAffine3(R);
+ *
+ * // or create directly by affine transformation matrix Rt
+ * // see createFromMat() in detail for the form of Rt
+ * Matx44d Rt = dq3.toMat();
+ * DualQuatd dq5 = DualQuatd::createFromMat(Rt);
+ *
+ * // Any rotation + translation movement can
+ * // be expressed as a rotation + translation around the same line in space (expressed by Plucker
+ * // coords), and here's a way to represent it this way.
+ * Vec3d axis{1, 1, 1}; // axis will be normalized in createFromPitch
+ * Vec3d trans{3, 4 ,5};
+ * axis = axis / std::sqrt(axis.dot(axis));// The formula for computing moment that I use below requires a normalized axis
+ * Vec3d moment = 1.0 / 2 * (trans.cross(axis) + axis.cross(trans.cross(axis)) *
+ *                            std::cos(angle / 2) / std::sin(angle / 2));
+ * double d = trans.dot(axis);
+ * DualQuatd dq6 = DualQuatd::createFromPitch(angle, d, axis, moment);
+ * ```
+ *
+ * A point \f$v=(x, y, z)\f$ in form of dual quaternion is \f$[1+\epsilon v]=[1,0,0,0,0,x,y,z]\f$.
+ * The transformation of a point \f$v_1\f$ to another point \f$v_2\f$ under the dual quaternion \f$\sigma\f$ is
+ * \f[
+ * 1 + \epsilon v_2 = \sigma * (1 + \epsilon v_1) * \sigma^{\star}
+ * \f]
+ * where \f$\sigma^{\star}=p^*-\epsilon q^*.\f$
+ *
+ * A line in the \f$Pl\ddot{u}cker\f$ coordinates \f$(\hat{l}, m)\f$ defined by the dual quaternion \f$l=\hat{l}+\epsilon m\f$.
+ * To transform a line, \f[l_2 = \sigma * l_1 * \sigma^*,\f] where \f$\sigma=r+\frac{\epsilon}{2}rt\f$ and
+ * \f$\sigma^*=p^*+\epsilon q^*\f$.
+ *
+ * To extract the Vec<double, 8> or Vec<float, 8>, see toVec();
+ *
+ * To extract the affine transformation matrix, see toMat();
+ *
+ * To extract the instance of Affine3, see toAffine3();
+ *
+ * If two quaternions \f$q_0, q_1\f$ are needed to be interpolated, you can use sclerp()
+ * ```
+ * DualQuatd::sclerp(q0, q1, t)
+ * ```
+ * or dqblend().
+ * ```
+ * DualQuatd::dqblend(q0, q1, t)
+ * ```
+ * With more than two dual quaternions to be blended, you can use generalize linear dual quaternion blending
+ * with the corresponding weights, i.e. gdqblend().
+ *
+ */
+template <typename _Tp>
+class CV_EXPORTS DualQuat{
+    static_assert(std::is_floating_point<_Tp>::value, "Dual quaternion only make sense with type of float or double");
+    using value_type = _Tp;
+
+public:
+    static constexpr _Tp CV_DUAL_QUAT_EPS = (_Tp)1.e-6;
+
+    DualQuat();
+
+    /**
+     * @brief create from eight same type numbers.
+     */
+    DualQuat(const _Tp w, const _Tp x, const _Tp y, const _Tp z, const _Tp w_, const _Tp x_, const _Tp y_, const _Tp z_);
+
+    /**
+     * @brief create from a double or float vector.
+     */
+    DualQuat(const Vec<_Tp, 8> &q);
+
+    _Tp w, x, y, z, w_, x_, y_, z_;
+
+    /**
+     * @brief create Dual Quaternion from two same type quaternions p and q.
+     * A Dual Quaternion \f$\sigma\f$ has the form:
+     * \f[\sigma = p + \epsilon q\f]
+     * where p and q are defined as follows:
+     * \f[\begin{equation}
+     *    \begin{split}
+     *    p &= w + x\boldsymbol{i} + y\boldsymbol{j} + z\boldsymbol{k}\\
+     *    q &= w\_ + x\_\boldsymbol{i} + y\_\boldsymbol{j} + z\_\boldsymbol{k}.
+     *    \end{split}
+     *   \end{equation}
+     * \f]
+     * The p and q are the real part and dual part respectively.
+     * @param realPart a quaternion, real part of dual quaternion.
+     * @param dualPart a quaternion, dual part of dual quaternion.
+     * @sa Quat
+    */
+    static DualQuat<_Tp> createFromQuat(const Quat<_Tp> &realPart, const Quat<_Tp> &dualPart);
+
+    /**
+     * @brief create a dual quaternion from a rotation angle \f$\theta\f$, a rotation axis
+     * \f$\boldsymbol{u}\f$ and a translation \f$\boldsymbol{t}\f$.
+     * It generates a dual quaternion \f$\sigma\f$ in the form of
+     * \f[\begin{equation}
+     *    \begin{split}
+     *    \sigma &= r + \frac{\epsilon}{2}\boldsymbol{t}r \\
+     *           &= [\cos(\frac{\theta}{2}), \boldsymbol{u}\sin(\frac{\theta}{2})]
+     *           + \frac{\epsilon}{2}[0, \boldsymbol{t}][[\cos(\frac{\theta}{2}),
+     *           \boldsymbol{u}\sin(\frac{\theta}{2})]]\\
+     *           &= \cos(\frac{\theta}{2}) + \boldsymbol{u}\sin(\frac{\theta}{2})
+     *           + \frac{\epsilon}{2}(-(\boldsymbol{t} \cdot \boldsymbol{u})\sin(\frac{\theta}{2})
+     *           + \boldsymbol{t}\cos(\frac{\theta}{2}) + \boldsymbol{u} \times \boldsymbol{t} \sin(\frac{\theta}{2})).
+     *    \end{split}
+     *    \end{equation}\f]
+     * @param angle rotation angle.
+     * @param axis rotation axis.
+     * @param translation a vector of length 3.
+     * @note Axis will be normalized in this function. And translation is applied
+     * after the rotation. Use @ref createFromQuat(r, r * t / 2) to create a dual quaternion
+     * which translation is applied before rotation.
+     * @sa Quat
+     */
+    static DualQuat<_Tp> createFromAngleAxisTrans(const _Tp angle, const Vec<_Tp, 3> &axis, const Vec<_Tp, 3> &translation);
+
+    /**
+     * @brief Create a dual quaternion from an affine transformation matrix \f$M\f$.
+     * The dual quaternion consists of a rotation \f$r=[e_0,e_1,e_2,e_3]\f$ and a translation \f$t=[\Delta x,\Delta y,\Delta z]\f$. The
+     * affine transformation matrix \f$M\f$ has the form
+     * \f[
+     * \begin{bmatrix}
+     * 1-2(e_2^2 +e_3^2) &2(e_1e_2-e_0e_3) &2(e_0e_2+e_1e_3) &\Delta x\\
+     * 2(e_0e_3+e_1e_2)  &1-2(e_1^2+e_3^2) &2(e_2e_3-e_0e_1) &\Delta y\\
+     * 2(e_1e_3-e_0e_2)  &2(e_0e_1+e_2e_3) &1-2(e_1^2+e_2^2) &\Delta z\\
+     * 0&0&0&1
+     * \end{bmatrix}
+     * \f]
+     *  if A is a matrix consisting of  n points to be transformed, this could be achieved by
+     * \f[
+     *  new\_A = M * A
+     * \f]
+     * where A has the form
+     * \f[
+     * \begin{bmatrix}
+     * x_0& x_1& x_2&...&x_n\\
+     * y_0& y_1& y_2&...&y_n\\
+     * z_0& z_1& z_2&...&z_n\\
+     * 1&1&1&...&1
+     * \end{bmatrix}
+     * \f]
+     * where the same subscript represent the same point. The size of A should be \f$[4,n]\f$.
+     * and the same size for matrix new_A.
+     * @param _R 4x4 matrix that represents rotations and translation.
+     * @note Translation is applied after the rotation. Use createFromQuat(r, r * t / 2) to create
+     * a dual quaternion which translation is applied before rotation.
+     */
+    static DualQuat<_Tp> createFromMat(InputArray _R);
+
+    /**
+     * @brief create dual quaternion from an affine matrix. The definition of affine matrix can refer to  createFromMat()
+     */
+    static DualQuat<_Tp> createFromAffine3(const Affine3<_Tp> &R);
+
+    /**
+     * @brief A dual quaternion is a vector in form of
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * \sigma &=\boldsymbol{p} + \epsilon \boldsymbol{q}\\
+     * &= \cos\frac{\hat{\theta}}{2}+\overline{\hat{l}}\sin\frac{\hat{\theta}}{2}
+     * \end{split}
+     * \end{equation}
+     * \f]
+     * where \f$\hat{\theta}\f$ is dual angle and \f$\overline{\hat{l}}\f$ is dual axis:
+     * \f[
+     * \hat{\theta}=\theta + \epsilon d,\\
+     * \overline{\hat{l}}= \hat{l} +\epsilon m.
+     * \f]
+     * In this representation, \f$\theta\f$ is rotation angle and \f$(\hat{l},m)\f$ is the screw axis, d is the translation distance along the axis.
+     *
+     * @param angle rotation angle.
+     * @param d translation along the rotation axis.
+     * @param axis rotation axis represented by quaternion with w = 0.
+     * @param moment the moment of line, and it should be orthogonal to axis.
+     * @note Translation is applied after the rotation. Use createFromQuat(r, r * t / 2) to create
+     * a dual quaternion which translation is applied before rotation.
+     */
+    static DualQuat<_Tp> createFromPitch(const _Tp angle, const _Tp d, const Vec<_Tp, 3> &axis, const Vec<_Tp, 3> &moment);
+
+    /**
+     * @brief return a quaternion which represent the real part of dual quaternion.
+     * The definition of real part is in createFromQuat().
+     * @sa createFromQuat, getDualPart
+     */
+    Quat<_Tp> getRealPart() const;
+
+    /**
+     * @brief return a quaternion which represent the dual part of dual quaternion.
+     * The definition of dual part is in createFromQuat().
+     * @sa createFromQuat, getRealPart
+     */
+    Quat<_Tp> getDualPart() const;
+
+    /**
+     * @brief return the conjugate of a dual quaternion.
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * \sigma^* &= (p + \epsilon q)^*\\
+     *          &= (p^* + \epsilon q^*)
+     * \end{split}
+     * \end{equation}
+     * \f]
+     * @param dq a dual quaternion.
+     */
+    template <typename T>
+    friend DualQuat<T> conjugate(const DualQuat<T> &dq);
+
+    /**
+     * @brief return the conjugate of a dual quaternion.
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * \sigma^* &= (p + \epsilon q)^*\\
+     *          &= (p^* + \epsilon q^*)
+     * \end{split}
+     * \end{equation}
+     * \f]
+     */
+    DualQuat<_Tp> conjugate() const;
+
+    /**
+     * @brief return the rotation in quaternion form.
+     */
+    Quat<_Tp> getRotation(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const;
+
+    /**
+     * @brief return the translation vector.
+     * The rotation \f$r\f$ in this dual quaternion \f$\sigma\f$ is applied before translation \f$t\f$.
+     * The dual quaternion \f$\sigma\f$ is defined as
+     * \f[\begin{equation}
+     * \begin{split}
+     * \sigma &= p + \epsilon q \\
+     *        &= r + \frac{\epsilon}{2}{t}r.
+     * \end{split}
+     * \end{equation}\f]
+     * Thus, the translation can be obtained as follows
+     * \f[t = 2qp^*.\f]
+     * @param assumeUnit if @ref QUAT_ASSUME_UNIT, this dual quaternion assume to be a unit dual quaternion
+     * and this function will save some computations.
+     * @note This dual quaternion's translation is applied after the rotation.
+     */
+    Vec<_Tp, 3> getTranslation(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const;
+
+    /**
+     * @brief return the norm \f$||\sigma||\f$ of dual quaternion \f$\sigma = p + \epsilon q\f$.
+     * \f[
+     *  \begin{equation}
+     *  \begin{split}
+     *  ||\sigma|| &= \sqrt{\sigma * \sigma^*} \\
+     *        &= ||p|| + \epsilon \frac{p \cdot q}{||p||}.
+     *  \end{split}
+     *  \end{equation}
+     *  \f]
+     * Generally speaking, the norm of a not unit dual
+     * quaternion is a dual number. For convenience, we return it in the form of a dual quaternion
+     * , i.e.
+     * \f[ ||\sigma|| = [||p||, 0, 0, 0, \frac{p \cdot q}{||p||}, 0, 0, 0].\f]
+     *
+     * @note The data type of dual number is dual quaternion.
+     */
+    DualQuat<_Tp> norm() const;
+
+    /**
+     * @brief return a normalized dual quaternion.
+     * A dual quaternion can be expressed as
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * \sigma &= p + \epsilon q\\
+     * &=||\sigma||\left(r+\frac{\epsilon}{2}tr\right)
+     * \end{split}
+     * \end{equation}
+     * \f]
+     * where \f$r, t\f$ represents the rotation (ordinary quaternion) and translation (pure ordinary quaternion) respectively,
+     * and \f$||\sigma||\f$ is the norm of dual quaternion(a dual number).
+     * A dual quaternion is unit if and only if
+     * \f[
+     * ||p||=1, p \cdot q=0
+     * \f]
+     * where \f$\cdot\f$ means dot product.
+     * The process of normalization is
+     * \f[
+     * \sigma_{u}=\frac{\sigma}{||\sigma||}
+     * \f]
+     * Next, we simply proof \f$\sigma_u\f$ is a unit dual quaternion:
+     * \f[
+     * \renewcommand{\Im}{\operatorname{Im}}
+     * \begin{equation}
+     * \begin{split}
+     * \sigma_{u}=\frac{\sigma}{||\sigma||}&=\frac{p + \epsilon q}{||p||+\epsilon\frac{p\cdot q}{||p||}}\\
+     * &=\frac{p}{||p||}+\epsilon\left(\frac{q}{||p||}-p\frac{p\cdot q}{||p||^3}\right)\\
+     * &=\frac{p}{||p||}+\epsilon\frac{1}{||p||^2}\left(qp^{*}-p\cdot q\right)\frac{p}{||p||}\\
+     * &=\frac{p}{||p||}+\epsilon\frac{1}{||p||^2}\Im(qp^*)\frac{p}{||p||}.\\
+     * \end{split}
+     * \end{equation}
+     * \f]
+     * As expected, the real part is a rotation and dual part is a pure quaternion.
+     */
+    DualQuat<_Tp> normalize() const;
+
+    /**
+     * @brief if \f$\sigma = p + \epsilon q\f$ is a dual quaternion, p is not zero,
+     * the inverse dual quaternion is
+     * \f[\sigma^{-1} = \frac{\sigma^*}{||\sigma||^2}, \f]
+     * or equivalently,
+     * \f[\sigma^{-1} = p^{-1} - \epsilon p^{-1}qp^{-1}.\f]
+     * @param dq a dual quaternion.
+     * @param assumeUnit if @ref QUAT_ASSUME_UNIT, dual quaternion dq assume to be a unit dual quaternion
+     * and this function will save some computations.
+     */
+    template <typename T>
+    friend DualQuat<T> inv(const DualQuat<T> &dq, QuatAssumeType assumeUnit);
+
+    /**
+     * @brief if \f$\sigma = p + \epsilon q\f$ is a dual quaternion, p is not zero,
+     * the inverse dual quaternion is
+     * \f[\sigma^{-1} = \frac{\sigma^*}{||\sigma||^2}, \f]
+     * or equivalently,
+     * \f[\sigma^{-1} = p^{-1} - \epsilon p^{-1}qp^{-1}.\f]
+     * @param assumeUnit if @ref QUAT_ASSUME_UNIT, this dual quaternion assume to be a unit dual quaternion
+     * and this function will save some computations.
+     */
+    DualQuat<_Tp> inv(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const;
+
+    /**
+     * @brief return the dot product of two dual quaternion.
+     * @param p other dual quaternion.
+     */
+    _Tp dot(DualQuat<_Tp> p) const;
+
+    /**
+     ** @brief return the value of \f$p^t\f$ where p is a dual quaternion.
+     * This could be calculated as:
+     * \f[
+     * p^t = \exp(t\ln p)
+     * \f]
+     * @param dq a dual quaternion.
+     * @param t index of power function.
+     * @param assumeUnit if @ref QUAT_ASSUME_UNIT, dual quaternion dq assume to be a unit dual quaternion
+     * and this function will save some computations.
+     */
+    template <typename T>
+    friend DualQuat<T> power(const DualQuat<T> &dq, const T t, QuatAssumeType assumeUnit);
+
+    /**
+     ** @brief return the value of \f$p^t\f$ where p is a dual quaternion.
+     * This could be calculated as:
+     * \f[
+     * p^t = \exp(t\ln p)
+     * \f]
+     *
+     * @param t index of power function.
+     * @param assumeUnit if @ref QUAT_ASSUME_UNIT, this dual quaternion assume to be a unit dual quaternion
+     * and this function will save some computations.
+     */
+    DualQuat<_Tp> power(const _Tp t, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const;
+
+    /**
+     * @brief return the value of \f$p^q\f$ where p and q are dual quaternions.
+     * This could be calculated as:
+     * \f[
+     * p^q = \exp(q\ln p)
+     * \f]
+     * @param p a dual quaternion.
+     * @param q a dual quaternion.
+     * @param assumeUnit if @ref QUAT_ASSUME_UNIT, dual quaternion p assume to be a dual unit quaternion
+     * and this function will save some computations.
+     */
+    template <typename T>
+    friend DualQuat<T> power(const DualQuat<T>& p, const DualQuat<T>& q, QuatAssumeType assumeUnit);
+
+    /**
+     * @brief return the value of \f$p^q\f$ where p and q are dual quaternions.
+     * This could be calculated as:
+     * \f[
+     * p^q = \exp(q\ln p)
+     * \f]
+     *
+     * @param q a dual quaternion
+     * @param assumeUnit if @ref QUAT_ASSUME_UNIT, this dual quaternion assume to be a dual unit quaternion
+     * and this function will save some computations.
+     */
+    DualQuat<_Tp> power(const DualQuat<_Tp>& q, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const;
+
+    /**
+     * @brief return the value of exponential function value
+     * @param dq a dual quaternion.
+     */
+    template <typename T>
+    friend DualQuat<T> exp(const DualQuat<T> &dq);
+
+    /**
+     * @brief return the value of exponential function value
+     */
+    DualQuat<_Tp> exp() const;
+
+    /**
+     * @brief return the value of logarithm function value
+     *
+     * @param dq a dual quaternion.
+     * @param assumeUnit if @ref QUAT_ASSUME_UNIT, dual quaternion dq assume to be a unit dual quaternion
+     * and this function will save some computations.
+     */
+    template <typename T>
+    friend DualQuat<T> log(const DualQuat<T> &dq, QuatAssumeType assumeUnit);
+
+    /**
+     * @brief return the value of logarithm function value
+     * @param assumeUnit if @ref QUAT_ASSUME_UNIT, this dual quaternion assume to be a unit dual quaternion
+     * and this function will save some computations.
+     */
+    DualQuat<_Tp> log(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const;
+
+    /**
+     * @brief Transform this dual quaternion to a vector.
+     */
+    Vec<_Tp, 8> toVec() const;
+
+    /**
+     * @brief Transform this dual quaternion to an affine transformation matrix
+     * the form of matrix, see createFromMat().
+     */
+    Matx<_Tp, 4, 4> toMat(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const;
+
+    /**
+      * @brief Transform this dual quaternion to an instance of Affine3.
+      */
+    Affine3<_Tp> toAffine3(QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT) const;
+
+    /**
+     * @brief The screw linear interpolation(ScLERP) is an extension of spherical linear interpolation of dual quaternion.
+     * If \f$\sigma_1\f$ and \f$\sigma_2\f$ are two dual quaternions representing the initial and final pose.
+     * The interpolation of ScLERP function can be defined as:
+     * \f[
+     * ScLERP(t;\sigma_1,\sigma_2) = \sigma_1 * (\sigma_1^{-1} * \sigma_2)^t, t\in[0,1]
+     * \f]
+     *
+     * @param q1 a dual quaternion represents a initial pose.
+     * @param q2 a dual quaternion represents a final pose.
+     * @param t interpolation parameter
+     * @param directChange if true, it always return the shortest path.
+     * @param assumeUnit if @ref QUAT_ASSUME_UNIT, this dual quaternion assume to be a unit dual quaternion
+     * and this function will save some computations.
+     *
+     * For example
+     * ```
+     * double angle1 = CV_PI / 2;
+     * Vec3d axis{0, 0, 1};
+     * Vec3d t(0, 0, 3);
+     * DualQuatd initial = DualQuatd::createFromAngleAxisTrans(angle1, axis, t);
+     * double angle2 = CV_PI;
+     * DualQuatd final = DualQuatd::createFromAngleAxisTrans(angle2, axis, t);
+     * DualQuatd inter = DualQuatd::sclerp(initial, final, 0.5);
+     * ```
+     */
+    static DualQuat<_Tp> sclerp(const DualQuat<_Tp> &q1, const DualQuat<_Tp> &q2, const _Tp t,
+                                bool directChange=true, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT);
+    /**
+     * @brief The method of Dual Quaternion linear Blending(DQB) is to compute a transformation between dual quaternion
+     * \f$q_1\f$ and \f$q_2\f$ and can be defined as:
+     * \f[
+     * DQB(t;{\boldsymbol{q}}_1,{\boldsymbol{q}}_2)=
+     * \frac{(1-t){\boldsymbol{q}}_1+t{\boldsymbol{q}}_2}{||(1-t){\boldsymbol{q}}_1+t{\boldsymbol{q}}_2||}.
+     * \f]
+     * where \f$q_1\f$ and \f$q_2\f$ are unit dual quaternions representing the input transformations.
+     * If you want to use DQB that works for more than two rigid transformations, see @ref gdqblend
+     *
+     * @param q1 a unit dual quaternion representing the input transformations.
+     * @param q2 a unit dual quaternion representing the input transformations.
+     * @param t parameter \f$t\in[0,1]\f$.
+     * @param assumeUnit if @ref QUAT_ASSUME_UNIT, this dual quaternion assume to be a unit dual quaternion
+     * and this function will save some computations.
+     *
+     * @sa gdqblend
+     */
+    static DualQuat<_Tp> dqblend(const DualQuat<_Tp> &q1, const DualQuat<_Tp> &q2, const _Tp t,
+                                   QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT);
+
+    /**
+     * @brief The generalized Dual Quaternion linear Blending works for more than two rigid transformations.
+     * If these transformations are expressed as unit dual quaternions \f$q_1,...,q_n\f$ with convex weights
+     * \f$w = (w_1,...,w_n)\f$, the generalized DQB is simply
+     * \f[
+     * gDQB(\boldsymbol{w};{\boldsymbol{q}}_1,...,{\boldsymbol{q}}_n)=\frac{w_1{\boldsymbol{q}}_1+...+w_n{\boldsymbol{q}}_n}
+     * {||w_1{\boldsymbol{q}}_1+...+w_n{\boldsymbol{q}}_n||}.
+     * \f]
+     * @param dualquat vector of dual quaternions
+     * @param weights vector of weights, the size of weights should be the same as dualquat, and the weights should
+     * satisfy \f$\sum_0^n w_{i} = 1\f$ and \f$w_i>0\f$.
+     * @param assumeUnit if @ref QUAT_ASSUME_UNIT, these dual quaternions assume to be unit quaternions
+     * and this function will save some computations.
+     * @note the type of the elements of weights should be the same as the data type of the dual quaternions inside dualquat.
+     */
+    template <int cn>
+    static DualQuat<_Tp> gdqblend(const Vec<DualQuat<_Tp>, cn> &dualquat, InputArray weights,
+                                QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT);
+
+    /**
+     * @brief The generalized Dual Quaternion linear Blending works for more than two rigid transformations.
+     * If these transformations are expressed as unit dual quaternions \f$q_1,...,q_n\f$ with convex weights
+     * \f$w = (w_1,...,w_n)\f$, the generalized DQB is simply
+     * \f[
+     * gDQB(\boldsymbol{w};{\boldsymbol{q}}_1,...,{\boldsymbol{q}}_n)=\frac{w_1{\boldsymbol{q}}_1+...+w_n{\boldsymbol{q}}_n}
+     * {||w_1{\boldsymbol{q}}_1+...+w_n{\boldsymbol{q}}_n||}.
+     * \f]
+     * @param dualquat The dual quaternions which have 8 channels and 1 row or 1 col.
+     * @param weights vector of weights, the size of weights should be the same as dualquat, and the weights should
+     * satisfy \f$\sum_0^n w_{i} = 1\f$ and \f$w_i>0\f$.
+     * @param assumeUnit if @ref QUAT_ASSUME_UNIT, these dual quaternions assume to be unit quaternions
+     * and this function will save some computations.
+     * @note the type of the elements of weights should be the same as the data type of the dual quaternions inside dualquat.
+     */
+    static DualQuat<_Tp> gdqblend(InputArray dualquat, InputArray weights,
+                                QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT);
+
+    /**
+     * @brief Return opposite dual quaternion \f$-p\f$
+     * which satisfies \f$p + (-p) = 0.\f$
+     *
+     * For example
+     * ```
+     * DualQuatd q{1, 2, 3, 4, 5, 6, 7, 8};
+     * std::cout << -q << std::endl; // [-1, -2, -3, -4, -5, -6, -7, -8]
+     * ```
+     */
+    DualQuat<_Tp> operator-() const;
+
+    /**
+     * @brief return true if two dual quaternions p and q are nearly equal, i.e. when the absolute
+     * difference between each pair of components \f$p_i\f$ and \f$q_i\f$ is less than CV_DUAL_QUAT_EPS.
+     */
+    bool operator==(const DualQuat<_Tp>&) const;
+
+    /**
+     * @brief Subtraction operator of two dual quaternions p and q.
+     * It returns a new dual quaternion that each value is the sum of \f$p_i\f$ and \f$-q_i\f$.
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12};
+     * std::cout << p - q << std::endl; //[-4, -4, -4, -4, -4, -4, -4, -4]
+     * ```
+     */
+    DualQuat<_Tp> operator-(const DualQuat<_Tp>&) const;
+
+    /**
+     * @brief Subtraction assignment operator of two dual quaternions p and q.
+     * It subtracts right operand from the left operand and assign the result to left operand.
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12};
+     * p -= q; // equivalent to p = p - q
+     * std::cout << p << std::endl; //[-4, -4, -4, -4, -4, -4, -4, -4]
+     *
+     * ```
+     */
+    DualQuat<_Tp>& operator-=(const DualQuat<_Tp>&);
+
+    /**
+     * @brief Addition operator of two dual quaternions p and q.
+     * It returns a new dual quaternion that each value is the sum of \f$p_i\f$ and \f$q_i\f$.
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12};
+     * std::cout << p + q << std::endl; //[6, 8, 10, 12, 14, 16, 18, 20]
+     * ```
+     */
+    DualQuat<_Tp> operator+(const DualQuat<_Tp>&) const;
+
+    /**
+     * @brief Addition assignment operator of two dual quaternions p and q.
+     * It adds right operand to the left operand and assign the result to left operand.
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12};
+     * p += q; // equivalent to p = p + q
+     * std::cout << p << std::endl; //[6, 8, 10, 12, 14, 16, 18, 20]
+     *
+     * ```
+     */
+    DualQuat<_Tp>& operator+=(const DualQuat<_Tp>&);
+
+    /**
+     * @brief Multiplication assignment operator of two dual quaternions.
+     * It multiplies right operand with the left operand and assign the result to left operand.
+     *
+     * Rule of dual quaternion multiplication:
+     * The dual quaternion can be written as an ordered pair of quaternions [A, B]. Thus
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p * q &= [A, B][C, D]\\
+     * &=[AC, AD + BC]
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12};
+     * p *= q;
+     * std::cout << p << std::endl; //[-60, 12, 30, 24, -216, 80, 124, 120]
+     * ```
+     */
+    DualQuat<_Tp>& operator*=(const DualQuat<_Tp>&);
+
+    /**
+     * @brief Multiplication assignment operator of a dual quaternion and a scalar.
+     * It multiplies every component of the left operand by the scalar and stores the result in the left operand.
+     *
+     * Rule of dual quaternion multiplication with a scalar:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p * s &= [w, x, y, z, w\_, x\_, y\_, z\_] * s\\
+     *  &=[w   s, x   s, y   s, z   s, w\_  \space  s, x\_  \space  s, y\_ \space  s, z\_ \space  s].
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * double s = 2.0;
+     * p *= s;
+     * std::cout << p << std::endl; //[2, 4, 6, 8, 10, 12, 14, 16]
+     * ```
+     * @param s scalar factor applied to all eight components.
+     * @return a copy of the updated dual quaternion (the return type is kept by value, as originally declared).
+     * @note the type of scalar should be equal to the dual quaternion.
+     */
+    DualQuat<_Tp> operator*=(const _Tp s)
+    {
+        // Defined in-class: dualquaternion.inl.hpp provides no out-of-class definition for this
+        // overload, so without this body any use of `p *= s` would fail to link.
+        w *= s;
+        x *= s;
+        y *= s;
+        z *= s;
+        w_ *= s;
+        x_ *= s;
+        y_ *= s;
+        z_ *= s;
+        return *this;
+    }
+
+
+    /**
+     * @brief Multiplication operator of two dual quaternions q and p.
+     * Multiplies values on either side of the operator.
+     *
+     * Rule of dual quaternion multiplication:
+     * The dual quaternion can be written as an ordered pair of quaternions [A, B]. Thus
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p * q &= [A, B][C, D]\\
+     * &=[AC, AD + BC]
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12};
+     * std::cout << p * q << std::endl; //[-60, 12, 30, 24, -216, 80, 124, 120]
+     * ```
+     */
+    DualQuat<_Tp> operator*(const DualQuat<_Tp>&) const;
+
+    /**
+     * @brief Division operator of a dual quaternion and a scalar.
+     * It divides the left operand by the right operand and returns the result; the left operand is not modified.
+     *
+     * Rule of dual quaternion division with a scalar:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p / s &= [w, x, y, z, w\_, x\_, y\_, z\_] / s\\
+     * &=[w/s, x/s, y/s, z/s, w\_/s, x\_/s, y\_/s, z\_/s].
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * double s = 2.0;
+     * DualQuatd r = p / s; // p itself is left unchanged
+     * std::cout << r << std::endl; //[0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4]
+     * ```
+     * @note the type of scalar should be equal to this dual quaternion.
+     */
+    DualQuat<_Tp> operator/(const _Tp s) const;
+
+    /**
+     * @brief Division operator of two dual quaternions p and q.
+     * Divides left hand operand by right hand operand.
+     *
+     * Rule of dual quaternion division with a dual quaternion:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p / q &= p * q.inv()\\
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12};
+     * std::cout << p / q << std::endl; // equivalent to p * q.inv()
+     * ```
+     */
+    DualQuat<_Tp> operator/(const DualQuat<_Tp>&) const;
+
+    /**
+     * @brief Division assignment operator of two dual quaternions p and q;
+     * It divides left operand with the right operand and assign the result to left operand.
+     *
+     * Rule of dual quaternion division with a dual quaternion:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p / q&= p * q.inv()\\
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * DualQuatd q{5, 6, 7, 8, 9, 10, 11, 12};
+     * p /= q; // equivalent to p = p * q.inv()
+     * std::cout << p << std::endl;
+     * ```
+     */
+    DualQuat<_Tp>& operator/=(const DualQuat<_Tp>&);
+
+    /**
+     * @brief Division assignment operator of a dual quaternion and a scalar.
+     * It divides every component of the left operand by the scalar and stores the result in the left operand.
+     *
+     * Rule of dual quaternion division with a scalar:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p / s &= [w, x, y, z, w\_, x\_, y\_ ,z\_] / s\\
+     * &=[w / s, x / s, y / s, z / s, w\_ / \space s, x\_ / \space s, y\_ / \space s, z\_ / \space s].
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * double s = 2.0;
+     * p /= s; // equivalent to p = p / s
+     * std::cout << p << std::endl; //[0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0]
+     * ```
+     * @param s scalar divisor applied to all eight components.
+     * @return reference to this (updated) dual quaternion.
+     * @note the type of scalar should be equal to the dual quaternion.
+     */
+    DualQuat<_Tp>& operator/=(const _Tp s)
+    {
+        // The previous declaration returned Quat<_Tp>& and had no definition in
+        // dualquaternion.inl.hpp, so it could never be linked. It is defined in-class here
+        // with the return type that matches the other compound-assignment operators.
+        w /= s;
+        x /= s;
+        y /= s;
+        z /= s;
+        w_ /= s;
+        x_ /= s;
+        y_ /= s;
+        z_ /= s;
+        return *this;
+    }
+
+    /**
+     * @brief Addition operator of a scalar and a dual quaternion.
+     * Adds the scalar to the first component (w) of the dual quaternion; the other components are unchanged.
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * double scalar = 2.0;
+     * std::cout << scalar + p << std::endl; //[3.0, 2, 3, 4, 5, 6, 7, 8]
+     * ```
+     * @note the type of scalar should be equal to the dual quaternion.
+     */
+    template <typename T>
+    friend DualQuat<T> cv::operator+(const T s, const DualQuat<T>&);
+
+    /**
+     * @brief Addition operator of a dual quaternion and a scalar.
+     * Adds the scalar to the first component (w) of the dual quaternion; the other components are unchanged.
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * double scalar = 2.0;
+     * std::cout << p + scalar << std::endl; //[3.0, 2, 3, 4, 5, 6, 7, 8]
+     * ```
+     * @note the type of scalar should be equal to the dual quaternion.
+     */
+    template <typename T>
+    friend DualQuat<T> cv::operator+(const DualQuat<T>&, const T s);
+
+    /**
+     * @brief Multiplication operator of a scalar and a dual quaternion.
+     * It multiplies every component of the dual quaternion by the scalar and returns the result.
+     *
+     * Rule of dual quaternion multiplication with a scalar:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p * s &= [w, x, y, z, w\_, x\_, y\_, z\_] * s\\
+     * &=[w s, x s, y s, z s, w\_ \space s, x\_ \space s, y\_ \space s, z\_ \space s].
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * double s = 2.0;
+     * std::cout << s * p << std::endl; //[2, 4, 6, 8, 10, 12, 14, 16]
+     * ```
+     * @note the type of scalar should be equal to the dual quaternion.
+     */
+    template <typename T>
+    friend DualQuat<T> cv::operator*(const T s, const DualQuat<T>&);
+
+    /**
+     * @brief Subtraction operator of a dual quaternion and a scalar.
+     * Subtracts right hand operand from left hand operand.
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * double scalar = 2.0;
+     * std::cout << p - scalar << std::endl; //[-1, 2, 3, 4, 5, 6, 7, 8]
+     * ```
+     * @note the type of scalar should be equal to the dual quaternion.
+     */
+    template <typename T>
+    friend DualQuat<T> cv::operator-(const DualQuat<T>&, const T s);
+
+    /**
+     * @brief Subtraction operator of a scalar and a dual quaternions.
+     * Subtracts right hand operand from left hand operand.
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * double scalar = 2.0;
+     * std::cout << scalar - p << std::endl; //[1.0, -2, -3, -4, -5, -6, -7, -8]
+     * ```
+     * @note the type of scalar should be equal to the dual quaternion.
+     */
+    template <typename T>
+    friend DualQuat<T> cv::operator-(const T s, const DualQuat<T>&);
+
+    /**
+     * @brief Multiplication operator of a dual quaternion and a scalar.
+     * It multiplies every component of the dual quaternion by the scalar and returns the result.
+     *
+     * Rule of dual quaternion multiplication with a scalar:
+     * \f[
+     * \begin{equation}
+     * \begin{split}
+     * p * s &= [w, x, y, z, w\_, x\_, y\_, z\_] * s\\
+     * &=[w s, x s, y s, z s, w\_ \space s, x\_ \space s, y\_ \space s, z\_ \space s].
+     * \end{split}
+     * \end{equation}
+     * \f]
+     *
+     * For example
+     * ```
+     * DualQuatd p{1, 2, 3, 4, 5, 6, 7, 8};
+     * double s = 2.0;
+     * std::cout << p * s << std::endl; //[2, 4, 6, 8, 10, 12, 14, 16]
+     * ```
+     * @note the type of scalar should be equal to the dual quaternion.
+     */
+    template <typename T>
+    friend DualQuat<T> cv::operator*(const DualQuat<T>&, const T s);
+
+    template <typename S>
+    friend std::ostream& cv::operator<<(std::ostream&, const DualQuat<S>&);
+
+};
+
+//! Dual quaternion with double-precision coefficients.
+using DualQuatd = DualQuat<double>;
+//! Dual quaternion with single-precision coefficients.
+using DualQuatf = DualQuat<float>;
+
+//! @} core
+}//namespace
+
+#include "dualquaternion.inl.hpp"
+
+#endif /* OPENCV_CORE_DUALQUATERNION_HPP */
--- a/3rdparty/opencv/inc/opencv2/core/dualquaternion.inl.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/dualquaternion.inl.hpp
@@ -0,0 +1,487 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2020, Huawei Technologies Co., Ltd. All rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//       http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: Liangqian Kong <kongliangqian@huawei.com>
+//         Longbu Wang <wanglongbu@huawei.com>
+
+#ifndef OPENCV_CORE_DUALQUATERNION_INL_HPP
+#define OPENCV_CORE_DUALQUATERNION_INL_HPP
+
+#ifndef OPENCV_CORE_DUALQUATERNION_HPP
+#error This is not a standalone header. Include dualquaternion.hpp instead.
+#endif
+
+///////////////////////////////////////////////////////////////////////////////////////
+//Implementation
+namespace cv {
+
+// Default constructor: all eight coefficients are initialised to zero.
+template <typename T>
+DualQuat<T>::DualQuat()
+    : w(0), x(0), y(0), z(0), w_(0), x_(0), y_(0), z_(0)
+{
+}
+
+// Construct from eight scalars: (vw, vx, vy, vz) fill w, x, y, z and
+// (_w, _x, _y, _z) fill w_, x_, y_, z_.
+template <typename T>
+DualQuat<T>::DualQuat(const T vw, const T vx, const T vy, const T vz,
+                      const T _w, const T _x, const T _y, const T _z)
+    : w(vw), x(vx), y(vy), z(vz), w_(_w), x_(_x), y_(_y), z_(_z)
+{
+}
+
+// Construct from an 8-element vector laid out as [w, x, y, z, w_, x_, y_, z_].
+template <typename T>
+DualQuat<T>::DualQuat(const Vec<T, 8> &q)
+    : w(q[0]), x(q[1]), y(q[2]), z(q[3]),
+      w_(q[4]), x_(q[5]), y_(q[6]), z_(q[7])
+{
+}
+
+// Build a dual quaternion whose first four coefficients come from realPart
+// and whose last four come from dualPart.
+template <typename T>
+DualQuat<T> DualQuat<T>::createFromQuat(const Quat<T> &realPart, const Quat<T> &dualPart)
+{
+    return DualQuat<T>(realPart.w, realPart.x, realPart.y, realPart.z,
+                       dualPart.w, dualPart.x, dualPart.y, dualPart.z);
+}
+
+// Build a dual quaternion from a rotation (angle about axis) and a translation.
+// The real part is the rotation quaternion r; the dual part is (t * r) / 2,
+// where t is the pure quaternion (0, trans).
+template <typename T>
+DualQuat<T> DualQuat<T>::createFromAngleAxisTrans(const T angle, const Vec<T, 3> &axis, const Vec<T, 3> &trans)
+{
+    Quat<T> rotation = Quat<T>::createFromAngleAxis(angle, axis);
+    Quat<T> translation(0, trans[0], trans[1], trans[2]);
+    Quat<T> dualPart = translation * rotation / 2;
+    return createFromQuat(rotation, dualPart);
+}
+
+// Build a dual quaternion from a 4x4 matrix whose element type equals T.
+// The upper-left 3x3 block is converted to the rotation quaternion r and the
+// first three entries of the last column are taken as the translation t;
+// the dual part is (t * r) / 2. Raises StsBadArg if the matrix is not 4x4.
+template <typename T>
+DualQuat<T> DualQuat<T>::createFromMat(InputArray _R)
+{
+    CV_CheckTypeEQ(_R.type(), cv::traits::Type<T>::value, "");
+    if (_R.size() != Size(4, 4))
+    {
+        CV_Error(Error::StsBadArg, "The input matrix must have 4 columns and 4 rows");
+    }
+
+    Mat transform = _R.getMat();
+    Quat<T> rotation = Quat<T>::createFromRotMat(transform.colRange(0, 3).rowRange(0, 3));
+    Quat<T> translation(0, transform.at<T>(0, 3), transform.at<T>(1, 3), transform.at<T>(2, 3));
+    Quat<T> dualPart = translation * rotation / 2;
+    return createFromQuat(rotation, dualPart);
+}
+
+// Build a dual quaternion from an Affine3 transform by forwarding its
+// underlying matrix to createFromMat().
+template <typename T>
+DualQuat<T> DualQuat<T>::createFromAffine3(const Affine3<T> &R)
+{
+    const auto &transform = R.matrix;
+    return createFromMat(transform);
+}
+
+// Build a dual quaternion from screw parameters: rotation `angle` about `axis`,
+// displacement `d` along the axis, and the line `moment`.
+// Only half of the angle and half of the displacement enter the formulas below.
+template <typename T>
+DualQuat<T> DualQuat<T>::createFromPitch(const T angle, const T d, const Vec<T, 3> &axis, const Vec<T, 3> &moment)
+{
+    T half_angle = angle / 2, half_d = d / 2;
+    // Pure quaternion of the axis, normalized to unit length.
+    Quat<T> qaxis = Quat<T>(0, axis[0], axis[1], axis[2]).normalize();
+    Quat<T> qmoment = Quat<T>(0, moment[0], moment[1], moment[2]);
+    // Remove the component of the moment that lies along the axis.
+    // NOTE(review): the dot product uses the un-normalized `axis` while `qaxis` is normalized,
+    // so this is an exact projection only when `axis` already has unit length - confirm the
+    // intended precondition on `axis`.
+    qmoment -= qaxis * axis.dot(moment);
+    // Dual part: scalar term -d/2*sin(angle/2) plus vector term sin(angle/2)*moment + d/2*cos(angle/2)*axis.
+    Quat<T> dual = -half_d * std::sin(half_angle) + std::sin(half_angle) * qmoment +
+        half_d * std::cos(half_angle) * qaxis;
+    return createFromQuat(Quat<T>::createFromAngleAxis(angle, axis), dual);
+}
+
+// Approximate equality: true when every pair of corresponding coefficients
+// differs by less than CV_DUAL_QUAT_EPS in absolute value.
+template <typename T>
+inline bool DualQuat<T>::operator==(const DualQuat<T> &q) const
+{
+    const auto nearlyEqual = [](const T lhs, const T rhs)
+    {
+        return abs(lhs - rhs) < CV_DUAL_QUAT_EPS;
+    };
+    return nearlyEqual(w, q.w) && nearlyEqual(x, q.x) &&
+           nearlyEqual(y, q.y) && nearlyEqual(z, q.z) &&
+           nearlyEqual(w_, q.w_) && nearlyEqual(x_, q.x_) &&
+           nearlyEqual(y_, q.y_) && nearlyEqual(z_, q.z_);
+}
+
+// Real part: the quaternion formed by the first four coefficients (w, x, y, z).
+template <typename T>
+inline Quat<T> DualQuat<T>::getRealPart() const
+{
+    Quat<T> realPart(w, x, y, z);
+    return realPart;
+}
+
+// Dual part: the quaternion formed by the last four coefficients (w_, x_, y_, z_).
+template <typename T>
+inline Quat<T> DualQuat<T>::getDualPart() const
+{
+    Quat<T> dualPart(w_, x_, y_, z_);
+    return dualPart;
+}
+
+// Free-function form of DualQuat::conjugate().
+template <typename T>
+inline DualQuat<T> conjugate(const DualQuat<T> &dq)
+{
+    const DualQuat<T> result = dq.conjugate();
+    return result;
+}
+
+// Conjugate: keeps w and w_ and negates the x, y, z components of both parts.
+template <typename T>
+inline DualQuat<T> DualQuat<T>::conjugate() const
+{
+    return DualQuat<T>(w, -x, -y, -z,
+                       w_, -x_, -y_, -z_);
+}
+
+// Norm expressed as a dual number stored in a DualQuat: the w coefficient is
+// |real| and the w_ coefficient is dot(real, dual) / |real|; all other
+// coefficients are zero. Returns the all-zero dual quaternion when |real| is
+// below CV_DUAL_QUAT_EPS.
+template <typename T>
+DualQuat<T> DualQuat<T>::norm() const
+{
+    Quat<T> realPart = getRealPart();
+    const T magnitude = realPart.norm();
+    if (magnitude < CV_DUAL_QUAT_EPS)
+    {
+        return DualQuat<T>(0, 0, 0, 0, 0, 0, 0, 0);
+    }
+    const T dualMagnitude = realPart.dot(getDualPart()) / magnitude;
+    return DualQuat<T>(magnitude, 0, 0, 0, dualMagnitude, 0, 0, 0);
+}
+
+// Rotation quaternion: the real part as-is when the caller asserts unit
+// length, otherwise the real part normalized.
+template <typename T>
+inline Quat<T> DualQuat<T>::getRotation(QuatAssumeType assumeUnit) const
+{
+    return assumeUnit ? getRealPart() : getRealPart().normalize();
+}
+
+// Translation vector: the x, y, z components of 2 * dual * real^{-1}.
+// assumeUnit is forwarded to the inversion of the real part.
+template <typename T>
+inline Vec<T, 3> DualQuat<T>::getTranslation(QuatAssumeType assumeUnit) const
+{
+    // The factor is spelled in type T (not the double literal 2.0) so that the scalar * Quat<T>
+    // operator template deduces a single T for DualQuat<float> as well; for double the value is identical.
+    Quat<T> trans = static_cast<T>(2) * (getDualPart() * getRealPart().inv(assumeUnit));
+    return Vec<T, 3>{trans[1], trans[2], trans[3]};
+}
+
+// Return the normalized dual quaternion: both parts are divided by the norm
+// of the real part, then the component of the scaled dual part along the
+// scaled real part is removed. Raises StsBadArg when the real-part norm is
+// below CV_DUAL_QUAT_EPS.
+template <typename T>
+DualQuat<T> DualQuat<T>::normalize() const
+{
+    Quat<T> realPart = getRealPart();
+    Quat<T> dualPart = getDualPart();
+    const T realNorm = realPart.norm();
+    if (realNorm < CV_DUAL_QUAT_EPS)
+    {
+        CV_Error(Error::StsBadArg, "Cannot normalize this dual quaternion: the norm is too small.");
+    }
+    Quat<T> unitReal = realPart / realNorm;
+    Quat<T> scaledDual = dualPart / realNorm;
+    Quat<T> orthogonalDual = scaledDual - unitReal * unitReal.dot(scaledDual);
+    return createFromQuat(unitReal, orthogonalDual);
+}
+
+// Dot product over all eight coefficients (real and dual parts together).
+template <typename T>
+inline T DualQuat<T>::dot(DualQuat<T> q) const
+{
+    return q.w * w
+         + q.x * x
+         + q.y * y
+         + q.z * z
+         + q.w_ * w_
+         + q.x_ * x_
+         + q.y_ * y_
+         + q.z_ * z_;
+}
+
+// Free-function form of DualQuat::inv().
+template <typename T>
+inline DualQuat<T> inv(const DualQuat<T> &dq, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT)
+{
+    const DualQuat<T> inverse = dq.inv(assumeUnit);
+    return inverse;
+}
+
+// Inverse of the dual quaternion [A, B]: [A^{-1}, -A^{-1} * B * A^{-1}].
+// assumeUnit is forwarded to the inversion of the real part A.
+template <typename T>
+inline DualQuat<T> DualQuat<T>::inv(QuatAssumeType assumeUnit) const
+{
+    Quat<T> dual = getDualPart();
+    // Invert the real part once and reuse it (it was previously recomputed three times).
+    Quat<T> realInv = getRealPart().inv(assumeUnit);
+    return createFromQuat(realInv, -realInv * dual * realInv);
+}
+
+// Component-wise difference of two dual quaternions.
+template <typename T>
+inline DualQuat<T> DualQuat<T>::operator-(const DualQuat<T> &q) const
+{
+    return DualQuat<T>(w - q.w, x - q.x, y - q.y, z - q.z,
+                       w_ - q.w_, x_ - q.x_, y_ - q.y_, z_ - q.z_);
+}
+
+// Unary minus: negates all eight coefficients.
+template <typename T>
+inline DualQuat<T> DualQuat<T>::operator-() const
+{
+    return DualQuat<T>(-w, -x, -y, -z,
+                       -w_, -x_, -y_, -z_);
+}
+
+// Component-wise sum of two dual quaternions.
+template <typename T>
+inline DualQuat<T> DualQuat<T>::operator+(const DualQuat<T> &q) const
+{
+    return DualQuat<T>(w + q.w, x + q.x, y + q.y, z + q.z,
+                       w_ + q.w_, x_ + q.x_, y_ + q.y_, z_ + q.z_);
+}
+
+// In-place addition, implemented through operator+.
+template <typename T>
+inline DualQuat<T>& DualQuat<T>::operator+=(const DualQuat<T> &q)
+{
+    const DualQuat<T> sum = *this + q;
+    *this = sum;
+    return *this;
+}
+
+// Dual quaternion product: [A, B] * [C, D] = [A*C, A*D + B*C].
+template <typename T>
+inline DualQuat<T> DualQuat<T>::operator*(const DualQuat<T> &q) const
+{
+    Quat<T> lhsReal = getRealPart();
+    Quat<T> lhsDual = getDualPart();
+    Quat<T> rhsReal = q.getRealPart();
+    Quat<T> rhsDual = q.getDualPart();
+    Quat<T> productReal = lhsReal * rhsReal;
+    Quat<T> productDual = lhsReal * rhsDual + lhsDual * rhsReal;
+    return DualQuat<T>::createFromQuat(productReal, productDual);
+}
+
+// In-place dual quaternion product (this = this * q), implemented through operator*.
+template <typename T>
+inline DualQuat<T>& DualQuat<T>::operator*=(const DualQuat<T> &q)
+{
+    const DualQuat<T> product = *this * q;
+    *this = product;
+    return *this;
+}
+
+// scalar + dual quaternion: the scalar is added to the w coefficient only.
+template <typename T>
+inline DualQuat<T> operator+(const T a, const DualQuat<T> &q)
+{
+    const T shiftedW = a + q.w;
+    return DualQuat<T>(shiftedW, q.x, q.y, q.z, q.w_, q.x_, q.y_, q.z_);
+}
+
+// dual quaternion + scalar: the scalar is added to the w coefficient only.
+template <typename T>
+inline DualQuat<T> operator+(const DualQuat<T> &q, const T a)
+{
+    const T shiftedW = a + q.w;
+    return DualQuat<T>(shiftedW, q.x, q.y, q.z, q.w_, q.x_, q.y_, q.z_);
+}
+
+// dual quaternion - scalar: the scalar is subtracted from the w coefficient only.
+template <typename T>
+inline DualQuat<T> operator-(const DualQuat<T> &q, const T a)
+{
+    const T shiftedW = q.w - a;
+    return DualQuat<T>(shiftedW, q.x, q.y, q.z, q.w_, q.x_, q.y_, q.z_);
+}
+
+// In-place subtraction, implemented through operator-.
+template <typename T>
+inline DualQuat<T>& DualQuat<T>::operator-=(const DualQuat<T> &q)
+{
+    const DualQuat<T> difference = *this - q;
+    *this = difference;
+    return *this;
+}
+
+// scalar - dual quaternion: w becomes (a - q.w); every other coefficient is negated.
+template <typename T>
+inline DualQuat<T> operator-(const T a, const DualQuat<T> &q)
+{
+    const T shiftedW = a - q.w;
+    return DualQuat<T>(shiftedW, -q.x, -q.y, -q.z,
+                       -q.w_, -q.x_, -q.y_, -q.z_);
+}
+
+// scalar * dual quaternion: every coefficient is multiplied by the scalar.
+template <typename T>
+inline DualQuat<T> operator*(const T a, const DualQuat<T> &q)
+{
+    return DualQuat<T>(q.w * a, q.x * a, q.y * a, q.z * a,
+                       q.w_ * a, q.x_ * a, q.y_ * a, q.z_ * a);
+}
+
+// dual quaternion * scalar: every coefficient is multiplied by the scalar.
+template <typename T>
+inline DualQuat<T> operator*(const DualQuat<T> &q, const T a)
+{
+    return DualQuat<T>(q.w * a, q.x * a, q.y * a, q.z * a,
+                       q.w_ * a, q.x_ * a, q.y_ * a, q.z_ * a);
+}
+
+// dual quaternion / scalar: every coefficient is divided by the scalar.
+// No check is made for a zero divisor.
+template <typename T>
+inline DualQuat<T> DualQuat<T>::operator/(const T a) const
+{
+    return DualQuat<T>(w / a, x / a, y / a, z / a,
+                       w_ / a, x_ / a, y_ / a, z_ / a);
+}
+
+// dual quaternion / dual quaternion: right-multiplication by the inverse of q
+// (inv() is called with its default argument).
+template <typename T>
+inline DualQuat<T> DualQuat<T>::operator/(const DualQuat<T> &q) const
+{
+    const DualQuat<T> inverse = q.inv();
+    return *this * inverse;
+}
+
+// In-place division by a dual quaternion, implemented through operator/.
+template <typename T>
+inline DualQuat<T>& DualQuat<T>::operator/=(const DualQuat<T> &q)
+{
+    const DualQuat<T> quotient = *this / q;
+    *this = quotient;
+    return *this;
+}
+
+// Stream output: writes the text "DualQuat " followed by the eight
+// coefficients formatted as a Vec<T, 8>.
+template <typename T>
+std::ostream & operator<<(std::ostream &os, const DualQuat<T> &q)
+{
+    const Vec<T, 8> coeffs{q.w, q.x, q.y, q.z, q.w_, q.x_, q.y_, q.z_};
+    os << "DualQuat " << coeffs;
+    return os;
+}
+
+// Free-function form of DualQuat::exp().
+template <typename T>
+inline DualQuat<T> exp(const DualQuat<T> &dq)
+{
+    const DualQuat<T> result = dq.exp();
+    return result;
+}
+
+namespace detail {
+
+// 4x4 Jacobian of the quaternion exponential evaluated at q.
+// With nv = |(x, y, z)|, the entries are built from sin(nv)/nv and
+// (cos(nv) - sin(nv)/nv) / nv^2; when nv is below CV_DUAL_QUAT_EPS these are
+// replaced by 1 - nv^2/6 and -1/3 respectively to avoid dividing by ~0.
+// The whole matrix is scaled by exp(q.w).
+template <typename _Tp>
+Matx<_Tp, 4, 4> jacob_exp(const Quat<_Tp> &q)
+{
+    _Tp nv = std::sqrt(q.x * q.x + q.y * q.y + q.z * q.z);
+    const bool nearZero = abs(nv) < cv::DualQuat<_Tp>::CV_DUAL_QUAT_EPS;
+
+    _Tp sinc_nv;
+    _Tp csiii_nv;
+    if (nearZero)
+    {
+        sinc_nv = 1 - nv * nv / 6;
+        csiii_nv = -(_Tp)1.0 / 3;
+    }
+    else
+    {
+        sinc_nv = std::sin(nv) / nv;
+        csiii_nv = (std::cos(nv) - sinc_nv) / nv / nv;
+    }
+
+    Matx<_Tp, 4, 4> jacobian {
+        std::cos(nv), -sinc_nv * q.x,  -sinc_nv * q.y,  -sinc_nv * q.z,
+        sinc_nv * q.x, csiii_nv * q.x * q.x + sinc_nv, csiii_nv * q.x * q.y, csiii_nv * q.x * q.z,
+        sinc_nv * q.y, csiii_nv * q.y * q.x, csiii_nv * q.y * q.y + sinc_nv, csiii_nv * q.y * q.z,
+        sinc_nv * q.z, csiii_nv * q.z * q.x, csiii_nv * q.z * q.y, csiii_nv * q.z * q.z + sinc_nv
+    };
+    return std::exp(q.w) * jacobian;
+}
+
+} // namespace detail
+
+// Exponential of a dual quaternion: the real part is exp(real); the dual part
+// is the Jacobian of the quaternion exponential at `real` applied to the dual part.
+template <typename T>
+DualQuat<T> DualQuat<T>::exp() const
+{
+    Quat<T> realPart = getRealPart();
+    Matx<T, 4, 4> jacobian = detail::jacob_exp(realPart);
+    Quat<T> dualExp(jacobian * getDualPart().toVec());
+    return createFromQuat(realPart.exp(), dualExp);
+}
+
+// Free-function form of DualQuat::log().
+template <typename T>
+DualQuat<T> log(const DualQuat<T> &dq, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT)
+{
+    const DualQuat<T> result = dq.log(assumeUnit);
+    return result;
+}
+
+// Logarithm of a dual quaternion: the real part is log(real); the dual part is
+// the inverse of the exponential's Jacobian (evaluated at log(real)) applied
+// to the dual part.
+template <typename T>
+DualQuat<T> DualQuat<T>::log(QuatAssumeType assumeUnit) const
+{
+    Quat<T> realLog = getRealPart().log(assumeUnit);
+    Matx<T, 4, 4> jacobian = detail::jacob_exp(realLog);
+    Quat<T> dualLog(jacobian.inv() * getDualPart().toVec());
+    return createFromQuat(realLog, dualLog);
+}
+
+// Free-function form of DualQuat::power() with a scalar exponent.
+template <typename T>
+inline DualQuat<T> power(const DualQuat<T> &dq, const T t, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT)
+{
+    const DualQuat<T> result = dq.power(t, assumeUnit);
+    return result;
+}
+
+// Scalar power: exp(t * log(this)).
+template <typename T>
+inline DualQuat<T> DualQuat<T>::power(const T t, QuatAssumeType assumeUnit) const
+{
+    const DualQuat<T> scaledLog = t * log(assumeUnit);
+    return scaledLog.exp();
+}
+
+// Free-function form of DualQuat::power() with a dual quaternion exponent.
+template <typename T>
+inline DualQuat<T> power(const DualQuat<T> &p, const DualQuat<T> &q, QuatAssumeType assumeUnit=QUAT_ASSUME_NOT_UNIT)
+{
+    const DualQuat<T> result = p.power(q, assumeUnit);
+    return result;
+}
+
+// Dual quaternion power: exp(q * log(this)), with q multiplied on the left.
+template <typename T>
+inline DualQuat<T> DualQuat<T>::power(const DualQuat<T> &q, QuatAssumeType assumeUnit) const
+{
+    const DualQuat<T> scaledLog = q * log(assumeUnit);
+    return scaledLog.exp();
+}
+
+// Pack the coefficients into a Vec<T, 8> as [w, x, y, z, w_, x_, y_, z_].
+template <typename T>
+inline Vec<T, 8> DualQuat<T>::toVec() const
+{
+    Vec<T, 8> coeffs(w, x, y, z, w_, x_, y_, z_);
+    return coeffs;
+}
+
+// Convert to an Affine3 by wrapping the 4x4 matrix produced by toMat().
+template <typename T>
+Affine3<T> DualQuat<T>::toAffine3(QuatAssumeType assumeUnit) const
+{
+    const Matx<T, 4, 4> transform = toMat(assumeUnit);
+    return Affine3<T>(transform);
+}
+
+// Convert to a 4x4 matrix: start from the 4x4 rotation matrix of
+// getRotation() and write getTranslation() into rows 0..2 of the last column.
+template <typename T>
+Matx<T, 4, 4> DualQuat<T>::toMat(QuatAssumeType assumeUnit) const
+{
+    Matx<T, 4, 4> transform = getRotation(assumeUnit).toRotMat4x4();
+    const Vec<T, 3> shift = getTranslation(assumeUnit);
+    for (int row = 0; row < 3; ++row)
+    {
+        transform(row, 3) = shift[row];
+    }
+    return transform;
+}
+
+// Screw linear interpolation between q0 and q1 with parameter t:
+// q0 * (q0^{-1} * q1)^t. Both inputs are normalized first unless assumeUnit
+// is set. When directChange is true and the real parts have a negative dot
+// product, q0 is negated before interpolating.
+template <typename T>
+DualQuat<T> DualQuat<T>::sclerp(const DualQuat<T> &q0, const DualQuat<T> &q1, const T t, bool directChange, QuatAssumeType assumeUnit)
+{
+    DualQuat<T> from(q0);
+    DualQuat<T> to(q1);
+    if (!assumeUnit)
+    {
+        from = from.normalize();
+        to = to.normalize();
+    }
+
+    const bool flip = directChange && to.getRealPart().dot(from.getRealPart()) < 0;
+    if (flip)
+    {
+        from = -from;
+    }
+
+    const DualQuat<T> relative = from.inv() * to;
+    return from * relative.power(t, QUAT_ASSUME_UNIT);
+}
+
+// Dual quaternion linear blending of q1 and q2 with parameter t: the
+// normalized value of (1 - t) * q1 +/- t * q2. The minus sign is used when
+// the two rotations have a negative dot product. Inputs are normalized first
+// unless assumeUnit is set.
+template <typename T>
+DualQuat<T> DualQuat<T>::dqblend(const DualQuat<T> &q1, const DualQuat<T> &q2, const T t, QuatAssumeType assumeUnit)
+{
+    DualQuat<T> first(q1);
+    DualQuat<T> second(q2);
+    if (!assumeUnit)
+    {
+        first = first.normalize();
+        second = second.normalize();
+    }
+
+    const bool opposite = first.getRotation(assumeUnit).dot(second.getRotation(assumeUnit)) < 0;
+    const DualQuat<T> mixed = opposite ? ((1 - t) * first - t * second)
+                                       : ((1 - t) * first + t * second);
+    return mixed.normalize();
+}
+
+// Generalized dual quaternion linear blending of n dual quaternions.
+//
+// _dualquat: 8-channel array with depth matching T, shaped (1, n) or (n, 1).
+// _weight:   array of T with the same size as _dualquat.
+// assumeUnit: when not set, every input dual quaternion is normalized first.
+//
+// Each term after the first is negated when its rotation has a negative dot
+// product with the rotation of the first element. The weighted sum is
+// normalized before it is returned. Raises StsBadArg on a size mismatch.
+template <typename T>
+DualQuat<T> DualQuat<T>::gdqblend(InputArray _dualquat, InputArray _weight, QuatAssumeType assumeUnit)
+{
+    CV_CheckTypeEQ(_weight.type(), cv::traits::Type<T>::value, "");
+    CV_CheckTypeEQ(_dualquat.type(), CV_MAKETYPE(CV_MAT_DEPTH(cv::traits::Type<T>::value), 8), "");
+    Size dq_s = _dualquat.size();
+    if (dq_s != _weight.size() || (dq_s.height != 1 && dq_s.width != 1))
+    {
+        CV_Error(Error::StsBadArg, "The size of weight must be the same as dualquat, both of them should be (1, n) or (n, 1)");
+    }
+    Mat dualquat = _dualquat.getMat(), weight = _weight.getMat();
+    const int cn = std::max(dq_s.width, dq_s.height);
+    if (!assumeUnit)
+    {
+        // getMat() shares storage with the caller's array; normalize a private copy so that
+        // the InputArray argument is not modified as a side effect.
+        dualquat = dualquat.clone();
+        for (int i = 0; i < cn; ++i)
+        {
+            dualquat.at<Vec<T, 8>>(i) = DualQuat<T>{dualquat.at<Vec<T, 8>>(i)}.normalize().toVec();
+        }
+    }
+    Vec<T, 8> dq_blend = dualquat.at<Vec<T, 8>>(0) * weight.at<T>(0);
+    // Rotation of the first element is the reference for the sign of every later term.
+    Quat<T> q0 = DualQuat<T> {dualquat.at<Vec<T, 8>>(0)}.getRotation(assumeUnit);
+    for (int i = 1; i < cn; ++i)
+    {
+        T k = q0.dot(DualQuat<T>{dualquat.at<Vec<T, 8>>(i)}.getRotation(assumeUnit)) < 0 ? -1: 1;
+        dq_blend = dq_blend + dualquat.at<Vec<T, 8>>(i) * k * weight.at<T>(i);
+    }
+    return DualQuat<T>{dq_blend}.normalize();
+}
+
+// Generalized dual quaternion linear blending for a fixed-size Vec of dual
+// quaternions. The inputs are copied into a cn x 1, 8-channel matrix and
+// forwarded to the InputArray overload. With cn == 0 the dual quaternion
+// (1, 0, 0, 0, 0, 0, 0, 0) is returned.
+template <typename T>
+template <int cn>
+DualQuat<T> DualQuat<T>::gdqblend(const Vec<DualQuat<T>, cn> &_dualquat, InputArray _weight, QuatAssumeType assumeUnit)
+{
+    if (cn == 0)
+    {
+        return DualQuat<T>(1, 0, 0, 0, 0, 0, 0, 0);
+    }
+    // The matrix depth must follow T: a hard-coded CV_64F buffer would be accessed as
+    // Vec<float, 8> for DualQuat<float> and rejected by the type check in the InputArray overload.
+    Mat dualquat_mat(cn, 1, CV_MAKETYPE(CV_MAT_DEPTH(cv::traits::Type<T>::value), 8));
+    for (int i = 0; i < cn ; ++i)
+    {
+        dualquat_mat.at<Vec<T, 8>>(i) = _dualquat[i].toVec();
+    }
+    return gdqblend(dualquat_mat, _weight, assumeUnit);
+}
+
+} //namespace cv
+
+#endif /*OPENCV_CORE_DUALQUATERNION_INL_HPP*/
--- a/3rdparty/opencv/inc/opencv2/core/eigen.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/eigen.hpp
@@ -0,0 +1,402 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+
+#ifndef OPENCV_CORE_EIGEN_HPP
+#define OPENCV_CORE_EIGEN_HPP
+
+#ifndef EIGEN_WORLD_VERSION
+#error "Wrong usage of OpenCV's Eigen utility header. Include Eigen's headers first. See https://github.com/opencv/opencv/issues/17366"
+#endif
+
+#include "opencv2/core.hpp"
+
+#if defined _MSC_VER && _MSC_VER >= 1200
+// Guarded so that builds which already define NOMINMAX (e.g. on the command line)
+// do not get a macro-redefinition warning.
+#ifndef NOMINMAX
+#define NOMINMAX // fix https://github.com/opencv/opencv/issues/17548
+#endif
+#pragma warning( disable: 4714 ) //__forceinline is not inlined
+#pragma warning( disable: 4127 ) //conditional expression is constant
+#pragma warning( disable: 4244 ) //conversion from '__int64' to 'int', possible loss of data
+#endif
+
+#if !defined(OPENCV_DISABLE_EIGEN_TENSOR_SUPPORT)
+#if EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3 \
+    && defined(CV_CXX11) && defined(CV_CXX_STD_ARRAY)
+#include <unsupported/Eigen/CXX11/Tensor>
+#define OPENCV_EIGEN_TENSOR_SUPPORT 1
+#endif  // EIGEN_WORLD_VERSION == 3 && EIGEN_MAJOR_VERSION >= 3
+#endif  // !defined(OPENCV_DISABLE_EIGEN_TENSOR_SUPPORT)
+
+namespace cv
+{
+
+/** @addtogroup core_eigen
+These functions are provided for OpenCV-Eigen interoperability. They convert `Mat`
+objects to corresponding `Eigen::Matrix` objects and vice-versa. Consult the [Eigen
+documentation](https://eigen.tuxfamily.org/dox/group__TutorialMatrixClass.html) for
+information about the `Matrix` template type.
+
+@note Using these functions requires the `Eigen/Dense` or similar header to be
+included before this header.
+*/
+//! @{
+
+#if defined(OPENCV_EIGEN_TENSOR_SUPPORT) || defined(CV_DOXYGEN)
+/** @brief Converts an Eigen::Tensor to a cv::Mat.
+
+The method converts an Eigen::Tensor with shape (H x W x C) to a cv::Mat where:
+ H = number of rows
+ W = number of columns
+ C = number of channels
+
+Usage:
+\code
+Eigen::Tensor<float, 3, Eigen::RowMajor> a_tensor(...);
+// populate tensor with values
+Mat a_mat;
+eigen2cv(a_tensor, a_mat);
+\endcode
+*/
+template <typename _Tp, int _layout> static inline
+void eigen2cv( const Eigen::Tensor<_Tp, 3, _layout> &src, OutputArray dst )
+{
+    if( _layout & Eigen::RowMajorBit )
+    {
+        // Row-major tensor: wrap its buffer in a Mat header and copy it out.
+        Mat wrapped(src.dimension(0), src.dimension(1), CV_MAKETYPE(DataType<_Tp>::type, src.dimension(2)), (void *)src.data());
+        wrapped.copyTo(dst);
+    }
+    else
+    {
+        // Column-major tensor: first materialize a tensor of the opposite layout
+        // (swap_layout + axis reversal), then wrap that temporary instead.
+        const std::array<int, 3> reversed_axes{2, 1, 0};
+        Eigen::Tensor<_Tp, 3, !_layout> reordered = src.swap_layout().shuffle(reversed_axes);
+        Mat wrapped(src.dimension(0), src.dimension(1), CV_MAKETYPE(DataType<_Tp>::type, src.dimension(2)), reordered.data());
+        wrapped.copyTo(dst);
+    }
+}
+
+/** @brief Converts a cv::Mat to an Eigen::Tensor.
+
+The method converts a cv::Mat to an Eigen Tensor with shape (H x W x C) where:
+ H = number of rows
+ W = number of columns
+ C = number of channels
+
+Usage:
+\code
+Mat a_mat(...);
+// populate Mat with values
+Eigen::Tensor<float, 3, Eigen::RowMajor> a_tensor(...);
+cv2eigen(a_mat, a_tensor);
+\endcode
+*/
+template <typename _Tp, int _layout> static inline
+void cv2eigen( const Mat &src, Eigen::Tensor<_Tp, 3, _layout> &dst )
+{
+    if( _layout & Eigen::RowMajorBit )
+    {
+        // Row-major destination: size it, then let a Mat header write straight into it.
+        dst.resize(src.rows, src.cols, src.channels());
+        Mat view(src.rows, src.cols, CV_MAKETYPE(DataType<_Tp>::type, src.channels()), dst.data());
+        if (src.type() != view.type())
+            src.convertTo(view, view.type());
+        else
+            src.copyTo(view);
+    }
+    else
+    {
+        // Column-major destination: fill a temporary tensor of the opposite layout,
+        // then assign its swap_layout + axis-reversed form to dst.
+        Eigen::Tensor<_Tp, 3, !_layout> staging(src.rows, src.cols, src.channels());
+        Mat view(src.rows, src.cols, CV_MAKETYPE(DataType<_Tp>::type, src.channels()), staging.data());
+        if (src.type() != view.type())
+            src.convertTo(view, view.type());
+        else
+            src.copyTo(view);
+        const std::array<int, 3> reversed_axes{2, 1, 0};
+        dst = staging.swap_layout().shuffle(reversed_axes);
+    }
+}
+
+/** @brief Maps cv::Mat data to an Eigen::TensorMap.
+
+The method wraps an existing Mat data array with an Eigen TensorMap of shape (H x W x C) where:
+ H = number of rows
+ W = number of columns
+ C = number of channels
+
+Explicit instantiation of the return type is required.
+
+@note Caller should be aware of the lifetime of the cv::Mat instance and take appropriate safety measures.
+The cv::Mat instance will retain ownership of the data and the Eigen::TensorMap will lose access when the cv::Mat data is deallocated.
+
+The example below initializes a cv::Mat and produces an Eigen::TensorMap:
+\code
+float arr[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
+Mat a_mat(2, 2, CV_32FC3, arr);
+Eigen::TensorMap<Eigen::Tensor<float, 3, Eigen::RowMajor>> a_tensormap = cv2eigen_tensormap<float>(a_mat);
+\endcode
+*/
+template <typename _Tp> static inline
+Eigen::TensorMap<Eigen::Tensor<_Tp, 3, Eigen::RowMajor>> cv2eigen_tensormap(InputArray src)
+{
+    typedef Eigen::TensorMap<Eigen::Tensor<_Tp, 3, Eigen::RowMajor>> MapType;
+
+    Mat mat = src.getMat();
+    // The element depth must be _Tp; the channel count becomes the third dimension.
+    CV_CheckTypeEQ(mat.type(), CV_MAKETYPE(traits::Type<_Tp>::value, mat.channels()), "");
+    // No data is copied: the returned map points at mat.data.
+    // NOTE(review): rows are presumably expected to be densely packed (continuous Mat) - confirm.
+    _Tp* const first = (_Tp *)mat.data;
+    return MapType(first, mat.rows, mat.cols, mat.channels());
+}
+#endif // OPENCV_EIGEN_TENSOR_SUPPORT
+
+// Copies an Eigen matrix into an OpenCV output array, honouring the Eigen storage order.
+template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
+void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src, OutputArray dst )
+{
+    if( src.Flags & Eigen::RowMajorBit )
+    {
+        // Row-major storage: wrap with matching dimensions and copy.
+        Mat wrapped(src.rows(), src.cols(), traits::Type<_Tp>::value,
+                    (void*)src.data(), src.outerStride()*sizeof(_Tp));
+        wrapped.copyTo(dst);
+    }
+    else
+    {
+        // Column-major storage: wrap with swapped dimensions, then transpose into dst.
+        Mat wrapped_t(src.cols(), src.rows(), traits::Type<_Tp>::value,
+                      (void*)src.data(), src.outerStride()*sizeof(_Tp));
+        transpose(wrapped_t, dst);
+    }
+}
+
+// Matx case: fixed-size Eigen matrix -> cv::Matx of the same shape.
+template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
+void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src,
+               Matx<_Tp, _rows, _cols>& dst )
+{
+    const _Tp* const values = static_cast<const _Tp*>(src.data());
+    if( src.Flags & Eigen::RowMajorBit )
+    {
+        // Row-major storage: build the Matx straight from the buffer.
+        dst = Matx<_Tp, _rows, _cols>(values);
+    }
+    else
+    {
+        // Column-major storage: build the (cols x rows) Matx, then transpose it.
+        dst = Matx<_Tp, _cols, _rows>(values).t();
+    }
+}
+
+// Mat -> fixed-size Eigen matrix. The Mat dimensions are only verified in debug builds
+// (CV_DbgAssert); elements are converted to _Tp when the Mat type differs.
+template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
+void cv2eigen( const Mat& src,
+               Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& dst )
+{
+    CV_DbgAssert(src.rows == _rows && src.cols == _cols);
+    const size_t step = (size_t)(dst.outerStride()*sizeof(_Tp));
+    if( dst.Flags & Eigen::RowMajorBit )
+    {
+        // Row-major destination: convert straight into a header over dst's buffer.
+        const Mat view(src.rows, src.cols, traits::Type<_Tp>::value, dst.data(), step);
+        src.convertTo(view, view.type());
+    }
+    else
+    {
+        // Column-major destination: the buffer is addressed as the transposed matrix.
+        const Mat view_t(src.cols, src.rows, traits::Type<_Tp>::value, dst.data(), step);
+        if( src.type() == view_t.type() )
+            transpose(src, view_t);
+        else if( src.cols != src.rows )
+            Mat(src.t()).convertTo(view_t, view_t.type());
+        else
+        {
+            // Square input: convert into place first, then transpose in place.
+            src.convertTo(view_t, view_t.type());
+            transpose(view_t, view_t);
+        }
+    }
+}
+
+// Matx case: cv::Matx -> fixed-size Eigen matrix of the same shape and element type.
+template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
+void cv2eigen( const Matx<_Tp, _rows, _cols>& src,
+               Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& dst )
+{
+    const size_t step = (size_t)(dst.outerStride()*sizeof(_Tp));
+    if( dst.Flags & Eigen::RowMajorBit )
+    {
+        // Row-major destination: plain copy through a header over dst's buffer.
+        const Mat view(_rows, _cols, traits::Type<_Tp>::value, dst.data(), step);
+        Mat(src).copyTo(view);
+    }
+    else
+    {
+        // Column-major destination: write the transpose into the (cols x rows) view.
+        const Mat view_t(_cols, _rows, traits::Type<_Tp>::value, dst.data(), step);
+        transpose(src, view_t);
+    }
+}
+
+// Mat -> dynamically sized Eigen matrix; dst is resized to the Mat dimensions and
+// elements are converted to _Tp when the Mat type differs.
+template<typename _Tp>  static inline
+void cv2eigen( const Mat& src,
+               Eigen::Matrix<_Tp, Eigen::Dynamic, Eigen::Dynamic>& dst )
+{
+    dst.resize(src.rows, src.cols);
+    // Read the stride only after the resize above.
+    const size_t step = (size_t)(dst.outerStride()*sizeof(_Tp));
+    if( dst.Flags & Eigen::RowMajorBit )
+    {
+        const Mat view(src.rows, src.cols, traits::Type<_Tp>::value, dst.data(), step);
+        src.convertTo(view, view.type());
+    }
+    else
+    {
+        // Column-major destination: the buffer is addressed as the transposed matrix.
+        const Mat view_t(src.cols, src.rows, traits::Type<_Tp>::value, dst.data(), step);
+        if( src.type() == view_t.type() )
+            transpose(src, view_t);
+        else if( src.cols != src.rows )
+            Mat(src.t()).convertTo(view_t, view_t.type());
+        else
+        {
+            // Square input: convert into place first, then transpose in place.
+            src.convertTo(view_t, view_t.type());
+            transpose(view_t, view_t);
+        }
+    }
+}
+
+// Matx case: cv::Matx -> dynamically sized Eigen matrix (dst is resized to _rows x _cols).
+template<typename _Tp, int _rows, int _cols> static inline
+void cv2eigen( const Matx<_Tp, _rows, _cols>& src,
+               Eigen::Matrix<_Tp, Eigen::Dynamic, Eigen::Dynamic>& dst )
+{
+    dst.resize(_rows, _cols);
+    // Read the stride only after the resize above.
+    const size_t step = (size_t)(dst.outerStride()*sizeof(_Tp));
+    if( dst.Flags & Eigen::RowMajorBit )
+    {
+        const Mat view(_rows, _cols, traits::Type<_Tp>::value, dst.data(), step);
+        Mat(src).copyTo(view);
+    }
+    else
+    {
+        // Column-major destination: write the transpose into the (cols x rows) view.
+        const Mat view_t(_cols, _rows, traits::Type<_Tp>::value, dst.data(), step);
+        transpose(src, view_t);
+    }
+}
+
+// Single-column Mat -> Eigen column vector; asserts src.cols == 1 and resizes dst to src.rows.
+template<typename _Tp> static inline
+void cv2eigen( const Mat& src,
+               Eigen::Matrix<_Tp, Eigen::Dynamic, 1>& dst )
+{
+    CV_Assert(src.cols == 1);
+    dst.resize(src.rows);
+
+    // Read the stride only after the resize above.
+    const size_t step = (size_t)(dst.outerStride()*sizeof(_Tp));
+    if( dst.Flags & Eigen::RowMajorBit )
+    {
+        const Mat view(src.rows, src.cols, traits::Type<_Tp>::value, dst.data(), step);
+        src.convertTo(view, view.type());
+    }
+    else
+    {
+        // Column-major destination: the buffer is addressed as the transposed matrix.
+        const Mat view_t(src.cols, src.rows, traits::Type<_Tp>::value, dst.data(), step);
+        if( src.type() != view_t.type() )
+            Mat(src.t()).convertTo(view_t, view_t.type());
+        else
+            transpose(src, view_t);
+    }
+}
+
+// Matx case: (_rows x 1) cv::Matx -> Eigen column vector (dst is resized to _rows).
+template<typename _Tp, int _rows> static inline
+void cv2eigen( const Matx<_Tp, _rows, 1>& src,
+               Eigen::Matrix<_Tp, Eigen::Dynamic, 1>& dst )
+{
+    dst.resize(_rows);
+
+    if( !(dst.Flags & Eigen::RowMajorBit) )
+    {
+        // Column-major destination: write the transpose into the (1 x _rows) view.
+        const Mat _dst(1, _rows, traits::Type<_Tp>::value,
+                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
+        transpose(src, _dst);
+    }
+    else
+    {
+        const Mat _dst(_rows, 1, traits::Type<_Tp>::value,
+                 dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
+        // Both branches of this plain `if` are compiled for every instantiation, so this
+        // one must be well-formed too: copy through a temporary Mat header, as the sibling
+        // Matx overloads do, instead of calling copyTo() on the Matx itself.
+        Mat(src).copyTo(_dst);
+    }
+}
+
+
+// Single-row Mat -> Eigen row vector; asserts src.rows == 1 and resizes dst to src.cols.
+template<typename _Tp> static inline
+void cv2eigen( const Mat& src,
+               Eigen::Matrix<_Tp, 1, Eigen::Dynamic>& dst )
+{
+    CV_Assert(src.rows == 1);
+    dst.resize(src.cols);
+    // Read the stride only after the resize above.
+    const size_t step = (size_t)(dst.outerStride()*sizeof(_Tp));
+    if( dst.Flags & Eigen::RowMajorBit )
+    {
+        const Mat view(src.rows, src.cols, traits::Type<_Tp>::value, dst.data(), step);
+        src.convertTo(view, view.type());
+    }
+    else
+    {
+        // Column-major destination: the buffer is addressed as the transposed matrix.
+        const Mat view_t(src.cols, src.rows, traits::Type<_Tp>::value, dst.data(), step);
+        if( src.type() != view_t.type() )
+            Mat(src.t()).convertTo(view_t, view_t.type());
+        else
+            transpose(src, view_t);
+    }
+}
+
+// Matx case: (1 x _cols) cv::Matx -> Eigen row vector (dst is resized to _cols).
+template<typename _Tp, int _cols> static inline
+void cv2eigen( const Matx<_Tp, 1, _cols>& src,
+               Eigen::Matrix<_Tp, 1, Eigen::Dynamic>& dst )
+{
+    dst.resize(_cols);
+    // Read the stride only after the resize above.
+    const size_t step = (size_t)(dst.outerStride()*sizeof(_Tp));
+    if( dst.Flags & Eigen::RowMajorBit )
+    {
+        const Mat view(1, _cols, traits::Type<_Tp>::value, dst.data(), step);
+        Mat(src).copyTo(view);
+    }
+    else
+    {
+        // Column-major destination: write the transpose into the (_cols x 1) view.
+        const Mat view_t(_cols, 1, traits::Type<_Tp>::value, dst.data(), step);
+        transpose(src, view_t);
+    }
+}
+
+//! @}
+
+} // cv
+
+#endif
--- a/3rdparty/opencv/inc/opencv2/core/fast_math.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/fast_math.hpp
@@ -0,0 +1,411 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_FAST_MATH_HPP
+#define OPENCV_CORE_FAST_MATH_HPP
+
+#include "opencv2/core/cvdef.h"
+
+//! @addtogroup core_utils
+//! @{
+
+/****************************************************************************************\
+*                                      fast math                                         *
+\****************************************************************************************/
+
+#ifdef __cplusplus
+#  include <cmath>
+#else
+#  ifdef __BORLANDC__
+#    include <fastmath.h>
+#  else
+#    include <math.h>
+#  endif
+#endif
+
+#if defined(__CUDACC__)
+  // nothing, intrinsics/asm code is not supported
+#else
+  #if ((defined _MSC_VER && defined _M_X64) \
+      || (defined __GNUC__ && defined __x86_64__ && defined __SSE2__)) \
+      && !defined(OPENCV_SKIP_INCLUDE_EMMINTRIN_H)
+    #include <emmintrin.h>
+  #endif
+
+  #if defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8 \
+      && !defined(OPENCV_SKIP_INCLUDE_ALTIVEC_H)
+    #include <altivec.h>
+    #undef vector
+    #undef bool
+    #undef pixel
+  #endif
+
+  #if defined(CV_INLINE_ROUND_FLT)
+    // user-specified version
+    // CV_INLINE_ROUND_DBL should be defined too
+  #elif defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__
+    // 1. general scheme
+    #define ARM_ROUND(_value, _asm_string) \
+        int res; \
+        float temp; \
+        CV_UNUSED(temp); \
+        __asm__(_asm_string : [res] "=r" (res), [temp] "=w" (temp) : [value] "w" (_value)); \
+        return res
+    // 2. version for double
+    #ifdef __clang__
+        #define CV_INLINE_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %[value] \n vmov %[res], %[temp]")
+    #else
+        #define CV_INLINE_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %P[value] \n vmov %[res], %[temp]")
+    #endif
+    // 3. version for float
+    #define CV_INLINE_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]")
+  #elif defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8
+    // P8 and newer machines can convert fp32/64 to int quickly.
+    #define CV_INLINE_ROUND_DBL(value) \
+        int out; \
+        double temp; \
+        __asm__( "fctiw %[temp],%[in]\n\tmfvsrwz %[out],%[temp]\n\t" : [out] "=r" (out), [temp] "=d" (temp) : [in] "d" ((double)(value)) : ); \
+        return out;
+
+    // FP32 also works with FP64 routine above
+    #define CV_INLINE_ROUND_FLT(value) CV_INLINE_ROUND_DBL(value)
+  #endif
+
+  #ifdef CV_INLINE_ISINF_FLT
+    // user-specified version
+    // CV_INLINE_ISINF_DBL should be defined too
+  #elif defined __PPC64__ && defined _ARCH_PWR9 && defined(scalar_test_data_class)
+    #define CV_INLINE_ISINF_DBL(value) return scalar_test_data_class(value, 0x30);
+    #define CV_INLINE_ISINF_FLT(value) CV_INLINE_ISINF_DBL(value)
+  #endif
+
+  #ifdef CV_INLINE_ISNAN_FLT
+    // user-specified version
+    // CV_INLINE_ISNAN_DBL should be defined too
+  #elif defined __PPC64__ && defined _ARCH_PWR9 && defined(scalar_test_data_class)
+    #define CV_INLINE_ISNAN_DBL(value) return scalar_test_data_class(value, 0x40);
+    #define CV_INLINE_ISNAN_FLT(value) CV_INLINE_ISNAN_DBL(value)
+  #endif
+
+  #if !defined(OPENCV_USE_FASTMATH_BUILTINS) \
+    && ( \
+        defined(__x86_64__) || defined(__i686__) \
+        || defined(__arm__) \
+        || defined(__PPC64__) \
+    )
+    /* Use builtin C math functions when available. Dedicated hardware is available to
+       round and convert FP values. */
+    #define OPENCV_USE_FASTMATH_BUILTINS 1
+  #endif
+
+  /* Enable builtin math functions if possible, desired, and available.
+     Note, not all math functions inline equally. E.g lrint will not inline
+     without the -fno-math-errno option. */
+  #if defined(CV_ICC)
+    // nothing
+  #elif defined(OPENCV_USE_FASTMATH_BUILTINS) && OPENCV_USE_FASTMATH_BUILTINS
+    #if defined(__clang__)
+      #define CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS
+      #if !defined(CV_INLINE_ISNAN_DBL) && __has_builtin(__builtin_isnan)
+        #define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value);
+      #endif
+      #if !defined(CV_INLINE_ISNAN_FLT) && __has_builtin(__builtin_isnan)
+        #define CV_INLINE_ISNAN_FLT(value) return __builtin_isnan(value);
+      #endif
+      #if !defined(CV_INLINE_ISINF_DBL) && __has_builtin(__builtin_isinf)
+        #define CV_INLINE_ISINF_DBL(value) return __builtin_isinf(value);
+      #endif
+      #if !defined(CV_INLINE_ISINF_FLT) && __has_builtin(__builtin_isinf)
+        #define CV_INLINE_ISINF_FLT(value) return __builtin_isinf(value);
+      #endif
+    #elif defined(__GNUC__)
+      #define CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS
+      #if !defined(CV_INLINE_ISNAN_DBL)
+        #define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value);
+      #endif
+      #if !defined(CV_INLINE_ISNAN_FLT)
+        #define CV_INLINE_ISNAN_FLT(value) return __builtin_isnanf(value);
+      #endif
+      #if !defined(CV_INLINE_ISINF_DBL)
+        #define CV_INLINE_ISINF_DBL(value) return __builtin_isinf(value);
+      #endif
+      #if !defined(CV_INLINE_ISINF_FLT)
+        #define CV_INLINE_ISINF_FLT(value) return __builtin_isinff(value);
+      #endif
+    #elif defined(_MSC_VER)
+      #if !defined(CV_INLINE_ISNAN_DBL)
+        #define CV_INLINE_ISNAN_DBL(value) return isnan(value);
+      #endif
+      #if !defined(CV_INLINE_ISNAN_FLT)
+        #define CV_INLINE_ISNAN_FLT(value) return isnan(value);
+      #endif
+      #if !defined(CV_INLINE_ISINF_DBL)
+        #define CV_INLINE_ISINF_DBL(value) return isinf(value);
+      #endif
+      #if !defined(CV_INLINE_ISINF_FLT)
+        #define CV_INLINE_ISINF_FLT(value) return isinf(value);
+      #endif
+    #endif
+  #endif
+
+#endif // defined(__CUDACC__)
+
+/** @brief Rounds floating-point number to the nearest integer (the conversion path is chosen by the preprocessor ladder in the body)
+
+ @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
+ result is not defined.
+ */
+CV_INLINE int
+cvRound( double value )
+{
+#if defined CV_INLINE_ROUND_DBL
+    CV_INLINE_ROUND_DBL(value); /* the definitions of this macro visible in this header expand to code that returns the result */
+#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
+    && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) \
+    && !defined(__CUDACC__)
+    __m128d t = _mm_set_sd( value );
+    return _mm_cvtsd_si32(t); /* SSE2 scalar double -> 32-bit int conversion */
+#elif defined _MSC_VER && defined _M_IX86
+    int t;
+    __asm
+    {
+        fld value;
+        fistp t;
+    }
+    return t;
+#elif defined CV_ICC || defined __GNUC__
+    return (int)(lrint(value)); /* C library rounding, result cast to int */
+#else
+    /* Portable fallback. It's ok if round does not comply with IEEE754 standard;
+       the tests should allow +/-1 difference when the tested functions use round */
+    return (int)(value + (value >= 0 ? 0.5 : -0.5)); /* shift by half away from zero, then truncate */
+#endif
+}
+
+
+/** @brief Rounds floating-point number to the nearest integer not larger than the original.
+
+ The function computes an integer i such that:
+ \f[i \le \texttt{value} < i+1\f]
+ @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
+ result is not defined.
+ */
+CV_INLINE int cvFloor( double value )
+{
+#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \
+    && defined(__PPC64__)
+    return __builtin_floor(value);
+#else
+    /* Truncate toward zero, then step down by one whenever the truncated value
+       ended up above the input. */
+    const int truncated = (int)value;
+    return (truncated > value) ? truncated - 1 : truncated;
+#endif
+}
+
+/** @brief Rounds floating-point number to the nearest integer not smaller than the original.
+
+ The function computes an integer i such that:
+ \f[i-1 < \texttt{value} \le i\f]
+ @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
+ result is not defined.
+ */
+CV_INLINE int cvCeil( double value )
+{
+#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \
+    && defined(__PPC64__)
+    return __builtin_ceil(value);
+#else
+    /* Truncate toward zero, then step up by one whenever the truncated value
+       ended up below the input. */
+    const int truncated = (int)value;
+    return (truncated < value) ? truncated + 1 : truncated;
+#endif
+}
+
+/** @brief Determines if the argument is Not A Number.
+
+ @param value The input floating-point value
+
+ The function returns 1 if the argument is Not A Number (as defined by IEEE754 standard), 0
+ otherwise. */
+CV_INLINE int cvIsNaN( double value )
+{
+#if defined CV_INLINE_ISNAN_DBL
+    CV_INLINE_ISNAN_DBL(value);
+#else
+    /* Bit test on the two 32-bit halves: the sign-stripped high word, plus one if any
+       low-word bit is set, exceeds 0x7ff00000 exactly when the exponent bits are all
+       ones and the mantissa is non-zero. */
+    Cv64suf bits;
+    unsigned high_magnitude, low_word;
+    bits.f = value;
+    high_magnitude = (unsigned)(bits.u >> 32) & 0x7fffffff;
+    low_word = (unsigned)bits.u;
+    return high_magnitude + (low_word != 0) > 0x7ff00000;
+#endif
+}
+
+/** @brief Determines if the argument is Infinity.
+
+ @param value The input floating-point value
+
+ The function returns 1 if the argument is a plus or minus infinity (as defined by IEEE754 standard)
+ and 0 otherwise. */
+CV_INLINE int cvIsInf( double value )
+{
+#if defined CV_INLINE_ISINF_DBL
+    CV_INLINE_ISINF_DBL(value);
+#elif defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || defined(_M_ARM64) || defined(__PPC64__)
+    /* Infinity = exponent bits all ones AND an all-zero mantissa. Only the sign bit may
+       be masked off: dropping the low 32 mantissa bits from the comparison would report
+       NaNs whose payload sits entirely in the low word (e.g. 0x7ff0000000000001) as
+       infinite, and would disagree with the 32-bit fallback below. */
+    Cv64suf ieee754;
+    ieee754.f = value;
+    return (ieee754.u & 0x7fffffffffffffff) ==
+                        0x7ff0000000000000;
+#else
+    /* Same test on the two 32-bit halves: sign-stripped high word is exactly the
+       all-ones exponent pattern and the low word is zero. */
+    Cv64suf ieee754;
+    ieee754.f = value;
+    return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) == 0x7ff00000 &&
+            (unsigned)ieee754.u == 0;
+#endif
+}
+
+#ifdef __cplusplus
+
+/** @overload */
+CV_INLINE int cvRound(float value)
+{
+#if defined CV_INLINE_ROUND_FLT
+    CV_INLINE_ROUND_FLT(value); // the definitions of this macro visible in this header expand to code that returns the result
+#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
+    && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) \
+    && !defined(__CUDACC__)
+    __m128 t = _mm_set_ss( value );
+    return _mm_cvtss_si32(t); // SSE scalar float -> 32-bit int conversion
+#elif defined _MSC_VER && defined _M_IX86
+    int t;
+    __asm
+    {
+        fld value;
+        fistp t;
+    }
+    return t;
+#elif defined CV_ICC || defined __GNUC__
+    return (int)(lrintf(value)); // C library rounding, result cast to int
+#else
+    /* Portable fallback. It's ok if round does not comply with IEEE754 standard;
+     the tests should allow +/-1 difference when the tested functions use round */
+    return (int)(value + (value >= 0 ? 0.5f : -0.5f)); // shift by half away from zero, then truncate
+#endif
+}
+
+/** @overload */
+CV_INLINE int cvRound( int value )
+{
+    return value; // integer input: returned unchanged
+}
+
+/** @overload */
+CV_INLINE int cvFloor( float value )
+{
+#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \
+    && defined(__PPC64__)
+    return __builtin_floorf(value);
+#else
+    // Truncate toward zero, then step down by one whenever the truncated value
+    // ended up above the input.
+    const int truncated = (int)value;
+    return (truncated > value) ? truncated - 1 : truncated;
+#endif
+}
+
+/** @overload */
+CV_INLINE int cvFloor( int value )
+{
+    return value; // integer input: returned unchanged
+}
+
+/** @overload */
+CV_INLINE int cvCeil( float value )
+{
+#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \
+    && defined(__PPC64__)
+    return __builtin_ceilf(value);
+#else
+    // Truncate toward zero, then step up by one whenever the truncated value
+    // ended up below the input.
+    const int truncated = (int)value;
+    return (truncated < value) ? truncated + 1 : truncated;
+#endif
+}
+
+/** @overload */
+CV_INLINE int cvCeil( int value )
+{
+    return value; // integer input: returned unchanged
+}
+
+/** @overload */
+CV_INLINE int cvIsNaN( float value )
+{
+#if defined CV_INLINE_ISNAN_FLT
+    CV_INLINE_ISNAN_FLT(value);
+#else
+    // Bit test: with the sign bit cleared, any pattern above 0x7f800000 has all
+    // exponent bits set and a non-zero mantissa.
+    Cv32suf bits;
+    bits.f = value;
+    return ((bits.u & 0x7fffffff) > 0x7f800000) ? 1 : 0;
+#endif
+}
+
+/** @overload */
+CV_INLINE int cvIsInf( float value )
+{
+#if defined CV_INLINE_ISINF_FLT
+    CV_INLINE_ISINF_FLT(value);
+#else
+    // Bit test: with the sign bit cleared, the pattern must be exactly 0x7f800000
+    // (all exponent bits set, zero mantissa).
+    Cv32suf bits;
+    bits.f = value;
+    return ((bits.u & 0x7fffffff) == 0x7f800000) ? 1 : 0;
+#endif
+}
+
+#endif // __cplusplus
+
+//! @} core_utils
+
+#endif
--- a/3rdparty/opencv/inc/opencv2/core/hal/hal.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/hal/hal.hpp
@@ -0,0 +1,256 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_HAL_HPP
+#define OPENCV_HAL_HPP
+
+#include "opencv2/core/cvdef.h"
+#include "opencv2/core/cvstd.hpp"
+#include "opencv2/core/hal/interface.h"
+
+namespace cv { namespace hal {
+
+//! @addtogroup core_hal_functions
+//! @{
+
+CV_EXPORTS int normHamming(const uchar* a, int n);
+CV_EXPORTS int normHamming(const uchar* a, const uchar* b, int n);
+
+CV_EXPORTS int normHamming(const uchar* a, int n, int cellSize);
+CV_EXPORTS int normHamming(const uchar* a, const uchar* b, int n, int cellSize);
+
+CV_EXPORTS int LU32f(float* A, size_t astep, int m, float* b, size_t bstep, int n);
+CV_EXPORTS int LU64f(double* A, size_t astep, int m, double* b, size_t bstep, int n);
+CV_EXPORTS bool Cholesky32f(float* A, size_t astep, int m, float* b, size_t bstep, int n);
+CV_EXPORTS bool Cholesky64f(double* A, size_t astep, int m, double* b, size_t bstep, int n);
+CV_EXPORTS void SVD32f(float* At, size_t astep, float* W, float* U, size_t ustep, float* Vt, size_t vstep, int m, int n, int flags);
+CV_EXPORTS void SVD64f(double* At, size_t astep, double* W, double* U, size_t ustep, double* Vt, size_t vstep, int m, int n, int flags);
+CV_EXPORTS int QR32f(float* A, size_t astep, int m, int n, int k, float* b, size_t bstep, float* hFactors);
+CV_EXPORTS int QR64f(double* A, size_t astep, int m, int n, int k, double* b, size_t bstep, double* hFactors);
+
+CV_EXPORTS void gemm32f(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
+                        float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
+                        int m_a, int n_a, int n_d, int flags);
+CV_EXPORTS void gemm64f(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
+                        double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
+                        int m_a, int n_a, int n_d, int flags);
+CV_EXPORTS void gemm32fc(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
+                        float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
+                        int m_a, int n_a, int n_d, int flags);
+CV_EXPORTS void gemm64fc(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
+                        double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
+                        int m_a, int n_a, int n_d, int flags);
+
+CV_EXPORTS int normL1_(const uchar* a, const uchar* b, int n);
+CV_EXPORTS float normL1_(const float* a, const float* b, int n);
+CV_EXPORTS float normL2Sqr_(const float* a, const float* b, int n);
+
+CV_EXPORTS void exp32f(const float* src, float* dst, int n);
+CV_EXPORTS void exp64f(const double* src, double* dst, int n);
+CV_EXPORTS void log32f(const float* src, float* dst, int n);
+CV_EXPORTS void log64f(const double* src, double* dst, int n);
+
+CV_EXPORTS void fastAtan32f(const float* y, const float* x, float* dst, int n, bool angleInDegrees);
+CV_EXPORTS void fastAtan64f(const double* y, const double* x, double* dst, int n, bool angleInDegrees);
+CV_EXPORTS void magnitude32f(const float* x, const float* y, float* dst, int n);
+CV_EXPORTS void magnitude64f(const double* x, const double* y, double* dst, int n);
+CV_EXPORTS void sqrt32f(const float* src, float* dst, int len);
+CV_EXPORTS void sqrt64f(const double* src, double* dst, int len);
+CV_EXPORTS void invSqrt32f(const float* src, float* dst, int len);
+CV_EXPORTS void invSqrt64f(const double* src, double* dst, int len);
+
+CV_EXPORTS void split8u(const uchar* src, uchar** dst, int len, int cn );
+CV_EXPORTS void split16u(const ushort* src, ushort** dst, int len, int cn );
+CV_EXPORTS void split32s(const int* src, int** dst, int len, int cn );
+CV_EXPORTS void split64s(const int64* src, int64** dst, int len, int cn );
+
+CV_EXPORTS void merge8u(const uchar** src, uchar* dst, int len, int cn );
+CV_EXPORTS void merge16u(const ushort** src, ushort* dst, int len, int cn );
+CV_EXPORTS void merge32s(const int** src, int* dst, int len, int cn );
+CV_EXPORTS void merge64s(const int64** src, int64* dst, int len, int cn );
+
+CV_EXPORTS void add8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void add8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void add16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void add16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void add32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void add32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void add64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
+
+CV_EXPORTS void sub8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void sub8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void sub16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void sub16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void sub32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void sub32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void sub64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
+
+CV_EXPORTS void max8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void max8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void max16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void max16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void max32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void max32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void max64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
+
+CV_EXPORTS void min8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void min8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void min16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void min16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void min32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void min32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void min64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
+
+CV_EXPORTS void absdiff8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void absdiff8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void absdiff16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void absdiff16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void absdiff32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void absdiff32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void absdiff64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
+
+CV_EXPORTS void and8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void or8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void xor8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+CV_EXPORTS void not8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
+
+CV_EXPORTS void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
+CV_EXPORTS void cmp8s(const schar* src1, size_t step1, const schar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
+CV_EXPORTS void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
+CV_EXPORTS void cmp16s(const short* src1, size_t step1, const short* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
+CV_EXPORTS void cmp32s(const int* src1, size_t step1, const int* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
+CV_EXPORTS void cmp32f(const float* src1, size_t step1, const float* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
+CV_EXPORTS void cmp64f(const double* src1, size_t step1, const double* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
+
+CV_EXPORTS void mul8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void mul8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void mul16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void mul16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void mul32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void mul32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void mul64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
+
+CV_EXPORTS void div8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void div8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void div16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void div16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void div32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void div32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void div64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
+
+CV_EXPORTS void recip8u( const uchar *, size_t, const uchar * src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void recip8s( const schar *, size_t, const schar * src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void recip16u( const ushort *, size_t, const ushort * src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void recip16s( const short *, size_t, const short * src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void recip32s( const int *, size_t, const int * src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void recip32f( const float *, size_t, const float * src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
+CV_EXPORTS void recip64f( const double *, size_t, const double * src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
+
+CV_EXPORTS void addWeighted8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _scalars );
+CV_EXPORTS void addWeighted8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scalars );
+CV_EXPORTS void addWeighted16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scalars );
+CV_EXPORTS void addWeighted16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scalars );
+CV_EXPORTS void addWeighted32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scalars );
+CV_EXPORTS void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scalars );
+CV_EXPORTS void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scalars );
+
+CV_EXPORTS void cvt16f32f( const float16_t* src, float* dst, int len );
+CV_EXPORTS void cvt32f16f( const float* src, float16_t* dst, int len );
+
+CV_EXPORTS void addRNGBias32f( float* arr, const float* scaleBiasPairs, int len );
+CV_EXPORTS void addRNGBias64f( double* arr, const double* scaleBiasPairs, int len );
+
+struct CV_EXPORTS DFT1D // abstract interface: apply() is pure virtual, so only derived types can be instantiated
+{
+    static Ptr<DFT1D> create(int len, int count, int depth, int flags, bool * useBuffer = 0); // declared here, defined elsewhere; useBuffer is optional (defaults to a null pointer) - presumably an output flag, confirm in the implementation
+    virtual void apply(const uchar *src, uchar *dst) = 0; // source and destination are passed as raw byte pointers
+    virtual ~DFT1D() {} // virtual so that destruction through a DFT1D pointer reaches the derived destructor
+};
+
+struct CV_EXPORTS DFT2D // abstract interface: apply() is pure virtual, so only derived types can be instantiated
+{
+    static Ptr<DFT2D> create(int width, int height, int depth,
+                             int src_channels, int dst_channels,
+                             int flags, int nonzero_rows = 0); // declared here, defined elsewhere; nonzero_rows is optional and defaults to 0
+    virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0; // raw byte pointers plus a step per buffer - presumably the row stride in bytes, confirm
+    virtual ~DFT2D() {} // virtual so that destruction through a DFT2D pointer reaches the derived destructor
+};
+
+struct CV_EXPORTS DCT2D // abstract interface: apply() is pure virtual, so only derived types can be instantiated
+{
+    static Ptr<DCT2D> create(int width, int height, int depth, int flags); // declared here, defined elsewhere
+    virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0; // raw byte pointers plus a step per buffer - presumably the row stride in bytes, confirm
+    virtual ~DCT2D() {} // virtual so that destruction through a DCT2D pointer reaches the derived destructor
+};
+
+//! @} core_hal
+
+//=============================================================================
+// for binary compatibility with 3.0
+
+//! @cond IGNORED
+
+CV_EXPORTS int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n);
+CV_EXPORTS int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n);
+CV_EXPORTS bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n);
+CV_EXPORTS bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n);
+
+CV_EXPORTS void exp(const float* src, float* dst, int n);
+CV_EXPORTS void exp(const double* src, double* dst, int n);
+CV_EXPORTS void log(const float* src, float* dst, int n);
+CV_EXPORTS void log(const double* src, double* dst, int n);
+
+CV_EXPORTS void fastAtan2(const float* y, const float* x, float* dst, int n, bool angleInDegrees);
+CV_EXPORTS void magnitude(const float* x, const float* y, float* dst, int n);
+CV_EXPORTS void magnitude(const double* x, const double* y, double* dst, int n);
+CV_EXPORTS void sqrt(const float* src, float* dst, int len);
+CV_EXPORTS void sqrt(const double* src, double* dst, int len);
+CV_EXPORTS void invSqrt(const float* src, float* dst, int len);
+CV_EXPORTS void invSqrt(const double* src, double* dst, int len);
+
+//! @endcond
+
+}} //cv::hal
+
+#endif //OPENCV_HAL_HPP
--- a/3rdparty/opencv/inc/opencv2/core/hal/interface.h
+++ b/3rdparty/opencv/inc/opencv2/core/hal/interface.h
@@ -0,0 +1,190 @@
+#ifndef OPENCV_CORE_HAL_INTERFACE_H
+#define OPENCV_CORE_HAL_INTERFACE_H
+
+//! @addtogroup core_hal_interface
+//! @{
+
+//! @name Return codes
+//! @{
+#define CV_HAL_ERROR_OK 0
+#define CV_HAL_ERROR_NOT_IMPLEMENTED 1
+#define CV_HAL_ERROR_UNKNOWN -1
+//! @}
+
+#ifdef __cplusplus
+#include <cstddef>
+#else
+#include <stddef.h>
+#include <stdbool.h>
+#endif
+
+//! @name Data types
+//! primitive types
+//! - schar  - signed 1 byte integer
+//! - uchar  - unsigned 1 byte integer
+//! - short  - signed 2 byte integer
+//! - ushort - unsigned 2 byte integer
+//! - int    - signed 4 byte integer
+//! - uint   - unsigned 4 byte integer
+//! - int64  - signed 8 byte integer
+//! - uint64 - unsigned 8 byte integer
+//! @{
+#if !defined _MSC_VER && !defined __BORLANDC__ // neither MSVC nor Borland: derive uint from the fixed-width integer headers
+#  if defined __cplusplus && __cplusplus >= 201103L && !defined __APPLE__ // C++11 or later, not Apple
+#    include <cstdint>
+#    ifdef __NEWLIB__
+        typedef unsigned int uint; // newlib builds use plain unsigned int instead of std::uint32_t
+#    else
+        typedef std::uint32_t uint;
+#    endif
+#  else // C, pre-C++11 C++, or Apple: use the C header
+#    include <stdint.h>
+     typedef uint32_t uint;
+#  endif
+#else // MSVC or Borland
+   typedef unsigned uint;
+#endif
+
+typedef signed char schar;
+
+#ifndef __IPL_H__ // skipped when __IPL_H__ is defined - presumably that header already declares uchar/ushort (confirm)
+   typedef unsigned char uchar;
+   typedef unsigned short ushort;
+#endif
+
+#if defined _MSC_VER || defined __BORLANDC__ // MSVC or Borland: compiler-specific 64-bit types and literal suffixes
+   typedef __int64 int64;
+   typedef unsigned __int64 uint64;
+#  define CV_BIG_INT(n)   n##I64
+#  define CV_BIG_UINT(n)  n##UI64
+#else // all other compilers: fixed-width types with LL / ULL literal suffixes
+   typedef int64_t int64; // NOTE(review): used unqualified although the C++11 branch above includes only <cstdint>; relies on that header also exposing the global-namespace name
+   typedef uint64_t uint64;
+#  define CV_BIG_INT(n)   n##LL
+#  define CV_BIG_UINT(n)  n##ULL
+#endif
+
+#define CV_USRTYPE1 (void)"CV_USRTYPE1 support has been dropped in OpenCV 4.0"
+
+#define CV_CN_MAX     512 // presumably the upper limit on the channel count - not referenced in this part of the file, confirm at its users
+#define CV_CN_SHIFT   3 // bit position at which CV_MAKETYPE places the channel field
+#define CV_DEPTH_MAX  (1 << CV_CN_SHIFT) // 8: number of distinct depth codes that fit below the channel field
+
+#define CV_8U   0 // depth codes 0..7 follow; each must fit in CV_MAT_DEPTH_MASK
+#define CV_8S   1
+#define CV_16U  2
+#define CV_16S  3
+#define CV_32S  4
+#define CV_32F  5
+#define CV_64F  6
+#define CV_16F  7
+
+#define CV_MAT_DEPTH_MASK       (CV_DEPTH_MAX - 1) // 7: selects the low CV_CN_SHIFT bits
+#define CV_MAT_DEPTH(flags)     ((flags) & CV_MAT_DEPTH_MASK) // extracts the depth code from a type value
+
+#define CV_MAKETYPE(depth,cn) (CV_MAT_DEPTH(depth) + (((cn)-1) << CV_CN_SHIFT)) // depth in the low bits, (channel count - 1) shifted above them
+#define CV_MAKE_TYPE CV_MAKETYPE // alternative spelling of the same macro
+
+#define CV_8UC1 CV_MAKETYPE(CV_8U,1)
+#define CV_8UC2 CV_MAKETYPE(CV_8U,2)
+#define CV_8UC3 CV_MAKETYPE(CV_8U,3)
+#define CV_8UC4 CV_MAKETYPE(CV_8U,4)
+#define CV_8UC(n) CV_MAKETYPE(CV_8U,(n))
+
+#define CV_8SC1 CV_MAKETYPE(CV_8S,1)
+#define CV_8SC2 CV_MAKETYPE(CV_8S,2)
+#define CV_8SC3 CV_MAKETYPE(CV_8S,3)
+#define CV_8SC4 CV_MAKETYPE(CV_8S,4)
+#define CV_8SC(n) CV_MAKETYPE(CV_8S,(n))
+
+#define CV_16UC1 CV_MAKETYPE(CV_16U,1)
+#define CV_16UC2 CV_MAKETYPE(CV_16U,2)
+#define CV_16UC3 CV_MAKETYPE(CV_16U,3)
+#define CV_16UC4 CV_MAKETYPE(CV_16U,4)
+#define CV_16UC(n) CV_MAKETYPE(CV_16U,(n))
+
+#define CV_16SC1 CV_MAKETYPE(CV_16S,1)
+#define CV_16SC2 CV_MAKETYPE(CV_16S,2)
+#define CV_16SC3 CV_MAKETYPE(CV_16S,3)
+#define CV_16SC4 CV_MAKETYPE(CV_16S,4)
+#define CV_16SC(n) CV_MAKETYPE(CV_16S,(n))
+
+#define CV_32SC1 CV_MAKETYPE(CV_32S,1)
+#define CV_32SC2 CV_MAKETYPE(CV_32S,2)
+#define CV_32SC3 CV_MAKETYPE(CV_32S,3)
+#define CV_32SC4 CV_MAKETYPE(CV_32S,4)
+#define CV_32SC(n) CV_MAKETYPE(CV_32S,(n))
+
+#define CV_32FC1 CV_MAKETYPE(CV_32F,1)
+#define CV_32FC2 CV_MAKETYPE(CV_32F,2)
+#define CV_32FC3 CV_MAKETYPE(CV_32F,3)
+#define CV_32FC4 CV_MAKETYPE(CV_32F,4)
+#define CV_32FC(n) CV_MAKETYPE(CV_32F,(n))
+
+#define CV_64FC1 CV_MAKETYPE(CV_64F,1)
+#define CV_64FC2 CV_MAKETYPE(CV_64F,2)
+#define CV_64FC3 CV_MAKETYPE(CV_64F,3)
+#define CV_64FC4 CV_MAKETYPE(CV_64F,4)
+#define CV_64FC(n) CV_MAKETYPE(CV_64F,(n))
+
+#define CV_16FC1 CV_MAKETYPE(CV_16F,1)
+#define CV_16FC2 CV_MAKETYPE(CV_16F,2)
+#define CV_16FC3 CV_MAKETYPE(CV_16F,3)
+#define CV_16FC4 CV_MAKETYPE(CV_16F,4)
+#define CV_16FC(n) CV_MAKETYPE(CV_16F,(n))
+//! @}
+
+//! @name Comparison operation
+//! @sa cv::CmpTypes
+//! @{
+#define CV_HAL_CMP_EQ 0
+#define CV_HAL_CMP_GT 1
+#define CV_HAL_CMP_GE 2
+#define CV_HAL_CMP_LT 3
+#define CV_HAL_CMP_LE 4
+#define CV_HAL_CMP_NE 5
+//! @}
+
+//! @name Border processing modes
+//! @sa cv::BorderTypes
+//! @{
+#define CV_HAL_BORDER_CONSTANT 0
+#define CV_HAL_BORDER_REPLICATE 1
+#define CV_HAL_BORDER_REFLECT 2
+#define CV_HAL_BORDER_WRAP 3
+#define CV_HAL_BORDER_REFLECT_101 4
+#define CV_HAL_BORDER_TRANSPARENT 5
+#define CV_HAL_BORDER_ISOLATED 16
+//! @}
+
+//! @name DFT flags
+//! @{
+#define CV_HAL_DFT_INVERSE        1
+#define CV_HAL_DFT_SCALE          2
+#define CV_HAL_DFT_ROWS           4
+#define CV_HAL_DFT_COMPLEX_OUTPUT 16
+#define CV_HAL_DFT_REAL_OUTPUT    32
+#define CV_HAL_DFT_TWO_STAGE      64
+#define CV_HAL_DFT_STAGE_COLS    128
+#define CV_HAL_DFT_IS_CONTINUOUS 512
+#define CV_HAL_DFT_IS_INPLACE 1024
+//! @}
+
+//! @name SVD flags
+//! @{
+#define CV_HAL_SVD_NO_UV    1
+#define CV_HAL_SVD_SHORT_UV 2
+#define CV_HAL_SVD_MODIFY_A 4
+#define CV_HAL_SVD_FULL_UV  8
+//! @}
+
+//! @name Gemm flags
+//! @{
+#define CV_HAL_GEMM_1_T 1
+#define CV_HAL_GEMM_2_T 2
+#define CV_HAL_GEMM_3_T 4
+//! @}
+
+//! @}
+
+#endif
--- a/3rdparty/opencv/inc/opencv2/core/hal/intrin.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/hal/intrin.hpp
@@ -0,0 +1,706 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_HAL_INTRIN_HPP
+#define OPENCV_HAL_INTRIN_HPP
+
+#include <cmath>
+#include <float.h>
+#include <stdlib.h>
+#include "opencv2/core/cvdef.h"
+
+#define OPENCV_HAL_ADD(a, b) ((a) + (b)) // sum of both arguments
+#define OPENCV_HAL_AND(a, b) ((a) & (b)) // bitwise AND of both arguments
+#define OPENCV_HAL_NOP(a) (a) // identity: yields the argument unchanged
+#define OPENCV_HAL_1ST(a, b) (a) // yields the first argument; the second is discarded
+
+namespace { /*
+ * trailingZeros32(value): number of trailing zero bits in a 32-bit value, i.e. the
+ * index of its lowest set bit. A compiler intrinsic is used where one of the
+ * branches below matches; otherwise a De Bruijn multiplication table is used.
+ * Being in an unnamed namespace inside a header, every translation unit that
+ * includes this file gets its own copy of the function.
+ *
+ * NOTE(review): the branches do not agree for value == 0. The clang-cl branch
+ * explicitly returns 32; the _BitScanForward branch returns the initial index 0
+ * (assuming the intrinsic leaves index untouched when no bit is set - confirm);
+ * the table fallback evaluates to entry 0, which is 0; and __builtin_ctz is
+ * called without a zero check in the GCC branch. Callers presumably only pass
+ * non-zero values - confirm before relying on the zero case.
+ */
+inline unsigned int trailingZeros32(unsigned int value) {
+#if defined(_MSC_VER)
+#if (_MSC_VER < 1700) || defined(_M_ARM) || defined(_M_ARM64)
+    unsigned long index = 0;
+    _BitScanForward(&index, value);
+    return (unsigned int)index;
+#elif defined(__clang__)
+    // clang-cl doesn't export _tzcnt_u32 for non BMI systems
+    return value ? __builtin_ctz(value) : 32;
+#else
+    return _tzcnt_u32(value);
+#endif
+#elif defined(__GNUC__) || defined(__GNUG__)
+    return __builtin_ctz(value);
+#elif defined(__ICC) || defined(__INTEL_COMPILER)
+    return _bit_scan_forward(value);
+#elif defined(__clang__) // NOTE(review): only reached when __clang__ is defined but none of _MSC_VER, __GNUC__, __GNUG__, __ICC, __INTEL_COMPILER is
+    return llvm.cttz.i32(value, true); // NOTE(review): this is not an ordinary C++ call expression - the branch looks like dead code, confirm
+#else
+    static const int MultiplyDeBruijnBitPosition[32] = {
+        0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
+        31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 };
+    return MultiplyDeBruijnBitPosition[((uint32_t)((value & -value) * 0x077CB531U)) >> 27]; // value & -value isolates the lowest set bit; the top 5 bits of the product index the table
+#endif
+}
+}
+
+// Unlike the HAL API, which lives in cv::hal,
+// the intrinsics are put into the cv namespace so that
+// they are easier to use from within OpenCV code.
+namespace cv {
+
+namespace hal { // nested inside namespace cv, i.e. cv::hal
+
+enum StoreMode // unscoped enum with explicit values 0..2; NOTE(review): the meaning of each mode is defined by the store routines that consume it, which are not in this part of the file
+{
+    STORE_UNALIGNED = 0,
+    STORE_ALIGNED = 1,
+    STORE_ALIGNED_NOCACHE = 2
+};
+
+}
+
+/* TODO FIXIT: Don't use "God" traits. Split on separate cases.
+ * The primary template is empty; the element types listed at the end of this
+ * section receive explicit specializations generated by the two macros below. */
+template<typename _Tp> struct V_TypeTraits
+{
+};
+/*
+ * CV_INTRIN_DEF_TYPE_TRAITS(type, int_type_, uint_type_, abs_type_, w_type_, q_type_, sum_type_)
+ * emits the specialization V_TypeTraits<type>. It publishes the macro arguments as
+ * the member typedefs value_type, int_type, abs_type, uint_type, w_type, q_type and
+ * sum_type, and adds two helpers, reinterpret_int() and reinterpret_from_int(),
+ * that move a value between type and int_type through a union (the stored bytes
+ * are reused, no numeric conversion is performed).
+ * NOTE(review): judging by the instantiations below, w_type and q_type look like
+ * the 2x and 4x wider element types - confirm against the code that uses them.
+ */
+#define CV_INTRIN_DEF_TYPE_TRAITS(type, int_type_, uint_type_, abs_type_, w_type_, q_type_, sum_type_) \
+    template<> struct V_TypeTraits<type> \
+    { \
+        typedef type value_type; \
+        typedef int_type_ int_type; \
+        typedef abs_type_ abs_type; \
+        typedef uint_type_ uint_type; \
+        typedef w_type_ w_type; \
+        typedef q_type_ q_type; \
+        typedef sum_type_ sum_type; \
+    \
+        static inline int_type reinterpret_int(type x) \
+        { \
+            union { type l; int_type i; } v; \
+            v.l = x; \
+            return v.i; \
+        } \
+    \
+        static inline type reinterpret_from_int(int_type x) \
+        { \
+            union { type l; int_type i; } v; \
+            v.i = x; \
+            return v.l; \
+        } \
+    }
+/*
+ * CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(type, int_type_, uint_type_, abs_type_, w_type_, sum_type_)
+ * is the same generator without the q_type parameter and typedef; it is used
+ * below for the 32-bit and 64-bit element types.
+ */
+#define CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(type, int_type_, uint_type_, abs_type_, w_type_, sum_type_) \
+    template<> struct V_TypeTraits<type> \
+    { \
+        typedef type value_type; \
+        typedef int_type_ int_type; \
+        typedef abs_type_ abs_type; \
+        typedef uint_type_ uint_type; \
+        typedef w_type_ w_type; \
+        typedef sum_type_ sum_type; \
+    \
+        static inline int_type reinterpret_int(type x) \
+        { \
+            union { type l; int_type i; } v; \
+            v.l = x; \
+            return v.i; \
+        } \
+    \
+        static inline type reinterpret_from_int(int_type x) \
+        { \
+            union { type l; int_type i; } v; \
+            v.i = x; \
+            return v.l; \
+        } \
+    }
+
+CV_INTRIN_DEF_TYPE_TRAITS(uchar, schar, uchar, uchar, ushort, unsigned, unsigned); // argument order: type, int, uint, abs, w, q, sum
+CV_INTRIN_DEF_TYPE_TRAITS(schar, schar, uchar, uchar, short, int, int);
+CV_INTRIN_DEF_TYPE_TRAITS(ushort, short, ushort, ushort, unsigned, uint64, unsigned);
+CV_INTRIN_DEF_TYPE_TRAITS(short, short, ushort, ushort, int, int64, int);
+CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(unsigned, int, unsigned, unsigned, uint64, unsigned); // argument order: type, int, uint, abs, w, sum
+CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int, int, unsigned, unsigned, int64, int);
+CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(float, int, unsigned, float, double, float);
+CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(uint64, int64, uint64, uint64, void, uint64); // w_type is void for the three 64-bit element types
+CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int64, int64, uint64, uint64, void, int64);
+CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(double, int64, uint64, double, void, double);
+
+#ifndef CV_DOXYGEN // everything down to the matching #endif is skipped when CV_DOXYGEN is defined
+
+#ifndef CV_CPU_OPTIMIZATION_HAL_NAMESPACE // define the three namespace macros only if the name was not set earlier
+#ifdef CV_FORCE_SIMD128_CPP // forced C++ emulation: fixed namespace hal_EMULATOR_CPP
+    #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_EMULATOR_CPP
+    #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_EMULATOR_CPP {
+    #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
+#elif defined(CV_CPU_DISPATCH_MODE) // dispatch build: name is built from hal_ and the dispatch mode via __CV_CAT (defined elsewhere)
+    #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE __CV_CAT(hal_, CV_CPU_DISPATCH_MODE)
+    #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) {
+    #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
+#else // default: namespace hal_baseline
+    #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_baseline
+    #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_baseline {
+    #define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
+#endif
+#endif // CV_CPU_OPTIMIZATION_HAL_NAMESPACE
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN // opened and immediately closed so that the namespace is declared before the using-directive below
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE; // makes the selected namespace's names visible from the enclosing cv namespace
+#endif
+}
+
+// Documentation builds: remove the listed SIMD feature macros so that the `#if` tests on them
+// below evaluate to 0. Note that CV_WASM_SIMD, CV_RVV071 and CV_AVX512_SKX are not in this list.
+#ifdef CV_DOXYGEN
+#   undef CV_AVX2
+#   undef CV_SSE2
+#   undef CV_NEON
+#   undef CV_VSX
+#   undef CV_FP16
+#   undef CV_MSA
+#   undef CV_RVV
+#endif
+
+// When a native 128-bit backend will be used, forward-declare the 128-bit intrinsics first.
+// intrin_forward.hpp requires CV__SIMD_FORWARD to be defined on entry and #undefs it on exit.
+#if (CV_SSE2 || CV_NEON || CV_VSX || CV_MSA || CV_WASM_SIMD || CV_RVV071 || CV_RVV) && !defined(CV_FORCE_SIMD128_CPP)
+#define CV__SIMD_FORWARD 128
+#include "opencv2/core/hal/intrin_forward.hpp"
+#endif
+
+// Include exactly one 128-bit backend: the first branch whose feature macro is non-zero wins.
+// Defining CV_FORCE_SIMD128_CPP disables every native branch, leaving the intrin_cpp.hpp fallback.
+#if CV_SSE2 && !defined(CV_FORCE_SIMD128_CPP)
+
+#include "opencv2/core/hal/intrin_sse_em.hpp"
+#include "opencv2/core/hal/intrin_sse.hpp"
+
+#elif CV_NEON && !defined(CV_FORCE_SIMD128_CPP)
+
+#include "opencv2/core/hal/intrin_neon.hpp"
+
+#elif CV_RVV071 && !defined(CV_FORCE_SIMD128_CPP)
+// This is the only branch that sets CV_SIMD128_CPP itself before including its backend.
+#define CV_SIMD128_CPP 0
+#include "opencv2/core/hal/intrin_rvv071.hpp"
+
+#elif CV_VSX && !defined(CV_FORCE_SIMD128_CPP)
+
+#include "opencv2/core/hal/intrin_vsx.hpp"
+
+#elif CV_MSA && !defined(CV_FORCE_SIMD128_CPP)
+
+#include "opencv2/core/hal/intrin_msa.hpp"
+
+#elif CV_WASM_SIMD && !defined(CV_FORCE_SIMD128_CPP)
+#include "opencv2/core/hal/intrin_wasm.hpp"
+
+#elif CV_RVV && !defined(CV_FORCE_SIMD128_CPP)
+#include "opencv2/core/hal/intrin_rvv.hpp"
+
+#else
+
+// No native backend selected (or emulation forced).
+#include "opencv2/core/hal/intrin_cpp.hpp"
+
+#endif
+
+// AVX2 can be used together with SSE2, so
+// we define those two sets of intrinsics at once.
+// Most of the intrinsics do not conflict (the proper overloaded variant is
+// resolved by the argument types, e.g. v_float32x4 ~ SSE2, v_float32x8 ~ AVX2),
+// but some of the AVX2 intrinsics get the v256_ prefix instead of v_, e.g. v256_load() vs v_load().
+// Correspondingly, the wide intrinsics (which are mapped to the "widest"
+// available instruction set) will get the vx_ prefix
+// (and will be mapped to their v256_ counterparts) (e.g. vx_load() => v256_load())
+#if CV_AVX2
+
+// CV__SIMD_FORWARD selects the width that intrin_forward.hpp declares; it must be set before the include.
+#define CV__SIMD_FORWARD 256
+#include "opencv2/core/hal/intrin_forward.hpp"
+#include "opencv2/core/hal/intrin_avx.hpp"
+
+#endif
+
+// AVX512 can be used together with SSE2 and AVX2, so
+// we define those sets of intrinsics at once.
+// Some of the AVX512 intrinsics get the v512_ prefix instead of v_, e.g. v512_load() vs v_load().
+// Wide intrinsics will be mapped to their v512_ counterparts in this case (e.g. vx_load() => v512_load())
+#if CV_AVX512_SKX
+
+// CV__SIMD_FORWARD selects the width that intrin_forward.hpp declares; it must be set before the include.
+#define CV__SIMD_FORWARD 512
+#include "opencv2/core/hal/intrin_forward.hpp"
+#include "opencv2/core/hal/intrin_avx512.hpp"
+
+#endif
+
+//! @cond IGNORED
+
+namespace cv {
+
+#ifndef CV_DOXYGEN
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
+#endif
+
+// Capability macro defaults: any of these still undefined at this point is set to 0, so the
+// `#if` tests that follow always see a defined value. (Presumably the backend headers included
+// above define the ones they support -- that is not visible from this file.)
+#ifndef CV_SIMD128
+#define CV_SIMD128 0
+#endif
+
+#ifndef CV_SIMD128_CPP
+#define CV_SIMD128_CPP 0
+#endif
+
+#ifndef CV_SIMD128_64F
+#define CV_SIMD128_64F 0
+#endif
+
+#ifndef CV_SIMD256
+#define CV_SIMD256 0
+#endif
+
+#ifndef CV_SIMD256_64F
+#define CV_SIMD256_64F 0
+#endif
+
+#ifndef CV_SIMD512
+#define CV_SIMD512 0
+#endif
+
+#ifndef CV_SIMD512_64F
+#define CV_SIMD512_64F 0
+#endif
+
+#ifndef CV_SIMD128_FP16
+#define CV_SIMD128_FP16 0
+#endif
+
+#ifndef CV_SIMD256_FP16
+#define CV_SIMD256_FP16 0
+#endif
+
+#ifndef CV_SIMD512_FP16
+#define CV_SIMD512_FP16 0
+#endif
+
+//==================================================================================================
+
+// Primary template is intentionally empty; the related-register typedefs exist only in the
+// specializations generated by CV_DEF_REG_TRAITS below.
+template<typename _Tp> struct V_RegTraits
+{
+};
+
+// Specializes V_RegTraits for the register type `_reg`, recording its related register types:
+//   u_reg, w_reg, q_reg, int_reg and round_reg.
+// The `prefix`, `lane_type` and `suffix` parameters are accepted but not used in the expansion.
+#define CV_DEF_REG_TRAITS(prefix, _reg, lane_type, suffix, _u_reg, _w_reg, _q_reg, _int_reg, _round_reg) \
+    template<> struct V_RegTraits<_reg> \
+    { \
+        typedef _reg reg; \
+        typedef _u_reg u_reg; \
+        typedef _w_reg w_reg; \
+        typedef _q_reg q_reg; \
+        typedef _int_reg int_reg; \
+        typedef _round_reg round_reg; \
+    }
+
+// Register trait tables, one per register width.
+// Argument order: prefix, reg, lane_type, suffix, u_reg, w_reg, q_reg, int_reg, round_reg.
+// `void` is passed where no related register type is provided.
+#if CV_SIMD128 || CV_SIMD128_CPP
+    CV_DEF_REG_TRAITS(v, v_uint8x16, uchar, u8, v_uint8x16, v_uint16x8, v_uint32x4, v_int8x16, void);
+    CV_DEF_REG_TRAITS(v, v_int8x16, schar, s8, v_uint8x16, v_int16x8, v_int32x4, v_int8x16, void);
+    CV_DEF_REG_TRAITS(v, v_uint16x8, ushort, u16, v_uint16x8, v_uint32x4, v_uint64x2, v_int16x8, void);
+    CV_DEF_REG_TRAITS(v, v_int16x8, short, s16, v_uint16x8, v_int32x4, v_int64x2, v_int16x8, void);
+    CV_DEF_REG_TRAITS(v, v_uint32x4, unsigned, u32, v_uint32x4, v_uint64x2, void, v_int32x4, void);
+    CV_DEF_REG_TRAITS(v, v_int32x4, int, s32, v_uint32x4, v_int64x2, void, v_int32x4, void);
+    // v_float32x4 names v_float64x2 as its wide type only when 64-bit float lanes (or the C++ emulation) are enabled.
+#if CV_SIMD128_64F || CV_SIMD128_CPP
+    CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, v_float64x2, void, v_int32x4, v_int32x4);
+#else
+    CV_DEF_REG_TRAITS(v, v_float32x4, float, f32, v_float32x4, void, void, v_int32x4, v_int32x4);
+#endif
+    CV_DEF_REG_TRAITS(v, v_uint64x2, uint64, u64, v_uint64x2, void, void, v_int64x2, void);
+    CV_DEF_REG_TRAITS(v, v_int64x2, int64, s64, v_uint64x2, void, void, v_int64x2, void);
+    // Unlike the test above, this one checks CV_SIMD128_64F only.
+#if CV_SIMD128_64F
+    CV_DEF_REG_TRAITS(v, v_float64x2, double, f64, v_float64x2, void, void, v_int64x2, v_int32x4);
+#endif
+#endif
+
+#if CV_SIMD256
+    CV_DEF_REG_TRAITS(v256, v_uint8x32, uchar, u8, v_uint8x32, v_uint16x16, v_uint32x8, v_int8x32, void);
+    CV_DEF_REG_TRAITS(v256, v_int8x32, schar, s8, v_uint8x32, v_int16x16, v_int32x8, v_int8x32, void);
+    CV_DEF_REG_TRAITS(v256, v_uint16x16, ushort, u16, v_uint16x16, v_uint32x8, v_uint64x4, v_int16x16, void);
+    CV_DEF_REG_TRAITS(v256, v_int16x16, short, s16, v_uint16x16, v_int32x8, v_int64x4, v_int16x16, void);
+    CV_DEF_REG_TRAITS(v256, v_uint32x8, unsigned, u32, v_uint32x8, v_uint64x4, void, v_int32x8, void);
+    CV_DEF_REG_TRAITS(v256, v_int32x8, int, s32, v_uint32x8, v_int64x4, void, v_int32x8, void);
+    CV_DEF_REG_TRAITS(v256, v_float32x8, float, f32, v_float32x8, v_float64x4, void, v_int32x8, v_int32x8);
+    CV_DEF_REG_TRAITS(v256, v_uint64x4, uint64, u64, v_uint64x4, void, void, v_int64x4, void);
+    CV_DEF_REG_TRAITS(v256, v_int64x4, int64, s64, v_uint64x4, void, void, v_int64x4, void);
+    CV_DEF_REG_TRAITS(v256, v_float64x4, double, f64, v_float64x4, void, void, v_int64x4, v_int32x8);
+#endif
+
+#if CV_SIMD512
+    CV_DEF_REG_TRAITS(v512, v_uint8x64, uchar, u8, v_uint8x64, v_uint16x32, v_uint32x16, v_int8x64, void);
+    CV_DEF_REG_TRAITS(v512, v_int8x64, schar, s8, v_uint8x64, v_int16x32, v_int32x16, v_int8x64, void);
+    CV_DEF_REG_TRAITS(v512, v_uint16x32, ushort, u16, v_uint16x32, v_uint32x16, v_uint64x8, v_int16x32, void);
+    CV_DEF_REG_TRAITS(v512, v_int16x32, short, s16, v_uint16x32, v_int32x16, v_int64x8, v_int16x32, void);
+    CV_DEF_REG_TRAITS(v512, v_uint32x16, unsigned, u32, v_uint32x16, v_uint64x8, void, v_int32x16, void);
+    CV_DEF_REG_TRAITS(v512, v_int32x16, int, s32, v_uint32x16, v_int64x8, void, v_int32x16, void);
+    CV_DEF_REG_TRAITS(v512, v_float32x16, float, f32, v_float32x16, v_float64x8, void, v_int32x16, v_int32x16);
+    CV_DEF_REG_TRAITS(v512, v_uint64x8, uint64, u64, v_uint64x8, void, void, v_int64x8, void);
+    CV_DEF_REG_TRAITS(v512, v_int64x8, int64, s64, v_uint64x8, void, void, v_int64x8, void);
+    CV_DEF_REG_TRAITS(v512, v_float64x8, double, f64, v_float64x8, void, void, v_int64x8, v_int32x16);
+#endif
+//! @endcond
+
+// Width selection, 512-bit case: taken when CV_SIMD512 is non-zero and CV__SIMD_FORCE_WIDTH is
+// either undefined or equal to 512. Aliases the width-agnostic v_<type> names to the 512-bit
+// register types and makes VXPREFIX produce v512-prefixed function names.
+#if CV_SIMD512 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 512)
+#define CV__SIMD_NAMESPACE simd512
+namespace CV__SIMD_NAMESPACE {
+    // These are preprocessor macros: they are not scoped by the enclosing namespace.
+    #define CV_SIMD 1
+    #define CV_SIMD_64F CV_SIMD512_64F
+    #define CV_SIMD_FP16 CV_SIMD512_FP16
+    #define CV_SIMD_WIDTH 64
+//! @addtogroup core_hal_intrin
+//! @{
+    //! @brief Maximum available vector register capacity 8-bit unsigned integer values
+    typedef v_uint8x64    v_uint8;
+    //! @brief Maximum available vector register capacity 8-bit signed integer values
+    typedef v_int8x64     v_int8;
+    //! @brief Maximum available vector register capacity 16-bit unsigned integer values
+    typedef v_uint16x32   v_uint16;
+    //! @brief Maximum available vector register capacity 16-bit signed integer values
+    typedef v_int16x32    v_int16;
+    //! @brief Maximum available vector register capacity 32-bit unsigned integer values
+    typedef v_uint32x16   v_uint32;
+    //! @brief Maximum available vector register capacity 32-bit signed integer values
+    typedef v_int32x16    v_int32;
+    //! @brief Maximum available vector register capacity 64-bit unsigned integer values
+    typedef v_uint64x8    v_uint64;
+    //! @brief Maximum available vector register capacity 64-bit signed integer values
+    typedef v_int64x8     v_int64;
+    //! @brief Maximum available vector register capacity 32-bit floating point values (single precision)
+    typedef v_float32x16  v_float32;
+    #if CV_SIMD512_64F
+    //! @brief Maximum available vector register capacity 64-bit floating point values (double precision)
+    typedef v_float64x8   v_float64;
+    #endif
+//! @}
+
+    // VXPREFIX(_load) expands to v512_load, and so on.
+    #define VXPREFIX(func) v512##func
+} // namespace
+using namespace CV__SIMD_NAMESPACE;
+#elif CV_SIMD256 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 256)
+// Width selection, 256-bit case: aliases the width-agnostic v_<type> names to the 256-bit
+// register types and makes VXPREFIX produce v256-prefixed function names.
+#define CV__SIMD_NAMESPACE simd256
+namespace CV__SIMD_NAMESPACE {
+    // These are preprocessor macros: they are not scoped by the enclosing namespace.
+    #define CV_SIMD 1
+    #define CV_SIMD_64F CV_SIMD256_64F
+    #define CV_SIMD_FP16 CV_SIMD256_FP16
+    #define CV_SIMD_WIDTH 32
+//! @addtogroup core_hal_intrin
+//! @{
+    //! @brief Maximum available vector register capacity 8-bit unsigned integer values
+    typedef v_uint8x32   v_uint8;
+    //! @brief Maximum available vector register capacity 8-bit signed integer values
+    typedef v_int8x32    v_int8;
+    //! @brief Maximum available vector register capacity 16-bit unsigned integer values
+    typedef v_uint16x16  v_uint16;
+    //! @brief Maximum available vector register capacity 16-bit signed integer values
+    typedef v_int16x16   v_int16;
+    //! @brief Maximum available vector register capacity 32-bit unsigned integer values
+    typedef v_uint32x8   v_uint32;
+    //! @brief Maximum available vector register capacity 32-bit signed integer values
+    typedef v_int32x8    v_int32;
+    //! @brief Maximum available vector register capacity 64-bit unsigned integer values
+    typedef v_uint64x4   v_uint64;
+    //! @brief Maximum available vector register capacity 64-bit signed integer values
+    typedef v_int64x4    v_int64;
+    //! @brief Maximum available vector register capacity 32-bit floating point values (single precision)
+    typedef v_float32x8  v_float32;
+    #if CV_SIMD256_64F
+    //! @brief Maximum available vector register capacity 64-bit floating point values (double precision)
+    typedef v_float64x4  v_float64;
+    #endif
+//! @}
+
+    // VXPREFIX(_load) expands to v256_load, and so on.
+    #define VXPREFIX(func) v256##func
+} // namespace
+using namespace CV__SIMD_NAMESPACE;
+#elif (CV_SIMD128 || CV_SIMD128_CPP) && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 128)
+// Width selection, 128-bit case (native backend or C++ emulation): aliases the width-agnostic
+// v_<type> names to the 128-bit register types and makes VXPREFIX produce v-prefixed names.
+// NOTE(review): CV_SIMD128_CPP is always defined by this point (it is defaulted to 0 earlier in
+// this file), so the `defined` test below is always true and the namespace is named simd128_cpp
+// even for native backends. `#if CV_SIMD128_CPP` looks like the intent -- confirm before
+// changing, since the namespace name is part of the qualified names of the functions in it.
+#if defined CV_SIMD128_CPP
+#define CV__SIMD_NAMESPACE simd128_cpp
+#else
+#define CV__SIMD_NAMESPACE simd128
+#endif
+namespace CV__SIMD_NAMESPACE {
+    // CV_SIMD follows CV_SIMD128 here, so it is 0 when only the C++ emulation is available.
+    // CV_SIMD_FP16 is not set in this branch.
+    #define CV_SIMD CV_SIMD128
+    #define CV_SIMD_64F CV_SIMD128_64F
+    #define CV_SIMD_WIDTH 16
+//! @addtogroup core_hal_intrin
+//! @{
+    //! @brief Maximum available vector register capacity 8-bit unsigned integer values
+    typedef v_uint8x16  v_uint8;
+    //! @brief Maximum available vector register capacity 8-bit signed integer values
+    typedef v_int8x16   v_int8;
+    //! @brief Maximum available vector register capacity 16-bit unsigned integer values
+    typedef v_uint16x8  v_uint16;
+    //! @brief Maximum available vector register capacity 16-bit signed integer values
+    typedef v_int16x8   v_int16;
+    //! @brief Maximum available vector register capacity 32-bit unsigned integer values
+    typedef v_uint32x4  v_uint32;
+    //! @brief Maximum available vector register capacity 32-bit signed integer values
+    typedef v_int32x4   v_int32;
+    //! @brief Maximum available vector register capacity 64-bit unsigned integer values
+    typedef v_uint64x2  v_uint64;
+    //! @brief Maximum available vector register capacity 64-bit signed integer values
+    typedef v_int64x2   v_int64;
+    //! @brief Maximum available vector register capacity 32-bit floating point values (single precision)
+    typedef v_float32x4 v_float32;
+    #if CV_SIMD128_64F
+    //! @brief Maximum available vector register capacity 64-bit floating point values (double precision)
+    typedef v_float64x2 v_float64;
+    #endif
+//! @}
+
+    // VXPREFIX(_load) expands to v_load, and so on.
+    #define VXPREFIX(func) v##func
+} // namespace
+using namespace CV__SIMD_NAMESPACE;
+#endif
+
+// Width-agnostic "vx_" wrappers. Each one forwards to the intrinsic whose name is built by
+// VXPREFIX (v, v256 or v512 prefix, as chosen by the width-selection chain above) and returns
+// the matching width-agnostic vector typedef.
+// NOTE(review): the width-selection chain has no #else, so if none of its branches is taken
+// CV__SIMD_NAMESPACE, VXPREFIX and the v_<type> aliases are undefined when this block is reached.
+namespace CV__SIMD_NAMESPACE {
+//! @addtogroup core_hal_intrin
+//! @{
+    //! @name Wide init with value
+    //! @{
+    //! @brief Create maximum available capacity vector with elements set to a specific value
+    inline v_uint8 vx_setall_u8(uchar v) { return VXPREFIX(_setall_u8)(v); }
+    inline v_int8 vx_setall_s8(schar v) { return VXPREFIX(_setall_s8)(v); }
+    inline v_uint16 vx_setall_u16(ushort v) { return VXPREFIX(_setall_u16)(v); }
+    inline v_int16 vx_setall_s16(short v) { return VXPREFIX(_setall_s16)(v); }
+    inline v_int32 vx_setall_s32(int v) { return VXPREFIX(_setall_s32)(v); }
+    inline v_uint32 vx_setall_u32(unsigned v) { return VXPREFIX(_setall_u32)(v); }
+    inline v_float32 vx_setall_f32(float v) { return VXPREFIX(_setall_f32)(v); }
+    inline v_int64 vx_setall_s64(int64 v) { return VXPREFIX(_setall_s64)(v); }
+    inline v_uint64 vx_setall_u64(uint64 v) { return VXPREFIX(_setall_u64)(v); }
+#if CV_SIMD_64F
+    inline v_float64 vx_setall_f64(double v) { return VXPREFIX(_setall_f64)(v); }
+#endif
+    //! @}
+
+    //! @name Wide init with zero
+    //! @{
+    //! @brief Create maximum available capacity vector with elements set to zero
+    inline v_uint8 vx_setzero_u8() { return VXPREFIX(_setzero_u8)(); }
+    inline v_int8 vx_setzero_s8() { return VXPREFIX(_setzero_s8)(); }
+    inline v_uint16 vx_setzero_u16() { return VXPREFIX(_setzero_u16)(); }
+    inline v_int16 vx_setzero_s16() { return VXPREFIX(_setzero_s16)(); }
+    inline v_int32 vx_setzero_s32() { return VXPREFIX(_setzero_s32)(); }
+    inline v_uint32 vx_setzero_u32() { return VXPREFIX(_setzero_u32)(); }
+    inline v_float32 vx_setzero_f32() { return VXPREFIX(_setzero_f32)(); }
+    inline v_int64 vx_setzero_s64() { return VXPREFIX(_setzero_s64)(); }
+    inline v_uint64 vx_setzero_u64() { return VXPREFIX(_setzero_u64)(); }
+#if CV_SIMD_64F
+    inline v_float64 vx_setzero_f64() { return VXPREFIX(_setzero_f64)(); }
+#endif
+    //! @}
+
+    //! @name Wide load from memory
+    //! @{
+    //! @brief Load maximum available capacity register contents from memory
+    inline v_uint8 vx_load(const uchar * ptr) { return VXPREFIX(_load)(ptr); }
+    inline v_int8 vx_load(const schar * ptr) { return VXPREFIX(_load)(ptr); }
+    inline v_uint16 vx_load(const ushort * ptr) { return VXPREFIX(_load)(ptr); }
+    inline v_int16 vx_load(const short * ptr) { return VXPREFIX(_load)(ptr); }
+    inline v_int32 vx_load(const int * ptr) { return VXPREFIX(_load)(ptr); }
+    inline v_uint32 vx_load(const unsigned * ptr) { return VXPREFIX(_load)(ptr); }
+    inline v_float32 vx_load(const float * ptr) { return VXPREFIX(_load)(ptr); }
+    inline v_int64 vx_load(const int64 * ptr) { return VXPREFIX(_load)(ptr); }
+    inline v_uint64 vx_load(const uint64 * ptr) { return VXPREFIX(_load)(ptr); }
+#if CV_SIMD_64F
+    inline v_float64 vx_load(const double * ptr) { return VXPREFIX(_load)(ptr); }
+#endif
+    //! @}
+
+    //! @name Wide load from memory(aligned)
+    //! @{
+    //! @brief Load maximum available capacity register contents from memory(aligned)
+    inline v_uint8 vx_load_aligned(const uchar * ptr) { return VXPREFIX(_load_aligned)(ptr); }
+    inline v_int8 vx_load_aligned(const schar * ptr) { return VXPREFIX(_load_aligned)(ptr); }
+    inline v_uint16 vx_load_aligned(const ushort * ptr) { return VXPREFIX(_load_aligned)(ptr); }
+    inline v_int16 vx_load_aligned(const short * ptr) { return VXPREFIX(_load_aligned)(ptr); }
+    inline v_int32 vx_load_aligned(const int * ptr) { return VXPREFIX(_load_aligned)(ptr); }
+    inline v_uint32 vx_load_aligned(const unsigned * ptr) { return VXPREFIX(_load_aligned)(ptr); }
+    inline v_float32 vx_load_aligned(const float * ptr) { return VXPREFIX(_load_aligned)(ptr); }
+    inline v_int64 vx_load_aligned(const int64 * ptr) { return VXPREFIX(_load_aligned)(ptr); }
+    inline v_uint64 vx_load_aligned(const uint64 * ptr) { return VXPREFIX(_load_aligned)(ptr); }
+#if CV_SIMD_64F
+    inline v_float64 vx_load_aligned(const double * ptr) { return VXPREFIX(_load_aligned)(ptr); }
+#endif
+    //! @}
+
+    //! @name Wide load lower half from memory
+    //! @{
+    //! @brief Load lower half of maximum available capacity register from memory
+    inline v_uint8 vx_load_low(const uchar * ptr) { return VXPREFIX(_load_low)(ptr); }
+    inline v_int8 vx_load_low(const schar * ptr) { return VXPREFIX(_load_low)(ptr); }
+    inline v_uint16 vx_load_low(const ushort * ptr) { return VXPREFIX(_load_low)(ptr); }
+    inline v_int16 vx_load_low(const short * ptr) { return VXPREFIX(_load_low)(ptr); }
+    inline v_int32 vx_load_low(const int * ptr) { return VXPREFIX(_load_low)(ptr); }
+    inline v_uint32 vx_load_low(const unsigned * ptr) { return VXPREFIX(_load_low)(ptr); }
+    inline v_float32 vx_load_low(const float * ptr) { return VXPREFIX(_load_low)(ptr); }
+    inline v_int64 vx_load_low(const int64 * ptr) { return VXPREFIX(_load_low)(ptr); }
+    inline v_uint64 vx_load_low(const uint64 * ptr) { return VXPREFIX(_load_low)(ptr); }
+#if CV_SIMD_64F
+    inline v_float64 vx_load_low(const double * ptr) { return VXPREFIX(_load_low)(ptr); }
+#endif
+    //! @}
+
+    //! @name Wide load halves from memory
+    //! @{
+    //! @brief Load maximum available capacity register contents from two memory blocks
+    inline v_uint8 vx_load_halves(const uchar * ptr0, const uchar * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
+    inline v_int8 vx_load_halves(const schar * ptr0, const schar * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
+    inline v_uint16 vx_load_halves(const ushort * ptr0, const ushort * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
+    inline v_int16 vx_load_halves(const short * ptr0, const short * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
+    inline v_int32 vx_load_halves(const int * ptr0, const int * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
+    inline v_uint32 vx_load_halves(const unsigned * ptr0, const unsigned * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
+    inline v_float32 vx_load_halves(const float * ptr0, const float * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
+    inline v_int64 vx_load_halves(const int64 * ptr0, const int64 * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
+    inline v_uint64 vx_load_halves(const uint64 * ptr0, const uint64 * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
+#if CV_SIMD_64F
+    inline v_float64 vx_load_halves(const double * ptr0, const double * ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
+#endif
+    //! @}
+
+    //! @name Wide LUT of elements
+    //! @{
+    //! @brief Load maximum available capacity register contents with array elements by provided indexes
+    inline v_uint8 vx_lut(const uchar * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
+    inline v_int8 vx_lut(const schar * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
+    inline v_uint16 vx_lut(const ushort * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
+    inline v_int16 vx_lut(const short* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
+    inline v_int32 vx_lut(const int* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
+    inline v_uint32 vx_lut(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
+    inline v_float32 vx_lut(const float* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
+    inline v_int64 vx_lut(const int64 * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
+    inline v_uint64 vx_lut(const uint64 * ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
+#if CV_SIMD_64F
+    inline v_float64 vx_lut(const double* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
+#endif
+    //! @}
+
+    //! @name Wide LUT of element pairs
+    //! @{
+    //! @brief Load maximum available capacity register contents with array element pairs by provided indexes
+    inline v_uint8 vx_lut_pairs(const uchar * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
+    inline v_int8 vx_lut_pairs(const schar * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
+    inline v_uint16 vx_lut_pairs(const ushort * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
+    inline v_int16 vx_lut_pairs(const short* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
+    inline v_int32 vx_lut_pairs(const int* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
+    inline v_uint32 vx_lut_pairs(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
+    inline v_float32 vx_lut_pairs(const float* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
+    inline v_int64 vx_lut_pairs(const int64 * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
+    inline v_uint64 vx_lut_pairs(const uint64 * ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
+#if CV_SIMD_64F
+    inline v_float64 vx_lut_pairs(const double* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
+#endif
+    //! @}
+
+    //! @name Wide LUT of element quads
+    //! @{
+    //! @brief Load maximum available capacity register contents with array element quads by provided indexes
+    // Unlike the other LUT groups, no 64-bit integer or double overloads are provided here.
+    inline v_uint8 vx_lut_quads(const uchar* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
+    inline v_int8 vx_lut_quads(const schar* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
+    inline v_uint16 vx_lut_quads(const ushort* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
+    inline v_int16 vx_lut_quads(const short* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
+    inline v_int32 vx_lut_quads(const int* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
+    inline v_uint32 vx_lut_quads(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
+    inline v_float32 vx_lut_quads(const float* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
+    //! @}
+
+    //! @name Wide load with double expansion
+    //! @{
+    //! @brief Load maximum available capacity register contents from memory with double expand
+    inline v_uint16 vx_load_expand(const uchar * ptr) { return VXPREFIX(_load_expand)(ptr); }
+    inline v_int16 vx_load_expand(const schar * ptr) { return VXPREFIX(_load_expand)(ptr); }
+    inline v_uint32 vx_load_expand(const ushort * ptr) { return VXPREFIX(_load_expand)(ptr); }
+    inline v_int32 vx_load_expand(const short* ptr) { return VXPREFIX(_load_expand)(ptr); }
+    inline v_int64 vx_load_expand(const int* ptr) { return VXPREFIX(_load_expand)(ptr); }
+    inline v_uint64 vx_load_expand(const unsigned* ptr) { return VXPREFIX(_load_expand)(ptr); }
+    inline v_float32 vx_load_expand(const float16_t * ptr) { return VXPREFIX(_load_expand)(ptr); }
+    //! @}
+
+    //! @name Wide load with quad expansion
+    //! @{
+    //! @brief Load maximum available capacity register contents from memory with quad expand
+    inline v_uint32 vx_load_expand_q(const uchar * ptr) { return VXPREFIX(_load_expand_q)(ptr); }
+    inline v_int32 vx_load_expand_q(const schar * ptr) { return VXPREFIX(_load_expand_q)(ptr); }
+    //! @}
+
+    /** @brief SIMD processing state cleanup call */
+    inline void vx_cleanup() { VXPREFIX(_cleanup)(); }
+
+
+//! @cond IGNORED
+
+    // Backward compatibility: vx_store simply forwards to the v_store overload chosen by the
+    // argument types (it does not go through VXPREFIX).
+    template<typename _Tp, typename _Tvec> static inline
+    void vx_store(_Tp* dst, const _Tvec& v) { return v_store(dst, v); }
+    // Backward compatibility: vx_store_aligned forwards to v_store_aligned in the same way.
+    template<typename _Tp, typename _Tvec> static inline
+    void vx_store_aligned(_Tp* dst, const _Tvec& v) { return v_store_aligned(dst, v); }
+
+//! @endcond
+
+
+//! @}
+    // VXPREFIX is only needed to generate the wrappers above.
+    #undef VXPREFIX
+} // namespace
+
+//! @cond IGNORED
+// Final defaults: whatever the width-selection chain above did not define is set to 0 here.
+#ifndef CV_SIMD_64F
+#define CV_SIMD_64F 0
+#endif
+
+#ifndef CV_SIMD_FP16
+#define CV_SIMD_FP16 0  //!< Defined to 1 on native support of operations with float16x8_t / float16x16_t (SIMD256) types
+#endif
+
+#ifndef CV_SIMD
+#define CV_SIMD 0
+#endif
+
+// Included here, i.e. inside namespace cv and (outside Doxygen builds) inside the HAL namespace.
+#include "simd_utils.impl.hpp"
+
+#ifndef CV_DOXYGEN
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+#endif
+
+} // cv::
+
+//! @endcond
+
+#endif
--- a/3rdparty/opencv/inc/opencv2/core/hal/intrin_avx.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/hal/intrin_avx.hpp
--- a/3rdparty/opencv/inc/opencv2/core/hal/intrin_avx512.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/hal/intrin_avx512.hpp
--- a/3rdparty/opencv/inc/opencv2/core/hal/intrin_cpp.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/hal/intrin_cpp.hpp
--- a/3rdparty/opencv/inc/opencv2/core/hal/intrin_forward.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/hal/intrin_forward.hpp
@@ -0,0 +1,191 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+// This header has no include guard: it takes the register width to declare from
+// CV__SIMD_FORWARD and #undefs that macro at the end, so the includer must define it
+// again before each inclusion.
+#ifndef CV__SIMD_FORWARD
+#error "Need to pre-define forward width"
+#endif
+
+namespace cv
+{
+
+//! @cond IGNORED
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
+
+/** Types **/
+// For the requested width, map the width-agnostic __CV_V_* names onto the concrete vector
+// structs, forward-declare those structs, and make __CV_VX(fun) produce the matching
+// function-name prefix (v512_, v256_ or v_).
+#if CV__SIMD_FORWARD == 1024
+// [todo] 1024
+#error "1024-long ops not implemented yet"
+#elif CV__SIMD_FORWARD == 512
+// 512-bit registers
+#define __CV_VX(fun)   v512_##fun
+#define __CV_V_UINT8   v_uint8x64
+#define __CV_V_INT8    v_int8x64
+#define __CV_V_UINT16  v_uint16x32
+#define __CV_V_INT16   v_int16x32
+#define __CV_V_UINT32  v_uint32x16
+#define __CV_V_INT32   v_int32x16
+#define __CV_V_UINT64  v_uint64x8
+#define __CV_V_INT64   v_int64x8
+#define __CV_V_FLOAT32 v_float32x16
+#define __CV_V_FLOAT64 v_float64x8
+struct v_uint8x64;
+struct v_int8x64;
+struct v_uint16x32;
+struct v_int16x32;
+struct v_uint32x16;
+struct v_int32x16;
+struct v_uint64x8;
+struct v_int64x8;
+struct v_float32x16;
+struct v_float64x8;
+#elif CV__SIMD_FORWARD == 256
+// 256-bit registers
+#define __CV_VX(fun)   v256_##fun
+#define __CV_V_UINT8   v_uint8x32
+#define __CV_V_INT8    v_int8x32
+#define __CV_V_UINT16  v_uint16x16
+#define __CV_V_INT16   v_int16x16
+#define __CV_V_UINT32  v_uint32x8
+#define __CV_V_INT32   v_int32x8
+#define __CV_V_UINT64  v_uint64x4
+#define __CV_V_INT64   v_int64x4
+#define __CV_V_FLOAT32 v_float32x8
+#define __CV_V_FLOAT64 v_float64x4
+struct v_uint8x32;
+struct v_int8x32;
+struct v_uint16x16;
+struct v_int16x16;
+struct v_uint32x8;
+struct v_int32x8;
+struct v_uint64x4;
+struct v_int64x4;
+struct v_float32x8;
+struct v_float64x4;
+#else
+// 128-bit registers. This branch is also taken for any CV__SIMD_FORWARD value other than
+// 1024, 512 or 256, since it is an unconditional #else.
+#define __CV_VX(fun)   v_##fun
+#define __CV_V_UINT8   v_uint8x16
+#define __CV_V_INT8    v_int8x16
+#define __CV_V_UINT16  v_uint16x8
+#define __CV_V_INT16   v_int16x8
+#define __CV_V_UINT32  v_uint32x4
+#define __CV_V_INT32   v_int32x4
+#define __CV_V_UINT64  v_uint64x2
+#define __CV_V_INT64   v_int64x2
+#define __CV_V_FLOAT32 v_float32x4
+#define __CV_V_FLOAT64 v_float64x2
+struct v_uint8x16;
+struct v_int8x16;
+struct v_uint16x8;
+struct v_int16x8;
+struct v_uint32x4;
+struct v_int32x4;
+struct v_uint64x2;
+struct v_int64x2;
+struct v_float32x4;
+struct v_float64x2;
+#endif
+
+/** Value reordering **/
+// Everything below is a declaration only; no definitions are provided in this header.
+// The __CV_V_* and __CV_VX names expand to the types and prefix chosen for CV__SIMD_FORWARD.
+
+// Expansion
+void v_expand(const __CV_V_UINT8&,  __CV_V_UINT16&, __CV_V_UINT16&);
+void v_expand(const __CV_V_INT8&,   __CV_V_INT16&,  __CV_V_INT16&);
+void v_expand(const __CV_V_UINT16&, __CV_V_UINT32&, __CV_V_UINT32&);
+void v_expand(const __CV_V_INT16&,  __CV_V_INT32&,  __CV_V_INT32&);
+void v_expand(const __CV_V_UINT32&, __CV_V_UINT64&, __CV_V_UINT64&);
+void v_expand(const __CV_V_INT32&,  __CV_V_INT64&,  __CV_V_INT64&);
+// Low Expansion
+__CV_V_UINT16 v_expand_low(const __CV_V_UINT8&);
+__CV_V_INT16  v_expand_low(const __CV_V_INT8&);
+__CV_V_UINT32 v_expand_low(const __CV_V_UINT16&);
+__CV_V_INT32  v_expand_low(const __CV_V_INT16&);
+__CV_V_UINT64 v_expand_low(const __CV_V_UINT32&);
+__CV_V_INT64  v_expand_low(const __CV_V_INT32&);
+// High Expansion
+__CV_V_UINT16 v_expand_high(const __CV_V_UINT8&);
+__CV_V_INT16  v_expand_high(const __CV_V_INT8&);
+__CV_V_UINT32 v_expand_high(const __CV_V_UINT16&);
+__CV_V_INT32  v_expand_high(const __CV_V_INT16&);
+__CV_V_UINT64 v_expand_high(const __CV_V_UINT32&);
+__CV_V_INT64  v_expand_high(const __CV_V_INT32&);
+// Load & Low Expansion
+// (function names carry the width prefix: v_load_expand, v256_load_expand or v512_load_expand)
+__CV_V_UINT16 __CV_VX(load_expand)(const uchar*);
+__CV_V_INT16  __CV_VX(load_expand)(const schar*);
+__CV_V_UINT32 __CV_VX(load_expand)(const ushort*);
+__CV_V_INT32  __CV_VX(load_expand)(const short*);
+// `uint` is not declared in this header -- presumably a project typedef for unsigned; verify.
+__CV_V_UINT64 __CV_VX(load_expand)(const uint*);
+__CV_V_INT64  __CV_VX(load_expand)(const int*);
+// Load lower 8-bit and expand into 32-bit
+__CV_V_UINT32 __CV_VX(load_expand_q)(const uchar*);
+__CV_V_INT32  __CV_VX(load_expand_q)(const schar*);
+
+// Saturating Pack
+__CV_V_UINT8  v_pack(const __CV_V_UINT16&, const __CV_V_UINT16&);
+__CV_V_INT8   v_pack(const __CV_V_INT16&,  const __CV_V_INT16&);
+__CV_V_UINT16 v_pack(const __CV_V_UINT32&, const __CV_V_UINT32&);
+__CV_V_INT16  v_pack(const __CV_V_INT32&,  const __CV_V_INT32&);
+// Non-saturating Pack
+__CV_V_UINT32 v_pack(const __CV_V_UINT64&, const __CV_V_UINT64&);
+__CV_V_INT32  v_pack(const __CV_V_INT64&,  const __CV_V_INT64&);
+// Pack signed integers with unsigned saturation
+__CV_V_UINT8  v_pack_u(const __CV_V_INT16&, const __CV_V_INT16&);
+__CV_V_UINT16 v_pack_u(const __CV_V_INT32&, const __CV_V_INT32&);
+
+/** Arithmetic, bitwise and comparison operations **/
+
+// Non-saturating multiply
+// With CV_VSX a single function template is declared instead of the per-type overloads.
+#if CV_VSX
+template<typename Tvec>
+Tvec v_mul_wrap(const Tvec& a, const Tvec& b);
+#else
+__CV_V_UINT8  v_mul_wrap(const __CV_V_UINT8&,  const __CV_V_UINT8&);
+__CV_V_INT8   v_mul_wrap(const __CV_V_INT8&,   const __CV_V_INT8&);
+__CV_V_UINT16 v_mul_wrap(const __CV_V_UINT16&, const __CV_V_UINT16&);
+__CV_V_INT16  v_mul_wrap(const __CV_V_INT16&,  const __CV_V_INT16&);
+#endif
+
+//  Multiply and expand
+// With CV_VSX a single function template is declared instead of the per-type overloads.
+#if CV_VSX
+template<typename Tvec, typename Twvec>
+void v_mul_expand(const Tvec& a, const Tvec& b, Twvec& c, Twvec& d);
+#else
+void v_mul_expand(const __CV_V_UINT8&,  const __CV_V_UINT8&,  __CV_V_UINT16&, __CV_V_UINT16&);
+void v_mul_expand(const __CV_V_INT8&,   const __CV_V_INT8&,   __CV_V_INT16&,  __CV_V_INT16&);
+void v_mul_expand(const __CV_V_UINT16&, const __CV_V_UINT16&, __CV_V_UINT32&, __CV_V_UINT32&);
+void v_mul_expand(const __CV_V_INT16&,  const __CV_V_INT16&,  __CV_V_INT32&,  __CV_V_INT32&);
+void v_mul_expand(const __CV_V_UINT32&, const __CV_V_UINT32&, __CV_V_UINT64&, __CV_V_UINT64&);
+void v_mul_expand(const __CV_V_INT32&,  const __CV_V_INT32&,  __CV_V_INT64&,  __CV_V_INT64&);
+#endif
+
+// Conversions
+__CV_V_FLOAT32 v_cvt_f32(const __CV_V_INT32& a);
+__CV_V_FLOAT32 v_cvt_f32(const __CV_V_FLOAT64& a);
+__CV_V_FLOAT32 v_cvt_f32(const __CV_V_FLOAT64& a, const __CV_V_FLOAT64& b);
+__CV_V_FLOAT64 v_cvt_f64(const __CV_V_INT32& a);
+__CV_V_FLOAT64 v_cvt_f64_high(const __CV_V_INT32& a);
+__CV_V_FLOAT64 v_cvt_f64(const __CV_V_FLOAT32& a);
+__CV_V_FLOAT64 v_cvt_f64_high(const __CV_V_FLOAT32& a);
+__CV_V_FLOAT64 v_cvt_f64(const __CV_V_INT64& a);
+
+/** Cleanup **/
+// Remove every helper macro, including the CV__SIMD_FORWARD input, so that the header can be
+// included again with a different width.
+#undef CV__SIMD_FORWARD
+#undef __CV_VX
+#undef __CV_V_UINT8
+#undef __CV_V_INT8
+#undef __CV_V_UINT16
+#undef __CV_V_INT16
+#undef __CV_V_UINT32
+#undef __CV_V_INT32
+#undef __CV_V_UINT64
+#undef __CV_V_INT64
+#undef __CV_V_FLOAT32
+#undef __CV_V_FLOAT64
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+
+//! @endcond
+
+} // cv::
--- a/3rdparty/opencv/inc/opencv2/core/hal/intrin_msa.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/hal/intrin_msa.hpp
--- a/3rdparty/opencv/inc/opencv2/core/hal/intrin_neon.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/hal/intrin_neon.hpp
--- a/3rdparty/opencv/inc/opencv2/core/hal/intrin_rvv.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/hal/intrin_rvv.hpp
--- a/3rdparty/opencv/inc/opencv2/core/hal/intrin_rvv071.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/hal/intrin_rvv071.hpp
--- a/3rdparty/opencv/inc/opencv2/core/hal/intrin_sse.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/hal/intrin_sse.hpp
--- a/3rdparty/opencv/inc/opencv2/core/hal/intrin_sse_em.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/hal/intrin_sse_em.hpp
@@ -0,0 +1,180 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+#ifndef OPENCV_HAL_INTRIN_SSE_EM_HPP
+#define OPENCV_HAL_INTRIN_SSE_EM_HPP
+
+namespace cv
+{
+
+//! @cond IGNORED
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
+
+// Generators for thin inline wrappers around native intrinsics. `fun` is the
+// intrinsic name WITHOUT the `_mm_` prefix: the macro pastes it into both the
+// wrapper name (`_v128_<fun>`) and the forwarded call (`_mm_<fun>`). `tp` is used
+// for the return type and for every (const reference) parameter.
+
+// One-argument wrapper: _v128_<fun>(a) -> _mm_<fun>(a)
+#define OPENCV_HAL_SSE_WRAP_1(fun, tp) \
+    inline tp _v128_##fun(const tp& a) \
+    { return _mm_##fun(a); }
+
+// Two-argument wrapper: _v128_<fun>(a, b) -> _mm_<fun>(a, b)
+#define OPENCV_HAL_SSE_WRAP_2(fun, tp) \
+    inline tp _v128_##fun(const tp& a, const tp& b) \
+    { return _mm_##fun(a, b); }
+
+// Three-argument wrapper: _v128_<fun>(a, b, c) -> _mm_<fun>(a, b, c)
+#define OPENCV_HAL_SSE_WRAP_3(fun, tp) \
+    inline tp _v128_##fun(const tp& a, const tp& b, const tp& c) \
+    { return _mm_##fun(a, b, c); }
+
+///////////////////////////// XOP /////////////////////////////
+
+// [todo] define CV_XOP
+// Until CV_XOP exists the emulation below is always compiled (`#if 1`).
+#if 1 // CV_XOP
+// Unsigned 32-bit "greater than": flipping the top bit of every lane in both
+// operands lets the signed compare _mm_cmpgt_epi32 order them as unsigned values.
+// As with _mm_cmpgt_epi32, the result is a per-lane mask.
+inline __m128i _v128_comgt_epu32(const __m128i& a, const __m128i& b)
+{
+    const __m128i delta = _mm_set1_epi32((int)0x80000000);
+    return _mm_cmpgt_epi32(_mm_xor_si128(a, delta), _mm_xor_si128(b, delta));
+}
+// wrapping XOP
+#else
+// The wrapper macro adds the `_v128_` / `_mm_` prefixes itself, so it must be given
+// the bare intrinsic name; passing `_v128_comgt_epu32` would generate
+// `_v128__v128_comgt_epu32` forwarding to the non-existent `_mm__v128_comgt_epu32`.
+OPENCV_HAL_SSE_WRAP_2(comgt_epu32, __m128i)
+#endif // !CV_XOP
+
+///////////////////////////// SSE4.1 /////////////////////////////
+
+#if !CV_SSE4_1
+
+/** Swizzle **/
+// Bitwise select computed as a ^ ((b ^ a) & mask): every result bit comes from b
+// where the corresponding mask bit is set and from a where it is clear.
+inline __m128i _v128_blendv_epi8(const __m128i& a, const __m128i& b, const __m128i& mask)
+{
+    const __m128i differing = _mm_xor_si128(b, a);
+    const __m128i selected = _mm_and_si128(differing, mask);
+    return _mm_xor_si128(a, selected);
+}
+
+/** Convert **/
+// 8 >> 16
+// Zero-extends the low 8 bytes of a to 16-bit lanes by interleaving them with zeros.
+inline __m128i _v128_cvtepu8_epi16(const __m128i& a)
+{ return _mm_unpacklo_epi8(a, _mm_setzero_si128()); }
+// Sign-extends the low 8 bytes of a to 16-bit lanes.
+inline __m128i _v128_cvtepi8_epi16(const __m128i& a)
+{
+    // Each byte is duplicated into both halves of a 16-bit lane; the arithmetic
+    // shift then leaves the byte in the low half with its sign replicated above it.
+    const __m128i doubled = _mm_unpacklo_epi8(a, a);
+    return _mm_srai_epi16(doubled, 8);
+}
+// 8 >> 32
+// Zero-extends the low 4 bytes of a to 32-bit lanes in two interleave steps.
+inline __m128i _v128_cvtepu8_epi32(const __m128i& a)
+{
+    const __m128i zero = _mm_setzero_si128();
+    const __m128i widened16 = _mm_unpacklo_epi8(a, zero);  // 8-bit -> 16-bit lanes
+    return _mm_unpacklo_epi16(widened16, zero);            // 16-bit -> 32-bit lanes
+}
+// Sign-extends the low 4 bytes of a to 32-bit lanes.
+inline __m128i _v128_cvtepi8_epi32(const __m128i& a)
+{
+    // Two byte interleaves replicate every source byte four times, filling one
+    // 32-bit lane per byte; shifting right arithmetically by 24 keeps one copy.
+    const __m128i twice = _mm_unpacklo_epi8(a, a);
+    const __m128i fourfold = _mm_unpacklo_epi8(twice, twice);
+    return _mm_srai_epi32(fourfold, 24);
+}
+// 16 >> 32
+// Zero-extends the low four 16-bit lanes of a to 32-bit lanes.
+inline __m128i _v128_cvtepu16_epi32(const __m128i& a)
+{ return _mm_unpacklo_epi16(a, _mm_setzero_si128()); }
+// Sign-extends the low four 16-bit lanes of a to 32-bit lanes.
+inline __m128i _v128_cvtepi16_epi32(const __m128i& a)
+{
+    // Duplicate each 16-bit value into a 32-bit lane, then shift arithmetically.
+    const __m128i doubled = _mm_unpacklo_epi16(a, a);
+    return _mm_srai_epi32(doubled, 16);
+}
+// 32 >> 64
+// Zero-extends the low two 32-bit lanes of a to 64-bit lanes.
+inline __m128i _v128_cvtepu32_epi64(const __m128i& a)
+{ return _mm_unpacklo_epi32(a, _mm_setzero_si128()); }
+// Sign-extends the low two 32-bit lanes of a to 64-bit lanes.
+inline __m128i _v128_cvtepi32_epi64(const __m128i& a)
+{
+    // Shifting by 31 turns every lane into all-zeros or all-ones according to its
+    // sign; interleaving supplies that as the upper half of each 64-bit lane.
+    const __m128i sign_fill = _mm_srai_epi32(a, 31);
+    return _mm_unpacklo_epi32(a, sign_fill);
+}
+
+/** Arithmetic **/
+// Lane-wise 32-bit multiply keeping the low 32 bits of each product, built from
+// the 32x32->64 multiply _mm_mul_epu32.
+inline __m128i _v128_mullo_epi32(const __m128i& a, const __m128i& b)
+{
+    // _mm_mul_epu32 only multiplies lanes 0 and 2, so lanes 1 and 3 are first
+    // moved into those positions by a 64-bit right shift.
+    const __m128i a_odd = _mm_srli_epi64(a, 32);
+    const __m128i b_odd = _mm_srli_epi64(b, 32);
+    const __m128i prod_even = _mm_mul_epu32(a, b);
+    const __m128i prod_odd = _mm_mul_epu32(a_odd, b_odd);
+    // Interleave the products so their low 32-bit halves occupy the low 64 bits of
+    // each temporary, then join those two halves into the result.
+    const __m128i lanes01 = _mm_unpacklo_epi32(prod_even, prod_odd);
+    const __m128i lanes23 = _mm_unpackhi_epi32(prod_even, prod_odd);
+    return _mm_unpacklo_epi64(lanes01, lanes23);
+}
+
+/** Math **/
+// Lane-wise unsigned 32-bit minimum: where a > b the lane is taken from b,
+// otherwise from a.
+inline __m128i _v128_min_epu32(const __m128i& a, const __m128i& b)
+{
+    const __m128i a_is_greater = _v128_comgt_epu32(a, b);
+    return _v128_blendv_epi8(a, b, a_is_greater);
+}
+
+// wrapping SSE4.1
+#else
+// With SSE4.1 enabled each _v128_<name> helper is a direct wrapper around the
+// native _mm_<name> intrinsic; the list mirrors the emulated helpers defined in the
+// !CV_SSE4_1 branch above, so callers can use the _v128_ names in either build.
+OPENCV_HAL_SSE_WRAP_1(cvtepu8_epi16, __m128i)
+OPENCV_HAL_SSE_WRAP_1(cvtepi8_epi16, __m128i)
+OPENCV_HAL_SSE_WRAP_1(cvtepu8_epi32, __m128i)
+OPENCV_HAL_SSE_WRAP_1(cvtepi8_epi32, __m128i)
+OPENCV_HAL_SSE_WRAP_1(cvtepu16_epi32, __m128i)
+OPENCV_HAL_SSE_WRAP_1(cvtepi16_epi32, __m128i)
+OPENCV_HAL_SSE_WRAP_1(cvtepu32_epi64, __m128i)
+OPENCV_HAL_SSE_WRAP_1(cvtepi32_epi64, __m128i)
+OPENCV_HAL_SSE_WRAP_2(min_epu32, __m128i)
+OPENCV_HAL_SSE_WRAP_2(mullo_epi32, __m128i)
+OPENCV_HAL_SSE_WRAP_3(blendv_epi8, __m128i)
+#endif // !CV_SSE4_1
+
+///////////////////////////// Revolutionary /////////////////////////////
+
+/** Convert **/
+// 16 << 8
+// Zero-extends the high 8 bytes of a to 16-bit lanes.
+inline __m128i _v128_cvtepu8_epi16_high(const __m128i& a)
+{ return _mm_unpackhi_epi8(a, _mm_setzero_si128()); }
+// Sign-extends the high 8 bytes of a to 16-bit lanes.
+inline __m128i _v128_cvtepi8_epi16_high(const __m128i& a)
+{
+    // Duplicate each byte into a 16-bit lane, then shift arithmetically by 8.
+    const __m128i doubled = _mm_unpackhi_epi8(a, a);
+    return _mm_srai_epi16(doubled, 8);
+}
+// 32 << 16
+// Zero-extends the high four 16-bit lanes of a to 32-bit lanes.
+inline __m128i _v128_cvtepu16_epi32_high(const __m128i& a)
+{ return _mm_unpackhi_epi16(a, _mm_setzero_si128()); }
+// Sign-extends the high four 16-bit lanes of a to 32-bit lanes.
+inline __m128i _v128_cvtepi16_epi32_high(const __m128i& a)
+{
+    // Duplicate each 16-bit value into a 32-bit lane, then shift arithmetically by 16.
+    const __m128i doubled = _mm_unpackhi_epi16(a, a);
+    return _mm_srai_epi32(doubled, 16);
+}
+// 64 << 32
+// Zero-extends the high two 32-bit lanes of a to 64-bit lanes.
+inline __m128i _v128_cvtepu32_epi64_high(const __m128i& a)
+{ return _mm_unpackhi_epi32(a, _mm_setzero_si128()); }
+// Sign-extends the high two 32-bit lanes of a to 64-bit lanes.
+inline __m128i _v128_cvtepi32_epi64_high(const __m128i& a)
+{
+    // An arithmetic shift by 31 yields the per-lane sign fill used as the upper half.
+    const __m128i sign_fill = _mm_srai_epi32(a, 31);
+    return _mm_unpackhi_epi32(a, sign_fill);
+}
+
+/** Miscellaneous **/
+// Narrows the unsigned 32-bit lanes of a and b into 16-bit lanes; every lane is
+// first clamped to 65535 with an unsigned minimum.
+inline __m128i _v128_packs_epu32(const __m128i& a, const __m128i& b)
+{
+    const __m128i limit = _mm_set1_epi32(65535);
+    __m128i first = _v128_min_epu32(a, limit);
+    __m128i second = _v128_min_epu32(b, limit);
+#if CV_SSE4_1
+    return _mm_packus_epi32(first, second);
+#else
+    // Fallback built on the signed pack _mm_packs_epi32: subtracting 32768 moves the
+    // clamped values into the signed 16-bit range, and subtracting -32768 with
+    // wrapping 16-bit arithmetic afterwards undoes that offset.
+    const __m128i offset32 = _mm_set1_epi32(32768);
+    const __m128i offset16 = _mm_set1_epi16(-32768);
+    first = _mm_sub_epi32(first, offset32);
+    second = _mm_sub_epi32(second, offset32);
+    const __m128i packed = _mm_packs_epi32(first, second);
+    return _mm_sub_epi16(packed, offset16);
+#endif
+}
+
+/** Returns 64-bit lane i of a.
+ *
+ * When the native _mm_extract_epi64 is usable (see the condition below) it is called
+ * directly and CV__SIMD_NATIVE_mm_extract_epi64 is defined; otherwise the register
+ * is stored to a 16-byte aligned buffer and the lane is read back from memory.
+ *
+ * @tparam i lane index; must be 0 or 1 (enforced at compile time)
+ */
+template<int i>
+inline int64 _v128_extract_epi64(const __m128i& a)
+{
+    // A 128-bit register holds exactly two 64-bit lanes. Without this check any
+    // other index would read outside tmp[] in the fallback path.
+    static_assert(i == 0 || i == 1, "_v128_extract_epi64: lane index must be 0 or 1");
+#if defined(CV__SIMD_HAVE_mm_extract_epi64) || (CV_SSE4_1 && (defined(__x86_64__)/*GCC*/ || defined(_M_X64)/*MSVC*/))
+#define CV__SIMD_NATIVE_mm_extract_epi64 1
+    return _mm_extract_epi64(a, i);
+#else
+    CV_DECL_ALIGNED(16) int64 tmp[2];
+    _mm_store_si128((__m128i*)tmp, a);
+    return tmp[i];
+#endif
+}
+
+CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
+
+//! @endcond
+
+} // cv::
+
+#endif // OPENCV_HAL_INTRIN_SSE_EM_HPP
--- a/3rdparty/opencv/inc/opencv2/core/hal/intrin_vsx.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/hal/intrin_vsx.hpp
--- a/3rdparty/opencv/inc/opencv2/core/hal/intrin_wasm.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/hal/intrin_wasm.hpp
--- a/3rdparty/opencv/inc/opencv2/core/hal/msa_macros.h
+++ b/3rdparty/opencv/inc/opencv2/core/hal/msa_macros.h
--- a/3rdparty/opencv/inc/opencv2/core/hal/simd_utils.impl.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/hal/simd_utils.impl.hpp
@@ -0,0 +1,146 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+// This header is not standalone. Don't include directly, use "intrin.hpp" instead.
+#ifdef OPENCV_HAL_INTRIN_HPP  // defined in intrin.hpp
+
+
+#if CV_SIMD128 || CV_SIMD128_CPP
+
+// Maps a scalar lane type to the 128-bit vector type holding lanes of that type,
+// exposed as Type2Vec128_Traits<T>::vec_type. The primary template is declared but
+// never defined, so only the scalar types listed below can be used.
+template<typename _T> struct Type2Vec128_Traits;
+// Defines the specialization for one (scalar type, vector type) pair. The macro
+// body deliberately omits the trailing ';' -- it is supplied at each use site.
+#define CV_INTRIN_DEF_TYPE2VEC128_TRAITS(type_, vec_type_) \
+    template<> struct Type2Vec128_Traits<type_> \
+    { \
+        typedef vec_type_ vec_type; \
+    }
+
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(uchar, v_uint8x16);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(schar, v_int8x16);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(ushort, v_uint16x8);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(short, v_int16x8);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(unsigned, v_uint32x4);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(int, v_int32x4);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(float, v_float32x4);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(uint64, v_uint64x2);
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(int64, v_int64x2);
+// The double mapping exists only when 64-bit float lanes are enabled.
+#if CV_SIMD128_64F
+CV_INTRIN_DEF_TYPE2VEC128_TRAITS(double, v_float64x2);
+#endif
+
+// Type-generic front end for the v_setall_<suffix>() functions: v_setall<T>(a)
+// forwards to the suffix-specific function for T and returns the 128-bit vector
+// type chosen by Type2Vec128_Traits<T>. The primary template is only declared;
+// each supported scalar type has an explicit specialization below.
+template<typename _T> static inline
+typename Type2Vec128_Traits<_T>::vec_type v_setall(const _T& a);
+
+template<> inline Type2Vec128_Traits< uchar>::vec_type v_setall< uchar>(const  uchar& a) { return v_setall_u8(a); }
+template<> inline Type2Vec128_Traits< schar>::vec_type v_setall< schar>(const  schar& a) { return v_setall_s8(a); }
+template<> inline Type2Vec128_Traits<ushort>::vec_type v_setall<ushort>(const ushort& a) { return v_setall_u16(a); }
+template<> inline Type2Vec128_Traits< short>::vec_type v_setall< short>(const  short& a) { return v_setall_s16(a); }
+template<> inline Type2Vec128_Traits<  uint>::vec_type v_setall<  uint>(const   uint& a) { return v_setall_u32(a); }
+template<> inline Type2Vec128_Traits<   int>::vec_type v_setall<   int>(const    int& a) { return v_setall_s32(a); }
+template<> inline Type2Vec128_Traits<uint64>::vec_type v_setall<uint64>(const uint64& a) { return v_setall_u64(a); }
+template<> inline Type2Vec128_Traits< int64>::vec_type v_setall< int64>(const  int64& a) { return v_setall_s64(a); }
+template<> inline Type2Vec128_Traits< float>::vec_type v_setall< float>(const  float& a) { return v_setall_f32(a); }
+// double is supported only when 64-bit float lanes are enabled.
+#if CV_SIMD128_64F
+template<> inline Type2Vec128_Traits<double>::vec_type v_setall<double>(const double& a) { return v_setall_f64(a); }
+#endif
+
+#endif  // SIMD128
+
+
+#if CV_SIMD256
+
+// Maps a scalar lane type to the 256-bit vector type holding lanes of that type,
+// exposed as Type2Vec256_Traits<T>::vec_type. The primary template is declared but
+// never defined, so only the scalar types listed below can be used.
+template<typename _T> struct Type2Vec256_Traits;
+// Defines the specialization for one (scalar type, vector type) pair. The macro
+// body deliberately omits the trailing ';' -- it is supplied at each use site.
+#define CV_INTRIN_DEF_TYPE2VEC256_TRAITS(type_, vec_type_) \
+    template<> struct Type2Vec256_Traits<type_> \
+    { \
+        typedef vec_type_ vec_type; \
+    }
+
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(uchar, v_uint8x32);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(schar, v_int8x32);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(ushort, v_uint16x16);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(short, v_int16x16);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(unsigned, v_uint32x8);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(int, v_int32x8);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(float, v_float32x8);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(uint64, v_uint64x4);
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(int64, v_int64x4);
+// The double mapping exists only when 64-bit float lanes are enabled.
+#if CV_SIMD256_64F
+CV_INTRIN_DEF_TYPE2VEC256_TRAITS(double, v_float64x4);
+#endif
+
+// Type-generic front end for the v256_setall_<suffix>() functions: v256_setall<T>(a)
+// forwards to the suffix-specific function for T and returns the 256-bit vector
+// type chosen by Type2Vec256_Traits<T>. The primary template is only declared;
+// each supported scalar type has an explicit specialization below.
+template<typename _T> static inline
+typename Type2Vec256_Traits<_T>::vec_type v256_setall(const _T& a);
+
+template<> inline Type2Vec256_Traits< uchar>::vec_type v256_setall< uchar>(const  uchar& a) { return v256_setall_u8(a); }
+template<> inline Type2Vec256_Traits< schar>::vec_type v256_setall< schar>(const  schar& a) { return v256_setall_s8(a); }
+template<> inline Type2Vec256_Traits<ushort>::vec_type v256_setall<ushort>(const ushort& a) { return v256_setall_u16(a); }
+template<> inline Type2Vec256_Traits< short>::vec_type v256_setall< short>(const  short& a) { return v256_setall_s16(a); }
+template<> inline Type2Vec256_Traits<  uint>::vec_type v256_setall<  uint>(const   uint& a) { return v256_setall_u32(a); }
+template<> inline Type2Vec256_Traits<   int>::vec_type v256_setall<   int>(const    int& a) { return v256_setall_s32(a); }
+template<> inline Type2Vec256_Traits<uint64>::vec_type v256_setall<uint64>(const uint64& a) { return v256_setall_u64(a); }
+template<> inline Type2Vec256_Traits< int64>::vec_type v256_setall< int64>(const  int64& a) { return v256_setall_s64(a); }
+template<> inline Type2Vec256_Traits< float>::vec_type v256_setall< float>(const  float& a) { return v256_setall_f32(a); }
+// double is supported only when 64-bit float lanes are enabled.
+#if CV_SIMD256_64F
+template<> inline Type2Vec256_Traits<double>::vec_type v256_setall<double>(const double& a) { return v256_setall_f64(a); }
+#endif
+
+#endif  // SIMD256
+
+
+#if CV_SIMD512
+
+// Maps a scalar lane type to the 512-bit vector type holding lanes of that type,
+// exposed as Type2Vec512_Traits<T>::vec_type. The primary template is declared but
+// never defined, so only the scalar types listed below can be used.
+template<typename _T> struct Type2Vec512_Traits;
+// Defines the specialization for one (scalar type, vector type) pair. The macro
+// body deliberately omits the trailing ';' -- it is supplied at each use site.
+#define CV_INTRIN_DEF_TYPE2VEC512_TRAITS(type_, vec_type_) \
+    template<> struct Type2Vec512_Traits<type_> \
+    { \
+        typedef vec_type_ vec_type; \
+    }
+
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(uchar, v_uint8x64);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(schar, v_int8x64);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(ushort, v_uint16x32);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(short, v_int16x32);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(unsigned, v_uint32x16);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(int, v_int32x16);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(float, v_float32x16);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(uint64, v_uint64x8);
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(int64, v_int64x8);
+// The double mapping exists only when 64-bit float lanes are enabled.
+#if CV_SIMD512_64F
+CV_INTRIN_DEF_TYPE2VEC512_TRAITS(double, v_float64x8);
+#endif
+
+// Type-generic front end for the v512_setall_<suffix>() functions: v512_setall<T>(a)
+// forwards to the suffix-specific function for T and returns the 512-bit vector
+// type chosen by Type2Vec512_Traits<T>. The primary template is only declared;
+// each supported scalar type has an explicit specialization below.
+template<typename _T> static inline
+typename Type2Vec512_Traits<_T>::vec_type v512_setall(const _T& a);
+
+template<> inline Type2Vec512_Traits< uchar>::vec_type v512_setall< uchar>(const  uchar& a) { return v512_setall_u8(a); }
+template<> inline Type2Vec512_Traits< schar>::vec_type v512_setall< schar>(const  schar& a) { return v512_setall_s8(a); }
+template<> inline Type2Vec512_Traits<ushort>::vec_type v512_setall<ushort>(const ushort& a) { return v512_setall_u16(a); }
+template<> inline Type2Vec512_Traits< short>::vec_type v512_setall< short>(const  short& a) { return v512_setall_s16(a); }
+template<> inline Type2Vec512_Traits<  uint>::vec_type v512_setall<  uint>(const   uint& a) { return v512_setall_u32(a); }
+template<> inline Type2Vec512_Traits<   int>::vec_type v512_setall<   int>(const    int& a) { return v512_setall_s32(a); }
+template<> inline Type2Vec512_Traits<uint64>::vec_type v512_setall<uint64>(const uint64& a) { return v512_setall_u64(a); }
+template<> inline Type2Vec512_Traits< int64>::vec_type v512_setall< int64>(const  int64& a) { return v512_setall_s64(a); }
+template<> inline Type2Vec512_Traits< float>::vec_type v512_setall< float>(const  float& a) { return v512_setall_f32(a); }
+// double is supported only when 64-bit float lanes are enabled.
+#if CV_SIMD512_64F
+template<> inline Type2Vec512_Traits<double>::vec_type v512_setall<double>(const double& a) { return v512_setall_f64(a); }
+#endif
+
+#endif  // SIMD512
+
+
+// vx_setall<T>(a): width-agnostic entry point. CV_SIMD_WIDTH (in bytes) selects
+// which of v_setall / v256_setall / v512_setall it forwards to, and therefore which
+// traits class provides the return type. Any other width is a hard build error.
+#if CV_SIMD_WIDTH == 16
+template<typename _T> static inline
+typename Type2Vec128_Traits<_T>::vec_type vx_setall(const _T& a) { return v_setall(a); }
+#elif CV_SIMD_WIDTH == 32
+template<typename _T> static inline
+typename Type2Vec256_Traits<_T>::vec_type vx_setall(const _T& a) { return v256_setall(a); }
+#elif CV_SIMD_WIDTH == 64
+template<typename _T> static inline
+typename Type2Vec512_Traits<_T>::vec_type vx_setall(const _T& a) { return v512_setall(a); }
+#else
+#error "Build configuration error, unsupported CV_SIMD_WIDTH"
+#endif
+
+
+#endif  // OPENCV_HAL_INTRIN_HPP
--- a/3rdparty/opencv/inc/opencv2/core/mat.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/mat.hpp
--- a/3rdparty/opencv/inc/opencv2/core/mat.inl.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/mat.inl.hpp
--- a/3rdparty/opencv/inc/opencv2/core/matx.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/matx.hpp
--- a/3rdparty/opencv/inc/opencv2/core/neon_utils.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/neon_utils.hpp
@@ -0,0 +1,128 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2015, Itseez Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_HAL_NEON_UTILS_HPP
+#define OPENCV_HAL_NEON_UTILS_HPP
+
+#include "opencv2/core/cvdef.h"
+
+//! @addtogroup core_utils_neon
+//! @{
+
+#if CV_NEON
+
+// Converts both float lanes of v to signed 32-bit integers after adding 0.5 that
+// carries the sign of the respective lane.
+inline int32x2_t cv_vrnd_s32_f32(float32x2_t v)
+{
+    // Plain const locals rather than mutable function-local statics: the values are
+    // compile-time constants, so there is nothing to cache, and a static would add a
+    // guarded one-time initialisation check to every call of this inline helper.
+    const int32x2_t v_sign = vdup_n_s32(1 << 31);
+    const int32x2_t v_05 = vreinterpret_s32_f32(vdup_n_f32(0.5f));
+
+    // Bit pattern of 0.5f OR-ed with the sign bit of each lane yields +0.5 or -0.5.
+    const int32x2_t v_addition = vorr_s32(v_05, vand_s32(v_sign, vreinterpret_s32_f32(v)));
+    return vcvt_s32_f32(vadd_f32(v, vreinterpret_f32_s32(v_addition)));
+}
+
+// Converts all four float lanes of v to signed 32-bit integers after adding 0.5
+// that carries the sign of the respective lane.
+inline int32x4_t cv_vrndq_s32_f32(float32x4_t v)
+{
+    // Plain const locals rather than mutable function-local statics: the values are
+    // compile-time constants, so there is nothing to cache, and a static would add a
+    // guarded one-time initialisation check to every call of this inline helper.
+    const int32x4_t v_sign = vdupq_n_s32(1 << 31);
+    const int32x4_t v_05 = vreinterpretq_s32_f32(vdupq_n_f32(0.5f));
+
+    // Bit pattern of 0.5f OR-ed with the sign bit of each lane yields +0.5 or -0.5.
+    const int32x4_t v_addition = vorrq_s32(v_05, vandq_s32(v_sign, vreinterpretq_s32_f32(v)));
+    return vcvtq_s32_f32(vaddq_f32(v, vreinterpretq_f32_s32(v_addition)));
+}
+
+// Converts both float lanes of v to unsigned 32-bit integers after adding 0.5.
+inline uint32x2_t cv_vrnd_u32_f32(float32x2_t v)
+{
+    // A const local replaces the former mutable function-local static: the constant
+    // needs no caching, and a static would add a guarded initialisation check to
+    // every call of this inline helper.
+    const float32x2_t v_05 = vdup_n_f32(0.5f);
+    return vcvt_u32_f32(vadd_f32(v, v_05));
+}
+
+// Converts all four float lanes of v to unsigned 32-bit integers after adding 0.5.
+inline uint32x4_t cv_vrndq_u32_f32(float32x4_t v)
+{
+    // A const local replaces the former mutable function-local static: the constant
+    // needs no caching, and a static would add a guarded initialisation check to
+    // every call of this inline helper.
+    const float32x4_t v_05 = vdupq_n_f32(0.5f);
+    return vcvtq_u32_f32(vaddq_f32(v, v_05));
+}
+
+// Per-lane reciprocal of val (four lanes): the vrecpeq_f32 estimate is refined by
+// two steps, each multiplying the current value by vrecpsq_f32(val, current).
+inline float32x4_t cv_vrecpq_f32(float32x4_t val)
+{
+    float32x4_t inv = vrecpeq_f32(val);
+    for (int step = 0; step < 2; ++step)
+        inv = vmulq_f32(vrecpsq_f32(val, inv), inv);
+    return inv;
+}
+
+// Per-lane reciprocal of val (two lanes): the vrecpe_f32 estimate is refined by
+// two steps, each multiplying the current value by vrecps_f32(val, current).
+inline float32x2_t cv_vrecp_f32(float32x2_t val)
+{
+    float32x2_t inv = vrecpe_f32(val);
+    for (int step = 0; step < 2; ++step)
+        inv = vmul_f32(vrecps_f32(val, inv), inv);
+    return inv;
+}
+
+// Per-lane reciprocal square root of val (four lanes): the vrsqrteq_f32 estimate
+// is refined by two steps built on vrsqrtsq_f32.
+inline float32x4_t cv_vrsqrtq_f32(float32x4_t val)
+{
+    float32x4_t est = vrsqrteq_f32(val);
+    for (int step = 0; step < 2; ++step)
+    {
+        const float32x4_t est_sq = vmulq_f32(est, est);
+        est = vmulq_f32(vrsqrtsq_f32(est_sq, val), est);
+    }
+    return est;
+}
+
+// Per-lane reciprocal square root of val (two lanes): the vrsqrte_f32 estimate is
+// refined by two steps built on vrsqrts_f32.
+inline float32x2_t cv_vrsqrt_f32(float32x2_t val)
+{
+    float32x2_t est = vrsqrte_f32(val);
+    for (int step = 0; step < 2; ++step)
+    {
+        const float32x2_t est_sq = vmul_f32(est, est);
+        est = vmul_f32(vrsqrts_f32(est_sq, val), est);
+    }
+    return est;
+}
+
+// Per-lane square root of val (four lanes), computed as the reciprocal of the
+// reciprocal square root.
+inline float32x4_t cv_vsqrtq_f32(float32x4_t val)
+{
+    const float32x4_t inv_root = cv_vrsqrtq_f32(val);
+    return cv_vrecpq_f32(inv_root);
+}
+
+// Per-lane square root of val (two lanes), computed as the reciprocal of the
+// reciprocal square root.
+inline float32x2_t cv_vsqrt_f32(float32x2_t val)
+{
+    const float32x2_t inv_root = cv_vrsqrt_f32(val);
+    return cv_vrecp_f32(inv_root);
+}
+
+#endif
+
+//! @}
+
+#endif // OPENCV_HAL_NEON_UTILS_HPP
--- a/3rdparty/opencv/inc/opencv2/core/ocl.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/ocl.hpp
@@ -0,0 +1,917 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the OpenCV Foundation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_OPENCL_HPP
+#define OPENCV_OPENCL_HPP
+
+#include "opencv2/core.hpp"
+#include <typeinfo>
+#include <typeindex>
+
+namespace cv { namespace ocl {
+
+//! @addtogroup core_opencl
+//! @{
+
+// Module-level OpenCL entry points; only the declarations are visible here.
+// NOTE(review): judging by the names, the have*() functions report availability of
+// OpenCL / AMD BLAS / AMD FFT, useOpenCL()/setUseOpenCL() read and set a runtime
+// switch, and finish() waits for queued work -- confirm against the implementation.
+CV_EXPORTS_W bool haveOpenCL();
+CV_EXPORTS_W bool useOpenCL();
+CV_EXPORTS_W bool haveAmdBlas();
+CV_EXPORTS_W bool haveAmdFft();
+CV_EXPORTS_W void setUseOpenCL(bool flag);
+CV_EXPORTS_W void finish();
+
+// NOTE(review): presumably reports Shared Virtual Memory support -- confirm.
+CV_EXPORTS bool haveSVM();
+
+// Forward declarations of the cv::ocl classes so they can refer to each other
+// before their definitions.
+class CV_EXPORTS Context;
+class CV_EXPORTS_W_SIMPLE Device;
+class CV_EXPORTS Kernel;
+class CV_EXPORTS Program;
+class CV_EXPORTS ProgramSource;
+class CV_EXPORTS Queue;
+class CV_EXPORTS PlatformInfo;
+class CV_EXPORTS Image2D;
+
+/** @brief Handle to an OpenCL device that exposes its properties through const accessors.
+ *
+ * The object stores one pointer to an opaque Impl (member `p`); empty() reports
+ * whether that pointer is null. Only declarations are visible in this header; the
+ * accessors are implemented elsewhere.
+ */
+class CV_EXPORTS_W_SIMPLE Device
+{
+public:
+    CV_WRAP Device() CV_NOEXCEPT;
+    // NOTE(review): `d` is presumably a cl_device_id, as documented for fromHandle() below -- confirm.
+    explicit Device(void* d);
+    Device(const Device& d);
+    Device& operator = (const Device& d);
+    Device(Device&& d) CV_NOEXCEPT;
+    Device& operator = (Device&& d) CV_NOEXCEPT;
+    CV_WRAP ~Device();
+
+    void set(void* d);
+
+    // Device type bit flags. TYPE_DGPU and TYPE_IGPU are TYPE_GPU plus one extra
+    // high bit (bit 16 and bit 17 respectively); TYPE_ALL has every bit set.
+    enum
+    {
+        TYPE_DEFAULT     = (1 << 0),
+        TYPE_CPU         = (1 << 1),
+        TYPE_GPU         = (1 << 2),
+        TYPE_ACCELERATOR = (1 << 3),
+        TYPE_DGPU        = TYPE_GPU + (1 << 16),
+        TYPE_IGPU        = TYPE_GPU + (1 << 17),
+        TYPE_ALL         = 0xFFFFFFFF
+    };
+
+    CV_WRAP String name() const;
+    CV_WRAP String extensions() const;
+    CV_WRAP bool isExtensionSupported(const String& extensionName) const;
+    CV_WRAP String version() const;
+    CV_WRAP String vendorName() const;
+    CV_WRAP String OpenCL_C_Version() const;
+    CV_WRAP String OpenCLVersion() const;
+    CV_WRAP int deviceVersionMajor() const;
+    CV_WRAP int deviceVersionMinor() const;
+    CV_WRAP String driverVersion() const;
+    void* ptr() const;
+
+    // NOTE(review): presumably a combination of the TYPE_* flags above -- confirm.
+    CV_WRAP int type() const;
+
+    CV_WRAP int addressBits() const;
+    CV_WRAP bool available() const;
+    CV_WRAP bool compilerAvailable() const;
+    CV_WRAP bool linkerAvailable() const;
+
+    // Floating-point capability bit flags.
+    // NOTE(review): presumably the values combined in the *FPConfig() results below -- confirm.
+    enum
+    {
+        FP_DENORM=(1 << 0),
+        FP_INF_NAN=(1 << 1),
+        FP_ROUND_TO_NEAREST=(1 << 2),
+        FP_ROUND_TO_ZERO=(1 << 3),
+        FP_ROUND_TO_INF=(1 << 4),
+        FP_FMA=(1 << 5),
+        FP_SOFT_FLOAT=(1 << 6),
+        FP_CORRECTLY_ROUNDED_DIVIDE_SQRT=(1 << 7)
+    };
+    CV_WRAP int doubleFPConfig() const;
+    CV_WRAP int singleFPConfig() const;
+    CV_WRAP int halfFPConfig() const;
+
+    CV_WRAP bool endianLittle() const;
+    CV_WRAP bool errorCorrectionSupport() const;
+
+    // Execution capability bit flags.
+    // NOTE(review): presumably the values combined in executionCapabilities() -- confirm.
+    enum
+    {
+        EXEC_KERNEL=(1 << 0),
+        EXEC_NATIVE_KERNEL=(1 << 1)
+    };
+    CV_WRAP int executionCapabilities() const;
+
+    CV_WRAP size_t globalMemCacheSize() const;
+
+    // Global memory cache kinds (plain enumerators, not bit flags).
+    // NOTE(review): presumably the values returned by globalMemCacheType() -- confirm.
+    enum
+    {
+        NO_CACHE=0,
+        READ_ONLY_CACHE=1,
+        READ_WRITE_CACHE=2
+    };
+    CV_WRAP int globalMemCacheType() const;
+    CV_WRAP int globalMemCacheLineSize() const;
+    CV_WRAP size_t globalMemSize() const;
+
+    CV_WRAP size_t localMemSize() const;
+    // Local memory kinds (plain enumerators, not bit flags).
+    // NOTE(review): presumably the values returned by localMemType() -- confirm.
+    enum
+    {
+        NO_LOCAL_MEM=0,
+        LOCAL_IS_LOCAL=1,
+        LOCAL_IS_GLOBAL=2
+    };
+    CV_WRAP int localMemType() const;
+    CV_WRAP bool hostUnifiedMemory() const;
+
+    CV_WRAP bool imageSupport() const;
+
+    CV_WRAP bool imageFromBufferSupport() const;
+    uint imagePitchAlignment() const;
+    uint imageBaseAddressAlignment() const;
+
+    /// deprecated, use isExtensionSupported() method (probably with "cl_khr_subgroups" value)
+    CV_WRAP bool intelSubgroupsSupport() const;
+
+    CV_WRAP size_t image2DMaxWidth() const;
+    CV_WRAP size_t image2DMaxHeight() const;
+
+    CV_WRAP size_t image3DMaxWidth() const;
+    CV_WRAP size_t image3DMaxHeight() const;
+    CV_WRAP size_t image3DMaxDepth() const;
+
+    CV_WRAP size_t imageMaxBufferSize() const;
+    CV_WRAP size_t imageMaxArraySize() const;
+
+    // Vendor identifiers compared against vendorID() by isAMD() / isIntel() / isNVidia().
+    enum
+    {
+        UNKNOWN_VENDOR=0,
+        VENDOR_AMD=1,
+        VENDOR_INTEL=2,
+        VENDOR_NVIDIA=3
+    };
+    CV_WRAP int vendorID() const;
+    // FIXIT
+    // dev.isAMD() doesn't work for OpenCL CPU devices from AMD OpenCL platform.
+    // This method should use platform name instead of vendor name.
+    // After fix restore code in arithm.cpp: ocl_compare()
+    CV_WRAP inline bool isAMD() const { return vendorID() == VENDOR_AMD; }
+    CV_WRAP inline bool isIntel() const { return vendorID() == VENDOR_INTEL; }
+    CV_WRAP inline bool isNVidia() const { return vendorID() == VENDOR_NVIDIA; }
+
+    CV_WRAP int maxClockFrequency() const;
+    CV_WRAP int maxComputeUnits() const;
+    CV_WRAP int maxConstantArgs() const;
+    CV_WRAP size_t maxConstantBufferSize() const;
+
+    CV_WRAP size_t maxMemAllocSize() const;
+    CV_WRAP size_t maxParameterSize() const;
+
+    CV_WRAP int maxReadImageArgs() const;
+    CV_WRAP int maxWriteImageArgs() const;
+    CV_WRAP int maxSamplers() const;
+
+    CV_WRAP size_t maxWorkGroupSize() const;
+    CV_WRAP int maxWorkItemDims() const;
+    // Writes through the caller-supplied pointer instead of returning a value.
+    // NOTE(review): the required array length is not visible here; presumably
+    // maxWorkItemDims() entries -- confirm.
+    void maxWorkItemSizes(size_t*) const;
+
+    CV_WRAP int memBaseAddrAlign() const;
+
+    CV_WRAP int nativeVectorWidthChar() const;
+    CV_WRAP int nativeVectorWidthShort() const;
+    CV_WRAP int nativeVectorWidthInt() const;
+    CV_WRAP int nativeVectorWidthLong() const;
+    CV_WRAP int nativeVectorWidthFloat() const;
+    CV_WRAP int nativeVectorWidthDouble() const;
+    CV_WRAP int nativeVectorWidthHalf() const;
+
+    CV_WRAP int preferredVectorWidthChar() const;
+    CV_WRAP int preferredVectorWidthShort() const;
+    CV_WRAP int preferredVectorWidthInt() const;
+    CV_WRAP int preferredVectorWidthLong() const;
+    CV_WRAP int preferredVectorWidthFloat() const;
+    CV_WRAP int preferredVectorWidthDouble() const;
+    CV_WRAP int preferredVectorWidthHalf() const;
+
+    CV_WRAP size_t printfBufferSize() const;
+    CV_WRAP size_t profilingTimerResolution() const;
+
+    CV_WRAP static const Device& getDefault();
+
+    /**
+     * @param d OpenCL handle (cl_device_id). clRetainDevice() is called on success.
+     *
+     * @note Ownership of the passed device is passed to OpenCV on success.
+     * The caller should additionally call `clRetainDevice` on it if it intends
+     * to continue using the device.
+     *
+     * NOTE(review): the @param text says OpenCV retains the handle while the note asks
+     * the caller to retain it as well; which of the two applies cannot be determined
+     * from this header -- confirm against the implementation.
+      */
+    static Device fromHandle(void* d);
+
+    struct Impl;
+    // Accessors for the opaque implementation pointer.
+    inline Impl* getImpl() const { return (Impl*)p; }
+    inline bool empty() const { return !p; }
+protected:
+    Impl* p;
+};
+
+
+/** @brief Handle to an OpenCL context.
+ *
+ * The object stores one pointer to an opaque Impl (member `p`, currently public);
+ * empty() reports whether that pointer is null. Only declarations are visible in
+ * this header.
+ */
+class CV_EXPORTS Context
+{
+public:
+    Context() CV_NOEXCEPT;
+    explicit Context(int dtype);  //!< @deprecated
+    ~Context();
+    Context(const Context& c);
+    Context& operator= (const Context& c);
+    Context(Context&& c) CV_NOEXCEPT;
+    Context& operator = (Context&& c) CV_NOEXCEPT;
+
+    /** @deprecated */
+    bool create();
+    /** @deprecated */
+    bool create(int dtype);
+
+    size_t ndevices() const;
+    // NOTE(review): idx is presumably expected to be below ndevices() -- confirm.
+    Device& device(size_t idx) const;
+    // errmsg is a non-const reference, i.e. an output argument.
+    // NOTE(review): presumably receives the build log on failure -- confirm.
+    Program getProg(const ProgramSource& prog,
+                    const String& buildopt, String& errmsg);
+    void unloadProg(Program& prog);
+
+
+    /** Get thread-local OpenCL context (initialize if necessary) */
+    // The branch below is disabled (`#if 0`): the parameterless signature is the one
+    // marked for OpenCV 5.0, while the active overload keeps the `initialize` flag.
+#if 0  // OpenCV 5.0
+    static Context& getDefault();
+#else
+    static Context& getDefault(bool initialize = true);
+#endif
+
+    /** @returns cl_context value */
+    void* ptr() const;
+
+    /**
+     * @brief Get OpenCL context property specified on context creation
+     * @param propertyId Property id (CL_CONTEXT_* as defined in cl_context_properties type)
+     * @returns Property value if property was specified on clCreateContext, or NULL if context created without the property
+     */
+    void* getOpenCLContextProperty(int propertyId) const;
+
+    bool useSVM() const;
+    void setUseSVM(bool enabled);
+
+    /**
+     * @param context OpenCL handle (cl_context). clRetainContext() is called on success
+     */
+    static Context fromHandle(void* context);
+    static Context fromDevice(const ocl::Device& device);
+    // Static factory; unlike the deprecated member create() overloads above it
+    // returns a new Context instead of modifying *this.
+    // NOTE(review): the format of `configuration` is not visible here -- see the implementation.
+    static Context create(const std::string& configuration);
+
+    void release();
+
+    // Polymorphic base class for objects stored through setUserContext().
+    class CV_EXPORTS UserContext {
+    public:
+        virtual ~UserContext();
+    };
+    // Typed convenience wrappers: the entry is keyed by typeid(T), and the getter
+    // converts the stored base pointer back with std::dynamic_pointer_cast<T>.
+    template <typename T>
+    inline void setUserContext(const std::shared_ptr<T>& userContext) {
+        setUserContext(typeid(T), userContext);
+    }
+    template <typename T>
+    inline std::shared_ptr<T> getUserContext() {
+        return std::dynamic_pointer_cast<T>(getUserContext(typeid(T)));
+    }
+    // Untyped overloads used by the templates above.
+    void setUserContext(std::type_index typeId, const std::shared_ptr<UserContext>& userContext);
+    std::shared_ptr<UserContext> getUserContext(std::type_index typeId);
+
+    struct Impl;
+    inline Impl* getImpl() const { return (Impl*)p; }
+    inline bool empty() const { return !p; }
+// TODO OpenCV 5.0
+//protected:
+    Impl* p;
+};
+
+/** @deprecated
+ * Handle to an OpenCL platform. The object stores one pointer to an opaque Impl
+ * (member `p`); empty() reports whether that pointer is null.
+ */
+class CV_EXPORTS Platform
+{
+public:
+    Platform() CV_NOEXCEPT;
+    ~Platform();
+    Platform(const Platform& p);
+    Platform& operator = (const Platform& p);
+    Platform(Platform&& p) CV_NOEXCEPT;
+    Platform& operator = (Platform&& p) CV_NOEXCEPT;
+
+    // NOTE(review): presumably the underlying cl_platform_id -- confirm.
+    void* ptr() const;
+
+    /** @deprecated */
+    static Platform& getDefault();
+
+    struct Impl;
+    // Accessors for the opaque implementation pointer.
+    inline Impl* getImpl() const { return (Impl*)p; }
+    inline bool empty() const { return !p; }
+protected:
+    Impl* p;
+};
+
+/** @brief Attaches an OpenCL context to OpenCV
+@note
+  OpenCV checks whether an available OpenCL platform is named platformName, then assigns the
+  context to OpenCV and calls the `clRetainContext` function. The deviceID device is used as the
+  target device and a new command queue is created.
+@param platformName name of the OpenCL platform to attach; this string is used to check whether the platform is available to OpenCV at runtime
+@param platformID ID of the platform the attached context was created for
+@param context OpenCL context to be attached to OpenCV
+@param deviceID ID of the device; it must have been created from the attached context
+*/
+CV_EXPORTS void attachContext(const String& platformName, void* platformID, void* context, void* deviceID);
+
+/** @brief Convert an OpenCL buffer to UMat
+@note
+  The OpenCL buffer (cl_mem_buffer) should contain 2D image data compatible with OpenCV. Memory
+  content is not copied from `clBuffer` to UMat. Instead, the buffer handle is assigned to the UMat
+  and `clRetainMemObject` is called.
+@param cl_mem_buffer source clBuffer handle
+@param step number of bytes in a single row
+@param rows number of rows
+@param cols number of columns
+@param type OpenCV type of the image
+@param dst destination UMat
+*/
+CV_EXPORTS void convertFromBuffer(void* cl_mem_buffer, size_t step, int rows, int cols, int type, UMat& dst);
+
+/** @brief Convert an OpenCL image2d_t to UMat
+@note
+  The OpenCL `image2d_t` (cl_mem_image) should be compatible with OpenCV UMat formats. Memory
+  content is copied from the image to the UMat with the `clEnqueueCopyImageToBuffer` function.
+@param cl_mem_image source image2d_t handle
+@param dst destination UMat
+*/
+CV_EXPORTS void convertFromImage(void* cl_mem_image, UMat& dst);
+
+// TODO Move to internal header
+// Declared without CV_EXPORTS, unlike the neighbouring functions.
+// NOTE(review): presumably fills `ctx` from raw OpenCL platform / context / device
+// handles -- confirm against the implementation.
+/// @deprecated
+void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device);
+
+/** @brief Handle to an OpenCL command queue.
+ *
+ * The object stores one pointer to an opaque Impl (member `p`); empty() reports
+ * whether that pointer is null. Only declarations are visible in this header.
+ */
+class CV_EXPORTS Queue
+{
+public:
+    Queue() CV_NOEXCEPT;
+    explicit Queue(const Context& c, const Device& d=Device());
+    ~Queue();
+    Queue(const Queue& q);
+    Queue& operator = (const Queue& q);
+    Queue(Queue&& q) CV_NOEXCEPT;
+    Queue& operator = (Queue&& q) CV_NOEXCEPT;
+
+    // Both arguments default to default-constructed objects.
+    // NOTE(review): how an empty Context / Device is resolved is not visible here -- confirm.
+    bool create(const Context& c=Context(), const Device& d=Device());
+    void finish();
+    // NOTE(review): presumably the underlying cl_command_queue -- confirm.
+    void* ptr() const;
+    static Queue& getDefault();
+
+    /// @brief Returns OpenCL command queue with enable profiling mode support
+    const Queue& getProfilingQueue() const;
+
+    // Impl is declared a friend, giving the implementation access to the protected pointer.
+    struct Impl; friend struct Impl;
+    inline Impl* getImpl() const { return p; }
+    inline bool empty() const { return !p; }
+protected:
+    Impl* p;
+};
+
+
+/** @brief Describes a single argument passed to an OpenCL kernel (see Kernel::set()).
+ *
+ * Bundles the access flags, the referenced UMat (if any), an optional raw
+ * data pointer with its size, and the two width-scale factors. The static
+ * helpers below build the common flag combinations.
+ */
+class CV_EXPORTS KernelArg
+{
+public:
+    enum { LOCAL=1, READ_ONLY=2, WRITE_ONLY=4, READ_WRITE=6, CONSTANT=8, PTR_ONLY = 16, NO_SIZE=256 };
+    KernelArg(int _flags, UMat* _m, int wscale=1, int iwscale=1, const void* _obj=0, size_t _sz=0);
+    KernelArg() CV_NOEXCEPT;
+
+    // Local-memory argument: no UMat and no object pointer, only a size.
+    static KernelArg Local(size_t localMemSize)
+    {
+        return KernelArg(LOCAL, nullptr, 1, 1, nullptr, localMemSize);
+    }
+
+    // Pointer-only variants: PTR_ONLY combined with the access mode.
+    static KernelArg PtrWriteOnly(const UMat& m)
+    {
+        return KernelArg(PTR_ONLY | WRITE_ONLY, const_cast<UMat*>(&m));
+    }
+    static KernelArg PtrReadOnly(const UMat& m)
+    {
+        return KernelArg(PTR_ONLY | READ_ONLY, const_cast<UMat*>(&m));
+    }
+    static KernelArg PtrReadWrite(const UMat& m)
+    {
+        return KernelArg(PTR_ONLY | READ_WRITE, const_cast<UMat*>(&m));
+    }
+
+    // Matrix variants, with and without the NO_SIZE flag.
+    static KernelArg ReadWrite(const UMat& m, int wscale=1, int iwscale=1)
+    {
+        return KernelArg(READ_WRITE, const_cast<UMat*>(&m), wscale, iwscale);
+    }
+    static KernelArg ReadWriteNoSize(const UMat& m, int wscale=1, int iwscale=1)
+    {
+        return KernelArg(READ_WRITE | NO_SIZE, const_cast<UMat*>(&m), wscale, iwscale);
+    }
+    static KernelArg ReadOnly(const UMat& m, int wscale=1, int iwscale=1)
+    {
+        return KernelArg(READ_ONLY, const_cast<UMat*>(&m), wscale, iwscale);
+    }
+    static KernelArg WriteOnly(const UMat& m, int wscale=1, int iwscale=1)
+    {
+        return KernelArg(WRITE_ONLY, const_cast<UMat*>(&m), wscale, iwscale);
+    }
+    static KernelArg ReadOnlyNoSize(const UMat& m, int wscale=1, int iwscale=1)
+    {
+        return KernelArg(READ_ONLY | NO_SIZE, const_cast<UMat*>(&m), wscale, iwscale);
+    }
+    static KernelArg WriteOnlyNoSize(const UMat& m, int wscale=1, int iwscale=1)
+    {
+        return KernelArg(WRITE_ONLY | NO_SIZE, const_cast<UMat*>(&m), wscale, iwscale);
+    }
+
+    static KernelArg Constant(const Mat& m);
+
+    // Constant argument built from a raw array: `arr` and `n` are stored as obj / sz.
+    template<typename _Tp> static KernelArg Constant(const _Tp* arr, size_t n)
+    {
+        return KernelArg(CONSTANT, nullptr, 1, 1, (void*)arr, n);
+    }
+
+    int flags;
+    UMat* m;
+    const void* obj;
+    size_t sz;
+    int wscale, iwscale;
+};
+
+
+class CV_EXPORTS Kernel // OpenCL kernel wrapper: created by name from a Program/ProgramSource, configured via set()/args(), launched via run()/run_()/runTask()
+{
+public:
+    Kernel() CV_NOEXCEPT;
+    Kernel(const char* kname, const Program& prog);
+    Kernel(const char* kname, const ProgramSource& prog,
+           const String& buildopts = String(), String* errmsg=0);
+    ~Kernel();
+    Kernel(const Kernel& k);
+    Kernel& operator = (const Kernel& k);
+    Kernel(Kernel&& k) CV_NOEXCEPT;
+    Kernel& operator = (Kernel&& k) CV_NOEXCEPT;
+
+    bool empty() const;
+    bool create(const char* kname, const Program& prog);
+    bool create(const char* kname, const ProgramSource& prog,
+                const String& buildopts, String* errmsg=0);
+
+    int set(int i, const void* value, size_t sz); // the returned int is fed back as the next argument index by set_args_() below
+    int set(int i, const Image2D& image2D);
+    int set(int i, const UMat& m);
+    int set(int i, const KernelArg& arg);
+    template<typename _Tp> int set(int i, const _Tp& value) // generic overload: forwards the value's address and sizeof(value)
+    { return set(i, &value, sizeof(value)); }
+
+
+protected:
+    template<typename _Tp0> inline
+    int set_args_(int i, const _Tp0& a0) { return set(i, a0); } // recursion end: last argument
+    template<typename _Tp0, typename... _Tps> inline
+    int set_args_(int i, const _Tp0& a0, const _Tps&... rest_args) { i = set(i, a0); return set_args_(i, rest_args...); } // sets a0, then continues at the index returned by set()
+public:
+    /** @brief Set up the OpenCL kernel arguments.
+    Avoid calling the set(i, ...) methods directly.
+    @code
+    bool ok = kernel
+        .args(
+            srcUMat, dstUMat,
+            (float)some_float_param
+        ).run(ndims, globalSize, localSize);
+    if (!ok) return false;
+    @endcode
+    */
+    template<typename... _Tps> inline
+    Kernel& args(const _Tps&... kernel_args) { set_args_(0, kernel_args...); return *this; } // starts at index 0; the final index returned by set_args_() is discarded
+
+    /** @brief Run the OpenCL kernel (globalsize value may be adjusted)
+
+    @param dims the work problem dimensions. It is the length of globalsize and localsize. It can be either 1, 2 or 3.
+    @param globalsize work items for each dimension. It is not the final globalsize passed to
+      OpenCL. Each dimension will be adjusted to the nearest integer divisible by the corresponding
+      value in localsize. If localsize is NULL, it will still be adjusted depending on dims. The
+      adjusted values are greater than or equal to the original values.
+    @param localsize work-group size for each dimension.
+    @param sync specify whether to wait for OpenCL computation to finish before return.
+    @param q command queue
+
+    @note Use run_() if your kernel code doesn't support adjusted globalsize.
+    */
+    bool run(int dims, size_t globalsize[],
+             size_t localsize[], bool sync, const Queue& q=Queue());
+
+    /** @brief Run the OpenCL kernel
+     *
+     * @param dims the work problem dimensions. It is the length of globalsize and localsize. It can be either 1, 2 or 3.
+     * @param globalsize work items for each dimension. This value is passed to OpenCL without changes.
+     * @param localsize work-group size for each dimension.
+     * @param sync specify whether to wait for OpenCL computation to finish before return.
+     * @param q command queue
+     */
+    bool run_(int dims, size_t globalsize[], size_t localsize[], bool sync, const Queue& q=Queue());
+
+    bool runTask(bool sync, const Queue& q=Queue());
+
+    /** @brief Similar to a synchronized run_() call, but returns the kernel execution time
+     *
+     * Separate OpenCL command queue may be used (with CL_QUEUE_PROFILING_ENABLE)
+     * @return Execution time in nanoseconds or negative number on error
+     */
+    int64 runProfiling(int dims, size_t globalsize[], size_t localsize[], const Queue& q=Queue());
+
+    size_t workGroupSize() const;
+    size_t preferedWorkGroupSizeMultiple() const; // NOTE(review): "prefered" is misspelled, but the name is part of the exported API
+    bool compileWorkGroupSize(size_t wsz[]) const;
+    size_t localMemSize() const;
+
+    void* ptr() const;
+    struct Impl;
+
+protected:
+    Impl* p; // opaque implementation pointer
+};
+
+class CV_EXPORTS Program // OpenCL program wrapper created from a ProgramSource plus build flags; state lives behind the opaque Impl pointer `p`
+{
+public:
+    Program() CV_NOEXCEPT;
+    Program(const ProgramSource& src,
+            const String& buildflags, String& errmsg);
+    Program(const Program& prog);
+    Program& operator = (const Program& prog);
+    Program(Program&& prog) CV_NOEXCEPT;
+    Program& operator = (Program&& prog) CV_NOEXCEPT;
+    ~Program();
+
+    bool create(const ProgramSource& src,
+                const String& buildflags, String& errmsg); // NOTE(review): errmsg presumably receives the build log on failure - confirm in the implementation
+
+    void* ptr() const;
+
+    /**
+     * @brief Query device-specific program binary.
+     *
+     * Returns RAW OpenCL executable binary without additional attachments.
+     *
+     * @sa ProgramSource::fromBinary
+     *
+     * @param[out] binary output buffer
+     */
+    void getBinary(std::vector<char>& binary) const;
+
+    struct Impl; friend struct Impl;
+    inline Impl* getImpl() const { return (Impl*)p; } // the cast is a no-op: p is already Impl*
+    inline bool empty() const { return !p; } // true when no implementation object is attached
+protected:
+    Impl* p; // opaque implementation pointer; null means the Program is empty
+public:
+#ifndef OPENCV_REMOVE_DEPRECATED_API
+    // TODO Remove this (legacy API, compiled out when OPENCV_REMOVE_DEPRECATED_API is defined)
+    CV_DEPRECATED bool read(const String& buf, const String& buildflags); // removed, use ProgramSource instead
+    CV_DEPRECATED bool write(String& buf) const; // removed, use getBinary() method instead (RAW OpenCL binary)
+    CV_DEPRECATED const ProgramSource& source() const; // implementation removed
+    CV_DEPRECATED String getPrefix() const; // deprecated, implementation replaced
+    CV_DEPRECATED static String getPrefix(const String& buildflags); // deprecated, implementation replaced
+#endif
+};
+
+
+class CV_EXPORTS ProgramSource // description of an OpenCL program (source text, device binary or SPIR) from which a Program/Kernel can be built
+{
+public:
+    typedef uint64 hash_t; // deprecated
+
+    ProgramSource() CV_NOEXCEPT;
+    explicit ProgramSource(const String& module, const String& name, const String& codeStr, const String& codeHash);
+    explicit ProgramSource(const String& prog); // deprecated
+    explicit ProgramSource(const char* prog); // deprecated
+    ~ProgramSource();
+    ProgramSource(const ProgramSource& prog);
+    ProgramSource& operator = (const ProgramSource& prog);
+    ProgramSource(ProgramSource&& prog) CV_NOEXCEPT;
+    ProgramSource& operator = (ProgramSource&& prog) CV_NOEXCEPT;
+
+    const String& source() const; // deprecated
+    hash_t hash() const; // deprecated
+
+
+    /** @brief Describe OpenCL program binary.
+     * Do not call clCreateProgramWithBinary() and/or clBuildProgram().
+     *
+     * The caller must guarantee that the binary buffer outlives the ProgramSource object (and any of its copies).
+     *
+     * This kind of binary is not portable between platforms in general - it is specific to OpenCL vendor / device / driver version.
+     *
+     * @param module name of program owner module
+     * @param name unique name of program (module+name is used as key for OpenCL program caching)
+     * @param binary buffer address. See buffer lifetime requirement in description.
+     * @param size buffer size
+     * @param buildOptions additional program-related build options passed to clBuildProgram()
+     * @return created ProgramSource object
+     */
+    static ProgramSource fromBinary(const String& module, const String& name,
+            const unsigned char* binary, const size_t size,
+            const cv::String& buildOptions = cv::String());
+
+    /** @brief Describe OpenCL program in SPIR format.
+     * Do not call clCreateProgramWithBinary() and/or clBuildProgram().
+     *
+     * Supports SPIR 1.2 by default (pass '-spir-std=X.Y' in buildOptions to override this behavior)
+     *
+     * The caller must guarantee that the binary buffer outlives the ProgramSource object (and any of its copies).
+     *
+     * Programs in this format are portable between OpenCL implementations with 'khr_spir' extension:
+     * https://www.khronos.org/registry/OpenCL/sdk/2.0/docs/man/xhtml/cl_khr_spir.html
+     * (but they are not portable between different platforms: 32-bit / 64-bit)
+     *
+     * Note: these programs can't support vendor specific extensions, like 'cl_intel_subgroups'.
+     *
+     * @param module name of program owner module
+     * @param name unique name of program (module+name is used as key for OpenCL program caching)
+     * @param binary buffer address. See buffer lifetime requirement in description.
+     * @param size buffer size
+     * @param buildOptions additional program-related build options passed to clBuildProgram()
+     *        (these options are added automatically: '-x spir' and '-spir-std=1.2')
+     * @return created ProgramSource object.
+     */
+    static ProgramSource fromSPIR(const String& module, const String& name,
+            const unsigned char* binary, const size_t size,
+            const cv::String& buildOptions = cv::String());
+
+    //OpenCL 2.1+ only
+    //static Program fromSPIRV(const String& module, const String& name,
+    //        const unsigned char* binary, const size_t size,
+    //        const cv::String& buildOptions = cv::String());
+
+    struct Impl; friend struct Impl;
+    inline Impl* getImpl() const { return (Impl*)p; } // the cast is a no-op: p is already Impl*
+    inline bool empty() const { return !p; } // true when no implementation object is attached
+protected:
+    Impl* p; // opaque implementation pointer; null means the ProgramSource is empty
+};
+
+class CV_EXPORTS PlatformInfo // read-only description of one OpenCL platform: name, vendor, version and its devices
+{
+public:
+    PlatformInfo() CV_NOEXCEPT;
+    /**
+     * @param id pointer to a cl_platform_id (cl_platform_id*)
+     */
+    explicit PlatformInfo(void* id);
+    ~PlatformInfo();
+
+    PlatformInfo(const PlatformInfo& i);
+    PlatformInfo& operator =(const PlatformInfo& i);
+    PlatformInfo(PlatformInfo&& i) CV_NOEXCEPT;
+    PlatformInfo& operator = (PlatformInfo&& i) CV_NOEXCEPT;
+
+    String name() const;
+    String vendor() const;
+
+    /// See CL_PLATFORM_VERSION
+    String version() const;
+    int versionMajor() const;
+    int versionMinor() const;
+
+    int deviceNumber() const; // number of devices; valid indices for getDevice() are 0 .. deviceNumber()-1 (as used by dumpOpenCLInformation())
+    void getDevice(Device& device, int d) const; // writes the d-th device of this platform into `device`
+
+    struct Impl;
+    bool empty() const { return !p; } // true when no implementation object is attached
+protected:
+    Impl* p; // opaque implementation pointer; null means the PlatformInfo is empty
+};
+
+CV_EXPORTS const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf); // NOTE(review): required size of buf is not visible in this header - check the implementation
+CV_EXPORTS const char* typeToStr(int t);
+CV_EXPORTS const char* memopTypeToStr(int t);
+CV_EXPORTS const char* vecopTypeToStr(int t);
+CV_EXPORTS const char* getOpenCLErrorString(int errorCode);
+CV_EXPORTS String kernelToStr(InputArray _kernel, int ddepth = -1, const char * name = NULL);
+CV_EXPORTS void getPlatfomsInfo(std::vector<PlatformInfo>& platform_info); // fills platform_info; NOTE(review): "Platfoms" is misspelled but is the exported name
+
+
+enum OclVectorStrategy
+{
+    // each matrix uses its own vector width
+    OCL_VECTOR_OWN = 0,
+    // all matrices use the maximal vector width found among them
+    // (useful for cases when matrices have different data types)
+    OCL_VECTOR_MAX = 1,
+
+    // default strategy (alias of OCL_VECTOR_OWN)
+    OCL_VECTOR_DEFAULT = OCL_VECTOR_OWN
+};
+
+CV_EXPORTS int predictOptimalVectorWidth(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
+                                         InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
+                                         InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(),
+                                         OclVectorStrategy strat = OCL_VECTOR_DEFAULT); // accepts up to nine arrays; unused slots default to noArray()
+
+CV_EXPORTS int checkOptimalVectorWidth(const int *vectorWidths,
+                                       InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
+                                       InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
+                                       InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(),
+                                       OclVectorStrategy strat = OCL_VECTOR_DEFAULT); // NOTE(review): layout/length of vectorWidths is not visible here - check the implementation
+
+// presumably predictOptimalVectorWidth() with the OCL_VECTOR_MAX strategy - confirm in the implementation
+CV_EXPORTS int predictOptimalVectorWidthMax(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
+                                            InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
+                                            InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray());
+
+CV_EXPORTS void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m); // buildOptions is an in/out parameter (non-const reference)
+
+class CV_EXPORTS Image2D // OpenCL 2D image wrapper built from a UMat; can be passed to Kernel::set()
+{
+public:
+    Image2D() CV_NOEXCEPT;
+
+    /**
+    @param src UMat object from which to get image properties and data
+    @param norm flag to enable the use of normalized channel data types
+    @param alias flag indicating that the image should alias the src UMat. If true, changes to the
+        image or src will be reflected in both objects.
+    */
+    explicit Image2D(const UMat &src, bool norm = false, bool alias = false);
+    Image2D(const Image2D & i);
+    ~Image2D();
+
+    Image2D & operator = (const Image2D & i);
+    Image2D(Image2D &&) CV_NOEXCEPT;
+    Image2D &operator=(Image2D &&) CV_NOEXCEPT;
+
+    /** Indicates whether creating an aliased image should succeed.
+    Depends on the underlying platform and the dimensions of the UMat.
+    */
+    static bool canCreateAlias(const UMat &u);
+
+    /** Indicates whether the image format is supported.
+    */
+    static bool isFormatSupported(int depth, int cn, bool norm);
+
+    void* ptr() const; // NOTE(review): presumably the underlying OpenCL image handle - confirm in the implementation
+protected:
+    struct Impl;
+    Impl* p; // opaque implementation pointer
+};
+
+class CV_EXPORTS Timer // non-copyable start/stop timer bound to a Queue at construction
+{
+public:
+    Timer(const Queue& q); // NOTE(review): not marked explicit, so a Queue converts implicitly to a Timer
+    ~Timer();
+    void start();
+    void stop();
+
+    uint64 durationNS() const; ///< duration in nanoseconds
+
+protected:
+    struct Impl;
+    Impl* const p; // const pointer: the implementation object cannot be re-seated after construction
+
+private:
+    Timer(const Timer&); // disabled (declared private to forbid copying)
+    Timer& operator=(const Timer&); // disabled (declared private to forbid copy assignment)
+};
+
+CV_EXPORTS MatAllocator* getOpenCLAllocator();
+
+
+class CV_EXPORTS_W OpenCLExecutionContext // copyable handle (shared_ptr to Impl) grouping an ocl::Context, an ocl::Device and an ocl::Queue
+{
+public:
+    OpenCLExecutionContext() = default; // default-constructed object is empty (p is null)
+    ~OpenCLExecutionContext() = default;
+
+    OpenCLExecutionContext(const OpenCLExecutionContext&) = default; // copies share the same Impl through the shared_ptr
+    OpenCLExecutionContext(OpenCLExecutionContext&&) = default;
+
+    OpenCLExecutionContext& operator=(const OpenCLExecutionContext&) = default;
+    OpenCLExecutionContext& operator=(OpenCLExecutionContext&&) = default;
+
+    /** Get associated ocl::Context */
+    Context& getContext() const;
+    /** Get the single default associated ocl::Device */
+    Device& getDevice() const;
+    /** Get the single ocl::Queue that is associated with the ocl::Context and
+     *  the single default ocl::Device
+     */
+    Queue& getQueue() const;
+
+    bool useOpenCL() const;
+    void setUseOpenCL(bool flag);
+
+    /** Get OpenCL execution context of current thread.
+     *
+     * Initialize OpenCL execution context if it is empty
+     * - create new
+     * - reuse context of the main thread (threadID = 0)
+     */
+    static OpenCLExecutionContext& getCurrent();
+
+    /** Get OpenCL execution context of current thread (can be empty) */
+    static OpenCLExecutionContext& getCurrentRef();
+
+    /** Bind this OpenCL execution context to current thread.
+     *
+     * Context can't be empty.
+     *
+     * @note clFinish is not called for queue of previous execution context
+     */
+    void bind() const;
+
+    /** Creates a new execution context with the same OpenCV context and device
+     *
+     * @param q OpenCL queue
+     */
+    OpenCLExecutionContext cloneWithNewQueue(const ocl::Queue& q) const;
+    /** @overload */
+    OpenCLExecutionContext cloneWithNewQueue() const;
+
+    /** @brief Creates OpenCL execution context
+     * OpenCV will check if available OpenCL platform has platformName name,
+     * then assign context to OpenCV.
+     * The deviceID device will be used as target device and a new command queue will be created.
+     *
+     * @note On success, ownership of one reference of the context and device is taken.
+     * The caller should additionally call `clRetainContext` and/or `clRetainDevice`
+     * to increase the reference count if it wishes to continue using them.
+     *
+     * @param platformName name of OpenCL platform to attach, this string is used to check if platform is available to OpenCV at runtime
+     * @param platformID ID of platform attached context was created for (cl_platform_id)
+     * @param context OpenCL context to be attached to OpenCV (cl_context)
+     * @param deviceID OpenCL device (cl_device_id)
+     */
+    static OpenCLExecutionContext create(const std::string& platformName, void* platformID, void* context, void* deviceID);
+
+    /** @brief Creates OpenCL execution context
+     *
+     * @param context non-empty OpenCL context
+     * @param device non-empty OpenCL device (must be a part of context)
+     * @param queue non-empty OpenCL queue for provided context and device
+     */
+    static OpenCLExecutionContext create(const Context& context, const Device& device, const ocl::Queue& queue);
+    /** @overload */
+    static OpenCLExecutionContext create(const Context& context, const Device& device);
+
+    struct Impl;
+    inline bool empty() const { return !p; } // true when no Impl is held
+    void release();
+protected:
+    std::shared_ptr<Impl> p; // shared implementation object; null for an empty context
+};
+
+/** Scope guard that binds an OpenCL execution context for the lifetime of the object.
+ *
+ * The constructor remembers the context returned by
+ * OpenCLExecutionContext::getCurrentRef() and binds @p ctx; the destructor
+ * re-binds the remembered context, unless that one was empty.
+ */
+class OpenCLExecutionContextScope
+{
+    OpenCLExecutionContext ctx_;  // context that was current when the scope was entered (may be empty)
+public:
+    inline OpenCLExecutionContextScope(const OpenCLExecutionContext& ctx)
+    {
+        CV_Assert(!ctx.empty());
+
+        // Remember the current context first, then switch to the requested one.
+        ctx_ = OpenCLExecutionContext::getCurrentRef();
+        ctx.bind();
+    }
+
+    inline ~OpenCLExecutionContextScope()
+    {
+        // Nothing to restore when no context was current on entry.
+        if (ctx_.empty())
+            return;
+
+        ctx_.bind();
+    }
+};
+
+#ifdef __OPENCV_BUILD // the declarations below are only visible when __OPENCV_BUILD is defined
+namespace internal {
+
+CV_EXPORTS bool isOpenCLForced(); // when true, OCL_FORCE_CHECK() passes regardless of its condition
+#define OCL_FORCE_CHECK(condition) (cv::ocl::internal::isOpenCLForced() || (condition))
+
+CV_EXPORTS bool isPerformanceCheckBypassed(); // when true, OCL_PERFORMANCE_CHECK() passes regardless of its condition
+#define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::internal::isPerformanceCheckBypassed() || (condition))
+
+CV_EXPORTS bool isCLBuffer(UMat& u);
+
+} // namespace internal
+#endif
+
+//! @}
+
+}}
+
+#endif
--- a/3rdparty/opencv/inc/opencv2/core/ocl_genbase.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/ocl_genbase.hpp
@@ -0,0 +1,69 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the OpenCV Foundation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_OPENCL_GENBASE_HPP
+#define OPENCV_OPENCL_GENBASE_HPP
+
+//! @cond IGNORED
+
+namespace cv {
+namespace ocl {
+
+class ProgramSource;
+
+namespace internal {
+
+struct CV_EXPORTS ProgramEntry // plain record of C-string fields describing a program, plus a ProgramSource pointer
+{
+    const char* module;
+    const char* name;
+    const char* programCode;
+    const char* programHash;
+    ProgramSource* pProgramSource; // NOTE(review): presumably a lazily created/cached ProgramSource - confirm in the implementation
+
+    operator ProgramSource& () const; // implicit conversion to ProgramSource& (defined outside this header)
+};
+
+} } } // namespace
+
+//! @endcond
+
+#endif
--- a/3rdparty/opencv/inc/opencv2/core/opencl/ocl_defs.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/opencl/ocl_defs.hpp
@@ -0,0 +1,82 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+#ifndef OPENCV_CORE_OPENCL_DEFS_HPP
+#define OPENCV_CORE_OPENCL_DEFS_HPP
+
+#include "opencv2/core/utility.hpp"
+#include "cvconfig.h"
+
+namespace cv { namespace ocl {
+#ifdef HAVE_OPENCL
+/// Similar to useOpenCL(), but does not try to load the OpenCL runtime or create an OpenCL context
+CV_EXPORTS bool isOpenCLActivated();
+#else
+static inline bool isOpenCLActivated() { return false; } // build without HAVE_OPENCL: always reports false
+#endif
+}} // namespace
+
+
+//#define CV_OPENCL_RUN_ASSERT
+
+// CV_OCL_RUN_(condition, func, ...): try the OpenCL code path of the enclosing function.
+//   condition : extra precondition for taking the OpenCL path
+//   func      : expression that runs the OpenCL implementation and evaluates to true on success
+//   ...       : optional value returned from the enclosing function when the OpenCL path succeeded
+//
+// Three variants exist when HAVE_OPENCL is defined:
+//   CV_OPENCL_RUN_VERBOSE : prints which implementation is running
+//   CV_OPENCL_RUN_ASSERT  : raises CV_Error when `func` fails instead of falling back
+//   default               : falls back silently; a cv::Exception thrown by `func` is swallowed
+// Without HAVE_OPENCL the macro expands to nothing.
+//
+// `func` is parenthesized in every expansion so that an argument containing
+// lower-precedence operators (e.g. `a || b`) cannot regroup the `&&` chain.
+#ifdef HAVE_OPENCL
+
+#ifdef CV_OPENCL_RUN_VERBOSE
+#define CV_OCL_RUN_(condition, func, ...)                                   \
+    {                                                                       \
+        if (cv::ocl::isOpenCLActivated() && (condition) && (func))          \
+        {                                                                   \
+            printf("%s: OpenCL implementation is running\n", CV_Func);      \
+            fflush(stdout);                                                 \
+            CV_IMPL_ADD(CV_IMPL_OCL);                                       \
+            return __VA_ARGS__;                                             \
+        }                                                                   \
+        else                                                                \
+        {                                                                   \
+            printf("%s: Plain implementation is running\n", CV_Func);       \
+            fflush(stdout);                                                 \
+        }                                                                   \
+    }
+#elif defined CV_OPENCL_RUN_ASSERT
+#define CV_OCL_RUN_(condition, func, ...)                                   \
+    {                                                                       \
+        if (cv::ocl::isOpenCLActivated() && (condition))                    \
+        {                                                                   \
+            if(func)                                                        \
+            {                                                               \
+                CV_IMPL_ADD(CV_IMPL_OCL);                                   \
+            }                                                               \
+            else                                                            \
+            {                                                               \
+                CV_Error(cv::Error::StsAssert, #func);                      \
+            }                                                               \
+            return __VA_ARGS__;                                             \
+        }                                                                   \
+    }
+#else
+#define CV_OCL_RUN_(condition, func, ...)                                   \
+try \
+{ \
+    if (cv::ocl::isOpenCLActivated() && (condition) && (func))              \
+    {                                                                       \
+        CV_IMPL_ADD(CV_IMPL_OCL);                                           \
+        return __VA_ARGS__;                                                 \
+    } \
+} \
+catch (const cv::Exception& e) \
+{ \
+    CV_UNUSED(e); /* TODO: Add some logging here */ \
+}
+#endif
+
+#else
+#define CV_OCL_RUN_(condition, func, ...)
+#endif
+
+// Convenience form for functions returning void (no return value argument).
+#define CV_OCL_RUN(condition, func) CV_OCL_RUN_(condition, func)
+
+#endif // OPENCV_CORE_OPENCL_DEFS_HPP
--- a/3rdparty/opencv/inc/opencv2/core/opencl/opencl_info.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/opencl/opencl_info.hpp
@@ -0,0 +1,212 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#include <iostream>
+
+#include <opencv2/core.hpp>
+#include <opencv2/core/ocl.hpp>
+
+#ifndef DUMP_CONFIG_PROPERTY // may be pre-defined by the including file; the default below expands to nothing
+#define DUMP_CONFIG_PROPERTY(...)
+#endif
+
+#ifndef DUMP_MESSAGE_STDOUT // may be pre-defined by the including file; the default below streams its argument to std::cout
+#define DUMP_MESSAGE_STDOUT(...) do { std::cout << __VA_ARGS__ << std::endl; } while (false)
+#endif
+
+namespace cv {
+
+namespace {
+/** Formats a byte count as a human-readable string, e.g. "1 GB 512 MB 3 B".
+ *
+ * The value is split into GB / MB / KB / B components using 1024 as the unit
+ * base; components that are zero are omitted.
+ *
+ * @param value number of bytes
+ * @return the space-separated components without a trailing space;
+ *         "0 B" when @p value is 0
+ */
+static std::string bytesToStringRepr(size_t value)
+{
+    // With value == 0 no component is emitted; the previous code then indexed
+    // s[s.size() - 1] on an empty string (out of range). Handle it explicitly.
+    if (value == 0)
+        return "0 B";
+
+    const size_t b = value % 1024;
+    value /= 1024;
+
+    const size_t kb = value % 1024;
+    value /= 1024;
+
+    const size_t mb = value % 1024;
+    value /= 1024;
+
+    // Whatever remains is counted in GB (not reduced further).
+    const size_t gb = value;
+
+    std::ostringstream stream;
+
+    if (gb > 0)
+        stream << gb << " GB ";
+    if (mb > 0)
+        stream << mb << " MB ";
+    if (kb > 0)
+        stream << kb << " KB ";
+    if (b > 0)
+        stream << b << " B";
+
+    // Drop the separator left behind when the last emitted component was not "B".
+    std::string s = stream.str();
+    if (!s.empty() && s[s.size() - 1] == ' ')
+        s.erase(s.size() - 1);
+    return s;
+}
+
+/** Maps an OpenCL device to a short type label.
+ *
+ * CPU devices yield "CPU". GPU devices yield "iGPU" when the device reports
+ * host-unified memory and "dGPU" otherwise. Any other type yields "unknown".
+ */
+static String getDeviceTypeString(const cv::ocl::Device& device)
+{
+    const auto kind = device.type();
+
+    if (kind == cv::ocl::Device::TYPE_CPU)
+        return "CPU";
+
+    if (kind != cv::ocl::Device::TYPE_GPU)
+        return "unknown";
+
+    // GPU: host-unified memory decides between the two labels.
+    return device.hostUnifiedMemory() ? "iGPU" : "dGPU";
+}
+} // namespace
+
+static void dumpOpenCLInformation() // prints OpenCL platform/device details via DUMP_MESSAGE_STDOUT and records them via DUMP_CONFIG_PROPERTY
+{
+    using namespace cv::ocl;
+
+    try // any exception raised while querying is reported by the catch (...) at the end
+    {
+        if (!haveOpenCL() || !useOpenCL())
+        {
+            DUMP_MESSAGE_STDOUT("OpenCL is disabled");
+            DUMP_CONFIG_PROPERTY("cv_ocl", "disabled");
+            return;
+        }
+
+        std::vector<PlatformInfo> platforms;
+        cv::ocl::getPlatfomsInfo(platforms);
+        if (platforms.empty())
+        {
+            DUMP_MESSAGE_STDOUT("OpenCL is not available");
+            DUMP_CONFIG_PROPERTY("cv_ocl", "not available");
+            return;
+        }
+
+        DUMP_MESSAGE_STDOUT("OpenCL Platforms: ");
+        for (size_t i = 0; i < platforms.size(); i++) // one line per platform, followed by one line per device
+        {
+            const PlatformInfo* platform = &platforms[i];
+            DUMP_MESSAGE_STDOUT("    " << platform->name());
+            Device current_device; // reused for every device of this platform
+            for (int j = 0; j < platform->deviceNumber(); j++)
+            {
+                platform->getDevice(current_device, j);
+                String deviceTypeStr = getDeviceTypeString(current_device);
+                DUMP_MESSAGE_STDOUT( "        " << deviceTypeStr << ": " << current_device.name() << " (" << current_device.version() << ")");
+                DUMP_CONFIG_PROPERTY( cv::format("cv_ocl_platform_%d_device_%d", (int)i, j ),
+                    cv::format("(Platform=%s)(Type=%s)(Name=%s)(Version=%s)",
+                    platform->name().c_str(), deviceTypeStr.c_str(), current_device.name().c_str(), current_device.version().c_str()) );
+            }
+        }
+        const Device& device = Device::getDefault(); // the rest of the dump describes the default device only
+        if (!device.available())
+            CV_Error(Error::OpenCLInitError, "OpenCL device is not available"); // raised inside the try block above
+
+        DUMP_MESSAGE_STDOUT("Current OpenCL device: ");
+
+        String deviceTypeStr = getDeviceTypeString(device);
+        DUMP_MESSAGE_STDOUT("    Type = " << deviceTypeStr);
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_deviceType", deviceTypeStr);
+
+        DUMP_MESSAGE_STDOUT("    Name = " << device.name());
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_deviceName", device.name());
+
+        DUMP_MESSAGE_STDOUT("    Version = " << device.version());
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_deviceVersion", device.version());
+
+        DUMP_MESSAGE_STDOUT("    Driver version = " << device.driverVersion());
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_driverVersion", device.driverVersion());
+
+        DUMP_MESSAGE_STDOUT("    Address bits = " << device.addressBits());
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_addressBits", device.addressBits());
+
+        DUMP_MESSAGE_STDOUT("    Compute units = " << device.maxComputeUnits());
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_maxComputeUnits", device.maxComputeUnits());
+
+        DUMP_MESSAGE_STDOUT("    Max work group size = " << device.maxWorkGroupSize());
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_maxWorkGroupSize", device.maxWorkGroupSize());
+
+        std::string localMemorySizeStr = bytesToStringRepr(device.localMemSize()); // human-readable form for stdout; the property gets the raw value
+        DUMP_MESSAGE_STDOUT("    Local memory size = " << localMemorySizeStr);
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_localMemSize", device.localMemSize());
+
+        std::string maxMemAllocSizeStr = bytesToStringRepr(device.maxMemAllocSize()); // human-readable form for stdout; the property gets the raw value
+        DUMP_MESSAGE_STDOUT("    Max memory allocation size = " << maxMemAllocSizeStr);
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_maxMemAllocSize", device.maxMemAllocSize());
+
+        const char* doubleSupportStr = device.doubleFPConfig() > 0 ? "Yes" : "No"; // a positive FP config value is reported as "Yes"
+        DUMP_MESSAGE_STDOUT("    Double support = " << doubleSupportStr);
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_haveDoubleSupport", device.doubleFPConfig() > 0);
+
+        const char* halfSupportStr = device.halfFPConfig() > 0 ? "Yes" : "No"; // a positive FP config value is reported as "Yes"
+        DUMP_MESSAGE_STDOUT("    Half support = " << halfSupportStr);
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_haveHalfSupport", device.halfFPConfig() > 0);
+
+        const char* isUnifiedMemoryStr = device.hostUnifiedMemory() ? "Yes" : "No";
+        DUMP_MESSAGE_STDOUT("    Host unified memory = " << isUnifiedMemoryStr);
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_hostUnifiedMemory", device.hostUnifiedMemory());
+
+        DUMP_MESSAGE_STDOUT("    Device extensions:");
+        String extensionsStr = device.extensions();
+        size_t pos = 0;
+        while (pos < extensionsStr.size()) // split the extension list on single spaces, one output line per name
+        {
+            size_t pos2 = extensionsStr.find(' ', pos);
+            if (pos2 == String::npos)
+                pos2 = extensionsStr.size(); // last token: runs to the end of the string
+            if (pos2 > pos) // skip empty tokens produced by consecutive spaces
+            {
+                String extensionName = extensionsStr.substr(pos, pos2 - pos);
+                DUMP_MESSAGE_STDOUT("        " << extensionName);
+            }
+            pos = pos2 + 1;
+        }
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_extensions", extensionsStr); // the property receives the unsplit string
+
+        const char* haveAmdBlasStr = haveAmdBlas() ? "Yes" : "No";
+        DUMP_MESSAGE_STDOUT("    Has AMD Blas = " << haveAmdBlasStr);
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_AmdBlas", haveAmdBlas());
+
+        const char* haveAmdFftStr = haveAmdFft() ? "Yes" : "No";
+        DUMP_MESSAGE_STDOUT("    Has AMD Fft = " << haveAmdFftStr);
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_AmdFft", haveAmdFft());
+
+
+        DUMP_MESSAGE_STDOUT("    Preferred vector width char = " << device.preferredVectorWidthChar());
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthChar", device.preferredVectorWidthChar());
+
+        DUMP_MESSAGE_STDOUT("    Preferred vector width short = " << device.preferredVectorWidthShort());
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthShort", device.preferredVectorWidthShort());
+
+        DUMP_MESSAGE_STDOUT("    Preferred vector width int = " << device.preferredVectorWidthInt());
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthInt", device.preferredVectorWidthInt());
+
+        DUMP_MESSAGE_STDOUT("    Preferred vector width long = " << device.preferredVectorWidthLong());
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthLong", device.preferredVectorWidthLong());
+
+        DUMP_MESSAGE_STDOUT("    Preferred vector width float = " << device.preferredVectorWidthFloat());
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthFloat", device.preferredVectorWidthFloat());
+
+        DUMP_MESSAGE_STDOUT("    Preferred vector width double = " << device.preferredVectorWidthDouble());
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthDouble", device.preferredVectorWidthDouble());
+
+        DUMP_MESSAGE_STDOUT("    Preferred vector width half = " << device.preferredVectorWidthHalf());
+        DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthHalf", device.preferredVectorWidthHalf());
+    }
+    catch (...) // deliberate best-effort: the dump never propagates an exception to the caller
+    {
+        DUMP_MESSAGE_STDOUT("Exception. Can't dump OpenCL info");
+        DUMP_MESSAGE_STDOUT("OpenCL device not available");
+        DUMP_CONFIG_PROPERTY("cv_ocl", "not available");
+    }
+}
+#undef DUMP_MESSAGE_STDOUT
+#undef DUMP_CONFIG_PROPERTY
+
+} // namespace
--- a/3rdparty/opencv/inc/opencv2/core/opencl/opencl_svm.hpp
+++ b/3rdparty/opencv/inc/opencv2/core/opencl/opencl_svm.hpp
@@ -0,0 +1,81 @@
+/* See LICENSE file in the root OpenCV directory */
+
+#ifndef OPENCV_CORE_OPENCL_SVM_HPP
+#define OPENCV_CORE_OPENCL_SVM_HPP
+
+//
+// Internal usage only (binary compatibility is not guaranteed)
+//
+#ifndef __OPENCV_BUILD
+#error Internal header file
+#endif
+
+#if defined(HAVE_OPENCL) && defined(HAVE_OPENCL_SVM)
+#include "runtime/opencl_core.hpp"
+#include "runtime/opencl_svm_20.hpp"
+#include "runtime/opencl_svm_hsa_extension.hpp"
+
+namespace cv { namespace ocl { namespace svm {
+
+// Integer bitmask of SVM capability flags with one named query per flag.
+struct SVMCapabilities
+{
+    // Each enumerator occupies its own bit, so values can be OR-ed together.
+    enum Value
+    {
+        SVM_COARSE_GRAIN_BUFFER = 0x1, SVM_FINE_GRAIN_BUFFER = 0x2,
+        SVM_FINE_GRAIN_SYSTEM = 0x4, SVM_ATOMICS = 0x8
+    };
+    int value_; // raw flag bits
+
+    // Non-explicit: an int converts implicitly to SVMCapabilities and back.
+    SVMCapabilities(int capabilities = 0) { value_ = capabilities; }
+    operator int() const { return value_; }
+    bool isNoSVMSupport() const { return !value_; }
+    bool isSupportCoarseGrainBuffer() const { return (value_ & SVM_COARSE_GRAIN_BUFFER) == SVM_COARSE_GRAIN_BUFFER; }
+    bool isSupportFineGrainBuffer() const { return (value_ & SVM_FINE_GRAIN_BUFFER) == SVM_FINE_GRAIN_BUFFER; }
+    bool isSupportFineGrainSystem() const { return (value_ & SVM_FINE_GRAIN_SYSTEM) == SVM_FINE_GRAIN_SYSTEM; }
+    bool isSupportAtomics() const { return (value_ & SVM_ATOMICS) == SVM_ATOMICS; }
+};
+
+CV_EXPORTS const SVMCapabilities getSVMCapabilitites(const ocl::Context& context); // NOTE: the exported name is spelled "Capabilitites"; callers must use this exact spelling
+
+// Table of SVM function pointers; every entry starts out NULL.
+struct SVMFunctions
+{
+    clSVMAllocAMD_fn               fn_clSVMAlloc;
+    clSVMFreeAMD_fn                fn_clSVMFree;
+    clSetKernelArgSVMPointerAMD_fn fn_clSetKernelArgSVMPointer;
+    // Disabled entries: clSetKernelExecInfoAMD_fn fn_clSetKernelExecInfo; clEnqueueSVMFreeAMD_fn fn_clEnqueueSVMFree.
+    clEnqueueSVMMemcpyAMD_fn       fn_clEnqueueSVMMemcpy;
+    clEnqueueSVMMemFillAMD_fn      fn_clEnqueueSVMMemFill;
+    clEnqueueSVMMapAMD_fn          fn_clEnqueueSVMMap;
+    clEnqueueSVMUnmapAMD_fn        fn_clEnqueueSVMUnmap;
+
+    SVMFunctions()
+        : fn_clSVMAlloc(NULL)
+        , fn_clSVMFree(NULL)
+        , fn_clSetKernelArgSVMPointer(NULL)
+        , fn_clEnqueueSVMMemcpy(NULL)
+        , fn_clEnqueueSVMMemFill(NULL)
+        , fn_clEnqueueSVMMap(NULL)
+        , fn_clEnqueueSVMUnmap(NULL)
+    {}
+
+    bool isValid() const // true only when all seven pointers are non-NULL
+    {
+        const bool coreOk = fn_clSVMAlloc && fn_clSVMFree && fn_clSetKernelArgSVMPointer;
+        return coreOk && fn_clEnqueueSVMMemcpy && fn_clEnqueueSVMMemFill && fn_clEnqueueSVMMap && fn_clEnqueueSVMUnmap;
+    }
+};
+
+// Lifetime note: the SVMFunctions object should be guaranteed to live no shorter than the context.
+CV_EXPORTS const SVMFunctions* getSVMFunctions(const ocl::Context& context);
+// NOTE(review): presumably reports whether SVM should be used for the given UMat usage flags -- confirm against the implementation.
+CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags);
+
+}}} //namespace cv::ocl::svm
+#endif
+
+#endif // OPENCV_CORE_OPENCL_SVM_HPP
+/* End of file. */
--- a/Show More
+++ b/Show More