添加项目文件。

This commit is contained in:
CaiXiang
2025-01-20 10:30:01 +08:00
parent 77371da5d7
commit 752be79e06
1010 changed files with 610100 additions and 0 deletions

View File

@@ -0,0 +1,271 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
// This code is also subject to the license terms in the LICENSE_KinectFusion.md file found in this module's directory
#ifndef __OPENCV_RGBD_COLORED_KINFU_HPP__
#define __OPENCV_RGBD_COLORED_KINFU_HPP__
#include "opencv2/core.hpp"
#include "opencv2/core/affine.hpp"
#include <opencv2/rgbd/volume.hpp>
namespace cv {
namespace colored_kinfu {
//! @addtogroup kinect_fusion
//! @{
struct CV_EXPORTS_W Params
{
CV_WRAP Params(){}
/**
* @brief Constructor for Params
* Sets the initial pose of the TSDF volume.
* @param volumeInitialPoseRot rotation matrix
* @param volumeInitialPoseTransl translation vector
*/
CV_WRAP Params(Matx33f volumeInitialPoseRot, Vec3f volumeInitialPoseTransl)
{
setInitialVolumePose(volumeInitialPoseRot,volumeInitialPoseTransl);
}
/**
* @brief Constructor for Params
* Sets the initial pose of the TSDF volume.
* @param volumeInitialPose 4 by 4 Homogeneous Transform matrix to set the intial pose of TSDF volume
*/
CV_WRAP Params(Matx44f volumeInitialPose)
{
setInitialVolumePose(volumeInitialPose);
}
/**
* @brief Set Initial Volume Pose
* Sets the initial pose of the TSDF volume.
* @param R rotation matrix
* @param t translation vector
*/
CV_WRAP void setInitialVolumePose(Matx33f R, Vec3f t);
/**
* @brief Set Initial Volume Pose
* Sets the initial pose of the TSDF volume.
* @param homogen_tf 4 by 4 Homogeneous Transform matrix to set the intial pose of TSDF volume
*/
CV_WRAP void setInitialVolumePose(Matx44f homogen_tf);
/**
* @brief Default parameters
* A set of parameters which provides better model quality, can be very slow.
*/
CV_WRAP static Ptr<Params> defaultParams();
/** @brief Coarse parameters
A set of parameters which provides better speed, can fail to match frames
in case of rapid sensor motion.
*/
CV_WRAP static Ptr<Params> coarseParams();
/** @brief HashTSDF parameters
A set of parameters suitable for use with HashTSDFVolume
*/
CV_WRAP static Ptr<Params> hashTSDFParams(bool isCoarse);
/** @brief ColoredTSDF parameters
A set of parameters suitable for use with HashTSDFVolume
*/
CV_WRAP static Ptr<Params> coloredTSDFParams(bool isCoarse);
/** @brief frame size in pixels */
CV_PROP_RW Size frameSize;
/** @brief rgb frame size in pixels */
CV_PROP_RW Size rgb_frameSize;
CV_PROP_RW kinfu::VolumeType volumeType;
/** @brief camera intrinsics */
CV_PROP_RW Matx33f intr;
/** @brief rgb camera intrinsics */
CV_PROP_RW Matx33f rgb_intr;
/** @brief pre-scale per 1 meter for input values
Typical values are:
* 5000 per 1 meter for the 16-bit PNG files of TUM database
* 1000 per 1 meter for Kinect 2 device
* 1 per 1 meter for the 32-bit float images in the ROS bag files
*/
CV_PROP_RW float depthFactor;
/** @brief Depth sigma in meters for bilateral smooth */
CV_PROP_RW float bilateral_sigma_depth;
/** @brief Spatial sigma in pixels for bilateral smooth */
CV_PROP_RW float bilateral_sigma_spatial;
/** @brief Kernel size in pixels for bilateral smooth */
CV_PROP_RW int bilateral_kernel_size;
/** @brief Number of pyramid levels for ICP */
CV_PROP_RW int pyramidLevels;
/** @brief Resolution of voxel space
Number of voxels in each dimension.
*/
CV_PROP_RW Vec3i volumeDims;
/** @brief Size of voxel in meters */
CV_PROP_RW float voxelSize;
/** @brief Minimal camera movement in meters
Integrate new depth frame only if camera movement exceeds this value.
*/
CV_PROP_RW float tsdf_min_camera_movement;
/** @brief initial volume pose in meters */
Affine3f volumePose;
/** @brief distance to truncate in meters
Distances to surface that exceed this value will be truncated to 1.0.
*/
CV_PROP_RW float tsdf_trunc_dist;
/** @brief max number of frames per voxel
Each voxel keeps running average of distances no longer than this value.
*/
CV_PROP_RW int tsdf_max_weight;
/** @brief A length of one raycast step
How much voxel sizes we skip each raycast step
*/
CV_PROP_RW float raycast_step_factor;
// gradient delta in voxel sizes
// fixed at 1.0f
// float gradient_delta_factor;
/** @brief light pose for rendering in meters */
CV_PROP_RW Vec3f lightPose;
/** @brief distance theshold for ICP in meters */
CV_PROP_RW float icpDistThresh;
/** angle threshold for ICP in radians */
CV_PROP_RW float icpAngleThresh;
/** number of ICP iterations for each pyramid level */
CV_PROP_RW std::vector<int> icpIterations;
/** @brief Threshold for depth truncation in meters
All depth values beyond this threshold will be set to zero
*/
CV_PROP_RW float truncateThreshold;
};
/** @brief KinectFusion implementation
This class implements a 3d reconstruction algorithm described in
@cite kinectfusion paper.
It takes a sequence of depth images taken from depth sensor
(or any depth images source such as stereo camera matching algorithm or even raymarching renderer).
The output can be obtained as a vector of points and their normals
or can be Phong-rendered from given camera pose.
An internal representation of a model is a voxel cuboid that keeps TSDF values
which are a sort of distances to the surface (for details read the @cite kinectfusion article about TSDF).
There is no interface to that representation yet.
KinFu uses OpenCL acceleration automatically if available.
To enable or disable it explicitly use cv::setUseOptimized() or cv::ocl::setUseOpenCL().
This implementation is based on [kinfu-remake](https://github.com/Nerei/kinfu_remake).
Note that the KinectFusion algorithm was patented and its use may be restricted by
the list of patents mentioned in README.md file in this module directory.
That's why you need to set the OPENCV_ENABLE_NONFREE option in CMake to use KinectFusion.
*/
class CV_EXPORTS_W ColoredKinFu
{
public:
CV_WRAP static Ptr<ColoredKinFu> create(const Ptr<Params>& _params);
virtual ~ColoredKinFu();
/** @brief Get current parameters */
virtual const Params& getParams() const = 0;
/** @brief Renders a volume into an image
Renders a 0-surface of TSDF using Phong shading into a CV_8UC4 Mat.
Light pose is fixed in KinFu params.
@param image resulting image
*/
CV_WRAP virtual void render(OutputArray image) const = 0;
/** @brief Renders a volume into an image
Renders a 0-surface of TSDF using Phong shading into a CV_8UC4 Mat.
Light pose is fixed in KinFu params.
@param image resulting image
@param cameraPose pose of camera to render from. If empty then render from current pose
which is a last frame camera pose.
*/
CV_WRAP virtual void render(OutputArray image, const Matx44f& cameraPose) const = 0;
/** @brief Gets points and normals of current 3d mesh
The order of normals corresponds to order of points.
The order of points is undefined.
@param points vector of points which are 4-float vectors
@param normals vector of normals which are 4-float vectors
*/
CV_WRAP virtual void getCloud(OutputArray points, OutputArray normals) const = 0;
/** @brief Gets points of current 3d mesh
The order of points is undefined.
@param points vector of points which are 4-float vectors
*/
CV_WRAP virtual void getPoints(OutputArray points) const = 0;
/** @brief Calculates normals for given points
@param points input vector of points which are 4-float vectors
@param normals output vector of corresponding normals which are 4-float vectors
*/
CV_WRAP virtual void getNormals(InputArray points, OutputArray normals) const = 0;
/** @brief Resets the algorithm
Clears current model and resets a pose.
*/
CV_WRAP virtual void reset() = 0;
/** @brief Get current pose in voxel space */
virtual const Affine3f getPose() const = 0;
/** @brief Process next depth frame
@param depth input Mat of depth frame
@param rgb input Mat of rgb (colored) frame
@return true if succeeded to align new frame with current scene, false if opposite
*/
CV_WRAP virtual bool update(InputArray depth, InputArray rgb) = 0;
};
//! @}
}
}
#endif

1193
3rdparty/opencv/inc/opencv2/rgbd/depth.hpp vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,62 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
#ifndef OPENCV_RGBD_POSE_GRAPH_HPP
#define OPENCV_RGBD_POSE_GRAPH_HPP
#include "opencv2/core/affine.hpp"
#include "opencv2/core/quaternion.hpp"
namespace cv
{
namespace kinfu
{
namespace detail
{
// ATTENTION! This class is used internally in Large KinFu.
// It has been pushed to publicly available headers for tests only.
// Source compatibility of this API is not guaranteed in the future.
// This class provides tools to solve so-called pose graph problem often arisen in SLAM problems
// The pose graph format, cost function and optimization techniques
// repeat the ones used in Ceres 3D Pose Graph Optimization:
// http://ceres-solver.org/nnls_tutorial.html#other-examples, pose_graph_3d.cc bullet
class CV_EXPORTS_W PoseGraph
{
public:
static Ptr<PoseGraph> create();
virtual ~PoseGraph();
// Node may have any id >= 0
virtual void addNode(size_t _nodeId, const Affine3d& _pose, bool fixed) = 0;
virtual bool isNodeExist(size_t nodeId) const = 0;
virtual bool setNodeFixed(size_t nodeId, bool fixed) = 0;
virtual bool isNodeFixed(size_t nodeId) const = 0;
virtual Affine3d getNodePose(size_t nodeId) const = 0;
virtual std::vector<size_t> getNodesIds() const = 0;
virtual size_t getNumNodes() const = 0;
// Edges have consequent indices starting from 0
virtual void addEdge(size_t _sourceNodeId, size_t _targetNodeId, const Affine3f& _transformation,
const Matx66f& _information = Matx66f::eye()) = 0;
virtual size_t getEdgeStart(size_t i) const = 0;
virtual size_t getEdgeEnd(size_t i) const = 0;
virtual size_t getNumEdges() const = 0;
// checks if graph is connected and each edge connects exactly 2 nodes
virtual bool isValid() const = 0;
// Termination criteria are max number of iterations and min relative energy change to current energy
// Returns number of iterations elapsed or -1 if max number of iterations was reached or failed to optimize
virtual int optimize(const cv::TermCriteria& tc = cv::TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 100, 1e-6)) = 0;
// calculate cost function based on current nodes parameters
virtual double calcEnergy() const = 0;
};
} // namespace detail
} // namespace kinfu
} // namespace cv
#endif /* ifndef OPENCV_RGBD_POSE_GRAPH_HPP */

View File

@@ -0,0 +1,120 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
// This code is also subject to the license terms in the LICENSE_KinectFusion.md file found in this module's directory
#ifndef __OPENCV_RGBD_DYNAFU_HPP__
#define __OPENCV_RGBD_DYNAFU_HPP__
#include "opencv2/core.hpp"
#include "opencv2/core/affine.hpp"
#include "kinfu.hpp"
namespace cv {
namespace dynafu {
/** @brief DynamicFusion implementation
This class implements a 3d reconstruction algorithm as described in @cite dynamicfusion.
It takes a sequence of depth images taken from depth sensor
(or any depth images source such as stereo camera matching algorithm or even raymarching renderer).
The output can be obtained as a vector of points and their normals
or can be Phong-rendered from given camera pose.
It extends the KinectFusion algorithm to handle non-rigidly deforming scenes by maintaining a sparse
set of nodes covering the geometry such that each node contains a warp to transform it from a canonical
space to the live frame.
An internal representation of a model is a voxel cuboid that keeps TSDF values
which are a sort of distances to the surface (for details read the @cite kinectfusion article about TSDF).
There is no interface to that representation yet.
Note that DynamicFusion is based on the KinectFusion algorithm which is patented and its use may be
restricted by the list of patents mentioned in README.md file in this module directory.
That's why you need to set the OPENCV_ENABLE_NONFREE option in CMake to use DynamicFusion.
*/
/** Backwards compatibility for old versions */
using Params = kinfu::Params;
class CV_EXPORTS_W DynaFu
{
public:
CV_WRAP static Ptr<DynaFu> create(const Ptr<kinfu::Params>& _params);
virtual ~DynaFu();
/** @brief Get current parameters */
virtual const kinfu::Params& getParams() const = 0;
/** @brief Renders a volume into an image
Renders a 0-surface of TSDF using Phong shading into a CV_8UC4 Mat.
Light pose is fixed in DynaFu params.
@param image resulting image
@param cameraPose pose of camera to render from. If empty then render from current pose
which is a last frame camera pose.
*/
CV_WRAP virtual void render(OutputArray image, const Matx44f& cameraPose = Matx44f::eye()) const = 0;
/** @brief Gets points and normals of current 3d mesh
The order of normals corresponds to order of points.
The order of points is undefined.
@param points vector of points which are 4-float vectors
@param normals vector of normals which are 4-float vectors
*/
CV_WRAP virtual void getCloud(OutputArray points, OutputArray normals) const = 0;
/** @brief Gets points of current 3d mesh
The order of points is undefined.
@param points vector of points which are 4-float vectors
*/
CV_WRAP virtual void getPoints(OutputArray points) const = 0;
/** @brief Calculates normals for given points
@param points input vector of points which are 4-float vectors
@param normals output vector of corresponding normals which are 4-float vectors
*/
CV_WRAP virtual void getNormals(InputArray points, OutputArray normals) const = 0;
/** @brief Resets the algorithm
Clears current model and resets a pose.
*/
CV_WRAP virtual void reset() = 0;
/** @brief Get current pose in voxel space */
virtual const Affine3f getPose() const = 0;
/** @brief Process next depth frame
Integrates depth into voxel space with respect to its ICP-calculated pose.
Input image is converted to CV_32F internally if has another type.
@param depth one-channel image which size and depth scale is described in algorithm's parameters
@return true if succeeded to align new frame with current scene, false if opposite
*/
CV_WRAP virtual bool update(InputArray depth) = 0;
virtual std::vector<Point3f> getNodesPos() const = 0;
virtual void marchCubes(OutputArray vertices, OutputArray edges) const = 0;
virtual void renderSurface(OutputArray depthImage, OutputArray vertImage, OutputArray normImage, bool warp=true) = 0;
};
//! @}
}
}
#endif

View File

@@ -0,0 +1,81 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
#ifndef __OPENCV_RGBD_INTRINSICS_HPP__
#define __OPENCV_RGBD_INTRINSICS_HPP__
#include "opencv2/core/matx.hpp"
namespace cv
{
namespace kinfu
{
struct Intr
{
/** @brief Camera intrinsics */
/** Reprojects screen point to camera space given z coord. */
struct Reprojector
{
Reprojector() {}
inline Reprojector(Intr intr)
{
fxinv = 1.f/intr.fx, fyinv = 1.f/intr.fy;
cx = intr.cx, cy = intr.cy;
}
template<typename T>
inline cv::Point3_<T> operator()(cv::Point3_<T> p) const
{
T x = p.z * (p.x - cx) * fxinv;
T y = p.z * (p.y - cy) * fyinv;
return cv::Point3_<T>(x, y, p.z);
}
float fxinv, fyinv, cx, cy;
};
/** Projects camera space vector onto screen */
struct Projector
{
inline Projector(Intr intr) : fx(intr.fx), fy(intr.fy), cx(intr.cx), cy(intr.cy) { }
template<typename T>
inline cv::Point_<T> operator()(cv::Point3_<T> p) const
{
T invz = T(1)/p.z;
T x = fx*(p.x*invz) + cx;
T y = fy*(p.y*invz) + cy;
return cv::Point_<T>(x, y);
}
template<typename T>
inline cv::Point_<T> operator()(cv::Point3_<T> p, cv::Point3_<T>& pixVec) const
{
T invz = T(1)/p.z;
pixVec = cv::Point3_<T>(p.x*invz, p.y*invz, 1);
T x = fx*pixVec.x + cx;
T y = fy*pixVec.y + cy;
return cv::Point_<T>(x, y);
}
float fx, fy, cx, cy;
};
Intr() : fx(), fy(), cx(), cy() { }
Intr(float _fx, float _fy, float _cx, float _cy) : fx(_fx), fy(_fy), cx(_cx), cy(_cy) { }
Intr(cv::Matx33f m) : fx(m(0, 0)), fy(m(1, 1)), cx(m(0, 2)), cy(m(1, 2)) { }
// scale intrinsics to pyramid level
inline Intr scale(int pyr) const
{
float factor = (1.f /(1 << pyr));
return Intr(fx*factor, fy*factor, cx*factor, cy*factor);
}
inline Reprojector makeReprojector() const { return Reprojector(*this); }
inline Projector makeProjector() const { return Projector(*this); }
inline cv::Matx33f getMat() const { return Matx33f(fx, 0, cx, 0, fy, cy, 0, 0, 1); }
float fx, fy, cx, cy;
};
} // namespace rgbd
} // namespace cv
#endif

View File

@@ -0,0 +1,270 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
// This code is also subject to the license terms in the LICENSE_KinectFusion.md file found in this module's directory
#ifndef __OPENCV_RGBD_KINFU_HPP__
#define __OPENCV_RGBD_KINFU_HPP__
#include "opencv2/core.hpp"
#include "opencv2/core/affine.hpp"
#include <opencv2/rgbd/volume.hpp>
namespace cv {
namespace kinfu {
//! @addtogroup kinect_fusion
//! @{
struct CV_EXPORTS_W Params
{
CV_WRAP Params(){}
/**
* @brief Constructor for Params
* Sets the initial pose of the TSDF volume.
* @param volumeInitialPoseRot rotation matrix
* @param volumeInitialPoseTransl translation vector
*/
CV_WRAP Params(Matx33f volumeInitialPoseRot, Vec3f volumeInitialPoseTransl)
{
setInitialVolumePose(volumeInitialPoseRot,volumeInitialPoseTransl);
}
/**
* @brief Constructor for Params
* Sets the initial pose of the TSDF volume.
* @param volumeInitialPose 4 by 4 Homogeneous Transform matrix to set the intial pose of TSDF volume
*/
CV_WRAP Params(Matx44f volumeInitialPose)
{
setInitialVolumePose(volumeInitialPose);
}
/**
* @brief Set Initial Volume Pose
* Sets the initial pose of the TSDF volume.
* @param R rotation matrix
* @param t translation vector
*/
CV_WRAP void setInitialVolumePose(Matx33f R, Vec3f t);
/**
* @brief Set Initial Volume Pose
* Sets the initial pose of the TSDF volume.
* @param homogen_tf 4 by 4 Homogeneous Transform matrix to set the intial pose of TSDF volume
*/
CV_WRAP void setInitialVolumePose(Matx44f homogen_tf);
/**
* @brief Default parameters
* A set of parameters which provides better model quality, can be very slow.
*/
CV_WRAP static Ptr<Params> defaultParams();
/** @brief Coarse parameters
A set of parameters which provides better speed, can fail to match frames
in case of rapid sensor motion.
*/
CV_WRAP static Ptr<Params> coarseParams();
/** @brief HashTSDF parameters
A set of parameters suitable for use with HashTSDFVolume
*/
CV_WRAP static Ptr<Params> hashTSDFParams(bool isCoarse);
/** @brief ColoredTSDF parameters
A set of parameters suitable for use with ColoredTSDFVolume
*/
CV_WRAP static Ptr<Params> coloredTSDFParams(bool isCoarse);
/** @brief frame size in pixels */
CV_PROP_RW Size frameSize;
/** @brief rgb frame size in pixels */
CV_PROP_RW kinfu::VolumeType volumeType;
/** @brief camera intrinsics */
CV_PROP_RW Matx33f intr;
/** @brief rgb camera intrinsics */
CV_PROP_RW Matx33f rgb_intr;
/** @brief pre-scale per 1 meter for input values
Typical values are:
* 5000 per 1 meter for the 16-bit PNG files of TUM database
* 1000 per 1 meter for Kinect 2 device
* 1 per 1 meter for the 32-bit float images in the ROS bag files
*/
CV_PROP_RW float depthFactor;
/** @brief Depth sigma in meters for bilateral smooth */
CV_PROP_RW float bilateral_sigma_depth;
/** @brief Spatial sigma in pixels for bilateral smooth */
CV_PROP_RW float bilateral_sigma_spatial;
/** @brief Kernel size in pixels for bilateral smooth */
CV_PROP_RW int bilateral_kernel_size;
/** @brief Number of pyramid levels for ICP */
CV_PROP_RW int pyramidLevels;
/** @brief Resolution of voxel space
Number of voxels in each dimension.
*/
CV_PROP_RW Vec3i volumeDims;
/** @brief Size of voxel in meters */
CV_PROP_RW float voxelSize;
/** @brief Minimal camera movement in meters
Integrate new depth frame only if camera movement exceeds this value.
*/
CV_PROP_RW float tsdf_min_camera_movement;
/** @brief initial volume pose in meters */
Affine3f volumePose;
/** @brief distance to truncate in meters
Distances to surface that exceed this value will be truncated to 1.0.
*/
CV_PROP_RW float tsdf_trunc_dist;
/** @brief max number of frames per voxel
Each voxel keeps running average of distances no longer than this value.
*/
CV_PROP_RW int tsdf_max_weight;
/** @brief A length of one raycast step
How much voxel sizes we skip each raycast step
*/
CV_PROP_RW float raycast_step_factor;
// gradient delta in voxel sizes
// fixed at 1.0f
// float gradient_delta_factor;
/** @brief light pose for rendering in meters */
CV_PROP_RW Vec3f lightPose;
/** @brief distance theshold for ICP in meters */
CV_PROP_RW float icpDistThresh;
/** angle threshold for ICP in radians */
CV_PROP_RW float icpAngleThresh;
/** number of ICP iterations for each pyramid level */
CV_PROP_RW std::vector<int> icpIterations;
/** @brief Threshold for depth truncation in meters
All depth values beyond this threshold will be set to zero
*/
CV_PROP_RW float truncateThreshold;
};
/** @brief KinectFusion implementation
This class implements a 3d reconstruction algorithm described in
@cite kinectfusion paper.
It takes a sequence of depth images taken from depth sensor
(or any depth images source such as stereo camera matching algorithm or even raymarching renderer).
The output can be obtained as a vector of points and their normals
or can be Phong-rendered from given camera pose.
An internal representation of a model is a voxel cuboid that keeps TSDF values
which are a sort of distances to the surface (for details read the @cite kinectfusion article about TSDF).
There is no interface to that representation yet.
KinFu uses OpenCL acceleration automatically if available.
To enable or disable it explicitly use cv::setUseOptimized() or cv::ocl::setUseOpenCL().
This implementation is based on [kinfu-remake](https://github.com/Nerei/kinfu_remake).
Note that the KinectFusion algorithm was patented and its use may be restricted by
the list of patents mentioned in README.md file in this module directory.
That's why you need to set the OPENCV_ENABLE_NONFREE option in CMake to use KinectFusion.
*/
class CV_EXPORTS_W KinFu
{
public:
CV_WRAP static Ptr<KinFu> create(const Ptr<Params>& _params);
virtual ~KinFu();
/** @brief Get current parameters */
virtual const Params& getParams() const = 0;
/** @brief Renders a volume into an image
Renders a 0-surface of TSDF using Phong shading into a CV_8UC4 Mat.
Light pose is fixed in KinFu params.
@param image resulting image
*/
CV_WRAP virtual void render(OutputArray image) const = 0;
/** @brief Renders a volume into an image
Renders a 0-surface of TSDF using Phong shading into a CV_8UC4 Mat.
Light pose is fixed in KinFu params.
@param image resulting image
@param cameraPose pose of camera to render from. If empty then render from current pose
which is a last frame camera pose.
*/
CV_WRAP virtual void render(OutputArray image, const Matx44f& cameraPose) const = 0;
/** @brief Gets points and normals of current 3d mesh
The order of normals corresponds to order of points.
The order of points is undefined.
@param points vector of points which are 4-float vectors
@param normals vector of normals which are 4-float vectors
*/
CV_WRAP virtual void getCloud(OutputArray points, OutputArray normals) const = 0;
/** @brief Gets points of current 3d mesh
The order of points is undefined.
@param points vector of points which are 4-float vectors
*/
CV_WRAP virtual void getPoints(OutputArray points) const = 0;
/** @brief Calculates normals for given points
@param points input vector of points which are 4-float vectors
@param normals output vector of corresponding normals which are 4-float vectors
*/
CV_WRAP virtual void getNormals(InputArray points, OutputArray normals) const = 0;
/** @brief Resets the algorithm
Clears current model and resets a pose.
*/
CV_WRAP virtual void reset() = 0;
/** @brief Get current pose in voxel space */
virtual const Affine3f getPose() const = 0;
/** @brief Process next depth frame
Integrates depth into voxel space with respect to its ICP-calculated pose.
Input image is converted to CV_32F internally if has another type.
@param depth one-channel image which size and depth scale is described in algorithm's parameters
@return true if succeeded to align new frame with current scene, false if opposite
*/
CV_WRAP virtual bool update(InputArray depth) = 0;
};
//! @}
}
}
#endif

View File

@@ -0,0 +1,146 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
// This code is also subject to the license terms in the LICENSE_KinectFusion.md file found in this
// module's directory
#ifndef __OPENCV_RGBD_LARGEKINFU_HPP__
#define __OPENCV_RGBD_LARGEKINFU_HPP__
#include <opencv2/rgbd/volume.hpp>
#include "opencv2/core.hpp"
#include "opencv2/core/affine.hpp"
namespace cv
{
namespace large_kinfu
{
struct CV_EXPORTS_W Params
{
/** @brief Default parameters
A set of parameters which provides better model quality, can be very slow.
*/
CV_WRAP static Ptr<Params> defaultParams();
/** @brief Coarse parameters
A set of parameters which provides better speed, can fail to match frames
in case of rapid sensor motion.
*/
CV_WRAP static Ptr<Params> coarseParams();
/** @brief HashTSDF parameters
A set of parameters suitable for use with HashTSDFVolume
*/
CV_WRAP static Ptr<Params> hashTSDFParams(bool isCoarse);
/** @brief frame size in pixels */
CV_PROP_RW Size frameSize;
/** @brief camera intrinsics */
CV_PROP_RW Matx33f intr;
/** @brief rgb camera intrinsics */
CV_PROP_RW Matx33f rgb_intr;
/** @brief pre-scale per 1 meter for input values
Typical values are:
* 5000 per 1 meter for the 16-bit PNG files of TUM database
* 1000 per 1 meter for Kinect 2 device
* 1 per 1 meter for the 32-bit float images in the ROS bag files
*/
CV_PROP_RW float depthFactor;
/** @brief Depth sigma in meters for bilateral smooth */
CV_PROP_RW float bilateral_sigma_depth;
/** @brief Spatial sigma in pixels for bilateral smooth */
CV_PROP_RW float bilateral_sigma_spatial;
/** @brief Kernel size in pixels for bilateral smooth */
CV_PROP_RW int bilateral_kernel_size;
/** @brief Number of pyramid levels for ICP */
CV_PROP_RW int pyramidLevels;
/** @brief Minimal camera movement in meters
Integrate new depth frame only if camera movement exceeds this value.
*/
CV_PROP_RW float tsdf_min_camera_movement;
/** @brief light pose for rendering in meters */
CV_PROP_RW Vec3f lightPose;
/** @brief distance theshold for ICP in meters */
CV_PROP_RW float icpDistThresh;
/** @brief angle threshold for ICP in radians */
CV_PROP_RW float icpAngleThresh;
/** @brief number of ICP iterations for each pyramid level */
CV_PROP_RW std::vector<int> icpIterations;
/** @brief Threshold for depth truncation in meters
All depth values beyond this threshold will be set to zero
*/
CV_PROP_RW float truncateThreshold;
/** @brief Volume parameters
*/
kinfu::VolumeParams volumeParams;
};
/** @brief Large Scale Dense Depth Fusion implementation
This class implements a 3d reconstruction algorithm for larger environments using
Spatially hashed TSDF volume "Submaps".
It also runs a periodic posegraph optimization to minimize drift in tracking over long sequences.
Currently the algorithm does not implement a relocalization or loop closure module.
Potentially a Bag of words implementation or RGBD relocalization as described in
Glocker et al. ISMAR 2013 will be implemented
It takes a sequence of depth images taken from depth sensor
(or any depth images source such as stereo camera matching algorithm or even raymarching
renderer). The output can be obtained as a vector of points and their normals or can be
Phong-rendered from given camera pose.
An internal representation of a model is a spatially hashed voxel cube that stores TSDF values
which represent the distance to the closest surface (for details read the @cite kinectfusion article
about TSDF). There is no interface to that representation yet.
For posegraph optimization, a Submap abstraction over the Volume class is created.
New submaps are added to the model when there is low visibility overlap between current viewing frustrum
and the existing volume/model. Multiple submaps are simultaneously tracked and a posegraph is created and
optimized periodically.
LargeKinfu does not use any OpenCL acceleration yet.
To enable or disable it explicitly use cv::setUseOptimized() or cv::ocl::setUseOpenCL().
This implementation is inspired from Kintinuous, InfiniTAM and other SOTA algorithms
You need to set the OPENCV_ENABLE_NONFREE option in CMake to use KinectFusion.
*/
class CV_EXPORTS_W LargeKinfu
{
public:
CV_WRAP static Ptr<LargeKinfu> create(const Ptr<Params>& _params);
virtual ~LargeKinfu() = default;
virtual const Params& getParams() const = 0;
CV_WRAP virtual void render(OutputArray image) const = 0;
CV_WRAP virtual void render(OutputArray image, const Matx44f& cameraPose) const = 0;
CV_WRAP virtual void getCloud(OutputArray points, OutputArray normals) const = 0;
CV_WRAP virtual void getPoints(OutputArray points) const = 0;
CV_WRAP virtual void getNormals(InputArray points, OutputArray normals) const = 0;
CV_WRAP virtual void reset() = 0;
virtual const Affine3f getPose() const = 0;
CV_WRAP virtual bool update(InputArray depth) = 0;
};
} // namespace large_kinfu
} // namespace cv
#endif

View File

@@ -0,0 +1,435 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
// This code is also subject to the license terms in the LICENSE_WillowGarage.md file found in this module's directory
#ifndef __OPENCV_RGBD_LINEMOD_HPP__
#define __OPENCV_RGBD_LINEMOD_HPP__
#include "opencv2/core.hpp"
#include <map>
/****************************************************************************************\
* LINE-MOD *
\****************************************************************************************/
namespace cv {
namespace linemod {
//! @addtogroup rgbd
//! @{
/**
* \brief Discriminant feature described by its location and label.
*/
struct CV_EXPORTS_W_SIMPLE Feature
{
CV_PROP_RW int x; ///< x offset
CV_PROP_RW int y; ///< y offset
CV_PROP_RW int label; ///< Quantization
CV_WRAP Feature() : x(0), y(0), label(0) {}
CV_WRAP Feature(int x, int y, int label);
void read(const FileNode& fn);
void write(FileStorage& fs) const;
};
inline Feature::Feature(int _x, int _y, int _label) : x(_x), y(_y), label(_label) {}
struct CV_EXPORTS_W_SIMPLE Template
{
CV_PROP int width;
CV_PROP int height;
CV_PROP int pyramid_level;
CV_PROP std::vector<Feature> features;
void read(const FileNode& fn);
void write(FileStorage& fs) const;
};
/**
* \brief Represents a modality operating over an image pyramid.
*/
class CV_EXPORTS_W QuantizedPyramid
{
public:
// Virtual destructor
virtual ~QuantizedPyramid() {}
/**
* \brief Compute quantized image at current pyramid level for online detection.
*
* \param[out] dst The destination 8-bit image. For each pixel at most one bit is set,
* representing its classification.
*/
CV_WRAP virtual void quantize(CV_OUT Mat& dst) const =0;
/**
* \brief Extract most discriminant features at current pyramid level to form a new template.
*
* \param[out] templ The new template.
*/
CV_WRAP virtual bool extractTemplate(CV_OUT Template& templ) const =0;
/**
* \brief Go to the next pyramid level.
*
* \todo Allow pyramid scale factor other than 2
*/
CV_WRAP virtual void pyrDown() =0;
protected:
/// Candidate feature with a score
struct Candidate
{
Candidate(int x, int y, int label, float score);
/// Sort candidates with high score to the front
bool operator<(const Candidate& rhs) const
{
return score > rhs.score;
}
Feature f;
float score;
};
/**
* \brief Choose candidate features so that they are not bunched together.
*
* \param[in] candidates Candidate features sorted by score.
* \param[out] features Destination vector of selected features.
* \param[in] num_features Number of candidates to select.
* \param[in] distance Hint for desired distance between features.
*/
static void selectScatteredFeatures(const std::vector<Candidate>& candidates,
std::vector<Feature>& features,
size_t num_features, float distance);
};
inline QuantizedPyramid::Candidate::Candidate(int x, int y, int label, float _score) : f(x, y, label), score(_score) {}
/**
* \brief Interface for modalities that plug into the LINE template matching representation.
*
* \todo Max response, to allow optimization of summing (255/MAX) features as uint8
*/
class CV_EXPORTS_W Modality
{
public:
// Virtual destructor
virtual ~Modality() {}
/**
* \brief Form a quantized image pyramid from a source image.
*
* \param[in] src The source image. Type depends on the modality.
* \param[in] mask Optional mask. If not empty, unmasked pixels are set to zero
* in quantized image and cannot be extracted as features.
*/
CV_WRAP Ptr<QuantizedPyramid> process(const Mat& src,
const Mat& mask = Mat()) const
{
return processImpl(src, mask);
}
CV_WRAP virtual String name() const =0;
CV_WRAP virtual void read(const FileNode& fn) =0;
virtual void write(FileStorage& fs) const =0;
/**
* \brief Create modality by name.
*
* The following modality types are supported:
* - "ColorGradient"
* - "DepthNormal"
*/
CV_WRAP static Ptr<Modality> create(const String& modality_type);
/**
* \brief Load a modality from file.
*/
CV_WRAP static Ptr<Modality> create(const FileNode& fn);
protected:
// Indirection is because process() has a default parameter.
virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
const Mat& mask) const =0;
};
/**
* \brief Modality that computes quantized gradient orientations from a color image.
*/
class CV_EXPORTS_W ColorGradient : public Modality
{
public:
/**
* \brief Default constructor. Uses reasonable default parameter values.
*/
ColorGradient();
/**
* \brief Constructor.
*
* \param weak_threshold When quantizing, discard gradients with magnitude less than this.
* \param num_features How many features a template must contain.
* \param strong_threshold Consider as candidate features only gradients whose norms are
* larger than this.
*/
ColorGradient(float weak_threshold, size_t num_features, float strong_threshold);
CV_WRAP static Ptr<ColorGradient> create(float weak_threshold, size_t num_features, float strong_threshold);
virtual String name() const CV_OVERRIDE;
virtual void read(const FileNode& fn) CV_OVERRIDE;
virtual void write(FileStorage& fs) const CV_OVERRIDE;
CV_PROP float weak_threshold;
CV_PROP size_t num_features;
CV_PROP float strong_threshold;
protected:
virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
const Mat& mask) const CV_OVERRIDE;
};
/**
* \brief Modality that computes quantized surface normals from a dense depth map.
*/
class CV_EXPORTS_W DepthNormal : public Modality
{
public:
/**
* \brief Default constructor. Uses reasonable default parameter values.
*/
DepthNormal();
/**
* \brief Constructor.
*
* \param distance_threshold Ignore pixels beyond this distance.
* \param difference_threshold When computing normals, ignore contributions of pixels whose
* depth difference with the central pixel is above this threshold.
* \param num_features How many features a template must contain.
* \param extract_threshold Consider as candidate feature only if there are no differing
* orientations within a distance of extract_threshold.
*/
DepthNormal(int distance_threshold, int difference_threshold, size_t num_features,
int extract_threshold);
CV_WRAP static Ptr<DepthNormal> create(int distance_threshold, int difference_threshold,
size_t num_features, int extract_threshold);
virtual String name() const CV_OVERRIDE;
virtual void read(const FileNode& fn) CV_OVERRIDE;
virtual void write(FileStorage& fs) const CV_OVERRIDE;
CV_PROP int distance_threshold;
CV_PROP int difference_threshold;
CV_PROP size_t num_features;
CV_PROP int extract_threshold;
protected:
virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
const Mat& mask) const CV_OVERRIDE;
};
/**
* \brief Debug function to colormap a quantized image for viewing.
*/
CV_EXPORTS_W void colormap(const Mat& quantized, CV_OUT Mat& dst);
/**
* \brief Debug function to draw linemod features
* @param img
* @param templates see @ref Detector::addTemplate
* @param tl template bbox top-left offset see @ref Detector::addTemplate
* @param size marker size see @ref cv::drawMarker
*/
CV_EXPORTS_W void drawFeatures(InputOutputArray img, const std::vector<Template>& templates, const Point2i& tl, int size = 10);
/**
* \brief Represents a successful template match.
*/
struct CV_EXPORTS_W_SIMPLE Match
{
CV_WRAP Match()
{
}
CV_WRAP Match(int x, int y, float similarity, const String& class_id, int template_id);
/// Sort matches with high similarity to the front
bool operator<(const Match& rhs) const
{
// Secondarily sort on template_id for the sake of duplicate removal
if (similarity != rhs.similarity)
return similarity > rhs.similarity;
else
return template_id < rhs.template_id;
}
bool operator==(const Match& rhs) const
{
return x == rhs.x && y == rhs.y && similarity == rhs.similarity && class_id == rhs.class_id;
}
CV_PROP_RW int x;
CV_PROP_RW int y;
CV_PROP_RW float similarity;
CV_PROP_RW String class_id;
CV_PROP_RW int template_id;
};
inline
Match::Match(int _x, int _y, float _similarity, const String& _class_id, int _template_id)
: x(_x), y(_y), similarity(_similarity), class_id(_class_id), template_id(_template_id)
{}
/**
* \brief Object detector using the LINE template matching algorithm with any set of
* modalities.
*/
class CV_EXPORTS_W Detector
{
public:
/**
* \brief Empty constructor, initialize with read().
*/
CV_WRAP Detector();
/**
* \brief Constructor.
*
* \param modalities Modalities to use (color gradients, depth normals, ...).
* \param T_pyramid Value of the sampling step T at each pyramid level. The
* number of pyramid levels is T_pyramid.size().
*/
CV_WRAP Detector(const std::vector< Ptr<Modality> >& modalities, const std::vector<int>& T_pyramid);
/**
* \brief Detect objects by template matching.
*
* Matches globally at the lowest pyramid level, then refines locally stepping up the pyramid.
*
* \param sources Source images, one for each modality.
* \param threshold Similarity threshold, a percentage between 0 and 100.
* \param[out] matches Template matches, sorted by similarity score.
* \param class_ids If non-empty, only search for the desired object classes.
* \param[out] quantized_images Optionally return vector<Mat> of quantized images.
* \param masks The masks for consideration during matching. The masks should be CV_8UC1
* where 255 represents a valid pixel. If non-empty, the vector must be
* the same size as sources. Each element must be
* empty or the same size as its corresponding source.
*/
CV_WRAP void match(const std::vector<Mat>& sources, float threshold, CV_OUT std::vector<Match>& matches,
const std::vector<String>& class_ids = std::vector<String>(),
OutputArrayOfArrays quantized_images = noArray(),
const std::vector<Mat>& masks = std::vector<Mat>()) const;
/**
* \brief Add new object template.
*
* \param sources Source images, one for each modality.
* \param class_id Object class ID.
* \param object_mask Mask separating object from background.
* \param[out] bounding_box Optionally return bounding box of the extracted features.
*
* \return Template ID, or -1 if failed to extract a valid template.
*/
CV_WRAP int addTemplate(const std::vector<Mat>& sources, const String& class_id,
const Mat& object_mask, CV_OUT Rect* bounding_box = NULL);
/**
* \brief Add a new object template computed by external means.
*/
CV_WRAP int addSyntheticTemplate(const std::vector<Template>& templates, const String& class_id);
/**
* \brief Get the modalities used by this detector.
*
* You are not permitted to add/remove modalities, but you may dynamic_cast them to
* tweak parameters.
*/
CV_WRAP const std::vector< Ptr<Modality> >& getModalities() const { return modalities; }
/**
* \brief Get sampling step T at pyramid_level.
*/
CV_WRAP int getT(int pyramid_level) const { return T_at_level[pyramid_level]; }
/**
* \brief Get number of pyramid levels used by this detector.
*/
CV_WRAP int pyramidLevels() const { return pyramid_levels; }
/**
* \brief Get the template pyramid identified by template_id.
*
* For example, with 2 modalities (Gradient, Normal) and two pyramid levels
* (L0, L1), the order is (GradientL0, NormalL0, GradientL1, NormalL1).
*/
CV_WRAP const std::vector<Template>& getTemplates(const String& class_id, int template_id) const;
CV_WRAP int numTemplates() const;
CV_WRAP int numTemplates(const String& class_id) const;
CV_WRAP int numClasses() const { return static_cast<int>(class_templates.size()); }
CV_WRAP std::vector<String> classIds() const;
CV_WRAP void read(const FileNode& fn);
void write(FileStorage& fs) const;
String readClass(const FileNode& fn, const String &class_id_override = "");
void writeClass(const String& class_id, FileStorage& fs) const;
CV_WRAP void readClasses(const std::vector<String>& class_ids,
const String& format = "templates_%s.yml.gz");
CV_WRAP void writeClasses(const String& format = "templates_%s.yml.gz") const;
protected:
std::vector< Ptr<Modality> > modalities;
int pyramid_levels;
std::vector<int> T_at_level;
typedef std::vector<Template> TemplatePyramid;
typedef std::map<String, std::vector<TemplatePyramid> > TemplatesMap;
TemplatesMap class_templates;
typedef std::vector<Mat> LinearMemories;
// Indexed as [pyramid level][modality][quantized label]
typedef std::vector< std::vector<LinearMemories> > LinearMemoryPyramid;
void matchClass(const LinearMemoryPyramid& lm_pyramid,
const std::vector<Size>& sizes,
float threshold, std::vector<Match>& matches,
const String& class_id,
const std::vector<TemplatePyramid>& template_pyramids) const;
};
/**
* \brief Factory function for detector using LINE algorithm with color gradients.
*
* Default parameter settings suitable for VGA images.
*/
CV_EXPORTS_W Ptr<linemod::Detector> getDefaultLINE();
/**
* \brief Factory function for detector using LINE-MOD algorithm with color gradients
* and depth normals.
*
* Default parameter settings suitable for VGA images.
*/
CV_EXPORTS_W Ptr<linemod::Detector> getDefaultLINEMOD();
//! @}
} // namespace linemod
} // namespace cv
#endif // __OPENCV_OBJDETECT_LINEMOD_HPP__

View File

@@ -0,0 +1,124 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
// This code is also subject to the license terms in the LICENSE_KinectFusion.md file found in this
// module's directory
#ifndef __OPENCV_RGBD_VOLUME_H__
#define __OPENCV_RGBD_VOLUME_H__
#include "intrinsics.hpp"
#include "opencv2/core/affine.hpp"
namespace cv
{
namespace kinfu
{
class CV_EXPORTS_W Volume
{
public:
Volume(float _voxelSize, Matx44f _pose, float _raycastStepFactor)
: voxelSize(_voxelSize),
voxelSizeInv(1.0f / voxelSize),
pose(_pose),
raycastStepFactor(_raycastStepFactor)
{
}
virtual ~Volume(){};
virtual void integrate(InputArray _depth, float depthFactor, const Matx44f& cameraPose,
const kinfu::Intr& intrinsics, const int frameId = 0) = 0;
virtual void integrate(InputArray _depth, InputArray _rgb, float depthFactor,
const Matx44f& cameraPose, const kinfu::Intr& intrinsics,
const Intr& rgb_intrinsics, const int frameId = 0) = 0;
virtual void raycast(const Matx44f& cameraPose, const kinfu::Intr& intrinsics,
const Size& frameSize, OutputArray points, OutputArray normals) const = 0;
virtual void raycast(const Matx44f& cameraPose, const kinfu::Intr& intrinsics, const Size& frameSize,
OutputArray points, OutputArray normals, OutputArray colors) const = 0;
virtual void fetchNormals(InputArray points, OutputArray _normals) const = 0;
virtual void fetchPointsNormals(OutputArray points, OutputArray normals) const = 0;
virtual void reset() = 0;
public:
const float voxelSize;
const float voxelSizeInv;
const Affine3f pose;
const float raycastStepFactor;
};
enum class VolumeType
{
TSDF = 0,
HASHTSDF = 1,
COLOREDTSDF = 2
};
struct CV_EXPORTS_W VolumeParams
{
/** @brief Type of Volume
Values can be TSDF (single volume) or HASHTSDF (hashtable of volume units)
*/
CV_PROP_RW VolumeType type;
/** @brief Resolution of voxel space
Number of voxels in each dimension.
Applicable only for TSDF Volume.
HashTSDF volume only supports equal resolution in all three dimensions
*/
CV_PROP_RW Vec3i resolution;
/** @brief Resolution of volumeUnit in voxel space
Number of voxels in each dimension for volumeUnit
Applicable only for hashTSDF.
*/
CV_PROP_RW int unitResolution = {0};
/** @brief Initial pose of the volume in meters */
Affine3f pose;
/** @brief Length of voxels in meters */
CV_PROP_RW float voxelSize;
/** @brief TSDF truncation distance
Distances greater than value from surface will be truncated to 1.0
*/
CV_PROP_RW float tsdfTruncDist;
/** @brief Max number of frames to integrate per voxel
Represents the max number of frames over which a running average
of the TSDF is calculated for a voxel
*/
CV_PROP_RW int maxWeight;
/** @brief Threshold for depth truncation in meters
Truncates the depth greater than threshold to 0
*/
CV_PROP_RW float depthTruncThreshold;
/** @brief Length of single raycast step
Describes the percentage of voxel length that is skipped per march
*/
CV_PROP_RW float raycastStepFactor;
/** @brief Default set of parameters that provide higher quality reconstruction
at the cost of slow performance.
*/
CV_WRAP static Ptr<VolumeParams> defaultParams(VolumeType _volumeType);
/** @brief Coarse set of parameters that provides relatively higher performance
at the cost of reconstrution quality.
*/
CV_WRAP static Ptr<VolumeParams> coarseParams(VolumeType _volumeType);
};
Ptr<Volume> makeVolume(const VolumeParams& _volumeParams);
CV_EXPORTS_W Ptr<Volume> makeVolume(VolumeType _volumeType, float _voxelSize, Matx44f _pose,
float _raycastStepFactor, float _truncDist, int _maxWeight,
float _truncateThreshold, Vec3i _resolution);
} // namespace kinfu
} // namespace cv
#endif