Commit 5aafae16 authored by Patrik Huber's avatar Patrik Huber

Added a nonlinear camera estimation algorithm that directly estimates rendering parameters

parent 6a62c5ae
/*
* Eos - A 3D Morphable Model fitting library written in modern C++11/14.
*
* File: include/eos/fitting/detail/nonlinear_camera_estimation_detail.hpp
*
* Copyright 2015 Patrik Huber
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#ifndef NONLINEARCAMERAESTIMATION_DETAIL_HPP_
#define NONLINEARCAMERAESTIMATION_DETAIL_HPP_
#include "glm/gtc/matrix_transform.hpp"
#include "Eigen/Geometry"
#include "opencv2/core/core.hpp"
#include <vector>
namespace eos {
namespace fitting {
namespace detail {
// ret: 3rd entry is the z
// radians
// expects the landmark points to be in opencv convention, i.e. origin TL
glm::vec3 project_ortho(glm::vec3 point, float rot_x_pitch, float rot_y_yaw, float rot_z_roll, float tx, float ty, float frustum_scale, /* fixed params now: */ int width, int height)
{
// We could use quaternions in here, to be independent of the RPY... etc convention.
// Then, the user can decompose the quaternion as he wishes to. But then we'd have to estimate 4 parameters?
// This can of course be optimised, but we keep it this way while we're debugging and as long as it's not a performance issue.
auto rot_mtx_x = glm::rotate(glm::mat4(1.0f), rot_x_pitch, glm::vec3{ 1.0f, 0.0f, 0.0f });
auto rot_mtx_y = glm::rotate(glm::mat4(1.0f), rot_y_yaw, glm::vec3{ 0.0f, 1.0f, 0.0f });
auto rot_mtx_z = glm::rotate(glm::mat4(1.0f), rot_z_roll, glm::vec3{ 0.0f, 0.0f, 1.0f });
auto t_mtx = glm::translate(glm::mat4(1.0f), glm::vec3{ tx, ty, 0.0f }); // glm: Col-major memory layout. [] gives the column
// Note/Todo: Is this the full ortho? n/f missing? or do we need to multiply it with Proj...? See Shirley CG!
// glm::frustum()?
const float aspect = static_cast<float>(width) / height;
auto ortho_mtx = glm::ortho(-1.0f * aspect * frustum_scale, 1.0f * aspect * frustum_scale, -1.0f * frustum_scale, 1.0f * frustum_scale);
glm::vec4 viewport(0, height, width, -height); // flips y, origin top-left, like in OpenCV
// P = RPY * P
glm::vec3 res = glm::project(point, t_mtx * rot_mtx_z * rot_mtx_x * rot_mtx_y, ortho_mtx, viewport);
return res;
};
/**
* @brief Generic functor for Eigen's optimisation algorithms.
*/
template<typename _Scalar, int NX = Eigen::Dynamic, int NY = Eigen::Dynamic>
struct Functor
{
typedef _Scalar Scalar;
enum {
InputsAtCompileTime = NX,
ValuesAtCompileTime = NY
};
typedef Eigen::Matrix<Scalar, InputsAtCompileTime, 1> InputType;
typedef Eigen::Matrix<Scalar, ValuesAtCompileTime, 1> ValueType;
typedef Eigen::Matrix<Scalar, ValuesAtCompileTime, InputsAtCompileTime> JacobianType;
const int m_inputs, m_values;
Functor() : m_inputs(InputsAtCompileTime), m_values(ValuesAtCompileTime) {}
Functor(int inputs, int values) : m_inputs(inputs), m_values(values) {}
int inputs() const { return m_inputs; }
int values() const { return m_values; }
};
/**
* @brief LevenbergMarquardt cost function for the orthographic camera estimation.
*/
struct OrthographicParameterProjection : Functor<double>
{
public:
// Creates a new OrthographicParameterProjection object with given data.
OrthographicParameterProjection(std::vector<cv::Vec2f> image_points, std::vector<cv::Vec4f> model_points, int width, int height) : Functor<double>(6, image_points.size()), image_points(image_points), model_points(model_points), width(width), height(height) {};
// x = current params, fvec = the errors/differences of the proj with current params and the GT (image_points)
int operator()(const Eigen::VectorXd& x, Eigen::VectorXd& fvec) const
{
const float aspect = static_cast<float>(width) / height;
for (int i = 0; i < values(); i++)
{
// opencv to glm:
glm::vec3 point_3d(model_points[i][0], model_points[i][1], model_points[i][2]);
// projection given current params x:
glm::vec3 proj_with_current_param_esti = project_ortho(point_3d, x[0], x[1], x[2], x[3], x[4], x[5], width, height);
cv::Vec2f proj_point_2d(proj_with_current_param_esti.x, proj_with_current_param_esti.y);
// diff of current proj to ground truth, our error
auto diff = cv::norm(proj_point_2d, image_points[i]);
// fvec should contain the differences
// don't square it.
fvec[i] = diff;
}
return 0;
};
private:
std::vector<cv::Vec2f> image_points;
std::vector<cv::Vec4f> model_points;
int width;
int height;
};
} /* namespace detail */
} /* namespace fitting */
} /* namespace eos */
#endif /* NONLINEARCAMERAESTIMATION_DETAIL_HPP_ */
/*
* Eos - A 3D Morphable Model fitting library written in modern C++11/14.
*
* File: include/eos/fitting/nonlinear_camera_estimation.hpp
*
* Copyright 2015 Patrik Huber
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#ifndef NONLINEARCAMERAESTIMATION_HPP_
#define NONLINEARCAMERAESTIMATION_HPP_
#include "eos/fitting/detail/nonlinear_camera_estimation_detail.hpp"
#include "glm/gtc/matrix_transform.hpp"
#include "Eigen/Geometry"
#include "unsupported/Eigen/NonLinearOptimization"
#include "opencv2/core/core.hpp"
#include <vector>
#include <cassert>
namespace eos {
namespace fitting {
/**
* @brief A class representing a camera viewing frustum. At the
* moment used as orthographic camera only.
*/
struct Frustum
{
float l, r, b, t;
// optional<float> n, f;
};
/**
* @brief Represents a set of estimated model parameters (rotation, translation) and
* camera parameters (viewing frustum).
*
* Strictly speaking, the estimated rotation and translation are not camera parameters, they
* are the values that transform the model from model-space to camera-space, so they are the
* inverse of the camera position.
* The camera frustum describes the size of the viewing plane of the camera.
*
* Together, these parameters fully describe the imaging process of a given model instance
* (under an orthographic projection).
*
* The rotation values are given in radians and estimated using the RPY convention.
* Yaw is applied first to the model, then pitch, then roll (R * P * Y * vertex).
*/
struct CameraParameters
{
float r_x; // pitch
float r_y; // yaw. positive means subject is looking left (we see his/her right cheek).
float r_z; // roll
float t_x;
float t_y;
Frustum frustum;
};
/**
* @brief This algorithm estimates the rotation angles and translation of the model, as
* well as the viewing frustum of the camera, given a set of corresponding 2D-3D points.
*
* It assumes an orthographic camera and estimates 6 parameters,
* [r_x, r_y, r_z, t_x, t_y, frustum_scale], where the first five describe how to transform
* the model, and the last one describes the cameras viewing frustum (see CameraParameters).
* This 2D-3D correspondence problem is solved using Eigen's LevenbergMarquardt algorithm.
*
* The method is slightly inspired by "Computer Vision: Models Learning and Inference",
* Simon J.D. Prince, 2012, but different in a lot of respects.
*
* Eigen's LM implementation requires at least 6 data points, so we require >= 6 corresponding points.
*
* Notes/improvements:
* The algorithm works reliable as it is, however, it could be improved with the following:
* - A better initial guess (see e.g. Prince)
* - Using the analytic derivatives instead of Eigen::NumericalDiff - they're easy to calculate.
*
* @param[in] image_points A list of 2D image points.
* @param[in] model_points Corresponding points of a 3D model.
* @param[in] width Width of the image (or viewport).
* @param[in] height Height of the image (or viewport).
* @return The estimated model and camera parameters.
*/
CameraParameters estimate_orthographic_camera(std::vector<cv::Vec2f> image_points, std::vector<cv::Vec4f> model_points, int width, int height)
{
using cv::Mat;
assert(image_points.size() == model_points.size());
assert(image_points.size() >= 6); // Number of correspondence points given needs to be equal to or larger than 6
const float aspect = static_cast<float>(width) / height;
// Set up the initial parameter vector and the cost function:
int num_params = 6;
Eigen::VectorXd parameters; // [rot_x_pitch, rot_y_yaw, rot_z_roll, t_x, t_y, frustum_scale]
parameters.setConstant(num_params, 0.0); // Set all 6 values to zero (except frustum_scale, see next line)
parameters[5] = 110.0; // This is just a rough hand-chosen scaling estimate - we could do a lot better. But it works.
detail::OrthographicParameterProjection cost_function(image_points, model_points, width, height);
// Note: we have analytical derivatives, so we should use them!
Eigen::NumericalDiff<detail::OrthographicParameterProjection> cost_function_with_derivative(cost_function, 0.0001);
// I had to change the default value of epsfcn, it works well around 0.0001. It couldn't produce the derivative with the default, I guess the changes in the gradient were too small.
Eigen::LevenbergMarquardt<Eigen::NumericalDiff<detail::OrthographicParameterProjection>> lm(cost_function_with_derivative);
auto info = lm.minimize(parameters); // we could or should use the return value
// 'parameters' contains the solution now.
Frustum camera_frustum{ -1.0f * aspect * parameters[5], 1.0f * aspect * parameters[5], -1.0f * parameters[5], 1.0f * parameters[5] };
return CameraParameters{ static_cast<float>(parameters[0]), static_cast<float>(parameters[1]), static_cast<float>(parameters[2]), static_cast<float>(parameters[3]), static_cast<float>(parameters[4]), camera_frustum };
};
} /* namespace fitting */
} /* namespace eos */
#endif /* NONLINEARCAMERAESTIMATION_HPP_ */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment