Added a nonlinear camera estimation algorithm that directly estimates rendering parameters

5aafae16 · Patrik Huber · 6a62c5ae · 5aafae16 · 5aafae16
Commit 5aafae16 authored Aug 09, 2015 by Patrik Huber
2 changed files
--- a/include/eos/fitting/detail/nonlinear_camera_estimation_detail.hpp
+++ b/include/eos/fitting/detail/nonlinear_camera_estimation_detail.hpp
+/*
+ * Eos - A 3D Morphable Model fitting library written in modern C++11/14.
+ *
+ * File: include/eos/fitting/detail/nonlinear_camera_estimation_detail.hpp
+ *
+ * Copyright 2015 Patrik Huber
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#ifndef NONLINEARCAMERAESTIMATION_DETAIL_HPP_
+#define NONLINEARCAMERAESTIMATION_DETAIL_HPP_
+
+#include "glm/gtc/matrix_transform.hpp"
+
+#include "Eigen/Geometry"
+
+#include "opencv2/core/core.hpp"
+
+#include <vector>
+
+namespace eos {
+	namespace fitting {
+		namespace detail {
+
+
+// ret: 3rd entry is the z
+// radians
+// expects the landmark points to be in opencv convention, i.e. origin TL
+glm::vec3 project_ortho(glm::vec3 point, float rot_x_pitch, float rot_y_yaw, float rot_z_roll, float tx, float ty, float frustum_scale, /* fixed params now: */ int width, int height)
+{
+	// We could use quaternions in here, to be independent of the RPY... etc convention.
+	// Then, the user can decompose the quaternion as he wishes to. But then we'd have to estimate 4 parameters?
+	// This can of course be optimised, but we keep it this way while we're debugging and as long as it's not a performance issue.
+	auto rot_mtx_x = glm::rotate(glm::mat4(1.0f), rot_x_pitch, glm::vec3{ 1.0f, 0.0f, 0.0f });
+	auto rot_mtx_y = glm::rotate(glm::mat4(1.0f), rot_y_yaw, glm::vec3{ 0.0f, 1.0f, 0.0f });
+	auto rot_mtx_z = glm::rotate(glm::mat4(1.0f), rot_z_roll, glm::vec3{ 0.0f, 0.0f, 1.0f });
+	auto t_mtx = glm::translate(glm::mat4(1.0f), glm::vec3{ tx, ty, 0.0f }); // glm: Col-major memory layout. [] gives the column
+
+	// Note/Todo: Is this the full ortho? n/f missing? or do we need to multiply it with Proj...? See Shirley CG!
+	// glm::frustum()?
+	const float aspect = static_cast<float>(width) / height;
+	auto ortho_mtx = glm::ortho(-1.0f * aspect * frustum_scale, 1.0f * aspect * frustum_scale, -1.0f * frustum_scale, 1.0f * frustum_scale);
+
+	glm::vec4 viewport(0, height, width, -height); // flips y, origin top-left, like in OpenCV
+	// P = RPY * P
+	glm::vec3 res = glm::project(point, t_mtx * rot_mtx_z * rot_mtx_x * rot_mtx_y, ortho_mtx, viewport);
+	return res;
+};
+
+/**
+ * @brief Generic functor for Eigen's optimisation algorithms.
+ */
+template<typename _Scalar, int NX = Eigen::Dynamic, int NY = Eigen::Dynamic>
+struct Functor
+{
+	typedef _Scalar Scalar;
+	enum {
+		InputsAtCompileTime = NX,
+		ValuesAtCompileTime = NY
+	};
+	typedef Eigen::Matrix<Scalar, InputsAtCompileTime, 1> InputType;
+	typedef Eigen::Matrix<Scalar, ValuesAtCompileTime, 1> ValueType;
+	typedef Eigen::Matrix<Scalar, ValuesAtCompileTime, InputsAtCompileTime> JacobianType;
+
+	const int m_inputs, m_values;
+
+	Functor() : m_inputs(InputsAtCompileTime), m_values(ValuesAtCompileTime) {}
+	Functor(int inputs, int values) : m_inputs(inputs), m_values(values) {}
+
+	int inputs() const { return m_inputs; }
+	int values() const { return m_values; }
+};
+
+/**
+ * @brief LevenbergMarquardt cost function for the orthographic camera estimation.
+ */
+struct OrthographicParameterProjection : Functor<double>
+{
+public:
+	// Creates a new OrthographicParameterProjection object with given data.
+	OrthographicParameterProjection(std::vector<cv::Vec2f> image_points, std::vector<cv::Vec4f> model_points, int width, int height) : Functor<double>(6, image_points.size()), image_points(image_points), model_points(model_points), width(width), height(height) {};
+
+	// x = current params, fvec = the errors/differences of the proj with current params and the GT (image_points)
+	int operator()(const Eigen::VectorXd& x, Eigen::VectorXd& fvec) const
+	{
+		const float aspect = static_cast<float>(width) / height;
+		for (int i = 0; i < values(); i++)
+		{
+			// opencv to glm:
+			glm::vec3 point_3d(model_points[i][0], model_points[i][1], model_points[i][2]);
+			// projection given current params x:
+			glm::vec3 proj_with_current_param_esti = project_ortho(point_3d, x[0], x[1], x[2], x[3], x[4], x[5], width, height);
+			cv::Vec2f proj_point_2d(proj_with_current_param_esti.x, proj_with_current_param_esti.y);
+			// diff of current proj to ground truth, our error
+			auto diff = cv::norm(proj_point_2d, image_points[i]);
+			// fvec should contain the differences
+			// don't square it.
+			fvec[i] = diff;
+		}
+		return 0;
+	};
+
+private:
+	std::vector<cv::Vec2f> image_points;
+	std::vector<cv::Vec4f> model_points;
+	int width;
+	int height;
+};
+
+		} /* namespace detail */
+	} /* namespace fitting */
+} /* namespace eos */
+
+#endif /* NONLINEARCAMERAESTIMATION_DETAIL_HPP_ */
--- a/include/eos/fitting/nonlinear_camera_estimation.hpp
+++ b/include/eos/fitting/nonlinear_camera_estimation.hpp
+/*
+ * Eos - A 3D Morphable Model fitting library written in modern C++11/14.
+ *
+ * File: include/eos/fitting/nonlinear_camera_estimation.hpp
+ *
+ * Copyright 2015 Patrik Huber
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#ifndef NONLINEARCAMERAESTIMATION_HPP_
+#define NONLINEARCAMERAESTIMATION_HPP_
+
+#include "eos/fitting/detail/nonlinear_camera_estimation_detail.hpp"
+
+#include "glm/gtc/matrix_transform.hpp"
+
+#include "Eigen/Geometry"
+#include "unsupported/Eigen/NonLinearOptimization"
+
+#include "opencv2/core/core.hpp"
+
+#include <vector>
+#include <cassert>
+
+namespace eos {
+	namespace fitting {
+
+/**
+ * @brief A class representing a camera viewing frustum. At the
+ * moment used as orthographic camera only.
+ */
+struct Frustum
+{
+	float l, r, b, t;
+	// optional<float> n, f;
+};
+
+/**
+ * @brief Represents a set of estimated model parameters (rotation, translation) and
+ * camera parameters (viewing frustum).
+ *
+ * Strictly speaking, the estimated rotation and translation are not camera parameters, they
+ * are the values that transform the model from model-space to camera-space, so they are the
+ * inverse of the camera position.
+ * The camera frustum describes the size of the viewing plane of the camera.
+ *
+ * Together, these parameters fully describe the imaging process of a given model instance
+ * (under an orthographic projection).
+ *
+ * The rotation values are given in radians and estimated using the RPY convention.
+ * Yaw is applied first to the model, then pitch, then roll (R * P * Y * vertex).
+ */
+struct CameraParameters
+{
+	float r_x; // pitch
+	float r_y; // yaw. positive means subject is looking left (we see his/her right cheek).
+	float r_z; // roll
+	float t_x;
+	float t_y;
+	Frustum frustum;
+};
+
+/**
+ * @brief This algorithm estimates the rotation angles and translation of the model, as
+ * well as the viewing frustum of the camera, given a set of corresponding 2D-3D points.
+ *
+ * It assumes an orthographic camera and estimates 6 parameters,
+ * [r_x, r_y, r_z, t_x, t_y, frustum_scale], where the first five describe how to transform
+ * the model, and the last one describes the cameras viewing frustum (see CameraParameters).
+ * This 2D-3D correspondence problem is solved using Eigen's LevenbergMarquardt algorithm.
+ *
+ * The method is slightly inspired by "Computer Vision: Models Learning and Inference",
+ * Simon J.D. Prince, 2012, but different in a lot of respects.
+ *
+ * Eigen's LM implementation requires at least 6 data points, so we require >= 6 corresponding points.
+ *
+ * Notes/improvements:
+ * The algorithm works reliable as it is, however, it could be improved with the following:
+ *  - A better initial guess (see e.g. Prince)
+ *  - Using the analytic derivatives instead of Eigen::NumericalDiff - they're easy to calculate.
+ *
+ * @param[in] image_points A list of 2D image points.
+ * @param[in] model_points Corresponding points of a 3D model.
+ * @param[in] width Width of the image (or viewport).
+ * @param[in] height Height of the image (or viewport).
+ * @return The estimated model and camera parameters.
+ */
+CameraParameters estimate_orthographic_camera(std::vector<cv::Vec2f> image_points, std::vector<cv::Vec4f> model_points, int width, int height)
+{
+	using cv::Mat;
+	assert(image_points.size() == model_points.size());
+	assert(image_points.size() >= 6); // Number of correspondence points given needs to be equal to or larger than 6
+
+	const float aspect = static_cast<float>(width) / height;
+
+	// Set up the initial parameter vector and the cost function:
+	int num_params = 6;
+	Eigen::VectorXd parameters; // [rot_x_pitch, rot_y_yaw, rot_z_roll, t_x, t_y, frustum_scale]
+	parameters.setConstant(num_params, 0.0); // Set all 6 values to zero (except frustum_scale, see next line)
+	parameters[5] = 110.0; // This is just a rough hand-chosen scaling estimate - we could do a lot better. But it works.
+	detail::OrthographicParameterProjection cost_function(image_points, model_points, width, height);
+
+	// Note: we have analytical derivatives, so we should use them!
+	Eigen::NumericalDiff<detail::OrthographicParameterProjection> cost_function_with_derivative(cost_function, 0.0001);
+	// I had to change the default value of epsfcn, it works well around 0.0001. It couldn't produce the derivative with the default, I guess the changes in the gradient were too small.
+
+	Eigen::LevenbergMarquardt<Eigen::NumericalDiff<detail::OrthographicParameterProjection>> lm(cost_function_with_derivative);
+	auto info = lm.minimize(parameters); // we could or should use the return value
+	// 'parameters' contains the solution now.
+
+	Frustum camera_frustum{ -1.0f * aspect * parameters[5], 1.0f * aspect * parameters[5], -1.0f * parameters[5], 1.0f * parameters[5] };
+	return CameraParameters{ static_cast<float>(parameters[0]), static_cast<float>(parameters[1]), static_cast<float>(parameters[2]), static_cast<float>(parameters[3]), static_cast<float>(parameters[4]), camera_frustum };
+};
+
+	} /* namespace fitting */
+} /* namespace eos */
+
+#endif /* NONLINEARCAMERAESTIMATION_HPP_ */