Commit c5628500 authored by Richard Torenvliet

Test with OpenMP GPU - added for occluding boundary vertices

parent e861a303
@@ -113,6 +113,7 @@ set(HEADERS
     ${CMAKE_CURRENT_SOURCE_DIR}/include/eos/video/Keyframe.hpp
 )
+# Set OpenMP compiler flags if possible
 find_package(OpenMP)
 if (OPENMP_FOUND)
     set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
...
@@ -42,9 +42,10 @@ namespace eos {
 struct FittingResult
 {
     RenderingParameters rendering_parameters;
-    core::LandmarkCollection<cv::Vec2f> landmarks;
     std::vector<float> pca_shape_coefficients;
     std::vector<float> blendshape_coefficients;
+    core::LandmarkCollection<cv::Vec2f> landmarks;
+    core::Mesh mesh;
 };
 } /* namespace fitting */
...
@@ -45,6 +45,7 @@
 #include <utility>
 #include <cstddef>

 namespace eos {
 namespace fitting {
@@ -391,7 +392,11 @@ inline std::pair<std::vector<cv::Vec2f>, std::vector<int>> find_occluding_edge_c
     }
     return { image_points, vertex_indices };
 };

+#ifdef _OPENMP
 /**
+ * OpenMP version
+ *
  * @brief Computes the vertices that lie on occluding boundaries, given a particular pose.
  *
  * This algorithm computes the edges that lie on occluding boundaries of the mesh.
@@ -404,7 +409,7 @@ inline std::pair<std::vector<cv::Vec2f>, std::vector<int>> find_occluding_edge_c
  * @param[in] R The rotation (pose) under which the occluding boundaries should be computed.
  * @return A vector with unique vertex id's making up the edges.
  */
-inline std::vector<int> occluding_boundary_vertices_parallel(const core::Mesh& mesh, const morphablemodel::EdgeTopology& edge_topology, glm::mat4x4 R)
+std::vector<int> occluding_boundary_vertices_parallel(const core::Mesh& mesh, const morphablemodel::EdgeTopology& edge_topology, glm::mat4x4 R)
 {
     // Rotate the mesh:
     std::vector<glm::vec4> rotated_vertices;
@@ -423,7 +428,8 @@ inline std::vector<int> occluding_boundary_vertices_parallel(const core::Mesh& m
     // Find occluding edges:
     std::vector<int> occluding_edges_indices;
-    for (int edge_idx = 0; edge_idx < edge_topology.adjacent_faces.size(); ++edge_idx) // For each edge... Ef contains the indices of the two adjacent faces
+    for (int edge_idx = 0; edge_idx < edge_topology.adjacent_faces.size();
+         ++edge_idx) // For each edge... Ef contains the indices of the two adjacent faces
     {
         const auto &edge = edge_topology.adjacent_faces[edge_idx];
         if (edge[0] == 0) // ==> NOTE/Todo Need to change this if we use 0-based indexing!
@@ -455,65 +461,81 @@ inline std::vector<int> occluding_boundary_vertices_parallel(const core::Mesh& m
     // Remove duplicate vertex id's (std::unique works only on sorted sequences):
     std::sort(begin(occluding_vertices), end(occluding_vertices));
     occluding_vertices.erase(std::unique(begin(occluding_vertices), end(occluding_vertices)), end(occluding_vertices));

     // Perform ray-casting to find out which vertices are not visible (i.e. self-occluded):
-    std::vector<bool> visibility;
-    int NUM_THREADS = 4;
+    // Use map to allow parallelism, all vertex_ids are unique
+    std::vector<int> vertex_id_visible_map(occluding_vertices.size());

     auto t1 = std::chrono::high_resolution_clock::now();
-    // we shoot the ray from the vertex towards the camera
-    glm::vec3 ray_direction(0.0f, 0.0f, 1.0f);
-    #pragma omp parallel for schedule num_threads(NUM_THREADS)
-    for(int i = 0; i < occluding_vertices.size(); i++) {
+    // #pragma omp target map(to: occluding_vertices, mesh, rotated_vertices)
+    // #pragma omp parallel for
+    #pragma omp target map(alloc:vertex_id_visible_map) map(from:occluding_vertices, rotated_vertices, mesh)
+    {
+    #pragma omp parallel for
+    for (int i = 0; i < occluding_vertices.size(); i++) {
         const auto vertex_idx = occluding_vertices[i];
-        bool visible = true;
+        int visible = 1;
         glm::vec3 ray_origin(rotated_vertices[vertex_idx]);
+        // we shoot the ray from the vertex towards the camera
+        glm::vec3 ray_direction(0.0f, 0.0f, 1.0f);

         // For every tri of the rotated mesh:
-        for(int j = 0; j < mesh.tvi.size(); j++) {
+        for (int j = 0; j < mesh.tvi.size(); j++) {
             auto tri = mesh.tvi[j];
             auto &v0 = rotated_vertices[tri[0]];
             auto &v1 = rotated_vertices[tri[1]];
             auto &v2 = rotated_vertices[tri[2]];

-            auto intersect = ray_triangle_intersect(ray_origin, ray_direction, glm::vec3(v0), glm::vec3(v1), glm::vec3(v2), false);
+            auto intersect = ray_triangle_intersect(ray_origin,
+                                                    ray_direction,
+                                                    glm::vec3(v0),
+                                                    glm::vec3(v1),
+                                                    glm::vec3(v2),
+                                                    false);

             // first is bool intersect, second is the distance t
             if (intersect.first == true) {
                 // We've hit a triangle. Ray hit its own triangle. If it's behind the ray origin, ignore the intersection:
                 // Check if in front or behind?
-                if (intersect.second.get() <= 1e-4)
-                {
+                if (intersect.second.get() <= 1e-4) {
                     continue; // the intersection is behind the vertex, we don't care about it
                 }

                 // Otherwise, we've hit a genuine triangle, and the vertex is not visible:
-                visible = false;
+                visible = 0;
                 break;
             }
         }
-        visibility.push_back(visible);
+        vertex_id_visible_map[i] = visible;
     }
+    }

     auto t2 = std::chrono::high_resolution_clock::now();
-    std::cout << "test derp 1: " << std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count() << "ms" << std::endl;

-    // Remove vertices from occluding boundary list that are not visible:
+    // copy the results to final vertex ids
    std::vector<int> final_vertex_ids;
-    for (int i = 0; i < occluding_vertices.size(); ++i)
-    {
-        if (visibility[i] == true)
-        {
-            final_vertex_ids.push_back(occluding_vertices[i]);
+    for (int i = 0; i < occluding_vertices.size(); ++i) {
+        const auto vertex_idx = occluding_vertices[i];
+        int visible = vertex_id_visible_map[i];
+        if (visible == 1) {
+            final_vertex_ids.push_back(vertex_idx);
        }
    }

+    auto final_timing = std::chrono::duration_cast<std::chrono::milliseconds>(t2-t1).count();
+    printf("S %lu %lld ms (mean: %f)\n",
+           final_vertex_ids.size(),
+           final_timing,
+           static_cast<float>(final_timing) / final_vertex_ids.size()
+    );

     return final_vertex_ids;
 };
 /**
+ *
+ * OpenMP version
+ *
  * @brief For a given list of 2D edge points, find corresponding 3D vertex IDs.
  *
  * This algorithm first computes the 3D mesh's occluding boundary vertices under
@@ -593,6 +615,7 @@ inline std::pair<std::vector<cv::Vec2f>, std::vector<int>> find_occluding_edge_c
     }
     return { image_points, vertex_indices };
 };
+#endif // _OPENMP
...
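A note on the `map` clauses in the new target region above: in OpenMP, `map(to:)` copies data host-to-device on entry, `map(from:)` copies device-to-host on exit, and `map(alloc:)` allocates device storage without copying either way. Mapping the inputs (`occluding_vertices`, `rotated_vertices`, `mesh`) as `from:` therefore leaves them uninitialised on the device; the commented-out `map(to: ...)` line is the conventional direction for inputs. `map` also only shallow-copies types like `std::vector` or `core::Mesh`, which is why offloaded kernels are usually written against flat arrays. A minimal, self-contained sketch of that pattern (toy data and a trivial stand-in for the per-vertex ray cast, not the library's code):

```cpp
#include <cstdio>
#include <vector>

int main()
{
    const int n = 1024;
    std::vector<float> depths(n);     // stand-in for rotated-vertex depths
    std::vector<int> visible(n, 0);   // one flag per vertex, 1 = visible
    for (int i = 0; i < n; i++) depths[i] = (i % 3) - 1.0f; // toy data

    float* d = depths.data();
    int* v = visible.data();
    // Inputs go "to" the device, results come back "from" it. Without
    // offloading support the pragma is ignored and this runs on the host.
    #pragma omp target teams distribute parallel for map(to: d[0:n]) map(from: v[0:n])
    for (int i = 0; i < n; i++)
    {
        v[i] = (d[i] > 0.0f) ? 1 : 0; // stand-in for the ray-triangle test
    }

    int count = 0;
    for (int i = 0; i < n; i++) count += visible[i];
    std::printf("%d of %d visible\n", count, n);
    return 0;
}
```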
This diff is collapsed.
@@ -315,8 +315,8 @@ inline std::vector<float> fit_shape_to_landmarks_linear_multi_parallel(
 #pragma omp parallel num_threads(num_threads)
     {
 #pragma omp for
-        for (int i = 0; i < num_landmarks; ++i)
-        {
+        for (int i = 0; i < num_landmarks; ++i) {
+            // calculate index from landmark offset (not always the same landmarks amount).
             int index = (landmark_offset_index[k] * 4) + 4 * i;
             int offset = landmark_offset_index[k] + i;
             auto vertex_id = vertex_ids[k][i];
@@ -328,16 +328,14 @@ inline std::vector<float> fit_shape_to_landmarks_linear_multi_parallel(
                 basis_rows_.block(0, 0, basis_rows_.rows(), num_coeffs_to_fit);

             // Set y with landmark coordinates (will be used in linear lsq later)
 #pragma omp simd
-            for (int j = 0; j < 2; j++)
-            {
+            for (int j = 0; j < 2; j++) {
                 y(3 * offset + j) = landmarks[k][i][j];
             }

             // Set v_bar with face mesh (will be used in linear lsq later)
 #pragma omp simd
-            for (int j = 0; j < 3; j++)
-            {
+            for (int j = 0; j < 3; j++) {
                 v_bar(4 * offset + j) = base_face[k](vertex_id * 3 + j);
             }
         }
@@ -368,13 +366,13 @@ inline std::vector<float> fit_shape_to_landmarks_linear_multi_parallel(
     auto t2 = std::chrono::high_resolution_clock::now();
-    t1 = std::chrono::high_resolution_clock::now();

     // fill P with coefficients
     P.setFromTriplets(P_coefficients.begin(), P_coefficients.end());

     MatrixXf one;
     MatrixXf two;
     MatrixXf A;
     MatrixXf b;
+    MatrixXf rhs;

 #pragma omp parallel sections num_threads(2)
     {
@@ -382,6 +380,7 @@ inline std::vector<float> fit_shape_to_landmarks_linear_multi_parallel(
         {
             A = P * V_hat_h; // camera matrix times the basis
             one = A.transpose() * Omega.asDiagonal();
+            rhs = -one * b; // It's -A^t*Omega^t*b, but we don't need to transpose Omega, since it's a diagonal matrix, and Omega^t = Omega.
         }
 #pragma omp section
@@ -392,12 +391,8 @@ inline std::vector<float> fit_shape_to_landmarks_linear_multi_parallel(
     }
     const MatrixXf AtOmegaAReg = one * A + two;
-    const MatrixXf rhs = -one * b; // It's -A^t*Omega^t*b, but we don't need to transpose Omega, since it's a diagonal matrix, and Omega^t = Omega.
     const VectorXf c_s = AtOmegaAReg.colPivHouseholderQr().solve(rhs);
-    t2 = std::chrono::high_resolution_clock::now();
-    std::cout << "matrix xf " << std::chrono::duration_cast<std::chrono::milliseconds>(t2-t1).count() << "ms" << std::endl;

     return std::vector<float>(c_s.data(), c_s.data() + c_s.size());
 };
 #endif
...
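For reference, the surrounding function assembles a weighted, regularised linear least-squares system and splits the two independent matrix products across `parallel sections` before a single QR solve, as in the hunks above. A reduced sketch with toy sizes (the names `one`, `two`, `rhs` mirror the diff; `omega` and the regulariser are stand-ins for the real weights and prior):

```cpp
#include <Eigen/Dense>
#include <iostream>

int main()
{
    using Eigen::MatrixXf;
    using Eigen::VectorXf;

    const int rows = 12, coeffs = 4;
    MatrixXf A = MatrixXf::Random(rows, coeffs); // camera matrix times basis
    VectorXf b = VectorXf::Random(rows);         // right-hand-side term
    VectorXf omega = VectorXf::Ones(rows);       // per-observation weights

    MatrixXf one, two, rhs;
    #pragma omp parallel sections num_threads(2)
    {
        #pragma omp section
        {
            one = A.transpose() * omega.asDiagonal(); // A^t * Omega
            rhs = -one * b; // Omega is diagonal, so Omega^t == Omega
        }
        #pragma omp section
        {
            two = 0.5f * MatrixXf::Identity(coeffs, coeffs); // stand-in regulariser
        }
    }
    const MatrixXf AtOmegaAReg = one * A + two;
    const VectorXf c_s = AtOmegaAReg.colPivHouseholderQr().solve(rhs);
    std::cout << "coefficients: " << c_s.transpose() << std::endl;
    return 0;
}
```

The implicit barrier at the end of the sections region guarantees that both `one` and `two` are ready before the solve.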
@@ -141,6 +141,7 @@ inline std::pair<cv::Mat, cv::Mat> render(
     // bool enable_texturing = false; Maybe re-add later, not sure
     // take a cv::Mat texture instead and convert to Texture internally? no, we don't want to recreate mipmap levels on each render() call.
+    auto t1 = std::chrono::high_resolution_clock::now();
     assert(mesh.vertices.size() == mesh.colors.size() || mesh.colors.empty()); // The number of vertices has to be equal for both shape and colour, or, alternatively, it has to be a shape-only model.
     assert(mesh.vertices.size() == mesh.texcoords.size() || mesh.texcoords.empty()); // same for the texcoords
     // another assert: If cv::Mat texture != empty, then we need texcoords?
@@ -177,77 +178,102 @@ inline std::pair<cv::Mat, cv::Mat> render(
     // Prepare the rasterisation stage.
     // For every vertex/tri:
     vector<detail::TriangleToRasterize> triangles_to_raster;
-    for (const auto& tri_indices : mesh.tvi) {
-        // Todo: Split this whole stuff up. Make a "clip" function, ... rename "processProspective..".. what is "process"... get rid of "continue;"-stuff by moving stuff inside process...
-        // classify vertices visibility with respect to the planes of the view frustum
-        // we're in clip-coords (NDC), so just check if outside [-1, 1] x ...
-        // Actually we're in clip-coords and it's not the same as NDC. We're only in NDC after the division by w.
-        // We should do the clipping in clip-coords though. See http://www.songho.ca/opengl/gl_projectionmatrix.html for more details.
-        // However, when comparing against w_c below, we might run into the trouble of the sign again in the affine case.
-        // 'w' is always positive, as it is -z_camspace, and all z_camspace are negative.
-        unsigned char visibility_bits[3];
-        for (unsigned char k = 0; k < 3; k++)
-        {
-            visibility_bits[k] = 0;
-            float x_cc = clipspace_vertices[tri_indices[k]].position[0];
-            float y_cc = clipspace_vertices[tri_indices[k]].position[1];
-            float z_cc = clipspace_vertices[tri_indices[k]].position[2];
-            float w_cc = clipspace_vertices[tri_indices[k]].position[3];
-            if (x_cc < -w_cc) // true if outside of view frustum. False if on or inside the plane.
-                visibility_bits[k] |= 1; // set bit if outside of frustum
-            if (x_cc > w_cc)
-                visibility_bits[k] |= 2;
-            if (y_cc < -w_cc)
-                visibility_bits[k] |= 4;
-            if (y_cc > w_cc)
-                visibility_bits[k] |= 8;
-            if (enable_near_clipping && z_cc < -w_cc) // near plane frustum clipping
-                visibility_bits[k] |= 16;
-            if (enable_far_clipping && z_cc > w_cc) // far plane frustum clipping
-                visibility_bits[k] |= 32;
-        } // if all bits are 0, then it's inside the frustum
-        // all vertices are not visible - reject the triangle.
-        if ((visibility_bits[0] & visibility_bits[1] & visibility_bits[2]) > 0)
-        {
-            continue;
-        }
-        // all vertices are visible - pass the whole triangle to the rasterizer. = All bits of all 3 triangles are 0.
-        if ((visibility_bits[0] | visibility_bits[1] | visibility_bits[2]) == 0)
-        {
-            boost::optional<detail::TriangleToRasterize> t = detail::process_prospective_tri(clipspace_vertices[tri_indices[0]], clipspace_vertices[tri_indices[1]], clipspace_vertices[tri_indices[2]], viewport_width, viewport_height, enable_backface_culling);
-            if (t) {
-                triangles_to_raster.push_back(*t);
-            }
-            continue;
-        }
-        // at this moment the triangle is known to be intersecting one of the view frustum's planes
-        std::vector<detail::Vertex<float>> vertices;
-        vertices.push_back(clipspace_vertices[tri_indices[0]]);
-        vertices.push_back(clipspace_vertices[tri_indices[1]]);
-        vertices.push_back(clipspace_vertices[tri_indices[2]]);
-        // split the triangle if it intersects the near plane:
-        if (enable_near_clipping)
-        {
-            vertices = detail::clip_polygon_to_plane_in_4d(vertices, glm::tvec4<float>(0.0f, 0.0f, -1.0f, -1.0f)); // "Normal" (or "4D hyperplane") of the near-plane. I tested it and it works like this but I'm a little bit unsure because Songho says the normal of the near-plane is (0,0,-1,1) (maybe I have to switch around the < 0 checks in the function?)
-        }
-        // triangulation of the polygon formed of vertices array
-        if (vertices.size() >= 3)
-        {
-            for (unsigned char k = 0; k < vertices.size() - 2; k++)
-            {
-                boost::optional<detail::TriangleToRasterize> t = detail::process_prospective_tri(vertices[0], vertices[1 + k], vertices[2 + k], viewport_width, viewport_height, enable_backface_culling);
-                if (t) {
-                    triangles_to_raster.push_back(*t);
-                }
-            }
-        }
-    }
+    //#pragma omp target
+    // {
+    //#pragma omp parallel for
+    for(int i = 0; i < mesh.tvi.size(); i++) {
+        const auto tri_indices = mesh.tvi[i];
+        // Todo: Split this whole stuff up. Make a "clip" function, ... rename "processProspective..".. what is "process"... get rid of "continue;"-stuff by moving stuff inside process...
+        // classify vertices visibility with respect to the planes of the view frustum
+        // we're in clip-coords (NDC), so just check if outside [-1, 1] x ...
+        // Actually we're in clip-coords and it's not the same as NDC. We're only in NDC after the division by w.
+        // We should do the clipping in clip-coords though. See http://www.songho.ca/opengl/gl_projectionmatrix.html for more details.
+        // However, when comparing against w_c below, we might run into the trouble of the sign again in the affine case.
+        // 'w' is always positive, as it is -z_camspace, and all z_camspace are negative.
+        unsigned char visibility_bits[3];
+        for (unsigned char k = 0; k < 3; k++) {
+            visibility_bits[k] = 0;
+            float x_cc = clipspace_vertices[tri_indices[k]].position[0];
+            float y_cc = clipspace_vertices[tri_indices[k]].position[1];
+            float z_cc = clipspace_vertices[tri_indices[k]].position[2];
+            float w_cc = clipspace_vertices[tri_indices[k]].position[3];
+            if (x_cc < -w_cc) // true if outside of view frustum. False if on or inside the plane.
+                visibility_bits[k] |= 1; // set bit if outside of frustum
+            if (x_cc > w_cc)
+                visibility_bits[k] |= 2;
+            if (y_cc < -w_cc)
+                visibility_bits[k] |= 4;
+            if (y_cc > w_cc)
+                visibility_bits[k] |= 8;
+            if (enable_near_clipping && z_cc < -w_cc) // near plane frustum clipping
+                visibility_bits[k] |= 16;
+            if (enable_far_clipping && z_cc > w_cc) // far plane frustum clipping
+                visibility_bits[k] |= 32;
+        } // if all bits are 0, then it's inside the frustum
+        // all vertices are not visible - reject the triangle.
+        if ((visibility_bits[0] & visibility_bits[1] & visibility_bits[2]) > 0)
+        {
+            continue;
+        }
+        // all vertices are visible - pass the whole triangle to the rasterizer. = All bits of all 3 triangles are 0.
+        if ((visibility_bits[0] | visibility_bits[1] | visibility_bits[2]) == 0)
+        {
+            boost::optional<detail::TriangleToRasterize> t =
+                detail::process_prospective_tri(clipspace_vertices[tri_indices[0]],
+                                                clipspace_vertices[tri_indices[1]],
+                                                clipspace_vertices[tri_indices[2]],
+                                                viewport_width,
+                                                viewport_height,
+                                                enable_backface_culling);
+            if (t)
+            {
+                triangles_to_raster.push_back(*t);
+            }
+            continue;
+        }
+        // at this moment the triangle is known to be intersecting one of the view frustum's planes
+        std::vector<detail::Vertex<float>> vertices;
+        vertices.push_back(clipspace_vertices[tri_indices[0]]);
+        vertices.push_back(clipspace_vertices[tri_indices[1]]);
+        vertices.push_back(clipspace_vertices[tri_indices[2]]);
+        // split the triangle if it intersects the near plane:
+        if (enable_near_clipping)
+        {
+            vertices = detail::clip_polygon_to_plane_in_4d(vertices,
+                                                           glm::tvec4<float>(0.0f,
+                                                                             0.0f,
+                                                                             -1.0f,
+                                                                             -1.0f)); // "Normal" (or "4D hyperplane") of the near-plane. I tested it and it works like this but I'm a little bit unsure because Songho says the normal of the near-plane is (0,0,-1,1) (maybe I have to switch around the < 0 checks in the function?)
+        }
+        // triangulation of the polygon formed of vertices array
+        if (vertices.size() >= 3)
+        {
+            for (unsigned char k = 0; k < vertices.size() - 2; k++)
+            {
+                boost::optional<detail::TriangleToRasterize> t = detail::process_prospective_tri(vertices[0],
+                                                                                                 vertices[1 + k],
+                                                                                                 vertices[2 + k],
+                                                                                                 viewport_width,
+                                                                                                 viewport_height,
+                                                                                                 enable_backface_culling);
+                if (t)
+                {
+                    triangles_to_raster.push_back(*t);
+                }
+            }
+        }
+    }
+    // }

     // Fragment/pixel shader: Colour the pixel values
     for (const auto& tri : triangles_to_raster) {
         detail::raster_triangle(tri, colourbuffer, depthbuffer, texture, enable_far_clipping);
     }
+    auto t2 = std::chrono::high_resolution_clock::now();
+    auto final_timing = std::chrono::duration_cast<std::chrono::milliseconds>(t2-t1).count();
+    printf("Tri %lu %lld ms\n", triangles_to_raster.size(), final_timing);
     return std::make_pair(colourbuffer, depthbuffer);
 };
...
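The `//#pragma omp target` and `//#pragma omp parallel for` lines above are left commented out, likely because each iteration may `push_back` into the shared `triangles_to_raster` vector, which would be a data race under a parallel loop. A common remedy, sketched here with a stand-in `Triangle` type rather than `detail::TriangleToRasterize`, is one buffer per thread, merged after the loop (requires OpenMP):

```cpp
#include <omp.h>
#include <vector>

struct Triangle { int id; }; // stand-in for detail::TriangleToRasterize

std::vector<Triangle> gather_triangles(int num_tris)
{
    // One buffer per thread avoids racing on a shared push_back.
    std::vector<std::vector<Triangle>> per_thread(omp_get_max_threads());
    #pragma omp parallel for
    for (int i = 0; i < num_tris; i++)
    {
        // Clipping/culling work would go here; keep every second triangle
        // as a placeholder for "survived the frustum tests".
        if (i % 2 == 0)
            per_thread[omp_get_thread_num()].push_back(Triangle{i});
    }
    std::vector<Triangle> merged;
    for (const auto& local : per_thread)
        merged.insert(merged.end(), local.begin(), local.end());
    return merged;
}
```

Note that the merged order differs from the serial loop's order; that is harmless for a z-buffered rasteriser, but would matter if draw order were significant.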
This diff is collapsed.
@@ -39,8 +39,13 @@ add_executable(accuracy-evaluation accuracy-evaluation.cpp)
 target_link_libraries(accuracy-evaluation eos ${OpenCV_LIBS} ${Boost_LIBRARIES})
 target_link_libraries(accuracy-evaluation "$<$<CXX_COMPILER_ID:GNU>:-pthread>$<$<CXX_COMPILER_ID:Clang>:-pthreads>")

+add_executable(openmp-test openmp-test.cpp)
+target_link_libraries(openmp-test eos ${OpenCV_LIBS} ${Boost_LIBRARIES})
+target_link_libraries(openmp-test "$<$<CXX_COMPILER_ID:GNU>:-pthread>$<$<CXX_COMPILER_ID:Clang>:-pthreads>")
+
 # Install targets:
 install(TARGETS scm-to-cereal DESTINATION bin)
 install(TARGETS bfm-binary-to-cereal DESTINATION bin)
 install(TARGETS edgestruct-csv-to-json DESTINATION bin)
 install(TARGETS accuracy-evaluation DESTINATION bin)
+install(TARGETS openmp-test DESTINATION bin)
@@ -48,6 +48,7 @@ using eos::core::Landmark;
 using eos::core::LandmarkCollection;
 using eos::video::BufferedVideoIterator;
 using eos::video::WeightedIsomapAveraging;
+using eos::video::ReconstructionVideoWriter;
 using cv::Mat;
 using cv::Vec2f;
 using cv::Vec3f;
@@ -192,8 +193,13 @@ void render_output(
     boost::property_tree::ptree settings,
     int n_iter) {

+    std::string output_path = settings.get<std::string>("output.output_path", "/tmp");
+    float merge_isomap_face_angle = settings.get<float>("output.merge_isomap_face_angle", 60.f);
+    bool make_video = settings.get<bool>("output.save_video", false);
+    bool show_video = settings.get<bool>("output.show_video", false);
+
     Mat merged_isomap;
-    WeightedIsomapAveraging isomap_averaging(60.f); // merge all triangles that are facing <60° towards the camera
+    WeightedIsomapAveraging isomap_averaging(merge_isomap_face_angle); // merge all triangles that are facing <60° towards the camera
     eos::video::PcaCoefficientMerging pca_shape_merging;

     auto landmark_mapper = reconstruction_data.landmark_mapper;
@@ -208,10 +214,9 @@ void render_output(
         auto unmodified_frame = frame.clone();
         int frame_number = key_frames[i].frame_number;
-        float yaw_angle = glm::degrees(glm::yaw(rendering_paramss[i].get_rotation()));
+        float yaw_angle = key_frames[i].yaw_angle;// glm::degrees(glm::yaw(rendering_paramss[i].get_rotation()));

-        cv::imwrite("/tmp/eos/" +
-            std::to_string(frame_number) + "_" + std::to_string(yaw_angle) + ".png", frame);
+        cv::imwrite("/tmp/eos/" + std::to_string(frame_number) + "_" + std::to_string(yaw_angle) + ".png", frame);

         // Extract the texture using the fitted mesh from this frame:
         Mat affine_cam = fitting::get_3x4_affine_camera_matrix(rendering_paramss[i], frame.cols, frame.rows);
@@ -302,8 +307,20 @@ void render_output(
             false,
             rendering);

-        cv::imshow("render", rendering);
-        cv::waitKey(10);
+        // if(save_video) {
+        //     cv::imshow("render", rendering);
+        //     cv::waitKey(1);
+        // }
+        //
+        // if(show_video) {
+        //     cv::imshow("render", rendering);
+        //     cv::waitKey(1);
+        // }
+        //
+        // eos::video::ReconstructionVideoWriter outputVideo;
+        // Size S = Size((int) inputVideo.get(CV_CAP_PROP_FRAME_WIDTH), //Acquire input size
+        //               (int) inputVideo.get(CV_CAP_PROP_FRAME_HEIGHT));
+        // outputVideo.open(NAME , ex, inputVideo.get(CV_CAP_PROP_FPS),S, true);
     }
 }

 /**
@@ -334,8 +351,10 @@ void evaluate_results(
     boost::property_tree::ptree settings,
     int n_iter) {

-    WeightedIsomapAveraging isomap_averaging(10.f); // merge all triangles that are facing <60° towards the camera
+    std::string output_path = settings.get<std::string>("output.output_path", "/tmp");
+    float merge_isomap_face_angle = settings.get<float>("output.merge_isomap_face_angle", 60.f);
+    WeightedIsomapAveraging isomap_averaging(merge_isomap_face_angle); // merge all triangles that are facing <60° towards the camera
     Mat merged_isomap;
     fs::path outputfilebase = annotations[0];
@@ -377,11 +396,11 @@ void evaluate_results(
             rendering_params.get_projection(),
             fitting::get_opencv_viewport(frame_width, frame_height));

-        bool eyes_open = isomap_averaging.has_eyes_open(frame, landmarks);
-
-        if (!eyes_open) {
-            continue;
-        }
+        // bool eyes_open = isomap_averaging.has_eyes_open(frame, landmarks);
+        //
+        // if (!eyes_open) {
+        //     continue;
+        // }

         // cv::imshow("Img", outimg);
         // cv::waitKey(0);
@@ -649,19 +668,21 @@ int main(int argc, char *argv[]) {
     // Start getting video frames:
     vid_iterator.start();
+    // vid_writer.start();

     // Count the amount of iterations:
     int n_iter = 0;

     while(vid_iterator.is_playing()) {
         auto key_frames = vid_iterator.get_keyframes();

         // Generate a sublist of the total available landmark list:
         auto landmark_sublist = sample_landmarks(key_frames, landmark_list);
+        // std::cout << vid_iterator.to_string() << std::endl;

         // it makes no sense to update pca_coeff if nothing in the buffer has changed:
         if (vid_iterator.has_changed()) {
             std::cout << "Going to reconstruct with " << key_frames.size() << " images."<< std::endl;

             // Fit shape and pose:
             auto t1 = std::chrono::high_resolution_clock::now();
             std::tie(meshs, rendering_paramss) = fitting::fit_shape_and_pose_multi_parallel(
@@ -681,7 +702,8 @@ int main(int argc, char *argv[]) {
                 boost::none,
                 pca_shape_coefficients,
                 blendshape_coefficients,
-                fitted_image_points
+                fitted_image_points,
+                settings
             );
             auto t2 = std::chrono::high_resolution_clock::now();
@@ -690,62 +712,62 @@ int main(int argc, char *argv[]) {
                 << "ms, mean(" << std::chrono::duration_cast<std::chrono::milliseconds>(t2-t1).count() / key_frames.size()
                 << "ms)" << std::endl;

-            vid_iterator.update_reconstruction_coeff(pca_shape_coefficients);
-            evaluate_results(
-                key_frames,
-                rendering_paramss,
-                meshs,
-                pca_shape_coefficients,
-                blendshape_coefficients,
-                fitted_image_points,
-                annotations,
-                reconstruction_data,
-                settings,
-                n_iter
-            );
-        } else {
-            std::cout << "No reconstruction - buffer did not change" << std::endl;
-        }
-
-        // // Render output:
-        // render_output(
-        //     key_frames,
-        //     rendering_paramss,
-        //     landmark_sublist,
-        //     meshs,
-        //     pca_shape_coefficients,
-        //     blendshape_coefficients,
-        //     fitted_image_points,
-        //     annotations,
-        //     reconstruction_data,
-        //     vid_iterator.last_frame,
-        //     vid_iterator.last_frame_number,
-        //     settings,
-        //     n_iter
-        // );
+            // evaluate_results(
+            //     key_frames,
+            //     rendering_paramss,
+            //     meshs,
+            //     pca_shape_coefficients,
+            //     blendshape_coefficients,
+            //     fitted_image_points,
+            //     annotations,
+            //     reconstruction_data,
+            //     settings,
+            //     n_iter
+            // );
+        } else {
+            // std::cout << "No reconstruction - buffer did not change" << std::endl;
+        }

         // Get new frames:
         n_iter++;
     }

-    auto key_frames = vid_iterator.get_keyframes();
-
-    std::cout << "Going to reconstruct with " << key_frames.size() << " images."<< std::endl;
-    evaluate_results(
-        key_frames,
-        rendering_paramss,
-        meshs,
-        pca_shape_coefficients,
-        blendshape_coefficients,
-        fitted_image_points,
-        annotations,
-        reconstruction_data,
-        settings,
-        n_iter
-    );
+    // vid_writer.__stop();
+    if(settings.get<bool>("output.make_video", false)) {
+        ReconstructionVideoWriter vid_writer;
+        try {
+            vid_writer = ReconstructionVideoWriter(videofile.string(), reconstruction_data, settings);
+        } catch(std::runtime_error &e) {
+            std::cout << e.what() << std::endl;
+            return EXIT_FAILURE;
+        }
+        // Render output:
+        std::cout << "Waiting for video to be completed..." << std::endl;
+        vid_writer.update_reconstruction_coeff(pca_shape_coefficients);
+
+        while (vid_writer.next()) {
+            printf("%d/%d\r", vid_writer.get_frame_number(), vid_iterator.get_frame_number());
+        }
+    }
+
+    // auto key_frames = vid_iterator.get_keyframes();
+    // std::cout << "Going to reconstruct with " << key_frames.size() << " images."<< std::endl;
+    //
+    // evaluate_results(
+    //     key_frames,
+    //     rendering_paramss,
+    //     meshs,
+    //     pca_shape_coefficients,
+    //     blendshape_coefficients,
+    //     fitted_image_points,
+    //     annotations,
+    //     reconstruction_data,
+    //     settings,
+    //     n_iter
+    // );

     //todo: we could build our final obj here?
...
#include "opencv2/core/core.hpp"
#include "opencv2/opencv.hpp"
#include "opencv2/highgui/highgui.hpp"
#include <complex>
#include <iostream>
#include <omp.h>
typedef std::complex<double> complex;
/**
 * Calculates the L1 distance between two images; returns -1 if the
 * dimensions are not the same.
 *
 * @param one First image (8-bit, single channel).
 * @param two Second image (8-bit, single channel).
 * @return Sum of absolute pixel differences, or -1 on a size mismatch.
 */
float calculate_distance_images(cv::Mat one, cv::Mat two) {
	int width = one.cols;
	int height = one.rows;

	if (width != two.cols || height != two.rows) {
		return -1;
	}

	float distance = 0.0;
	// Sum the absolute differences over every pixel:
	for (int y = 0; y < height; y++) {
		for (int x = 0; x < width; x++) {
			distance += fabs(one.at<uchar>(y, x) - two.at<uchar>(y, x));
		}
	}

	return distance;
}
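The same sum of absolute differences is available from OpenCV directly, which avoids the hand-written loop (assuming both inputs are single-channel 8-bit images of equal size):

```cpp
#include <opencv2/core/core.hpp>

// L1 norm of the difference, equivalent to the loop above.
float calculate_distance_images_cv(const cv::Mat& one, const cv::Mat& two)
{
    if (one.size() != two.size() || one.type() != two.type()) {
        return -1;
    }
    return static_cast<float>(cv::norm(one, two, cv::NORM_L1));
}
```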
void cpu(cv::Mat A, cv::Mat Anew) {
	int n = A.rows;
	int m = A.cols;

	float error = 1000;
	float tol = 30;
	int iter = 0;
	int iter_max = 100;

	while (error > tol && iter < iter_max)
	{
		error = 0.0;
		#pragma omp parallel num_threads(4)
		{
			#pragma omp for reduction(max:error)
			for (int j = 1; j < n - 1; j++)
			{
				for (int i = 1; i < m - 1; i++)
				{
					Anew.at<uchar>(j, i) = 0.25 * (A.at<uchar>(j, i + 1) + A.at<uchar>(j, i - 1)
						+ A.at<uchar>(j - 1, i) + A.at<uchar>(j + 1, i));
					error = fmax(error, fabs(Anew.at<uchar>(j, i) - A.at<uchar>(j, i)));
				}
			}
		}
		// Feed the smoothed result into the next sweep, so the iteration
		// converges towards the tolerance:
		Anew.copyTo(A);
		iter++;
	}
}
void gpu(cv::Mat A, cv::Mat Anew) {
	int n = A.rows;
	int m = A.cols;

	float error = 1000;
	float tol = 30;
	int iter = 0;
	int iter_max = 100;

	while (error > tol && iter < iter_max)
	{
		error = 0.0;

		// "error" is mapped tofrom: scalars are firstprivate in a target
		// region by default, so the reduction result would otherwise never
		// reach the host. Note that cv::Mat is only shallow-copied by an
		// implicit map, so this kernel is only correct with host fallback;
		// see the flat-buffer sketch after this listing.
		#pragma omp target map(tofrom: error)
		{
			#pragma omp parallel for reduction(max:error)
			for (int j = 1; j < n - 1; j++)
			{
				for (int i = 1; i < m - 1; i++)
				{
					Anew.at<uchar>(j, i) = 0.25 * (A.at<uchar>(j, i + 1) + A.at<uchar>(j, i - 1)
						+ A.at<uchar>(j - 1, i) + A.at<uchar>(j + 1, i));
					error = fmax(error, fabs(Anew.at<uchar>(j, i) - A.at<uchar>(j, i)));
				}
			}
		}

		// The counter is incremented on the host; inside the target region
		// only a device-private copy would be updated and the loop could
		// never terminate. Feed the result into the next sweep as in cpu():
		Anew.copyTo(A);
		iter++;
	}
}
int main(int argc, char** argv)
{
	cv::Mat original = cv::imread("/data/img.jpg");
	if (original.empty()) {
		std::cerr << "Could not read /data/img.jpg" << std::endl;
		return 1;
	}
	cv::cvtColor(original, original, CV_BGR2GRAY);

	cv::Mat gpuImage = original.clone();
	cv::Mat gpuImageNew = original.clone();

	auto t1 = std::chrono::high_resolution_clock::now();
	gpu(gpuImage, gpuImageNew);
	auto t2 = std::chrono::high_resolution_clock::now();

	std::cout << "gpu time: "
		<< std::chrono::duration_cast<std::chrono::milliseconds>(t2-t1).count()
		<< "ms" << std::endl;

	cv::Mat cpuImage = original.clone();
	cv::Mat cpuImageNew = original.clone();

	t1 = std::chrono::high_resolution_clock::now();
	cpu(cpuImage, cpuImageNew);
	t2 = std::chrono::high_resolution_clock::now();

	float distance = calculate_distance_images(gpuImageNew, cpuImageNew);

	std::cout << "cpu time: "
		<< std::chrono::duration_cast<std::chrono::milliseconds>(t2-t1).count()
		<< "ms" << std::endl;

	std::cout << "Distance gpu - cpu: " << distance << std::endl;

	cv::imshow("gpu", gpuImageNew);
	cv::imshow("cpu", cpuImageNew);

	cv::waitKey(0);

	return 0;
}
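For completeness, here is a flat-buffer sketch of the gpu() sweep with the offloading semantics spelled out (assumptions: continuous single-channel 8-bit buffers of size n*m and an OpenMP 4.5+ compiler; a sketch, not a drop-in replacement). The buffers stay device-resident across sweeps via `target data`, the host pointers are swapped between sweeps, and the reduction scalar is mapped explicitly:

```cpp
#include <cmath>
#include <utility>

void gpu_explicit(unsigned char* A, unsigned char* Anew, int n, int m)
{
    float error = 1000.0f;
    const float tol = 30.0f;
    int iter = 0;
    const int iter_max = 100;

    // Map both buffers once and keep them resident for all sweeps:
    #pragma omp target data map(tofrom: A[0:n*m], Anew[0:n*m])
    while (error > tol && iter < iter_max)
    {
        error = 0.0f;
        #pragma omp target teams distribute parallel for collapse(2) \
            reduction(max:error) map(tofrom: error)
        for (int j = 1; j < n - 1; j++)
        {
            for (int i = 1; i < m - 1; i++)
            {
                Anew[j * m + i] = (unsigned char)(0.25f * (A[j * m + i + 1] + A[j * m + i - 1]
                    + A[(j - 1) * m + i] + A[(j + 1) * m + i]));
                error = fmaxf(error, fabsf((float)Anew[j * m + i] - (float)A[j * m + i]));
            }
        }
        // Ping-pong: both buffers are already present on the device, so
        // swapping the host pointers swaps their roles in the next sweep.
        std::swap(A, Anew);
        iter++;
    }
}
```

For a continuous cv::Mat this could be called as `gpu_explicit(img.data, imgNew.data, img.rows, img.cols)`; without a device backend (for GCC, `-fopenmp` plus an `-foffload` target) the target regions simply fall back to the host.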