Commit c5628500 authored by Richard Torenvliet's avatar Richard Torenvliet

Add OpenMP GPU offload test; parallelise computation of occluding boundary vertices

parent e861a303
@@ -113,6 +113,7 @@ set(HEADERS
${CMAKE_CURRENT_SOURCE_DIR}/include/eos/video/Keyframe.hpp
)
# Set OpenMP compiler flags if possible
find_package(OpenMP)
if (OPENMP_FOUND)
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
......
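With the OpenMP flags appended by this hunk, translation units can detect OpenMP support at compile time through the standard `_OPENMP` macro, the same guard the header below uses around its parallel implementation. A minimal standalone check (a sketch, not part of this commit):

```cpp
#include <cstdio>
#ifdef _OPENMP
#include <omp.h>
#endif

int main()
{
#ifdef _OPENMP
    // _OPENMP expands to the spec date (e.g. 201511 for OpenMP 4.5);
    // omp_get_max_threads() reports the default team size.
    std::printf("OpenMP %d, max threads %d\n", _OPENMP, omp_get_max_threads());
#else
    std::printf("compiled without OpenMP support\n");
#endif
    return 0;
}
```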
@@ -42,9 +42,10 @@ namespace eos {
struct FittingResult
{
RenderingParameters rendering_parameters;
std::vector<float> pca_shape_coefficients;
std::vector<float> blendshape_coefficients;
core::LandmarkCollection<cv::Vec2f> landmarks;
core::Mesh mesh;
};
} /* namespace fitting */
......
@@ -45,6 +45,7 @@
#include <utility>
#include <cstddef>
namespace eos {
namespace fitting {
@@ -391,7 +392,11 @@ inline std::pair<std::vector<cv::Vec2f>, std::vector<int>> find_occluding_edge_c
}
return { image_points, vertex_indices };
};
#ifdef _OPENMP
/**
* OpenMP version
*
* @brief Computes the vertices that lie on occluding boundaries, given a particular pose.
*
* This algorithm computes the edges that lie on occluding boundaries of the mesh.
@@ -404,7 +409,7 @@ inline std::pair<std::vector<cv::Vec2f>, std::vector<int>> find_occluding_edge_c
* @param[in] R The rotation (pose) under which the occluding boundaries should be computed.
* @return A vector with unique vertex id's making up the edges.
*/
std::vector<int> occluding_boundary_vertices_parallel(const core::Mesh& mesh, const morphablemodel::EdgeTopology& edge_topology, glm::mat4x4 R)
{
// Rotate the mesh:
std::vector<glm::vec4> rotated_vertices;
@@ -423,7 +428,8 @@ inline std::vector<int> occluding_boundary_vertices_parallel(const core::Mesh& m
// Find occluding edges:
std::vector<int> occluding_edges_indices;
for (int edge_idx = 0; edge_idx < edge_topology.adjacent_faces.size();
++edge_idx) // For each edge... Ef contains the indices of the two adjacent faces
{
const auto &edge = edge_topology.adjacent_faces[edge_idx];
if (edge[0] == 0) // ==> NOTE/Todo Need to change this if we use 0-based indexing!
@@ -455,65 +461,81 @@ inline std::vector<int> occluding_boundary_vertices_parallel(const core::Mesh& m
// Remove duplicate vertex id's (std::unique works only on sorted sequences):
std::sort(begin(occluding_vertices), end(occluding_vertices));
occluding_vertices.erase(std::unique(begin(occluding_vertices), end(occluding_vertices)), end(occluding_vertices));
// Perform ray-casting to find out which vertices are not visible (i.e. self-occluded):
// Store results in a flat array (one slot per occluding vertex) so iterations can
// write independently; all vertex ids are unique.
std::vector<int> vertex_id_visible_map(occluding_vertices.size());
auto t1 = std::chrono::high_resolution_clock::now();
// we shoot the ray from the vertex towards the camera
glm::vec3 ray_direction(0.0f, 0.0f, 1.0f);
// Inputs are mapped to the device and the result array back from it. Note that
// map() does not deep-copy std::vector contents; a real device offload would
// need the underlying buffers mapped as raw pointer/length pairs.
#pragma omp target map(to: occluding_vertices, rotated_vertices, mesh) map(from: vertex_id_visible_map)
{
#pragma omp parallel for
for (int i = 0; i < occluding_vertices.size(); i++) {
const auto vertex_idx = occluding_vertices[i];
int visible = 1;
glm::vec3 ray_origin(rotated_vertices[vertex_idx]);
// For every tri of the rotated mesh:
for (int j = 0; j < mesh.tvi.size(); j++) {
auto tri = mesh.tvi[j];
auto &v0 = rotated_vertices[tri[0]];
auto &v1 = rotated_vertices[tri[1]];
auto &v2 = rotated_vertices[tri[2]];
auto intersect = ray_triangle_intersect(ray_origin,
ray_direction,
glm::vec3(v0),
glm::vec3(v1),
glm::vec3(v2),
false);
// first is bool intersect, second is the distance t
if (intersect.first == true) {
// We've hit a triangle (the ray may hit the vertex's own triangles).
// If the intersection is at or behind the ray origin, ignore it:
if (intersect.second.get() <= 1e-4) {
continue; // the intersection is behind the vertex, we don't care about it
}
// Otherwise, we've hit a genuine triangle, and the vertex is not visible:
visible = 0;
break;
}
}
vertex_id_visible_map[i] = visible;
}
}
auto t2 = std::chrono::high_resolution_clock::now();
// Copy the visible vertices to the final list (drops the self-occluded ones):
std::vector<int> final_vertex_ids;
for (int i = 0; i < occluding_vertices.size(); ++i) {
const auto vertex_idx = occluding_vertices[i];
int visible = vertex_id_visible_map[i];
if (visible == 1) {
final_vertex_ids.push_back(vertex_idx);
}
}
auto final_timing = std::chrono::duration_cast<std::chrono::milliseconds>(t2-t1).count();
printf("Occluding boundary: %lu visible vertices, %lld ms (mean: %f ms/vertex)\n",
final_vertex_ids.size(),
final_timing,
static_cast<float>(final_timing) / final_vertex_ids.size()
);
return final_vertex_ids;
};
/**
* OpenMP version
*
* @brief For a given list of 2D edge points, find corresponding 3D vertex IDs.
*
*
* This algorithm first computes the 3D mesh's occluding boundary vertices under
@@ -593,6 +615,7 @@ inline std::pair<std::vector<cv::Vec2f>, std::vector<int>> find_occluding_edge_c
}
return { image_points, vertex_indices };
};
#endif // _OPENMP
......
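The visibility test in `occluding_boundary_vertices_parallel` leans on `ray_triangle_intersect`, which this diff does not show. For reference, a standalone sketch of the Möller-Trumbore intersection it presumably implements; the return type here is a plain `std::pair<bool, float>` rather than the `boost::optional` pair used by eos, and the backface-culling flag is omitted:

```cpp
#include <cmath>
#include <cstdio>
#include <utility>
#include <glm/glm.hpp>

// Möller-Trumbore ray/triangle intersection. Returns {hit, t}, where t is the
// distance along ray_direction; t is only meaningful when hit is true.
std::pair<bool, float> ray_triangle_intersect_sketch(const glm::vec3& ray_origin,
                                                     const glm::vec3& ray_direction,
                                                     const glm::vec3& v0,
                                                     const glm::vec3& v1,
                                                     const glm::vec3& v2)
{
    constexpr float epsilon = 1e-6f;
    const glm::vec3 e1 = v1 - v0;
    const glm::vec3 e2 = v2 - v0;
    const glm::vec3 pvec = glm::cross(ray_direction, e2);
    const float det = glm::dot(e1, pvec);
    if (std::abs(det) < epsilon) // ray parallel to the triangle's plane
        return { false, 0.0f };
    const float inv_det = 1.0f / det;
    const glm::vec3 tvec = ray_origin - v0;
    const float u = glm::dot(tvec, pvec) * inv_det; // first barycentric coord
    if (u < 0.0f || u > 1.0f)
        return { false, 0.0f };
    const glm::vec3 qvec = glm::cross(tvec, e1);
    const float v = glm::dot(ray_direction, qvec) * inv_det; // second coord
    if (v < 0.0f || u + v > 1.0f)
        return { false, 0.0f };
    return { true, glm::dot(e2, qvec) * inv_det }; // t along the ray
}

int main()
{
    // Ray from the origin along +z against a triangle in the z = 5 plane:
    auto hit = ray_triangle_intersect_sketch({0, 0, 0}, {0, 0, 1},
                                             {-1, -1, 5}, {1, -1, 5}, {0, 1, 5});
    std::printf("hit=%d t=%f\n", hit.first, hit.second); // prints hit=1 t=5.0
    return 0;
}
```

The `<= 1e-4` check in the caller then discards hits at (or numerically just behind) the ray origin, i.e. the vertex's own triangles.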
@@ -315,8 +315,8 @@ inline std::vector<float> fit_shape_to_landmarks_linear_multi_parallel(
#pragma omp parallel num_threads(num_threads)
{
#pragma omp for
for (int i = 0; i < num_landmarks; ++i) {
// Calculate the index from the landmark offset (the number of landmarks can differ per image).
int index = (landmark_offset_index[k] * 4) + 4 * i;
int offset = landmark_offset_index[k] + i;
auto vertex_id = vertex_ids[k][i];
@@ -328,16 +328,14 @@ inline std::vector<float> fit_shape_to_landmarks_linear_multi_parallel(
basis_rows_.block(0, 0, basis_rows_.rows(), num_coeffs_to_fit);
// Set y with landmark coordinates (will be used in linear lsq later)
#pragma omp simd
for (int j = 0; j < 2; j++) {
y(3 * offset + j) = landmarks[k][i][j];
}
// Set v_bar with face mesh (will be used in linear lsq later)
#pragma omp simd
for (int j = 0; j < 3; j++) {
v_bar(4 * offset + j) = base_face[k](vertex_id * 3 + j);
}
}
@@ -368,13 +366,13 @@ inline std::vector<float> fit_shape_to_landmarks_linear_multi_parallel(
auto t2 = std::chrono::high_resolution_clock::now();
t1 = std::chrono::high_resolution_clock::now();
// fill P with coefficients
P.setFromTriplets(P_coefficients.begin(), P_coefficients.end());
MatrixXf one;
MatrixXf two;
MatrixXf A;
MatrixXf b;
MatrixXf rhs;
#pragma omp parallel sections num_threads(2)
{
@@ -382,6 +380,7 @@ inline std::vector<float> fit_shape_to_landmarks_linear_multi_parallel(
{
A = P * V_hat_h; // camera matrix times the basis
one = A.transpose() * Omega.asDiagonal();
rhs = -one * b; // It's -A^t*Omega^t*b, but we don't need to transpose Omega, since it's a diagonal matrix, and Omega^t = Omega.
}
#pragma omp section
@@ -392,12 +391,8 @@ inline std::vector<float> fit_shape_to_landmarks_linear_multi_parallel(
}
const MatrixXf AtOmegaAReg = one * A + two;
const VectorXf c_s = AtOmegaAReg.colPivHouseholderQr().solve(rhs);
t2 = std::chrono::high_resolution_clock::now();
std::cout << "matrix xf " << std::chrono::duration_cast<std::chrono::milliseconds>(t2-t1).count() << "ms" << std::endl;
return std::vector<float>(c_s.data(), c_s.data() + c_s.size());
};
#endif
......
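The hunk above overlaps the two independent matrix products with `#pragma omp parallel sections`, each section running on its own thread with an implicit barrier before the combined result is used. A minimal standalone sketch of that pattern with Eigen (matrix names illustrative, not the ones from the fitting code):

```cpp
#include <iostream>
#include <Eigen/Dense>

int main()
{
    using Eigen::MatrixXf;
    const int n = 512;
    const MatrixXf A = MatrixXf::Random(n, n);
    const MatrixXf B = MatrixXf::Random(n, n);

    MatrixXf one, two;
    #pragma omp parallel sections num_threads(2)
    {
        #pragma omp section
        { one = A.transpose() * A; } // first independent product
        #pragma omp section
        { two = B.transpose() * B; } // second, computed concurrently
    }
    // Implicit barrier at the end of the sections region; safe to combine:
    const MatrixXf sum = one + two;
    std::cout << sum(0, 0) << "\n";
    return 0;
}
```

This only pays off when the two sections do comparable amounts of work; otherwise one thread idles at the barrier.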
@@ -141,6 +141,7 @@ inline std::pair<cv::Mat, cv::Mat> render(
// bool enable_texturing = false; Maybe re-add later, not sure
// take a cv::Mat texture instead and convert to Texture internally? no, we don't want to recreate mipmap levels on each render() call.
auto t1 = std::chrono::high_resolution_clock::now();
assert(mesh.vertices.size() == mesh.colors.size() || mesh.colors.empty()); // The number of vertices has to be equal for both shape and colour, or, alternatively, it has to be a shape-only model.
assert(mesh.vertices.size() == mesh.texcoords.size() || mesh.texcoords.empty()); // same for the texcoords
// another assert: If cv::Mat texture != empty, then we need texcoords?
@@ -177,7 +178,11 @@ inline std::pair<cv::Mat, cv::Mat> render(
// Prepare the rasterisation stage.
// For every vertex/tri:
vector<detail::TriangleToRasterize> triangles_to_raster;
// Note: a plain "#pragma omp parallel for" here would race on the push_back
// into triangles_to_raster; see the sketch after this file's hunks.
for (int i = 0; i < mesh.tvi.size(); i++) {
const auto& tri_indices = mesh.tvi[i];
// Todo: Split this whole stuff up. Make a "clip" function, ... rename "processProspective..".. what is "process"... get rid of "continue;"-stuff by moving stuff inside process...
// classify vertices visibility with respect to the planes of the view frustum
// we're in clip-coords (NDC), so just check if outside [-1, 1] x ...
@@ -186,8 +191,7 @@ inline std::pair<cv::Mat, cv::Mat> render(
// However, when comparing against w_c below, we might run into the trouble of the sign again in the affine case.
// 'w' is always positive, as it is -z_camspace, and all z_camspace are negative.
unsigned char visibility_bits[3];
for (unsigned char k = 0; k < 3; k++) {
visibility_bits[k] = 0;
float x_cc = clipspace_vertices[tri_indices[k]].position[0];
float y_cc = clipspace_vertices[tri_indices[k]].position[1];
@@ -214,8 +218,15 @@ inline std::pair<cv::Mat, cv::Mat> render(
// all vertices are visible - pass the whole triangle to the rasterizer. = All bits of all 3 triangles are 0.
if ((visibility_bits[0] | visibility_bits[1] | visibility_bits[2]) == 0)
{
boost::optional<detail::TriangleToRasterize> t =
detail::process_prospective_tri(clipspace_vertices[tri_indices[0]],
clipspace_vertices[tri_indices[1]],
clipspace_vertices[tri_indices[2]],
viewport_width,
viewport_height,
enable_backface_culling);
if (t)
{
triangles_to_raster.push_back(*t);
}
continue;
@@ -228,7 +239,11 @@ inline std::pair<cv::Mat, cv::Mat> render(
// split the triangle if it intersects the near plane:
if (enable_near_clipping)
{
vertices = detail::clip_polygon_to_plane_in_4d(vertices,
glm::tvec4<float>(0.0f,
0.0f,
-1.0f,
-1.0f)); // "Normal" (or "4D hyperplane") of the near-plane. I tested it and it works like this but I'm a little bit unsure because Songho says the normal of the near-plane is (0,0,-1,1) (maybe I have to switch around the < 0 checks in the function?)
}
// triangulation of the polygon formed of vertices array
@@ -236,18 +251,29 @@ inline std::pair<cv::Mat, cv::Mat> render(
{
for (unsigned char k = 0; k < vertices.size() - 2; k++)
{
boost::optional<detail::TriangleToRasterize> t = detail::process_prospective_tri(vertices[0],
vertices[1 + k],
vertices[2 + k],
viewport_width,
viewport_height,
enable_backface_culling);
if (t)
{
triangles_to_raster.push_back(*t);
}
}
}
}
// Fragment/pixel shader: Colour the pixel values
for (const auto& tri : triangles_to_raster) {
detail::raster_triangle(tri, colourbuffer, depthbuffer, texture, enable_far_clipping);
}
auto t2 = std::chrono::high_resolution_clock::now();
auto final_timing = std::chrono::duration_cast<std::chrono::milliseconds>(t2-t1).count();
printf("Tri %lu %lld ms\n", triangles_to_raster.size(), final_timing);
return std::make_pair(colourbuffer, depthbuffer);
};
......
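The commented-out `#pragma omp parallel for` over `mesh.tvi` in the hunk above would race on `triangles_to_raster.push_back`. One way to parallelise such a filter-and-collect loop safely (a sketch with illustrative stand-in types, not eos's actual structures) is per-thread buffers merged once per thread:

```cpp
#include <cstdio>
#include <vector>

struct Tri { int i0, i1, i2; }; // stand-in for detail::TriangleToRasterize

int main()
{
    const int num_tris = 100000;
    std::vector<Tri> triangles_to_raster;

    #pragma omp parallel
    {
        std::vector<Tri> local; // each thread collects into its own buffer
        #pragma omp for nowait
        for (int i = 0; i < num_tris; ++i) {
            if (i % 2 == 0) // stand-in for the visibility/clipping tests
                local.push_back({ i, i + 1, i + 2 });
        }
        #pragma omp critical // merge once per thread, not once per triangle
        triangles_to_raster.insert(triangles_to_raster.end(),
                                   local.begin(), local.end());
    }
    std::printf("collected %zu triangles\n", triangles_to_raster.size());
    return 0;
}
```

Note that the merged order differs from the serial loop's; that is only harmless if rasterisation does not depend on triangle submission order.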
@@ -39,8 +39,13 @@ add_executable(accuracy-evaluation accuracy-evaluation.cpp)
target_link_libraries(accuracy-evaluation eos ${OpenCV_LIBS} ${Boost_LIBRARIES})
target_link_libraries(accuracy-evaluation "$<$<CXX_COMPILER_ID:GNU>:-pthread>$<$<CXX_COMPILER_ID:Clang>:-pthreads>")
add_executable(openmp-test openmp-test.cpp)
target_link_libraries(openmp-test eos ${OpenCV_LIBS} ${Boost_LIBRARIES})
target_link_libraries(openmp-test "$<$<CXX_COMPILER_ID:GNU>:-pthread>$<$<CXX_COMPILER_ID:Clang>:-pthreads>")
# Install targets:
install(TARGETS scm-to-cereal DESTINATION bin)
install(TARGETS bfm-binary-to-cereal DESTINATION bin)
install(TARGETS edgestruct-csv-to-json DESTINATION bin)
install(TARGETS accuracy-evaluation DESTINATION bin)
install(TARGETS openmp-test DESTINATION bin)
@@ -48,6 +48,7 @@ using eos::core::Landmark;
using eos::core::LandmarkCollection;
using eos::video::BufferedVideoIterator;
using eos::video::WeightedIsomapAveraging;
using eos::video::ReconstructionVideoWriter;
using cv::Mat;
using cv::Vec2f;
using cv::Vec3f;
@@ -192,8 +193,13 @@ void render_output(
boost::property_tree::ptree settings,
int n_iter) {
std::string output_path = settings.get<std::string>("output.output_path", "/tmp");
float merge_isomap_face_angle = settings.get<float>("output.merge_isomap_face_angle", 60.f);
bool make_video = settings.get<bool>("output.save_video", false);
bool show_video = settings.get<bool>("output.show_video", false);
Mat merged_isomap;
WeightedIsomapAveraging isomap_averaging(merge_isomap_face_angle); // merge all triangles facing the camera within the configured angle
eos::video::PcaCoefficientMerging pca_shape_merging;
auto landmark_mapper = reconstruction_data.landmark_mapper;
@@ -208,10 +214,9 @@ void render_output(
auto unmodified_frame = frame.clone();
int frame_number = key_frames[i].frame_number;
float yaw_angle = key_frames[i].yaw_angle; // previously: glm::degrees(glm::yaw(rendering_paramss[i].get_rotation()))
cv::imwrite("/tmp/eos/" + std::to_string(frame_number) + "_" + std::to_string(yaw_angle) + ".png", frame);
// Extract the texture using the fitted mesh from this frame:
Mat affine_cam = fitting::get_3x4_affine_camera_matrix(rendering_paramss[i], frame.cols, frame.rows);
@@ -302,8 +307,20 @@ void render_output(
false,
rendering);
// if(save_video) {
// cv::imshow("render", rendering);
// cv::waitKey(1);
// }
//
// if(show_video) {
// cv::imshow("render", rendering);
// cv::waitKey(1);
// }
//
// eos::video::ReconstructionVideoWriter outputVideo;
// Size S = Size((int) inputVideo.get(CV_CAP_PROP_FRAME_WIDTH), //Acquire input size
// (int) inputVideo.get(CV_CAP_PROP_FRAME_HEIGHT));
// outputVideo.open(NAME , ex, inputVideo.get(CV_CAP_PROP_FPS),S, true);
}
/**
@@ -334,8 +351,10 @@ void evaluate_results(
boost::property_tree::ptree settings,
int n_iter) {
std::string output_path = settings.get<std::string>("output.output_path", "/tmp");
float merge_isomap_face_angle = settings.get<float>("output.merge_isomap_face_angle", 60.f);
WeightedIsomapAveraging isomap_averaging(merge_isomap_face_angle); // merge all triangles facing the camera within the configured angle
Mat merged_isomap;
fs::path outputfilebase = annotations[0];
@@ -377,11 +396,11 @@ void evaluate_results(
rendering_params.get_projection(),
fitting::get_opencv_viewport(frame_width, frame_height));
// bool eyes_open = isomap_averaging.has_eyes_open(frame, landmarks);
//
// if (!eyes_open) {
// continue;
// }
// cv::imshow("Img", outimg);
// cv::waitKey(0);
@@ -649,19 +668,21 @@ int main(int argc, char *argv[]) {
// Start getting video frames:
vid_iterator.start();
// vid_writer.start();
// Count the number of iterations:
int n_iter = 0;
while(vid_iterator.is_playing()) {
auto key_frames = vid_iterator.get_keyframes();
// Generate a sublist of the total available landmark list:
auto landmark_sublist = sample_landmarks(key_frames, landmark_list);
// std::cout << vid_iterator.to_string() << std::endl;
// it makes no sense to update pca_coeff if nothing in the buffer has changed:
if (vid_iterator.has_changed()) {
std::cout << "Going to reconstruct with " << key_frames.size() << " images."<< std::endl;
// Fit shape and pose:
auto t1 = std::chrono::high_resolution_clock::now();
std::tie(meshs, rendering_paramss) = fitting::fit_shape_and_pose_multi_parallel(
@@ -681,7 +702,8 @@ int main(int argc, char *argv[]) {
boost::none,
pca_shape_coefficients,
blendshape_coefficients,
fitted_image_points
fitted_image_points,
settings
);
auto t2 = std::chrono::high_resolution_clock::now();
@@ -690,62 +712,62 @@ int main(int argc, char *argv[]) {
<< "ms, mean(" << std::chrono::duration_cast<std::chrono::milliseconds>(t2-t1).count() / key_frames.size()
<< "ms)" << std::endl;
vid_iterator.update_reconstruction_coeff(pca_shape_coefficients);
// // Render output:
// render_output(
// evaluate_results(
// key_frames,
// rendering_paramss,
// landmark_sublist,
// meshs,
// pca_shape_coefficients,
// blendshape_coefficients,
// fitted_image_points,
// annotations,
// reconstruction_data,
// vid_iterator.last_frame,
// vid_iterator.last_frame_number,
// settings,
// n_iter
// );
} else {
// std::cout << "No reconstruction - buffer did not change" << std::endl;
}
// Get new frames:
n_iter++;
}
auto key_frames = vid_iterator.get_keyframes();
// vid_writer.__stop();
if(settings.get<bool>("output.make_video", false)) {
ReconstructionVideoWriter vid_writer;
try {
vid_writer = ReconstructionVideoWriter(videofile.string(), reconstruction_data, settings);
} catch(std::runtime_error &e) {
std::cout << e.what() << std::endl;
return EXIT_FAILURE;
}
// Render output:
std::cout << "Waiting for video to be completed..." << std::endl;
vid_writer.update_reconstruction_coeff(pca_shape_coefficients);
std::cout << "Going to reconstruct with " << key_frames.size() << " images."<< std::endl;
evaluate_results(
key_frames,
rendering_paramss,
meshs,
pca_shape_coefficients,
blendshape_coefficients,
fitted_image_points,
annotations,
reconstruction_data,
settings,
n_iter
);
while (vid_writer.next()) {
printf("%d/%d\r", vid_writer.get_frame_number(), vid_iterator.get_frame_number());
}
}
// auto key_frames = vid_iterator.get_keyframes();
// std::cout << "Going to reconstruct with " << key_frames.size() << " images."<< std::endl;
//
// evaluate_results(
// key_frames,
// rendering_paramss,
// meshs,
// pca_shape_coefficients,
// blendshape_coefficients,
// fitted_image_points,
// annotations,
// reconstruction_data,
// settings,
// n_iter
// );
//todo: we could build our final obj here?
......
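Several of the changes in this file replace hard-coded values (merge angle, video flags) with reads from the boost `ptree` that is now threaded through as `settings`. A minimal sketch of how such an ini-style configuration is loaded and queried with defaults (file name hypothetical):

```cpp
#include <iostream>
#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/ini_parser.hpp>

int main()
{
    boost::property_tree::ptree settings;
    try {
        // Hypothetical config file; the keys mirror the ones used above.
        boost::property_tree::read_ini("reconstruction.ini", settings);
    } catch (const boost::property_tree::ini_parser_error&) {
        // Missing file: fall through and rely on the defaults below.
    }

    // get<T>(path, default) returns the default if the key is absent:
    const float merge_angle = settings.get<float>("output.merge_isomap_face_angle", 60.f);
    const bool make_video = settings.get<bool>("output.make_video", false);

    std::cout << "merge angle: " << merge_angle
              << ", make video: " << std::boolalpha << make_video << "\n";
    return 0;
}
```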
#include "opencv2/core/core.hpp"
#include "opencv2/opencv.hpp"
#include "opencv2/highgui/highgui.hpp"
#include <chrono>
#include <complex>
#include <iostream>
#include <omp.h>
typedef std::complex<double> complex;
/**
* Calculates the distance between two images as the sum of absolute pixel
* differences; returns -1 if the dimensions are not the same.
*
* @param one First (grayscale) image.
* @param two Second (grayscale) image.
* @return The summed absolute difference, or -1 on a dimension mismatch.
*/
float calculate_distance_images(cv::Mat one, cv::Mat two) {
int width = one.cols;
int height = one.rows;
if(width != two.cols || height != two.rows) {
return -1;
}
float distance = 0.0;
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
distance += fabs(one.at<uchar>(y, x) - two.at<uchar>(y, x));
}
}
return distance;
}
void cpu(cv::Mat A, cv::Mat Anew) {
int n = A.rows;
int m = A.cols;
float error = 1000;
float tol = 30;
int iter = 0;
int iter_max = 100;
while (error > tol && iter < iter_max)
{
error = 0.0;
#pragma omp parallel num_threads(4)
{
#pragma omp for reduction(max:error)
for (int j = 1; j < n - 1; j++)
{
for (int i = 1; i < m - 1; i++)
{
Anew.at<uchar>(j, i) = 0.25 * (A.at<uchar>(j, i + 1) + A.at<uchar>(j, i - 1)
+ A.at<uchar>(j - 1, i) + A.at<uchar>(j + 1, i));
error = fmax(error, fabs(Anew.at<uchar>(j, i) - A.at<uchar>(j, i)));
}
}
}
// Iterate on the new estimate: copy it back before the next sweep.
Anew.copyTo(A);
iter++;
}
}
void gpu(cv::Mat A, cv::Mat Anew) {
int n = A.rows;
int m = A.cols;
float error = 1000;
float tol = 30;
int iter = 0;
int iter_max = 100;
while ( error > tol && iter < iter_max )
{
error = 0.0;
// error must be mapped tofrom: scalars are firstprivate inside a target
// region by default (OpenMP 4.5), so the host copy would otherwise never be
// updated and the while loop could not terminate on it.
// Note: cv::Mat's pixel buffer is not deep-copied by map(); a real device
// offload would map A.data/Anew.data as raw arrays.
#pragma omp target map(tofrom: error)
{
#pragma omp parallel for reduction(max:error)
for (int j = 1; j < n - 1; j++)
{
for (int i = 1; i < m - 1; i++)
{
Anew.at<uchar>(j, i) = 0.25 * (A.at<uchar>(j, i + 1) + A.at<uchar>(j, i - 1)
+ A.at<uchar>(j - 1, i) + A.at<uchar>(j + 1, i));
error = fmax(error, fabs(Anew.at<uchar>(j, i) - A.at<uchar>(j, i)));
}
}
}
// Advance on the host (iter would be firstprivate on the device and never
// change there) and iterate on the new estimate:
Anew.copyTo(A);
iter++;
}
}
int main(int argc, char** argv)
{
cv::Mat original = cv::imread("/data/img.jpg");
if (original.empty()) {
std::cerr << "Could not load /data/img.jpg" << std::endl;
return 1;
}
cv::cvtColor(original, original, CV_BGR2GRAY);
cv::Mat gpuImage = original.clone();
cv::Mat gpuImageNew = original.clone();
auto t1 = std::chrono::high_resolution_clock::now();
gpu(gpuImage, gpuImageNew);
auto t2 = std::chrono::high_resolution_clock::now();
std::cout << "gpu time: "
<< std::chrono::duration_cast<std::chrono::milliseconds>(t2-t1).count()
<< "ms" << std::endl;
cv::Mat cpuImage = original.clone();
cv::Mat cpuImageNew = original.clone();
t1 = std::chrono::high_resolution_clock::now();
cpu(cpuImage, cpuImageNew);
t2 = std::chrono::high_resolution_clock::now();
float distance = calculate_distance_images(gpuImageNew, cpuImageNew);
std::cout << "cpu time: "
<< std::chrono::duration_cast<std::chrono::milliseconds>(t2-t1).count()
<< "ms" << std::endl;
std::cout << "Distance gpu - cpu: " << distance << std::endl;
cv::imshow("gpu", gpuImageNew);
cv::imshow("cpu", cpuImageNew);
cv::waitKey(0);
}
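Two OpenMP pitfalls in `gpu()` are worth calling out (and are patched in the listing above): scalars such as `error` and `iter` are firstprivate inside a `#pragma omp target` region by default, so the host never sees their updates unless they are mapped or advanced outside the region, and `cv::Mat` objects are not deep-copied by map clauses. A standalone sketch of one Jacobi sweep offloaded with explicit raw-buffer maps (it falls back to host execution when no device is available):

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

int main()
{
    const int n = 256, m = 256;
    std::vector<float> A(n * m, 0.0f), Anew(n * m, 0.0f);
    for (int i = 0; i < m; ++i) A[i] = 100.0f; // hot top edge

    float* a = A.data();
    float* anew = Anew.data();
    float error = 0.0f;

    // Map the raw buffers explicitly; error is tofrom so the reduction
    // result is copied back to the host after the region ends.
    #pragma omp target teams distribute parallel for collapse(2) \
        reduction(max: error) \
        map(to: a[0:n*m]) map(tofrom: anew[0:n*m]) map(tofrom: error)
    for (int j = 1; j < n - 1; ++j) {
        for (int i = 1; i < m - 1; ++i) {
            anew[j * m + i] = 0.25f * (a[j * m + i + 1] + a[j * m + i - 1]
                                     + a[(j - 1) * m + i] + a[(j + 1) * m + i]);
            error = fmaxf(error, fabsf(anew[j * m + i] - a[j * m + i]));
        }
    }
    std::printf("error after one sweep: %f\n", error); // 25.0 for this setup
    return 0;
}
```

Without `-fopenmp` (plus an offload flag such as GCC's `-foffload=...`) the pragma is ignored and the loop simply runs serially on the host, which is also why the original test "works" regardless of whether offload actually happens.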