From ba770cd524ea0c27854568ce6624ff10d19267ab Mon Sep 17 00:00:00 2001 From: Wangyida Date: Fri, 14 Aug 2015 08:19:29 +0800 Subject: [PATCH] modify the mean file of images using --- .../cnn_3dobj/include/opencv2/cnn_3dobj.hpp | 39 ++-- .../cnn_3dobj/samples/classifyIMG_demo.cpp | 159 ++++++++++------- .../samples/sphereview_3dobj_demo.cpp | 167 +++++++++--------- modules/cnn_3dobj/src/cnn_feature.cpp | 94 +++++----- modules/cnn_3dobj/src/cnn_sphereview.cpp | 24 +-- 5 files changed, 254 insertions(+), 229 deletions(-) diff --git a/modules/cnn_3dobj/include/opencv2/cnn_3dobj.hpp b/modules/cnn_3dobj/include/opencv2/cnn_3dobj.hpp index fd384992d..2b0d1ba52 100644 --- a/modules/cnn_3dobj/include/opencv2/cnn_3dobj.hpp +++ b/modules/cnn_3dobj/include/opencv2/cnn_3dobj.hpp @@ -91,7 +91,7 @@ namespace cnn_3dobj The class create some sphere views of camera towards a 3D object meshed from .ply files @cite hinterstoisser2008panter . */ - class CV_EXPORTS_W IcoSphere + class CV_EXPORTS_W icoSphere { private: float X; @@ -104,7 +104,7 @@ The class create some sphere views of camera towards a 3D object meshed from .pl std::vector CameraPos_temp; float radius; float diff; - IcoSphere(float radius_in, int depth_in); + icoSphere(float radius_in, int depth_in); /** @brief Make all view points having the some distance from the focal point used by the camera view. */ CV_WRAP void norm(float v[]); @@ -116,7 +116,7 @@ The class create some sphere views of camera towards a 3D object meshed from .pl CV_WRAP void subdivide(float v1[], float v2[], float v3[], int depth); /** @brief Make all view points having the some distance from the focal point used by the camera view. */ - CV_WRAP static uint32_t swap_endian(uint32_t val); + CV_WRAP static uint32_t swapEndian(uint32_t val); /** @brief Suit the position of bytes in 4 byte data structure for particular system. */ CV_WRAP cv::Point3d getCenter(cv::Mat cloud); @@ -133,52 +133,47 @@ The class create some sphere views of camera towards a 3D object meshed from .pl */ }; - class CV_EXPORTS_W DescriptorExtractor + class CV_EXPORTS_W descriptorExtractor { private: caffe::Net* net_; cv::Size input_geometry_; int num_channels_; cv::Mat mean_; - std::vector labels_; - void SetMean(const string& mean_file); + void setMean(const string& mean_file); /** @brief Load the mean file in binaryproto format. */ - void WrapInputLayer(std::vector* input_channels); + void wrapInputLayer(std::vector* input_channels); /** @brief Wrap the input layer of the network in separate cv::Mat objects(one per channel). This way we save one memcpy operation and we don't need to rely on cudaMemcpy2D. The last preprocessing operation will write the separate channels directly to the input layer. */ - void Preprocess(const cv::Mat& img, std::vector* input_channels, bool mean_subtract); + void preprocess(const cv::Mat& img, std::vector* input_channels, int net_ready); /** @brief Convert the input image to the input image format of the network. */ public: - DescriptorExtractor(); - void list_dir(const char *path,std::vector& files,bool r); + std::vector labels_; + descriptorExtractor(); + void listDir(const char *path,std::vector& files,bool r); /** @brief Get the file name from a root dictionary. */ - bool SetNet(const string& cpu_only, int device_id); + bool setNet(const string& cpu_only, int device_id); /** @brief Initiate a classification structure. */ - bool LoadNet(bool netsetter, const string& model_file, const string& trained_file, const string& mean_file); + int loadNet(bool netsetter, const string& model_file, const string& trained_file); /** @brief Initiate a classification structure. */ - void GetLabellist(const std::vector& name_gallery); - /** @brief Get the label of the gallery images for result displaying in prediction. + int loadNet(bool netsetter, const string& model_file, const string& trained_file, const string& mean_file); + /** @brief Initiate a classification structure. */ - std::vector > Classify(const cv::Mat& reference, const cv::Mat& target, int N); - /** @brief Make a classification. + void getLabellist(const std::vector& name_gallery); + /** @brief Get the label of the gallery images for result displaying in prediction. */ - void Extract(bool net_ready, InputArray inputimg, OutputArray feature, bool mean_subtract, std::string feature_blob); + void extract(int net_ready, InputArray inputimg, OutputArray feature, std::string feature_blob); /** @brief Extract a single featrue of one image. */ - std::vector Argmax(const std::vector& v, int N); - /** @brief Find the N largest number. - */ }; //! @} } } - - #endif /* CNN_3DOBJ_HPP_ */ #endif diff --git a/modules/cnn_3dobj/samples/classifyIMG_demo.cpp b/modules/cnn_3dobj/samples/classifyIMG_demo.cpp index eef51ab76..5c07e20bc 100644 --- a/modules/cnn_3dobj/samples/classifyIMG_demo.cpp +++ b/modules/cnn_3dobj/samples/classifyIMG_demo.cpp @@ -38,70 +38,107 @@ using namespace cv; using namespace std; using namespace cv::cnn_3dobj; + +/* Return the indices of the top N values of vector v. */ +std::vector argmax(const std::vector& v, int N) +{ + std::vector > pairs; + for (size_t i = 0; i < v.size(); ++i) + pairs.push_back(std::make_pair(v[i], i)); + std::partial_sort(pairs.begin(), pairs.begin() + N, pairs.end()); + std::vector result; + for (int i = 0; i < N; ++i) + result.push_back(pairs[i].second); + return result; +}; + +/* Return the indices of the top N values of vector v. */ +std::vector > classify(const cv::Mat& reference, const cv::Mat& target, int N, std::vector labels_) +{ + std::vector output; + for (int i = 0; i < reference.rows; i++) + { + cv::Mat f1 = reference.row(i); + cv::Mat f2 = target; + cv::Mat output_temp = f1-f2; + output.push_back(cv::norm(output_temp)); + } + std::vector maxN = argmax(output, N); + std::vector > predictions; + for (int i = 0; i < N; ++i) + { + int idx = maxN[i]; + predictions.push_back(std::make_pair(labels_[idx], output[idx])); + } + return predictions; +}; + int main(int argc, char** argv) { - const String keys = "{help | | this demo will convert a set of images in a particular path into leveldb database for feature extraction using Caffe.}" - "{src_dir | ../data/images_all/ | Source direction of the images ready for being used for extract feature as gallery.}" - "{caffemodel | ../data/3d_triplet_iter_20000.caffemodel | caffe model for feature exrtaction.}" - "{network_forIMG | ../data/3d_triplet_testIMG.prototxt | Network definition file used for extracting feature from a single image and making a classification}" - "{mean_file | ../data/images_mean/triplet_mean.binaryproto | The mean file generated by Caffe from all gallery images, this could be used for mean value substraction from all images.}" - "{target_img | ../data/images_all/3_13.png | Path of image waiting to be classified.}" - "{feature_blob | feat | Name of layer which will represent as the feature, in this network, ip1 or feat is well.}" - "{num_candidate | 6 | Number of candidates in gallery as the prediction result.}" - "{device | CPU | device}" - "{dev_id | 0 | dev_id}"; - cv::CommandLineParser parser(argc, argv, keys); - parser.about("Demo for object data classification and pose estimation"); - if (parser.has("help")) - { - parser.printMessage(); - return 0; - } - string src_dir = parser.get("src_dir"); - string caffemodel = parser.get("caffemodel"); - string network_forIMG = parser.get("network_forIMG"); - string mean_file = parser.get("mean_file"); - string target_img = parser.get("target_img"); - string feature_blob = parser.get("feature_blob"); - int num_candidate = parser.get("num_candidate"); - string device = parser.get("device"); - int dev_id = parser.get("dev_id"); + const String keys = "{help | | this demo will convert a set of images in a particular path into leveldb database for feature extraction using Caffe. If there little variance in data such as human faces, you can add a mean_file, otherwise it is not so useful}" +"{src_dir | ../data/images_all/ | Source direction of the images ready for being used for extract feature as gallery.}" +"{caffemodel | ../data/3d_triplet_iter_20000.caffemodel | caffe model for feature exrtaction.}" +"{network_forIMG | ../data/3d_triplet_testIMG.prototxt | Network definition file used for extracting feature from a single image and making a classification}" +"{mean_file | no | The mean file generated by Caffe from all gallery images, this could be used for mean value substraction from all images. If you want to use the mean file, you can set this as ../data/images_mean/triplet_mean.binaryproto.}" +"{target_img | ../data/images_all/3_13.png | Path of image waiting to be classified.}" +"{feature_blob | feat | Name of layer which will represent as the feature, in this network, ip1 or feat is well.}" +"{num_candidate | 6 | Number of candidates in gallery as the prediction result.}" +"{device | CPU | device}" +"{dev_id | 0 | dev_id}"; + cv::CommandLineParser parser(argc, argv, keys); + parser.about("Demo for object data classification and pose estimation"); + if (parser.has("help")) + { + parser.printMessage(); + return 0; + } + string src_dir = parser.get("src_dir"); + string caffemodel = parser.get("caffemodel"); + string network_forIMG = parser.get("network_forIMG"); + string mean_file = parser.get("mean_file"); + string target_img = parser.get("target_img"); + string feature_blob = parser.get("feature_blob"); + int num_candidate = parser.get("num_candidate"); + string device = parser.get("device"); + int dev_id = parser.get("dev_id"); - cv::cnn_3dobj::DescriptorExtractor descriptor; - bool set_succeed = descriptor.SetNet(device, dev_id); - descriptor.LoadNet(set_succeed, network_forIMG, caffemodel, mean_file); - std::vector name_gallery; - descriptor.list_dir(src_dir.c_str(), name_gallery, false); - descriptor.GetLabellist(name_gallery); - for (unsigned int i = 0; i < name_gallery.size(); i++) { - name_gallery[i] = src_dir + name_gallery[i]; - } - std::vector img_gallery; - cv::Mat feature_reference; - for (unsigned int i = 0; i < name_gallery.size(); i++) { - img_gallery.push_back(cv::imread(name_gallery[i], -1)); - } - descriptor.FeatureExtract(img_gallery, feature_reference, false, feature_blob); + cv::cnn_3dobj::descriptorExtractor descriptor; + bool set_succeed = descriptor.setNet(device, dev_id); + int net_ready; + if (strcmp(mean_file.c_str(), "no") == 0) + net_ready = descriptor.loadNet(set_succeed, network_forIMG, caffemodel); + else + net_ready = descriptor.loadNet(set_succeed, network_forIMG, caffemodel, mean_file); + std::vector name_gallery; + descriptor.listDir(src_dir.c_str(), name_gallery, false); + descriptor.getLabellist(name_gallery); + for (unsigned int i = 0; i < name_gallery.size(); i++) { + name_gallery[i] = src_dir + name_gallery[i]; + } + std::vector img_gallery; + cv::Mat feature_reference; + for (unsigned int i = 0; i < name_gallery.size(); i++) { + img_gallery.push_back(cv::imread(name_gallery[i], -1)); + } + descriptor.extract(net_ready, img_gallery, feature_reference, feature_blob); - std::cout << std::endl << "---------- Prediction for " - << target_img << " ----------" << std::endl; + std::cout << std::endl << "---------- Prediction for " << target_img << " ----------" << std::endl; - cv::Mat img = cv::imread(target_img, -1); - // CHECK(!img.empty()) << "Unable to decode image " << target_img; - std::cout << std::endl << "---------- Featrue of gallery images ----------" << std::endl; - std::vector > prediction; - for (unsigned int i = 0; i < feature_reference.rows; i++) - std::cout << feature_reference.row(i) << endl; - cv::Mat feature_test; - descriptor.FeatureExtract(img, feature_test, false, feature_blob); - std::cout << std::endl << "---------- Featrue of target image: " << target_img << "----------" << endl << feature_test << std::endl; - prediction = descriptor.Classify(feature_reference, feature_test, num_candidate); - // Print the top N prediction. - std::cout << std::endl << "---------- Prediction result(Distance - File Name in Gallery) ----------" << std::endl; - for (size_t i = 0; i < prediction.size(); ++i) { - std::pair p = prediction[i]; - std::cout << std::fixed << std::setprecision(2) << p.second << " - \"" - << p.first << "\"" << std::endl; - } - return 0; + cv::Mat img = cv::imread(target_img, -1); + // CHECK(!img.empty()) << "Unable to decode image " << target_img; + std::cout << std::endl << "---------- Featrue of gallery images ----------" << std::endl; + std::vector > prediction; + for (unsigned int i = 0; i < feature_reference.rows; i++) + std::cout << feature_reference.row(i) << endl; + cv::Mat feature_test; + descriptor.extract(net_ready, img, feature_test, feature_blob); + std::cout << std::endl << "---------- Featrue of target image: " << target_img << "----------" << endl << feature_test << std::endl; + prediction = classify(feature_reference, feature_test, num_candidate, descriptor.labels_); + // Print the top N prediction. + std::cout << std::endl << "---------- Prediction result(Distance - File Name in Gallery) ----------" << std::endl; + for (size_t i = 0; i < prediction.size(); ++i) { + std::pair p = prediction[i]; + std::cout << std::fixed << std::setprecision(2) << p.second << " - \"" << p.first << "\"" << std::endl; + } + return 0; } diff --git a/modules/cnn_3dobj/samples/sphereview_3dobj_demo.cpp b/modules/cnn_3dobj/samples/sphereview_3dobj_demo.cpp index 7e547ec6d..4e348dc14 100644 --- a/modules/cnn_3dobj/samples/sphereview_3dobj_demo.cpp +++ b/modules/cnn_3dobj/samples/sphereview_3dobj_demo.cpp @@ -40,89 +40,90 @@ using namespace cv; using namespace std; using namespace cv::cnn_3dobj; -int main(int argc, char *argv[]){ - const String keys = "{help | | demo :$ ./sphereview_test -ite_depth=2 -plymodel=../3Dmodel/ape.ply -imagedir=../data/images_ape/ -labeldir=../data/label_ape.txt -num_class=4 -label_class=0, then press 'q' to run the demo for images generation when you see the gray background and a coordinate.}" - "{ite_depth | 2 | Iteration of sphere generation.}" - "{plymodel | ../3Dmodel/ape.ply | path of the '.ply' file for image rendering. }" - "{imagedir | ../data/images_all/ | path of the generated images for one particular .ply model. }" - "{labeldir | ../data/label_all.txt | path of the generated images for one particular .ply model. }" - "{num_class | 4 | total number of classes of models}" - "{label_class | 0 | class label of current .ply model}"; - cv::CommandLineParser parser(argc, argv, keys); - parser.about("Demo for Sphere View data generation"); - if (parser.has("help")) - { - parser.printMessage(); - return 0; - } - int ite_depth = parser.get("ite_depth"); - string plymodel = parser.get("plymodel"); - string imagedir = parser.get("imagedir"); - string labeldir = parser.get("labeldir"); - int num_class = parser.get("num_class"); - int label_class = parser.get("label_class"); - cv::cnn_3dobj::IcoSphere ViewSphere(10,ite_depth); - std::vector campos = ViewSphere.CameraPos; - std::fstream imglabel; - char* p=(char*)labeldir.data(); - imglabel.open(p, fstream::app|fstream::out); - bool camera_pov = (true); - /// Create a window - viz::Viz3d myWindow("Coordinate Frame"); - myWindow.setWindowSize(Size(64,64)); - /// Add coordinate axes - myWindow.showWidget("Coordinate Widget", viz::WCoordinateSystem()); - myWindow.setBackgroundColor(viz::Color::gray()); - myWindow.spin(); - /// Set background color - /// Let's assume camera has the following properties - /// Create a cloud widget. - viz::Mesh objmesh = viz::Mesh::load(plymodel); - Point3d cam_focal_point = ViewSphere.getCenter(objmesh.cloud); - float radius = ViewSphere.getRadius(objmesh.cloud, cam_focal_point); - Point3d cam_y_dir(0.0f,0.0f,1.0f); - const char* headerPath = "./header_for_"; - const char* binaryPath = "./binary_"; - ViewSphere.createHeader((int)campos.size(), 64, 64, headerPath); - for(int pose = 0; pose < (int)campos.size(); pose++){ - char* temp = new char; - sprintf (temp,"%d",label_class); - string filename = temp; - filename += "_"; - sprintf (temp,"%d",pose); - filename += temp; - filename += ".png"; - imglabel << filename << ' ' << (int)(campos.at(pose).x*100) << ' ' << (int)(campos.at(pose).y*100) << ' ' << (int)(campos.at(pose).z*100) << endl; - filename = imagedir + filename; - /// We can get the pose of the cam using makeCameraPoses - Affine3f cam_pose = viz::makeCameraPose(campos.at(pose)*radius+cam_focal_point, cam_focal_point, cam_y_dir*radius+cam_focal_point); - /// We can get the transformation matrix from camera coordinate system to global using - /// - makeTransformToGlobal. We need the axes of the camera - Affine3f transform = viz::makeTransformToGlobal(Vec3f(1.0f,0.0f,0.0f), Vec3f(0.0f,1.0f,0.0f), Vec3f(0.0f,0.0f,1.0f), campos.at(pose)); - viz::WMesh mesh_widget(objmesh); - /// Pose of the widget in camera frame - Affine3f cloud_pose = Affine3f().translate(Vec3f(1.0f,1.0f,1.0f)); - /// Pose of the widget in global frame - Affine3f cloud_pose_global = transform * cloud_pose; - /// Visualize camera frame - if (!camera_pov) - { - viz::WCameraPosition cpw(1); // Coordinate axes - viz::WCameraPosition cpw_frustum(Vec2f(0.5, 0.5)); // Camera frustum - myWindow.showWidget("CPW", cpw, cam_pose); - myWindow.showWidget("CPW_FRUSTUM", cpw_frustum, cam_pose); - } +int main(int argc, char *argv[]) +{ + const String keys = "{help | | demo :$ ./sphereview_test -ite_depth=2 -plymodel=../3Dmodel/ape.ply -imagedir=../data/images_ape/ -labeldir=../data/label_ape.txt -num_class=4 -label_class=0, then press 'q' to run the demo for images generation when you see the gray background and a coordinate.}" +"{ite_depth | 2 | Iteration of sphere generation.}" +"{plymodel | ../3Dmodel/ape.ply | path of the '.ply' file for image rendering. }" +"{imagedir | ../data/images_all/ | path of the generated images for one particular .ply model. }" +"{labeldir | ../data/label_all.txt | path of the generated images for one particular .ply model. }" +"{num_class | 4 | total number of classes of models}" +"{label_class | 0 | class label of current .ply model}"; + cv::CommandLineParser parser(argc, argv, keys); + parser.about("Demo for Sphere View data generation"); + if (parser.has("help")) + { + parser.printMessage(); + return 0; + } + int ite_depth = parser.get("ite_depth"); + string plymodel = parser.get("plymodel"); + string imagedir = parser.get("imagedir"); + string labeldir = parser.get("labeldir"); + int num_class = parser.get("num_class"); + int label_class = parser.get("label_class"); + cv::cnn_3dobj::icoSphere ViewSphere(10,ite_depth); + std::vector campos = ViewSphere.CameraPos; + std::fstream imglabel; + char* p=(char*)labeldir.data(); + imglabel.open(p, fstream::app|fstream::out); + bool camera_pov = (true); + /// Create a window + viz::Viz3d myWindow("Coordinate Frame"); + myWindow.setWindowSize(Size(64,64)); + /// Add coordinate axes + myWindow.showWidget("Coordinate Widget", viz::WCoordinateSystem()); + myWindow.setBackgroundColor(viz::Color::gray()); + myWindow.spin(); + /// Set background color + /// Let's assume camera has the following properties + /// Create a cloud widget. + viz::Mesh objmesh = viz::Mesh::load(plymodel); + Point3d cam_focal_point = ViewSphere.getCenter(objmesh.cloud); + float radius = ViewSphere.getRadius(objmesh.cloud, cam_focal_point); + Point3d cam_y_dir(0.0f,0.0f,1.0f); + const char* headerPath = "./header_for_"; + const char* binaryPath = "./binary_"; + ViewSphere.createHeader((int)campos.size(), 64, 64, headerPath); + for(int pose = 0; pose < (int)campos.size(); pose++){ + char* temp = new char; + sprintf (temp,"%d",label_class); + string filename = temp; + filename += "_"; + sprintf (temp,"%d",pose); + filename += temp; + filename += ".png"; + imglabel << filename << ' ' << (int)(campos.at(pose).x*100) << ' ' << (int)(campos.at(pose).y*100) << ' ' << (int)(campos.at(pose).z*100) << endl; + filename = imagedir + filename; + /// We can get the pose of the cam using makeCameraPoses + Affine3f cam_pose = viz::makeCameraPose(campos.at(pose)*radius+cam_focal_point, cam_focal_point, cam_y_dir*radius+cam_focal_point); + /// We can get the transformation matrix from camera coordinate system to global using + /// - makeTransformToGlobal. We need the axes of the camera + Affine3f transform = viz::makeTransformToGlobal(Vec3f(1.0f,0.0f,0.0f), Vec3f(0.0f,1.0f,0.0f), Vec3f(0.0f,0.0f,1.0f), campos.at(pose)); + viz::WMesh mesh_widget(objmesh); + /// Pose of the widget in camera frame + Affine3f cloud_pose = Affine3f().translate(Vec3f(1.0f,1.0f,1.0f)); + /// Pose of the widget in global frame + Affine3f cloud_pose_global = transform * cloud_pose; + /// Visualize camera frame + if (!camera_pov) + { + viz::WCameraPosition cpw(1); // Coordinate axes + viz::WCameraPosition cpw_frustum(Vec2f(0.5, 0.5)); // Camera frustum + myWindow.showWidget("CPW", cpw, cam_pose); + myWindow.showWidget("CPW_FRUSTUM", cpw_frustum, cam_pose); + } - /// Visualize widget - mesh_widget.setRenderingProperty(viz::LINE_WIDTH, 4.0); - myWindow.showWidget("ape", mesh_widget, cloud_pose_global); + /// Visualize widget + mesh_widget.setRenderingProperty(viz::LINE_WIDTH, 4.0); + myWindow.showWidget("ape", mesh_widget, cloud_pose_global); - /// Set the viewer pose to that of camera - if (camera_pov) - myWindow.setViewerPose(cam_pose); - myWindow.saveScreenshot(filename); - ViewSphere.writeBinaryfile(filename, binaryPath, headerPath,(int)campos.size()*num_class, label_class, (int)(campos.at(pose).x*100), (int)(campos.at(pose).y*100), (int)(campos.at(pose).z*100)); - } - imglabel.close(); - return 1; + /// Set the viewer pose to that of camera + if (camera_pov) + myWindow.setViewerPose(cam_pose); + myWindow.saveScreenshot(filename); + ViewSphere.writeBinaryfile(filename, binaryPath, headerPath,(int)campos.size()*num_class, label_class, (int)(campos.at(pose).x*100), (int)(campos.at(pose).y*100), (int)(campos.at(pose).z*100)); + } + imglabel.close(); + return 1; }; diff --git a/modules/cnn_3dobj/src/cnn_feature.cpp b/modules/cnn_3dobj/src/cnn_feature.cpp index cc365ce7d..b0879f0a7 100644 --- a/modules/cnn_3dobj/src/cnn_feature.cpp +++ b/modules/cnn_3dobj/src/cnn_feature.cpp @@ -6,8 +6,8 @@ namespace cv { namespace cnn_3dobj { - DescriptorExtractor::DescriptorExtractor(){}; - void DescriptorExtractor::list_dir(const char *path,vector& files,bool r) + descriptorExtractor::descriptorExtractor(){}; + void descriptorExtractor::listDir(const char *path,vector& files,bool r) { DIR *pDir; struct dirent *ent; @@ -25,7 +25,7 @@ namespace cnn_3dobj if(r) { sprintf(childpath, "%s/%s", path, ent->d_name); - DescriptorExtractor::list_dir(childpath,files,false); + descriptorExtractor::listDir(childpath,files,false); } } else @@ -36,7 +36,7 @@ namespace cnn_3dobj sort(files.begin(),files.end()); }; - bool DescriptorExtractor::SetNet(const string& cpu_only, int device_id) + bool descriptorExtractor::setNet(const string& cpu_only, int device_id) { if (strcmp(cpu_only.c_str(), "CPU") == 0 || strcmp(cpu_only.c_str(), "GPU") == 0) { @@ -59,9 +59,9 @@ namespace cnn_3dobj } }; - bool DescriptorExtractor::LoadNet(bool netsetter, const string& model_file, const string& trained_file, const string& mean_file) + int descriptorExtractor::loadNet(bool netsetter, const string& model_file, const string& trained_file, const string& mean_file) { - bool net_ready = false; + int net_ready = 0; if (netsetter) { /* Load the network. */ @@ -77,58 +77,50 @@ namespace cnn_3dobj std::cout << "Input layer should have 1 or 3 channels." << std::endl; input_geometry_ = cv::Size(input_layer->width(), input_layer->height()); /* Load the binaryproto mean file. */ - SetMean(mean_file); - net_ready = true; - return net_ready; + setMean(mean_file); + net_ready = 2; } else { std::cout << "Error: Device must be set in advance using SetNet function" << std::endl; - return net_ready; } + return net_ready; }; - void DescriptorExtractor::GetLabellist(const std::vector& name_gallery) + int descriptorExtractor::loadNet(bool netsetter, const string& model_file, const string& trained_file) { - for (unsigned int i = 0; i < name_gallery.size(); ++i) - labels_.push_back(name_gallery[i]); - }; - - /* Return the indices of the top N values of vector v. */ - std::vector DescriptorExtractor::Argmax(const std::vector& v, int N) - { - std::vector > pairs; - for (size_t i = 0; i < v.size(); ++i) - pairs.push_back(std::make_pair(v[i], i)); - std::partial_sort(pairs.begin(), pairs.begin() + N, pairs.end()); - std::vector result; - for (int i = 0; i < N; ++i) - result.push_back(pairs[i].second); - return result; - }; - - std::vector > DescriptorExtractor::Classify(const cv::Mat& reference, const cv::Mat& target, int N) - { - std::vector output; - for (int i = 0; i < reference.rows; i++) + int net_ready = 0; + if (netsetter) { - cv::Mat f1 = reference.row(i); - cv::Mat f2 = target; - cv::Mat output_temp = f1-f2; - output.push_back(cv::norm(output_temp)); + /* Load the network. */ + net_ = new Net(model_file, TEST); + net_->CopyTrainedLayersFrom(trained_file); + if (net_->num_inputs() != 1) + std::cout << "Network should have exactly one input." << std::endl; + if (net_->num_outputs() != 1) + std::cout << "Network should have exactly one output." << std::endl; + Blob* input_layer = net_->input_blobs()[0]; + num_channels_ = input_layer->channels(); + if (num_channels_ != 3 && num_channels_ != 1) + std::cout << "Input layer should have 1 or 3 channels." << std::endl; + input_geometry_ = cv::Size(input_layer->width(), input_layer->height()); + net_ready = 1; } - std::vector maxN = Argmax(output, N); - std::vector > predictions; - for (int i = 0; i < N; ++i) + else { - int idx = maxN[i]; - predictions.push_back(std::make_pair(labels_[idx], output[idx])); + std::cout << "Error: Device must be set in advance using SetNet function" << std::endl; } - return predictions; + return net_ready; + }; + + void descriptorExtractor::getLabellist(const std::vector& name_gallery) + { + for (unsigned int i = 0; i < name_gallery.size(); ++i) + labels_.push_back(name_gallery[i]); }; /* Load the mean file in binaryproto format. */ - void DescriptorExtractor::SetMean(const string& mean_file) + void descriptorExtractor::setMean(const string& mean_file) { BlobProto blob_proto; ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto); @@ -156,7 +148,7 @@ namespace cnn_3dobj mean_ = cv::Mat(input_geometry_, mean.type(), channel_mean); }; - void DescriptorExtractor::Extract(bool net_ready, InputArray inputimg, OutputArray feature, bool mean_subtract, std::string featrue_blob) + void descriptorExtractor::extract(int net_ready, InputArray inputimg, OutputArray feature, std::string featrue_blob) { if (net_ready) { @@ -166,11 +158,11 @@ namespace cnn_3dobj /* Forward dimension change to all layers. */ net_->Reshape(); std::vector input_channels; - WrapInputLayer(&input_channels); + wrapInputLayer(&input_channels); if (inputimg.kind() == 65536) {/* this is a Mat */ Mat img = inputimg.getMat(); - Preprocess(img, &input_channels, mean_subtract); + preprocess(img, &input_channels, net_ready); net_->ForwardPrefilled(); /* Copy the output layer to a std::vector */ Blob* output_layer = net_->blob_by_name(featrue_blob).get(); @@ -187,7 +179,7 @@ namespace cnn_3dobj Mat feature_vector; for (unsigned int i = 0; i < img.size(); ++i) { - Preprocess(img[i], &input_channels, mean_subtract); + preprocess(img[i], &input_channels, net_ready); net_->ForwardPrefilled(); /* Copy the output layer to a std::vector */ Blob* output_layer = net_->blob_by_name(featrue_blob).get(); @@ -214,7 +206,7 @@ namespace cnn_3dobj * don't need to rely on cudaMemcpy2D. The last preprocessing * operation will write the separate channels directly to the input * layer. */ - void DescriptorExtractor::WrapInputLayer(std::vector* input_channels) + void descriptorExtractor::wrapInputLayer(std::vector* input_channels) { Blob* input_layer = net_->input_blobs()[0]; int width = input_layer->width(); @@ -228,8 +220,8 @@ namespace cnn_3dobj } }; - void DescriptorExtractor::Preprocess(const cv::Mat& img, -std::vector* input_channels, bool mean_subtract) + void descriptorExtractor::preprocess(const cv::Mat& img, +std::vector* input_channels, int net_ready) { /* Convert the input image to the input image format of the network. */ cv::Mat sample; @@ -254,7 +246,7 @@ std::vector* input_channels, bool mean_subtract) else sample_resized.convertTo(sample_float, CV_32FC1); cv::Mat sample_normalized; - if (mean_subtract) + if (net_ready == 2) cv::subtract(sample_float, mean_, sample_normalized); else sample_normalized = sample_float; diff --git a/modules/cnn_3dobj/src/cnn_sphereview.cpp b/modules/cnn_3dobj/src/cnn_sphereview.cpp index eb80d808d..29d44827e 100644 --- a/modules/cnn_3dobj/src/cnn_sphereview.cpp +++ b/modules/cnn_3dobj/src/cnn_sphereview.cpp @@ -6,7 +6,7 @@ namespace cv { namespace cnn_3dobj { - IcoSphere::IcoSphere(float radius_in, int depth_in) + icoSphere::icoSphere(float radius_in, int depth_in) { X = 0.5f; @@ -35,7 +35,7 @@ namespace cnn_3dobj { for (int k = 0; k* temp = new std::vector; @@ -77,7 +77,7 @@ namespace cnn_3dobj CameraPos.push_back(temp_Campos); }; - void IcoSphere::subdivide(float v1[], float v2[], float v3[], int depth) + void icoSphere::subdivide(float v1[], float v2[], float v3[], int depth) { norm(v1); norm(v2); @@ -107,13 +107,13 @@ namespace cnn_3dobj subdivide(v12, v23, v31, depth - 1); }; - uint32_t IcoSphere::swap_endian(uint32_t val) + uint32_t icoSphere::swapEndian(uint32_t val) { val = ((val << 8) & 0xFF00FF00) | ((val >> 8) & 0xFF00FF); return (val << 16) | (val >> 16); }; - cv::Point3d IcoSphere::getCenter(cv::Mat cloud) + cv::Point3d icoSphere::getCenter(cv::Mat cloud) { Point3f* data = cloud.ptr(); Point3d dataout; @@ -129,7 +129,7 @@ namespace cnn_3dobj return dataout; }; - float IcoSphere::getRadius(cv::Mat cloud, cv::Point3d center) + float icoSphere::getRadius(cv::Mat cloud, cv::Point3d center) { float radiusCam = 0; Point3f* data = cloud.ptr(); @@ -149,7 +149,7 @@ namespace cnn_3dobj return radiusCam; }; - void IcoSphere::createHeader(int num_item, int rows, int cols, const char* headerPath) + void icoSphere::createHeader(int num_item, int rows, int cols, const char* headerPath) { char* a0 = (char*)malloc(1024); strcpy(a0, headerPath); @@ -165,17 +165,17 @@ namespace cnn_3dobj std::ofstream headerLabel(headerPathlab, ios::out|ios::binary); int headerimg[4] = {2051,num_item,rows,cols}; for (int i=0; i<4; i++) - headerimg[i] = swap_endian(headerimg[i]); + headerimg[i] = swapEndian(headerimg[i]); int headerlabel[2] = {2050,num_item}; for (int i=0; i<2; i++) - headerlabel[i] = swap_endian(headerlabel[i]); + headerlabel[i] = swapEndian(headerlabel[i]); headerImg.write(reinterpret_cast(headerimg), sizeof(int)*4); headerImg.close(); headerLabel.write(reinterpret_cast(headerlabel), sizeof(int)*2); headerLabel.close(); }; - void IcoSphere::writeBinaryfile(string filenameImg, const char* binaryPath, const char* headerPath, int num_item, int label_class, int x, int y, int z) + void icoSphere::writeBinaryfile(string filenameImg, const char* binaryPath, const char* headerPath, int num_item, int label_class, int x, int y, int z) { int isrgb = 0; cv::Mat ImgforBin = cv::imread(filenameImg, isrgb);