差分
このページの2つのバージョン間の差分を表示します。
次のリビジョン | 前のリビジョン 次のリビジョン両方とも次のリビジョン | ||
opencv_dnn:環境構築:dnn_with_cuda [2020/02/12 07:56] – 作成 baba | opencv_dnn:環境構築:dnn_with_cuda [2020/02/13 17:26] – [object_detection.cppの修正] baba | ||
---|---|---|---|
行 1: | 行 1: | ||
====== OpenCV DNN with CUDA ====== | ====== OpenCV DNN with CUDA ====== | ||
- | このページでは,OpenCVのdnnモジュールをcudaでinferenceさせるための環境構築に関してまとめます. | + | このページでは,OpenCVのdnnモジュールをcudaでinferenceさせるための環境構築に関してまとめます.もともとの動機は |
+ | * OpenCVのdnn inferenceをもっと速くしたい | ||
+ | なわけです.もちろんintelのinference engine ( https:// | ||
+ | |||
+ | 参考にした記事は以下となります.日本語でこのあたりをubuntu環境でやってる人がいなかったのでここに記しておくことにしました. | ||
+ | * https:// | ||
+ | * https:// | ||
+ | * https:// | ||
+ | * dnn with cudaを実装した人であるYashasSamagaさんがissueで色々質問に答えています. | ||
+ | |||
+ | 基本的には上記のリンクに書いてある通りにすればコンパイルできるんでないかなと思います. | ||
+ | |||
+ | ===== 環境構築 ===== | ||
+ | * Ubuntu 16.04 | ||
+ | * CUDA Toolkit 10.2 | ||
+ | * cuDNN 7.6.4 | ||
+ | |||
+ | cmakeでconfigureする際に,導入しているバージョンが複数ある場合は,適切なバージョンに変更するなどのマニュアル作業が生じます.例えばcuda10.2でやり | ||
+ | |||
+ | ==== object_detection.cppの修正 ==== | ||
+ | デフォルトのobject_detection.cppが若干古いのと扱いづらいところがあるので,以下のものと入れ替えてください. | ||
+ | 参照パスは opencv/ | ||
+ | |||
+ | <file .cpp object_detection.cpp> | ||
+ | #include < | ||
+ | #include < | ||
+ | |||
+ | #include < | ||
+ | #include < | ||
+ | #include < | ||
+ | |||
+ | #ifdef CV_CXX11 | ||
+ | #include < | ||
+ | #include < | ||
+ | #include < | ||
+ | #endif | ||
+ | |||
+ | #include " | ||
+ | |||
+ | std::string keys = | ||
+ | "{ help h | | Print help message. }" | ||
+ | "{ @alias | ||
+ | "{ zoo | models.yml | An optional path to file with preprocessing parameters }" | ||
+ | "{ device | ||
+ | "{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera. }" | ||
+ | "{ framework f | | Optional name of an origin framework of the model. Detect it automatically if it does not set. }" | ||
+ | "{ classes | ||
+ | "{ thr | .5 | Confidence threshold. }" | ||
+ | "{ nms | .4 | Non-maximum suppression threshold. }" | ||
+ | "{ backend | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | "{ target | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | "{ async | 0 | Number of asynchronous forwards at the same time. " | ||
+ | " | ||
+ | |||
+ | using namespace cv; | ||
+ | using namespace dnn; | ||
+ | |||
+ | float confThreshold, | ||
+ | std:: | ||
+ | |||
+ | inline void preprocess(const Mat& frame, Net& net, Size inpSize, float scale, | ||
+ | const Scalar& mean, bool swapRB); | ||
+ | |||
+ | void postprocess(Mat& | ||
+ | |||
+ | void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame); | ||
+ | |||
+ | void callback(int pos, void* userdata); | ||
+ | |||
+ | #ifdef CV_CXX11 | ||
+ | template < | ||
+ | class QueueFPS : public std:: | ||
+ | { | ||
+ | public: | ||
+ | QueueFPS() : counter(0) {} | ||
+ | |||
+ | void push(const T& entry) | ||
+ | { | ||
+ | std:: | ||
+ | |||
+ | std:: | ||
+ | counter += 1; | ||
+ | if (counter == 1) | ||
+ | { | ||
+ | // Start counting from a second frame (warmup). | ||
+ | tm.reset(); | ||
+ | tm.start(); | ||
+ | } | ||
+ | } | ||
+ | |||
+ | T get() | ||
+ | { | ||
+ | std:: | ||
+ | T entry = this-> | ||
+ | this-> | ||
+ | return entry; | ||
+ | } | ||
+ | |||
+ | float getFPS() | ||
+ | { | ||
+ | tm.stop(); | ||
+ | double fps = counter / tm.getTimeSec(); | ||
+ | tm.start(); | ||
+ | return static_cast< | ||
+ | } | ||
+ | |||
+ | void clear() | ||
+ | { | ||
+ | std:: | ||
+ | while (!this-> | ||
+ | this-> | ||
+ | } | ||
+ | |||
+ | unsigned int counter; | ||
+ | |||
+ | private: | ||
+ | TickMeter tm; | ||
+ | std::mutex mutex; | ||
+ | }; | ||
+ | # | ||
+ | |||
+ | int main(int argc, char** argv) | ||
+ | { | ||
+ | CommandLineParser parser(argc, | ||
+ | |||
+ | const std::string modelName = parser.get< | ||
+ | const std::string zooFile = parser.get< | ||
+ | |||
+ | keys += genPreprocArguments(modelName, | ||
+ | |||
+ | parser = CommandLineParser(argc, | ||
+ | parser.about(" | ||
+ | if (argc == 1 || parser.has(" | ||
+ | { | ||
+ | parser.printMessage(); | ||
+ | return 0; | ||
+ | } | ||
+ | |||
+ | confThreshold = parser.get< | ||
+ | nmsThreshold = parser.get< | ||
+ | float scale = parser.get< | ||
+ | Scalar mean = parser.get< | ||
+ | bool swapRB = parser.get< | ||
+ | int inpWidth = parser.get< | ||
+ | int inpHeight = parser.get< | ||
+ | size_t asyncNumReq = parser.get< | ||
+ | CV_Assert(parser.has(" | ||
+ | std::string modelPath = findFile(parser.get< | ||
+ | std::string configPath = findFile(parser.get< | ||
+ | |||
+ | // Open file with classes names. | ||
+ | if (parser.has(" | ||
+ | { | ||
+ | std::string file = parser.get< | ||
+ | std:: | ||
+ | if (!ifs.is_open()) | ||
+ | CV_Error(Error:: | ||
+ | std::string line; | ||
+ | while (std:: | ||
+ | { | ||
+ | classes.push_back(line); | ||
+ | } | ||
+ | } | ||
+ | |||
+ | // Load a model. | ||
+ | Net net = readNet(modelPath, | ||
+ | net.setPreferableBackend(parser.get< | ||
+ | net.setPreferableTarget(parser.get< | ||
+ | std:: | ||
+ | |||
+ | // Create a window | ||
+ | static const std::string kWinName = "Deep learning object detection in OpenCV"; | ||
+ | namedWindow(kWinName, | ||
+ | int initialConf = (int)(confThreshold * 100); | ||
+ | createTrackbar(" | ||
+ | |||
+ | // Open a video file or an image file or a camera stream. | ||
+ | VideoCapture cap; | ||
+ | if (parser.has(" | ||
+ | cap.open(parser.get< | ||
+ | else | ||
+ | cap.open(parser.get< | ||
+ | |||
+ | #ifdef CV_CXX11 | ||
+ | bool process = true; | ||
+ | |||
+ | // Frames capturing thread | ||
+ | QueueFPS< | ||
+ | std::thread framesThread([& | ||
+ | Mat frame; | ||
+ | while (process) | ||
+ | { | ||
+ | cap >> frame; | ||
+ | if (!frame.empty()) | ||
+ | framesQueue.push(frame.clone()); | ||
+ | else | ||
+ | break; | ||
+ | } | ||
+ | }); | ||
+ | |||
+ | // Frames processing thread | ||
+ | QueueFPS< | ||
+ | QueueFPS< | ||
+ | std::thread processingThread([& | ||
+ | std:: | ||
+ | Mat blob; | ||
+ | while (process) | ||
+ | { | ||
+ | // Get a next frame | ||
+ | Mat frame; | ||
+ | { | ||
+ | if (!framesQueue.empty()) | ||
+ | { | ||
+ | frame = framesQueue.get(); | ||
+ | if (asyncNumReq) | ||
+ | { | ||
+ | if (futureOutputs.size() == asyncNumReq) | ||
+ | frame = Mat(); | ||
+ | } | ||
+ | else | ||
+ | framesQueue.clear(); | ||
+ | } | ||
+ | } | ||
+ | |||
+ | // Process the frame | ||
+ | if (!frame.empty()) | ||
+ | { | ||
+ | preprocess(frame, | ||
+ | processedFramesQueue.push(frame); | ||
+ | |||
+ | if (asyncNumReq) | ||
+ | { | ||
+ | futureOutputs.push(net.forwardAsync()); | ||
+ | } | ||
+ | else | ||
+ | { | ||
+ | std:: | ||
+ | net.forward(outs, | ||
+ | predictionsQueue.push(outs); | ||
+ | } | ||
+ | } | ||
+ | |||
+ | while (!futureOutputs.empty() && | ||
+ | | ||
+ | { | ||
+ | AsyncArray async_out = futureOutputs.front(); | ||
+ | futureOutputs.pop(); | ||
+ | Mat out; | ||
+ | async_out.get(out); | ||
+ | predictionsQueue.push({out}); | ||
+ | } | ||
+ | } | ||
+ | }); | ||
+ | |||
+ | // Postprocessing and rendering loop | ||
+ | while (waitKey(1) < 0) | ||
+ | { | ||
+ | if (predictionsQueue.empty()) | ||
+ | continue; | ||
+ | |||
+ | std:: | ||
+ | Mat frame = processedFramesQueue.get(); | ||
+ | |||
+ | postprocess(frame, | ||
+ | |||
+ | //if (predictionsQueue.counter > 1) | ||
+ | //{ | ||
+ | std::string label = format(" | ||
+ | putText(frame, | ||
+ | |||
+ | label = format(" | ||
+ | putText(frame, | ||
+ | |||
+ | label = format(" | ||
+ | putText(frame, | ||
+ | |||
+ | std:: | ||
+ | double freq = getTickFrequency() / 1000; | ||
+ | double t = net.getPerfProfile(layersTimes) / freq; | ||
+ | label = format(" | ||
+ | putText(frame, | ||
+ | //} | ||
+ | imshow(kWinName, | ||
+ | } | ||
+ | |||
+ | process = false; | ||
+ | framesThread.join(); | ||
+ | processingThread.join(); | ||
+ | |||
+ | #else // CV_CXX11 | ||
+ | if (asyncNumReq) | ||
+ | CV_Error(Error:: | ||
+ | |||
+ | // Process frames. | ||
+ | Mat frame, blob; | ||
+ | while (waitKey(1) < 0) | ||
+ | { | ||
+ | cap >> frame; | ||
+ | if (frame.empty()) | ||
+ | { | ||
+ | waitKey(); | ||
+ | break; | ||
+ | } | ||
+ | |||
+ | preprocess(frame, | ||
+ | |||
+ | std:: | ||
+ | net.forward(outs, | ||
+ | |||
+ | postprocess(frame, | ||
+ | |||
+ | // Put efficiency information. | ||
+ | std:: | ||
+ | double freq = getTickFrequency() / 1000; | ||
+ | double t = net.getPerfProfile(layersTimes) / freq; | ||
+ | std::string label = format(" | ||
+ | putText(frame, | ||
+ | |||
+ | imshow(kWinName, | ||
+ | } | ||
+ | # | ||
+ | return 0; | ||
+ | } | ||
+ | |||
+ | inline void preprocess(const Mat& frame, Net& net, Size inpSize, float scale, | ||
+ | const Scalar& mean, bool swapRB) | ||
+ | { | ||
+ | static Mat blob; | ||
+ | // Create a 4D blob from a frame. | ||
+ | if (inpSize.width <= 0) inpSize.width = frame.cols; | ||
+ | if (inpSize.height <= 0) inpSize.height = frame.rows; | ||
+ | blobFromImage(frame, | ||
+ | |||
+ | // Run a model. | ||
+ | net.setInput(blob, | ||
+ | if (net.getLayer(0)-> | ||
+ | { | ||
+ | resize(frame, | ||
+ | Mat imInfo = (Mat_< | ||
+ | net.setInput(imInfo, | ||
+ | } | ||
+ | } | ||
+ | |||
+ | void postprocess(Mat& | ||
+ | { | ||
+ | static std:: | ||
+ | static std::string outLayerType = net.getLayer(outLayers[0])-> | ||
+ | |||
+ | std:: | ||
+ | std:: | ||
+ | std:: | ||
+ | boxes.clear(); | ||
+ | if (net.getLayer(0)-> | ||
+ | { | ||
+ | /* | ||
+ | // Network produces output blob with a shape 1x1xNx7 where N is a number of | ||
+ | // detections and an every detection is a vector of values | ||
+ | // [batchId, classId, confidence, left, top, right, bottom] | ||
+ | CV_Assert(outs.size() == 1); | ||
+ | float* data = (float*)outs[0].data; | ||
+ | for (size_t i = 0; i < outs[0].total(); | ||
+ | { | ||
+ | float confidence = data[i + 2]; | ||
+ | if (confidence > confidenceThreshold) | ||
+ | { | ||
+ | int left = (int)data[i + 3]; | ||
+ | int top = (int)data[i + 4]; | ||
+ | int right = (int)data[i + 5]; | ||
+ | int bottom = (int)data[i + 6]; | ||
+ | int width = right - left + 1; | ||
+ | int height = bottom - top + 1; | ||
+ | classIds.push_back((int)(data[i + 1]) - 1); // Skip 0th background class id. | ||
+ | // boxes.push_back(Rect(left, | ||
+ | confidences.push_back(confidence); | ||
+ | } | ||
+ | } | ||
+ | */ | ||
+ | } | ||
+ | else if (outLayerType == " | ||
+ | { | ||
+ | // Network produces output blob with a shape 1x1xNx7 where N is a number of | ||
+ | // detections and an every detection is a vector of values | ||
+ | // [batchId, classId, confidence, left, top, right, bottom] | ||
+ | CV_Assert(outs.size() > 0); | ||
+ | for (size_t k = 0; k < outs.size(); | ||
+ | { | ||
+ | float* data = (float*)outs[k].data; | ||
+ | for (size_t i = 0; i < outs[k].total(); | ||
+ | { | ||
+ | float confidence = data[i + 2]; | ||
+ | if (confidence > confThreshold) | ||
+ | { | ||
+ | int left = (int)data[i + 3]; | ||
+ | int top = (int)data[i + 4]; | ||
+ | int right = (int)data[i + 5]; | ||
+ | int bottom = (int)data[i + 6]; | ||
+ | int width = right - left + 1; | ||
+ | int height = bottom - top + 1; | ||
+ | if (width <= 2 || height <= 2) | ||
+ | { | ||
+ | left = (int)(data[i + 3] * frame.cols); | ||
+ | top = (int)(data[i + 4] * frame.rows); | ||
+ | right = (int)(data[i + 5] * frame.cols); | ||
+ | bottom = (int)(data[i + 6] * frame.rows); | ||
+ | width = right - left + 1; | ||
+ | height = bottom - top + 1; | ||
+ | } | ||
+ | classIds.push_back((int)(data[i + 1]) - 1); // Skip 0th background class id. | ||
+ | boxes.push_back(Rect(left, | ||
+ | confidences.push_back(confidence); | ||
+ | } | ||
+ | } | ||
+ | } | ||
+ | } | ||
+ | else if (outLayerType == " | ||
+ | { | ||
+ | for (size_t i = 0; i < outs.size(); | ||
+ | { | ||
+ | // Network produces output blob with a shape NxC where N is a number of | ||
+ | // detected objects and C is a number of classes + 4 where the first 4 | ||
+ | // numbers are [center_x, center_y, width, height] | ||
+ | float* data = (float*)outs[i].data; | ||
+ | for (int j = 0; j < outs[i].rows; | ||
+ | { | ||
+ | Mat scores = outs[i].row(j).colRange(5, | ||
+ | Point classIdPoint; | ||
+ | double confidence; | ||
+ | minMaxLoc(scores, | ||
+ | if (confidence > confThreshold) | ||
+ | { | ||
+ | int centerX = (int)(data[0] * frame.cols); | ||
+ | int centerY = (int)(data[1] * frame.rows); | ||
+ | int width = (int)(data[2] * frame.cols); | ||
+ | int height = (int)(data[3] * frame.rows); | ||
+ | int left = centerX - width / 2; | ||
+ | int top = centerY - height / 2; | ||
+ | |||
+ | classIds.push_back(classIdPoint.x); | ||
+ | confidences.push_back((float)confidence); | ||
+ | boxes.push_back(Rect(left, | ||
+ | } | ||
+ | } | ||
+ | } | ||
+ | } | ||
+ | else | ||
+ | CV_Error(Error:: | ||
+ | |||
+ | |||
+ | std:: | ||
+ | NMSBoxes(boxes, | ||
+ | for (size_t i = 0; i < indices.size(); | ||
+ | { | ||
+ | int idx = indices[i]; | ||
+ | Rect box = boxes[idx]; | ||
+ | drawPred(classIds[idx], | ||
+ | box.x + box.width, box.y + box.height, frame); | ||
+ | } | ||
+ | |||
+ | |||
+ | } | ||
+ | |||
+ | void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame) | ||
+ | { | ||
+ | rectangle(frame, | ||
+ | |||
+ | std::string label = format(" | ||
+ | if (!classes.empty()) | ||
+ | { | ||
+ | CV_Assert(classId < (int)classes.size()); | ||
+ | label = classes[classId] + ": " + label; | ||
+ | } | ||
+ | |||
+ | int baseLine; | ||
+ | Size labelSize = getTextSize(label, | ||
+ | |||
+ | top = max(top, labelSize.height); | ||
+ | rectangle(frame, | ||
+ | Point(left + labelSize.width, | ||
+ | putText(frame, | ||
+ | } | ||
+ | |||
+ | void callback(int pos, void*) | ||
+ | { | ||
+ | confThreshold = pos * 0.01f; | ||
+ | } | ||
+ | |||
+ | </ | ||
+ | |||
+ | ===== 実行 ===== | ||
backend, targetに関しては下記を参照する.cudaを利用する場合,opencv-4.2.0では backend=5, target=6 を引数で渡すことを忘れないように. | ||
+ | * https:// | ||