Hello! As the title says, I'm trying to interface a GStreamer pipeline (receiver) with OpenCV for some video frame processing, and then feed the processed frames into another GStreamer pipeline (sender).

Receiver pipeline: rtspsrc ! rtph264depay ! h264parse ! nvh264dec ! glcolorconvert ! appsink
Sender pipeline:   appsrc ! nvh264enc ! rtph264pay   (an rtsp-server will provide the stream to RTSP clients)

Basically: Receiver -> OpenCV -> Sender

Because I need to deliver 16 4K H.264/H.265 streams simultaneously (16 pipelines in, 16 pipelines out), I have to do all the heavy lifting on the GPU (I have access to an Nvidia RTX 5000). So far I have managed to set up the appsink and appsrc elements to copy GL textures to CUDA memory and back. However, the performance is poor: it only performs adequately with a single output stream (sender pipeline) attached. Running with 10 receiver streams and no sender streams attached also performs quite nicely, so I suspect the bottleneck is on the sender side, but I have not managed to find what is causing it. Any suggestions or help are welcome.

Here is my (simplified) code.

Receiver side (appsink):

// Callback from the appsink 'new-sample' signal
static GstFlowReturn newSample(GstElement* sink, gpointer /*user_data*/)
{
    GstSample* sample = nullptr;
    g_signal_emit_by_name(sink, "pull-sample", &sample);
    GstBuffer* buffer = gst_sample_get_buffer(sample);
    GstMemory* memory = gst_buffer_get_memory(buffer, 0);

    PullGpuMatData data;
    data.glMemory = GST_GL_MEMORY_CAST(memory);
    data.handler  = d;   // 'd' is the per-stream handler object (simplified here)

    // Runs pullGpuMat() on the GL thread of the context that owns the texture
    gst_gl_context_thread_add(
        data.glMemory->mem.context,
        (GstGLContextThreadFunc)pullGpuMat,
        &data
    );

    gst_memory_unref(memory);
    gst_sample_unref(sample);
    return GST_FLOW_OK;
}

static void pullGpuMat(GstGLContext* context, PullGpuMatData* data)
{
    const int width       = gst_gl_memory_get_texture_width(data->glMemory);
    const int height      = gst_gl_memory_get_texture_height(data->glMemory);
    const guint textureId = gst_gl_memory_get_texture_id(data->glMemory);

    // Wrap the GL texture for OpenCV (no ownership taken)
    cv::ogl::Texture2D texture = cv::ogl::Texture2D(
        { width, height }, cv::ogl::Texture2D::Format::RGBA, textureId, false);

    // Texture -> GL buffer -> CUDA memory
    cv::ogl::Buffer glBuffer;
    texture.copyTo(glBuffer, CV_8U, true);
    cv::cuda::GpuMat tempMat = glBuffer.mapDevice();
    tempMat.copyTo(gpuMat);   // gpuMat is the handler's GpuMat receiving the frame (simplified here)
    glBuffer.unmapDevice();
    // The frame is now available in gpuMat
}
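For reference, this is roughly how each receiver pipeline is created and the callback above is attached. This is only an illustrative sketch: the helper name, the exact appsink properties and the caps filter are simplified compared to the real code.

// Illustrative sketch: build one receiver pipeline and hook up the appsink
static GstElement* buildReceiver(const char* rtspUrl, gpointer userData)
{
    gchar* desc = g_strdup_printf(
        "rtspsrc location=%s ! rtph264depay ! h264parse ! nvh264dec ! "
        "glcolorconvert ! video/x-raw(memory:GLMemory),format=RGBA ! "
        "appsink name=sink", rtspUrl);
    GstElement* pipeline = gst_parse_launch(desc, NULL);   // error handling omitted
    g_free(desc);

    GstElement* sink = gst_bin_get_by_name(GST_BIN(pipeline), "sink");
    g_object_set(sink,
        "emit-signals", TRUE,
        "sync", FALSE,
        "max-buffers", 2,
        "drop", TRUE,
        NULL);
    g_signal_connect(sink, "new-sample", G_CALLBACK(newSample), userData);
    gst_object_unref(sink);
    return pipeline;
}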
Sender side (appsrc):

// For every sender pipeline a separate GstGLContext is created.
// glContext, elems.src, timestamp and framerate are members of the
// per-stream sender object (omitted here for brevity).
bool init()
{
    GstGLDisplay* glDisplay = gst_gl_display_new();
    glContext = gst_gl_context_new(glDisplay);

    GError* error = NULL;
    bool ret = gst_gl_context_create(glContext, NULL, &error);
    gst_object_unref(glDisplay);
    return ret;
}

void pushFrameFromGpuMat(cv::cuda::GpuMat mat)
{
    PushFrameData data;
    data.mat = mat;

    gst_gl_context_activate(glContext, TRUE);
    gst_gl_context_thread_add(
        glContext,
        (GstGLContextThreadFunc)pushFrame,
        &data
    );
    gst_gl_context_activate(glContext, FALSE);

    // release the CUDA memory
    mat.release();
}

static void pushFrame(GstGLContext* context, PushFrameData* data)
{
    // Copy from CUDA memory to a GL texture
    cv::ogl::Buffer glBuffer;
    glBuffer.copyFrom(data->mat);

    // Allocated on the heap because the texture has to be released manually
    // once downstream is done with the wrapped memory (see glMemoryFree below)
    cv::ogl::Texture2D* texture = new cv::ogl::Texture2D();
    texture->copyFrom(glBuffer);

    // Wrap the texture into a GstGLMemory
    GstVideoInfo vinfo;
    gst_video_info_set_format(&vinfo, GST_VIDEO_FORMAT_RGBA, data->mat.cols, data->mat.rows);

    GstAllocator* allocator = GST_ALLOCATOR(gst_gl_memory_allocator_get_default(context));

    FreeTextureData* freeData = new FreeTextureData;
    freeData->context = context;
    freeData->texture = texture;

    GstGLVideoAllocationParams* params =
        gst_gl_video_allocation_params_new_wrapped_texture(
            context, NULL, &vinfo, 0, NULL,
            GST_GL_TEXTURE_TARGET_2D, GST_GL_RGBA,
            texture->texId(),
            freeData, (GDestroyNotify)glMemoryFree);

    GstGLMemory* glMemory = GST_GL_MEMORY_CAST(gst_gl_base_memory_alloc(
        GST_GL_BASE_MEMORY_ALLOCATOR_CAST(allocator),
        (GstGLAllocationParams*)params));

    gst_gl_allocation_params_free((GstGLAllocationParams*)params);
    gst_object_unref(allocator);

    // Attach the GstGLMemory to a buffer, timestamp it and push it downstream
    GstBuffer* buffer = gst_buffer_new();
    gst_buffer_append_memory(buffer, GST_MEMORY_CAST(glMemory));

    GST_BUFFER_PTS(buffer)      = timestamp;
    GST_BUFFER_DURATION(buffer) = gst_util_uint64_scale_int(1, GST_SECOND, framerate);
    timestamp += GST_BUFFER_DURATION(buffer);

    GstFlowReturn ret;
    g_signal_emit_by_name(elems.src, "push-buffer", buffer, &ret);
    gst_buffer_unref(buffer);
}

// Frees the wrapped texture once the GstGLMemory is destroyed
static void glMemoryFree(FreeTextureData* data)
{
    gst_gl_context_thread_add(
        data->context,
        (GstGLContextThreadFunc)releaseTexture,
        data
    );
    delete data->texture;
    delete data;
}

static void releaseTexture(GstGLContext*, FreeTextureData* data)
{
    data->texture->release();
}

Viljar Hera
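P.S. In case it matters, this is roughly how the appsrc of each sender pipeline is configured before frames are pushed into it. Again, only an illustrative sketch: the helper name and the exact property values are simplified.

// Illustrative sketch: caps and properties set on one sender's appsrc
static void configureAppSrc(GstElement* src, int width, int height, int framerate)
{
    GstCaps* caps = gst_caps_new_simple("video/x-raw",
        "format",    G_TYPE_STRING, "RGBA",
        "width",     G_TYPE_INT, width,
        "height",    G_TYPE_INT, height,
        "framerate", GST_TYPE_FRACTION, framerate, 1,
        NULL);
    // The pushed buffers carry GstGLMemory, so advertise the GLMemory caps feature
    gst_caps_set_features(caps, 0, gst_caps_features_new("memory:GLMemory", NULL));

    g_object_set(src,
        "caps",    caps,
        "format",  GST_FORMAT_TIME,   // buffers are timestamped in pushFrame()
        "is-live", TRUE,
        NULL);
    gst_caps_unref(caps);
}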