Interfacing gstreamer with OpenCV using opengl and cuda

classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

Interfacing gstreamer with OpenCV using opengl and cuda

meistrimees@gmail.com
Hello!

As the title says, I'm trying to interface a GStreamer pipeline (receiver) with OpenCV for some video frame processing, and I need to pass the processed frames on to another GStreamer pipeline (sender).
Receiver pipeline:
rtspsrc ! rtph264depay ! h264parse ! nvh264dec ! glcolorconvert ! appsink

Sender pipeline:
appsrc ! nvh264enc ! rtph264pay (rtsp-server will provide a stream for rtsp clients)

Basically: Receiver -> OpenCV -> Sender

Because I need to deliver 16 4K H.264/H.265 streams simultaneously (16 pipelines in, 16 pipelines out), I need to do all the heavy lifting on a GPU (I have access to an Nvidia RTX 5000).
So far I have managed to set up the appsink and appsrc elements to copy GL textures to CUDA memory and back. However, the performance is poor: it performs adequately only with a single output stream (sender pipeline).
Running with 10 receiver streams and no sender streams attached also performs quite nicely, so I guess the problem comes from the sender side.

I have not managed to find what is causing the bottleneck. Any suggestions or help are welcome.

Here is my (simplified) code

Receiver side(appsink):

// Callback from 'new-sample' signal
/**
 * appsink "new-sample" handler: pulls the pending sample and hands its first
 * memory block (expected to be a GstGLMemory) to pullGpuMat() on the GL
 * thread of the context that owns that memory.
 *
 * @param sink  the appsink element that emitted the signal
 * @return GST_FLOW_OK when the frame was handed over, GST_FLOW_EOS when no
 *         sample could be pulled, GST_FLOW_ERROR when the sample does not
 *         carry GL memory.
 */
static GstFlowReturn newSample(GstElement* sink, gpointer /*user_data*/)
{
    GstSample* sample = nullptr;
    g_signal_emit_by_name(sink, "pull-sample", &sample);
    if (sample == nullptr) {
        // "pull-sample" yields NULL when the appsink is EOS or stopped;
        // dereferencing it below would crash.
        return GST_FLOW_EOS;
    }

    GstBuffer* buffer = gst_sample_get_buffer(sample);
    if (buffer == nullptr || gst_buffer_n_memory(buffer) == 0) {
        gst_sample_unref(sample);
        return GST_FLOW_ERROR;
    }

    // gst_buffer_get_memory() returns a new reference; it is dropped below.
    GstMemory* memory = gst_buffer_get_memory(buffer, 0);
    if (memory == nullptr || !gst_is_gl_memory(memory)) {
        // Upstream did not negotiate memory:GLMemory; the cast below would
        // reinterpret an unrelated memory type.
        if (memory != nullptr) {
            gst_memory_unref(memory);
        }
        gst_sample_unref(sample);
        return GST_FLOW_ERROR;
    }

    PullGpuMatData data;
    data.glMemory = GST_GL_MEMORY_CAST(memory);
    data.handler = d; // NOTE(review): `d` is not defined in the visible snippet

    // `data` lives on this stack frame, so this relies on
    // gst_gl_context_thread_add() running the callback before it returns.
    gst_gl_context_thread_add(
        data.glMemory->mem.context,
        (GstGLContextThreadFunc)pullGpuMat,
        &data
    );

    gst_memory_unref(memory);
    gst_sample_unref(sample);
    return GST_FLOW_OK;
}

static void pullGpuMat(GstGLContext* context, PullGpuMatData* data)
{
    const int width = gst_gl_memory_get_texture_width(data->glMemory);
    const int height = gst_gl_memory_get_texture_height(data->glMemory);
    const guint textureId = gst_gl_memory_get_texture_id(data->glMemory);
   
    // Wraps a GL texture for OpenCV
    cv::ogl::Texture2D texture = cv::ogl::Texture2D({ width, height },
                                                    cv::ogl::Texture2D::Format::RGBA,
                                                    textureId, false);
    cv::ogl::Buffer glBuffer;
    texture.copyTo(glBuffer, CV_8U, true);
   
    cv::cuda::GpuMat tempMat = glBuffer.mapDevice();
    tempMat.copyTo(gpuMat);
    glBuffer.unmapDevice();
   
    // Got the result in gpuMat
}


Sender side(appsrc):

// For every sender pipeline a GstGLContext is created
bool init()
{
    GstGLDisplay* glDisplay = gst_gl_display_new();
    glContext = gst_gl_context_new(glDisplay);

    GError *error = NULL;
    bool ret = gst_gl_context_create(glContext, 0, &error);
    gst_object_unref(glDisplay);
    return ret;
}


/**
 * Pushes one processed frame into the sender pipeline by running pushFrame()
 * on the GL thread of `glContext`.
 *
 * @param mat  frame in CUDA memory; taken by value, so only this function's
 *             reference to the pixel data is dropped at the end.
 */
void pushFrameFromGpuMat(cv::cuda::GpuMat mat)
{
    PushFrameData data;
    data.mat = mat;

    // No gst_gl_context_activate() around this call: the callback executes on
    // the context's own GL thread, where the context is already current.
    // Activating it from the calling thread as well is at best a wasted call
    // and at worst contends for the context with its GL thread.
    //
    // `data` lives on this stack frame, so this relies on
    // gst_gl_context_thread_add() running the callback before it returns.
    gst_gl_context_thread_add(
        glContext,
        (GstGLContextThreadFunc)pushFrame,
        &data
    );

    // Drops this by-value copy's reference only; the CUDA allocation itself
    // is freed once no other GpuMat header refers to it.
    mat.release();
}

static void pushFrame(GstGLContext* context, PushFrameData* data)
{    
    // Copy from cuda memory to a GL texture
   
    cv::ogl::Buffer glBuffer;
    glBuffer.copyFrom(mat);
    cv::ogl::Texture2D* texture = new cv::ogl::Texture2D(); // using heap as I need to release the texture manually
    texture->copyFrom(glBuffer);
   
   
    // Wrap the texture into GstGLMemory
   
    GstVideoInfo vinfo;
    gst_video_info_set_format(&vinfo, GST_VIDEO_FORMAT_RGBA, mat.cols, mat.rows);
   
    GstAllocator* allocator = GST_ALLOCATOR(gst_gl_memory_allocator_get_default(context));

    FreeTextureData* data = new FreeTextureData;
    data->context = context;
    data->texture = texture;
   
    GstGLVideoAllocationParams* params = gst_gl_video_allocation_params_new_wrapped_texture(
      context, NULL, &vinfo, 0, NULL, GST_GL_TEXTURE_TARGET_2D, GST_GL_RGBA, texture->texId(),
      data, (GDestroyNotify)glMemoryFree);
     
    GstGLMemory* glMemory = GST_GL_MEMORY_CAST(gst_gl_base_memory_alloc(
      GST_GL_BASE_MEMORY_ALLOCATOR_CAST(allocator), (GstGLAllocationParams*) params));
   
    gst_gl_allocation_params_free((GstGLAllocationParams *)params);
    gst_object_unref(allocator);
   
   
    // Attach GstGLMemory object into buffer, timestamp the buffer and push it downstream
   
    GstBuffer* buffer = gst_buffer_new();
    gst_buffer_append_memory(buffer, GST_MEMORY_CAST(glMemory));
   
    GST_BUFFER_PTS(buffer) = timestamp;
    GST_BUFFER_DURATION(buffer) = gst_util_uint64_scale_int(1, GST_SECOND, framerate);
    timestamp += GST_BUFFER_DURATION(buffer);
   
    GstFlowReturn ret;
    g_signal_emit_by_name(elems.src, "push-buffer", buffer, &ret);
   
    gst_buffer_unref(buffer);
}

// To free the wrapped texture
/**
 * Destroy-notify for a wrapped texture: runs releaseTexture() on the GL
 * thread of the stored context, then deletes the Texture2D wrapper and the
 * FreeTextureData record itself.
 *
 * @param data  record allocated in pushFrame(); owned and freed here
 */
static void glMemoryFree(FreeTextureData* data)
{
    GstGLContext* const glThreadContext = data->context;
    auto* const wrappedTexture = data->texture;

    // Must complete before the deletes below: releaseTexture() dereferences
    // both `data` and the texture it points to.
    gst_gl_context_thread_add(glThreadContext,
                              (GstGLContextThreadFunc)releaseTexture,
                              data);

    delete wrappedTexture;
    delete data;
}

/**
 * GL-thread callback that calls release() on the OpenCV texture wrapper
 * stored in `data`.
 *
 * NOTE(review): confirm against the OpenCV version in use that
 * Texture2D::release() actually deletes the underlying GL texture when the
 * texture was created without autoRelease; if it does not, one texture
 * leaks per pushed frame.
 */
static void releaseTexture(GstGLContext* /*context*/, FreeTextureData* data)
{
    auto* const wrappedTexture = data->texture;
    wrappedTexture->release();
}


Viljar Hera

_______________________________________________
gstreamer-devel mailing list
[hidden email]
https://lists.freedesktop.org/mailman/listinfo/gstreamer-devel