elixir-webrtc · gBillal · Mar 8, 2025 · Mar 30, 2025 · Mar 30, 2025 · Mar 30, 2025
diff --git a/Makefile b/Makefile
@@ -16,8 +16,8 @@ XAV_VIDEO_CONVERTER_SO = $(PRIV_DIR)/libxavvideoconverter.so
 DECODER_HEADERS = $(XAV_DIR)/xav_decoder.h $(XAV_DIR)/decoder.h $(XAV_DIR)/video_converter.h $(XAV_DIR)/audio_converter.h $(XAV_DIR)/utils.h $(XAV_DIR)/channel_layout.h
 DECODER_SOURCES = $(XAV_DIR)/xav_decoder.c $(XAV_DIR)/decoder.c $(XAV_DIR)/video_converter.c $(XAV_DIR)/audio_converter.c $(XAV_DIR)/utils.c
 
-ENCODER_HEADERS = $(XAV_DIR)/xav_encoder.h $(XAV_DIR)/encoder.h $(XAV_DIR)/utils.h
-ENCODER_SOURCES = $(XAV_DIR)/xav_encoder.c $(XAV_DIR)/encoder.c $(XAV_DIR)/utils.c
+ENCODER_HEADERS = $(XAV_DIR)/xav_encoder.h $(XAV_DIR)/encoder.h $(XAV_DIR)/utils.h $(XAV_DIR)/channel_layout.h
+ENCODER_SOURCES = $(XAV_DIR)/xav_encoder.c $(XAV_DIR)/encoder.c $(XAV_DIR)/utils.c $(XAV_DIR)/channel_layout.c
 
 READER_HEADERS = $(XAV_DIR)/xav_reader.h $(XAV_DIR)/reader.h $(XAV_DIR)/video_converter.h $(XAV_DIR)/audio_converter.h $(XAV_DIR)/utils.h $(XAV_DIR)/channel_layout.h
 READER_SOURCES = $(XAV_DIR)/xav_reader.c $(XAV_DIR)/reader.c $(XAV_DIR)/video_converter.c $(XAV_DIR)/audio_converter.c $(XAV_DIR)/utils.c

diff --git a/c_src/xav/channel_layout.c b/c_src/xav/channel_layout.c
@@ -0,0 +1,52 @@
+#include "channel_layout.h"
+
+// Resolves the channel layout described by the string `name` and stores it in
+// `layout`.
+//
+// Returns 1 on success and 0 when FFmpeg does not recognise `name`. Note that
+// the xav_set_* helpers below use the opposite convention (0 on success).
+int xav_get_channel_layout(const char *name, struct ChannelLayout *layout) {
+#if LIBAVUTIL_VERSION_MAJOR >= 58
+  if (av_channel_layout_from_string(&layout->layout, name) < 0) {
+    return 0;
+  }
+#else
+  // The legacy lookup signals an unknown name with an empty (0) mask.
+  layout->layout = av_get_channel_layout(name);
+  if (layout->layout == 0) {
+    return 0;
+  }
+#endif
+
+  return 1;
+}
+
+// Applies `layout` to the codec context `ctx`.
+//
+// Returns 0 on success, or the negative value reported by
+// av_channel_layout_copy() when built against libavutil >= 58.
+int xav_set_channel_layout(AVCodecContext *ctx, struct ChannelLayout *layout) {
+#if LIBAVUTIL_VERSION_MAJOR >= 58
+  return av_channel_layout_copy(&ctx->ch_layout, &layout->layout);
+#else
+  ctx->channel_layout = layout->layout;
+  // Keep the legacy channel count in step with the mask.
+  ctx->channels = av_get_channel_layout_nb_channels(layout->layout);
+  return 0;
+#endif
+}
+
+// Applies `layout` to `frame`.
+//
+// Returns 0 on success, or the negative value reported by
+// av_channel_layout_copy() when built against libavutil >= 58.
+int xav_set_frame_channel_layout(AVFrame *frame, struct ChannelLayout *layout) {
+#if LIBAVUTIL_VERSION_MAJOR >= 58
+  return av_channel_layout_copy(&frame->ch_layout, &layout->layout);
+#else
+  frame->channel_layout = layout->layout;
+  // xav_get_nb_channels() reads frame->channels on this API; keep it in sync.
+  frame->channels = av_get_channel_layout_nb_channels(layout->layout);
+  return 0;
+#endif
+}
diff --git a/c_src/xav/channel_layout.h b/c_src/xav/channel_layout.h
@@ -1,6 +1,7 @@
 #ifndef CHANNEL_LAYOUT_H
 #define CHANNEL_LAYOUT_H
 #include <libavutil/channel_layout.h>
+#include <libavcodec/avcodec.h>
 
 struct ChannelLayout {
 #if LIBAVUTIL_VERSION_MAJOR >= 58
@@ -9,4 +10,8 @@ struct ChannelLayout {
   uint64_t layout;
 #endif
 };
-#endif
+
+int xav_get_channel_layout(const char *name, struct ChannelLayout *layout);
+int xav_set_channel_layout(AVCodecContext *ctx, struct ChannelLayout *layout);
+int xav_set_frame_channel_layout(AVFrame *frame, struct ChannelLayout *layout);
+#endif
diff --git a/c_src/xav/decoder.c b/c_src/xav/decoder.c
@@ -13,7 +13,7 @@ struct Decoder *decoder_alloc() {
   return decoder;
 }
 
-int decoder_init(struct Decoder *decoder, const AVCodec *codec) {
+int decoder_init(struct Decoder *decoder, const AVCodec *codec, int channels) {
   decoder->media_type = codec->type;
   decoder->codec = codec;
 
@@ -22,6 +22,16 @@ int decoder_init(struct Decoder *decoder, const AVCodec *codec) {
     return -1;
   }
 
+  // A caller-supplied channel count (-1 means "not provided") is applied
+  // before the codec is opened.
+  if (codec->type == AVMEDIA_TYPE_AUDIO && channels != -1) {
+#if LIBAVUTIL_VERSION_MAJOR >= 58
+    av_channel_layout_default(&decoder->c->ch_layout, channels);
+#else
+    decoder->c->channels = channels;
+#endif
+  }
+
   decoder->frame = av_frame_alloc();
   if (!decoder->frame) {
     return -1;
@@ -32,11 +42,8 @@ int decoder_init(struct Decoder *decoder, const AVCodec *codec) {
     return -1;
   }
 
-  if (avcodec_open2(decoder->c, decoder->codec, NULL) < 0) {
-    return -1;
-  }
-
-  return 0;
+  // Hand avcodec_open2()'s result straight back to the caller.
+  return avcodec_open2(decoder->c, decoder->codec, NULL);
 }
 
 int decoder_decode(struct Decoder *decoder, AVPacket *pkt, AVFrame *frame) {

diff --git a/c_src/xav/decoder.h b/c_src/xav/decoder.h
@@ -16,7 +16,7 @@ struct Decoder {
 
 struct Decoder *decoder_alloc();
 
-int decoder_init(struct Decoder *decoder, const AVCodec *codec);
+int decoder_init(struct Decoder *decoder, const AVCodec *codec, int channels);
 
 int decoder_decode(struct Decoder *decoder, AVPacket *pkt, AVFrame *frame);
 

diff --git a/c_src/xav/encoder.c b/c_src/xav/encoder.c
@@ -23,21 +23,31 @@ int encoder_init(struct Encoder *encoder, struct EncoderConfig *config) {
     return -1;
   }
 
-  encoder->c->width = config->width;
-  encoder->c->height = config->height;
-  encoder->c->pix_fmt = config->format;
-  encoder->c->time_base = config->time_base;
+  if (encoder->codec->type == AVMEDIA_TYPE_VIDEO) {
+    encoder->c->width = config->width;
+    encoder->c->height = config->height;
+    encoder->c->pix_fmt = config->format;
+    encoder->c->time_base = config->time_base;
 
-  if (config->profile != FF_PROFILE_UNKNOWN) {
-    encoder->c->profile = config->profile;
-  }
+    if (config->gop_size > 0) {
+      encoder->c->gop_size = config->gop_size;
+    }
 
-  if (config->gop_size > 0) {
-    encoder->c->gop_size = config->gop_size;
+    if (config->max_b_frames >= 0) {
+      encoder->c->max_b_frames = config->max_b_frames;
+    }
+  } else {
+    // Every non-video codec is configured as audio.
+    encoder->c->sample_fmt = config->sample_format;
+    encoder->c->sample_rate = config->sample_rate;
+    // Copying the layout can fail; do not open the codec without it.
+    if (xav_set_channel_layout(encoder->c, &config->channel_layout) < 0) {
+      return -1;
+    }
   }
 
-  if (config->max_b_frames >= 0) {
-    encoder->c->max_b_frames = config->max_b_frames;
+  if (config->profile != FF_PROFILE_UNKNOWN) {
+    encoder->c->profile = config->profile;
   }
 
   AVDictionary *opts = NULL;

diff --git a/c_src/xav/encoder.h b/c_src/xav/encoder.h
@@ -1,3 +1,4 @@
+#include "channel_layout.h"
 #include "utils.h"
 #include <libavcodec/avcodec.h>
 
@@ -15,10 +16,13 @@ struct EncoderConfig {
   int width;
   int height;
   enum AVPixelFormat format;
+  enum AVSampleFormat sample_format;
   AVRational time_base;
   int gop_size;
   int max_b_frames;
   int profile;
+  int sample_rate;
+  struct ChannelLayout channel_layout;
 };
 
 struct Encoder *encoder_alloc();

diff --git a/c_src/xav/utils.c b/c_src/xav/utils.c
@@ -93,3 +93,16 @@ ERL_NIF_TERM xav_nif_packet_to_term(ErlNifEnv *env, AVPacket *packet) {
       enif_make_atom(env, packet->flags & AV_PKT_FLAG_KEY ? "true" : "false");
   return enif_make_tuple(env, 4, data_term, dts, pts, is_keyframe);
 }
+
+// Returns the number of audio channels recorded on `frame`.
+//
+// Reads AVFrame.ch_layout.nb_channels when built against libavutil >= 58 and
+// the legacy AVFrame.channels field otherwise.
+int xav_get_nb_channels(const AVFrame *frame) {
+#if LIBAVUTIL_VERSION_MAJOR >= 58
+  return frame->ch_layout.nb_channels;
+#else
+  return frame->channels;
+#endif
+}
diff --git a/c_src/xav/utils.h b/c_src/xav/utils.h
@@ -26,3 +26,4 @@ ERL_NIF_TERM xav_nif_video_frame_to_term(ErlNifEnv *env, AVFrame *frame);
 ERL_NIF_TERM xav_nif_audio_frame_to_term(ErlNifEnv *env, uint8_t **out_data, int out_samples,
                                          int out_size, enum AVSampleFormat out_format, int pts);
 ERL_NIF_TERM xav_nif_packet_to_term(ErlNifEnv *env, AVPacket *packet);
+int xav_get_nb_channels(const AVFrame *frame);
diff --git a/c_src/xav/xav_decoder.c b/c_src/xav/xav_decoder.c
@@ -13,13 +13,14 @@ void free_frames(AVFrame **frames, int size) {
 }
 
 ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
-  if (argc != 6) {
+  if (argc != 7) {
     return xav_nif_raise(env, "invalid_arg_count");
   }
 
   ERL_NIF_TERM ret;
   char *codec_name = NULL;
   char *out_format = NULL;
+  int channels;
 
   // resolve codec
   if (!xav_nif_get_atom(env, argv[0], &codec_name)) {
@@ -37,8 +38,13 @@ ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
     goto clean;
   }
 
+  if (!enif_get_int(env, argv[1], &channels)) {
+    ret = xav_nif_raise(env, "failed_to_get_int");
+    goto clean;
+  }
+
   // resolve output format
-  if (!xav_nif_get_atom(env, argv[1], &out_format)) {
+  if (!xav_nif_get_atom(env, argv[2], &out_format)) {
     ret = xav_nif_raise(env, "failed_to_get_atom");
     goto clean;
   }
@@ -61,25 +67,25 @@ ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
 
   // resolve other params
   int out_sample_rate;
-  if (!enif_get_int(env, argv[2], &out_sample_rate)) {
+  if (!enif_get_int(env, argv[3], &out_sample_rate)) {
     ret = xav_nif_raise(env, "invalid_out_sample_rate");
     goto clean;
   }
 
   int out_channels;
-  if (!enif_get_int(env, argv[3], &out_channels)) {
+  if (!enif_get_int(env, argv[4], &out_channels)) {
     ret = xav_nif_raise(env, "invalid_out_channels");
     goto clean;
   }
 
   int out_width;
-  if (!enif_get_int(env, argv[4], &out_width)) {
+  if (!enif_get_int(env, argv[5], &out_width)) {
     ret = xav_nif_raise(env, "failed_to_get_int");
     goto clean;
   }
 
   int out_height;
-  if (!enif_get_int(env, argv[5], &out_height)) {
+  if (!enif_get_int(env, argv[6], &out_height)) {
     ret = xav_nif_raise(env, "failed_to_get_int");
     goto clean;
   }
@@ -102,7 +108,7 @@ ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
     goto clean;
   }
 
-  if (decoder_init(xav_decoder->decoder, codec) != 0) {
+  if (decoder_init(xav_decoder->decoder, codec, channels)) {
     ret = xav_nif_raise(env, "failed_to_init_decoder");
     goto clean;
   }
@@ -393,7 +399,7 @@ void free_xav_decoder(ErlNifEnv *env, void *obj) {
   }
 }
 
-static ErlNifFunc xav_funcs[] = {{"new", 6, new},
+static ErlNifFunc xav_funcs[] = {{"new", 7, new},
                                  {"decode", 4, decode, ERL_NIF_DIRTY_JOB_CPU_BOUND},
                                  {"flush", 1, flush, ERL_NIF_DIRTY_JOB_CPU_BOUND},
                                  {"pixel_formats", 0, pixel_formats},