Compare commits

..

5 Commits

Author SHA1 Message Date
Romain Vimont
455a802a16 swr 2023-02-26 13:06:32 +01:00
Romain Vimont
4162dd89bc audio_player WIP 2023-02-25 21:21:17 +01:00
Romain Vimont
60ab03d85c Add two-step write feature to bytebuf
If there is exactly one producer, then it can assume that the remaining
space in the buffer will only increase until it write something.

This assumption may allow the producer to write to the buffer (up to a
known safe size) without any synchronization mechanism, thus allowing
to read and write different parts of the buffer in parallel.

The producer can then commit the write with lock held, and update its
knowledge of the safe empty remaining space.
2023-02-25 21:21:17 +01:00
Romain Vimont
3f6ebf5d86 Introduce bytebuf util
Add a ring-buffer for bytes. It will be useful for buffering audio.
2023-02-25 21:21:17 +01:00
Romain Vimont
34ab105d10 Pass AVCodecContext to frame sinks
Frame consumers may need details about the frame format.
2023-02-25 21:21:17 +01:00
5 changed files with 122 additions and 33 deletions

View File

@@ -101,6 +101,7 @@ if not crossbuild_windows
dependency('libavformat', version: '>= 57.33'),
dependency('libavcodec', version: '>= 57.37'),
dependency('libavutil'),
dependency('libswresample'),
dependency('sdl2', version: '>= 2.0.5'),
]

View File

@@ -1,10 +1,15 @@
#include "audio_player.h"
#include <libavutil/opt.h>
#include "util/log.h"
/** Downcast frame_sink to sc_v4l2_sink */
#define DOWNCAST(SINK) container_of(SINK, struct sc_audio_player, frame_sink)
#define SC_AV_SAMPLE_FMT AV_SAMPLE_FMT_S16
#define SC_SDL_SAMPLE_FMT AUDIO_S16
void
sc_audio_player_sdl_callback(void *userdata, uint8_t *stream, int len_int) {
struct sc_audio_player *ap = userdata;
@@ -28,20 +33,29 @@ sc_audio_player_sdl_callback(void *userdata, uint8_t *stream, int len_int) {
}
}
static SDL_AudioFormat
sc_audio_player_ffmpeg_to_sdl_format(enum AVSampleFormat format) {
switch (format) {
case AV_SAMPLE_FMT_S16:
return AUDIO_S16;
case AV_SAMPLE_FMT_S32:
return AUDIO_S32;
case AV_SAMPLE_FMT_FLT:
return AUDIO_F32;
default:
LOGE("Unsupported FFmpeg sample format: %s",
av_get_sample_fmt_name(format));
return 0;
static size_t
sc_audio_player_get_swr_buf_size(struct sc_audio_player *ap, size_t samples) {
assert(ap->nb_channels);
assert(ap->out_bytes_per_sample);
return samples * ap->nb_channels * ap->out_bytes_per_sample;
}
static uint8_t *
sc_audio_player_get_swr_buf(struct sc_audio_player *ap, size_t min_samples) {
size_t min_buf_size = sc_audio_player_get_swr_buf_size(ap, min_samples);
if (min_buf_size < ap->swr_buf_alloc_size) {
size_t new_size = min_buf_size + 4096;
uint8_t *buf = realloc(ap->swr_buf, new_size);
if (!buf) {
LOG_OOM();
// Could not realloc to the requested size
return NULL;
}
ap->swr_buf = buf;
ap->swr_buf_alloc_size = new_size;
}
return ap->swr_buf;
}
static bool
@@ -49,20 +63,45 @@ sc_audio_player_frame_sink_open(struct sc_frame_sink *sink,
const AVCodecContext *ctx) {
struct sc_audio_player *ap = DOWNCAST(sink);
SDL_AudioFormat format =
sc_audio_player_ffmpeg_to_sdl_format(ctx->sample_fmt);
if (!format) {
// error already logged
//return false;
format = AUDIO_F32; // it's planar, but for now there is only 1 channel
SwrContext *swr_ctx = ap->swr_ctx;
assert(swr_ctx);
assert(ctx->sample_rate > 0);
assert(ctx->ch_layout.nb_channels > 0);
assert(!av_sample_fmt_is_planar(SC_AV_SAMPLE_FMT));
int out_bytes_per_sample = av_get_bytes_per_sample(SC_AV_SAMPLE_FMT);
assert(out_bytes_per_sample > 0);
av_opt_set_chlayout(swr_ctx, "in_chlayout", &ctx->ch_layout, 0);
av_opt_set_int(swr_ctx, "in_sample_rate", ctx->sample_rate, 0);
av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", ctx->sample_fmt, 0);
av_opt_set_chlayout(swr_ctx, "out_chlayout", &ctx->ch_layout, 0);
av_opt_set_int(swr_ctx, "out_sample_rate", ctx->sample_rate, 0);
av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", SC_AV_SAMPLE_FMT, 0);
int ret = swr_init(swr_ctx);
if (ret) {
LOGE("Failed to initialize the resampling context");
return false;
}
ap->sample_rate = ctx->sample_rate;
ap->nb_channels = ctx->ch_layout.nb_channels;
ap->out_bytes_per_sample = out_bytes_per_sample;
size_t initial_swr_buf_size = sc_audio_player_get_swr_buf_size(ap, 4096);
ap->swr_buf = malloc(initial_swr_buf_size);
if (!ap->swr_buf) {
LOG_OOM();
return false;
}
LOGI("%d\n", ctx->sample_rate);
SDL_AudioSpec desired = {
.freq = ctx->sample_rate,
.format = format,
.channels = 1,
.samples = 2048,
.format = SC_SDL_SAMPLE_FMT,
.channels = ctx->ch_layout.nb_channels,
.samples = 512, // ~10ms at 48000Hz
.callback = sc_audio_player_sdl_callback,
.userdata = ap,
};
@@ -92,24 +131,41 @@ static bool
sc_audio_player_frame_sink_push(struct sc_frame_sink *sink, const AVFrame *frame) {
struct sc_audio_player *ap = DOWNCAST(sink);
const uint8_t *data = frame->data[0];
size_t size = frame->linesize[0];
SwrContext *swr_ctx = ap->swr_ctx;
int64_t delay = swr_get_delay(swr_ctx, ap->sample_rate);
// No need to av_rescale_rnd(), input and output sample rates are the same
int dst_nb_samples = delay + frame->nb_samples;
uint8_t *swr_buf = sc_audio_player_get_swr_buf(ap, frame->nb_samples);
if (!swr_buf) {
return false;
}
int ret = swr_convert(swr_ctx, &swr_buf, dst_nb_samples,
(const uint8_t **) frame->data, frame->nb_samples);
if (ret < 0) {
LOGE("Resampling failed: %d", ret);
return false;
}
LOGI("ret=%d dst_nb_samples=%d\n", ret, dst_nb_samples);
size_t swr_buf_size = sc_audio_player_get_swr_buf_size(ap, ret);
LOGI("== swr_buf_size %lu", swr_buf_size);
// TODO convert to non planar format
// TODO then re-enable stereo
// TODO clock drift compensation
// It should almost always be possible to write without lock
bool can_write_without_lock = size <= ap->safe_empty_buffer;
bool can_write_without_lock = swr_buf_size <= ap->safe_empty_buffer;
if (can_write_without_lock) {
sc_bytebuf_prepare_write(&ap->buf, data, size);
sc_bytebuf_prepare_write(&ap->buf, swr_buf, swr_buf_size);
}
SDL_LockAudioDevice(ap->device);
if (can_write_without_lock) {
sc_bytebuf_commit_write(&ap->buf, size);
sc_bytebuf_commit_write(&ap->buf, swr_buf_size);
} else {
sc_bytebuf_write(&ap->buf, data, size);
sc_bytebuf_write(&ap->buf, swr_buf, swr_buf_size);
}
// The next time, it will remain at least the current empty space
@@ -128,8 +184,18 @@ sc_audio_player_init(struct sc_audio_player *ap,
return false;
}
ap->swr_ctx = swr_alloc();
if (!ap->swr_ctx) {
sc_bytebuf_destroy(&ap->buf);
LOG_OOM();
return false;
}
ap->safe_empty_buffer = sc_bytebuf_write_remaining(&ap->buf);
ap->swr_buf = NULL;
ap->swr_buf_alloc_size = 0;
assert(cbs && cbs->on_ended);
ap->cbs = cbs;
ap->cbs_userdata = cbs_userdata;
@@ -147,4 +213,6 @@ sc_audio_player_init(struct sc_audio_player *ap,
void
sc_audio_player_destroy(struct sc_audio_player *ap) {
sc_bytebuf_destroy(&ap->buf);
swr_free(&ap->swr_ctx);
free(ap->swr_buf);
}

View File

@@ -9,6 +9,7 @@
#include <util/thread.h>
#include <libavformat/avformat.h>
#include <libswresample/swresample.h>
#include <SDL2/SDL.h>
struct sc_audio_player {
@@ -21,6 +22,19 @@ struct sc_audio_player {
// Number of bytes which could be written without locking
size_t safe_empty_buffer;
struct SwrContext *swr_ctx;
// The sample rate is the same for input and output
unsigned sample_rate;
// The number of channels is the same for input and output
unsigned nb_channels;
unsigned out_bytes_per_sample;
// Target buffer for resampling
uint8_t *swr_buf;
size_t swr_buf_alloc_size;
const struct sc_audio_player_callbacks *cbs;
void *cbs_userdata;
};

View File

@@ -2,6 +2,7 @@
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/channel_layout.h>
#include "events.h"
#include "video_buffer.h"
@@ -48,8 +49,13 @@ sc_decoder_open(struct sc_decoder *decoder, const AVCodec *codec) {
decoder->codec_ctx->flags |= AV_CODEC_FLAG_LOW_DELAY;
if (codec->type == AVMEDIA_TYPE_VIDEO) {
// Only YUV 4:2:0 is supported, hardcode it
// Hardcoded video properties
decoder->codec_ctx->pix_fmt = AV_PIX_FMT_YUV420P;
} else {
// Hardcoded audio properties
decoder->codec_ctx->ch_layout =
(AVChannelLayout) AV_CHANNEL_LAYOUT_STEREO;
decoder->codec_ctx->sample_rate = 48000;
}
if (avcodec_open2(decoder->codec_ctx, codec, NULL) < 0) {

View File

@@ -40,7 +40,7 @@ public final class AudioEncoder {
}
private static final int SAMPLE_RATE = 48000;
private static final int CHANNELS = 1;
private static final int CHANNELS = 2;
private static final int BUFFER_MS = 10; // milliseconds
private static final int BUFFER_SIZE = SAMPLE_RATE * CHANNELS * BUFFER_MS / 1000;