C语言中使用FFmpeg将视频流与音频流合并为一个文件

  1. 编写代码

    （完整示例代码如下）
    #include <stdio.h>
    #include <libavformat/avformat.h>
    #include <libavcodec/avcodec.h>
    #include <libavutil/opt.h>
    #include <libavutil/timestamp.h>

    /*
     * Per-input-stream mapping from a demuxed input stream to its output
     * stream. stream_ctx[] holds one slot per input stream (video-file
     * streams first, then audio-file streams); slots for streams that are
     * not copied into the output are left zeroed by av_calloc.
     */
    typedef struct StreamContext {
    AVCodecContext* dec_ctx;   /* decoder context — not used in this remux-only example */
    AVCodecContext* enc_ctx;   /* encoder context — not used in this remux-only example */
    AVStream* dec_stream;      /* input (demuxer) stream */
    AVStream* enc_stream;      /* matching output (muxer) stream; NULL if the stream is not mapped */
    } StreamContext;

    static void log_packet(const AVFormatContext* fmt_ctx, const AVPacket* pkt, const char* tag) {
    AVRational* time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;

    printf("%s: pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",
    tag,
    av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, time_base),
    av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, time_base),
    av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, time_base),
    pkt->stream_index);
    }

    int main(int argc, char** argv) {
    if (argc != 4) {
    fprintf(stderr, "Usage: %s <input_video> <input_audio> <output_file>\n", argv[0]);
    return 1;
    }

    const char* video_file = argv[1];
    const char* audio_file = argv[2];
    const char* output_file = argv[3];

    int ret = 0;
    AVFormatContext* video_fmt_ctx = NULL;
    AVFormatContext* audio_fmt_ctx = NULL;
    AVFormatContext* output_fmt_ctx = NULL;
    StreamContext* stream_ctx = NULL;
    int video_stream_idx = -1;
    int audio_stream_idx = -1;
    int stream_count = 0;

    // 打开输入文件
    if ((ret = avformat_open_input(&video_fmt_ctx, video_file, NULL, NULL)) < 0) {
    fprintf(stderr, "Could not open video input file '%s'\n", video_file);
    goto end;
    }

    if ((ret = avformat_find_stream_info(video_fmt_ctx, NULL)) < 0) {
    fprintf(stderr, "Failed to retrieve video input stream information\n");
    goto end;
    }

    if ((ret = avformat_open_input(&audio_fmt_ctx, audio_file, NULL, NULL)) < 0) {
    fprintf(stderr, "Could not open audio input file '%s'\n", audio_file);
    goto end;
    }

    if ((ret = avformat_find_stream_info(audio_fmt_ctx, NULL)) < 0) {
    fprintf(stderr, "Failed to retrieve audio input stream information\n");
    goto end;
    }

    // 创建输出文件
    avformat_alloc_output_context2(&output_fmt_ctx, NULL, NULL, output_file);
    if (!output_fmt_ctx) {
    fprintf(stderr, "Could not create output context\n");
    ret = AVERROR_UNKNOWN;
    goto end;
    }

    stream_count = video_fmt_ctx->nb_streams + audio_fmt_ctx->nb_streams;
    stream_ctx = av_calloc(stream_count, sizeof(*stream_ctx));
    if (!stream_ctx) {
    ret = AVERROR(ENOMEM);
    goto end;
    }

    // 处理视频流
    for (unsigned int i = 0; i < video_fmt_ctx->nb_streams; i++) {
    AVStream* in_stream = video_fmt_ctx->streams[i];
    if (in_stream->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
    video_stream_idx = i;

    AVStream* out_stream = avformat_new_stream(output_fmt_ctx, NULL);
    if (!out_stream) {
    fprintf(stderr, "Failed allocating output stream\n");
    ret = AVERROR_UNKNOWN;
    goto end;
    }

    ret = avcodec_parameters_copy(out_stream->codecpar, in_stream->codecpar);
    if (ret < 0) {
    fprintf(stderr, "Failed to copy codec params\n");
    goto end;
    }

    stream_ctx[i].dec_stream = in_stream;
    stream_ctx[i].enc_stream = out_stream;
    out_stream->codecpar->codec_tag = 0;
    }
    }

    // 处理音频流
    for (unsigned int i = 0; i < audio_fmt_ctx->nb_streams; i++) {
    AVStream* in_stream = audio_fmt_ctx->streams[i];
    if (in_stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
    audio_stream_idx = i;

    AVStream* out_stream = avformat_new_stream(output_fmt_ctx, NULL);
    if (!out_stream) {
    fprintf(stderr, "Failed allocating output stream\n");
    ret = AVERROR_UNKNOWN;
    goto end;
    }

    ret = avcodec_parameters_copy(out_stream->codecpar, in_stream->codecpar);
    if (ret < 0) {
    fprintf(stderr, "Failed to copy codec params\n");
    goto end;
    }

    stream_ctx[video_fmt_ctx->nb_streams + i].dec_stream = in_stream;
    stream_ctx[video_fmt_ctx->nb_streams + i].enc_stream = out_stream;
    out_stream->codecpar->codec_tag = 0;
    }
    }

    // 打开输出文件
    if (!(output_fmt_ctx->oformat->flags & AVFMT_NOFILE)) {
    ret = avio_open(&output_fmt_ctx->pb, output_file, AVIO_FLAG_WRITE);
    if (ret < 0) {
    fprintf(stderr, "Could not open output file '%s'\n", output_file);
    goto end;
    }
    }

    // 写入文件头
    ret = avformat_write_header(output_fmt_ctx, NULL);
    if (ret < 0) {
    fprintf(stderr, "Error occurred when opening output file\n");
    goto end;
    }

    // 读取视频包并写入
    AVPacket* pkt = av_packet_alloc();
    while (1) {
    AVFormatContext* input_fmt_ctx;
    int stream_idx;
    int base_stream_idx;

    if (av_read_frame(video_fmt_ctx, pkt) >= 0) {
    input_fmt_ctx = video_fmt_ctx;
    stream_idx = pkt->stream_index;
    base_stream_idx = 0;
    }
    else if (av_read_frame(audio_fmt_ctx, pkt) >= 0) {
    input_fmt_ctx = audio_fmt_ctx;
    stream_idx = pkt->stream_index;
    base_stream_idx = video_fmt_ctx->nb_streams;
    }
    else {
    break;
    }

    StreamContext* stream = &stream_ctx[base_stream_idx + stream_idx];

    // 转换时间基准
    pkt->pts = av_rescale_q_rnd(pkt->pts,
    stream->dec_stream->time_base,
    stream->enc_stream->time_base,
    AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX);
    pkt->dts = av_rescale_q_rnd(pkt->dts,
    stream->dec_stream->time_base,
    stream->enc_stream->time_base,
    AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX);
    pkt->duration = av_rescale_q(pkt->duration,
    stream->dec_stream->time_base,
    stream->enc_stream->time_base);
    pkt->stream_index = stream->enc_stream->index;

    log_packet(output_fmt_ctx, pkt, "out");

    ret = av_interleaved_write_frame(output_fmt_ctx, pkt);
    if (ret < 0) {
    fprintf(stderr, "Error muxing packet\n");
    break;
    }
    av_packet_unref(pkt);
    }
    av_packet_free(&pkt);

    // 写入文件尾
    av_write_trailer(output_fmt_ctx);

    end:
    // 清理资源
    if (video_fmt_ctx)
    avformat_close_input(&video_fmt_ctx);
    if (audio_fmt_ctx)
    avformat_close_input(&audio_fmt_ctx);
    if (output_fmt_ctx && !(output_fmt_ctx->oformat->flags & AVFMT_NOFILE))
    avio_closep(&output_fmt_ctx->pb);
    if (output_fmt_ctx)
    avformat_free_context(output_fmt_ctx);
    if (stream_ctx)
    av_freep(&stream_ctx);

    return ret ? 1 : 0;
    }
  2. 编译后运行

    gcc merge_av.c -o merge_av $(pkg-config --cflags --libs libavformat libavcodec libavutil)
    ./merge_av input_video.mp4 input_audio.mp3 output_av.mp4