C语言中使用FFmpeg将视频流与音频流合并为一个文件

  1. 编写代码

    （完整示例代码如下）
    #include <stdio.h>
    #include <libavformat/avformat.h>
    #include <libavcodec/avcodec.h>
    #include <libavutil/opt.h>
    #include <libavutil/timestamp.h>

    /*
     * Per-input-stream mapping from a demuxed input stream to its output
     * stream. stream_ctx[] holds one slot per input stream (video-file
     * streams first, then audio-file streams); slots for streams that are
     * not copied into the output are left zeroed by av_calloc.
     */
    typedef struct StreamContext {
    AVCodecContext* dec_ctx;   /* decoder context — not used in this remux-only example */
    AVCodecContext* enc_ctx;   /* encoder context — not used in this remux-only example */
    AVStream* dec_stream;      /* input (demuxer) stream */
    AVStream* enc_stream;      /* matching output (muxer) stream; NULL if the stream is not mapped */
    } StreamContext;

    static void log_packet(const AVFormatContext* fmt_ctx, const AVPacket* pkt, const char* tag) {
    AVRational* time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;

    printf("%s: pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",
    tag,
    av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, time_base),
    av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, time_base),
    av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, time_base),
    pkt->stream_index);
    }

    int main(int argc, char** argv) {
    if (argc != 4) {
    fprintf(stderr, "Usage: %s <input_video> <input_audio> <output_file>\n", argv[0]);
    return 1;
    }

    const char* video_file = argv[1];
    const char* audio_file = argv[2];
    const char* output_file = argv[3];

    int ret = 0;
    AVFormatContext* video_fmt_ctx = NULL;
    AVFormatContext* audio_fmt_ctx = NULL;
    AVFormatContext* output_fmt_ctx = NULL;
    StreamContext* stream_ctx = NULL;
    int video_stream_idx = -1;
    int audio_stream_idx = -1;
    int stream_count = 0;

    // 打开输入文件
    if ((ret = avformat_open_input(&video_fmt_ctx, video_file, NULL, NULL)) < 0) {
    fprintf(stderr, "Could not open video input file '%s'\n", video_file);
    goto end;
    }

    if ((ret = avformat_find_stream_info(video_fmt_ctx, NULL)) < 0) {
    fprintf(stderr, "Failed to retrieve video input stream information\n");
    goto end;
    }

    if ((ret = avformat_open_input(&audio_fmt_ctx, audio_file, NULL, NULL)) < 0) {
    fprintf(stderr, "Could not open audio input file '%s'\n", audio_file);
    goto end;
    }

    if ((ret = avformat_find_stream_info(audio_fmt_ctx, NULL)) < 0) {
    fprintf(stderr, "Failed to retrieve audio input stream information\n");
    goto end;
    }

    // 创建输出文件
    avformat_alloc_output_context2(&output_fmt_ctx, NULL, NULL, output_file);
    if (!output_fmt_ctx) {
    fprintf(stderr, "Could not create output context\n");
    ret = AVERROR_UNKNOWN;
    goto end;
    }

    stream_count = video_fmt_ctx->nb_streams + audio_fmt_ctx->nb_streams;
    stream_ctx = av_calloc(stream_count, sizeof(*stream_ctx));
    if (!stream_ctx) {
    ret = AVERROR(ENOMEM);
    goto end;
    }

    // 处理视频流
    for (unsigned int i = 0; i < video_fmt_ctx->nb_streams; i++) {
    AVStream* in_stream = video_fmt_ctx->streams[i];
    if (in_stream->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
    video_stream_idx = i;

    AVStream* out_stream = avformat_new_stream(output_fmt_ctx, NULL);
    if (!out_stream) {
    fprintf(stderr, "Failed allocating output stream\n");
    ret = AVERROR_UNKNOWN;
    goto end;
    }

    ret = avcodec_parameters_copy(out_stream->codecpar, in_stream->codecpar);
    if (ret < 0) {
    fprintf(stderr, "Failed to copy codec params\n");
    goto end;
    }

    stream_ctx[i].dec_stream = in_stream;
    stream_ctx[i].enc_stream = out_stream;
    out_stream->codecpar->codec_tag = 0;
    }
    }

    // 处理音频流
    for (unsigned int i = 0; i < audio_fmt_ctx->nb_streams; i++) {
    AVStream* in_stream = audio_fmt_ctx->streams[i];
    if (in_stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
    audio_stream_idx = i;

    AVStream* out_stream = avformat_new_stream(output_fmt_ctx, NULL);
    if (!out_stream) {
    fprintf(stderr, "Failed allocating output stream\n");
    ret = AVERROR_UNKNOWN;
    goto end;
    }

    ret = avcodec_parameters_copy(out_stream->codecpar, in_stream->codecpar);
    if (ret < 0) {
    fprintf(stderr, "Failed to copy codec params\n");
    goto end;
    }

    stream_ctx[video_fmt_ctx->nb_streams + i].dec_stream = in_stream;
    stream_ctx[video_fmt_ctx->nb_streams + i].enc_stream = out_stream;
    out_stream->codecpar->codec_tag = 0;
    }
    }

    // 打开输出文件
    if (!(output_fmt_ctx->oformat->flags & AVFMT_NOFILE)) {
    ret = avio_open(&output_fmt_ctx->pb, output_file, AVIO_FLAG_WRITE);
    if (ret < 0) {
    fprintf(stderr, "Could not open output file '%s'\n", output_file);
    goto end;
    }
    }

    // 写入文件头
    ret = avformat_write_header(output_fmt_ctx, NULL);
    if (ret < 0) {
    fprintf(stderr, "Error occurred when opening output file\n");
    goto end;
    }

    // 读取视频包并写入
    AVPacket* pkt = av_packet_alloc();
    while (1) {
    AVFormatContext* input_fmt_ctx;
    int stream_idx;
    int base_stream_idx;

    if (av_read_frame(video_fmt_ctx, pkt) >= 0) {
    input_fmt_ctx = video_fmt_ctx;
    stream_idx = pkt->stream_index;
    base_stream_idx = 0;
    }
    else if (av_read_frame(audio_fmt_ctx, pkt) >= 0) {
    input_fmt_ctx = audio_fmt_ctx;
    stream_idx = pkt->stream_index;
    base_stream_idx = video_fmt_ctx->nb_streams;
    }
    else {
    break;
    }

    StreamContext* stream = &stream_ctx[base_stream_idx + stream_idx];

    // 转换时间基准
    pkt->pts = av_rescale_q_rnd(pkt->pts,
    stream->dec_stream->time_base,
    stream->enc_stream->time_base,
    AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX);
    pkt->dts = av_rescale_q_rnd(pkt->dts,
    stream->dec_stream->time_base,
    stream->enc_stream->time_base,
    AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX);
    pkt->duration = av_rescale_q(pkt->duration,
    stream->dec_stream->time_base,
    stream->enc_stream->time_base);
    pkt->stream_index = stream->enc_stream->index;

    log_packet(output_fmt_ctx, pkt, "out");

    ret = av_interleaved_write_frame(output_fmt_ctx, pkt);
    if (ret < 0) {
    fprintf(stderr, "Error muxing packet\n");
    break;
    }
    av_packet_unref(pkt);
    }
    av_packet_free(&pkt);

    // 写入文件尾
    av_write_trailer(output_fmt_ctx);

    end:
    // 清理资源
    if (video_fmt_ctx)
    avformat_close_input(&video_fmt_ctx);
    if (audio_fmt_ctx)
    avformat_close_input(&audio_fmt_ctx);
    if (output_fmt_ctx && !(output_fmt_ctx->oformat->flags & AVFMT_NOFILE))
    avio_closep(&output_fmt_ctx->pb);
    if (output_fmt_ctx)
    avformat_free_context(output_fmt_ctx);
    if (stream_ctx)
    av_freep(&stream_ctx);

    return ret ? 1 : 0;
    }
  2. 编译后运行

    gcc merge_av.c -o merge_av $(pkg-config --cflags --libs libavformat libavcodec libavutil)
    ./merge_av input_video.mp4 input_audio.mp3 output_av.mp4