How to use sonic to perform speed doubling on ffmpeg AVFrame type audio. #51

Open
ZzMLvzZ-792998470 opened this issue Jun 30, 2024 · 0 comments

Comments

@ZzMLvzZ-792998470

Hello, I am trying to perform a 2x speed-up on the audio data decoded by ffmpeg. My initial idea was to simply drop samples to achieve the speed-up, but the resulting pitch change caused me a lot of trouble, so I chose to use sonic for the speed-up instead. However, the extraction and processing of the audio data does not behave the way I expected.
For example, for dual-channel audio in the FLTP sample format, my approach is to write the data into sonic channel by channel (in channel order), process it, and then read it back out. But I'm not sure whether I should write all the data from both channels before reading anything back, or read back after every per-channel write. Thank you very much for your advice!
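
For reference, here is a minimal sketch of the interleaved variant I am considering, assuming that sonic expects interleaved 16-bit samples and that sonicWriteShortToStream / sonicSamplesAvailable / sonicReadShortFromStream all count samples per channel; the helper interleaveFltpToS16 is purely illustrative and not part of my real code:

#include <algorithm>
#include <cmath>
#include <vector>

// Illustrative helper: convert one planar-float (FLTP) AVFrame into an
// interleaved 16-bit buffer, since sonic streams interleaved shorts.
static void interleaveFltpToS16(const AVFrame *in, std::vector<short> &out) {
    out.resize((size_t)in->nb_samples * in->channels);
    for (int i = 0; i < in->nb_samples; i++) {
        for (int ch = 0; ch < in->channels; ch++) {
            float s = ((const float *)in->data[ch])[i];
            s = std::max(-1.0f, std::min(1.0f, s));                  // clamp to [-1, 1]
            out[(size_t)i * in->channels + ch] = (short)std::lrintf(s * 32767.0f);
        }
    }
}

// Write the whole frame (both channels at once), then read back whatever
// sonic has finished processing so far.
std::vector<short> interleaved;
interleaveFltpToS16(inFrame, interleaved);
sonicWriteShortToStream(_sonic, interleaved.data(), inFrame->nb_samples);

int available = sonicSamplesAvailable(_sonic);                       // per-channel sample count
std::vector<short> processed((size_t)available * inFrame->channels);
int readSamples = sonicReadShortFromStream(_sonic, processed.data(), available);

With this arrangement each frame is written once and read once, instead of reading back after every per-channel write. Is this the intended usage, or is the per-channel write/read in my code below also acceptable?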

int Transcoder::add2TranscodeBuffer(AVFrame *inFrame, int sep, float speed) {
    int64_t inChannelLayout = inFrame->channel_layout;
    int inNbSamples = inFrame->nb_samples;
    int inSampleRate = inFrame->sample_rate;
    int inChannels = inFrame->channels;
    int inFormat = inFrame->format;

    int ret = 0;
    if (!swrContext) {
        // Allocate the resampler context
        swrContext = swr_alloc();
        // Configure the resampler with the output and input layout/format/rate
        swr_alloc_set_opts(swrContext, outChannelLayout, (AVSampleFormat) outFormat, outSampleRate,
                           av_get_default_channel_layout(inChannels), (AVSampleFormat) inFormat, inSampleRate, 0, 0);
        // Initialize the resampler
        swr_init(swrContext);
        int fifoSize = swr_get_out_samples(swrContext, 0);
        av_log(nullptr, AV_LOG_INFO, "fifoSize:%i\n", fifoSize);

        // If the sample rates match, use outNbSamples as the output size
        if (inSampleRate == outSampleRate){
            audioOutSize = outNbSamples;
        } else { // e.g. 48000 -> 44100: only once the buffer holds 1115 input samples can 1024 output samples be produced
            audioOutSize = ceil(1.0 * inNbSamples * inSampleRate / outSampleRate);
            if (inSampleRate < outSampleRate) {
                audioOutSize += outNbSamples;
            }
        }
    }

    // Compute the number of frames to pad; comfort-noise frames and ordinary frames are handled separately
    int silenceNum = sep & 0b01111111;
    bool isComfortNoise = (sep & 0b10000000) == 0b10000000;
    if(isComfortNoise) silenceNum++;

    while (silenceNum > 0) {
        if(!frame) frame = av_frame_alloc();
        frame->sample_rate = inFrame->sample_rate;
        frame->channels = inFrame->channels;
        frame->format = inFrame->format;
        frame->nb_samples = inFrame->nb_samples;
        frame->channel_layout = inFrame->channel_layout;

        av_frame_get_buffer(frame, 0);

        // Fill the frame with silence
        av_samples_set_silence(frame->data, 0, frame->nb_samples, frame->channels, (AVSampleFormat)frame->format);

        ret = swr_convert(swrContext, nullptr, 0, (const uint8_t **)frame->data, inNbSamples);
        if (ret < 0) {
            av_log(nullptr, AV_LOG_ERROR, "swr_convert failed:%i\n", ret);
            return ret;
        }
        silenceNum--;
    }


    // Buffer the frame:
    // push the frame to be resampled into the resampler's FIFO
    if(speed != 1.0f) {
        if (_speed != speed) {
            _speed = speed;
            if(_sonic) {
                sonicDestroyStream(_sonic);
                _sonic = nullptr;
            }
        }

        if (!_sonic) {
            _sonic = sonicCreateStream(inSampleRate, inChannels);
        }

        sonicSetSpeed(_sonic, _speed);
        sonicSetPitch(_sonic, 1);

        int sonicBuffSize;
        int out_nb_samples = 0;
        uint8_t** outData = new uint8_t*[8];
        // Write audio data to Sonic
        int time_write = 0;
        for (int channel = 0; channel < inChannels; channel++) {
            time_write++;
            int numsProcess = sonicWriteShortToStream(_sonic, (short *) inFrame->data[channel], inNbSamples);
            av_log(nullptr, AV_LOG_INFO, "write_into_sonic, time_write:%i\n", time_write);
            sonicBuffSize = sonicSamplesAvailable(_sonic);
            out_nb_samples += sonicBuffSize;
            auto data = new short[sonicBuffSize];
            int readSamples = sonicReadShortFromStream(_sonic, data, sonicBuffSize);
            int dataSize = 0;
            if(readSamples > 0) {
                dataSize = av_samples_get_buffer_size(nullptr, inChannels, readSamples, (AVSampleFormat)inFrame->format, 0);
            }
            outData[channel] = new uint8_t[dataSize];
            memcpy(outData[channel], (uint8_t *)data, dataSize);
            delete[] data;
        }

        ret = swr_convert(swrContext, nullptr, 0, (const uint8_t **)outData, out_nb_samples);
        if (ret < 0) {
            av_log(nullptr, AV_LOG_ERROR, "swr_convert failed: %i\n", ret);
        }
        for (int channel = 0; channel < inChannels; channel++) {
            delete[] outData[channel];
        }
        delete[] outData;
        return ret;
    } else {
        if (!isComfortNoise || sep == 0) {
            ret = swr_convert(swrContext, nullptr, 0, (const uint8_t **) inFrame->data, inNbSamples);
            if (ret < 0) {
                av_log(nullptr, AV_LOG_ERROR, "swr_convert failed:%i\n", ret);
            }
        }
    }

    return ret;
    // Alternative 1: separate the write and read phases, splitting the available samples evenly across the channels
//        int sonicBuffSize = 0;
//        // Write audio data to Sonic
//        int time_write = 0;
//        for (int channel = 0; channel < inChannels; channel++) {
//            time_write++;
//            int numsProcess = sonicWriteShortToStream(_sonic, (short *)inFrame->data[channel], inNbSamples);
//            av_log(nullptr, AV_LOG_INFO, "write_into_sonic, time_write:%i\n", time_write);
//            if (numsProcess > 0) {
//                sonicBuffSize = sonicSamplesAvailable(_sonic);
//                av_log(nullptr, AV_LOG_INFO, "write_into_sonic, sonicBufferSize:%i\n", sonicBuffSize);
//            }
//        }
//
//        if (sonicBuffSize > 0) {
//            int time_read = 0;
//            uint8_t **outSampleData = new uint8_t *[8];
//            int out_nb_samples = sonicBuffSize / inChannels;
//            for (int channel = 0; channel < inChannels; channel++) {
//                time_read++;
//                auto outData = new short[sonicBuffSize];
//                int readSamples = sonicReadShortFromStream(_sonic, outData, sonicBuffSize / inChannels);
//
//                av_log(nullptr, AV_LOG_INFO, "sonic::read, %i\n", time_read);
//                int dataSize = av_samples_get_buffer_size(nullptr, inChannels, readSamples, (AVSampleFormat)inFrame->format, 0);
//
//                outSampleData[channel] = new uint8_t[dataSize];
//                memcpy(outSampleData[channel], (uint8_t *)outData, dataSize);
//                delete[] outData;
//            }
//
//            auto data1 = outSampleData[0];
//            auto data2 = outSampleData[1];
//
//            av_log(nullptr, AV_LOG_INFO, "sonic::swr_convert, sample:%i\n", out_nb_samples);
//            ret = swr_convert(swrContext, nullptr, 0, (const uint8_t **)outSampleData, out_nb_samples);
//            if (ret < 0) {
//                av_log(nullptr, AV_LOG_ERROR, "swr_convert failed: %i\n", ret);
//            }
//            for (int channel = 0; channel < inChannels; channel++) {
//                delete[] outSampleData[channel];
//            }
//            delete[] outSampleData;
//        }
//        return ret;
}