C# 使用 NAudio 实现录音并过滤大部分无用的杂音,并使用 Python 识别录音内容

NAudio 录音帮助类
    /// <summary>
    /// Contract for a speech recorder: choose the output file, then start/stop capture.
    /// </summary>
    public interface ISpeechRecorder
    {
        /// <summary>Sets the path of the WAV file the next recording is written to.</summary>
        void SetFileName(string fileName);
        /// <summary>Starts capturing audio from the default input device.</summary>
        void StartRec();
        /// <summary>Stops the current capture session.</summary>
        void StopRec();
    }
    /// <summary>
    /// NAudio-based recorder that keeps a rolling buffer of audio, detects speech
    /// by peak loudness, and writes detected segments to a WAV file. When the WAV
    /// file reaches 5 seconds, recording stops and <see cref="actionSpeechToText"/>
    /// is raised with the file path so a recognizer can process it.
    /// </summary>
    public class NAudioRecorder : ISpeechRecorder
    {
        #region var

        /// <summary>
        /// Capture device (configured as 16 kHz, 16-bit, mono in <see cref="StartRec"/>).
        /// </summary>
        public WaveInEvent waveSource = null;
        /// <summary>
        /// WAV writer used to persist captured speech segments to disk.
        /// </summary>
        public WaveFileWriter waveFile = null;
        /// <summary>
        /// Target WAV file path (set via <see cref="SetFileName"/>).
        /// </summary>
        private string fileName = string.Empty;
        /// <summary>
        /// Optional callback invoked with a captured loud segment (currently unused).
        /// </summary>
        public Action<byte[]> ReciveMaxData;
        /// <summary>
        /// Rolling buffer of recent audio bytes awaiting a write decision.
        /// </summary>
        private List<byte> CacheBuffer = new List<byte>();
        /// <summary>
        /// Timestamp of the most recent frame that exceeded the loudness threshold.
        /// </summary>
        private DateTime BeginTime = DateTime.Now;
        /// <summary>
        /// Whether speech is currently being detected.
        /// </summary>
        private bool IsSpeaking = false;
        /// <summary>
        /// Loudness threshold (normalized sample magnitude, 0..1) above which a
        /// frame counts as speech.
        /// </summary>
        public float LoudnessStant = 0.09F;

        #endregion
        /// <summary>
        /// Icon update event. Codes: 1 = idle, 2 = error, 3 = success, 4 = receiving input.
        /// </summary>
        public event Action<int> updateIcon;
        /// <summary>
        /// Raised with the WAV file path once a recording is ready for recognition.
        /// </summary>
        public event Action<string> actionSpeechToText;

        /// <summary>
        /// Starts a new capture session writing to the configured file name.
        /// </summary>
        public void StartRec()
        {
            waveSource = new WaveInEvent();
            waveSource.WaveFormat = new WaveFormat(16000, 16, 1); // 16 kHz, 16-bit, mono
            waveSource.DataAvailable += new EventHandler<WaveInEventArgs>(waveSource_DataAvailable);
            waveSource.RecordingStopped += new EventHandler<StoppedEventArgs>(waveSource_RecordingStopped);
            waveFile = new WaveFileWriter(fileName, waveSource.WaveFormat);
            waveSource.StartRecording();
        }

        /// <summary>
        /// Stops the current capture session. Safe to call when no session is active.
        /// </summary>
        public void StopRec()
        {
            // Null-guard: StopRec may be called before StartRec or after the
            // RecordingStopped handler has already cleared waveSource.
            waveSource?.StopRecording();
        }

        /// <summary>
        /// Sets the path the next recording is saved to.
        /// </summary>
        /// <param name="fileName">Path of the output WAV file.</param>
        public void SetFileName(string fileName)
        {
            this.fileName = fileName;
        }

        private bool isMic = true;

        /// <summary>
        /// Capture callback: analyzes the new frame and, once 5 seconds of speech
        /// have been written, stops recording and hands the file to the recognizer.
        /// </summary>
        /// <param name="sender">The capture device.</param>
        /// <param name="e">Carries the raw buffer; only <c>BytesRecorded</c> bytes are valid.</param>
        private void waveSource_DataAvailable(object sender, WaveInEventArgs e)
        {
            if (waveFile == null) return;
            // Only the first e.BytesRecorded bytes of e.Buffer belong to this
            // callback; the tail may contain stale data from earlier frames.
            byte[] frame = new byte[e.BytesRecorded];
            Buffer.BlockCopy(e.Buffer, 0, frame, 0, e.BytesRecorded);
            AnalyzeVoice(frame);
            if (waveFile.TotalTime < TimeSpan.FromSeconds(5)) return;
            waveSource.StopRecording();
            // NOTE(review): delegate BeginInvoke is .NET Framework only; switch to
            // Task.Run if this code is moved to .NET Core/.NET 5+.
            actionSpeechToText?.BeginInvoke(this.fileName, null, null);

        }

        /// <summary>
        /// Recording-stopped callback: releases the device and the file writer.
        /// </summary>
        /// <param name="sender">The capture device.</param>
        /// <param name="e">Stop details (possible exception is ignored here).</param>
        private void waveSource_RecordingStopped(object sender, StoppedEventArgs e)
        {
            waveSource?.Dispose();
            waveSource = null;
            waveFile?.Dispose();
            waveFile = null;
        }

        /// <summary>
        /// Classifies a frame as speech/silence by peak magnitude and manages the
        /// rolling buffer: speech frames are accumulated; after ~2 s of silence the
        /// accumulated segment is flushed to the WAV file; while idle the buffer is
        /// capped so it only keeps the most recent audio (pre-roll for the next segment).
        /// </summary>
        /// <param name="buf">PCM frame, 16-bit little-endian mono samples.</param>
        private void AnalyzeVoice(byte[] buf)
        {
            float max = 0f;
            // Interpret the buffer as 16-bit little-endian samples and find the peak.
            // The index+1 bound also guards against a trailing odd byte.
            for (int index = 0; index + 1 < buf.Length; index += 2)
            {
                short sample = (short)((buf[index + 1] << 8) |
                                       buf[index + 0]);
                // Normalize to [-1, 1) floating point.
                var sample32 = sample / 32768f;
                // Absolute value.
                if (sample32 < 0) sample32 = -sample32;
                // Track the peak.
                if (sample32 > max) max = sample32;
            }
            if (max > LoudnessStant)
            {
                // Loud frame: keep accumulating and refresh the speech timestamp.
                CacheBuffer.AddRange(buf);
                IsSpeaking = true;
                BeginTime = DateTime.Now;
            }
            else
            {
                if (IsSpeaking)
                {
                    updateIcon?.BeginInvoke(4, null, null); // icon 4 = receiving input
                    if ((DateTime.Now - BeginTime).TotalSeconds < 2)
                    {
                        // Short pause inside a phrase: keep buffering.
                        CacheBuffer.AddRange(buf);
                    }
                    else
                    {
                        // 2+ seconds of silence: the phrase is over, flush it to disk.
                        CacheBuffer.AddRange(buf);
                        waveFile.Write(CacheBuffer.ToArray(), 0, CacheBuffer.Count);
                        waveFile.Flush();
                        //ReciveMaxData(CacheBuffer.ToArray());
                        CacheBuffer.Clear();
                        IsSpeaking = false;
                    }
                }
                else
                {
                    // Idle: cap the pre-roll buffer (3200 bytes = 100 ms at 16 kHz/16-bit mono).
                    if (CacheBuffer.Count > 3200 * 6)
                    {
                        CacheBuffer.RemoveRange(0, 3200);
                    }
                    CacheBuffer.AddRange(buf);
                    updateIcon?.BeginInvoke(1, null, null); // icon 1 = idle
                }

            }
        }
    }
Python 安装录音识别环境
pip install SpeechRecognition 
pip install PyAudio
录音识别脚本
import sys
import speech_recognition as sr

r = sr.Recognizer()


def microphone_to_text():
    """Listen on the default microphone and recognize Chinese speech.

    Returns the recognized text, an explanatory message when Google could not
    understand the audio, or "" when nothing was captured / an error occurred.
    """
    # Create the microphone source.
    mic = sr.Microphone()
    audio = None
    with mic as source:
        try:
            print("Say something!")
            # Wait at most 1 s for speech to start; cap a phrase at 10 s.
            audio = r.listen(source, timeout=1, phrase_time_limit=10)
        except Exception as ex:
            print(ex)
    if audio is None:
        # listen() raised (timeout or device error); the original code would
        # hit a NameError here, so bail out explicitly instead.
        return ""
    try:
        # Recognize the captured audio as Chinese.
        print(type(audio))
        output = r.recognize_google(audio, language='zh-cn')

        print(output)  # show the result
        return output
    except sr.UnknownValueError as unknown:
        print("Google Speech Recognition could not understand audio")
        return "Google Speech Recognition could not understand audio"
    except Exception as ex:
        # Network / API failures (e.g. sr.RequestError). Return "" so the
        # caller always gets a string, matching file_to_text.
        print("出错了")
        return ""


def file_to_text(path):
    """Recognize Chinese speech from an audio file (WAV/AIFF/FLAC) at ``path``.

    Returns the recognized text, or "" when the file cannot be read or
    recognition fails (best-effort contract preserved).
    """
    try:
        with sr.AudioFile(path) as source:
            audio = r.record(source)
        output = r.recognize_google(audio, language='zh-cn')
        print(output)  # show the result
        return output
    except Exception as ex:
        # Keep the silent best-effort behavior for callers, but surface the
        # cause on stdout instead of swallowing it entirely.
        print(ex)
        return ""


if __name__ == '__main__':
    # while True:
    #     microphone_to_text()
    #     time.sleep(3)
    # Recognize the file given on the command line, falling back to a default
    # recording path, then run one live microphone recognition.
    if len(sys.argv) > 1:
        result = file_to_text(sys.argv[1])
        print(result)
    else:
        result = file_to_text("E:\\record.wav")
        print(result)
    microphone_to_text()
C# 调用 Python 脚本
/// <summary>
        /// Runs a Python script, reports the recognition result via the voice
        /// icon, and deletes the consumed temp WAV file.
        /// </summary>
        /// <param name="filePath">Absolute path of the Python script.</param>
        /// <param name="args">Extra arguments appended after the temp-file paths.</param>
        /// <param name="teps">Temp file paths passed to the script; <c>teps[0]</c> is deleted afterwards.</param>
        public void RunPythonScript(string filePath, string args = "", params string[] teps)
        {
            string strRst;
            using (Process p = new Process())
            {
                string path = filePath; // absolute path of the Python script
                // "python.exe" must be on PATH; use an absolute path otherwise.
                p.StartInfo.FileName = @"python.exe";
                string sArguments = teps.Aggregate(path, (current, sigstr) => current + (" " + sigstr));
                sArguments += " " + args;
                p.StartInfo.Arguments = sArguments;
                p.StartInfo.UseShellExecute = false;
                p.StartInfo.RedirectStandardOutput = true;
                p.StartInfo.RedirectStandardInput = true;
                p.StartInfo.RedirectStandardError = true;
                p.StartInfo.CreateNoWindow = true;
                p.Start();
                // Drain stderr concurrently: reading only stdout while stderr is
                // redirected can deadlock once the stderr pipe buffer fills.
                var errTask = p.StandardError.ReadToEndAsync();
                strRst = p.StandardOutput.ReadToEnd();
                p.WaitForExit();
                var strErr = errTask.Result;
                if (!string.IsNullOrEmpty(strErr))
                {
                    Console.WriteLine(strErr);
                }
            }
            strRst = strRst.Replace("\r\n", "");
            try
            {
                // The first temp file is the recorded WAV; remove it once consumed.
                if (teps.Length > 0)
                {
                    File.Delete(teps[0]);
                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e);
            }
            if (!string.IsNullOrEmpty(strRst))
            {
                Console.WriteLine(strRst);
                // Recognition produced text: icon 3 = success (the original set 2,
                // which the icon codes document as "error").
                UpdateVoiceIcon(3);
                return;
            }
            // No output: icon 2 = error.
            UpdateVoiceIcon(2);
            Thread.Sleep(1500);
        }


          /// <summary>
        /// Returns the current moment as a Windows file-time (UTC) string,
        /// used as a unique stem for WAV file names.
        /// </summary>
        /// <returns>The current UTC file time rendered as a decimal string.</returns>
        private string GetToFileTimeUtc() => DateTime.Now.ToFileTimeUtc().ToString();
使用方式
 try
                {
                    recorder = new NAudioRecorder();
                    recorder.SetFileName($"{GetToFileTimeUtc()}.wav");
                    // Attach both handlers BEFORE starting the recording so no
                    // event raised by an early frame is missed.
                    recorder.updateIcon += (type) =>
                    {
                        UpdateVoiceIcon(type);
                    };

                    recorder.actionSpeechToText += (fileName) =>
                    {
                        // Re-entrancy guard: skip if a recognition pass is already running.
                        if (isSpeechToTextStatus) return;
                        isSpeechToTextStatus = true;
                        string path = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Script");
                        string file = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, fileName);
                        RunPythonScript(Path.Combine(path, "speech.py"), "-u", new string[] { file });
                        Thread.Sleep(500);
                        // Start the next recording cycle with a fresh file name.
                        recorder.SetFileName($"{GetToFileTimeUtc()}.wav");
                        Thread.Sleep(500);
                        recorder.StartRec();
                        isSpeechToTextStatus = false;
                    };
                    // Begin recording only after the handlers are wired up
                    // (the original started recording first).
                    recorder.StartRec();
                }
                catch (Exception e)
                {
                    Logger.Default.Error("录音服务出错---{0}", e.Message);
                }

本文作者:admin

本文链接:https://www.javalc.com/post/101.html

版权声明:本篇文章于2022-09-27,由admin发表,转载请注明出处:分享你我。如有疑问,请联系我们

java/C# 调用unipush2.0

发表评论

取消
扫码支持