Created
April 16, 2024 10:20
-
-
Save damithsj/abe20dda077c937a13ae0839748da7d0 to your computer and use it in GitHub Desktop.
Azure Function for Azure Speech Services speech-to-text that accepts any audio format
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using Azure.Core; | |
using Microsoft.AspNetCore.Http; | |
using Microsoft.AspNetCore.Mvc; | |
using Microsoft.Azure.Functions.Worker; | |
using Microsoft.CognitiveServices.Speech; | |
using Microsoft.CognitiveServices.Speech.Audio; | |
using Microsoft.Extensions.Logging; | |
using Newtonsoft.Json.Linq; | |
using System.Diagnostics; | |
namespace azure_ai_services
{
    /// <summary>
    /// HTTP-triggered Azure Function that accepts audio in the request body, converts it
    /// to WAV with FFmpeg, and transcribes it with Azure Speech Services.
    /// </summary>
    public class CognitiveServices
    {
        // Path used when FFMPEG_PATH is not set (the location on an Azure Windows plan).
        private const string DefaultFfmpegPath = @"C:\home\site\wwwroot\executables\ffmpeg.exe";

        // Settings are read from environment variables (Function App settings) so that the
        // secret does not have to live in source. The original literals remain as fallbacks.
        private static readonly string speechKey = GetSetting("SPEECH_KEY", "YOUR_SPEECH_KEY");
        private static readonly string speechRegion = GetSetting("SPEECH_REGION", "your_speech_region");
        private static readonly string ffmpegPath = GetSetting("FFMPEG_PATH", DefaultFfmpegPath);

        private readonly ILogger<CognitiveServices> _logger;

        /// <summary>Creates the function class with the logger supplied by dependency injection.</summary>
        /// <param name="logger">Logger used for progress and diagnostic messages.</param>
        public CognitiveServices(ILogger<CognitiveServices> logger)
        {
            _logger = logger;
        }

        //--------------------------------------------------------------------------
        //------ Speech to text ----------------------------------------------------
        //--------------------------------------------------------------------------

        /// <summary>
        /// Transcribes the audio posted in the request body.
        /// </summary>
        /// <param name="req">The incoming POST request; its body is treated as raw audio data.</param>
        /// <returns>
        /// 200 with a JSON object containing <c>DisplayText</c> and <c>Duration</c> on success;
        /// 400 when the body is empty or FFmpeg cannot convert it to WAV.
        /// </returns>
        [Function("Stt")]
        public async Task<IActionResult> Run([HttpTrigger(AuthorizationLevel.Anonymous, "post")] HttpRequest req)
        {
            _logger.LogInformation("C# HTTP trigger function processed a request.");

            var tempPath = Path.GetTempPath();
            // For FFmpeg the input file name can be anything; the output extension selects WAV.
            var tempIn = Path.Combine(tempPath, Path.GetRandomFileName() + ".tmp");
            var tempOut = Path.Combine(tempPath, Path.GetRandomFileName() + ".wav");

            try
            {
                long bytesWritten = await SaveRequestBodyAsync(req, tempIn);
                if (bytesWritten == 0)
                {
                    return new BadRequestObjectResult("Request body is empty; expected audio data.");
                }

                if (!await ConvertToWavAsync(tempIn, tempOut))
                {
                    return new BadRequestObjectResult("The request body could not be converted to WAV audio.");
                }

                var speechRecognitionResult = await RecognizeAsync(tempOut);
                if (speechRecognitionResult.Reason != ResultReason.RecognizedSpeech)
                {
                    _logger.LogWarning("Speech recognition ended with reason {Reason}", speechRecognitionResult.Reason);
                }

                _logger.LogInformation("STT Result: {Text}", speechRecognitionResult.Text);

                // Create response payload
                JObject response = new JObject
                {
                    { "DisplayText", speechRecognitionResult.Text },
                    { "Duration", speechRecognitionResult.Duration },
                };

                // The payload is already serialized JSON, so send it verbatim with a JSON content type.
                return new ContentResult
                {
                    Content = response.ToString(),
                    ContentType = "application/json",
                    StatusCode = StatusCodes.Status200OK,
                };
            }
            finally
            {
                // Runs on every path (success, early return, exception) so temp files never pile up.
                TryDelete(tempOut);
                TryDelete(tempIn);
            }
        }

        /// <summary>Returns the named environment variable, or <paramref name="fallback"/> when it is unset or blank.</summary>
        private static string GetSetting(string name, string fallback)
        {
            var value = Environment.GetEnvironmentVariable(name);
            return string.IsNullOrWhiteSpace(value) ? fallback : value;
        }

        /// <summary>Streams the request body to <paramref name="path"/> and returns the number of bytes written.</summary>
        private async Task<long> SaveRequestBodyAsync(HttpRequest req, string path)
        {
            _logger.LogInformation("File write start: {TempIn}", path);

            long length;
            await using (var file = new FileStream(path, FileMode.Create, FileAccess.Write, FileShare.None, 81920, useAsync: true))
            {
                await req.Body.CopyToAsync(file);
                length = file.Length;
            }

            _logger.LogInformation("File write finished: {TempIn}", path);
            return length;
        }

        /// <summary>
        /// Runs FFmpeg to convert <paramref name="inputPath"/> to <paramref name="outputPath"/>.
        /// </summary>
        /// <returns><c>true</c> when FFmpeg exits with code 0; otherwise <c>false</c>.</returns>
        private async Task<bool> ConvertToWavAsync(string inputPath, string outputPath)
        {
            using var process = new Process();
            process.StartInfo.FileName = ffmpegPath;
            process.StartInfo.Arguments = $"-i \"{inputPath}\" \"{outputPath}\"";
            process.StartInfo.RedirectStandardOutput = true;
            process.StartInfo.RedirectStandardError = true;
            process.StartInfo.UseShellExecute = false;
            _logger.LogInformation("Args: {Arguments}", process.StartInfo.Arguments);

            process.Start();

            // Drain both redirected pipes while the process runs. Waiting for exit first can
            // deadlock: the child blocks once a pipe buffer is full and then never exits.
            Task<string> standardOutput = process.StandardOutput.ReadToEndAsync();
            Task<string> standardError = process.StandardError.ReadToEndAsync();
            await process.WaitForExitAsync();
            await standardOutput;
            string diagnostics = await standardError;

            if (process.ExitCode != 0)
            {
                _logger.LogError("FFmpeg exited with code {ExitCode}: {Diagnostics}", process.ExitCode, diagnostics);
                return false;
            }

            _logger.LogInformation("File conversion finished: {TempOut}", outputPath);
            return true;
        }

        /// <summary>Performs a single-utterance en-US recognition on the WAV file at <paramref name="wavPath"/>.</summary>
        private static async Task<SpeechRecognitionResult> RecognizeAsync(string wavPath)
        {
            var speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);
            speechConfig.SpeechRecognitionLanguage = "en-US";

            // Both are disposed when this method returns, i.e. before the caller deletes the WAV file.
            using var audioConfig = AudioConfig.FromWavFileInput(wavPath);
            using var speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig);
            return await speechRecognizer.RecognizeOnceAsync();
        }

        /// <summary>Deletes a temp file, logging instead of throwing so cleanup never masks the real outcome.</summary>
        private void TryDelete(string path)
        {
            try
            {
                File.Delete(path);
            }
            catch (Exception ex) when (ex is IOException || ex is UnauthorizedAccessException)
            {
                _logger.LogWarning(ex, "Could not delete temporary file {Path}", path);
            }
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment