Как мне записать звук в формате .wav, который соответствовал бы требованиям Google Speech-to-Text API, в React Native Expo?
У меня есть функция, которая запускает и останавливает запись. Когда запись останавливается, я могу воспроизвести её, и всё работает хорошо, но всякий раз, когда я загружаю аудиофайл в Google для транскрипции, я получаю пустой ответ. При этом, когда я загружаю другой .wav-файл (скачанный из Интернета, а не записанный мной), я получаю транскрипцию этого аудиофайла.
/**
 * Ask for microphone permission, switch the audio session into recording
 * mode and start a new recording, storing it via `setRecording`.
 *
 * The recording options are chosen so that the resulting file is in a
 * format the Speech-to-Text backend can decode:
 *  - iOS:     16 kHz mono 16-bit little-endian Linear PCM in a .wav file.
 *  - Android: 16 kHz mono AMR-WB in a .amr file.
 *
 * Errors are logged and swallowed; nothing is returned.
 */
const startRecording = async () => {
  try {
    console.log("Requesting permissions..");
    const { status } = await Audio.requestPermissionsAsync();
    if (status !== "granted") {
      alert("Sorry, we need audio recording permissions to make this work!");
      return;
    }
    console.log("Starting recording..");
    await Audio.setAudioModeAsync({
      allowsRecordingIOS: true,
      playsInSilentModeIOS: true,
    });
    const { recording } = await Audio.Recording.createAsync({
      android: {
        // The ".wav" extension only names the file; with the DEFAULT output
        // format/encoder the platform picks its own container and codec, so
        // the file is not real WAV. Android's recorder offers no WAV/PCM
        // output, so record AMR-WB instead, which Speech-to-Text accepts.
        // NOTE(review): confirm these constants exist in the installed
        // expo-av version.
        extension: ".amr",
        outputFormat: Audio.RECORDING_OPTION_ANDROID_OUTPUT_FORMAT_AMR_WB,
        audioEncoder: Audio.RECORDING_OPTION_ANDROID_AUDIO_ENCODER_AMR_WB,
        // AMR-WB is defined for 16 kHz mono audio only.
        sampleRate: 16000,
        numberOfChannels: 1,
        // Highest AMR-WB mode (23.85 kbit/s).
        bitRate: 23850,
      },
      ios: {
        extension: ".wav",
        // Without an explicit output format the linearPCM* settings below
        // are not guaranteed to apply; request Linear PCM explicitly.
        outputFormat: Audio.RECORDING_OPTION_IOS_OUTPUT_FORMAT_LINEARPCM,
        audioQuality: Audio.RECORDING_OPTION_IOS_AUDIO_QUALITY_HIGH,
        sampleRate: 16000,
        numberOfChannels: 1,
        bitRate: 128000,
        linearPCMBitDepth: 16,
        linearPCMIsBigEndian: false,
        linearPCMIsFloat: false,
      },
    });
    setRecording(recording);
  } catch (err) {
    console.error("Failed to start recording", err);
  }
};
/**
 * Stop the active recording (if any), clear it from state and hand the
 * recorded file's URI to `uploadAudio`.
 *
 * Does nothing beyond logging when no recording is in progress.
 */
const stopRecording = async () => {
  console.log("Stopping recording..");

  // Nothing to stop.
  if (!recording) {
    return;
  }

  await recording.stopAndUnloadAsync();
  const recordedUri = recording.getURI();
  setRecording(null);
  // Not awaited, as in the original flow: the upload runs in the background.
  uploadAudio(recordedUri);
};
/**
 * Return the participant of `data` whose username differs from `username`.
 *
 * The receiver is checked first, then the sender. Returns `null` when both
 * participants carry the given username.
 *
 * @param {{receiver: {username: string}, sender: {username: string}}} data
 * @param {string} username
 * @returns {object|null}
 */
function getOtherUser(data, username) {
  // Roles are looked up lazily and in order, so the sender is only
  // touched when the receiver turns out to be the current user.
  for (const role of ["receiver", "sender"]) {
    const participant = data[role];
    if (participant.username !== username) {
      return participant;
    }
  }
  return null;
}
/**
 * Upload a recorded audio file to the backend as multipart form data.
 *
 * Sends the file under the "file" field together with `pair_id` and a
 * `delivered` flag (true when the recipient is among the current members).
 * The uploaded file name and MIME type follow the extension of `uri`,
 * falling back to WAV when the URI has no extension.
 *
 * Failures (missing URI, network error, non-200 status) are logged, never
 * thrown.
 *
 * @param {string} uri Local URI of the recorded file.
 */
const uploadAudio = async (uri) => {
  if (!uri) {
    console.error("Error uploading audio file:", "no recording URI");
    return;
  }

  // Label the upload with the recording's real extension instead of always
  // claiming WAV, so the server-side file name matches the content.
  const extensionMatch = /\.([a-z0-9]+)$/i.exec(uri);
  const extension = extensionMatch ? extensionMatch[1].toLowerCase() : "wav";

  const formData = new FormData();
  formData.append("file", {
    uri,
    name: `recording.${extension}`,
    type: `audio/${extension}`,
  });
  formData.append("pair_id", details.id);
  formData.append(
    "delivered",
    Object.values(members).some(
      (entry) => entry.name === receipient?.username,
    ),
  );
  try {
    const url = `${API_URL}/api/record_view/`;
    const response = await fetch(url, {
      method: "POST",
      headers: {
        "Content-Type": "multipart/form-data",
        Authorization: "Bearer " + token,
        user: username,
      },
      body: formData,
    });
    if (response.status === 200) {
      console.log("File successfully uploaded");
    } else {
      // Surface server-side rejections instead of dropping them silently.
      const body = await response.text();
      console.error(
        "Error uploading audio file:",
        `status ${response.status}`,
        body,
      );
    }
  } catch (error) {
    console.error("Error uploading audio file:", error);
  }
};
Выше приведен мой код React Native
import os
import tempfile

from django.http import JsonResponse
from django.views.decorators.csrf import csrf_exempt
from google.cloud import speech_v2
from google.cloud.speech_v2.types import cloud_speech
from google.oauth2 import service_account
def transcribe_model_selection_v2(project_id: str, model: str, audio_path: str) -> cloud_speech.RecognizeResponse:
    """Run Speech-to-Text v2 recognition on a local audio file.

    Args:
        project_id: Google Cloud project used to build the recognizer path.
        model: Name of the recognition model to use.
        audio_path: Path of the audio file whose bytes are sent inline.

    Returns:
        The response returned by ``SpeechClient.recognize``.
    """
    # Authenticate with the service-account key file in the working directory.
    creds = service_account.Credentials.from_service_account_file('service-account-file.json')
    speech_client = speech_v2.SpeechClient(credentials=creds)

    with open(audio_path, "rb") as audio_file:
        audio_bytes = audio_file.read()

    # Let the service detect the audio encoding from the content itself.
    recognition_config = cloud_speech.RecognitionConfig(
        auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(),
        language_codes=["en-US"],
        model=model,
    )

    return speech_client.recognize(
        request=cloud_speech.RecognizeRequest(
            recognizer=f"projects/{project_id}/locations/global/recognizers/_",
            config=recognition_config,
            content=audio_bytes,
        )
    )
@csrf_exempt
def transcribe_audio(request):
    """Transcribe an uploaded audio file and return the text as JSON.

    Expects a POST request carrying the audio under the ``audio`` form field
    (``file`` is accepted as well). The upload is written to a unique
    temporary file, transcribed, and the temporary file is always removed.

    Returns:
        ``{'transcription': <text>}`` on success,
        ``{'error': <message>}`` with status 500 when saving or transcribing
        fails, or ``{'error': 'Invalid request'}`` with status 400 for
        non-POST requests and requests without an audio file.
    """
    if request.method != 'POST':
        return JsonResponse({'error': 'Invalid request'}, status=400)

    # The React Native client in this file posts the recording under "file",
    # so accept that field name in addition to "audio".
    audio_file = request.FILES.get('audio') or request.FILES.get('file')
    if not audio_file:
        return JsonResponse({'error': 'Invalid request'}, status=400)

    # Keep the original extension but let tempfile pick a unique name, so
    # concurrent uploads with the same client-side name cannot overwrite
    # each other.
    suffix = os.path.splitext(audio_file.name or '')[1]
    audio_path = None
    try:
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as destination:
            audio_path = destination.name
            for chunk in audio_file.chunks():
                destination.write(chunk)

        project_id = 'project-id'
        model = 'latest_long'
        response = transcribe_model_selection_v2(project_id, model, audio_path)

        # Skip results that carry no alternatives instead of failing with
        # an IndexError.
        transcription = ''.join(
            result.alternatives[0].transcript
            for result in response.results
            if result.alternatives
        )
        return JsonResponse({'transcription': transcription})
    except Exception as e:
        # View boundary: report the failure to the client as JSON.
        return JsonResponse({'error': str(e)}, status=500)
    finally:
        # Single cleanup point; tolerate the file already being gone.
        if audio_path is not None:
            try:
                os.remove(audio_path)
            except OSError:
                pass
Выше представлено моё представление (view) бэкенда на Django для транскрибирования загруженного аудио.