Как в React Native Expo записать звук в формате .wav, который соответствовал бы требованиям Google Speech-to-Text API?

У меня есть функция, которая запускает и останавливает запись. Когда запись останавливается, я могу воспроизвести её, и всё работает хорошо, но всякий раз, когда я загружаю аудиофайл в Google для транскрипции, я получаю пустой ответ. Однако когда я загружаю другой .wav-файл (скачанный из Интернета, а не записанный в приложении), я получаю транскрипцию этого аудиофайла.

/**
 * Ask for microphone permission, switch the audio session into recording
 * mode and start a new recording whose encoding Google Speech-to-Text can
 * decode. The created recording object is stored via `setRecording`.
 *
 * Any failure is logged with console.error; nothing is re-thrown.
 */
const startRecording = async () => {
    try {
      console.log("Requesting permissions..");
      const { status } = await Audio.requestPermissionsAsync();
      if (status !== "granted") {
        alert("Sorry, we need audio recording permissions to make this work!");
        return;
      }

      console.log("Starting recording..");
      await Audio.setAudioModeAsync({
        allowsRecordingIOS: true,
        playsInSilentModeIOS: true,
      });

      const { recording } = await Audio.Recording.createAsync({
        android: {
          // Android's recorder cannot produce WAV/linear PCM: the DEFAULT
          // output format and encoder write 3GP/AMR data even when the file
          // is named ".wav", which Speech-to-Text does not transcribe.
          // AMR-WB at 16 kHz mono is an encoding the API accepts.
          extension: ".amr",
          outputFormat: Audio.RECORDING_OPTION_ANDROID_OUTPUT_FORMAT_AMR_WB,
          audioEncoder: Audio.RECORDING_OPTION_ANDROID_AUDIO_ENCODER_AMR_WB,
          sampleRate: 16000,
          numberOfChannels: 1,
          // 23.85 kbit/s is the highest AMR-WB mode.
          bitRate: 23850,
        },
        ios: {
          extension: ".wav",
          // Request linear PCM explicitly so the .wav file really contains
          // 16-bit little-endian PCM samples (LINEAR16).
          outputFormat: Audio.RECORDING_OPTION_IOS_OUTPUT_FORMAT_LINEARPCM,
          audioQuality: Audio.RECORDING_OPTION_IOS_AUDIO_QUALITY_HIGH,
          sampleRate: 16000,
          numberOfChannels: 1,
          bitRate: 128000,
          linearPCMBitDepth: 16,
          linearPCMIsBigEndian: false,
          linearPCMIsFloat: false,
        },
      });
      setRecording(recording);
    } catch (err) {
      console.error("Failed to start recording", err);
    }
  };

  /**
   * Stop the active recording (if any), clear it from state and hand the
   * recorded file's URI to `uploadAudio`. Does nothing beyond logging when
   * no recording is in progress.
   */
  const stopRecording = async () => {
    console.log("Stopping recording..");
    if (!recording) {
      return;
    }

    await recording.stopAndUnloadAsync();
    const recordedUri = recording.getURI();
    setRecording(null);
    // The upload is started without awaiting its result.
    uploadAudio(recordedUri);
  };

  /**
   * Return the participant of `data` whose username differs from `username`.
   *
   * The receiver is checked before the sender; `null` is returned when both
   * participants carry the given username.
   */
  function getOtherUser(data, username) {
    const participants = [data.receiver, data.sender];
    const other = participants.find((user) => user.username !== username);
    return other ?? null;
  }

  /**
   * POST the recorded file at `uri` to `${API_URL}/api/record_view/` as
   * multipart form data.
   *
   * Form fields sent:
   *   - "file":      the recording, named "recording.wav" with type "audio/wav"
   *   - "pair_id":   `details.id`
   *   - "delivered": whether any value in `members` has a `name` equal to
   *                  `receipient?.username`
   *
   * Errors raised by the request or by parsing the response are logged and
   * swallowed.
   */
  const uploadAudio = async (uri) => {
    const payload = new FormData();
    const audioPart = { uri, name: "recording.wav", type: "audio/wav" };
    payload.append("file", audioPart);
    payload.append("pair_id", details.id);

    // `some` already yields a boolean, so it can be appended directly.
    const isDelivered = Object.values(members).some(
      (member) => member.name === receipient?.username,
    );
    payload.append("delivered", isDelivered);

    try {
      const response = await fetch(`${API_URL}/api/record_view/`, {
        method: "POST",
        headers: {
          "Content-Type": "multipart/form-data",
          Authorization: "Bearer " + token,
          user: username,
        },
        body: payload,
      });
      // The parsed body is not used, but a non-JSON reply must still end up
      // in the catch below.
      await response.json();
      if (response.status === 200) {
        console.log("File successfully uploaded");
      }
    } catch (error) {
      console.error("Error uploading audio file:", error);
    }
  };

Выше приведен мой код React Native

import os
import tempfile

from django.http import JsonResponse
from django.views.decorators.csrf import csrf_exempt
from google.cloud import speech_v2
from google.cloud.speech_v2.types import cloud_speech
from google.oauth2 import service_account

def transcribe_model_selection_v2(project_id: str, model: str, audio_path: str) -> cloud_speech.RecognizeResponse:
    """Run synchronous Speech-to-Text v2 recognition on a local audio file.

    Args:
        project_id: Google Cloud project used in the recognizer resource name.
        model: Recognition model name passed to the recognition config.
        audio_path: Path of the audio file whose bytes are sent for recognition.

    Returns:
        The response returned by ``SpeechClient.recognize``.
    """
    # Build the client from the service-account key file; the path is
    # relative to the process's working directory.
    speech_client = speech_v2.SpeechClient(
        credentials=service_account.Credentials.from_service_account_file('service-account-file.json'),
    )

    with open(audio_path, "rb") as audio_file:
        audio_bytes = audio_file.read()

    # The encoding is left to auto-detection; the language is fixed to en-US
    # and the request goes to the project's default ("_") global recognizer.
    return speech_client.recognize(
        request=cloud_speech.RecognizeRequest(
            recognizer=f"projects/{project_id}/locations/global/recognizers/_",
            config=cloud_speech.RecognitionConfig(
                auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(),
                language_codes=["en-US"],
                model=model,
            ),
            content=audio_bytes,
        )
    )

@csrf_exempt
def transcribe_audio(request):
    """Transcribe an uploaded audio file and return the text as JSON.

    Expects a POST request with the audio in the ``audio`` file field.

    Returns:
        * ``{'transcription': <text>}`` on success,
        * ``{'error': <message>}`` with status 500 when saving the upload or
          transcribing it fails,
        * ``{'error': 'Invalid request'}`` with status 400 for any other
          request.
    """
    if request.method != 'POST' or not request.FILES.get('audio'):
        return JsonResponse({'error': 'Invalid request'}, status=400)

    audio_file = request.FILES['audio']
    # Keep the upload's extension but let tempfile choose a unique name:
    # uploads that share a file name must not overwrite or delete each
    # other's data while a transcription is in flight.
    suffix = os.path.splitext(audio_file.name)[1]
    audio_path = None

    try:
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as destination:
            audio_path = destination.name
            for chunk in audio_file.chunks():
                destination.write(chunk)

        project_id = 'project-id'
        model = 'latest_long'
        response = transcribe_model_selection_v2(project_id, model, audio_path)

        # A result may come back without alternatives; skip it instead of
        # failing with IndexError.
        transcription = ''.join(
            result.alternatives[0].transcript
            for result in response.results
            if result.alternatives
        )

        return JsonResponse({'transcription': transcription})

    except Exception as e:
        return JsonResponse({'error': str(e)}, status=500)

    finally:
        # Single cleanup point: runs exactly once on both the success and the
        # error path, so the file is never removed twice.
        if audio_path is not None:
            try:
                os.remove(audio_path)
            except FileNotFoundError:
                pass

Выше представлен мой django backend View для транскрибирования загруженного аудио

Вернуться наверх