Skip to content

Instantly share code, notes, and snippets.

@Miouyouyou
Created December 10, 2020 00:00
Show Gist options
  • Save Miouyouyou/2b846cc2e44b984de4a401d2995e299b to your computer and use it in GitHub Desktop.
Save Miouyouyou/2b846cc2e44b984de4a401d2995e299b to your computer and use it in GitHub Desktop.
Godot speech 4.0 to 3.2
From 4765ea3342c76f7e416dd50d9bf39adcaa48f351 Mon Sep 17 00:00:00 2001
From: "Miouyouyou (Myy)" <myy@miouyouyou.fr>
Date: Thu, 10 Dec 2020 00:58:11 +0100
Subject: [PATCH] Master to 3.2 port
Rename every Packed*Array reference to its Pool*Array equivalent, and replace
direct ptr()/ptrw() calls with the read().ptr()/write().ptr() accessors.
It's still untested, so it might attack you in your sleep.
Signed-off-by: Miouyouyou (Myy) <myy@miouyouyou.fr>
---
opus_codec.h | 14 +++++++-------
register_types.cpp | 2 +-
speech.h | 31 +++++++++++++----------------
speech_decoder.h | 11 +++++------
speech_processor.cpp | 46 ++++++++++++++++++++++----------------------
speech_processor.h | 36 +++++++++++++++++-----------------
6 files changed, 67 insertions(+), 73 deletions(-)
diff --git a/opus_codec.h b/opus_codec.h
index d430042..ac52773 100644
--- a/opus_codec.h
+++ b/opus_codec.h
@@ -29,8 +29,8 @@ public:
}
virtual bool process(
- const PackedByteArray *p_compressed_buffer,
- PackedByteArray *p_pcm_output_buffer,
+ const PoolByteArray *p_compressed_buffer,
+ PoolByteArray *p_pcm_output_buffer,
const int p_compressed_buffer_size,
const int p_pcm_output_buffer_size,
const int p_buffer_frame_count) {
@@ -110,18 +110,18 @@ public:
return speech_decoder;
}
- int encode_buffer(const PackedByteArray *p_pcm_buffer, PackedByteArray *p_output_buffer) {
+ int encode_buffer(const PoolByteArray *p_pcm_buffer, PoolByteArray *p_output_buffer) {
int number_of_bytes = -1;
if (encoder) {
- const opus_int16 *pcm_buffer_pointer = reinterpret_cast<const opus_int16 *>(p_pcm_buffer->ptr());
+ const opus_int16 *pcm_buffer_pointer = reinterpret_cast<const opus_int16 *>(p_pcm_buffer->read().ptr());
opus_int32 ret_value = opus_encode(encoder, pcm_buffer_pointer, BUFFER_FRAME_COUNT, internal_buffer, INTERNAL_BUFFER_SIZE);
if (ret_value >= 0) {
number_of_bytes = ret_value;
if (number_of_bytes > 0) {
- unsigned char *output_buffer_pointer = reinterpret_cast<unsigned char *>(p_output_buffer->ptrw());
+ unsigned char *output_buffer_pointer = reinterpret_cast<unsigned char *>(p_output_buffer->write().ptr());
memcpy(output_buffer_pointer, internal_buffer, number_of_bytes);
}
} else {
@@ -134,8 +134,8 @@ public:
bool decode_buffer(
SpeechDecoder *p_speech_decoder,
- const PackedByteArray *p_compressed_buffer,
- PackedByteArray *p_pcm_output_buffer,
+ const PoolByteArray *p_compressed_buffer,
+ PoolByteArray *p_pcm_output_buffer,
const int p_compressed_buffer_size,
const int p_pcm_output_buffer_size) {
if (p_pcm_output_buffer->size() != p_pcm_output_buffer_size) {
diff --git a/register_types.cpp b/register_types.cpp
index 10c6bcf..8e25a68 100644
--- a/register_types.cpp
+++ b/register_types.cpp
@@ -1,6 +1,6 @@
#include "register_types.h"
-#include "core/object/class_db.h"
+#include "core/class_db.h"
#include "speech.h"
#include "speech_processor.h"
diff --git a/speech.h b/speech.h
index 6d598ce..adc6b89 100644
--- a/speech.h
+++ b/speech.h
@@ -32,14 +32,9 @@
#define STREAM_AUDIO_OPUS_H
#include "modules/audio_effect_stream/stream_audio.h"
-#include "opus_codec.h"
#include "thirdparty/libsamplerate/src/samplerate.h"
-#include "core/config/engine.h"
-#include "core/config/project_settings.h"
#include "core/os/mutex.h"
-#include "core/variant/array.h"
-#include "core/variant/dictionary.h"
#include "scene/main/node.h"
#include "servers/audio_server.h"
@@ -50,10 +45,10 @@ class Speech : public Node {
static const int MAX_AUDIO_BUFFER_ARRAY_SIZE = 10;
- PackedByteArray input_byte_array;
+ PoolByteArray input_byte_array;
float volume = 0.0;
- Mutex audio_mutex;
+ mutable Mutex *audio_mutex;
int skipped_audio_packets = 0;
@@ -61,13 +56,13 @@ class Speech : public Node {
SpeechProcessor *speech_processor = NULL;
struct InputPacket {
- PackedByteArray compressed_byte_array;
+ PoolByteArray compressed_byte_array;
int buffer_size = 0;
float loudness = 0.0;
};
int current_input_size = 0;
- PackedByteArray compression_output_byte_array;
+ PoolByteArray compression_output_byte_array;
InputPacket input_audio_buffer_array[MAX_AUDIO_BUFFER_ARRAY_SIZE];
//
private:
@@ -100,8 +95,8 @@ private:
return input_packet;
} else {
for (int i = MAX_AUDIO_BUFFER_ARRAY_SIZE - 1; i > 0; i--) {
- memcpy(input_audio_buffer_array[i - 1].compressed_byte_array.ptrw(),
- input_audio_buffer_array[i].compressed_byte_array.ptr(),
+ memcpy(input_audio_buffer_array[i - 1].compressed_byte_array.write().ptr(),
+ input_audio_buffer_array[i].compressed_byte_array.read().ptr(),
SpeechProcessor::PCM_BUFFER_SIZE);
input_audio_buffer_array[i - 1].buffer_size = input_audio_buffer_array[i].buffer_size;
@@ -115,8 +110,8 @@ private:
// Is responsible for recieving packets from the SpeechProcessor and then compressing them
void speech_processed(SpeechProcessor::SpeechInput *p_mic_input) {
// Copy the raw PCM data from the SpeechInput packet to the input byte array
- PackedByteArray *mic_input_byte_array = p_mic_input->pcm_byte_array;
- memcpy(input_byte_array.ptrw(), mic_input_byte_array->ptr(), SpeechProcessor::PCM_BUFFER_SIZE);
+ PoolByteArray *mic_input_byte_array = p_mic_input->pcm_byte_array;
+ memcpy(input_byte_array.write().ptr(), mic_input_byte_array->read().ptr(), SpeechProcessor::PCM_BUFFER_SIZE);
// Create a new SpeechProcessor::CompressedBufferInput to be passed into the compressor
// and assign it the compressed_byte_array from the input packet
@@ -133,8 +128,8 @@ private:
InputPacket *input_packet = get_next_valid_input_packet();
// Copy the buffer size from the compressed_buffer_input back into the input packet
memcpy(
- input_packet->compressed_byte_array.ptrw(),
- compressed_buffer_input.compressed_byte_array->ptr(),
+ input_packet->compressed_byte_array.write().ptr(),
+ compressed_buffer_input.compressed_byte_array->read().ptr(),
SpeechProcessor::PCM_BUFFER_SIZE);
input_packet->buffer_size = compressed_buffer_input.buffer_size;
@@ -172,17 +167,17 @@ public:
skipped_audio_packets = 0;
}
- virtual PackedVector2Array decompress_buffer(Ref<SpeechDecoder> p_speech_decoder, PackedByteArray p_read_byte_array, const int p_read_size, PackedVector2Array p_write_vec2_array) {
+ virtual PoolVector2Array decompress_buffer(Ref<SpeechDecoder> p_speech_decoder, PoolByteArray p_read_byte_array, const int p_read_size, PoolVector2Array p_write_vec2_array) {
if (p_read_byte_array.size() < p_read_size) {
ERR_PRINT("SpeechDecoder: read byte_array size!");
- return PackedVector2Array();
+ return PoolVector2Array();
}
if (speech_processor->decompress_buffer_internal(p_speech_decoder.ptr(), &p_read_byte_array, p_read_size, &p_write_vec2_array)) {
return p_write_vec2_array;
}
- return PackedVector2Array();
+ return PoolVector2Array();
}
// Copys all the input buffers to the output buffers
diff --git a/speech_decoder.h b/speech_decoder.h
index c01184a..9222627 100644
--- a/speech_decoder.h
+++ b/speech_decoder.h
@@ -1,7 +1,6 @@
#ifndef SPEECH_DECODER_H
#define SPEECH_DECODER_H
-#include "core/object/reference.h"
#include "macros.h"
@@ -58,17 +57,17 @@ public:
}
virtual bool process(
- const PackedByteArray *p_compressed_buffer,
- PackedByteArray *p_pcm_output_buffer,
+ const PoolByteArray *p_compressed_buffer,
+ PoolByteArray *p_pcm_output_buffer,
const int p_compressed_buffer_size,
const int p_pcm_output_buffer_size,
const int p_buffer_frame_count) {
if (decoder) {
- opus_int16 *output_buffer_pointer = reinterpret_cast<opus_int16 *>(p_pcm_output_buffer->ptrw());
- const unsigned char *opus_buffer_pointer = reinterpret_cast<const unsigned char *>(p_compressed_buffer->ptr());
+ opus_int16 *output_buffer_pointer = reinterpret_cast<opus_int16 *>(p_pcm_output_buffer->write().ptr());
+ const unsigned char *opus_buffer_pointer = reinterpret_cast<const unsigned char *>(p_compressed_buffer->read().ptr());
opus_int32 ret_value = opus_decode(decoder, opus_buffer_pointer, p_compressed_buffer_size, output_buffer_pointer, p_buffer_frame_count, 0);
- return true;
+ return ret_value >= 0;
}
return false;
diff --git a/speech_processor.cpp b/speech_processor.cpp
index ec2ef07..becde52 100644
--- a/speech_processor.cpp
+++ b/speech_processor.cpp
@@ -83,19 +83,19 @@ void SpeechProcessor::_get_capture_block(AudioServer *p_audio_server,
}
void SpeechProcessor::_mix_audio(const float *p_incoming_buffer) {
- int8_t *write_buffer = reinterpret_cast<int8_t *>(mix_byte_array.ptrw());
+ int8_t *write_buffer = reinterpret_cast<int8_t *>(mix_byte_array.write().ptr());
if (audio_server) {
- _get_capture_block(audio_server, RECORD_MIX_FRAMES, p_incoming_buffer, mono_real_array.ptrw());
+ _get_capture_block(audio_server, RECORD_MIX_FRAMES, p_incoming_buffer, mono_real_array.write().ptr());
uint32_t resampled_frame_count = resampled_real_array_offset + _resample_audio_buffer(
- mono_real_array.ptr(), // Pointer to source buffer
- RECORD_MIX_FRAMES, // Size of source buffer * sizeof(float)
- mix_rate, // Source sample rate
- VOICE_SAMPLE_RATE, // Target sample rate
- resampled_real_array.ptrw() + static_cast<size_t>(resampled_real_array_offset));
+ mono_real_array.read().ptr(), // Pointer to source buffer
+ RECORD_MIX_FRAMES, // Size of source buffer * sizeof(float)
+ mix_rate, // Source sample rate
+ VOICE_SAMPLE_RATE, // Target sample rate
+ resampled_real_array.write().ptr() + static_cast<size_t>(resampled_real_array_offset));
resampled_real_array_offset = 0;
- const float *resampled_real_array_read_ptr = resampled_real_array.ptr();
+ const float *resampled_real_array_read_ptr = resampled_real_array.read().ptr();
double_t sum = 0;
while (resampled_real_array_offset < resampled_frame_count - BUFFER_FRAME_COUNT) {
sum = 0.0;
@@ -128,7 +128,7 @@ void SpeechProcessor::_mix_audio(const float *p_incoming_buffer) {
}
{
- float *resampled_buffer_write_ptr = resampled_real_array.ptrw();
+ float *resampled_buffer_write_ptr = resampled_real_array.write().ptr();
uint32_t remaining_resampled_buffer_frames = (resampled_frame_count - resampled_real_array_offset);
// Copy the remaining frames to the beginning of the buffer for the next around
@@ -161,17 +161,17 @@ void SpeechProcessor::stop() {
audio_input_stream_player->stop();
}
-bool SpeechProcessor::_16_pcm_mono_to_real_stereo(const PackedByteArray *p_src_buffer, PackedVector2Array *p_dst_buffer) {
+bool SpeechProcessor::_16_pcm_mono_to_real_stereo(const PoolByteArray *p_src_buffer, PoolVector2Array *p_dst_buffer) {
uint32_t buffer_size = p_src_buffer->size();
ERR_FAIL_COND_V(buffer_size % 2, false);
uint32_t frame_count = buffer_size / 2;
- const int16_t *src_buffer_ptr = reinterpret_cast<const int16_t *>(p_src_buffer->ptr());
- real_t *real_buffer_ptr = reinterpret_cast<real_t *>(p_dst_buffer->ptrw());
+ const int16_t *src_buffer_ptr = reinterpret_cast<const int16_t *>(p_src_buffer->read().ptr());
+ real_t *real_buffer_ptr = reinterpret_cast<real_t *>(p_dst_buffer->write().ptr());
- for (int i = 0; i < frame_count; i++) {
+ for (uint32_t i = 0; i < frame_count; i++) {
float value = ((float)*src_buffer_ptr) / 32768.0f;
*(real_buffer_ptr + 0) = value;
@@ -184,15 +184,15 @@ bool SpeechProcessor::_16_pcm_mono_to_real_stereo(const PackedByteArray *p_src_b
return true;
}
-Dictionary SpeechProcessor::compress_buffer(const PackedByteArray &p_pcm_byte_array, Dictionary p_output_buffer) {
+Dictionary SpeechProcessor::compress_buffer(const PoolByteArray &p_pcm_byte_array, Dictionary p_output_buffer) {
if (p_pcm_byte_array.size() != PCM_BUFFER_SIZE) {
ERR_PRINT("SpeechProcessor: PCM buffer is incorrect size!");
return p_output_buffer;
}
- PackedByteArray *byte_array = NULL;
+ PoolByteArray *byte_array = NULL;
if (!p_output_buffer.has("byte_array")) {
- byte_array = (PackedByteArray *)&p_output_buffer["byte_array"];
+ byte_array = (PoolByteArray *)&p_output_buffer["byte_array"];
}
if (!byte_array) {
@@ -219,21 +219,21 @@ Dictionary SpeechProcessor::compress_buffer(const PackedByteArray &p_pcm_byte_ar
return p_output_buffer;
}
-PackedVector2Array SpeechProcessor::decompress_buffer(
+PoolVector2Array SpeechProcessor::decompress_buffer(
Ref<SpeechDecoder> p_speech_decoder,
- const PackedByteArray &p_read_byte_array,
+ const PoolByteArray &p_read_byte_array,
const int p_read_size,
- PackedVector2Array p_write_vec2_array) {
+ PoolVector2Array p_write_vec2_array) {
if (p_read_byte_array.size() < p_read_size) {
ERR_PRINT("SpeechProcessor: read byte_array size!");
- return PackedVector2Array();
+ return PoolVector2Array();
}
if (decompress_buffer_internal(p_speech_decoder.ptr(), &p_read_byte_array, p_read_size, &p_write_vec2_array)) {
return p_write_vec2_array;
}
- return PackedVector2Array();
+ return PoolVector2Array();
}
void SpeechProcessor::set_streaming_bus(const String &p_name) {
@@ -314,9 +314,9 @@ void SpeechProcessor::_notification(int p_what) {
if (!Engine::get_singleton()->is_editor_hint()) {
if (stream_audio && audio_input_stream_player && audio_input_stream_player->is_playing()) {
// This is pretty ugly, but needed to keep the audio from going out of sync
- PackedFloat32Array audio_frames = stream_audio->get_audio_frames(RECORD_MIX_FRAMES);
+ Vector<float> audio_frames = stream_audio->get_audio_frames(RECORD_MIX_FRAMES);
while (audio_frames.size() > 0) {
- _mix_audio(audio_frames.ptrw());
+ _mix_audio(audio_frames.ptr());
record_mix_frames_processed++;
audio_frames = stream_audio->get_audio_frames(RECORD_MIX_FRAMES);
}
diff --git a/speech_processor.h b/speech_processor.h
index 8cd5ea4..8629432 100644
--- a/speech_processor.h
+++ b/speech_processor.h
@@ -1,8 +1,6 @@
#ifndef SPEECH_PROCESSOR_H
#define SPEECH_PROCESSOR_H
-#include "core/config/engine.h"
-#include "core/config/project_settings.h"
#include "core/os/mutex.h"
#include "scene/main/node.h"
#include "servers/audio_server.h"
@@ -16,15 +14,17 @@
#include <stdlib.h>
#include <functional>
-#include "opus_codec.h"
#include "thirdparty/libsamplerate/src/samplerate.h"
+#include "opus_codec.h"
#include "speech_decoder.h"
+typedef PoolVector<float> PoolFloat32Array;
+
class SpeechDecoder;
class SpeechProcessor : public Node {
GDCLASS(SpeechProcessor, Node)
- Mutex mutex;
+ //Mutex mutex;
public:
static const uint32_t VOICE_SAMPLE_RATE = 48000;
@@ -45,13 +45,13 @@ private:
AudioStreamPlayer *audio_input_stream_player = NULL;
uint32_t mix_rate;
- PackedByteArray mix_byte_array;
+ PoolByteArray mix_byte_array;
- PackedFloat32Array mono_real_array;
- PackedFloat32Array resampled_real_array;
+ PoolFloat32Array mono_real_array;
+ PoolFloat32Array resampled_real_array;
uint32_t resampled_real_array_offset = 0;
- PackedByteArray pcm_byte_array_cache;
+ PoolByteArray pcm_byte_array_cache;
// LibResample
SRC_STATE *libresample_state;
@@ -59,12 +59,12 @@ private:
public:
struct SpeechInput {
- PackedByteArray *pcm_byte_array = NULL;
+ PoolByteArray *pcm_byte_array = NULL;
float volume = 0.0;
};
struct CompressedSpeechBuffer {
- PackedByteArray *compressed_byte_array = NULL;
+ PoolByteArray *compressed_byte_array = NULL;
int buffer_size = 0;
};
@@ -92,9 +92,9 @@ public:
void _mix_audio(const float *p_process_buffer_in);
- static bool _16_pcm_mono_to_real_stereo(const PackedByteArray *p_src_buffer, PackedVector2Array *p_dst_buffer);
+ static bool _16_pcm_mono_to_real_stereo(const PoolByteArray *p_src_buffer, PoolVector2Array *p_dst_buffer);
- virtual bool compress_buffer_internal(const PackedByteArray *p_pcm_byte_array, CompressedSpeechBuffer *p_output_buffer) {
+ virtual bool compress_buffer_internal(const PoolByteArray *p_pcm_byte_array, CompressedSpeechBuffer *p_output_buffer) {
p_output_buffer->buffer_size = opus_codec->encode_buffer(p_pcm_byte_array, p_output_buffer->compressed_byte_array);
if (p_output_buffer->buffer_size != -1) {
return true;
@@ -105,9 +105,9 @@ public:
virtual bool decompress_buffer_internal(
SpeechDecoder *speech_decoder,
- const PackedByteArray *p_read_byte_array,
+ const PoolByteArray *p_read_byte_array,
const int p_read_size,
- PackedVector2Array *p_write_vec2_array) {
+ PoolVector2Array *p_write_vec2_array) {
if (opus_codec->decode_buffer(speech_decoder, p_read_byte_array, &pcm_byte_array_cache, p_read_size, PCM_BUFFER_SIZE)) {
if (_16_pcm_mono_to_real_stereo(&pcm_byte_array_cache, p_write_vec2_array)) {
return true;
@@ -117,14 +117,14 @@ public:
}
virtual Dictionary compress_buffer(
- const PackedByteArray &p_pcm_byte_array,
+ const PoolByteArray &p_pcm_byte_array,
Dictionary p_output_buffer);
- virtual PackedVector2Array decompress_buffer(
+ virtual PoolVector2Array decompress_buffer(
Ref<SpeechDecoder> p_speech_decoder,
- const PackedByteArray &p_read_byte_array,
+ const PoolByteArray &p_read_byte_array,
const int p_read_size,
- PackedVector2Array p_write_vec2_array);
+ PoolVector2Array p_write_vec2_array);
Ref<SpeechDecoder> get_speech_decoder() {
if (opus_codec) {
--
2.29.2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment