diff --git a/shared/godot/conversation/conversation_orchestrator.gd b/shared/godot/conversation/conversation_orchestrator.gd
index 8fcbecb..6adccec 100644
--- a/shared/godot/conversation/conversation_orchestrator.gd
+++ b/shared/godot/conversation/conversation_orchestrator.gd
@@ -346,6 +346,9 @@ func _interrupt() -> void:
 
 	_transition("interrupted")
 	await get_tree().create_timer(0.3).timeout
+	# Skip the follow-up transition if the node left the tree during the wait.
+	if not is_inside_tree():
+		return
 	_transition("listening")
 
 
@@ -436,10 +439,16 @@ func _sanitize_for_speech(text: String) -> String:
 
 
 func _emotion_to_exaggeration(emotion: String) -> float:
+	# Per-emotion values are opt-in; otherwise use the one configured value.
+	if not CompanionConfig.tts_use_emotion_params:
+		return CompanionConfig.tts_exaggeration
 	return ConversationDefs.EXAGGERATION_MAP.get(emotion, 0.5)
 
 
 func _emotion_to_cfg_weight(emotion: String) -> float:
+	# Per-emotion values are opt-in; otherwise use the one configured value.
+	if not CompanionConfig.tts_use_emotion_params:
+		return CompanionConfig.tts_cfg_weight
 	return ConversationDefs.CFG_WEIGHT_MAP.get(emotion, 0.5)
 
 
diff --git a/shared/godot/conversation/conversation_stream_player.gd b/shared/godot/conversation/conversation_stream_player.gd
index 2a7b2f7..567fd00 100644
--- a/shared/godot/conversation/conversation_stream_player.gd
+++ b/shared/godot/conversation/conversation_stream_player.gd
@@ -3,7 +3,7 @@ extends Node
 ## Receives PCM chunks from the duplex client, buffers them via a jitter window,
 ## then pushes them into an AudioStreamGenerator for seamless playback.
 
-const JITTER_BUFFER_BYTES: int = 8820  # ~200ms at 22050Hz 16-bit mono
+const JITTER_BUFFER_BYTES: int = 9600  # ~200ms at 24000Hz 16-bit mono (24000 * 0.2 * 2)
 
 var _audio_player: AudioStreamPlayer
 var _duplex_client: Node
@@ -24,7 +24,7 @@ func _setup_stream_generator() -> void:
 	if _audio_player == null:
 		return
 	_stream_generator = AudioStreamGenerator.new()
-	_stream_generator.mix_rate = 22050.0
+	_stream_generator.mix_rate = 24000.0
 	_stream_generator.buffer_length = 0.5
 
 
diff --git a/shared/godot/conversation/llm_client.gd b/shared/godot/conversation/llm_client.gd
index 15b661a..6716dfc 100644
--- a/shared/godot/conversation/llm_client.gd
+++ b/shared/godot/conversation/llm_client.gd
@@ -50,7 +50,14 @@ func chat(messages: Array[Dictionary]) -> void:
 	_is_streaming = true
 	_http_client = HTTPClient.new()
 
-	FlightRecorder.record("llm.chat_start", "Chat request", {"messages": messages.size()})
+	(
+		FlightRecorder
+		. record(
+			"llm.chat_start",
+			"Chat request",
+			{"url": _base_url + _api_path, "model": _model, "messages": messages.size()},
+		)
+	)
 
 	var body := (
 		JSON
@@ -118,7 +125,10 @@ func _start_request(body: String) -> void:
 		return
 
 	if _http_client.get_response_code() != 200:
-		_emit_error("LLM: HTTP %d" % _http_client.get_response_code())
+		# Read the status first; draining the body below awaits across frames.
+		var status_code: int = _http_client.get_response_code()
+		var error_body: String = await _read_error_body()
+		_emit_error("LLM: HTTP %d" % status_code, error_body)
 		return
 
 	await _read_stream()
@@ -186,8 +196,31 @@ func _parse_data(data: String) -> void:
 	token_received.emit(content)
 
 
-func _emit_error(message: String) -> void:
+## Drains the body of a non-200 response for diagnostics.
+## Returns at most the first 512 characters, decoded as UTF-8.
+func _read_error_body() -> String:
+	# Accumulate raw bytes and decode once: decoding chunk by chunk corrupts
+	# multi-byte UTF-8 sequences that straddle a chunk boundary.
+	var raw: PackedByteArray = PackedByteArray()
+	while _http_client.get_status() == HTTPClient.STATUS_BODY:
+		_http_client.poll()
+		var chunk: PackedByteArray = _http_client.read_response_body_chunk()
+		if chunk.size() > 0:
+			raw.append_array(chunk)
+		# get_tree() returns null once the node has left the tree.
+		if not is_inside_tree():
+			break
+		await get_tree().process_frame
+	return raw.get_string_from_utf8().substr(0, 512)
+
+
+## Clears the streaming flag, records the failure with the request URL (and the
+## response body when one is given), then emits the error signals.
+func _emit_error(message: String, body: String = "") -> void:
 	_is_streaming = false
-	FlightRecorder.record("llm.error", message)
+	var meta: Dictionary = {"url": _base_url + _api_path}
+	if not body.is_empty():
+		meta["body"] = body
+	FlightRecorder.record("llm.error", message, meta)
 	EventBus.backend_error.emit(message)
 	response_error.emit(message)
diff --git a/shared/godot/conversation/stt_client.gd b/shared/godot/conversation/stt_client.gd
index db568d0..02e86b1 100644
--- a/shared/godot/conversation/stt_client.gd
+++ b/shared/godot/conversation/stt_client.gd
@@ -36,7 +36,14 @@ func transcribe(wav_bytes: PackedByteArray) -> void:
 		]
 	)
 
-	FlightRecorder.record("stt.transcribe", "Transcription request", {"bytes": wav_bytes.size()})
+	(
+		FlightRecorder
+		. record(
+			"stt.transcribe",
+			"Transcription request",
+			{"url": _base_url + "/stt/transcribe", "bytes": wav_bytes.size()},
+		)
+	)
 
 	var url := _base_url + "/stt/transcribe"
 	var err := _http.request_raw(url, headers, HTTPClient.METHOD_POST, body)
@@ -90,16 +97,22 @@ func _on_request_completed(
 	body: PackedByteArray,
 ) -> void:
 	if result != HTTPRequest.RESULT_SUCCESS:
+		FlightRecorder.record("stt.error", "Request failed", {"result": result})
 		EventBus.backend_error.emit("STT request failed: result=%d" % result)
 		return
 
 	if response_code != 200:
+		var body_str: String = body.get_string_from_utf8().substr(0, 512)
+		FlightRecorder.record("stt.error", "HTTP %d" % response_code, {"body": body_str})
 		EventBus.backend_error.emit("STT error: HTTP %d" % response_code)
 		return
 
-	var json := JSON.new()
-	var parse_err := json.parse(body.get_string_from_utf8())
+	var json: JSON = JSON.new()
+	var parse_err: int = json.parse(body.get_string_from_utf8())
 	if parse_err != OK:
+		FlightRecorder.record(
+			"stt.error", "Invalid JSON", {"body": body.get_string_from_utf8().substr(0, 512)}
+		)
 		EventBus.backend_error.emit("STT: Invalid JSON response")
 		return
 
diff --git a/shared/godot/conversation/tts_client.gd b/shared/godot/conversation/tts_client.gd
index 10bcf69..7c30df2 100644
--- a/shared/godot/conversation/tts_client.gd
+++ b/shared/godot/conversation/tts_client.gd
@@ -96,8 +96,9 @@ func _send_request(text: String, exaggeration: float, cfg_weight: float) -> void
 		]
 	)
 
-	var url := _base_url + "/synthesize"
-	var err := _http.request(url, headers, HTTPClient.METHOD_POST, body)
+	var url: String = _base_url + "/synthesize"
+	FlightRecorder.record("tts.request", text.substr(0, 80), {"url": url})
+	var err: int = _http.request(url, headers, HTTPClient.METHOD_POST, body)
 	if err != OK:
 		EventBus.backend_error.emit("TTS request failed: %s" % error_string(err))
 		_process_next()
@@ -110,6 +111,7 @@ func _on_request_completed(
 	body: PackedByteArray,
 ) -> void:
 	if result != HTTPRequest.RESULT_SUCCESS:
+		FlightRecorder.record("tts.error", "Request failed", {"result": result})
 		if _tts_available:
 			_tts_available = false
 			EventBus.backend_error.emit("TTS unavailable")
@@ -118,6 +120,8 @@ func _on_request_completed(
 		return
 
 	if response_code != 200:
+		var body_str: String = body.get_string_from_utf8().substr(0, 512)
+		FlightRecorder.record("tts.error", "HTTP %d" % response_code, {"body": body_str})
 		if _tts_available:
 			_tts_available = false
 			EventBus.backend_error.emit("TTS error: HTTP %d" % response_code)
@@ -127,9 +131,12 @@ func _on_request_completed(
 
 	_tts_available = true
 
-	var json := JSON.new()
-	var parse_err := json.parse(body.get_string_from_utf8())
+	var json: JSON = JSON.new()
+	var parse_err: int = json.parse(body.get_string_from_utf8())
 	if parse_err != OK:
+		FlightRecorder.record(
+			"tts.error", "Invalid JSON", {"body": body.get_string_from_utf8().substr(0, 512)}
+		)
 		EventBus.backend_error.emit("TTS: Invalid JSON")
 		_process_next()
 		return
@@ -137,11 +144,13 @@ func _on_request_completed(
 	var data: Dictionary = json.data
 	var audio_b64: String = data.get("audio_base64", "")
 	if audio_b64.is_empty():
+		FlightRecorder.record("tts.error", "No audio in response", {})
 		EventBus.backend_error.emit("TTS: No audio in response")
 		_process_next()
 		return
 
-	var audio_bytes := Marshalls.base64_to_raw(audio_b64)
+	var audio_bytes: PackedByteArray = Marshalls.base64_to_raw(audio_b64)
+	FlightRecorder.record("tts.audio_ready", "Audio received", {"bytes": audio_bytes.size()})
 	_play_wav(audio_bytes)
 
 