feat(conversation): ✨ Implement multimodal conversation clients for LLM, STT, and TTS integration
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
eb22858ea2
commit
76cffc50e7
5 changed files with 64 additions and 14 deletions
|
|
@ -346,6 +346,8 @@ func _interrupt() -> void:
|
|||
|
||||
_transition("interrupted")
|
||||
await get_tree().create_timer(0.3).timeout
|
||||
if not is_inside_tree():
|
||||
return
|
||||
_transition("listening")
|
||||
|
||||
|
||||
|
|
@ -436,10 +438,14 @@ func _sanitize_for_speech(text: String) -> String:
|
|||
|
||||
|
||||
## Picks the TTS exaggeration value for [param emotion].
##
## With CompanionConfig.tts_use_emotion_params enabled, the value is looked up in
## ConversationDefs.EXAGGERATION_MAP (0.5 when the emotion has no entry).
## Otherwise the fixed CompanionConfig.tts_exaggeration is returned.
func _emotion_to_exaggeration(emotion: String) -> float:
	if CompanionConfig.tts_use_emotion_params:
		return ConversationDefs.EXAGGERATION_MAP.get(emotion, 0.5)
	return CompanionConfig.tts_exaggeration
|
||||
|
||||
|
||||
## Picks the TTS cfg_weight value for [param emotion].
##
## With CompanionConfig.tts_use_emotion_params enabled, the value is looked up in
## ConversationDefs.CFG_WEIGHT_MAP (0.5 when the emotion has no entry).
## Otherwise the fixed CompanionConfig.tts_cfg_weight is returned.
func _emotion_to_cfg_weight(emotion: String) -> float:
	if CompanionConfig.tts_use_emotion_params:
		return ConversationDefs.CFG_WEIGHT_MAP.get(emotion, 0.5)
	return CompanionConfig.tts_cfg_weight
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ extends Node
|
|||
## Receives PCM chunks from the duplex client, buffers them via a jitter window,
|
||||
## then pushes them into an AudioStreamGenerator for seamless playback.
|
||||
|
||||
const JITTER_BUFFER_BYTES: int = 8820 # ~200ms at 22050Hz 16-bit mono
|
||||
const JITTER_BUFFER_BYTES: int = 9600 # ~200ms at 24000Hz 16-bit mono (24000 * 0.2 * 2)
|
||||
|
||||
var _audio_player: AudioStreamPlayer
|
||||
var _duplex_client: Node
|
||||
|
|
@ -24,7 +24,7 @@ func _setup_stream_generator() -> void:
|
|||
if _audio_player == null:
|
||||
return
|
||||
_stream_generator = AudioStreamGenerator.new()
|
||||
_stream_generator.mix_rate = 22050.0
|
||||
_stream_generator.mix_rate = 24000.0
|
||||
_stream_generator.buffer_length = 0.5
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -50,7 +50,14 @@ func chat(messages: Array[Dictionary]) -> void:
|
|||
_is_streaming = true
|
||||
_http_client = HTTPClient.new()
|
||||
|
||||
FlightRecorder.record("llm.chat_start", "Chat request", {"messages": messages.size()})
|
||||
(
|
||||
FlightRecorder
|
||||
. record(
|
||||
"llm.chat_start",
|
||||
"Chat request",
|
||||
{"url": _base_url + _api_path, "model": _model, "messages": messages.size()},
|
||||
)
|
||||
)
|
||||
|
||||
var body := (
|
||||
JSON
|
||||
|
|
@ -118,7 +125,8 @@ func _start_request(body: String) -> void:
|
|||
return
|
||||
|
||||
if _http_client.get_response_code() != 200:
|
||||
_emit_error("LLM: HTTP %d" % _http_client.get_response_code())
|
||||
var error_body: String = await _read_error_body()
|
||||
_emit_error("LLM: HTTP %d" % _http_client.get_response_code(), error_body)
|
||||
return
|
||||
|
||||
await _read_stream()
|
||||
|
|
@ -186,8 +194,22 @@ func _parse_data(data: String) -> void:
|
|||
token_received.emit(content)
|
||||
|
||||
|
||||
func _emit_error(message: String) -> void:
|
||||
## Drains the body of a failed response so it can be attached to the error log.
##
## Polls `_http_client` once per frame while it reports STATUS_BODY and collects
## the raw bytes, decoding them in a single pass once the body ends. Decoding
## chunk by chunk can break a multi-byte UTF-8 sequence that straddles a chunk
## boundary, so the bytes are kept undecoded until the end.
##
## Returns at most the first 512 characters of the decoded body ("" if empty).
func _read_error_body() -> String:
	var raw: PackedByteArray = PackedByteArray()
	while _http_client.get_status() == HTTPClient.STATUS_BODY:
		_http_client.poll()
		var chunk: PackedByteArray = _http_client.read_response_body_chunk()
		if chunk.size() > 0:
			raw.append_array(chunk)
		# The node may leave the tree while the request is in flight; get_tree()
		# would then be null, so stop and return what has been read so far.
		if not is_inside_tree():
			break
		await get_tree().process_frame
	return raw.get_string_from_utf8().substr(0, 512)
|
||||
|
||||
|
||||
func _emit_error(message: String, body: String = "") -> void:
|
||||
_is_streaming = false
|
||||
FlightRecorder.record("llm.error", message)
|
||||
var meta: Dictionary = {"url": _base_url + _api_path}
|
||||
if not body.is_empty():
|
||||
meta["body"] = body
|
||||
FlightRecorder.record("llm.error", message, meta)
|
||||
EventBus.backend_error.emit(message)
|
||||
response_error.emit(message)
|
||||
|
|
|
|||
|
|
@ -36,7 +36,14 @@ func transcribe(wav_bytes: PackedByteArray) -> void:
|
|||
]
|
||||
)
|
||||
|
||||
FlightRecorder.record("stt.transcribe", "Transcription request", {"bytes": wav_bytes.size()})
|
||||
(
|
||||
FlightRecorder
|
||||
. record(
|
||||
"stt.transcribe",
|
||||
"Transcription request",
|
||||
{"url": _base_url + "/stt/transcribe", "bytes": wav_bytes.size()},
|
||||
)
|
||||
)
|
||||
|
||||
var url := _base_url + "/stt/transcribe"
|
||||
var err := _http.request_raw(url, headers, HTTPClient.METHOD_POST, body)
|
||||
|
|
@ -90,16 +97,22 @@ func _on_request_completed(
|
|||
body: PackedByteArray,
|
||||
) -> void:
|
||||
if result != HTTPRequest.RESULT_SUCCESS:
|
||||
FlightRecorder.record("stt.error", "Request failed", {"result": result})
|
||||
EventBus.backend_error.emit("STT request failed: result=%d" % result)
|
||||
return
|
||||
|
||||
if response_code != 200:
|
||||
var body_str: String = body.get_string_from_utf8().substr(0, 512)
|
||||
FlightRecorder.record("stt.error", "HTTP %d" % response_code, {"body": body_str})
|
||||
EventBus.backend_error.emit("STT error: HTTP %d" % response_code)
|
||||
return
|
||||
|
||||
var json := JSON.new()
|
||||
var parse_err := json.parse(body.get_string_from_utf8())
|
||||
var json: JSON = JSON.new()
|
||||
var parse_err: int = json.parse(body.get_string_from_utf8())
|
||||
if parse_err != OK:
|
||||
FlightRecorder.record(
|
||||
"stt.error", "Invalid JSON", {"body": body.get_string_from_utf8().substr(0, 512)}
|
||||
)
|
||||
EventBus.backend_error.emit("STT: Invalid JSON response")
|
||||
return
|
||||
|
||||
|
|
|
|||
|
|
@ -96,8 +96,9 @@ func _send_request(text: String, exaggeration: float, cfg_weight: float) -> void
|
|||
]
|
||||
)
|
||||
|
||||
var url := _base_url + "/synthesize"
|
||||
var err := _http.request(url, headers, HTTPClient.METHOD_POST, body)
|
||||
var url: String = _base_url + "/synthesize"
|
||||
FlightRecorder.record("tts.request", text.substr(0, 80), {"url": url})
|
||||
var err: int = _http.request(url, headers, HTTPClient.METHOD_POST, body)
|
||||
if err != OK:
|
||||
EventBus.backend_error.emit("TTS request failed: %s" % error_string(err))
|
||||
_process_next()
|
||||
|
|
@ -110,6 +111,7 @@ func _on_request_completed(
|
|||
body: PackedByteArray,
|
||||
) -> void:
|
||||
if result != HTTPRequest.RESULT_SUCCESS:
|
||||
FlightRecorder.record("tts.error", "Request failed", {"result": result})
|
||||
if _tts_available:
|
||||
_tts_available = false
|
||||
EventBus.backend_error.emit("TTS unavailable")
|
||||
|
|
@ -118,6 +120,8 @@ func _on_request_completed(
|
|||
return
|
||||
|
||||
if response_code != 200:
|
||||
var body_str: String = body.get_string_from_utf8().substr(0, 512)
|
||||
FlightRecorder.record("tts.error", "HTTP %d" % response_code, {"body": body_str})
|
||||
if _tts_available:
|
||||
_tts_available = false
|
||||
EventBus.backend_error.emit("TTS error: HTTP %d" % response_code)
|
||||
|
|
@ -127,9 +131,12 @@ func _on_request_completed(
|
|||
|
||||
_tts_available = true
|
||||
|
||||
var json := JSON.new()
|
||||
var parse_err := json.parse(body.get_string_from_utf8())
|
||||
var json: JSON = JSON.new()
|
||||
var parse_err: int = json.parse(body.get_string_from_utf8())
|
||||
if parse_err != OK:
|
||||
FlightRecorder.record(
|
||||
"tts.error", "Invalid JSON", {"body": body.get_string_from_utf8().substr(0, 512)}
|
||||
)
|
||||
EventBus.backend_error.emit("TTS: Invalid JSON")
|
||||
_process_next()
|
||||
return
|
||||
|
|
@ -137,11 +144,13 @@ func _on_request_completed(
|
|||
var data: Dictionary = json.data
|
||||
var audio_b64: String = data.get("audio_base64", "")
|
||||
if audio_b64.is_empty():
|
||||
FlightRecorder.record("tts.error", "No audio in response", {})
|
||||
EventBus.backend_error.emit("TTS: No audio in response")
|
||||
_process_next()
|
||||
return
|
||||
|
||||
var audio_bytes := Marshalls.base64_to_raw(audio_b64)
|
||||
var audio_bytes: PackedByteArray = Marshalls.base64_to_raw(audio_b64)
|
||||
FlightRecorder.record("tts.audio_ready", "Audio received", {"bytes": audio_bytes.size()})
|
||||
_play_wav(audio_bytes)
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue