Alina Lozovskaya
committed on
Commit
·
24681aa
1
Parent(s):
ce440e3
Simplify tool call handling and fix base64 image encoding
Browse files
src/reachy_mini_conversation_demo/openai_realtime.py
CHANGED
|
@@ -36,8 +36,6 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
|
|
| 36 |
self.connection = None
|
| 37 |
self.output_queue = asyncio.Queue()
|
| 38 |
|
| 39 |
-
self._pending_calls: dict[str, dict] = {}
|
| 40 |
-
|
| 41 |
self.last_activity_time = asyncio.get_event_loop().time()
|
| 42 |
self.start_time = asyncio.get_event_loop().time()
|
| 43 |
self.is_idle_tool_call = False
|
|
@@ -115,33 +113,10 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
|
|
| 115 |
)
|
| 116 |
|
| 117 |
# ---- tool-calling plumbing ----
|
| 118 |
-
# 1) model announces a function call item; capture name + call_id
|
| 119 |
-
if event.type == "response.output_item.added":
|
| 120 |
-
item = getattr(event, "item", None)
|
| 121 |
-
if item and getattr(item, "type", "") == "function_call":
|
| 122 |
-
call_id = getattr(item, "call_id", None)
|
| 123 |
-
name = getattr(item, "name", None)
|
| 124 |
-
if call_id and name:
|
| 125 |
-
self._pending_calls[call_id] = {
|
| 126 |
-
"name": name,
|
| 127 |
-
"args_buf": "",
|
| 128 |
-
}
|
| 129 |
-
|
| 130 |
-
# 2) model streams JSON arguments; buffer them by call_id
|
| 131 |
-
if event.type == "response.function_call_arguments.delta":
|
| 132 |
-
call_id = getattr(event, "call_id", None)
|
| 133 |
-
delta = getattr(event, "delta", "")
|
| 134 |
-
if call_id in self._pending_calls:
|
| 135 |
-
self._pending_calls[call_id]["args_buf"] += delta
|
| 136 |
-
|
| 137 |
-
# 3) when args done, execute Python tool, send function_call_output, then trigger a new response
|
| 138 |
if event.type == "response.function_call_arguments.done":
|
|
|
|
|
|
|
| 139 |
call_id = getattr(event, "call_id", None)
|
| 140 |
-
tool_call_info = self._pending_calls.get(call_id)
|
| 141 |
-
if not tool_call_info:
|
| 142 |
-
continue
|
| 143 |
-
tool_name = tool_call_info["name"]
|
| 144 |
-
args_json_str = tool_call_info["args_buf"] or "{}"
|
| 145 |
|
| 146 |
try:
|
| 147 |
tool_result = await dispatch_tool_call(tool_name, args_json_str, self.deps)
|
|
@@ -171,7 +146,11 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
|
|
| 171 |
)
|
| 172 |
|
| 173 |
if tool_name == "camera" and "b64_im" in tool_result:
|
| 174 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
await self.connection.conversation.item.create(
|
| 176 |
item={
|
| 177 |
"type": "message",
|
|
@@ -209,8 +188,6 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
|
|
| 209 |
|
| 210 |
# re synchronize the head wobble after a tool call that may have taken some time
|
| 211 |
self.deps.head_wobbler.reset()
|
| 212 |
-
# cleanup
|
| 213 |
-
self._pending_calls.pop(call_id, None)
|
| 214 |
|
| 215 |
# server error
|
| 216 |
if event.type == "error":
|
|
|
|
| 36 |
self.connection = None
|
| 37 |
self.output_queue = asyncio.Queue()
|
| 38 |
|
|
|
|
|
|
|
| 39 |
self.last_activity_time = asyncio.get_event_loop().time()
|
| 40 |
self.start_time = asyncio.get_event_loop().time()
|
| 41 |
self.is_idle_tool_call = False
|
|
|
|
| 113 |
)
|
| 114 |
|
| 115 |
# ---- tool-calling plumbing ----
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
if event.type == "response.function_call_arguments.done":
|
| 117 |
+
tool_name = getattr(event, "name", None)
|
| 118 |
+
args_json_str = getattr(event, "arguments", None)
|
| 119 |
call_id = getattr(event, "call_id", None)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
try:
|
| 122 |
tool_result = await dispatch_tool_call(tool_name, args_json_str, self.deps)
|
|
|
|
| 146 |
)
|
| 147 |
|
| 148 |
if tool_name == "camera" and "b64_im" in tool_result:
|
| 149 |
+
# use raw base64, don't json.dumps (which adds quotes)
|
| 150 |
+
b64_im = tool_result["b64_im"]
|
| 151 |
+
if not isinstance(b64_im, str):
|
| 152 |
+
logger.warning("Unexpected type for b64_im: %s", type(b64_im))
|
| 153 |
+
b64_im = str(b64_im)
|
| 154 |
await self.connection.conversation.item.create(
|
| 155 |
item={
|
| 156 |
"type": "message",
|
|
|
|
| 188 |
|
| 189 |
# re synchronize the head wobble after a tool call that may have taken some time
|
| 190 |
self.deps.head_wobbler.reset()
|
|
|
|
|
|
|
| 191 |
|
| 192 |
# server error
|
| 193 |
if event.type == "error":
|