Alina Lozovskaya committed on
Commit
24681aa
·
1 Parent(s): ce440e3

Simplify tool call handling and fix base64 image encoding

Browse files
src/reachy_mini_conversation_demo/openai_realtime.py CHANGED
@@ -36,8 +36,6 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
36
  self.connection = None
37
  self.output_queue = asyncio.Queue()
38
 
39
- self._pending_calls: dict[str, dict] = {}
40
-
41
  self.last_activity_time = asyncio.get_event_loop().time()
42
  self.start_time = asyncio.get_event_loop().time()
43
  self.is_idle_tool_call = False
@@ -115,33 +113,10 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
115
  )
116
 
117
  # ---- tool-calling plumbing ----
118
- # 1) model announces a function call item; capture name + call_id
119
- if event.type == "response.output_item.added":
120
- item = getattr(event, "item", None)
121
- if item and getattr(item, "type", "") == "function_call":
122
- call_id = getattr(item, "call_id", None)
123
- name = getattr(item, "name", None)
124
- if call_id and name:
125
- self._pending_calls[call_id] = {
126
- "name": name,
127
- "args_buf": "",
128
- }
129
-
130
- # 2) model streams JSON arguments; buffer them by call_id
131
- if event.type == "response.function_call_arguments.delta":
132
- call_id = getattr(event, "call_id", None)
133
- delta = getattr(event, "delta", "")
134
- if call_id in self._pending_calls:
135
- self._pending_calls[call_id]["args_buf"] += delta
136
-
137
- # 3) when args done, execute Python tool, send function_call_output, then trigger a new response
138
  if event.type == "response.function_call_arguments.done":
 
 
139
  call_id = getattr(event, "call_id", None)
140
- tool_call_info = self._pending_calls.get(call_id)
141
- if not tool_call_info:
142
- continue
143
- tool_name = tool_call_info["name"]
144
- args_json_str = tool_call_info["args_buf"] or "{}"
145
 
146
  try:
147
  tool_result = await dispatch_tool_call(tool_name, args_json_str, self.deps)
@@ -171,7 +146,11 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
171
  )
172
 
173
  if tool_name == "camera" and "b64_im" in tool_result:
174
- b64_im = json.dumps(tool_result["b64_im"])
 
 
 
 
175
  await self.connection.conversation.item.create(
176
  item={
177
  "type": "message",
@@ -209,8 +188,6 @@ class OpenaiRealtimeHandler(AsyncStreamHandler):
209
 
210
  # re synchronize the head wobble after a tool call that may have taken some time
211
  self.deps.head_wobbler.reset()
212
- # cleanup
213
- self._pending_calls.pop(call_id, None)
214
 
215
  # server error
216
  if event.type == "error":
 
36
  self.connection = None
37
  self.output_queue = asyncio.Queue()
38
 
 
 
39
  self.last_activity_time = asyncio.get_event_loop().time()
40
  self.start_time = asyncio.get_event_loop().time()
41
  self.is_idle_tool_call = False
 
113
  )
114
 
115
  # ---- tool-calling plumbing ----
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  if event.type == "response.function_call_arguments.done":
117
+ tool_name = getattr(event, "name", None)
118
+ args_json_str = getattr(event, "arguments", None)
119
  call_id = getattr(event, "call_id", None)
 
 
 
 
 
120
 
121
  try:
122
  tool_result = await dispatch_tool_call(tool_name, args_json_str, self.deps)
 
146
  )
147
 
148
  if tool_name == "camera" and "b64_im" in tool_result:
149
+ # use raw base64, don't json.dumps (which adds quotes)
150
+ b64_im = tool_result["b64_im"]
151
+ if not isinstance(b64_im, str):
152
+ logger.warning("Unexpected type for b64_im: %s", type(b64_im))
153
+ b64_im = str(b64_im)
154
  await self.connection.conversation.item.create(
155
  item={
156
  "type": "message",
 
188
 
189
  # re synchronize the head wobble after a tool call that may have taken some time
190
  self.deps.head_wobbler.reset()
 
 
191
 
192
  # server error
193
  if event.type == "error":