Alina Lozovskaya committed on
Commit
15911c8
·
1 Parent(s): 24681aa

Add --local-vision flag for optional local vision processing

Browse files
src/reachy_mini_conversation_demo/utils.py CHANGED
@@ -16,13 +16,18 @@ def parse_args():
16
  help="Choose head tracker (default: None)",
17
  )
18
  parser.add_argument("--no-camera", default=False, action="store_true", help="Disable camera usage")
 
19
  parser.add_argument("--gradio", default=False, action="store_true", help="Open gradio interface")
20
  parser.add_argument("--debug", default=False, action="store_true", help="Enable debug logging")
21
  return parser.parse_args()
22
 
23
 
24
  def handle_vision_stuff(args, current_robot):
25
- """Initialize camera, head tracker, camera worker, and vision manager."""
 
 
 
 
26
  camera_worker = None
27
  head_tracker = None
28
  vision_manager = None
@@ -40,8 +45,11 @@ def handle_vision_stuff(args, current_robot):
40
  # Initialize camera worker
41
  camera_worker = CameraWorker(current_robot, head_tracker)
42
 
43
- # Initialize vision manager (handles model download and configuration)
44
- vision_manager = initialize_vision_manager(camera_worker)
 
 
 
45
 
46
  return camera_worker, head_tracker, vision_manager
47
 
 
16
  help="Choose head tracker (default: None)",
17
  )
18
  parser.add_argument("--no-camera", default=False, action="store_true", help="Disable camera usage")
19
+ parser.add_argument("--local-vision", default=False, action="store_true", help="Use local vision model instead of gpt-realtime vision")
20
  parser.add_argument("--gradio", default=False, action="store_true", help="Open gradio interface")
21
  parser.add_argument("--debug", default=False, action="store_true", help="Enable debug logging")
22
  return parser.parse_args()
23
 
24
 
25
  def handle_vision_stuff(args, current_robot):
26
+ """Initialize camera, head tracker, camera worker, and vision manager.
27
+
28
+ By default, vision is handled by gpt-realtime model when camera tool is used.
29
+ If --local-vision flag is used, a local vision model will process images periodically.
30
+ """
31
  camera_worker = None
32
  head_tracker = None
33
  vision_manager = None
 
45
  # Initialize camera worker
46
  camera_worker = CameraWorker(current_robot, head_tracker)
47
 
48
+ # Initialize vision manager only if local vision is requested
49
+ if args.local_vision:
50
+ vision_manager = initialize_vision_manager(camera_worker)
51
+ else:
52
+ logging.getLogger(__name__).info("Using gpt-realtime for vision (default). Use --local-vision for local processing.")
53
 
54
  return camera_worker, head_tracker, vision_manager
55