Alina Lozovskaya
committed on
Commit
·
15911c8
1
Parent(s):
24681aa
Add --local-vision flag for optional local vision processing
Browse files
src/reachy_mini_conversation_demo/utils.py
CHANGED
|
@@ -16,13 +16,18 @@ def parse_args():
|
|
| 16 |
help="Choose head tracker (default: None)",
|
| 17 |
)
|
| 18 |
parser.add_argument("--no-camera", default=False, action="store_true", help="Disable camera usage")
|
|
|
|
| 19 |
parser.add_argument("--gradio", default=False, action="store_true", help="Open gradio interface")
|
| 20 |
parser.add_argument("--debug", default=False, action="store_true", help="Enable debug logging")
|
| 21 |
return parser.parse_args()
|
| 22 |
|
| 23 |
|
| 24 |
def handle_vision_stuff(args, current_robot):
|
| 25 |
-
"""Initialize camera, head tracker, camera worker, and vision manager.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
camera_worker = None
|
| 27 |
head_tracker = None
|
| 28 |
vision_manager = None
|
|
@@ -40,8 +45,11 @@ def handle_vision_stuff(args, current_robot):
|
|
| 40 |
# Initialize camera worker
|
| 41 |
camera_worker = CameraWorker(current_robot, head_tracker)
|
| 42 |
|
| 43 |
-
# Initialize vision manager
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
return camera_worker, head_tracker, vision_manager
|
| 47 |
|
|
|
|
| 16 |
help="Choose head tracker (default: None)",
|
| 17 |
)
|
| 18 |
parser.add_argument("--no-camera", default=False, action="store_true", help="Disable camera usage")
|
| 19 |
+
parser.add_argument("--local-vision", default=False, action="store_true", help="Use local vision model instead of gpt-realtime vision")
|
| 20 |
parser.add_argument("--gradio", default=False, action="store_true", help="Open gradio interface")
|
| 21 |
parser.add_argument("--debug", default=False, action="store_true", help="Enable debug logging")
|
| 22 |
return parser.parse_args()
|
| 23 |
|
| 24 |
|
| 25 |
def handle_vision_stuff(args, current_robot):
|
| 26 |
+
"""Initialize camera, head tracker, camera worker, and vision manager.
|
| 27 |
+
|
| 28 |
+
By default, vision is handled by gpt-realtime model when camera tool is used.
|
| 29 |
+
If --local-vision flag is used, a local vision model will process images periodically.
|
| 30 |
+
"""
|
| 31 |
camera_worker = None
|
| 32 |
head_tracker = None
|
| 33 |
vision_manager = None
|
|
|
|
| 45 |
# Initialize camera worker
|
| 46 |
camera_worker = CameraWorker(current_robot, head_tracker)
|
| 47 |
|
| 48 |
+
# Initialize vision manager only if local vision is requested
|
| 49 |
+
if args.local_vision:
|
| 50 |
+
vision_manager = initialize_vision_manager(camera_worker)
|
| 51 |
+
else:
|
| 52 |
+
logging.getLogger(__name__).info("Using gpt-realtime for vision (default). Use --local-vision for local processing.")
|
| 53 |
|
| 54 |
return camera_worker, head_tracker, vision_manager
|
| 55 |
|