{
  "act_attn_layers": null,
  "act_attn_loss_weights": null,
  "act_attn_sigma": 25.0,
  "chunk_size": 33,
  "dim_feedforward": 3200,
  "dim_gaze_decoder": 512,
  "dim_gaze_decoder_feedforward": 3200,
  "dim_model": 512,
  "dinov2_attn_layers": null,
  "dinov2_attn_loss_weights": null,
  "dinov2_attn_sigma": 25.0,
  "dropout": 0.1,
  "eyes": {
    "observation.left_eye": "observation.images.zed_cam_left"
  },
  "feedforward_activation": "relu",
  "freeze_backbone": false,
  "gaze_loss_weight": 1.0,
  "gaze_sigma": 50.0,
  "image_size": [
    480,
    640
  ],
  "input_normalization_modes": {
    "observation.images.left_eye_cam": "mean_std",
    "observation.images.right_eye_cam": "mean_std",
    "observation.state": "mean_std"
  },
  "input_shapes": {
    "observation.images.left_eye_cam": [
      3,
      480,
      640
    ],
    "observation.images.right_eye_cam": [
      3,
      480,
      640
    ],
    "observation.state": [
      21
    ]
  },
  "kl_weight": 10.0,
  "latent_dim": 32,
  "n_action_steps": 33,
  "n_decoder_layers": 1,
  "n_encoder_layers": 4,
  "n_gaze_decoder_layers": 1,
  "n_heads": 8,
  "n_obs_steps": 1,
  "n_vae_encoder_layers": 4,
  "output_normalization_modes": {
    "action": "mean_std"
  },
  "output_shapes": {
    "action": [
      21
    ]
  },
  "pre_norm": false,
  "pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1",
  "replace_final_stride_with_dilation": false,
  "temporal_ensemble_coeff": null,
  "use_act_attn": false,
  "use_dinov2_attn": false,
  "use_gaze": false,
  "use_vae": true,
  "vision_backbone": "resnet18"
}