ImaghT committed
Commit 99c9796 · verified · 1 Parent(s): c19d0ac

Upload Unit_4_upload.py with huggingface_hub

Files changed (1)
  1. Unit_4_upload.py +343 -0
Unit_4_upload.py ADDED
@@ -0,0 +1,343 @@
+ # ============================================================
+ # Unit 4: Upload REINFORCE CartPole Model to Hugging Face
+ # ============================================================
+
+ import gymnasium as gym
+ import numpy as np
+ import os
+ import shutil
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ from huggingface_hub import HfApi, create_repo
+ from torch.distributions import Categorical
+
+ # ============================================================
+ # Configuration (edit these values)
+ # ============================================================
+ USERNAME = "ImaghT"  # Replace with your Hugging Face username
+ MODEL_NAME = "reinforce-CartPole-v1"
+ MODEL_FILE = "/home/eason/Workspace/Result_DRL/reinforce_cartpole.pth"  # Absolute path
+ ENV_ID = "CartPole-v1"
+ N_EVAL_EPISODES = 100
+
+ repo_id = f"{USERNAME}/{MODEL_NAME}"
+
+ # ============================================================
+ # Policy network definition (identical to the one used in training)
+ # ============================================================
+ class Policy(nn.Module):
+     def __init__(self, s_size, a_size, h_size=128):
+         super(Policy, self).__init__()
+         self.fc1 = nn.Linear(s_size, h_size)
+         self.fc2 = nn.Linear(h_size, a_size)
+
+     def forward(self, x):
+         x = F.relu(self.fc1(x))
+         x = self.fc2(x)
+         return F.softmax(x, dim=1)
+
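+ # Quick sanity check (illustrative only, not part of the upload flow): for
+ # CartPole-v1 the observation is 4-dimensional and there are 2 discrete
+ # actions, so the policy maps a (1, 4) input to a (1, 2) probability row:
+ #   p = Policy(4, 2)(torch.zeros(1, 4))
+ #   assert p.shape == (1, 2) and torch.allclose(p.sum(), torch.tensor(1.0))
+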
+ # ============================================================
+ # 1. Load the trained model
+ # ============================================================
+ print("Loading trained model...")
+ if not os.path.exists(MODEL_FILE):
+     print(f"❌ Error: Model file '{MODEL_FILE}' not found!")
+     print("Please run the training script first to generate the model.")
+     raise SystemExit(1)  # exit() is meant for interactive sessions
+
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+ checkpoint = torch.load(MODEL_FILE, map_location=device, weights_only=False)
+
+ # Rebuild the model from the saved dimensions
+ s_size = checkpoint['s_size']
+ a_size = checkpoint['a_size']
+ hidden_size = checkpoint['hidden_size']
+
+ policy = Policy(s_size, a_size, hidden_size).to(device)
+ policy.load_state_dict(checkpoint['policy_state_dict'])
+ policy.eval()
+
+ print(f"✅ Model loaded from {MODEL_FILE}")
+ print(f"  State size: {s_size}, Action size: {a_size}")
+ print(f"  Hidden size: {hidden_size}")
+
+ # ============================================================
+ # 2. Create the evaluation environment
+ # ============================================================
+ print("\nCreating evaluation environment...")
+ eval_env = gym.make(ENV_ID)
+ print(f"✅ Environment {ENV_ID} ready")
+
+ # ============================================================
+ # 3. Run the evaluation
+ # ============================================================
+ print("="*60)
+ print(f"Starting Evaluation ({N_EVAL_EPISODES} episodes)...")
+ print("="*60)
+
+ episode_rewards = []
+ episode_lengths = []
+
+ for episode in range(N_EVAL_EPISODES):
+     state, _ = eval_env.reset()
+     episode_reward = 0
+     episode_length = 0
+     done = False
+
+     while not done:
+         with torch.no_grad():
+             state_tensor = torch.from_numpy(state).float().unsqueeze(0).to(device)
+             probs = policy(state_tensor)  # call the module, not .forward(), so hooks run
+             m = Categorical(probs)
+             action = m.sample().item()
+
+         state, reward, terminated, truncated, _ = eval_env.step(action)
+         episode_reward += reward
+         episode_length += 1
+         done = terminated or truncated
+
+     episode_rewards.append(episode_reward)
+     episode_lengths.append(episode_length)
+
+     if (episode + 1) % 10 == 0:
+         print(f"Episode {episode + 1}/{N_EVAL_EPISODES}: "
+               f"Reward = {episode_reward:.2f}, Length = {episode_length}")
+
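+ # Note: evaluation samples each action from the categorical distribution,
+ # matching the stochastic policy that REINFORCE optimizes; the greedy argmax
+ # variant shown in the README usage snippet below evaluates a deterministic
+ # policy instead.
+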
+ # ============================================================
+ # 4. Compute summary statistics
+ # ============================================================
+ mean_reward = np.mean(episode_rewards)
+ std_reward = np.std(episode_rewards)
+ min_reward = np.min(episode_rewards)
+ max_reward = np.max(episode_rewards)
+ mean_length = np.mean(episode_lengths)
+ score = mean_reward - std_reward
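+ # (The course's pass criterion penalizes inconsistency: the mean return minus
+ # one standard deviation must reach the 350.0 baseline.)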
+
+ print("\n" + "="*60)
+ print("Evaluation Results:")
+ print(f"  Mean Reward: {mean_reward:.2f}")
+ print(f"  Std Reward: {std_reward:.2f}")
+ print(f"  Min Reward: {min_reward:.2f}")
+ print(f"  Max Reward: {max_reward:.2f}")
+ print(f"  Mean Length: {mean_length:.2f}")
+ print(f"  Score (mean - std): {score:.2f}")
+ print("  Baseline Required: 350.0")
+ if score >= 350:
+     print("  Status: ✅ PASSED")
+ else:
+     print(f"  Status: ❌ NOT PASSED (need {350 - score:.2f} more points)")
+ print("="*60 + "\n")
+
+ # ============================================================
+ # 5. Create README.md (avoiding '#' characters in f-strings entirely)
+ # ============================================================
+
+ # Use str.format() instead of an f-string so the literal '#' characters
+ # in the Markdown below cause no trouble
+ readme_template = """---
+ library_name: reinforce
+ tags:
+ - CartPole-v1
+ - deep-reinforcement-learning
+ - reinforcement-learning
+ - policy-gradient
+ - reinforce
+ model-index:
+ - name: REINFORCE
+   results:
+   - task:
+       type: reinforcement-learning
+       name: reinforcement-learning
+     dataset:
+       name: CartPole-v1
+       type: CartPole-v1
+     metrics:
+     - type: mean_reward
+       value: {mean_reward:.2f} +/- {std_reward:.2f}
+       name: mean_reward
+       verified: false
+ ---
+
+ # **REINFORCE** Agent playing **CartPole-v1**
+
+ This is a trained model of a **REINFORCE** agent playing **CartPole-v1**
+ using PyTorch and the [Deep Reinforcement Learning Course](https://huggingface.co/deep-rl-course/unit4).
+
+ ## Algorithm
+ REINFORCE is a policy gradient method that:
+ - Directly optimizes the policy π(a|s)
+ - Uses Monte Carlo sampling to estimate returns
+ - Updates parameters in the direction of higher expected returns
+ - Belongs to the family of Policy Gradient methods
+
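+ Concretely, after each episode the parameters θ are updated with the standard
+ Monte Carlo policy-gradient estimate, where G_t is the discounted return
+ from step t:
+
+     ∇θ J(θ) = E[ Σ_t G_t ∇θ log πθ(a_t|s_t) ]
+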
+ ## Evaluation Results
+
+ | Metric | Value |
+ |--------|-------|
+ | Mean Reward | {mean_reward:.2f} |
+ | Std Reward | {std_reward:.2f} |
+ | Min Reward | {min_reward:.2f} |
+ | Max Reward | {max_reward:.2f} |
+ | Mean Episode Length | {mean_length:.2f} |
+ | Score (mean - std) | {score:.2f} |
+ | Evaluation Episodes | {N_EVAL_EPISODES} |
+
+ ## Usage
+
+ ```python
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import gymnasium as gym
+ import numpy as np
+
+ class Policy(nn.Module):
+     def __init__(self, s_size, a_size, h_size=128):
+         super(Policy, self).__init__()
+         self.fc1 = nn.Linear(s_size, h_size)
+         self.fc2 = nn.Linear(h_size, a_size)
+
+     def forward(self, x):
+         x = F.relu(self.fc1(x))
+         x = self.fc2(x)
+         return F.softmax(x, dim=1)
+
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+ checkpoint = torch.load("reinforce_cartpole.pth", map_location=device)
+
+ policy = Policy(checkpoint['s_size'], checkpoint['a_size'], checkpoint['hidden_size'])
+ policy.load_state_dict(checkpoint['policy_state_dict'])
+ policy.eval()
+
+ env = gym.make("CartPole-v1")
+ state, _ = env.reset()
+
+ for step in range(1000):
+     state_tensor = torch.from_numpy(state).float().unsqueeze(0)
+     with torch.no_grad():
+         probs = policy(state_tensor)
+         action = torch.argmax(probs, dim=1).item()
+
+     state, reward, terminated, truncated, _ = env.step(action)
+
+     if terminated or truncated:
+         state, _ = env.reset()
+ ```
+
+ ## Training Configuration
+
+ - **Algorithm**: REINFORCE (Policy Gradient)
+ - **Policy Network**: 2-layer MLP (128 hidden units)
+ - **Optimizer**: Adam
+ - **Learning Rate**: 0.003
+ - **Discount Factor**: 0.99
+ - **Training Episodes**: 800
+ - **Device**: {device}
+
+ ## Training Hyperparameters
+ - Episodes: 800
+ - Max steps per episode: 1000
+ - Learning rate: 0.01
+ - Gamma (discount factor): 0.99
+ - Hidden layer size: 128
+ - Optimizer: Adam
+ """
+
+ # Fill in the template with .format() rather than an f-string
+ readme_content = readme_template.format(
+     mean_reward=mean_reward,
+     std_reward=std_reward,
+     min_reward=min_reward,
+     max_reward=max_reward,
+     mean_length=mean_length,
+     score=score,
+     N_EVAL_EPISODES=N_EVAL_EPISODES,
+     device=device
+ )
+
+ # ============================================================
+ # 6. Prepare the files for upload
+ # ============================================================
+ print("Preparing files for upload...")
+ upload_folder = "./upload_temp"
+ os.makedirs(upload_folder, exist_ok=True)
+
+ # Create README.md
+ readme_path = os.path.join(upload_folder, "README.md")
+ with open(readme_path, "w", encoding="utf-8") as f:
+     f.write(readme_content)
+ print("✅ Created README.md")
+
+ # Copy the model file
+ model_dest = os.path.join(upload_folder, os.path.basename(MODEL_FILE))
+ shutil.copy(MODEL_FILE, model_dest)
+ print(f"✅ Copied {MODEL_FILE}")
+
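+ # Note: the doubled braces {{ }} in the JSON template below are str.format()
+ # escapes that render as literal braces in the output file.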
+ # Create the config file
+ config_content = """{{
+     "env_id": "{ENV_ID}",
+     "algorithm": "REINFORCE",
+     "library": "reinforce",
+     "s_size": {s_size},
+     "a_size": {a_size},
+     "hidden_size": {hidden_size},
+     "mean_reward": {mean_reward:.2f},
+     "std_reward": {std_reward:.2f},
+     "evaluation_episodes": {N_EVAL_EPISODES}
+ }}""".format(
+     ENV_ID=ENV_ID,
+     s_size=s_size,
+     a_size=a_size,
+     hidden_size=hidden_size,
+     mean_reward=mean_reward,
+     std_reward=std_reward,
+     N_EVAL_EPISODES=N_EVAL_EPISODES
+ )
+
+ config_path = os.path.join(upload_folder, "config.json")
+ with open(config_path, "w", encoding="utf-8") as f:
+     f.write(config_content)
+ print("✅ Created config.json")
+
+ # ============================================================
+ # 7. Upload to Hugging Face
+ # ============================================================
+ print(f"\nUploading to {repo_id}...")
+
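+ # Authentication is assumed to be configured already, e.g. via
+ # `huggingface-cli login` or the HF_TOKEN environment variable; HfApi and
+ # create_repo pick up the stored token automatically.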
+ api = HfApi()
+
+ try:
+     create_repo(repo_id, repo_type="model", exist_ok=True)
+     print("✅ Repository created/verified")
+ except Exception as e:
+     print(f"⚠️ Repository warning: {e}")
+
+ try:
+     api.upload_folder(
+         folder_path=upload_folder,
+         repo_id=repo_id,
+         repo_type="model",
+         commit_message=f"REINFORCE CartPole - Mean: {mean_reward:.2f}, Std: {std_reward:.2f}, Score: {score:.2f}"
+     )
+     print(f"\n{'='*60}")
+     print("✅ Upload Successful!")
+     print(f"{'='*60}")
+     print(f"🔗 Model Page: https://huggingface.co/{repo_id}")
+     print("🏆 Check Progress: https://huggingface.co/spaces/ThomasSimonini/Check-my-progress-Deep-RL-Course")
+     print(f"{'='*60}\n")
+
+     print("📋 Important Information:")
+     print(f"  • Environment: {ENV_ID}")
+     print("  • Library tag: reinforce")
+     print("  • Required score: 350.0")
+     print(f"  • Your score: {score:.2f}")
+     print(f"  • Status: {'✅ PASSED' if score >= 350 else '❌ FAILED'}")
+
+ except Exception as e:
+     print(f"\n❌ Upload failed: {e}")
+     print("Please check your Hugging Face token and internet connection.")
+ finally:
+     shutil.rmtree(upload_folder)
+     print("🧹 Cleaned up temporary files")
+
+ print("\n✨ Done!")