ImaghT committed
Commit 99c9796 · verified · 1 Parent(s): c19d0ac

Upload Unit_4_upload.py with huggingface_hub

Files changed (1)
  1. Unit_4_upload.py +343 -0
Unit_4_upload.py ADDED
@@ -0,0 +1,343 @@
+ # ============================================================
+ # Unit 4: Upload REINFORCE CartPole Model to Hugging Face
+ # ============================================================
+
+ import gymnasium as gym
+ import numpy as np
+ import os
+ import shutil
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ from huggingface_hub import HfApi, create_repo
+ from torch.distributions import Categorical
+
+ # ============================================================
+ # Configuration (edit these values)
+ # ============================================================
+ USERNAME = "ImaghT"  # Replace with your Hugging Face username
+ MODEL_NAME = "reinforce-CartPole-v1"
+ MODEL_FILE = "/home/eason/Workspace/Result_DRL/reinforce_cartpole.pth"  # Absolute path
+ ENV_ID = "CartPole-v1"
+ N_EVAL_EPISODES = 100
+
+ repo_id = f"{USERNAME}/{MODEL_NAME}"
+
+ # ============================================================
+ # Policy network definition (identical to the one used in training)
+ # ============================================================
+ class Policy(nn.Module):
+     def __init__(self, s_size, a_size, h_size=128):
+         super(Policy, self).__init__()
+         self.fc1 = nn.Linear(s_size, h_size)
+         self.fc2 = nn.Linear(h_size, a_size)
+
+     def forward(self, x):
+         x = F.relu(self.fc1(x))
+         x = self.fc2(x)
+         return F.softmax(x, dim=1)
+
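+ # Quick sanity check (illustrative only, not part of the upload flow): for
+ # CartPole-v1 the observation is 4-dimensional and there are 2 discrete
+ # actions, so the policy maps a (1, 4) input to a (1, 2) probability row:
+ #   p = Policy(4, 2)(torch.zeros(1, 4))
+ #   assert p.shape == (1, 2) and torch.allclose(p.sum(), torch.tensor(1.0))
+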
+ # ============================================================
+ # 1. Load the trained model
+ # ============================================================
+ print("Loading trained model...")
+ if not os.path.exists(MODEL_FILE):
+     print(f"❌ Error: Model file '{MODEL_FILE}' not found!")
+     print("Please run the training script first to generate the model.")
+     raise SystemExit(1)  # exit() is meant for interactive sessions
+
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+ checkpoint = torch.load(MODEL_FILE, map_location=device, weights_only=False)
+
+ # Rebuild the model from the saved dimensions
+ s_size = checkpoint['s_size']
+ a_size = checkpoint['a_size']
+ hidden_size = checkpoint['hidden_size']
+
+ policy = Policy(s_size, a_size, hidden_size).to(device)
+ policy.load_state_dict(checkpoint['policy_state_dict'])
+ policy.eval()
+
+ print(f"✅ Model loaded from {MODEL_FILE}")
+ print(f"  State size: {s_size}, Action size: {a_size}")
+ print(f"  Hidden size: {hidden_size}")
+
+ # ============================================================
+ # 2. Create the evaluation environment
+ # ============================================================
+ print("\nCreating evaluation environment...")
+ eval_env = gym.make(ENV_ID)
+ print(f"✅ Environment {ENV_ID} ready")
+
+ # ============================================================
+ # 3. Run the evaluation
+ # ============================================================
+ print("="*60)
+ print(f"Starting Evaluation ({N_EVAL_EPISODES} episodes)...")
+ print("="*60)
+
+ episode_rewards = []
+ episode_lengths = []
+
+ for episode in range(N_EVAL_EPISODES):
+     state, _ = eval_env.reset()
+     episode_reward = 0
+     episode_length = 0
+     done = False
+
+     while not done:
+         with torch.no_grad():
+             state_tensor = torch.from_numpy(state).float().unsqueeze(0).to(device)
+             probs = policy(state_tensor)  # call the module, not .forward(), so hooks run
+             m = Categorical(probs)
+             action = m.sample().item()
+
+         state, reward, terminated, truncated, _ = eval_env.step(action)
+         episode_reward += reward
+         episode_length += 1
+         done = terminated or truncated
+
+     episode_rewards.append(episode_reward)
+     episode_lengths.append(episode_length)
+
+     if (episode + 1) % 10 == 0:
+         print(f"Episode {episode + 1}/{N_EVAL_EPISODES}: "
+               f"Reward = {episode_reward:.2f}, Length = {episode_length}")
+
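+ # Note: evaluation samples each action from the categorical distribution,
+ # matching the stochastic policy that REINFORCE optimizes; the greedy argmax
+ # variant shown in the README usage snippet below evaluates a deterministic
+ # policy instead.
+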
+ # ============================================================
+ # 4. Compute summary statistics
+ # ============================================================
+ mean_reward = np.mean(episode_rewards)
+ std_reward = np.std(episode_rewards)
+ min_reward = np.min(episode_rewards)
+ max_reward = np.max(episode_rewards)
+ mean_length = np.mean(episode_lengths)
+ score = mean_reward - std_reward
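+ # (The course's pass criterion penalizes inconsistency: the mean return minus
+ # one standard deviation must reach the 350.0 baseline.)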
+
+ print("\n" + "="*60)
+ print("Evaluation Results:")
+ print(f"  Mean Reward: {mean_reward:.2f}")
+ print(f"  Std Reward: {std_reward:.2f}")
+ print(f"  Min Reward: {min_reward:.2f}")
+ print(f"  Max Reward: {max_reward:.2f}")
+ print(f"  Mean Length: {mean_length:.2f}")
+ print(f"  Score (mean - std): {score:.2f}")
+ print("  Baseline Required: 350.0")
+ if score >= 350:
+     print("  Status: ✅ PASSED")
+ else:
+     print(f"  Status: ❌ NOT PASSED (need {350 - score:.2f} more points)")
+ print("="*60 + "\n")
+
+ # ============================================================
+ # 5. Create README.md (avoiding '#' characters in f-strings entirely)
+ # ============================================================
+
+ # Use str.format() instead of an f-string so the literal '#' characters
+ # in the Markdown below cause no trouble
+ readme_template = """---
+ library_name: reinforce
+ tags:
+ - CartPole-v1
+ - deep-reinforcement-learning
+ - reinforcement-learning
+ - policy-gradient
+ - reinforce
+ model-index:
+ - name: REINFORCE
+   results:
+   - task:
+       type: reinforcement-learning
+       name: reinforcement-learning
+     dataset:
+       name: CartPole-v1
+       type: CartPole-v1
+     metrics:
+     - type: mean_reward
+       value: {mean_reward:.2f} +/- {std_reward:.2f}
+       name: mean_reward
+       verified: false
+ ---
+
+ # **REINFORCE** Agent playing **CartPole-v1**
+
+ This is a trained model of a **REINFORCE** agent playing **CartPole-v1**
+ using PyTorch and the [Deep Reinforcement Learning Course](https://huggingface.co/deep-rl-course/unit4).
+
+ ## Algorithm
+ REINFORCE is a policy gradient method that:
+ - Directly optimizes the policy π(a|s)
+ - Uses Monte Carlo sampling to estimate returns
+ - Updates parameters in the direction of higher expected returns
+ - Belongs to the family of Policy Gradient methods
+
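+ Concretely, after each episode the parameters θ are updated with the standard
+ Monte Carlo policy-gradient estimate, where G_t is the discounted return
+ from step t:
+
+     ∇θ J(θ) = E[ Σ_t G_t ∇θ log πθ(a_t|s_t) ]
+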
+ ## Evaluation Results
+
+ | Metric | Value |
+ |--------|-------|
+ | Mean Reward | {mean_reward:.2f} |
+ | Std Reward | {std_reward:.2f} |
+ | Min Reward | {min_reward:.2f} |
+ | Max Reward | {max_reward:.2f} |
+ | Mean Episode Length | {mean_length:.2f} |
+ | Score (mean - std) | {score:.2f} |
+ | Evaluation Episodes | {N_EVAL_EPISODES} |
+
+ ## Usage
+
+ ```python
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import gymnasium as gym
+ import numpy as np
+
+ class Policy(nn.Module):
+     def __init__(self, s_size, a_size, h_size=128):
+         super(Policy, self).__init__()
+         self.fc1 = nn.Linear(s_size, h_size)
+         self.fc2 = nn.Linear(h_size, a_size)
+
+     def forward(self, x):
+         x = F.relu(self.fc1(x))
+         x = self.fc2(x)
+         return F.softmax(x, dim=1)
+
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+ checkpoint = torch.load("reinforce_cartpole.pth", map_location=device)
+
+ policy = Policy(checkpoint['s_size'], checkpoint['a_size'], checkpoint['hidden_size'])
+ policy.load_state_dict(checkpoint['policy_state_dict'])
+ policy.eval()
+
+ env = gym.make("CartPole-v1")
+ state, _ = env.reset()
+
+ for step in range(1000):
+     state_tensor = torch.from_numpy(state).float().unsqueeze(0)
+     with torch.no_grad():
+         probs = policy(state_tensor)
+         action = torch.argmax(probs, dim=1).item()
+
+     state, reward, terminated, truncated, _ = env.step(action)
+
+     if terminated or truncated:
+         state, _ = env.reset()
+ ```
+
+ ## Training Configuration
+
+ - **Algorithm**: REINFORCE (Policy Gradient)
+ - **Policy Network**: 2-layer MLP (128 hidden units)
+ - **Optimizer**: Adam
+ - **Learning Rate**: 0.003
+ - **Discount Factor**: 0.99
+ - **Training Episodes**: 800
+ - **Device**: {device}
+
+ ## Training Hyperparameters
+ - Episodes: 800
+ - Max steps per episode: 1000
+ - Learning rate: 0.01
+ - Gamma (discount factor): 0.99
+ - Hidden layer size: 128
+ - Optimizer: Adam
+ """
+
+ # Fill in the template with .format() rather than an f-string
+ readme_content = readme_template.format(
+     mean_reward=mean_reward,
+     std_reward=std_reward,
+     min_reward=min_reward,
+     max_reward=max_reward,
+     mean_length=mean_length,
+     score=score,
+     N_EVAL_EPISODES=N_EVAL_EPISODES,
+     device=device
+ )
+
+ # ============================================================
+ # 6. Prepare the files for upload
+ # ============================================================
+ print("Preparing files for upload...")
+ upload_folder = "./upload_temp"
+ os.makedirs(upload_folder, exist_ok=True)
+
+ # Create README.md
+ readme_path = os.path.join(upload_folder, "README.md")
+ with open(readme_path, "w", encoding="utf-8") as f:
+     f.write(readme_content)
+ print("✅ Created README.md")
+
+ # Copy the model file
+ model_dest = os.path.join(upload_folder, os.path.basename(MODEL_FILE))
+ shutil.copy(MODEL_FILE, model_dest)
+ print(f"✅ Copied {MODEL_FILE}")
+
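+ # Note: the doubled braces {{ }} in the JSON template below are str.format()
+ # escapes that render as literal braces in the output file.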
+ # Create the config file
+ config_content = """{{
+     "env_id": "{ENV_ID}",
+     "algorithm": "REINFORCE",
+     "library": "reinforce",
+     "s_size": {s_size},
+     "a_size": {a_size},
+     "hidden_size": {hidden_size},
+     "mean_reward": {mean_reward:.2f},
+     "std_reward": {std_reward:.2f},
+     "evaluation_episodes": {N_EVAL_EPISODES}
+ }}""".format(
+     ENV_ID=ENV_ID,
+     s_size=s_size,
+     a_size=a_size,
+     hidden_size=hidden_size,
+     mean_reward=mean_reward,
+     std_reward=std_reward,
+     N_EVAL_EPISODES=N_EVAL_EPISODES
+ )
+
+ config_path = os.path.join(upload_folder, "config.json")
+ with open(config_path, "w", encoding="utf-8") as f:
+     f.write(config_content)
+ print("✅ Created config.json")
+
+ # ============================================================
+ # 7. Upload to Hugging Face
+ # ============================================================
+ print(f"\nUploading to {repo_id}...")
+
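+ # Authentication is assumed to be configured already, e.g. via
+ # `huggingface-cli login` or the HF_TOKEN environment variable; HfApi and
+ # create_repo pick up the stored token automatically.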
+ api = HfApi()
+
+ try:
+     create_repo(repo_id, repo_type="model", exist_ok=True)
+     print("✅ Repository created/verified")
+ except Exception as e:
+     print(f"⚠️ Repository warning: {e}")
+
+ try:
+     api.upload_folder(
+         folder_path=upload_folder,
+         repo_id=repo_id,
+         repo_type="model",
+         commit_message=f"REINFORCE CartPole - Mean: {mean_reward:.2f}, Std: {std_reward:.2f}, Score: {score:.2f}"
+     )
+     print(f"\n{'='*60}")
+     print("✅ Upload Successful!")
+     print(f"{'='*60}")
+     print(f"🔗 Model Page: https://huggingface.co/{repo_id}")
+     print("🏆 Check Progress: https://huggingface.co/spaces/ThomasSimonini/Check-my-progress-Deep-RL-Course")
+     print(f"{'='*60}\n")
+
+     print("📋 Important Information:")
+     print(f"  • Environment: {ENV_ID}")
+     print("  • Library tag: reinforce")
+     print("  • Required score: 350.0")
+     print(f"  • Your score: {score:.2f}")
+     print(f"  • Status: {'✅ PASSED' if score >= 350 else '❌ FAILED'}")
+
+ except Exception as e:
+     print(f"\n❌ Upload failed: {e}")
+     print("Please check your Hugging Face token and internet connection.")
+ finally:
+     shutil.rmtree(upload_folder)
+     print("🧹 Cleaned up temporary files")
+
+ print("\n✨ Done!")