remove unused config
- config.json +0 -1
- modeling_qwen.py +1 -1
config.json CHANGED

@@ -1,5 +1,4 @@
 {
-  "activation": "swiglu",
   "apply_residual_connection_post_layernorm": false,
   "architectures": [
     "QWenLMHeadModel"
modeling_qwen.py CHANGED

@@ -245,7 +245,7 @@ class QWenAttention(nn.Module):
             math.log(i, self.seq_length) if i > self.seq_length else 1
             for i in range(1, 32768)
         ]
-        self.logn_tensor = torch.
+        self.logn_tensor = torch.tensor(logn_list)[None, :, None, None]
         self._ntk_cached = 1.0
 
         self.attn_dropout = nn.Dropout(config.attn_pdrop)
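
For context, the line added on the right builds the log-n attention-scaling tensor from the logn_list comprehension shown above it. The sketch below is not the repository's forward-pass code; seq_length, query, and the slicing bounds are illustrative assumptions. It only shows how a tensor shaped [1, seq, 1, 1] like this one is typically sliced to the current key length and multiplied into the query states once the context exceeds the training length.

    import math
    import torch

    seq_length = 2048  # assumed training context length

    # Same construction as in the diff: scale log_{seq_length}(i) for
    # positions past the training length, 1 otherwise.
    logn_list = [
        math.log(i, seq_length) if i > seq_length else 1
        for i in range(1, 32768)
    ]
    logn_tensor = torch.tensor(logn_list)[None, :, None, None]  # [1, 32767, 1, 1]

    # Toy query states: [batch, seq, heads, head_dim]
    query = torch.randn(1, 4096, 32, 128)
    key_len = query.size(1)

    # Apply the scale only when the key length exceeds the training length.
    if key_len > seq_length:
        seq_start = key_len - query.size(1)
        logn_slice = logn_tensor[:, seq_start:key_len, :, :]
        query = query * logn_slice.expand_as(query).to(query.dtype)

Positions beyond the training window get a scale greater than 1, which is meant to keep attention weights stable as the context grows longer than what the model saw during training.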