return view(-1, 1) (#5)
Browse files
- return view(-1, 1) (4c3fd5f336e9f80299309a2ef125310b50704348)
- modelling_deepseek.py +1 -1
modelling_deepseek.py
CHANGED
|
@@ -341,7 +341,7 @@ class DeepseekMoE(nn.Module):
|
|
| 341 |
y = y.view(*orig_shape)
|
| 342 |
y = AddAuxiliaryLoss.apply(y, aux_loss)
|
| 343 |
else:
|
| 344 |
-
y = self.moe_infer(hidden_states, flat_topk_idx, topk_weight
|
| 345 |
if self.config.n_shared_experts is not None:
|
| 346 |
y = y + self.shared_experts(identity)
|
| 347 |
return y
|
|
|
|
| 341 |
y = y.view(*orig_shape)
|
| 342 |
y = AddAuxiliaryLoss.apply(y, aux_loss)
|
| 343 |
else:
|
| 344 |
+
y = self.moe_infer(hidden_states, flat_topk_idx, topk_weight.view(-1, 1)).view(*orig_shape)
|
| 345 |
if self.config.n_shared_experts is not None:
|
| 346 |
y = y + self.shared_experts(identity)
|
| 347 |
return y
|