Upload model
Browse files- modeling_t5mimo.py +3 -3
modeling_t5mimo.py
CHANGED
@@ -1325,7 +1325,7 @@ class T5MIMOForConditionalGeneration(T5PreTrainedModel):
|
|
1325 |
self.decoder = T5Stack(decoder_config, self.shared)
|
1326 |
|
1327 |
|
1328 |
-
|
1329 |
self.lm_head = nn.Linear(config.d_model, config.vocab_size, bias=False)
|
1330 |
|
1331 |
# Initialize weights and apply final processing
|
@@ -1518,8 +1518,8 @@ class T5MIMOForConditionalGeneration(T5PreTrainedModel):
|
|
1518 |
sequence_output = decoder_outputs[0]
|
1519 |
|
1520 |
|
1521 |
-
|
1522 |
-
|
1523 |
|
1524 |
# Set device for model parallelism
|
1525 |
if self.model_parallel:
|
|
|
1325 |
self.decoder = T5Stack(decoder_config, self.shared)
|
1326 |
|
1327 |
|
1328 |
+
self.conv_block = MultivariateConvBlock(config)
|
1329 |
self.lm_head = nn.Linear(config.d_model, config.vocab_size, bias=False)
|
1330 |
|
1331 |
# Initialize weights and apply final processing
|
|
|
1518 |
sequence_output = decoder_outputs[0]
|
1519 |
|
1520 |
|
1521 |
+
if use_conv:
|
1522 |
+
sequence_output = self.conv_block(sequence_output)
|
1523 |
|
1524 |
# Set device for model parallelism
|
1525 |
if self.model_parallel:
|