Commit c14b4bab authored by Armin Bacher

DDP-wrap problems

parent f3c213d1
@@ -96,6 +96,7 @@ def count_model_params(model):
 def train_model(attention_impl="torch"):
     model = get_gpt2_model(attention_impl)
+    config = model.config
     model = accelerator.prepare(model)
     train_args = TrainingArguments(
@@ -117,7 +118,7 @@ def train_model(attention_impl="torch"):
     global_batch_size = per_device_batch_size * world_size
     print(f"\n Benchmark-Konfiguration für {attention_impl.upper()}")
-    print(f" Modell: GPT2 | Layers: {model.config.n_layer} | Embedding Dim: {model.config.n_embd}")
+    print(f" Modell: GPT2 | Layers: {config.n_layer} | Embedding Dim: {model.config.n_embd}")
     print(f" Sequence Length: {model.config.n_positions} | Batch Size: {per_device_batch_size} | Global Batch: {global_batch_size} | FP16: {train_args.fp16}")
     start_time = time.time()
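The change caches the Hugging Face config before `accelerator.prepare(model)`: under a multi-process launch, Accelerate wraps the model in `torch.nn.parallel.DistributedDataParallel`, which does not forward plain Python attributes to the wrapped module, so `model.config` raises AttributeError (the remaining `model.config.n_embd` and `model.config.n_positions` reads in this hunk would hit the same error). Below is a minimal sketch of the problem and two common workarounds, assuming `transformers` and `accelerate` are installed; `GPT2LMHeadModel(GPT2Config())` is a stand-in for the repo's `get_gpt2_model`, which is not shown in this diff.

# Sketch only: reproduces the DDP-wrap attribute problem this commit
# works around. GPT2LMHeadModel stands in for the repo's get_gpt2_model().
from accelerate import Accelerator
from transformers import GPT2Config, GPT2LMHeadModel

accelerator = Accelerator()

model = GPT2LMHeadModel(GPT2Config())
config = model.config  # cache while `model` is still the plain HF model

model = accelerator.prepare(model)
# When launched with `accelerate launch` across multiple processes, `model`
# is now a DistributedDataParallel wrapper; `model.config` raises
# AttributeError because DDP does not expose the inner model's attributes.

print(config.n_layer)                                  # workaround 1: use the cached config
print(accelerator.unwrap_model(model).config.n_layer)  # workaround 2: unwrap, then read

Caching the config up front (workaround 1, as this commit does) keeps the hot path free of unwrap calls; `accelerator.unwrap_model` is the safer choice when the wrapped model itself is needed, e.g. for saving checkpoints.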