From 31552c61eb9d326d7dfad4dcbf00e62ecb6bafba Mon Sep 17 00:00:00 2001 From: Riko Uphoff <riko.uphoff@student.uni-halle.de> Date: Sat, 29 Mar 2025 13:37:34 +0100 Subject: [PATCH] Updated batch size --- scripts/shell/pretrain_60m.sh | 4 ++-- scripts/shell/pretrain_7b.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/shell/pretrain_60m.sh b/scripts/shell/pretrain_60m.sh index dc6a2e0..77432d9 100755 --- a/scripts/shell/pretrain_60m.sh +++ b/scripts/shell/pretrain_60m.sh @@ -4,9 +4,9 @@ python3 main.py \ --mode pretraining \ --optimizer galore \ --model llama_60m \ - --batch_size 8 \ + --batch_size 512 \ --num_epochs 1 \ - --num_training_tokens 10000 \ + --num_training_tokens 1310000000 \ --max_length 256 \ --shuffle false \ --dtype bf16 \ diff --git a/scripts/shell/pretrain_7b.sh b/scripts/shell/pretrain_7b.sh index 5be0e2a..4b04554 100644 --- a/scripts/shell/pretrain_7b.sh +++ b/scripts/shell/pretrain_7b.sh @@ -4,7 +4,7 @@ python3 main.py \ --mode pretraining \ --optimizer galore8bit \ --model llama_7b \ - --batch_size 131000 \ + --batch_size 512 \ --num_epochs 1 \ --num_training_tokens 13100000 \ --max_length 256 \ -- GitLab