From 31552c61eb9d326d7dfad4dcbf00e62ecb6bafba Mon Sep 17 00:00:00 2001
From: Riko Uphoff <riko.uphoff@student.uni-halle.de>
Date: Sat, 29 Mar 2025 13:37:34 +0100
Subject: [PATCH] Update batch size and 60m training token count

---
 scripts/shell/pretrain_60m.sh | 4 ++--
 scripts/shell/pretrain_7b.sh  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/shell/pretrain_60m.sh b/scripts/shell/pretrain_60m.sh
index dc6a2e0..77432d9 100755
--- a/scripts/shell/pretrain_60m.sh
+++ b/scripts/shell/pretrain_60m.sh
@@ -4,9 +4,9 @@ python3 main.py \
     --mode pretraining \
     --optimizer galore \
     --model llama_60m \
-    --batch_size 8 \
+    --batch_size 512 \
     --num_epochs 1 \
-    --num_training_tokens 10000 \
+    --num_training_tokens 1310000000 \
     --max_length 256 \
     --shuffle false \
     --dtype bf16 \
diff --git a/scripts/shell/pretrain_7b.sh b/scripts/shell/pretrain_7b.sh
index 5be0e2a..4b04554 100644
--- a/scripts/shell/pretrain_7b.sh
+++ b/scripts/shell/pretrain_7b.sh
@@ -4,7 +4,7 @@ python3 main.py \
     --mode pretraining \
     --optimizer galore8bit \
     --model llama_7b \
-    --batch_size 131000 \
+    --batch_size 512 \
     --num_epochs 1 \
     --num_training_tokens 13100000 \
     --max_length 256 \
-- 
GitLab