From 996a27bfc86f3ad7098da575b9919de243ba028a Mon Sep 17 00:00:00 2001
From: Riko Uphoff <riko.uphoff@student.uni-halle.de>
Date: Sun, 30 Mar 2025 16:14:28 +0200
Subject: [PATCH] Pass --dataset c4 in 60m and 7b pretraining scripts

---
 scripts/shell/pretrain_60m.sh | 1 +
 scripts/shell/pretrain_7b.sh  | 1 +
 2 files changed, 2 insertions(+)

diff --git a/scripts/shell/pretrain_60m.sh b/scripts/shell/pretrain_60m.sh
index f7ae642..a44e6cc 100755
--- a/scripts/shell/pretrain_60m.sh
+++ b/scripts/shell/pretrain_60m.sh
@@ -7,6 +7,7 @@ do
       --mode pretraining \
       --optimizer "$optimizer" \
       --model llama_60m \
+      --dataset c4 \
       --batch_size 512 \
       --num_epochs 1 \
       --num_training_tokens 1310000000 \
diff --git a/scripts/shell/pretrain_7b.sh b/scripts/shell/pretrain_7b.sh
index 2421458..3bc655f 100644
--- a/scripts/shell/pretrain_7b.sh
+++ b/scripts/shell/pretrain_7b.sh
@@ -7,6 +7,7 @@ do
       --mode pretraining \
       --optimizer "$optimizer" \
       --model llama_7b \
+      --dataset c4 \
       --batch_size 512 \
       --num_epochs 1 \
       --num_training_tokens 13100000 \
-- 
GitLab