From 996a27bfc86f3ad7098da575b9919de243ba028a Mon Sep 17 00:00:00 2001 From: Riko Uphoff <riko.uphoff@student.uni-halle.de> Date: Sun, 30 Mar 2025 16:14:28 +0200 Subject: [PATCH] Updated pretraining scripts --- scripts/shell/pretrain_60m.sh | 1 + scripts/shell/pretrain_7b.sh | 1 + 2 files changed, 2 insertions(+) diff --git a/scripts/shell/pretrain_60m.sh b/scripts/shell/pretrain_60m.sh index f7ae642..a44e6cc 100755 --- a/scripts/shell/pretrain_60m.sh +++ b/scripts/shell/pretrain_60m.sh @@ -7,6 +7,7 @@ do --mode pretraining \ --optimizer "$optimizer" \ --model llama_60m \ + --dataset c4 \ --batch_size 512 \ --num_epochs 1 \ --num_training_tokens 1310000000 \ diff --git a/scripts/shell/pretrain_7b.sh b/scripts/shell/pretrain_7b.sh index 2421458..3bc655f 100644 --- a/scripts/shell/pretrain_7b.sh +++ b/scripts/shell/pretrain_7b.sh @@ -7,6 +7,7 @@ do --mode pretraining \ --optimizer "$optimizer" \ --model llama_7b \ + --dataset c4 \ --batch_size 512 \ --num_epochs 1 \ --num_training_tokens 13100000 \ -- GitLab