diff --git a/scripts/shell/pretrain_60m.sh b/scripts/shell/pretrain_60m.sh index f7ae642413e81c0caefd8e2b751fe3bb4ddb96a4..a44e6ccd7220fd4fe3a27e1e00795521e1afe68c 100755 --- a/scripts/shell/pretrain_60m.sh +++ b/scripts/shell/pretrain_60m.sh @@ -7,6 +7,7 @@ do --mode pretraining \ --optimizer "$optimizer" \ --model llama_60m \ + --dataset c4 \ --batch_size 512 \ --num_epochs 1 \ --num_training_tokens 1310000000 \ diff --git a/scripts/shell/pretrain_7b.sh b/scripts/shell/pretrain_7b.sh index 242145856523abca8fa8894dcd4e9fe68d16eea3..3bc655f23737e06bf559544ba894f30f900f4583 100644 --- a/scripts/shell/pretrain_7b.sh +++ b/scripts/shell/pretrain_7b.sh @@ -7,6 +7,7 @@ do --mode pretraining \ --optimizer "$optimizer" \ --model llama_7b \ + --dataset c4 \ --batch_size 512 \ --num_epochs 1 \ --num_training_tokens 13100000 \