Commit 140033be authored by Riko Corwin Uphoff

Fixed number of training steps for real; Adjusted default parameters;

parent c36e7631
Pipeline #25297 passed
@@ -6,7 +6,7 @@ parser.add_argument("--optimizer", type=str, choices=["lora", "galore", "galore8
 parser.add_argument("--lr_scheduler", type=str, choices=["constant", "linear", "cosine"], default="constant")
 parser.add_argument("--model", type=str, choices=["llama_60m", "llama_1b", "llama_7b", "roberta", "gpt2"], required=True, help="Model to use")
 parser.add_argument("--batch_size", type=int, default=16, help="Batch size")
-parser.add_argument("--num_epochs", type=int, default=30, help="Number of epochs")
+parser.add_argument("--num_epochs", type=int, default=1, help="Number of epochs")
 parser.add_argument("--max_length", type=int, default=512, help="Max length of input tokens")
 parser.add_argument("--num_training_tokens", type=int, default=1e9, help="Number of training tokens")
 parser.add_argument("--shuffle", type=str, choices=["true", "false"], default="false", help="Shuffle data (doesn't work in streaming mode)")
@@ -66,6 +66,7 @@ if __name__ == "__main__":
     print(f"Running on: {device}")
     print(f"Using optimizer: {args.optimizer}")
     print(f"Arguments: {args}")
+    init_csv()
     model, tokenizer = get_model(args)
@@ -82,7 +83,8 @@ if __name__ == "__main__":
     optimizer, model = get_optimizer(args, model)
-    num_steps = ceil(args.num_epochs * len(dataloader))
+    num_batches = len(dataloader) if args.mode == "finetuning" else ceil(args.num_training_tokens / args.batch_size)
+    num_steps = args.num_epochs * num_batches
     scheduler = get_scheduler(
         optimizer, args.lr_scheduler, args.warm_up_fraction, num_steps, args.lr, args.lr_min
     )
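For a quick sanity check of the new step count, here is a minimal sketch in plain Python; compute_num_steps and dataloader_len are hypothetical stand-ins for the inline expressions and len(dataloader) used in main.py above, not names from the repository:

from math import ceil

def compute_num_steps(mode, num_epochs, num_training_tokens, batch_size, dataloader_len):
    # Mirrors the computation added in this commit (hypothetical helper).
    if mode == "finetuning":
        # Finetuning iterates a finite dataloader, so its length is the per-epoch batch count.
        num_batches = dataloader_len
    else:
        # Pretraining streams data, so the batch count comes from the token budget instead.
        num_batches = ceil(num_training_tokens / batch_size)
    return num_epochs * num_batches

# Example: finetuning over a 5000-batch dataloader for 1 epoch -> 5000 scheduler steps.
print(compute_num_steps("finetuning", 1, 0, 16, dataloader_len=5000))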
#!/bin/bash
python3 main.py \
--mode pretraining \
--optimizer galore \
--model llama_60m \
--batch_size 8 \
--num_epochs 1 \
--num_training_tokens 10000 \
--max_length 256 \
--shuffle false \
--dtype bf16 \
--lr_scheduler cosine \
--lr 1e-4 \
--lr_min 1e-5 \
--warm_up_fraction 0.1 \
--weight_decay 0 \
--test false
\ No newline at end of file
#!/bin/bash
python3 main.py \
--mode pretraining \
--optimizer galore8bit \
--model llama_7b \
--batch_size 131000 \
--num_epochs 1 \
--num_training_tokens 13100000 \
--max_length 256 \
--shuffle false \
--dtype bf16 \
--lr_scheduler cosine \
--lr 1e-4 \
--lr_min 1e-5 \
--warm_up_fraction 0.1 \
--weight_decay 0 \
--test false
\ No newline at end of file
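Assuming the step computation from the main.py hunk above, this configuration yields ceil(13100000 / 131000) = 100 batches per epoch, so the cosine scheduler runs over 100 steps for the single epoch.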