Commit c36e7631 authored by Riko Corwin Uphoff

Added package versions; Added date tag on output CSV file; Fixed(?) number of steps;

parent 9cefc71a
Pipeline #25296 passed
@@ -2,8 +2,11 @@ import torch
 import psutil
 import csv
 import math
+import datetime
 
-CSV_FILE = "output.csv"
+NOW_STRING = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+CSV_FILE = f"output_{NOW_STRING}.csv"
 
 def init_csv():
     """Initialize CSV file with headers."""
@@ -13,6 +16,7 @@ def init_csv():
         "peak_memory_usage_allocated_GB", "peak_memory_usage_reserved_GB",
         "loss", "perplexity"])
 
+
 def measure_memory():
     """Measure memory usage from CUDA or CPU."""
     if torch.cuda.is_available():
@@ -28,6 +32,7 @@ def measure_memory():
     return peak_history, max_allocated, max_reserved
 
+
 def log_to_csv(epoch, step, compute_time, loss):
     """Log training metrics to CSV file."""
    peak_history, max_allocated, max_reserved = measure_memory()
...
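Note on the CSV change: the timestamp suffix means each run writes to a fresh file instead of silently overwriting output.csv. A minimal standalone sketch of the new naming scheme (the printed date below is illustrative only):

import datetime

# Same strftime pattern as the commit: year-month-day_hour-minute-second
now_string = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
csv_file = f"output_{now_string}.csv"
print(csv_file)  # e.g. output_2024-12-03_14-05-31.csv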
@@ -72,13 +72,6 @@ if __name__ == "__main__":
     dataset = load_data(args, tokenizer)
 
-    optimizer, model = get_optimizer(args, model)
-    num_steps = ceil(args.num_epochs * len(dataset) / args.batch_size)
-    scheduler = get_scheduler(
-        optimizer, args.lr_scheduler, args.warm_up_fraction, num_steps, args.lr, args.lr_min
-    )
-
     shuffle = True if args.shuffle == "true" else False
     if args.mode == "pretraining":
         dataloader = DataLoader(dataset, batch_size=args.batch_size, shuffle=shuffle)
@@ -87,6 +80,13 @@ if __name__ == "__main__":
     else:
         raise ValueError("Invalid mode. Choose 'pretraining' or 'finetuning'")
 
+    optimizer, model = get_optimizer(args, model)
+    num_steps = ceil(args.num_epochs * len(dataloader))
+    scheduler = get_scheduler(
+        optimizer, args.lr_scheduler, args.warm_up_fraction, num_steps, args.lr, args.lr_min
+    )
+
     trained_model = train(device, accelerator, scheduler, model, optimizer, dataloader, num_epochs=args.num_epochs)
     file_name = f"{args.model}_{args.optimizer}_pretrained" if args.mode == "pretraining" else f"{args.model}_{args.optimizer}_finetuned"
...
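Note on the step-count fix: moving the computation after the DataLoader is built also changes the formula. len(dataloader) already equals ceil(len(dataset) / batch_size) when drop_last is left at its default of False, so the new count rounds up once per epoch, matching the number of batches the loop actually sees, whereas the old formula applied a single ceil across all epochs and could undercount. A small sketch with hypothetical sizes (10 samples, batch size 3, 4 epochs) showing the difference:

from math import ceil
import torch
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(torch.zeros(10))      # 10 samples
loader = DataLoader(dataset, batch_size=3)    # drop_last=False by default
num_epochs = 4

assert len(loader) == ceil(len(dataset) / 3)  # 4 batches per epoch

old_steps = ceil(num_epochs * len(dataset) / 3)  # ceil(40 / 3) == 14
new_steps = ceil(num_epochs * len(loader))       # 4 * 4 == 16, the real step count
print(old_steps, new_steps)  # 14 16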
-galore-torch
-datasets
-transformers
-torch
-accelerate
-psutil
-peft
-argparse
+galore-torch==1.0
+datasets==3.1.0
+transformers==4.46.3
+torch==2.4.1
+accelerate==1.0.1
+psutil==7.0.0
+peft==0.13.2
+argparse==1.4.0
\ No newline at end of file
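With every dependency pinned, the environment can be recreated reproducibly with:

pip install -r requirements.txt

One caveat: argparse has shipped with the standard library since Python 3.2, so the argparse==1.4.0 pin resolves to the old PyPI backport and is probably redundant here.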