From c36e76311bdf330c19f950197a5cac9f3808db66 Mon Sep 17 00:00:00 2001
From: Riko Uphoff <riko.uphoff@student.uni-halle.de>
Date: Sat, 29 Mar 2025 12:27:24 +0100
Subject: [PATCH] Added package versions; Added date tag on output CSV file;
 Fixed(?) number of steps;

---
 logger.py        |  7 ++++++-
 main.py          | 14 +++++++-------
 requirements.txt | 16 ++++++++--------
 3 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/logger.py b/logger.py
index 13e465e..794367c 100644
--- a/logger.py
+++ b/logger.py
@@ -2,8 +2,11 @@ import torch
 import psutil
 import csv
 import math
+import datetime
+
+NOW_STRING = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+CSV_FILE = f"output_{NOW_STRING}.csv"
 
-CSV_FILE = "output.csv"
 
 def init_csv():
     """Initialize CSV file with headers."""
@@ -13,6 +16,7 @@
             "peak_memory_usage_allocated_GB", "peak_memory_usage_reserved_GB",
             "loss", "perplexity"])
 
+
 def measure_memory():
     """Measure memory usage from CUDA or CPU."""
     if torch.cuda.is_available():
@@ -28,6 +32,7 @@
 
     return peak_history, max_allocated, max_reserved
 
+
 def log_to_csv(epoch, step, compute_time, loss):
     """Log training metrics to CSV file."""
     peak_history, max_allocated, max_reserved = measure_memory()
diff --git a/main.py b/main.py
index 2cc3203..b077348 100644
--- a/main.py
+++ b/main.py
@@ -72,13 +72,6 @@ if __name__ == "__main__":
 
     dataset = load_data(args, tokenizer)
 
-    optimizer, model = get_optimizer(args, model)
-
-    num_steps = ceil(args.num_epochs * len(dataset) / args.batch_size)
-    scheduler = get_scheduler(
-        optimizer, args.lr_scheduler, args.warm_up_fraction, num_steps, args.lr, args.lr_min
-    )
-
     shuffle = True if args.shuffle == "true" else False
     if args.mode == "pretraining":
         dataloader = DataLoader(dataset, batch_size=args.batch_size, shuffle=shuffle)
@@ -87,6 +80,13 @@
     else:
         raise ValueError("Invalid mode. Choose 'pretraining' or 'finetuning'")
 
+    optimizer, model = get_optimizer(args, model)
+
+    num_steps = ceil(args.num_epochs * len(dataloader))
+    scheduler = get_scheduler(
+        optimizer, args.lr_scheduler, args.warm_up_fraction, num_steps, args.lr, args.lr_min
+    )
+
     trained_model = train(device, accelerator, scheduler, model, optimizer, dataloader, num_epochs=args.num_epochs)
 
     file_name = f"{args.model}_{args.optimizer}_pretrained" if args.mode == "pretraining" else f"{args.model}_{args.optimizer}_finetuned"
diff --git a/requirements.txt b/requirements.txt
index 0220976..3ca4858 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,8 @@
-galore-torch
-datasets
-transformers
-torch
-accelerate
-psutil
-peft
-argparse
\ No newline at end of file
+galore-torch==1.0
+datasets==3.1.0
+transformers==4.46.3
+torch==2.4.1
+accelerate==1.0.1
+psutil==7.0.0
+peft==0.13.2
+argparse==1.4.0
\ No newline at end of file
--
GitLab
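
Note on the num_steps change (an editorial aside, not part of the patch): the
scheduler is now built after the DataLoader, and the step count comes from
len(dataloader) rather than len(dataset) / batch_size. With drop_last=False a
partial final batch still costs one optimizer step per epoch, so the old
formula could undercount and the LR schedule would finish before training did.
A minimal sketch of the arithmetic, using hypothetical sizes:

    from math import ceil

    dataset_size, batch_size, num_epochs = 10, 3, 3

    # Old estimate: taking ceil over the whole product hides the
    # per-epoch rounding of the last partial batch.
    old_steps = ceil(num_epochs * dataset_size / batch_size)   # ceil(30 / 3) = 10

    # Steps actually taken: each epoch runs ceil(10 / 3) = 4 batches,
    # which is exactly what len(dataloader) reports with drop_last=False.
    new_steps = num_epochs * ceil(dataset_size / batch_size)   # 3 * 4 = 12

    print(old_steps, new_steps)  # 10 12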