From e03afc92c2dce264dec7e27249a6f8b6bc852a6a Mon Sep 17 00:00:00 2001
From: Till-Ole Herbst <till-ole.herbst@student.uni-halle.de>
Date: Wed, 4 Sep 2024 11:26:09 +0200
Subject: [PATCH] pipeline

---
 batch_pipeline.sh | 10 ++++++
 pipeline.sh       | 77 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 87 insertions(+)
 create mode 100644 batch_pipeline.sh
 create mode 100644 pipeline.sh

diff --git a/batch_pipeline.sh b/batch_pipeline.sh
new file mode 100644
index 000000000..058e287b5
--- /dev/null
+++ b/batch_pipeline.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+#SBATCH --job-name=pipe1
+#SBATCH --gres=gpu:1
+#SBATCH --cpus-per-gpu=8
+#SBATCH --mem-per-cpu=4G
+
+cd /home/ambcj/BA/text2sql-sft
+source dbgpt-hub/bin/activate
+
+sh pipeline.sh
diff --git a/pipeline.sh b/pipeline.sh
new file mode 100644
index 000000000..13f3937c9
--- /dev/null
+++ b/pipeline.sh
@@ -0,0 +1,77 @@
+wandb offline # disable wandb syncing
+# A100, single GPU
+current_date=$(date +"%Y%m%d_%H%M")
+train_log="output/logs/train_sft_test_${current_date}.log"
+start_time=$(date +%s)
+echo "Train Start time: $(date -d @$start_time +'%Y-%m-%d %H:%M:%S')" >>${train_log}
+
+# default: zero-shot training
+num_shot=0
+
+# one-shot training
+# num_shot=1
+
+dataset_train="example_text2sql_train"
+dataset_dev="data/example_text2sql_dev.json"
+if [ "$num_shot" -eq 1 ]; then
+    dataset_train="example_text2sql_train_one_shot"
+fi
+model_name_or_path="meta-llama/Meta-Llama-3-8B-Instruct"
+output_dir="output/adapter/llama3_instruct_qlora"
+
+# The default parameters fit on a single A100 (40G) GPU; if your server cannot handle them, use smaller values (e.g. a lower lora_rank) or QLoRA with 4-bit quantization.
+CUDA_VISIBLE_DEVICES=$(nvidia-smi --query-gpu=memory.free,index --format=csv,nounits,noheader | sort -nr | head -1 | awk '{ print $NF }') python -m src.sft_train \
+    --model_name_or_path $model_name_or_path \
+    --quantization_bit 4 \
+    --do_train \
+    --dataset $dataset_train \
+    --max_source_length 2048 \
+    --max_target_length 512 \
+    --finetuning_type lora \
+    --lora_target q_proj,v_proj \
+    --template llama3 \
+    --lora_rank 64 \
+    --lora_alpha 32 \
+    --output_dir $output_dir \
+    --overwrite_cache \
+    --overwrite_output_dir \
+    --per_device_train_batch_size 1 \
+    --gradient_accumulation_steps 16 \
+    --lr_scheduler_type cosine_with_restarts \
+    --logging_steps 50 \
+    --save_steps 2000 \
+    --learning_rate 2e-4 \
+    --num_train_epochs 8 \
+    --plot_loss \
+    --bf16 >> ${train_log}
+    # --bf16  # V100 GPUs do not support bf16
+
+echo "############train end###############" >>${train_log}
+echo "Train End time: $(date)" >>${train_log}
+end_time=$(date +%s)
+duration=$((end_time - start_time))
+hours=$((duration / 3600))
+min=$(( (duration % 3600) / 60))
+echo "Time elapsed: ${hours} hour ${min} min" >>${train_log}
+
+current_date=$(date +"%Y%m%d_%H%M")
+pred_log="output/logs/pred_test_${current_date}.log"
+start_time=$(date +%s)
+echo "Pred Start time: $(date -d @$start_time +'%Y-%m-%d %H:%M:%S')" >>${pred_log}
+
+CUDA_VISIBLE_DEVICES=$(nvidia-smi --query-gpu=memory.free,index --format=csv,nounits,noheader | sort -nr | head -1 | awk '{ print $NF }') python -m src.predict \
+    --model_name_or_path $model_name_or_path \
+    --template llama3 \
+    --quantization_bit 4 \
+    --finetuning_type lora \
+    --predicted_input_filename $dataset_dev \
+    --checkpoint_dir $output_dir \
+    --predicted_out_filename output/pred/pred_llama3_instruct_qlora.sql >> ${pred_log}
+
+echo "############pred end###############" >>${pred_log}
+echo "Pred End time: $(date)" >>${pred_log}
+end_time=$(date +%s)
+duration=$((end_time - start_time))
+hours=$((duration / 3600))
+min=$(( (duration % 3600) / 60))
+echo "Time elapsed: ${hours} hour ${min} min" >>${pred_log}
--
GitLab