diff --git a/src/models/sofia_modelle/CNN.ipynb b/src/models/sofia_modelle/CNN.ipynb index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..b8fb945c12585107e0f43da88774f97c487a0c4f 100644 --- a/src/models/sofia_modelle/CNN.ipynb +++ b/src/models/sofia_modelle/CNN.ipynb @@ -0,0 +1,125 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "from CNN import CNN\n", + "import sys\n", + "import pathlib\n", + "sys.path.append(str(pathlib.Path.cwd() / '../../'))\n", + "from src.utils.slidingWindows import find_length_rank\n", + "from src.run_model_wrapper import main" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "params = {\n", + " 'window_size': [50, 100, 150],\n", + " 'num_channel': [[32, 32, 40], [16, 32, 64]]\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def run_CNN(data_train, data_test, window_size=100, num_channel=[32, 32, 40], lr=0.0008, n_jobs=1):\n", + " clf = CNN(window_size=window_size, num_channel=num_channel, feats=data_test.shape[1], lr=lr, batch_size=128)\n", + " clf.fit(data_train)\n", + " score = clf.decision_function(data_test)\n", + " return score.ravel()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = 'CNN'\n", + "output_path = '../../../docs/evaluation/'\n", + "\n", + "main(run_CNN,params,model,data_folders = '../../../data/', model_type='semi-supervised',output_dir = output_path)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import os\n", + "data_empty = {\n", + " 'params':[],\n", + " 'file_name': [],\n", + " 'duration': [],\n", + " 'group': [],\n", + " 'point anomaly': [],\n", + " 'seq anomaly': [],\n", + " 'AUC-PR': [],\n", + " 'AUC-ROC': [],\n", + " 'VUS-PR': [],\n", + " 'VUS-ROC': [],\n", + " 'Standard-F1': [],\n", + " 'PA-F1': [],\n", + " 'Event-based-F1': [],\n", + " 'R-based-F1': [],\n", + " 'Affiliation-F': [],\n", + " 'Recall': [],\n", + " 'Precision': []\n", + "}\n", + "\n", + "df = pd.DataFrame(data_empty)\n", + "\n", + "path = '../../../docs/evaluation/'\n", + "model = 'CNN'\n", + "# concatenate all batch files into one dataframe\n", + "for file in os.listdir(path):\n", + " file_path = os.path.join(path, file)\n", + " # check if the current file belongs to the selected model and avoid overwriting the existing model.csv\n", + " if file.startswith(model) and file.split('.')[0] != model:\n", + " df_batch = pd.read_csv(file_path)\n", + " # append the batch to the combined dataframe\n", + " df = pd.concat([df, df_batch])\n", + "\n", + "df.shape" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.head()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.to_csv(output_path+model+'.csv')" ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/models/sofia_modelle/IForest.ipynb b/src/models/sofia_modelle/IForest.ipynb index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..2de109e887e28e0194f819a1378524822aa109ab 100644 --- a/src/models/sofia_modelle/IForest.ipynb +++ b/src/models/sofia_modelle/IForest.ipynb @@ -0,0 +1,124 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, +
"metadata": {}, + "outputs": [], + "source": [ + "# importe\n", + "from IForest import IForest\n", + "import sys\n", + "import pathlib\n", + "sys.path.append(str(pathlib.Path.absolute)+ '../../')\n", + "from src.utils.slidingWindows import find_length_rank\n", + "from src.run_model_wrapper import main" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "params = {\n", + " 'n_estimators': [25, 50, 100, 150, 200]\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def run_IForest(data, slidingWindow=100, n_estimators=100, max_features=1, n_jobs=1):\n", + " clf = IForest(slidingWindow=slidingWindow, n_estimators=n_estimators, max_features=max_features, n_jobs=n_jobs)\n", + " clf.fit(data)\n", + " score = clf.decision_scores_\n", + " return score.ravel()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = 'IForest'\n", + "output_path = '../../../docs/evaluation/'\n", + "\n", + "main(run_IForest,params,model,data_folders = '../../../data/', model_type='unsupervised',output_dir = output_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import os\n", + "data_empty = {\n", + " 'params':[],\n", + " 'file_name': [],\n", + " 'duration': [],\n", + " 'group': [],\n", + " 'point anomaly': [],\n", + " 'seq anomaly': [],\n", + " 'AUC-PR': [],\n", + " 'AUC-ROC': [],\n", + " 'VUS-PR': [],\n", + " 'VUS-ROC': [],\n", + " 'Standard-F1': [],\n", + " 'PA-F1': [],\n", + " 'Event-based-F1': [],\n", + " 'R-based-F1': [],\n", + " 'Affiliation-F': [],\n", + " 'Recall': [],\n", + " 'Precision': []\n", + "}\n", + "\n", + "df = pd.DataFrame(data_empty)\n", + "\n", + "path = '../../../docs/evaluation/'\n", + "model = 'IForest'\n", + "#concant all batch-files to big one\n", + "for file in os.listdir(path):\n", + " file_path = os.path.join(path,file_path)\n", + " #check if current file belongs to selected model and avoid overwriting existing model.csv data\n", + " if file.startswith(model) and file.split('.')[0] != model:\n", + " df_batch = pd.read_csv(file_path)\n", + " #join with dataframe with all data\n", + " df = pd.concat(df,df_batch)\n", + "\n", + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.to_csv(output_path+model+'.csv')" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/models/sofia_modelle/KNN.ipynb b/src/models/sofia_modelle/KNN.ipynb index 4b34ef00372d9c01eb5a314036145108951bce8d..800bbb1b67778fa5feafa4a0706f03d4fa17fbbd 100644 --- a/src/models/sofia_modelle/KNN.ipynb +++ b/src/models/sofia_modelle/KNN.ipynb @@ -2,9 +2,18 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CUDA available: False\n", + "cuDNN version: None\n" + ] + } + ], "source": [ "# importe\n", "from KNN import KNN\n", @@ -17,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -31,7 +40,7 @@ }, { "cell_type": "code", - 
"execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -45,7 +54,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -61,10 +70,174 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Start Processing files\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Start Hyperparameter Tuning\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/threadpoolctl.py:1214: RuntimeWarning: \n", + "Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at\n", + "the same time. Both libraries are known to be incompatible and this\n", + "can cause random crashes or deadlocks on Linux when loaded in the\n", + "same Python program.\n", + "Using threadpoolctl may cause crashes or deadlocks. For more\n", + "information and possible workarounds, please see\n", + " https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md\n", + "\n", + " warnings.warn(msg, RuntimeWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File: 806_YAHOO_id_256_Synthetic_tr_500_1st_893.csv, best hyperparameter: {'n_neighbors': 40, 'method': 'largest'}\n", + "File: 811_Exathlon_id_2_Facility_tr_10766_1st_12590.csv, best hyperparameter: {'n_neighbors': 10, 'method': 'mean'}\n", + "File: 780_YAHOO_id_230_Synthetic_tr_500_1st_893.csv, best hyperparameter: {'n_neighbors': 30, 'method': 'largest'}\n", + "File: 734_YAHOO_id_184_WebService_tr_500_1st_768.csv, best hyperparameter: {'n_neighbors': 50, 'method': 'largest'}\n", + "File: 555_YAHOO_id_5_WebService_tr_500_1st_730.csv, best hyperparameter: {'n_neighbors': 10, 'method': 'mean'}\n", + "File: 814_Exathlon_id_5_Facility_tr_10766_1st_12590.csv, best hyperparameter: {'n_neighbors': 10, 'method': 'mean'}\n", + "File: 658_YAHOO_id_108_Synthetic_tr_500_1st_893.csv, best hyperparameter: {'n_neighbors': 10, 'method': 'largest'}\n", + "File: 723_YAHOO_id_173_WebService_tr_500_1st_1214.csv, best hyperparameter: {'n_neighbors': 50, 'method': 'largest'}\n", + "File: 818_Exathlon_id_9_Facility_tr_11665_1st_13484.csv, best hyperparameter: {'n_neighbors': 10, 'method': 'mean'}\n", + "File: 549_SMAP_id_19_Sensor_tr_1998_1st_2098.csv, best hyperparameter: {'n_neighbors': 40, 'method': 'largest'}\n", + "File: 603_YAHOO_id_53_Synthetic_tr_500_1st_893.csv, best hyperparameter: {'n_neighbors': 10, 'method': 'mean'}\n", + "File: 570_YAHOO_id_20_Synthetic_tr_500_1st_658.csv, best hyperparameter: {'n_neighbors': 10, 'method': 'median'}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/sofiawuckert/Desktop/master_bioinformatik/übungen/2_Semester/data_mining_maschinelles_lernen/data-mining/src/utils/utility.py:30: RuntimeWarning: invalid value encountered in divide\n", + " res = ((a - np.expand_dims(mns, axis=axis)) /\n", + "/Users/sofiawuckert/Desktop/master_bioinformatik/übungen/2_Semester/data_mining_maschinelles_lernen/data-mining/src/utils/utility.py:30: RuntimeWarning: invalid value encountered in divide\n", + " res = ((a - np.expand_dims(mns, axis=axis)) /\n", + "/Users/sofiawuckert/Desktop/master_bioinformatik/übungen/2_Semester/data_mining_maschinelles_lernen/data-mining/src/utils/utility.py:30: RuntimeWarning: invalid value encountered in divide\n", + " res = 
((a - np.expand_dims(mns, axis=axis)) /\n", + "/Users/sofiawuckert/Desktop/master_bioinformatik/übungen/2_Semester/data_mining_maschinelles_lernen/data-mining/src/utils/utility.py:30: RuntimeWarning: invalid value encountered in divide\n", + " res = ((a - np.expand_dims(mns, axis=axis)) /\n", + "/Users/sofiawuckert/Desktop/master_bioinformatik/übungen/2_Semester/data_mining_maschinelles_lernen/data-mining/src/utils/utility.py:30: RuntimeWarning: invalid value encountered in divide\n", + " res = ((a - np.expand_dims(mns, axis=axis)) /\n", + "/Users/sofiawuckert/Desktop/master_bioinformatik/übungen/2_Semester/data_mining_maschinelles_lernen/data-mining/src/utils/utility.py:30: RuntimeWarning: invalid value encountered in divide\n", + " res = ((a - np.expand_dims(mns, axis=axis)) /\n", + "/Users/sofiawuckert/Desktop/master_bioinformatik/übungen/2_Semester/data_mining_maschinelles_lernen/data-mining/src/utils/utility.py:30: RuntimeWarning: invalid value encountered in divide\n", + " res = ((a - np.expand_dims(mns, axis=axis)) /\n", + "/Users/sofiawuckert/Desktop/master_bioinformatik/übungen/2_Semester/data_mining_maschinelles_lernen/data-mining/src/utils/utility.py:30: RuntimeWarning: invalid value encountered in divide\n", + " res = ((a - np.expand_dims(mns, axis=axis)) /\n", + "/Users/sofiawuckert/Desktop/master_bioinformatik/übungen/2_Semester/data_mining_maschinelles_lernen/data-mining/src/utils/utility.py:30: RuntimeWarning: invalid value encountered in divide\n", + " res = ((a - np.expand_dims(mns, axis=axis)) /\n", + "/Users/sofiawuckert/Desktop/master_bioinformatik/übungen/2_Semester/data_mining_maschinelles_lernen/data-mining/src/utils/utility.py:30: RuntimeWarning: invalid value encountered in divide\n", + " res = ((a - np.expand_dims(mns, axis=axis)) /\n", + "/Users/sofiawuckert/Desktop/master_bioinformatik/übungen/2_Semester/data_mining_maschinelles_lernen/data-mining/src/utils/utility.py:30: RuntimeWarning: invalid value encountered in divide\n", + " res = ((a - np.expand_dims(mns, axis=axis)) /\n", + "/Users/sofiawuckert/Desktop/master_bioinformatik/übungen/2_Semester/data_mining_maschinelles_lernen/data-mining/src/utils/utility.py:30: RuntimeWarning: invalid value encountered in divide\n", + " res = ((a - np.expand_dims(mns, axis=axis)) /\n", + "/Users/sofiawuckert/Desktop/master_bioinformatik/übungen/2_Semester/data_mining_maschinelles_lernen/data-mining/src/utils/utility.py:30: RuntimeWarning: invalid value encountered in divide\n", + " res = ((a - np.expand_dims(mns, axis=axis)) /\n", + "/Users/sofiawuckert/Desktop/master_bioinformatik/übungen/2_Semester/data_mining_maschinelles_lernen/data-mining/src/utils/utility.py:30: RuntimeWarning: invalid value encountered in divide\n", + " res = ((a - np.expand_dims(mns, axis=axis)) /\n", + "/Users/sofiawuckert/Desktop/master_bioinformatik/übungen/2_Semester/data_mining_maschinelles_lernen/data-mining/src/utils/utility.py:30: RuntimeWarning: invalid value encountered in divide\n", + " res = ((a - np.expand_dims(mns, axis=axis)) /\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File: 813_Exathlon_id_4_Facility_tr_10766_1st_12590.csv, best hyperparameter: {'n_neighbors': 10, 'method': 'mean'}\n", + "File: 810_Exathlon_id_1_Facility_tr_10766_1st_12590.csv, best hyperparameter: {'n_neighbors': 50, 'method': 'largest'}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"/Users/sofiawuckert/Desktop/master_bioinformatik/übungen/2_Semester/data_mining_maschinelles_lernen/data-mining/src/utils/utility.py:30: RuntimeWarning: invalid value encountered in divide\n", + " res = ((a - np.expand_dims(mns, axis=axis)) /\n", + "/Users/sofiawuckert/Desktop/master_bioinformatik/übungen/2_Semester/data_mining_maschinelles_lernen/data-mining/src/utils/utility.py:30: RuntimeWarning: invalid value encountered in divide\n", + " res = ((a - np.expand_dims(mns, axis=axis)) /\n", + "/Users/sofiawuckert/Desktop/master_bioinformatik/übungen/2_Semester/data_mining_maschinelles_lernen/data-mining/src/utils/utility.py:30: RuntimeWarning: invalid value encountered in divide\n", + " res = ((a - np.expand_dims(mns, axis=axis)) /\n", + "/Users/sofiawuckert/Desktop/master_bioinformatik/übungen/2_Semester/data_mining_maschinelles_lernen/data-mining/src/utils/utility.py:30: RuntimeWarning: invalid value encountered in divide\n", + " res = ((a - np.expand_dims(mns, axis=axis)) /\n", + "/Users/sofiawuckert/Desktop/master_bioinformatik/übungen/2_Semester/data_mining_maschinelles_lernen/data-mining/src/utils/utility.py:30: RuntimeWarning: invalid value encountered in divide\n", + " res = ((a - np.expand_dims(mns, axis=axis)) /\n", + "/Users/sofiawuckert/Desktop/master_bioinformatik/übungen/2_Semester/data_mining_maschinelles_lernen/data-mining/src/utils/utility.py:30: RuntimeWarning: invalid value encountered in divide\n", + " res = ((a - np.expand_dims(mns, axis=axis)) /\n", + "/Users/sofiawuckert/Desktop/master_bioinformatik/übungen/2_Semester/data_mining_maschinelles_lernen/data-mining/src/utils/utility.py:30: RuntimeWarning: invalid value encountered in divide\n", + " res = ((a - np.expand_dims(mns, axis=axis)) /\n", + "/Users/sofiawuckert/Desktop/master_bioinformatik/übungen/2_Semester/data_mining_maschinelles_lernen/data-mining/src/utils/utility.py:30: RuntimeWarning: invalid value encountered in divide\n", + " res = ((a - np.expand_dims(mns, axis=axis)) /\n", + "/Users/sofiawuckert/Desktop/master_bioinformatik/übungen/2_Semester/data_mining_maschinelles_lernen/data-mining/src/utils/utility.py:30: RuntimeWarning: invalid value encountered in divide\n", + " res = ((a - np.expand_dims(mns, axis=axis)) /\n", + "/Users/sofiawuckert/Desktop/master_bioinformatik/übungen/2_Semester/data_mining_maschinelles_lernen/data-mining/src/utils/utility.py:30: RuntimeWarning: invalid value encountered in divide\n", + " res = ((a - np.expand_dims(mns, axis=axis)) /\n", + "/Users/sofiawuckert/Desktop/master_bioinformatik/übungen/2_Semester/data_mining_maschinelles_lernen/data-mining/src/utils/utility.py:30: RuntimeWarning: invalid value encountered in divide\n", + " res = ((a - np.expand_dims(mns, axis=axis)) /\n", + "/Users/sofiawuckert/Desktop/master_bioinformatik/übungen/2_Semester/data_mining_maschinelles_lernen/data-mining/src/utils/utility.py:30: RuntimeWarning: invalid value encountered in divide\n", + " res = ((a - np.expand_dims(mns, axis=axis)) /\n", + "/Users/sofiawuckert/Desktop/master_bioinformatik/übungen/2_Semester/data_mining_maschinelles_lernen/data-mining/src/utils/utility.py:30: RuntimeWarning: invalid value encountered in divide\n", + " res = ((a - np.expand_dims(mns, axis=axis)) /\n", + "/Users/sofiawuckert/Desktop/master_bioinformatik/übungen/2_Semester/data_mining_maschinelles_lernen/data-mining/src/utils/utility.py:30: RuntimeWarning: invalid value encountered in divide\n", + " res = ((a - np.expand_dims(mns, axis=axis)) /\n", + 
"/Users/sofiawuckert/Desktop/master_bioinformatik/übungen/2_Semester/data_mining_maschinelles_lernen/data-mining/src/utils/utility.py:30: RuntimeWarning: invalid value encountered in divide\n", + " res = ((a - np.expand_dims(mns, axis=axis)) /\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File: 817_Exathlon_id_8_Facility_tr_10766_1st_12590.csv, best hyperparameter: {'n_neighbors': 10, 'method': 'mean'}\n", + "File: 724_YAHOO_id_174_WebService_tr_500_1st_1030.csv, best hyperparameter: {'n_neighbors': 10, 'method': 'largest'}\n", + "File: 701_YAHOO_id_151_Synthetic_tr_500_1st_893.csv, best hyperparameter: {'n_neighbors': 10, 'method': 'median'}\n", + "File: 815_Exathlon_id_6_Facility_tr_10766_1st_12590.csv, best hyperparameter: {'n_neighbors': 30, 'method': 'mean'}\n", + "File: 672_YAHOO_id_122_WebService_tr_500_1st_857.csv, best hyperparameter: {'n_neighbors': 10, 'method': 'largest'}\n", + "File: 680_YAHOO_id_130_Synthetic_tr_500_1st_893.csv, best hyperparameter: {'n_neighbors': 10, 'method': 'mean'}\n", + "File: 821_Exathlon_id_12_Facility_tr_6985_1st_7085.csv, best hyperparameter: {'n_neighbors': 10, 'method': 'mean'}\n", + "File: 649_YAHOO_id_99_WebService_tr_500_1st_1386.csv, best hyperparameter: {'n_neighbors': 30, 'method': 'median'}\n", + "File: 819_Exathlon_id_10_Facility_tr_10766_1st_12590.csv, best hyperparameter: {'n_neighbors': 50, 'method': 'largest'}\n", + "File: 568_YAHOO_id_18_WebService_tr_500_1st_333.csv, best hyperparameter: {'n_neighbors': 50, 'method': 'median'}\n", + "File: 762_YAHOO_id_212_WebService_tr_500_1st_1055.csv, best hyperparameter: {'n_neighbors': 10, 'method': 'mean'}\n", + "File: 657_YAHOO_id_107_WebService_tr_500_1st_1260.csv, best hyperparameter: {'n_neighbors': 10, 'method': 'mean'}\n", + "File: 552_YAHOO_id_2_Synthetic_tr_500_1st_893.csv, best hyperparameter: {'n_neighbors': 10, 'method': 'largest'}\n", + "File: 755_YAHOO_id_205_Synthetic_tr_500_1st_893.csv, best hyperparameter: {'n_neighbors': 10, 'method': 'mean'}\n", + "File: 558_YAHOO_id_8_WebService_tr_500_1st_1125.csv, best hyperparameter: {'n_neighbors': 10, 'method': 'median'}\n", + "File: 788_YAHOO_id_238_WebService_tr_500_1st_973.csv, best hyperparameter: {'n_neighbors': 10, 'method': 'largest'}\n", + "File: 643_YAHOO_id_93_WebService_tr_500_1st_1038.csv, best hyperparameter: {'n_neighbors': 10, 'method': 'mean'}\n", + "File: 551_YAHOO_id_1_Synthetic_tr_500_1st_893.csv, best hyperparameter: {'n_neighbors': 10, 'method': 'median'}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/sofiawuckert/Desktop/master_bioinformatik/übungen/2_Semester/data_mining_maschinelles_lernen/data-mining/src/utils/utility.py:30: RuntimeWarning: invalid value encountered in divide\n", + " res = ((a - np.expand_dims(mns, axis=axis)) /\n" + ] + } + ], "source": [ - "# main run model wrapper\n", - "main(run_Sub_KNN,params,'Sub_KNN',data_folders = '../../../data/', model_type='unsupervised',output_dir = '../../../docs/evaluation/')" + "# find . 
-name \"*.DS_Store\" -type f -delete\n", + "model = 'Sub_KNN'\n", + "output_path = '../../../docs/evaluation/'\n", + "\n", + "main(run_Sub_KNN,params,model,data_folders = '../../../data/', model_type='unsupervised',output_dir = output_path)" ] }, { @@ -74,9 +247,40 @@ "outputs": [], "source": [ "import pandas as pd\n", + "import os\n", + "data_empty = {\n", + " 'params':[],\n", + " 'file_name': [],\n", + " 'duration': [],\n", + " 'group': [],\n", + " 'point anomaly': [],\n", + " 'seq anomaly': [],\n", + " 'AUC-PR': [],\n", + " 'AUC-ROC': [],\n", + " 'VUS-PR': [],\n", + " 'VUS-ROC': [],\n", + " 'Standard-F1': [],\n", + " 'PA-F1': [],\n", + " 'Event-based-F1': [],\n", + " 'R-based-F1': [],\n", + " 'Affiliation-F': [],\n", + " 'Recall': [],\n", + " 'Precision': []\n", + "}\n", + "\n", + "df = pd.DataFrame(data_empty)\n", + "\n", + "path = '../../../docs/evaluation/'\n", + "#concant all batch-files to big one\n", + "for file in os.listdir(path):\n", + " file_path = os.path.join(path,file_path)\n", + " #check if current file belongs to selected model and avoid overwriting existing model.csv data\n", + " if file.startswith(model) and file.split('.')[0] != model:\n", + " df_batch = pd.read_csv(file_path)\n", + " #join with dataframe with all data\n", + " df = pd.concat(df,df_batch)\n", "\n", - "path = '../../../docs/evaluation/Sub_KNN.csv'\n", - "df = pd.read_csv(path)" + "df.shape" ] }, { @@ -87,11 +291,34 @@ "source": [ "df.head()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.to_csv(output_path+model+'.csv')" + ] } ], "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, "language_info": { - "name": "python" + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" } }, "nbformat": 4, diff --git a/src/models/sofia_modelle/call_cnn.py b/src/models/sofia_modelle/call_cnn.py new file mode 100644 index 0000000000000000000000000000000000000000..99536904dc49a13f97335418a79544839f2e32a1 --- /dev/null +++ b/src/models/sofia_modelle/call_cnn.py @@ -0,0 +1,27 @@ +from CNN import CNN +import sys +import pathlib +sys.path.append(str(pathlib.Path.absolute)+ '../../') +from src.utils.slidingWindows import find_length_rank +from src.run_model_wrapper import main + +#optimal hyperparameters from autors: 'POLY': {'periodicity': 1, 'power': 4} +params = { + 'window_size': [50, 100, 150], + 'num_channel': [[32, 32, 40], [16, 32, 64]] + } + +def run_CNN(data_train, data_test, window_size=100, num_channel=[32, 32, 40], lr=0.0008, n_jobs=1): + clf = CNN(window_size=window_size, num_channel=num_channel, feats=data_test.shape[1], lr=lr, batch_size=128) + clf.fit(data_train) + score = clf.decision_function(data_test) + return score.ravel() + +model = 'CNN' +output_path = '../../../docs/evaluation/' + +#writes results in .csv +main(run_CNN,params,model,data_folders = '../../../data/', model_type='semi-supervised',output_dir = output_path) + +#pip3 install -r requirements.txt +# python src/models/desi/call_poly.py \ No newline at end of file diff --git a/src/models/sofia_modelle/call_iforest.py b/src/models/sofia_modelle/call_iforest.py new file mode 100644 index 0000000000000000000000000000000000000000..50631433019223c5b60f3280ca0b6f465777e9b4 --- /dev/null +++ b/src/models/sofia_modelle/call_iforest.py @@ -0,0 +1,26 @@ +from 
IForest import IForest +import sys +import pathlib +sys.path.append(str(pathlib.Path.cwd() / '../../')) +from src.utils.slidingWindows import find_length_rank +from src.run_model_wrapper import main + +# hyperparameter grid for tuning +params = { + 'n_estimators': [25, 50, 100, 150, 200] + } + +def run_IForest(data, slidingWindow=100, n_estimators=100, max_features=1, n_jobs=1): + clf = IForest(slidingWindow=slidingWindow, n_estimators=n_estimators, max_features=max_features, n_jobs=n_jobs) + clf.fit(data) + score = clf.decision_scores_ + return score.ravel() + +model = 'IForest' +output_path = '../../../docs/evaluation/' + +# writes results to .csv files +main(run_IForest,params,model,data_folders = '../../../data/', model_type='unsupervised',output_dir = output_path) + +# pip3 install -r requirements.txt +# python src/models/sofia_modelle/call_iforest.py \ No newline at end of file diff --git a/src/models/sofia_modelle/call_knn.py b/src/models/sofia_modelle/call_knn.py new file mode 100644 index 0000000000000000000000000000000000000000..efa40fadcf7cbcecf779bf44b36107bd369549b2 --- /dev/null +++ b/src/models/sofia_modelle/call_knn.py @@ -0,0 +1,28 @@ +from KNN import KNN +import sys +import pathlib +sys.path.append(str(pathlib.Path.cwd() / '../../')) +from src.utils.slidingWindows import find_length_rank +from src.run_model_wrapper import main + +# hyperparameter grid for tuning +params = { + 'n_neighbors': [10, 20, 30, 40, 50], + 'method': ['largest', 'mean', 'median'] + } + +def run_Sub_KNN(data, n_neighbors=10, method='largest', periodicity=1, n_jobs=1): + slidingWindow = find_length_rank(data, rank=periodicity) + clf = KNN(slidingWindow=slidingWindow, n_neighbors=n_neighbors, method=method, n_jobs=n_jobs) + clf.fit(data) + score = clf.decision_scores_ + return score.ravel() + +model = 'Sub_KNN' +output_path = '../../../docs/evaluation/' + +# writes results to .csv files +main(run_Sub_KNN,params,model,data_folders = '../../../data/', model_type='unsupervised',output_dir = output_path) + +# pip3 install -r requirements.txt +# python src/models/sofia_modelle/call_knn.py \ No newline at end of file
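
The cell that stitches the per-batch evaluation CSVs back together is duplicated across CNN.ipynb, IForest.ipynb and KNN.ipynb. A shared helper could replace it; the sketch below is only illustrative — the function name collect_batch_results and the ignore_index behaviour are assumptions, not something this change set introduces. It follows the naming convention visible in the notebooks: batch files start with the model name, and the combined file is exactly '<model>.csv'.

import os
import pandas as pd

def collect_batch_results(eval_dir, model):
    """Concatenate all '<model>*' batch CSVs in eval_dir into one DataFrame.

    Hypothetical helper: assumes batch files start with the model name and
    that the combined '<model>.csv' must be skipped so it is not re-read.
    """
    batches = []
    for file in sorted(os.listdir(eval_dir)):
        if file.startswith(model) and file.split('.')[0] != model:
            batches.append(pd.read_csv(os.path.join(eval_dir, file)))
    # return an empty frame when no batch files exist yet
    return pd.concat(batches, ignore_index=True) if batches else pd.DataFrame()

# usage mirroring the notebooks:
# df = collect_batch_results('../../../docs/evaluation/', 'CNN')
# df.to_csv('../../../docs/evaluation/CNN.csv', index=False)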
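
The sys.path setup in the call_*.py scripts still depends on the working directory they are started from. A cwd-independent variant is sketched below for the scripts (not the notebooks, where __file__ is undefined); it assumes the repository root is the directory containing src/, three levels above these scripts, which matches the paths in this diff but is not verified here.

import sys
import pathlib

# call_cnn.py, call_iforest.py and call_knn.py live in
# <repo>/src/models/sofia_modelle/, so parents[3] is the repository root.
REPO_ROOT = pathlib.Path(__file__).resolve().parents[3]
if str(REPO_ROOT) not in sys.path:
    sys.path.append(str(REPO_ROOT))

# imports such as 'from src.run_model_wrapper import main' then resolve
# regardless of the directory the script is launched from.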