From 860a14599ce542df11388ff17c5a1498909bf05d Mon Sep 17 00:00:00 2001 From: anhtx <sofia.wuckert@student.uni-halle.de> Date: Sun, 2 Feb 2025 12:21:52 +0100 Subject: [PATCH] modelle angepasst --- src/models/sofia_modelle/CNN.ipynb | 96 +++++++++++++++++++++++++- src/models/sofia_modelle/IForest.ipynb | 93 ++++++++++++++++++++++++- src/models/sofia_modelle/KNN.ipynb | 45 +++++++++++- 3 files changed, 229 insertions(+), 5 deletions(-) diff --git a/src/models/sofia_modelle/CNN.ipynb b/src/models/sofia_modelle/CNN.ipynb index abdac9f..b8fb945 100644 --- a/src/models/sofia_modelle/CNN.ipynb +++ b/src/models/sofia_modelle/CNN.ipynb @@ -7,7 +7,7 @@ "outputs": [], "source": [ "# importe\n", - "from IForest import IForest\n", + "from CNN import CNN\n", "import sys\n", "import pathlib\n", "sys.path.append(str(pathlib.Path.absolute)+ '../../')\n", @@ -20,7 +20,99 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "params = {\n", + " 'window_size': [50, 100, 150],\n", + " 'num_channel': [[32, 32, 40], [16, 32, 64]]\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def run_CNN(data_train, data_test, window_size=100, num_channel=[32, 32, 40], lr=0.0008, n_jobs=1):\n", + " clf = CNN(window_size=window_size, num_channel=num_channel, feats=data_test.shape[1], lr=lr, batch_size=128)\n", + " clf.fit(data_train)\n", + " score = clf.decision_function(data_test)\n", + " return score.ravel()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = 'CNN'\n", + "output_path = '../../../docs/evaluation/'\n", + "\n", + "main(run_CNN,params,model,data_folders = '../../../data/', model_type='semi-supervised',output_dir = output_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import os\n", + "data_empty = {\n", + " 
'params':[],\n", + " 'file_name': [],\n", + " 'duration': [],\n", + " 'group': [],\n", + " 'point anomaly': [],\n", + " 'seq anomaly': [],\n", + " 'AUC-PR': [],\n", + " 'AUC-ROC': [],\n", + " 'VUS-PR': [],\n", + " 'VUS-ROC': [],\n", + " 'Standard-F1': [],\n", + " 'PA-F1': [],\n", + " 'Event-based-F1': [],\n", + " 'R-based-F1': [],\n", + " 'Affiliation-F': [],\n", + " 'Recall': [],\n", + " 'Precision': []\n", + "}\n", + "\n", + "df = pd.DataFrame(data_empty)\n", + "\n", + "path = '../../../docs/evaluation/'\n", + "model = 'CNN'\n", + "#concat all batch-files to big one\n", + "for file in os.listdir(path):\n", + " file_path = os.path.join(path,file)\n", + " #check if current file belongs to selected model and avoid overwriting existing model.csv data\n", + " if file.startswith(model) and file.split('.')[0] != model:\n", + " df_batch = pd.read_csv(file_path)\n", + " #join with dataframe with all data\n", + " df = pd.concat([df,df_batch])\n", + "\n", + "df.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df.to_csv(output_path+model+'.csv')" ] } ], "metadata": { diff --git a/src/models/sofia_modelle/IForest.ipynb b/src/models/sofia_modelle/IForest.ipynb index abdac9f..2de109e 100644 --- a/src/models/sofia_modelle/IForest.ipynb +++ b/src/models/sofia_modelle/IForest.ipynb @@ -20,7 +20,98 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "params = {\n", + " 'n_estimators': [25, 50, 100, 150, 200]\n", + " }" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ + "def run_IForest(data, slidingWindow=100, n_estimators=100, max_features=1, n_jobs=1):\n", + " clf = IForest(slidingWindow=slidingWindow, n_estimators=n_estimators, max_features=max_features, n_jobs=n_jobs)\n", + " 
clf.fit(data)\n", + " score = clf.decision_scores_\n", + " return score.ravel()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ + "model = 'IForest'\n", + "output_path = '../../../docs/evaluation/'\n", + "\n", + "main(run_IForest,params,model,data_folders = '../../../data/', model_type='unsupervised',output_dir = output_path)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ + "import pandas as pd\n", + "import os\n", + "data_empty = {\n", + " 'params':[],\n", + " 'file_name': [],\n", + " 'duration': [],\n", + " 'group': [],\n", + " 'point anomaly': [],\n", + " 'seq anomaly': [],\n", + " 'AUC-PR': [],\n", + " 'AUC-ROC': [],\n", + " 'VUS-PR': [],\n", + " 'VUS-ROC': [],\n", + " 'Standard-F1': [],\n", + " 'PA-F1': [],\n", + " 'Event-based-F1': [],\n", + " 'R-based-F1': [],\n", + " 'Affiliation-F': [],\n", + " 'Recall': [],\n", + " 'Precision': []\n", + "}\n", + "\n", + "df = pd.DataFrame(data_empty)\n", + "\n", + "path = '../../../docs/evaluation/'\n", + "model = 'IForest'\n", + "#concat all batch-files to big one\n", + "for file in os.listdir(path):\n", + " file_path = os.path.join(path,file)\n", + " #check if current file belongs to selected model and avoid overwriting existing model.csv data\n", + " if file.startswith(model) and file.split('.')[0] != model:\n", + " df_batch = pd.read_csv(file_path)\n", + " #join with dataframe with all data\n", + " df = pd.concat([df,df_batch])\n", + "\n", + "df.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ + "df.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ + "df.to_csv(output_path+model+'.csv')" ] } ], "metadata": { diff --git a/src/models/sofia_modelle/KNN.ipynb b/src/models/sofia_modelle/KNN.ipynb index 5a23ba6..436b14c 100644 --- 
a/src/models/sofia_modelle/KNN.ipynb +++ b/src/models/sofia_modelle/KNN.ipynb @@ -736,9 +736,41 @@ "outputs": [], "source": [ "import pandas as pd\n", + "import os\n", + "data_empty = {\n", + " 'params':[],\n", + " 'file_name': [],\n", + " 'duration': [],\n", + " 'group': [],\n", + " 'point anomaly': [],\n", + " 'seq anomaly': [],\n", + " 'AUC-PR': [],\n", + " 'AUC-ROC': [],\n", + " 'VUS-PR': [],\n", + " 'VUS-ROC': [],\n", + " 'Standard-F1': [],\n", + " 'PA-F1': [],\n", + " 'Event-based-F1': [],\n", + " 'R-based-F1': [],\n", + " 'Affiliation-F': [],\n", + " 'Recall': [],\n", + " 'Precision': []\n", + "}\n", "\n", - "path = '../../../docs/evaluation/Sub_KNN.csv'\n", - "df = pd.read_csv(path)" + "df = pd.DataFrame(data_empty)\n", + "\n", + "path = '../../../docs/evaluation/'\n", + "model = 'Sub_KNN'\n", + "#concat all batch-files to big one\n", + "for file in os.listdir(path):\n", + " file_path = os.path.join(path,file)\n", + " #check if current file belongs to selected model and avoid overwriting existing model.csv data\n", + " if file.startswith(model) and file.split('.')[0] != model:\n", + " df_batch = pd.read_csv(file_path)\n", + " #join with dataframe with all data\n", + " df = pd.concat([df,df_batch])\n", + "\n", + "df.shape" ] }, { @@ -749,6 +781,15 @@ "source": [ "df.head()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.to_csv(path+model+'.csv')" ] } ], "metadata": { -- GitLab