From 860a14599ce542df11388ff17c5a1498909bf05d Mon Sep 17 00:00:00 2001 From: anhtx <sofia.wuckert@student.uni-halle.de> Date: Sun, 2 Feb 2025 12:21:52 +0100 Subject: [PATCH] modelle angepasst --- src/models/sofia_modelle/CNN.ipynb | 96 +++++++++++++++++++++++++- src/models/sofia_modelle/IForest.ipynb | 93 ++++++++++++++++++++++++- src/models/sofia_modelle/KNN.ipynb | 45 +++++++++++- 3 files changed, 229 insertions(+), 5 deletions(-) diff --git a/src/models/sofia_modelle/CNN.ipynb b/src/models/sofia_modelle/CNN.ipynb index abdac9f..b8fb945 100644 --- a/src/models/sofia_modelle/CNN.ipynb +++ b/src/models/sofia_modelle/CNN.ipynb @@ -7,7 +7,7 @@ "outputs": [], "source": [ "# importe\n", - "from IForest import IForest\n", + "from CNN import CNN\n", "import sys\n", "import pathlib\n", "sys.path.append(str(pathlib.Path.absolute)+ '../../')\n", @@ -20,7 +20,99 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "params = {\n", + " 'window_size': [50, 100, 150],\n", + " 'num_channel': [[32, 32, 40], [16, 32, 64]]\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def run_CNN(data_train, data_test, window_size=100, num_channel=[32, 32, 40], lr=0.0008, n_jobs=1):\n", + " clf = CNN(window_size=window_size, num_channel=num_channel, feats=data_test.shape[1], lr=lr, batch_size=128)\n", + " clf.fit(data_train)\n", + " score = clf.decision_function(data_test)\n", + " return score.ravel()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = 'CNN'\n", + "output_path = '../../../docs/evaluation/'\n", + "\n", + "main(run_CNN,params,model,data_folders = '../../../data/', model_type='semi-supervised',output_dir = output_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import os\n", + "data_empty = {\n", + " 
'params':[],\n", + " 'file_name': [],\n", + " 'duration': [],\n", + " 'group': [],\n", + " 'point anomaly': [],\n", + " 'seq anomaly': [],\n", + " 'AUC-PR': [],\n", + " 'AUC-ROC': [],\n", + " 'VUS-PR': [],\n", + " 'VUS-ROC': [],\n", + " 'Standard-F1': [],\n", + " 'PA-F1': [],\n", + " 'Event-based-F1': [],\n", + " 'R-based-F1': [],\n", + " 'Affiliation-F': [],\n", + " 'Recall': [],\n", + " 'Precision': []\n", + "}\n", + "\n", + "df = pd.DataFrame(data_empty)\n", + "\n", + "path = '../../../docs/evaluation/'\n", + "model = 'CNN'\n", + "#concat all batch-files to big one\n", + "for file in os.listdir(path):\n", + " file_path = os.path.join(path,file)\n", + " #check if current file belongs to selected model and avoid overwriting existing model.csv data\n", + " if file.startswith(model) and file.split('.')[0] != model:\n", + " df_batch = pd.read_csv(file_path)\n", + " #join with dataframe with all data\n", + " df = pd.concat([df,df_batch])\n", + "\n", + "df.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df.to_csv(output_path+model+'.csv')" ] } ], "metadata": { diff --git a/src/models/sofia_modelle/IForest.ipynb b/src/models/sofia_modelle/IForest.ipynb index abdac9f..2de109e 100644 --- a/src/models/sofia_modelle/IForest.ipynb +++ b/src/models/sofia_modelle/IForest.ipynb @@ -20,7 +20,98 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "params = {\n", + " 'n_estimators': [25, 50, 100, 150, 200]\n", + " }" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ + "def run_IForest(data, slidingWindow=100, n_estimators=100, max_features=1, n_jobs=1):\n", + " clf = IForest(slidingWindow=slidingWindow, n_estimators=n_estimators, max_features=max_features, n_jobs=n_jobs)\n", + " 
clf.fit(data)\n", + " score = clf.decision_scores_\n", + " return score.ravel()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ + "model = 'IForest'\n", + "output_path = '../../../docs/evaluation/'\n", + "\n", + "main(run_IForest,params,model,data_folders = '../../../data/', model_type='unsupervised',output_dir = output_path)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ + "import pandas as pd\n", + "import os\n", + "data_empty = {\n", + " 'params':[],\n", + " 'file_name': [],\n", + " 'duration': [],\n", + " 'group': [],\n", + " 'point anomaly': [],\n", + " 'seq anomaly': [],\n", + " 'AUC-PR': [],\n", + " 'AUC-ROC': [],\n", + " 'VUS-PR': [],\n", + " 'VUS-ROC': [],\n", + " 'Standard-F1': [],\n", + " 'PA-F1': [],\n", + " 'Event-based-F1': [],\n", + " 'R-based-F1': [],\n", + " 'Affiliation-F': [],\n", + " 'Recall': [],\n", + " 'Precision': []\n", + "}\n", + "\n", + "df = pd.DataFrame(data_empty)\n", + "\n", + "path = '../../../docs/evaluation/'\n", + "model = 'IForest'\n", + "#concat all batch-files to big one\n", + "for file in os.listdir(path):\n", + " file_path = os.path.join(path,file)\n", + " #check if current file belongs to selected model and avoid overwriting existing model.csv data\n", + " if file.startswith(model) and file.split('.')[0] != model:\n", + " df_batch = pd.read_csv(file_path)\n", + " #join with dataframe with all data\n", + " df = pd.concat([df,df_batch])\n", + "\n", + "df.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ + "df.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ + "df.to_csv(output_path+model+'.csv')" ] } ], "metadata": { diff --git a/src/models/sofia_modelle/KNN.ipynb b/src/models/sofia_modelle/KNN.ipynb index 5a23ba6..436b14c 100644 --- 
a/src/models/sofia_modelle/KNN.ipynb +++ b/src/models/sofia_modelle/KNN.ipynb @@ -736,9 +736,41 @@ "outputs": [], "source": [ "import pandas as pd\n", + "import os\n", + "data_empty = {\n", + " 'params':[],\n", + " 'file_name': [],\n", + " 'duration': [],\n", + " 'group': [],\n", + " 'point anomaly': [],\n", + " 'seq anomaly': [],\n", + " 'AUC-PR': [],\n", + " 'AUC-ROC': [],\n", + " 'VUS-PR': [],\n", + " 'VUS-ROC': [],\n", + " 'Standard-F1': [],\n", + " 'PA-F1': [],\n", + " 'Event-based-F1': [],\n", + " 'R-based-F1': [],\n", + " 'Affiliation-F': [],\n", + " 'Recall': [],\n", + " 'Precision': []\n", + "}\n", "\n", - "path = '../../../docs/evaluation/Sub_KNN.csv'\n", - "df = pd.read_csv(path)" + "df = pd.DataFrame(data_empty)\n", + "\n", + "path = '../../../docs/evaluation/'\n", + "model = 'Sub_KNN'\n", + "#concat all batch-files to big one\n", + "for file in os.listdir(path):\n", + " file_path = os.path.join(path,file)\n", + " #check if current file belongs to selected model and avoid overwriting existing model.csv data\n", + " if file.startswith(model) and file.split('.')[0] != model:\n", + " df_batch = pd.read_csv(file_path)\n", + " #join with dataframe with all data\n", + " df = pd.concat([df,df_batch])\n", + "\n", + "df.shape" ] }, { @@ -749,6 +781,15 @@ "source": [ "df.head()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.to_csv(path+model+'.csv')" ] } ], "metadata": { -- GitLab