diff --git a/docs/evaluation/Ranking der Modelle im Vergleich zu den Autoren.md b/docs/evaluation/Ranking der Modelle im Vergleich zu den Autoren.md index 3fd162808f3f63a86e890fed0bed37224c68400e..54a132d4b63ebaeecd6884e528f1ee75eb30c23d 100644 --- a/docs/evaluation/Ranking der Modelle im Vergleich zu den Autoren.md +++ b/docs/evaluation/Ranking der Modelle im Vergleich zu den Autoren.md @@ -2,18 +2,18 @@ | Platz nach Autoren | Model | VUS-PR nach Autoren | AUC-PR | VUS-PR | Platz nach AUC-PR | Platz nach VUS-PR | | ------------------ | ----------- | ------------------- | ------ | ------ | ----------------- | ----------------- | | 1 | Sub-PCA | 0.42 | 0.407 | 0.444 | 2 | 4 | -| 2 | KShapeAD | 0.40 | 0.383 | 0.423 | | | +| 2 | KShapeAD | 0.40 | 0.383 | 0.423 | 4 | 6 | | 3 | POLY | 0.39 | 0.394 | 0.478 | 3 | 2 | | 4 | KMeansAD | 0.37 | 0.444 | 0.489 | 1 | 1 | -| 5 | USAD | 0.36 | 0.352 | 0.375 | | | -| 6 | Sub-KNN | 0.35 | 0.292 | 0.384 | | | -| 7 | CNN | 0.34 | 0.366 | 0.367 | | | -| 8 | LSTMAD | 0.33 | 0.313 | 0.320 | | | -| 9 | IForest | 0.30 | 0.304 | 0.341 | | | -| 10 | OmniAnomaly | 0.29 | 0.354 | 0.377 | | | -| 11 | Sub-LOF | 0.25 | 0.374 | 0.473 | | 3 | -| 12 | Sub-OCSVM | 0.23 | 0.275 | 0.339 | | | -| 13 | Sub-iForest | 0.22 | | | | | +| 5 | USAD | 0.36 | 0.352 | 0.375 | 8 | 8 | +| 6 | Sub-KNN | 0.35 | 0.351 | 0.431 | 9 | 5 | +| 7 | CNN | 0.34 | 0.366 | 0.367 | 6 | 9 | +| 8 | LSTMAD | 0.33 | 0.313 | 0.320 | 11 | 13 | +| 9 | IForest | 0.30 | 0.304 | 0.341 | 12 | 11 | +| 10 | OmniAnomaly | 0.29 | 0.354 | 0.377 | 7 | 7 | +| 11 | Sub-LOF | 0.25 | 0.374 | 0.473 | 5 | 3 | +| 12 | Sub-OCSVM | 0.23 | 0.275 | 0.339 | 13 | 12 | +| 13 | Sub-iForest | 0.22 | 0.326 | 0.362 | 10 | 10 | | 14 | LOF | 0.17 | 0.156 | 0.185 | 14 | 14 | # Verbesserungen der Modelle @@ -25,15 +25,16 @@ | POLY | 0.31 | 0.394 | 27.1 | 0.39 | 0.478 | 22.6 | | KMeansAD | 0.32 | 0.444 | 38.8 | 0.37 | 0.489 | 32.2 | | USAD | 0.32 | 0.352 | 9.4 | 0.36 | 0.375 | 4.2 | -| Sub-KNN | 0.27 | 0.292 | 8.15 | 
0.35 | 0.384 | 9.7 | -| Sub-KNN V2 | 0.27 | | | 0.35 | | | +| Sub-KNN V2 | 0.27 | 0.351 | 30 | 0.35 | 0.431 | 23.1 | | CNN | 0.33 | 0.366 | 10.9 | 0.34 | 0.367 | 7.9 | | LSTMAD | 0.31 | 0.313 | - | 0.33 | 0.320 | 3.0 | | IForest | 0.29 | 0.304 | 4.8 | 0.30 | 0.341 | 13.7 | -| Sub-iForest | 0.16 | | | 0.22 | | | +| Sub-iForest | 0.16 | 0.326 | 103.8 | 0.22 | 0.362 | 64.5 | | OmniAnomaly | 0.27 | 0.354 | 31.1 | 0.29 | 0.377 | 30 | | Sub-LOF | 0.16 | 0.374 | 133.8 | 0.25 | 0.473 | 89.2 | | Sub-OCSVM | 0.16 | 0.275 | 71.9 | 0.23 | 0.339 | 47.4 | | LOF | 0.14 | 0.156 | 11.4 | 0.17 | 0.185 | 8.8 | -| **Durchschnitt** | | | $\approx$ 28.21 | | | $\approx$ 21.55 | +| **Durchschnitt** | | | $\approx$ 35.17 | | | $\approx$ 25.58 | +| **Range** | | 0.288 | 133.8 | | 0.304 | 86.2 | +| *Alt*: Sub-KNN | 0.27 | 0.292 | 8.15 | 0.35 | 0.384 | 9.7 | diff --git a/docs/evaluation/Reproducibility.md b/docs/evaluation/Reproducibility.md index 027b9b8c5839094ef5a01cd016eb7d2367ec120f..53ab3f4c5263a97ba87c886263d2542f709ff52e 100644 --- a/docs/evaluation/Reproducibility.md +++ b/docs/evaluation/Reproducibility.md @@ -76,9 +76,8 @@ **Allgemein:** * Da eine Grid-Suche pro Zeitreihe durchgeführt wurde und nicht pauschal die optimalen Hyperparameter der Autoren für alle Zeitserien verwendet wurden, ist eine Verbesserung der Modelle zu erwarten + In einigen Datensätzen bei einigen Modellen sind die Ergebnisse jedoch nicht reproduzierbar, selbst mit den optimalen Hyperparametern der Autoren (meist noch schlechtere Werte). Hier nur Vergleich mit VUS-PR möglich, da zu AUC-PR keine Daten pro Datensatz zu Model. 
- *Missing: Sub-IForest und KNN* *Betreffende Datensätze:* - + **TAO** (Modelle: **PCA**: -0.09; **KShapeAD**: - 0.19; **Poly**: - 0.11; **KMeans**: - 0.17; **IForest**: - 0.26; **Sub-LOF**: - 0.11; **Sub-OCSVM**: - 0.2; **LOF** - 0.2, **Sub-IForest**: -0.17; ) + + **TAO** (Modelle: **PCA**: -0.09; **KShapeAD**: - 0.19; **Poly**: - 0.11; **KMeans**: - 0.17; **IForest**: - 0.26; **Sub-LOF**: - 0.11; **Sub-OCSVM**: - 0.2; **LOF**: - 0.2; **Sub-IForest**: -0.17; **Sub-KNN**: - 0.17; ) + **Stock** (Modelle: **IForest**: - 0.25; ) + **MSL** (Modelle: **KShapeAD**: -0.09; ) + **IOPS**: (Modelle: **IForest**: - 0.15; ) @@ -89,18 +88,18 @@ + **SMAP** (Modelle: **Sub-IForest**: - 0.11; ) + **TODS** (Modelle: **Sub-IForest**: - 0.16; ) *Stark Verbesserte Datensätze bei einzelnen Modellen:* -+ **Power** (Modelle: **KShapeAD**: + 0.32; **KMeans**: + 0.13; ) ++ **Power** (Modelle: **KShapeAD**: + 0.32; **KMeans**: + 0.13; **Sub-KNN**: + 0.12; ) + **MSL** (Modelle: **POLY**: + 0.19; **IForest**: + 0.16; **OA**: + 0.18; **Sub-LOF**: + 0.19; **Sub-IForest**: + 0.12; ) + **OPPORTUNITY** (Modelle: **Poly**: + 0.33; **CNN**: +0.22; **IForest**: + 0.31; **Sub-IForest**: + 0.69; ) -+ **Exathlon:** (Modelle: **Poly**: + 0.11; **KMeans**: + 0.21; **IForest**:+ 0.29; **Sub-OCSVM**: + 0.27; **Sub-IForest**: + 0.47; ) -+ **NAB** (Modelle: **Poly**: + 0.1; **IForest**: + 0.12; **OA**: + 0.15; **Sub-LOF**: + 0.21; **Sub-OCSVM**: + 0.12; **Sub-IForest**: + 0.1; ) ++ **Exathlon** (Modelle: **Poly**: + 0.11; **KMeans**: + 0.21; **IForest**: + 0.29; **Sub-OCSVM**: + 0.27; **Sub-IForest**: + 0.47; **Sub-KNN**: + 0.13; ) ++ **NAB** (Modelle: **Poly**: + 0.1; **IForest**: + 0.12; **OA**: + 0.15; **Sub-LOF**: + 0.21; **Sub-OCSVM**: + 0.12; **Sub-IForest**: + 0.1; **Sub-KNN**: + 0.12; ) + **NEK** (Modelle: **Poly**: + 0.15; **KMeans:** + 0.12; **USAD**: + 0.1; **Sub-LOF**: + 0.44; **Sub-OCSVM**: + 0.11; **Sub-IForest**: + 0.54; ) + **WSD** (Modelle: **Poly**: + 0.12; **USAD**: + 0.12; **Sub-LOF**: + 
0.52; ) -+ **LTDB** (Modelle: **KMeans**: + 0.35; **IForest**: + 0.2; **Sub-OCSVM**: + 0.36; ) -+ **MITDB** (Modelle: **KMeans**: + 0.32; **Sub-LOF**: + 0.11; **Sub-OCSVM**: + 0.21; ) ++ **LTDB** (Modelle: **KMeans**: + 0.35; **IForest**: + 0.2; **Sub-OCSVM**: + 0.36; **Sub-KNN**: + 0.13; ) ++ **MITDB** (Modelle: **KMeans**: + 0.32; **Sub-LOF**: + 0.11; **Sub-OCSVM**: + 0.21; **Sub-KNN**: + 0.19; ) + **SVDB** (Modelle: **KMeans**: + 0.37; **IForest**: + 0.22; **OA**: + 0.36; **Sub-LOF**: + 0.2; **Sub-OCSVM**: + 0.39; ) -+ **UCR** (Modelle: **KMeans**: + 0.11; **Sub-LOF**: + 0.22; **Sub-OCSVM**: + 0.16; ) -+ **Yahoo** (Modelle: **KMeans**: + 0.23; **Sub-LOF**: + 0.15; **Sub-IForest**: + 0.1; ) ++ **UCR** (Modelle: **KMeans**: + 0.11; **Sub-LOF**: + 0.22; **Sub-OCSVM**: + 0.16; **Sub-KNN**: + 0.11; ) ++ **Yahoo** (Modelle: **KMeans**: + 0.23; **Sub-LOF**: + 0.15; **Sub-IForest**: + 0.1; **Sub-KNN**: + 0.21; ) + **Daphnet** (Modelle: **IForest**: + 0.16; **Sub-LOF**: + 0.21; **Sub-IForest**: +0.46; ) + **SED** (Modelle: **IForest**: + 0.46; **Sub-OCSVM**: + 0.1; ) + **Catsv2** (Modelle: **OA**: + 0.16; ) @@ -212,30 +211,30 @@ *Anpassen nach hinzufügen von Sub-IForest und ggf. 
neues KNN* -| Datensatz | AUC-PR | VUS-PR | Differenz AUC zu VUS-PR | Differenz prozentual | -| ------------ | ------ | ------ | ----------------------- | -------------------- | -| **Exathlon** | 0.658 | 0.659 | 0.001 | | -| **NAB** | 0.341 | 0.367 | 0.026 | | -| **NEK** | 0.566 | 0.592 | 0.026 | | -| **LTDB** | 0.485 | 0.545 | *0.06* | | -| **Dapthnet** | 0.227 | 0.217 | -0.01 | | -| **SWaT** | 0.434 | 0.302 | *-0.132* | | -| **Power** | 0.174 | 0.172 | -0.002 | | -| **TAO** | 0.297 | 0.817 | *0.52* | | -| **TODS** | 0.252 | 0.614 | *0.362* | | -| **Stock** | 0.214 | 0.769 | *0.555* | | -| MITDB | 0.313 | 0.300 | | | -| MSL | 0.357 | 0.412 | | | -| YAHOO | 0.268 | 0.374 | | | -| OPPORTUNITY | 0.464 | 0.464 | | | -| SMAP | 0.426 | 0.470 | | | -| MGAB | 0.095 | 0.081 | | | -| UCR | 0.210 | 0.219 | | | -| SVDB | 0.441 | 0.440 | | | -| SMD | 0.396 | 0.381 | | | -| WSD | 0.206 | 0.188 | | | -| SED | 0.260 | 0.332 | | | -| IOPS | 0.226 | 0.187 | | | -| CATSv2 | 0.394 | 0.266 | | | -| **Gesamt:** | 0.335 | 0.399 | | | +| Datensatz | AUC-PR | VUS-PR | Differenz AUC zu VUS-PR | +| ------------ | ------ | ------ | ----------------------- | +| **Exathlon** | 0.682 | 0.683 | 0.001 | +| **NAB** | 0.346 | 0.371 | 0.025 | +| **NEK** | 0.590 | 0.614 | 0.024 | +| **LTDB** | 0.490 | 0.549 | *0.059* | +| **Daphnet** | 0.247 | 0.240 | - 0.007 | +| **SWaT** | 0.453 | 0.309 | *- 0.144* | +| **Power** | 0.179 | 0.178 | - 0.001 | +| **TAO** | 0.284 | 0.815 | *0.531* | +| **TODS** | 0.241 | 0.603 | *0.362* | +| **Stock** | 0.210 | 0.769 | *0.559* | +| MITDB | 0.305 | 0.293 | - 0.012 | +| MSL | 0.361 | 0.413 | 0.052 | +| YAHOO | 0.263 | 0.360 | 0.097 | +| OPPORTUNITY | 0.488 | 0.488 | 0.00 | +| SMAP | 0.423 | 0.463 | 0.040 | +| MGAB | 0.088 | 0.080 | - 0.008 | +| UCR | 0.213 | 0.219 | 0.006 | +| SVDB | 0.436 | 0.436 | 0.00 | +| SMD | 0.398 | 0.381 | - 0.017 | +| WSD | 0.198 | 0.180 | - 0.018 | +| SED | 0.275 | 0.358 | 0.083 | +| IOPS | 0.225 | 0.184 | - 0.041 | +| CATSv2 | 0.378 | 0.257 | - 
0.121 | +| **Gesamt:** | 0.335 | 0.399 | 0.064 | diff --git a/src/group_evaluation/create_heatmap.ipynb b/src/group_evaluation/create_heatmap.ipynb index 07d16e8e566fdcfb434acf85ec35d57b76a30619..cc245bef517dfef99a77e18d72ba7d926f640589 100644 --- a/src/group_evaluation/create_heatmap.ipynb +++ b/src/group_evaluation/create_heatmap.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -245,7 +245,7 @@ "[5 rows x 23 columns]" ] }, - "execution_count": 7, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -1932,7 +1932,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -1978,7 +1978,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -1990,7 +1990,7 @@ " dtype='object', name='Model')" ] }, - "execution_count": 10, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } diff --git a/src/models/desi/desi_evaluate_groups.ipynb b/src/models/desi/desi_evaluate_groups.ipynb index 14a616c7b029798574639238e1da6898edd8110f..0a2f81a6b8fc1191b87c8ddf782ecc503ca9d980 100644 --- a/src/models/desi/desi_evaluate_groups.ipynb +++ b/src/models/desi/desi_evaluate_groups.ipynb @@ -5976,47 +5976,47 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "group\n", - "CATSv2 0.361837\n", "Daphnet 0.037377\n", - "Exathlon 0.597445\n", "IOPS 0.105166\n", - "LTDB 0.709125\n", + "WSD 0.106099\n", + "SWaT 0.110668\n", + "SMD 0.193423\n", + "NEK 0.273441\n", "MGAB 0.301167\n", - "MITDB 0.548677\n", "MSL 0.321555\n", - "NAB 0.373190\n", - "NEK 0.273441\n", - "OPPORTUNITY 0.333758\n", "Power 0.333374\n", - "SED 0.882987\n", + "OPPORTUNITY 0.333758\n", + "CATSv2 0.361837\n", + "NAB 0.373190\n", + "UCR 0.475202\n", + "YAHOO 0.521357\n", + "MITDB 0.548677\n", "SMAP 
0.573129\n", - "SMD 0.193423\n", + "Exathlon 0.597445\n", "SVDB 0.617909\n", - "SWaT 0.110668\n", + "LTDB 0.709125\n", "Stock 0.719049\n", - "TAO 0.752695\n", "TODS 0.742028\n", - "UCR 0.475202\n", - "WSD 0.106099\n", - "YAHOO 0.521357\n", + "TAO 0.752695\n", + "SED 0.882987\n", "Name: VUS-PR, dtype: float64" ] }, - "execution_count": 31, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Gruppen aufsteigend sortiert nach durchschnittlichem VUS-PR wert\n", - "df_groups['VUS-PR'].mean().sort_index()" + "df_groups['VUS-PR'].mean().sort_values()" ] }, {