Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
D
Data Mining Project
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Desiree Wyrzykala
Data Mining Project
Commits
6d2ba2da
Commit
6d2ba2da
authored
2 months ago
by
DesireeWyrzylala
Browse files
Options
Downloads
Patches
Plain Diff
prepare working with uni server
parent
c4fb08bd
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/models/desi/call_poly.py
+25
-0
25 additions, 0 deletions
src/models/desi/call_poly.py
src/run_model_wrapper.py
+33
-37
33 additions, 37 deletions
src/run_model_wrapper.py
with
58 additions
and
37 deletions
src/models/desi/call_poly.py
0 → 100644
+
25
−
0
View file @
6d2ba2da
from POLY import POLY
import sys
import pathlib

# BUG FIX: the original appended str(pathlib.Path.absolute) + '../../' to
# sys.path.  `pathlib.Path.absolute` is an unbound method, so str() yields
# something like "<function Path.absolute at 0x...>../../" -- never a valid
# path, and the src.* imports below only worked if the CWD happened to be
# right.  Resolve the path fragment relative to this file instead.
# NOTE(review): '../../' mirrors the original fragment -- from
# src/models/desi/ this reaches src/; confirm that this (and not the project
# root one level higher) is the directory the src.* imports need.
sys.path.append(str((pathlib.Path(__file__).resolve().parent / '../../').resolve()))

from src.utils.slidingWindows import find_length_rank
from src.run_model_wrapper import main
# optimal hyperparameters from authors: 'POLY': {'periodicity': 1, 'power': 4}
# Search grid for hyperparameter tuning: each key maps to the candidate
# values tried for that parameter of run_POLY (periodicity feeds the
# sliding-window rank, power the polynomial degree).
params = {'periodicity': [1, 2, 3], 'power': [1, 2, 3, 4, 5, 7]}
def run_POLY(data, periodicity=1, power=3, n_jobs=1):
    """Fit a POLY anomaly detector on *data* and return its anomaly scores.

    Parameters
    ----------
    data : array-like
        The time series to score.
    periodicity : int, default 1
        Rank passed to ``find_length_rank`` to derive the sliding-window size.
    power : int, default 3
        Polynomial degree handed to the POLY detector.
    n_jobs : int, default 1
        Accepted for interface uniformity with the other model wrappers;
        not used by this detector.

    Returns
    -------
    numpy.ndarray
        Flattened per-point anomaly scores (``decision_scores_``).
    """
    window = find_length_rank(data, rank=periodicity)
    detector = POLY(power=power, window=window)
    detector.fit(data)
    return detector.decision_scores_.ravel()
model = 'POLY'
output_path = '../../../docs/evaluation/'

# writes results in .csv
# Guard the entry point so importing this module does not immediately launch
# a full tuning/evaluation run (module-level side effects are kept to
# constant definitions only).
if __name__ == '__main__':
    main(
        run_POLY,
        params,
        model,
        data_folders='../../../data/',
        model_type='unsupervised',
        output_dir=output_path,
    )
\ No newline at end of file
This diff is collapsed.
Click to expand it.
src/run_model_wrapper.py
+
33
−
37
View file @
6d2ba2da
...
...
@@ -47,16 +47,13 @@ Unsupervise_AD_Pool = ['FFT', 'SR', 'IForest', 'LOF', 'POLY', 'MatrixProfile', '
Semisupervise_AD_Pool = [
'
SAND
'
,
'
OCSVM
'
,
'
AutoEncoder
'
,
'
CNN
'
,
'
LSTMAD
'
,
'
USAD
'
,
'
OmniAnomaly
'
]
'''
def
hyperparameter_optimization
(
run_model
,
data
,
label
,
train_data
,
hyperparams
,
slidingWindow
,
model_type
=
'
unsupervised
'
):
def
hyperparameter_optimization
(
run_model
,
data
,
label
,
train_data
,
hyperparams
,
slidingWindow
,
param_combinations
,
model_type
=
'
unsupervised
'
):
#ignore warnings for hyperparameter tuning
warnings
.
simplefilter
(
"
ignore
"
,
category
=
UndefinedMetricWarning
)
#warnings.simplefilter("ignore", category=np.RankWarning)
best_params
=
None
best_score
=
float
(
"
-inf
"
)
# Creates combinations of all hyperparameters
param_combinations
=
list
(
itertools
.
product
(
*
hyperparams
.
values
()))
#test each combination of hyperparameter and save best ones
for
param_set
in
param_combinations
:
params
=
dict
(
zip
(
hyperparams
.
keys
(),
param_set
))
...
...
@@ -262,41 +259,40 @@ def main(run_model, hyperparams, model_name, data_folders = '../../../data/', mo
path_data_all
=
data_folders
+
'
train/all
'
path_to_folder
=
data_folders
+
'
train/
'
for
folder
in
os
.
listdir
(
path_data_all
):
#go through all files in train data and additionally store information about group and anomaly types
grid_List
=
[]
print
(
f
"
Start Processing files
"
)
#returns [{file_name:str,train_data:list,data:list, label:list, sliding_window:int}]
path
=
os
.
path
.
join
(
path_data_all
,
folder
)
file_data_dict_list
=
preprocess_data
(
path
)
#go through all files in train data and additionally store information about group and anomaly types
grid_List
=
[]
print
(
f
"
Start Processing files
"
)
#returns [{file_name:str,train_data:list,data:list, label:list, sliding_window:int}]
file_data_dict_list
=
preprocess_data
(
path_data_all
)
print
(
'
Start Hyperparameter Tuning
'
)
#find hyperparametrs for each file:
for
time_series
in
file_data_dict_list
:
filename
=
time_series
[
'
file_name
'
]
data
=
time_series
[
'
data
'
]
train_data
=
time_series
[
'
train_data
'
]
label
=
time_series
[
'
label
'
]
sliding_window
=
time_series
[
'
sliding_window
'
]
print
(
'
Start Hyperparameter Tuning
'
)
# Creates combinations of all hyperparameters
param_combinations
=
list
(
itertools
.
product
(
*
hyperparams
.
values
()))
#find hyperparametrs for each file:
for
time_series
in
file_data_dict_list
:
filename
=
time_series
[
'
file_name
'
]
data
=
time_series
[
'
data
'
]
train_data
=
time_series
[
'
train_data
'
]
label
=
time_series
[
'
label
'
]
sliding_window
=
time_series
[
'
sliding_window
'
]
# Hyperparameter-Optimierung
best_params
=
hyperparameter_optimization
(
run_model
,
data
,
label
,
train_data
,
hyperparams
,
sliding_window
,
model_type
)
print
(
f
"
File:
{
filename
}
, best hyperparameter:
{
best_params
}
"
)
#grid_list looks like this: [{ params: {key:value}, file_name:str,train_data:list,data:list, label:list, sliding_window:int}]
grid_List
.
append
({
'
file_name
'
:
filename
,
'
params
'
:
best_params
,
'
train_data
'
:
train_data
,
'
data
'
:
data
,
'
label
'
:
label
,
'
sliding_window
'
:
sliding_window
})
# Hyperparameter-Optimierung
best_params
=
hyperparameter_optimization
(
run_model
,
data
,
label
,
train_data
,
hyperparams
,
sliding_window
,
param_combinations
,
model_type
)
print
(
f
"
File:
{
filename
}
, best hyperparameter:
{
best_params
}
"
)
#grid_list looks like this: [{ params: {key:value}, file_name:str,train_data:list,data:list, label:list, sliding_window:int}]
grid_List
.
append
({
'
file_name
'
:
filename
,
'
params
'
:
best_params
,
'
train_data
'
:
train_data
,
'
data
'
:
data
,
'
label
'
:
label
,
'
sliding_window
'
:
sliding_window
})
print
(
'
Start Training and evaluation
'
)
results_list
=
train_test_model
(
run_model
,
grid_List
,
path_to_folder
,
model_type
)
#convert list of dicts to a dict with lists
results
=
create_dict_from_array
(
results_list
)
print
(
'
Start Training and evaluation
'
)
results_list
=
train_test_model
(
run_model
,
grid_List
,
path_to_folder
,
model_type
)
#convert list of dicts to a dict with lists
results
=
create_dict_from_array
(
results_list
)
#check duration time
duration
=
sum
(
results
[
'
duration
'
])
print
(
f
'
Job done. Duration:
{
duration
:
.
3
f
}
seconds
'
)
#check duration time
duration
=
sum
(
results
[
'
duration
'
])
print
(
f
'
Job done. Duration:
{
duration
:
.
3
f
}
seconds
'
)
#store results for each model by folder
results_df
=
pd
.
DataFrame
(
results
)
output_csv
=
os
.
path
.
join
(
output_dir
,
model_name
+
folder
+
'
.csv
'
)
results_df
.
to_csv
(
output_csv
,
index
=
False
)
print
(
f
"
Ergebnisse gespeichert in:
{
output_csv
}
"
)
time
.
sleep
(
60
)
#store results for each model by folder
results_df
=
pd
.
DataFrame
(
results
)
output_csv
=
os
.
path
.
join
(
output_dir
,
model_name
+
'
.csv
'
)
results_df
.
to_csv
(
output_csv
,
index
=
False
)
print
(
f
"
Ergebnisse gespeichert in:
{
output_csv
}
"
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment