Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
instance_segmentation_classic
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Maciej Wielgosz
instance_segmentation_classic
Commits
68033c66
Commit
68033c66
authored
2 years ago
by
Maciej Wielgosz
Browse files
Options
Downloads
Patches
Plain Diff
update of feature importance script
parent
fcfc03fb
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
helpers/find_param_importance.py
+11
-7
11 additions, 7 deletions
helpers/find_param_importance.py
with
11 additions
and
7 deletions
helpers/find_param_importance.py
+
11
−
7
View file @
68033c66
...
@@ -162,8 +162,10 @@ class FindParamImportance:
...
@@ -162,8 +162,10 @@ class FindParamImportance:
ss
=
StandardScaler
()
ss
=
StandardScaler
()
X_scaled
=
ss
.
fit_transform
(
X
)
X_scaled
=
ss
.
fit_transform
(
X
)
# remove from X_scaled the columns that have zeros or nan
# remove from X_scaled the columns that have nan values
X_scaled
=
X_scaled
[:,
~
np
.
all
(
X_scaled
==
0
,
axis
=
0
)]
X_scaled
=
X_scaled
[:,
~
np
.
isnan
(
X_scaled
).
any
(
axis
=
0
)]
# replace nan values with 0
X_scaled
=
np
.
nan_to_num
(
X_scaled
)
# compute the correlation matrix and put values in the figur
# compute the correlation matrix and put values in the figur
corr
=
np
.
corrcoef
(
X_scaled
.
T
)
corr
=
np
.
corrcoef
(
X_scaled
.
T
)
...
@@ -184,10 +186,11 @@ class FindParamImportance:
...
@@ -184,10 +186,11 @@ class FindParamImportance:
upper
=
np
.
triu
(
corr
)
upper
=
np
.
triu
(
corr
)
# find the indices of the upper triangle that are not zero
# find the indices of the upper triangle that are not zero
# these are the indices of the correlated features
# these are the indices of the correlated features
correlated_features
=
np
.
where
(
upper
>
0.1
)
correlated_features
=
np
.
where
(
upper
>
0.1
5
)
# get the feature names
# get the feature names
feature_names
=
X
.
columns
feature_names
=
X
.
columns
# print the correlated features
# print the correlated features
print
(
'
Correlated features:
'
)
for
i
in
range
(
len
(
correlated_features
[
0
])):
for
i
in
range
(
len
(
correlated_features
[
0
])):
if
correlated_features
[
0
][
i
]
!=
correlated_features
[
1
][
i
]:
if
correlated_features
[
0
][
i
]
!=
correlated_features
[
1
][
i
]:
print
(
feature_names
[
correlated_features
[
0
][
i
]],
feature_names
[
correlated_features
[
1
][
i
]])
print
(
feature_names
[
correlated_features
[
0
][
i
]],
feature_names
[
correlated_features
[
1
][
i
]])
...
@@ -211,12 +214,13 @@ class FindParamImportance:
...
@@ -211,12 +214,13 @@ class FindParamImportance:
X_highest_mean
=
X_highest
.
mean
()
X_highest_mean
=
X_highest
.
mean
()
# print the features with the highest mean values
# print the features with the highest mean values
print
(
'
'
)
print
(
'
Features with the highest mean values:
'
)
print
(
'
Feature name
'
,
'
Mean value
'
)
for
i
in
range
(
len
(
X_highest_mean
)):
for
i
in
range
(
len
(
X_highest_mean
)):
print
(
X_highest_mean
.
index
[
i
],
X_highest_mean
[
i
])
print
(
X_highest_mean
.
index
[
i
],
X_highest_mean
[
i
])
def
gen_plot_of_feature_importance
(
self
,
feature_importance
):
def
gen_plot_of_feature_importance
(
self
,
feature_importance
):
plt
.
figure
(
figsize
=
(
10
,
6
))
plt
.
figure
(
figsize
=
(
10
,
6
))
plt
.
barh
(
feature_importance
[
'
feature
'
],
feature_importance
[
'
importance
'
])
plt
.
barh
(
feature_importance
[
'
feature
'
],
feature_importance
[
'
importance
'
])
...
@@ -236,8 +240,8 @@ class FindParamImportance:
...
@@ -236,8 +240,8 @@ class FindParamImportance:
print
(
'
Done
'
)
print
(
'
Done
'
)
print
(
'
Plot saved to:
'
,
self
.
plot_file_path
)
print
(
'
Plot saved to:
'
,
self
.
plot_file_path
)
cluster_labels
=
self
.
cluster_results_kmeans
()
#
cluster_labels = self.cluster_results_kmeans()
cluster_labels_dbscan
=
self
.
cluster_dbscan
()
#
cluster_labels_dbscan = self.cluster_dbscan()
self
.
find_correlation
()
self
.
find_correlation
()
self
.
find_highest_params_values
()
self
.
find_highest_params_values
()
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment