-
Thomas Purcell authored
Comment out blank lines
Thomas Purcell authoredComment out blank lines
classification.py 3.64 KiB
# Copyright 2021 Thomas A. R. Purcell
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Retrain the SVM decision boundaries of a classification model
Functions:
update_model_svm: Generate a new model with an updated SVM from scikit-learn
"""
from sklearn import svm
import numpy as np
from sissopp import ModelClassifier
def update_model_svm(model, c=1.0, max_iter=-1, tol=0.0001, filename=None):
    """Generate a new model with an updated SVM from scikit-learn.

    For every task in the model a ``LinearSVC`` is fitted on the min-max
    scaled training values of the model's features. The fitted coefficients
    are then transformed back so that they apply to the unscaled feature
    values, and the predictions for the training (and, if present, test)
    samples are collected. A new ``ModelClassifier`` is built from the
    original model and these updated arrays.

    Args:
        model (list of str, str, or ModelClassifier): The model to be updated.
            A str is passed to ``ModelClassifier`` as a single filename; a
            list must hold exactly the train and test filenames, in that
            order.
        c (float): The new c value to use (passed to ``LinearSVC`` as ``C``)
        max_iter (int): The maximum number of iterations to use
        tol (float): Maximum allowable error
        filename (str): Filename to store the updated model; nothing is
            written when this is None or empty

    Returns:
        ModelClassifier: The updated model with better SVM parameters

    Raises:
        ValueError: If ``model`` is a list that does not consist of exactly
            two strings.
    """
    if isinstance(model, str):
        model = ModelClassifier(model)
    elif isinstance(model, list):
        if (
            len(model) != 2
            or not isinstance(model[0], str)
            or not isinstance(model[1], str)
        ):
            raise ValueError(
                "If model is a list it must only contain the train/test filenames in that order."
            )
        model = ModelClassifier(model[0], model[1])

    # Running offsets of the current task inside the concatenated
    # train/test arrays.
    start_train = 0
    start_test = 0

    updated_coefs = []
    updated_prop_train_est = []
    updated_prop_test_est = []

    for ts_train, ts_test in zip(model.task_size_train, model.task_size_test):
        train_slice = slice(start_train, start_train + ts_train)
        X = np.column_stack([feat.value[train_slice] for feat in model.feats])

        # Min-max scaling of every feature column: x_scaled = a * (x - c0).
        # NOTE(review): a column with zero range gives a division by zero
        # here (inf/nan scale factor) - confirm whether that can occur.
        c0 = np.min(X, axis=0)
        a = 1.0 / (np.max(X, axis=0) - c0)

        # NOTE(review): the default max_iter=-1 is forwarded unchanged;
        # LinearSVC documents max_iter as the maximum number of iterations,
        # so confirm that a negative value is accepted by the installed
        # scikit-learn version.
        lin_clf = svm.LinearSVC(C=c, max_iter=max_iter, tol=tol)
        lin_clf.fit(a * (X - c0), model.prop_train[train_slice])

        # One row per decision boundary: feature coefficients followed by
        # the intercept in the last column.
        # NOTE(review): with fix_intercept the fitted intercept is dropped,
        # but the classifier is still fitted (and later predicts) with its
        # own intercept - confirm this is intended.
        fitted_coef = lin_clf.coef_
        if model.fix_intercept:
            intercept_col = np.zeros(len(fitted_coef))
        else:
            intercept_col = lin_clf.intercept_
        task_coefs = np.column_stack((fitted_coef, intercept_col))

        # Undo the scaling so the boundary applies to the raw features:
        #   w . (a * (x - c0)) + b == (a * w) . x + (b - sum(a * w * c0))
        for cc in range(len(fitted_coef)):
            for dd in range(model.n_dim):
                task_coefs[cc][dd] = a[dd] * fitted_coef[cc][dd]
                task_coefs[cc][-1] -= c0[dd] * task_coefs[cc][dd]
        updated_coefs.append(task_coefs)

        updated_prop_train_est.append(lin_clf.predict(a * (X - c0)))

        if ts_test > 0:
            test_slice = slice(start_test, start_test + ts_test)
            X_test = np.column_stack(
                [feat.test_value[test_slice] for feat in model.feats]
            )
            # The test samples are scaled with the training min/range.
            updated_prop_test_est.append(lin_clf.predict(a * (X_test - c0)))
        else:
            updated_prop_test_est.append([])

        start_train += ts_train
        start_test += ts_test

    print("The updated coefficient for the decision boundaries:")
    print(updated_coefs)

    # np.vstack replaces np.row_stack, a deprecated alias of the same function.
    new_model = ModelClassifier(
        model,
        np.vstack(updated_coefs),
        np.concatenate(updated_prop_train_est),
        np.concatenate(updated_prop_test_est),
    )

    if filename:
        new_model.to_file(filename, True)

    return new_model