Define your own sklearn classifier and evaluate it on the toy dataset. An example is shown below:
# Example: build a logistic-regression classifier with random_state fixed at 0
# and pass it to the evaluate() helper defined further down in this file.
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(random_state=0)
evaluate(clf)
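Try to achieve a test accuracy of 0.85 or better! For inspiration on possible classifiers, see the scikit-learn user guide on supervised learning (https://scikit-learn.org/stable/supervised_learning.html).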
Enter your code below, then press Shift+Enter:
from statistics import mean
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_validate
import tabulate
# Toy dataset shared by every evaluate() call: 1000 generated samples with
# 10 informative features. random_state is fixed at 0, presumably so that
# every run works on the same data.
X, y = make_classification(n_samples=1000, n_informative=10, random_state=0)
def evaluate(clf, cv=5):
    """Cross-validate ``clf`` on the module-level toy dataset and report it.

    Runs ``cross_validate`` with accuracy scoring on the module-level ``X``
    and ``y``, prints the mean test accuracy together with the summed fit
    and scoring times, and returns a per-split accuracy table.

    Args:
        clf: The estimator handed to ``cross_validate``.
        cv: Cross-validation setting forwarded unchanged to
            ``cross_validate``. Defaults to 5, the value that used to be
            hard-coded.

    Returns:
        The object produced by ``tabulate.tabulate(..., tablefmt='html')``:
        one row per split with its 1-based number and its test accuracy.
    """
    cv_result = cross_validate(clf, X, y, scoring='accuracy', cv=cv)
    scores = cv_result['test_score']
    time_fit = sum(cv_result['fit_time'])
    time_score = sum(cv_result['score_time'])

    print(f"Mean test accuracy: {mean(scores):.3f}")
    print(f"Total training time: {time_fit:.1f} seconds")
    print(f"Total time for scoring: {time_score:.1f} seconds")

    # Number the splits from the scores actually returned instead of a fixed
    # [1..5], so the table stays well-formed for any ``cv`` value.
    show_result = {
        'split': list(range(1, len(scores) + 1)),
        'accuracy': scores,
    }
    print("Accuracy for each cross validation split:")
    # '.3f' (three decimals) matches the formatting of the mean printed above;
    # a bare '.3' would mean three significant digits in general format.
    return tabulate.tabulate(
        show_result, tablefmt='html', headers='keys', floatfmt='.3f'
    )