In [1]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import sklearn.metrics as met
from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neural_network import MLPClassifier


In [15]:
def _print_report(title, model, x, y):
    """Print accuracy, the per-class report and the confusion matrix of ``model`` on ``(x, y)``."""
    print(title)
    y_pred = model.predict(x)
    print('Preciznost', met.accuracy_score(y, y_pred))
    print(met.classification_report(y, y_pred))
    print()

    cnf_matrix = met.confusion_matrix(y, y_pred)
    print("Matrica konfuzije", cnf_matrix, sep="\n")
    print("\n")


def classification(message, x_train, x_test, y_train, y_test, random_state=None):
    """Grid-search an MLP classifier and print its train and test reports.

    Runs a 5-fold cross-validated ``GridSearchCV`` over ``MLPClassifier``
    settings (SGD solver, two learning-rate schedules, four activations and
    one to three hidden layers of 5-9 neurons each), then prints the best
    parameters followed by accuracy, classification report and confusion
    matrix for the training set and for the test set.

    Parameters
    ----------
    message : str
        Heading printed before the results (identifies the experiment).
    x_train, x_test
        Feature matrices for fitting and for evaluation.
    y_train, y_test
        Target labels matching ``x_train`` and ``x_test``.
    random_state : int or None, optional
        Seed forwarded to ``MLPClassifier``. The default ``None`` keeps the
        unseeded behaviour; pass an integer to make the run repeatable.

    Returns
    -------
    None
        All results are written to standard output.
    """
    print(message, '\n')

    # Candidate architectures: (5,), ..., (9,), (5, 5), ..., (9, 9, 9).
    hidden_layer_sizes = [
        (width,) * depth
        for depth in range(1, 4)
        for width in range(5, 10)
    ]

    # Parameter grid explored by the cross-validated search.
    params = [{'solver': ['sgd'],
               'learning_rate': ['constant', 'adaptive'],
               'activation': ['identity', 'logistic', 'tanh', 'relu'],
               'hidden_layer_sizes': hidden_layer_sizes,
               'max_iter': [500]
               }]

    clf = GridSearchCV(MLPClassifier(random_state=random_state), params, cv=5)
    clf.fit(x_train, y_train)

    print("Najbolji parametri:")
    print(clf.best_params_)
    print()

    # Same report for both sets, so it lives in one helper.
    _print_report("Izvestaj za trening skup:", clf, x_train, y_train)
    _print_report("Izvestaj za test skup:", clf, x_test, y_test)


In [16]:

# Location of the car data set. The default is the original machine-specific
# path; set the CAR_CSV environment variable to load the file from elsewhere
# without editing the notebook.
CAR_CSV_PATH = os.environ.get(
    "CAR_CSV",
    "C:/Users/student/Desktop/ipIndustija4/ipVezbe92021/vezbe9/car.csv",
)
df = pd.read_csv(CAR_CSV_PATH)


In [21]:
# show the column names and the first 5 instances
#print('Prvih 5 instanci', df.head(), sep='\n')
#print('\n\n')


#print('Opis podataka', df.describe(), sep='\n')
#print('\n\n')

#print('Klase:', df["class"].value_counts(), sep='\n')
#print('\n\n')


In [6]:
# Feature columns: everything after the first column, with the target removed
# explicitly so the label can never leak into the inputs (a no-op when
# "class" is not among those columns).
# NOTE(review): assumes column 0 is not a usable feature — confirm against car.csv.
features = df.columns[1:].drop("class", errors="ignore")
x = df[features]
y = df["class"]

# Number of feature columns in x.
num_features = x.shape[1]


In [8]:
# Stratified 70/30 split into training and test sets. The fixed seed makes the
# split, and every number derived from it, repeatable across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.7, stratify=y, random_state=42
)


In [9]:
# Standardise the features. The scaler is fitted on the training set only and
# then applied to both sets. The original row index is kept so the scaled
# frames still line up by label with y_train / y_test.
scaler = preprocessing.StandardScaler().fit(x_train)
x_train = pd.DataFrame(scaler.transform(x_train), columns=features, index=x_train.index)
x_test = pd.DataFrame(scaler.transform(x_test), columns=features, index=x_test.index)


In [13]:
# Fit a two-component PCA on the training features only.
pca = PCA(n_components=2).fit(x_train)
pca_columns = ['pca%d' % k for k in range(1, pca.n_components_ + 1)]

# Share of the total variance captured by each retained component.
print('explained_variance_ratio_  ')
for k, ratio in enumerate(pca.explained_variance_ratio_, start=1):
    print("pca%d: %.10f" % (k, ratio))
print()

# Project both sets with the PCA fitted on the training set.
x_pca_train = pd.DataFrame(pca.transform(x_train), columns=pca_columns)
x_pca_test = pd.DataFrame(pca.transform(x_test), columns=pca_columns)


explained_variance_ratio_  
pca1: 0.8119190563
pca2: 0.1445945170



In [19]:
# Experiment 1: grid-searched MLP on the standardised original features.
classification(
    message='Original',
    x_train=x_train,
    x_test=x_test,
    y_train=y_train,
    y_test=y_test,
)


In [20]:
# Experiment 2: the same search on the PCA projection of the features.
classification(
    message='PCA',
    x_train=x_pca_train,
    x_test=x_pca_test,
    y_train=y_train,
    y_test=y_test,
)
