결정 트리

Decision-tree practice: training a DecisionTreeClassifier on the iris dataset and exporting it for Graphviz visualization.

# Train a decision tree on the iris dataset and export the fitted model
# to Graphviz .dot format so it can be rendered as a diagram.
import warnings

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, export_graphviz

# Tutorial-style global suppression of library warnings.
warnings.filterwarnings('ignore')

# Hold out 20% of the data as a test set; seeds fixed for reproducibility.
iris_data = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris_data.data,
    iris_data.target,
    test_size=0.2,
    random_state=11,
)

# Fit the classifier with a fixed random_state so split choices repeat.
dt_clf = DecisionTreeClassifier(random_state=156)
dt_clf.fit(X_train, y_train)

# Write the fitted tree to "tree.dot" (render it with the graphviz `dot` tool).
export_graphviz(
    dt_clf,
    out_file="tree.dot",
    class_names=iris_data.target_names,
    feature_names=iris_data.feature_names,
    impurity=True,
    filled=True,
)
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Re-train a default-depth tree and measure its accuracy on the held-out
# test set (X_train/X_test/y_train/y_test come from the split above).
dt_clf = DecisionTreeClassifier(random_state=156)
dt_clf.fit(X_train, y_train)
pred = dt_clf.predict(X_test)
accuracy = accuracy_score(y_test, pred)
print('결정 트리 예측 정확도:  {0:.4f}'.format(accuracy))

# BUG FIX: the original printed a literal backslash-n ('\\n') instead of a
# newline, and misspelled the class name as "DeicistionTreeClassifier".
print('DecisionTreeClassifier 기본 하이퍼 파라미터: \n', dt_clf.get_params())

# Sweep max_depth and report test-set accuracy for each setting to show
# how tree depth affects generalization.
max_depths = [6, 8, 10, 12, 16, 20, 24]
for depth in max_depths:
    dt_clf = DecisionTreeClassifier(max_depth=depth, random_state=156)
    dt_clf.fit(X_train, y_train)
    pred = dt_clf.predict(X_test)
    accuracy = accuracy_score(y_test, pred)
    print('max_depth={0} 정확도: {1:.4f}'.format(depth, accuracy))

앙상블 학습

Ensemble-learning practice: a soft-voting classifier (logistic regression + KNN) on the breast-cancer dataset.

import pandas as pd
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the breast-cancer dataset and preview the first few rows.
cancer = load_breast_cancer()
data_df = pd.DataFrame(cancer.data, columns=cancer.feature_names)
data_df.head(3)

# Individual base models: logistic regression and KNN.
# NOTE(review): LogisticRegression's default max_iter can raise a
# convergence warning on this dataset; warnings are suppressed earlier.
lr_clf = LogisticRegression()
knn_clf = KNeighborsClassifier(n_neighbors=8)

# Combine the base models into a soft-voting ensemble.
# BUG FIX: the original estimator name was ' KNN' (leading space), a typo
# in the key used to address the estimator (e.g. in get_params/set_params).
vo_clf = VotingClassifier(
    estimators=[('LR', lr_clf), ('KNN', knn_clf)],
    voting='soft',
)

X_train, X_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, test_size=0.2, random_state=156
)

# Train/predict/evaluate the voting classifier.
vo_clf.fit(X_train, y_train)
pred = vo_clf.predict(X_test)
print('Voting {0:.4f}'.format(accuracy_score(y_test, pred)))

# Train/predict/evaluate each base model on its own for comparison.
for classifier in [lr_clf, knn_clf]:
    classifier.fit(X_train, y_train)
    pred = classifier.predict(X_test)
    class_name = classifier.__class__.__name__
    print('{0} 정확도: {1:.4f}'.format(class_name, accuracy_score(y_test, pred)))
랜덤 포레스트