使用随机森林(Random Forest,RF)写了一个快速训练模板,方便日后复用。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

# Quick Random Forest training template: load two CSV files, join them,
# split into train/test sets, fit a classifier, and print evaluation metrics.

# Load the data and join the two tables on their shared 'addr' column
# (pd.merge defaults to an inner join, so only matching rows are kept).
dataset = pd.read_csv(r'totalDataSet.csv')
accountDataSet = pd.read_csv(r'accountFeature.csv')
dataset = pd.merge(dataset, accountDataSet, on='addr')

# Features and target are selected by position in the merged frame:
# columns 1..91 are the features, column 92 is the label.
# NOTE(review): these positions depend on the column order of both CSV
# files -- confirm they still match whenever the input schema changes.
X = dataset.iloc[:, 1:92]
Y = dataset.iloc[:, 92]

# Split the data set: 33% held out for testing, with a fixed seed.
XX_train, XX_test, yy_train, yy_test = train_test_split(
    X, Y, test_size=0.33, random_state=7
)

# Train the model.
# random_state is fixed so that repeated runs give the same model: even with
# bootstrap=False the forest still samples candidate features at random for
# each split, so an unseeded forest yields different metrics on every run.
clf = RandomForestClassifier(
    n_estimators=400,
    criterion='entropy',
    bootstrap=False,
    n_jobs=-1,
    random_state=7,
).fit(XX_train, yy_train)

# Predict on the held-out test data.
preds = clf.predict(XX_test)

# Report the test results.
# average=None returns one value per class, ordered by sorted label.
prec, rec, f1, num = precision_recall_fscore_support(yy_test, preds, average=None)
print("Random Forest Classifier")
accuracy = accuracy_score(yy_test, preds)
print('Accuracy: %.3f%%' % (accuracy*100))
# Index 1 selects the second class in sorted label order.
# NOTE(review): presumably the positive class of a binary 0/1 label -- confirm;
# this raises IndexError if the test labels/predictions contain only one class.
print("Precision:%.3f%% \nRecall:%.3f%% \nF1 Score:%.3f%%"%(prec[1]*100,rec[1]*100,f1[1]*100))
micro_f1 = f1_score(yy_test, preds, average='micro')
print("Micro-Average F1 Score:%.3f%%"%(micro_f1*100))