"""Fit a random-forest regressor on spreadsheet data and export its results.

The script reads a training and a test workbook from the ``florescence``
directory, fits a ``RandomForestRegressor``, writes the test predictions to a
text file and renders one tree of the forest to a PDF.
"""
import openpyxl
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn import linear_model, tree
import pydotplus

# Module-level views of the most recently loaded data.  They are kept (and
# still populated) so that code reading these names keeps working.
inputs_train = []
output_train = []
inputs_test = []
output_test = []

# 1-based worksheet column the target value is read from.
# NOTE(review): feature columns are read as 1..max_column-1, so the two only
# line up when the sheet has exactly five columns -- confirm the sheet layout.
_TARGET_COLUMN = 5


def _load_sheet(path, inputs, outputs):
    """Load features and targets from the first worksheet of *path*.

    Row 1 is skipped (treated as a header).  For every following row up to
    ``ws.max_row`` the target is taken from column ``_TARGET_COLUMN`` and the
    features from columns ``1 .. ws.max_column - 1``.

    Args:
        path: Path of the ``.xlsx`` workbook to read.
        inputs: List that receives one list of feature values per data row.
        outputs: List that receives one single-element list ``[target]`` per
            data row.

    Returns:
        A tuple ``(x, y)`` where ``x`` is *inputs* itself and ``y`` is a
        flattened NumPy array of the target values.
    """
    wb = openpyxl.load_workbook(path)
    ws = wb.worksheets[0]
    nrows = ws.max_row
    ncols = ws.max_column

    # Clear in place: the containers are module-level, so without this a
    # second call would append the same rows again and duplicate the data.
    del inputs[:]
    del outputs[:]

    outputs.extend(
        [ws.cell(row=row_idx, column=_TARGET_COLUMN).value]
        for row_idx in range(2, nrows + 1)
    )
    y = np.array(outputs).ravel()

    inputs.extend(
        list(row)
        for row in ws.iter_rows(min_row=2, min_col=1, max_col=ncols - 1,
                                max_row=nrows, values_only=True)
    )
    x = inputs

    print(x)
    print(y)
    return x, y


def train_data():
    """Load the training set from ``florescence_train_data.xlsx``.

    Refreshes the module-level ``inputs_train`` / ``output_train`` lists and
    prints the loaded values.

    Returns:
        ``(x, y)``: the feature rows (``inputs_train``) and a 1-D NumPy array
        of targets.
    """
    return _load_sheet('florescence/florescence_train_data.xlsx',
                       inputs_train, output_train)


def test_data():
    """Load the test set from ``florescence_test_data.xlsx``.

    Refreshes the module-level ``inputs_test`` / ``output_test`` lists and
    prints the loaded values.

    Returns:
        ``(x, y)``: the feature rows (``inputs_test``) and a 1-D NumPy array
        of targets.
    """
    return _load_sheet('florescence/florescence_test_data.xlsx',
                       inputs_test, output_test)


def establish_model():
    """Train the forest, score it on the test set and export the artefacts.

    Side effects:
        * prints the fitted estimator, the predictions and the test R^2;
        * writes one prediction per line to
          ``florescence/florescence_output_predicts.txt``;
        * writes the tree at index 5 of the forest to
          ``florescence/florescence_rule_5.pdf``.
    """
    x_train, y_train = train_data()

    # ``criterion`` is deliberately left at its default.  The original passed
    # 'mse', a spelling that newer scikit-learn releases reject (renamed to
    # 'squared_error'); the default is that same criterion under either name.
    rf = RandomForestRegressor(n_estimators=500, max_features=4,
                               oob_score=True, n_jobs=-1, max_depth=None)
    # X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3)
    # print(X_train)
    # print(y_train)
    rf.fit(x_train, y_train)
    print(rf)

    x_test, y_test = test_data()
    predicts = rf.predict(x_test)
    print(predicts)

    predicts_list = predicts.tolist()
    with open('florescence/florescence_output_predicts.txt', 'w') as file:
        for data in predicts_list:
            file.write(str(data))
            file.write('\n')

    print('test R^2:', rf.score(x_test, y_test))

    # Export a single tree of the ensemble as a Graphviz-rendered PDF.
    tree_of_rf = rf.estimators_[5]
    names_list = ['Temperature', 'Humidity', 'Illumination', 'RLAI']
    print(tree_of_rf)
    dot_data = tree.export_graphviz(tree_of_rf, out_file=None,
                                    feature_names=names_list,
                                    filled=True, rounded=True, impurity=True,
                                    node_ids=False, special_characters=False)
    graph = pydotplus.graph_from_dot_data(dot_data)
    graph.write_pdf("florescence/florescence_rule_5.pdf")
    # return X_train,X_test,y_train,y_test,predicts


# def plot():
#     X_train,X_test,y_train,y_test,predicts=establish_model()
#     train_samples=[]
#     single_sample=[]
#     test_samples=[]
#     # print(len(X_train))
#     # print(y_train)
#     for i in range(len(X_train)):
#         single_sample.append(i+1)
#         train_samples.append(single_sample)
#         single_sample=[]
#     for i in range(len(X_test)):
#         single_sample.append(i+1)
#         test_samples.append(single_sample)
#         single_sample=[]
#     lw=2
#     plt.plot(test_samples, y_test, color='b', label='test scatter')
#     # plt.plot(test_samples, y_test, color='y', lw=lw, label='test data ')
#     plt.plot(test_samples,predicts, color='c', lw=lw, label='predict data')
#     plt.xlabel('data')
#     plt.ylabel('target')
#     plt.title('RFR')
#     plt.legend()
#     plt.show()
#     y_test=np.ndarray.tolist(y_test)
#     a=[]
#     b=[]
#     for data in y_test:
#         a.append(data)
#         b.append(a)
#         a=[]
#     # c=[]
#     d=[]
#     predicts=np.ndarray.tolist(predicts)
#     for data in predicts:
#         c.append(data)
#         d.append(c)
#         c=[]
#     # print(b)
#     # print(d)
#     model=linear_model.LinearRegression()
#     model.fit(b,d)
#     print('pre and fact R^2:',model.score(b,d))


if __name__ == '__main__':
    establish_model()