In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import pickle
from sklearn.metrics import mean_squared_error, r2_score
from xgboost import XGBRegressor # 修正导入方式
# 加载数据集
df = pd.read_csv('fitness analysis.csv')
# 显示前五行数据
print(df.head())
# 去除所有字符串字段的前后空格
df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)
# 检查和清理列名
df.columns = df.columns.str.strip()
# 选择相关特征进行建模
X = df[['Your gender', 'How important is exercise to you ?', 'How healthy do you consider yourself?']]
X = pd.get_dummies(X) # 将分类变量转为数值变量
# 将年龄段转为数值变量
y = df['Your age'].apply(lambda x: int(x.split(' ')[0])) # 假设年龄段为整数
# 将数据集划分为训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# 创建并训练随机森林回归模型
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
# 保存训练好的模型
with open('2.2.3_model.pkl', 'wb') as model_file:
pickle.dump(rf_model, model_file)
# 进行结果预测
y_pred = rf_model.predict(X_test)
results_df = pd.DataFrame(y_pred, columns=['预测结果'])
results_df.to_csv('2.2.3_results.txt', index=False)
# 使用测试工具对模型进行测试,并记录测试结果
train_score = rf_model.score(X_train, y_train)
test_score = rf_model.score(X_test, y_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
with open('2.2.3_report.txt', 'w') as report_file:
report_file.write(f'训练集得分: {train_score:.4f}\n')
report_file.write(f'测试集得分: {test_score:.4f}\n')
report_file.write(f'均方误差(MSE): {mse:.4f}\n')
report_file.write(f'决定系数(R^2): {r2:.4f}\n')
# 运用工具分析算法中错误案例产生的原因并进行纠正
# 这里以XGBoost为例进行错误案例分析
xgb_model = XGBRegressor(n_estimators=100, random_state=42)
xgb_model.fit(X_train, y_train)
y_pred_xgb = xgb_model.predict(X_test)
results_df_xgb = pd.DataFrame(y_pred_xgb, columns=['预测结果'])
results_df_xgb.to_csv('2.2.3_results_xgb.txt', index=False)
with open('2.2.3_report_xgb.txt', 'w') as xgb_report_file:
xgb_report_file.write(f'XGBoost训练集得分: {xgb_model.score(X_train, y_train)}\n')
xgb_report_file.write(f'XGBoost测试集得分: {xgb_model.score(X_test, y_test)}\n')
xgb_report_file.write(f'XGBoost均方误差(MSE): {mean_squared_error(y_test, y_pred_xgb)}\n')
xgb_report_file.write(f'XGBoost决定系数(R^2): {r2_score(y_test, y_pred_xgb)}\n')
Timestamp Your name Your gender Your age \ 0 2019/07/03 11:48:07 PM GMT+5:30 Parkavi Female 19 to 25 1 2019/07/03 11:51:22 PM GMT+5:30 Nithilaa Female 19 to 25 2 2019/07/03 11:56:28 PM GMT+5:30 Karunya v Female 15 to 18 3 2019/07/04 5:43:35 AM GMT+5:30 Anusha Female 15 to 18 4 2019/07/04 5:44:29 AM GMT+5:30 Nikkitha Female 19 to 25 How important is exercise to you ? \ 0 2 1 4 2 3 3 4 4 3 How do you describe your current level of fitness ? \ 0 Good 1 Very good 2 Good 3 Good 4 Unfit How often do you exercise? \ 0 Never 1 Never 2 1 to 2 times a week 3 3 to 4 times a week 4 Never What barriers, if any, prevent you from exercising more regularly? (Please select all that apply) \ 0 I don't have enough time;I can't stay motivated 1 I don't have enough time;I'll become too tired 2 I can't stay motivated 3 I don't have enough time 4 I can't stay motivated What form(s) of exercise do you currently participate in ? (Please select all that apply) \ 0 I don't really exercise 1 Walking or jogging;Swimming 2 Walking or jogging 3 Walking or jogging;Gym;Lifting weights 4 I don't really exercise Do you exercise ___________ ? \ 0 I don't really exercise 1 With a group 2 Alone 3 Alone 4 I don't really exercise What time if the day do you prefer to exercise? \ 0 Early morning 1 Early morning 2 Early morning 3 Evening 4 Evening How long do you spend exercising per day ? \ 0 I don't really exercise 1 I don't really exercise 2 30 minutes 3 1 hour 4 I don't really exercise Would you say you eat a healthy balanced diet ? \ 0 Not always 1 Not always 2 Not always 3 Yes 4 Yes What prevents you from eating a healthy balanced diet, If any? (Please select all that apply) \ 0 Ease of access to fast food;Temptation and cra... 1 Ease of access to fast food;Temptation and cra... 2 Temptation and cravings 3 Temptation and cravings 4 Ease of access to fast food;Temptation and cra... How healthy do you consider yourself? \ 0 3 1 4 2 4 3 4 4 4 Have you ever recommended your friends to follow a fitness routine? \ 0 Yes 1 Yes 2 Yes 3 Yes 4 Yes Have you ever purchased a fitness equipment? \ 0 No 1 No 2 Yes 3 No 4 No What motivates you to exercise? (Please select all that applies ) 0 I'm sorry ... I'm not really interested in exe... 1 I want to be fit;I want to be flexible;I want ... 2 I want to be fit 3 I want to be fit;I want to lose weight 4 I want to be fit
C:\Users\hello\AppData\Local\Temp\ipykernel_177436\400730862.py:15: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead. df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)
In [ ]: