In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import pickle
from sklearn.metrics import mean_squared_error, r2_score
from xgboost import XGBRegressor  # 修正导入方式

# Load the survey dataset.
df = pd.read_csv('fitness analysis.csv')

# Preview the first five rows.
# NOTE(review): this preview includes the 'Your name' column; clear the cell
# output before sharing the notebook.
print(df.head())


def _strip_if_str(value):
    """Return ``value`` without surrounding whitespace if it is a str, else unchanged."""
    return value.strip() if isinstance(value, str) else value


# Trim leading/trailing whitespace from every string cell.
# DataFrame.applymap is deprecated (it emits a FutureWarning on recent pandas),
# so the element-wise function is applied column by column through Series.map,
# which behaves the same on older and newer pandas releases.
df = df.apply(lambda column: column.map(_strip_if_str))

# Trim whitespace from the column names so later lookups by exact name succeed.
df.columns = df.columns.str.strip()

# Select the columns used as model inputs.
feature_columns = [
    'Your gender',
    'How important is exercise to you ?',
    'How healthy do you consider yourself?',
]
# One-hot encode the categorical feature(s); numeric columns pass through as-is.
X = pd.get_dummies(df[feature_columns])


def _leading_int(age_range):
    """Return the integer before the first space of an age label, e.g. '19 to 25' -> 19."""
    first_token = age_range.split(' ')[0]
    return int(first_token)


# Regression target: the first number of each age-range label
# (assumes every label starts with an integer).
y = df['Your age'].apply(_leading_int)

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Create and train the random-forest regression model.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Persist the trained model to disk.
with open('2.2.3_model.pkl', 'wb') as model_file:
    pickle.dump(rf_model, model_file)

# Predict on the held-out test set and save the predictions.
y_pred = rf_model.predict(X_test)
results_df = pd.DataFrame(y_pred, columns=['预测结果'])
results_df.to_csv('2.2.3_results.txt', index=False)

# Evaluate the model and record the results.
# For scikit-learn regressors `score` returns R^2, so `test_score` and `r2`
# report the same quantity; both are kept to preserve the report layout.
train_score = rf_model.score(X_train, y_train)
test_score = rf_model.score(X_test, y_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# The report contains non-ASCII text. Without an explicit encoding, open() uses
# the platform locale, which can raise UnicodeEncodeError or produce files that
# differ between machines, so UTF-8 is requested explicitly.
with open('2.2.3_report.txt', 'w', encoding='utf-8') as report_file:
    report_file.write(f'训练集得分: {train_score:.4f}\n')
    report_file.write(f'测试集得分: {test_score:.4f}\n')
    report_file.write(f'均方误差(MSE): {mse:.4f}\n')
    report_file.write(f'决定系数(R^2): {r2:.4f}\n')

# Train an XGBoost regressor on the same train/test split as a second model.
# NOTE(review): the original heading describes this step as analysing and
# correcting error cases, but the code only fits the model, predicts and
# reports aggregate metrics; no per-sample error analysis is performed here.
xgb_model = XGBRegressor(n_estimators=100, random_state=42)
xgb_model.fit(X_train, y_train)
y_pred_xgb = xgb_model.predict(X_test)

# Save the XGBoost predictions.
results_df_xgb = pd.DataFrame(y_pred_xgb, columns=['预测结果'])
results_df_xgb.to_csv('2.2.3_results_xgb.txt', index=False)

# Compute every metric before opening the report so that a failure while
# scoring cannot leave a truncated report file behind.
xgb_train_score = xgb_model.score(X_train, y_train)
xgb_test_score = xgb_model.score(X_test, y_test)
xgb_mse = mean_squared_error(y_test, y_pred_xgb)
xgb_r2 = r2_score(y_test, y_pred_xgb)

# The report contains non-ASCII text, so the encoding is set explicitly instead
# of relying on the platform's locale default.
with open('2.2.3_report_xgb.txt', 'w', encoding='utf-8') as xgb_report_file:
    xgb_report_file.write(f'XGBoost训练集得分: {xgb_train_score}\n')
    xgb_report_file.write(f'XGBoost测试集得分: {xgb_test_score}\n')
    xgb_report_file.write(f'XGBoost均方误差(MSE): {xgb_mse}\n')
    xgb_report_file.write(f'XGBoost决定系数(R^2): {xgb_r2}\n')
                         Timestamp Your name  Your gender  Your age   \
0  2019/07/03 11:48:07 PM GMT+5:30    Parkavi       Female  19 to 25   
1  2019/07/03 11:51:22 PM GMT+5:30   Nithilaa       Female  19 to 25   
2  2019/07/03 11:56:28 PM GMT+5:30  Karunya v       Female  15 to 18   
3   2019/07/04 5:43:35 AM GMT+5:30    Anusha        Female  15 to 18   
4   2019/07/04 5:44:29 AM GMT+5:30   Nikkitha       Female  19 to 25   

   How important is exercise to you ?  \
0                                   2   
1                                   4   
2                                   3   
3                                   4   
4                                   3   

  How do you describe your current level of fitness ?  \
0                                               Good    
1                                          Very good    
2                                               Good    
3                                               Good    
4                                              Unfit    

  How often do you exercise?  \
0                      Never   
1                      Never   
2        1 to 2 times a week   
3        3 to 4 times a week   
4                      Never   

  What barriers, if any, prevent you from exercising more regularly?           (Please select all that apply)  \
0    I don't have enough time;I can't stay motivated                                                            
1     I don't have enough time;I'll become too tired                                                            
2                             I can't stay motivated                                                            
3                           I don't have enough time                                                            
4                             I can't stay motivated                                                            

  What form(s) of exercise do you currently participate in ?                        (Please select all that apply)  \
0                            I don't really exercise                                                                 
1                        Walking or jogging;Swimming                                                                 
2                                 Walking or jogging                                                                 
3             Walking or jogging;Gym;Lifting weights                                                                 
4                            I don't really exercise                                                                 

  Do you exercise ___________ ?  \
0       I don't really exercise   
1                  With a group   
2                         Alone   
3                         Alone   
4       I don't really exercise   

  What time if the day do you prefer to exercise?  \
0                                   Early morning   
1                                   Early morning   
2                                   Early morning   
3                                         Evening   
4                                         Evening   

  How long do you spend exercising per day ?  \
0                    I don't really exercise   
1                    I don't really exercise   
2                                 30 minutes   
3                                     1 hour   
4                    I don't really exercise   

  Would you say you eat a healthy balanced diet ?  \
0                                      Not always   
1                                      Not always   
2                                      Not always   
3                                             Yes   
4                                             Yes   

  What prevents you from eating a healthy balanced diet, If any?                         (Please select all that apply)  \
0  Ease of access to fast food;Temptation and cra...                                                                      
1  Ease of access to fast food;Temptation and cra...                                                                      
2                            Temptation and cravings                                                                      
3                            Temptation and cravings                                                                      
4  Ease of access to fast food;Temptation and cra...                                                                      

   How healthy do you consider yourself?  \
0                                      3   
1                                      4   
2                                      4   
3                                      4   
4                                      4   

  Have you ever recommended your friends to follow a fitness routine?  \
0                                                Yes                    
1                                                Yes                    
2                                                Yes                    
3                                                Yes                    
4                                                Yes                    

  Have you ever purchased a fitness equipment?  \
0                                           No   
1                                           No   
2                                          Yes   
3                                           No   
4                                           No   

  What motivates you to exercise?         (Please select all that applies )  
0  I'm sorry ... I'm not really interested in exe...                         
1  I want to be fit;I want to be flexible;I want ...                         
2                                   I want to be fit                         
3             I want to be fit;I want to lose weight                         
4                                   I want to be fit                         
C:\Users\hello\AppData\Local\Temp\ipykernel_177436\400730862.py:15: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.
  df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)
In [ ]: