Code
library(tidyverse)
library(rsample)
library(yardstick)
Ejemplo de predicción
library(tidyverse)
library(rsample)
library(yardstick)
# Load the Kaggle training data and preview its first rows.
df_train <- read_csv("../datos/datos-kaggle/train.csv")
df_train |> head()

# Load the Kaggle test data (the rows to predict) and preview its first rows.
df_test <- read_csv("../datos/datos-kaggle/test.csv")
df_test |> head()
# Share of df_train rows assigned to the training split; the remainder is
# held out for evaluation.
proporcion_train <- 0.70

# Fix the RNG seed so the random split is reproducible.
set.seed(2025)
particion <-
  initial_split(data = df_train, prop = proporcion_train)

# Materialise the two sides of the split.
train <- training(particion)
test <- testing(particion)
# Model 1: simple linear regression of total_updrs on shimmer, fitted on the
# training split only.
modelo1 <- lm(total_updrs ~ shimmer, data = train)
summary(modelo1)
Call:
lm(formula = total_updrs ~ shimmer, data = train)

Residuals:
    Min      1Q  Median      3Q     Max
-23.111  -7.276  -1.587   7.494  26.330

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)  27.8365     0.3112  89.435  < 2e-16 ***
shimmer      33.2255     7.3859   4.499 7.08e-06 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 10.72 on 3286 degrees of freedom
Multiple R-squared:  0.006121,  Adjusted R-squared:  0.005818
F-statistic: 20.24 on 1 and 3286 DF,  p-value: 7.08e-06
# Model 2: simple linear regression of total_updrs on nhr, fitted on the
# training split only.
modelo2 <- lm(total_updrs ~ nhr, data = train)
summary(modelo2)
Call:
lm(formula = total_updrs ~ nhr, data = train)

Residuals:
    Min      1Q  Median      3Q     Max
-22.253  -7.508  -1.579   7.308  26.252

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)  28.6488     0.2128 134.631  < 2e-16 ***
nhr           9.9489     3.2714   3.041  0.00238 **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 10.74 on 3286 degrees of freedom
Multiple R-squared:  0.002807,  Adjusted R-squared:  0.002503
F-statistic: 9.249 on 1 and 3286 DF,  p-value: 0.002375
# Predictions of both models on the training split (in-sample).
pred_modelo1_train <-
  predict(object = modelo1, newdata = train)
pred_modelo2_train <-
  predict(object = modelo2, newdata = train)

# Predictions of both models on the held-out test split (out-of-sample).
pred_modelo1_test <-
  predict(object = modelo1, newdata = test)
pred_modelo2_test <-
  predict(object = modelo2, newdata = test)
# RMSE on the training split: observed total_updrs vs. each model's
# in-sample predictions.
rmse_modelo1_train <-
  rmse_vec(
    truth = train$total_updrs,
    estimate = pred_modelo1_train
  )
rmse_modelo2_train <-
  rmse_vec(
    truth = train$total_updrs,
    estimate = pred_modelo2_train
  )

# RMSE on the test split: observed total_updrs vs. each model's predictions
# for rows that were not used in fitting.
rmse_modelo1_test <-
  rmse_vec(
    truth = test$total_updrs,
    estimate = pred_modelo1_test
  )
rmse_modelo2_test <-
  rmse_vec(
    truth = test$total_updrs,
    estimate = pred_modelo2_test
  )
# Print the four RMSE values for comparison (model 1 vs. model 2, train vs.
# test). Each `[1] ...` line is the console output recorded for the expression
# on the line directly above it.
rmse_modelo1_train
[1] 10.72002
rmse_modelo1_test
[1] 10.71263
rmse_modelo2_train
[1] 10.73788
rmse_modelo2_test
[1] 10.74312
# Refit the nhr model on all rows of df_train (not just the training split)
# before predicting the Kaggle test set.
# NOTE(review): in the hold-out comparison reported in this document the
# shimmer model had the slightly lower test RMSE (10.71263 vs. 10.74312);
# confirm that nhr is the intended predictor for this submission.
modelo_final2 <-
  lm(total_updrs ~ nhr, data = df_train)

# Predict total_updrs for every row of the Kaggle test set.
predicciones_kaggle <-
  predict(object = modelo_final2, newdata = df_test)

# Build the submission table: the test ids plus their predicted total_updrs.
submit_kaggle <-
  df_test |>
  select(id) |>
  mutate(total_updrs = predicciones_kaggle)

# Preview the first rows, then write the submission file to disk.
submit_kaggle |> head()
write_csv(submit_kaggle, "../datos/datos-kaggle/ejemplo_submission1.csv")