Code
library(tidyverse)
library(gt)
Estadística
library(tidyverse)
library(gt)
Función | Descripción | Tipo de variable |
---|---|---|
mean() | Calcular promedio | Cuantitativa |
weighted.mean() | Calcular promedio ponderado | Cuantitativa |
median() | Calcular mediana | Cuantitativa |
sd() | Calcular desviación estándar | Cuantitativa |
var() | Calcular la varianza | Cuantitativa |
range() | Calcular el rango | Cuantitativa |
IQR() | Calcular rango intercuartílico | Cuantitativa |
quantile() | Calcular cuartiles, deciles y percentiles | Cuantitativa |
min() | Valor mínimo | Cuantitativa |
max() | Valor máximo | Cuantitativa |
# Read the reservoir CSV, rename its columns to snake_case names, and derive
# calendar fields from the date column.
# NOTE(review): year(), month(), quarter() and semester() are presumably the
# lubridate versions attached by tidyverse >= 2.0 -- confirm.
datos <- read_csv("datos/PorcVoluUtilDiar.csv") |>
  rename(
    id = Id,
    embalse = Name,
    porc_volumen = Value,
    fecha = Date
  ) |>
  mutate(
    year_es = year(fecha),
    mes = month(fecha),
    trimestre = quarter(fecha),
    semestre = semester(fecha)
  )
# Preview the first rows of the prepared data.
datos |>
  head()
# Summary statistics of porc_volumen per year, rendered as a gt table.
# coef_var reuses the desviacion and promedio columns computed just above it
# in the same reframe() call.
datos |>
  group_by(year_es) |>
  reframe(
    promedio = mean(porc_volumen, na.rm = TRUE),
    mediana = median(porc_volumen, na.rm = TRUE),
    desviacion = sd(porc_volumen, na.rm = TRUE),
    maximo = max(porc_volumen, na.rm = TRUE),
    minimo = min(porc_volumen, na.rm = TRUE),
    coef_var = (desviacion / promedio) * 100
  ) |>
  gt()
year_es | promedio | mediana | desviacion | maximo | minimo | coef_var |
---|---|---|---|---|---|---|
2014 | 0.6256844 | 0.636290 | 0.2354465 | 1.17409 | 0.00105 | 37.63023 |
2015 | 0.5533925 | 0.555950 | 0.2313380 | 1.17654 | -0.01852 | 41.80360 |
2016 | 0.5194375 | 0.506725 | 0.2465720 | 1.13329 | 0.00051 | 47.46904 |
2017 | 0.7270821 | 0.756945 | 0.2194066 | 1.24386 | 0.00073 | 30.17632 |
2018 | 0.6906946 | 0.732250 | 0.2436080 | 1.20055 | 0.00195 | 35.27000 |
2019 | 0.6260755 | 0.634340 | 0.2244219 | 1.20446 | 0.00031 | 35.84582 |
2020 | 0.5602562 | 0.562100 | 0.2373018 | 1.15110 | 0.00094 | 42.35595 |
2021 | 0.7486693 | 0.790890 | 0.2376802 | 1.23771 | 0.01542 | 31.74703 |
2022 | 0.7434982 | 0.792760 | 0.2344250 | 1.19048 | 0.00031 | 31.53000 |
2023 | 0.6478680 | 0.660065 | 0.2194029 | 1.23769 | 0.00031 | 33.86537 |
2024 | 0.4418656 | 0.446140 | 0.2241496 | 1.00016 | 0.00430 | 50.72802 |
# Same per-year summary statistics, restricted to the CHUZA reservoir,
# rendered as a gt table.
datos |>
  filter(embalse == "CHUZA") |>
  group_by(year_es) |>
  reframe(
    promedio = mean(porc_volumen, na.rm = TRUE),
    mediana = median(porc_volumen, na.rm = TRUE),
    desviacion = sd(porc_volumen, na.rm = TRUE),
    maximo = max(porc_volumen, na.rm = TRUE),
    minimo = min(porc_volumen, na.rm = TRUE),
    coef_var = (desviacion / promedio) * 100
  ) |>
  gt()
year_es | promedio | mediana | desviacion | maximo | minimo | coef_var |
---|---|---|---|---|---|---|
2014 | 0.7231294 | 0.714940 | 0.20063589 | 1.01423 | 0.45242 | 27.74550 |
2015 | 0.6905362 | 0.673300 | 0.21891977 | 1.01315 | 0.36648 | 31.70287 |
2016 | 0.7395945 | 0.666800 | 0.15320788 | 0.96592 | 0.55513 | 20.71512 |
2017 | 0.7971378 | 0.787700 | 0.11104568 | 1.00643 | 0.55881 | 13.93055 |
2018 | 0.7775977 | 0.744930 | 0.17932210 | 1.01234 | 0.52894 | 23.06104 |
2019 | 0.6924883 | 0.636320 | 0.24324467 | 1.04113 | 0.42693 | 35.12618 |
2020 | 0.7216357 | 0.680715 | 0.18703915 | 1.03120 | 0.50704 | 25.91878 |
2021 | 0.7511966 | 0.735410 | 0.17560852 | 1.01481 | 0.48251 | 23.37717 |
2022 | 0.5813787 | 0.597430 | 0.14125597 | 0.81128 | 0.30657 | 24.29672 |
2023 | 0.4435066 | 0.427110 | 0.09342354 | 0.64818 | 0.30252 | 21.06474 |
2024 | 0.2335559 | 0.222605 | 0.07154652 | 0.37840 | 0.13351 | 30.63357 |
# Number of distinct reservoirs recorded per year, drawn as a column chart.
datos |>
  group_by(year_es) |>
  reframe(embalses = n_distinct(embalse)) |>
  ggplot(aes(x = year_es, y = embalses)) +
  geom_col() +
  scale_x_continuous(breaks = seq(2014, 2024, 1))
# Same reservoirs-per-year column chart, with custom colours, axis labels,
# titles and a minimal theme.
datos |>
  group_by(year_es) |>
  reframe(embalses = n_distinct(embalse)) |>
  ggplot(aes(x = year_es, y = embalses)) +
  geom_col(color = "red", fill = "blue", alpha = 0.5) +
  scale_x_continuous(breaks = seq(2014, 2024, 1)) +
  labs(
    x = "Año",
    y = "Embalses (n)",
    title = "Total de embalses por año",
    subtitle = "Colombia - XM"
  ) +
  theme_minimal()
# Yearly mean of porc_volumen for the CHUZA reservoir, drawn as points
# joined by a line.
datos |>
  filter(embalse == "CHUZA") |>
  group_by(year_es) |>
  reframe(promedio = mean(porc_volumen, na.rm = TRUE)) |>
  ggplot(aes(x = year_es, y = promedio)) +
  geom_point(color = "dodgerblue2", size = 3, shape = 19) +
  geom_line(color = "dodgerblue2", linetype = 1) +
  scale_x_continuous(breaks = seq(2014, 2024, 1)) +
  labs(
    x = "Año",
    y = "Volumen (%)",
    title = "Volumen promedio por año",
    subtitle = "Embalse Chuza"
  ) +
  theme_minimal()
# Yearly mean of porc_volumen for every reservoir, one facet per reservoir
# (4 columns, independent y scales), with rotated x-axis labels.
datos |>
  group_by(year_es, embalse) |>
  reframe(promedio = mean(porc_volumen, na.rm = TRUE)) |>
  ggplot(aes(x = year_es, y = promedio)) +
  facet_wrap(~ embalse, scales = "free_y", ncol = 4) +
  geom_point() +
  geom_line() +
  scale_x_continuous(breaks = seq(2014, 2024, 1)) +
  labs(
    x = "Año",
    y = "Volumen (%)",
    title = "Volumen promedio por año",
    subtitle = "Embalses de Colombia"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Per-year share of CHUZA records whose porc_volumen exceeds 0.5.
# total: number of records above 0.5 (missing values ignored).
# dias:  number of non-missing records in that year.
# The proportion is taken over the records actually present rather than a
# fixed 365.5, so leap years and incomplete years are not misestimated, and
# a single NA no longer turns a whole year's result into NA.
tabla_proporcion <- datos |>
  filter(embalse == "CHUZA") |>
  group_by(year_es) |>
  reframe(
    total = sum(porc_volumen > 0.5, na.rm = TRUE),
    dias = sum(!is.na(porc_volumen))
  ) |>
  mutate(proporcion = total / dias)
# Column chart of the yearly proportion, each bar labelled with its value
# rounded to three digits.
tabla_proporcion |>
  ggplot(aes(x = year_es, y = proporcion)) +
  geom_col() +
  geom_label(
    aes(label = round(proporcion, digits = 3)),
    size = 3
  )
# Summing a logical vector counts its TRUE values: how many of the first
# five porc_volumen values exceed 0.5.
sum(datos[["porc_volumen"]][seq_len(5)] > 0.5)
[1] 2
# Histogram of porc_volumen for the CHUZA reservoir.
datos |>
  filter(embalse == "CHUZA") |>
  ggplot(aes(x = porc_volumen)) +
  geom_histogram(color = "black")
# Kernel density of porc_volumen for the CHUZA reservoir.
datos |>
  filter(embalse == "CHUZA") |>
  ggplot(aes(x = porc_volumen)) +
  geom_density(fill = "dodgerblue", alpha = 0.5)
# Single horizontal boxplot of porc_volumen for CHUZA; the empty string on
# x gives one unnamed group and coord_flip() lays the box on its side.
datos |>
  filter(embalse == "CHUZA") |>
  ggplot(aes(x = "", y = porc_volumen)) +
  geom_boxplot(fill = "dodgerblue", alpha = 0.5) +
  coord_flip()
# One histogram of porc_volumen per year for CHUZA, stacked in a single
# column, with a dashed red reference line at 0.5.
datos |>
  filter(embalse == "CHUZA") |>
  ggplot(aes(x = porc_volumen)) +
  facet_wrap(~year_es, ncol = 1, scales = "free_y") +
  geom_histogram(
    fill = "dodgerblue",
    alpha = 0.5,
    color = "black"
  ) +
  geom_vline(xintercept = 0.5, color = "red", lty = 2)
# Boxplot of porc_volumen per year for CHUZA. year_es is converted to a
# factor so each year gets its own box; dashed red reference line at 0.5.
datos |>
  filter(embalse == "CHUZA") |>
  mutate(year_es = as.factor(year_es)) |>
  ggplot(aes(x = year_es, y = porc_volumen)) +
  geom_boxplot(
    fill = "dodgerblue",
    alpha = 0.5,
    color = "black"
  ) +
  geom_hline(
    yintercept = 0.5,
    color = "red",
    lty = 2
  )
# Boxplot of porc_volumen for every reservoir, with a dashed red reference
# line at 0.5 and rotated x-axis labels.
datos |>
  ggplot(aes(x = embalse, y = porc_volumen)) +
  geom_boxplot(
    fill = "dodgerblue",
    alpha = 0.5,
    color = "black"
  ) +
  geom_hline(
    yintercept = 0.5,
    color = "red",
    lty = 2
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Mean of porc_volumen per year and semester for CHUZA, with error bars
# spanning mean +/- one standard deviation, coloured by semester.
datos |>
  filter(embalse == "CHUZA") |>
  group_by(year_es, semestre) |>
  reframe(
    promedio = mean(porc_volumen, na.rm = TRUE),
    desviacion = sd(porc_volumen, na.rm = TRUE)
  ) |>
  mutate(semestre = as.factor(semestre)) |>
  ggplot(
    aes(
      x = year_es,
      y = promedio,
      ymin = promedio - desviacion,
      ymax = promedio + desviacion,
      color = semestre
    )
  ) +
  geom_point() +
  geom_errorbar(width = 0.2) +
  theme(legend.position = "top")
# Same mean +/- one standard deviation per year and semester for CHUZA,
# drawn with geom_pointrange() instead of separate points and error bars.
datos |>
  filter(embalse == "CHUZA") |>
  group_by(year_es, semestre) |>
  reframe(
    promedio = mean(porc_volumen, na.rm = TRUE),
    desviacion = sd(porc_volumen, na.rm = TRUE)
  ) |>
  mutate(semestre = as.factor(semestre)) |>
  ggplot(
    aes(
      x = year_es,
      y = promedio,
      ymin = promedio - desviacion,
      ymax = promedio + desviacion,
      color = semestre
    )
  ) +
  geom_pointrange() +
  theme(legend.position = "top")
# Mean of porc_volumen per year and semester for CHUZA as points and lines,
# with a ribbon spanning mean +/- one standard deviation. Colour and fill
# share one legend title so they merge into a single legend.
datos |>
  filter(embalse == "CHUZA") |>
  group_by(year_es, semestre) |>
  reframe(
    promedio = mean(porc_volumen, na.rm = TRUE),
    desviacion = sd(porc_volumen, na.rm = TRUE)
  ) |>
  mutate(semestre = as.factor(semestre)) |>
  ggplot(
    aes(
      x = year_es,
      y = promedio,
      ymin = promedio - desviacion,
      ymax = promedio + desviacion,
      color = semestre,
      fill = semestre
    )
  ) +
  geom_ribbon(alpha = 0.5) +
  geom_point() +
  geom_line() +
  scale_x_continuous(breaks = seq(2014, 2024, 1)) +
  labs(x = "Año", y = "Volumen (%)", color = "Semestre", fill = "Semestre") +
  theme(legend.position = "bottom")
library(plotly)
# Build the semester ribbon plot for CHUZA, store it in `ejemplo`, and
# convert the stored ggplot object into an interactive plotly widget.
ejemplo <- datos |>
  filter(embalse == "CHUZA") |>
  group_by(year_es, semestre) |>
  reframe(
    promedio = mean(porc_volumen, na.rm = TRUE),
    desviacion = sd(porc_volumen, na.rm = TRUE)
  ) |>
  mutate(semestre = as.factor(semestre)) |>
  ggplot(
    aes(
      x = year_es,
      y = promedio,
      ymin = promedio - desviacion,
      ymax = promedio + desviacion,
      color = semestre,
      fill = semestre
    )
  ) +
  geom_ribbon(alpha = 0.5) +
  geom_point() +
  geom_line() +
  scale_x_continuous(breaks = seq(2014, 2024, 1)) +
  labs(x = "Año", y = "Volumen (%)", color = "Semestre", fill = "Semestre") +
  theme(legend.position = "bottom")

ggplotly(ejemplo)
# Time series of porc_volumen against fecha for CHUZA, as a line.
datos |>
  filter(embalse == "CHUZA") |>
  ggplot(aes(x = fecha, y = porc_volumen)) +
  geom_line()
# Time series of porc_volumen against fecha for CHUZA, as a filled area.
datos |>
  filter(embalse == "CHUZA") |>
  ggplot(aes(x = fecha, y = porc_volumen)) +
  geom_area(color = "forestgreen", fill = "forestgreen", alpha = 0.5)
# Time series of porc_volumen for CHUZA with a smoothed trend overlaid
# (geom_smooth() with its default method).
datos |>
  filter(embalse == "CHUZA") |>
  ggplot(aes(x = fecha, y = porc_volumen)) +
  geom_line() +
  geom_smooth()
# Reshape to wide format: one column per reservoir holding its
# porc_volumen values. Every remaining column of `datos` acts as a row
# identifier for pivot_wider().
datos_ancho <- datos |>
  pivot_wider(
    names_from = embalse,
    values_from = porc_volumen
  )
# Scatter plot of the PLAYAS column against the PENOL column.
datos_ancho |>
  ggplot(aes(x = PENOL, y = PLAYAS)) +
  geom_point()
# 2D density contours of PLAYAS against PENOL.
datos_ancho |>
  ggplot(aes(x = PENOL, y = PLAYAS)) +
  geom_density_2d()
# Scatter plot of PLAYAS against PENOL with a fitted linear-model line.
datos_ancho |>
  ggplot(aes(x = PENOL, y = PLAYAS)) +
  geom_point() +
  geom_smooth(method = "lm")
# Correlation between the PENOL and PLAYAS columns (cor() default method),
# using only the rows where both values are present.
with(
  datos_ancho,
  cor(x = PENOL, y = PLAYAS, use = "pairwise.complete.obs")
)
[1] 0.5148998
# Correlation matrix across the reservoir columns, selected as the
# contiguous column range from `AGREGADO BOGOTA` to ALTOANCHICAYA; each
# pair uses the rows where both values are present.
datos_ancho |>
  select(`AGREGADO BOGOTA`:ALTOANCHICAYA) |>
  cor(use = "pairwise.complete.obs")
AGREGADO BOGOTA AMANI BETANIA CALIMA1
AGREGADO BOGOTA 1.00000000 0.041133663 0.076019392 0.31109336
AMANI 0.04113366 1.000000000 0.008134462 0.71550669
BETANIA 0.07601939 0.008134462 1.000000000 0.09249142
CALIMA1 0.31109336 0.715506693 0.092491417 1.00000000
CHUZA -0.09378850 0.144845967 0.045480168 0.08066138
EL QUIMBO 0.34166096 0.367395913 0.169216761 0.43674644
ESMERALDA 0.43158201 0.342507403 0.013191343 0.27638579
GUAVIO 0.36135592 0.376343164 0.086475629 0.38713535
ITUANGO 0.45917072 0.259572540 0.227205611 0.51346889
MIRAFLORES 0.45124060 0.646525254 0.017929900 0.65742622
MUNA 0.23897660 0.129269439 0.286518642 0.26207255
PENOL 0.25654007 0.744149498 0.048064694 0.69571870
PLAYAS -0.07100517 0.391403774 -0.041935047 0.40535008
PORCE II -0.17462506 0.364008035 0.069178738 0.22465949
PORCE III -0.08711553 0.267574223 0.106191804 0.16687024
PRADO -0.09597060 0.527977934 0.226004372 0.61263430
PUNCHINA -0.08086221 0.262976892 0.007265291 0.14299005
RIOGRANDE2 0.30743322 0.763724631 0.139950966 0.68796374
SALVAJINA 0.04944611 0.314089139 0.157748883 0.44696069
SAN LORENZO 0.20192065 0.722126953 0.061130940 0.54413438
TOPOCORO 0.25477294 0.843541787 0.097295144 0.75185196
TRONERAS -0.14092429 0.343938410 0.118837843 0.25156984
URRA1 0.29840894 0.635264103 0.059628566 0.49049291
ALTOANCHICAYA -0.09647304 0.111282329 0.098218364 -0.00827293
CHUZA EL QUIMBO ESMERALDA GUAVIO ITUANGO
AGREGADO BOGOTA -0.09378850 0.341660956 0.431582014 0.36135592 0.45917072
AMANI 0.14484597 0.367395913 0.342507403 0.37634316 0.25957254
BETANIA 0.04548017 0.169216761 0.013191343 0.08647563 0.22720561
CALIMA1 0.08066138 0.436746444 0.276385788 0.38713535 0.51346889
CHUZA 1.00000000 0.445154982 0.659896960 0.72445043 0.56577465
EL QUIMBO 0.44515498 1.000000000 0.544181737 0.73348033 0.68757960
ESMERALDA 0.65989696 0.544181737 1.000000000 0.84363618 0.17009125
GUAVIO 0.72445043 0.733480329 0.843636177 1.00000000 0.50627325
ITUANGO 0.56577465 0.687579596 0.170091247 0.50627325 1.00000000
MIRAFLORES 0.33880572 0.543409198 0.668571334 0.68103591 0.46074503
MUNA 0.12821849 0.280799979 0.207334275 0.25573427 0.48338606
PENOL 0.27547523 0.523172997 0.513989354 0.59224586 0.68269381
PLAYAS 0.18464299 0.171000286 0.252028204 0.24297821 0.27532916
PORCE II 0.13063686 0.070927248 0.053259671 0.16269518 0.12743912
PORCE III 0.12140032 0.095455945 0.050045996 0.12873142 0.13778237
PRADO -0.16619709 0.238888032 -0.153570672 0.02280428 0.44713232
PUNCHINA 0.19965817 0.068111034 0.195927063 0.21516099 0.06390521
RIOGRANDE2 0.22580337 0.495249811 0.489057298 0.60251915 0.38858804
SALVAJINA -0.09613891 0.330835612 -0.001611343 0.13117939 0.22164256
SAN LORENZO 0.45273044 0.507806358 0.623280879 0.69908647 0.37920110
TOPOCORO 0.24799015 0.537841028 0.469877688 0.57783627 0.59701078
TRONERAS 0.34274071 0.337142905 0.224847113 0.35282037 0.20053572
URRA1 0.36700664 0.483855012 0.618105789 0.72832928 0.28150188
ALTOANCHICAYA 0.14823418 -0.004743978 0.092367476 0.10844314 0.12371424
MIRAFLORES MUNA PENOL PLAYAS PORCE II
AGREGADO BOGOTA 0.45124060 0.23897660 0.25654007 -0.07100517 -0.17462506
AMANI 0.64652525 0.12926944 0.74414950 0.39140377 0.36400803
BETANIA 0.01792990 0.28651864 0.04806469 -0.04193505 0.06917874
CALIMA1 0.65742622 0.26207255 0.69571870 0.40535008 0.22465949
CHUZA 0.33880572 0.12821849 0.27547523 0.18464299 0.13063686
EL QUIMBO 0.54340920 0.28079998 0.52317300 0.17100029 0.07092725
ESMERALDA 0.66857133 0.20733428 0.51398935 0.25202820 0.05325967
GUAVIO 0.68103591 0.25573427 0.59224586 0.24297821 0.16269518
ITUANGO 0.46074503 0.48338606 0.68269381 0.27532916 0.12743912
MIRAFLORES 1.00000000 0.18043561 0.78853017 0.40089595 0.24404649
MUNA 0.18043561 1.00000000 0.26740592 0.17386840 0.09357301
PENOL 0.78853017 0.26740592 1.00000000 0.51489977 0.39773943
PLAYAS 0.40089595 0.17386840 0.51489977 1.00000000 0.34423827
PORCE II 0.24404649 0.09357301 0.39773943 0.34423827 1.00000000
PORCE III 0.28046045 0.12083169 0.31465199 0.30557715 0.43406906
PRADO 0.25922613 0.22398684 0.38330871 0.27150639 0.13239018
PUNCHINA 0.20677836 0.15605460 0.28913664 0.29751925 0.31487575
RIOGRANDE2 0.80929197 0.32948512 0.84774045 0.40894148 0.39706313
SALVAJINA 0.11696417 0.29046675 0.25767252 0.06898259 -0.01169367
SAN LORENZO 0.72727031 0.21915286 0.71039654 0.37103617 0.38444930
TOPOCORO 0.71039530 0.35556809 0.83213542 0.45267555 0.37751046
TRONERAS 0.29397245 0.26497288 0.44639117 0.42705192 0.52981811
URRA1 0.74401843 0.08343807 0.67682057 0.29933848 0.33263293
ALTOANCHICAYA 0.05227836 0.18192698 0.07920718 0.14231576 0.12040414
PORCE III PRADO PUNCHINA RIOGRANDE2 SALVAJINA
AGREGADO BOGOTA -0.08711553 -0.09597060 -0.080862209 0.3074332 0.049446114
AMANI 0.26757422 0.52797793 0.262976892 0.7637246 0.314089139
BETANIA 0.10619180 0.22600437 0.007265291 0.1399510 0.157748883
CALIMA1 0.16687024 0.61263430 0.142990050 0.6879637 0.446960692
CHUZA 0.12140032 -0.16619709 0.199658167 0.2258034 -0.096138906
EL QUIMBO 0.09545595 0.23888803 0.068111034 0.4952498 0.330835612
ESMERALDA 0.05004600 -0.15357067 0.195927063 0.4890573 -0.001611343
GUAVIO 0.12873142 0.02280428 0.215160991 0.6025192 0.131179393
ITUANGO 0.13778237 0.44713232 0.063905210 0.3885880 0.221642559
MIRAFLORES 0.28046045 0.25922613 0.206778364 0.8092920 0.116964174
MUNA 0.12083169 0.22398684 0.156054596 0.3294851 0.290466751
PENOL 0.31465199 0.38330871 0.289136637 0.8477405 0.257672519
PLAYAS 0.30557715 0.27150639 0.297519255 0.4089415 0.068982592
PORCE II 0.43406906 0.13239018 0.314875748 0.3970631 -0.011693673
PORCE III 1.00000000 0.15511457 0.251104413 0.3504755 -0.026756487
PRADO 0.15511457 1.00000000 0.016050474 0.4525739 0.552719988
PUNCHINA 0.25110441 0.01605047 1.000000000 0.2299305 0.005916317
RIOGRANDE2 0.35047551 0.45257392 0.229930549 1.0000000 0.280157397
SALVAJINA -0.02675649 0.55271999 0.005916317 0.2801574 1.000000000
SAN LORENZO 0.36949596 0.23100262 0.357200844 0.7974894 0.219918208
TOPOCORO 0.30325670 0.49908944 0.313488113 0.8549322 0.504323150
TRONERAS 0.45546836 0.14580332 0.371664810 0.4524165 0.078543981
URRA1 0.28283160 0.18889093 0.263439866 0.7276359 0.100604312
ALTOANCHICAYA 0.11739917 0.11675999 0.167198178 0.1168749 0.049517452
SAN LORENZO TOPOCORO TRONERAS URRA1 ALTOANCHICAYA
AGREGADO BOGOTA 0.20192065 0.25477294 -0.14092429 0.29840894 -0.096473043
AMANI 0.72212695 0.84354179 0.34393841 0.63526410 0.111282329
BETANIA 0.06113094 0.09729514 0.11883784 0.05962857 0.098218364
CALIMA1 0.54413438 0.75185196 0.25156984 0.49049291 -0.008272930
CHUZA 0.45273044 0.24799015 0.34274071 0.36700664 0.148234180
EL QUIMBO 0.50780636 0.53784103 0.33714291 0.48385501 -0.004743978
ESMERALDA 0.62328088 0.46987769 0.22484711 0.61810579 0.092367476
GUAVIO 0.69908647 0.57783627 0.35282037 0.72832928 0.108443136
ITUANGO 0.37920110 0.59701078 0.20053572 0.28150188 0.123714242
MIRAFLORES 0.72727031 0.71039530 0.29397245 0.74401843 0.052278356
MUNA 0.21915286 0.35556809 0.26497288 0.08343807 0.181926981
PENOL 0.71039654 0.83213542 0.44639117 0.67682057 0.079207180
PLAYAS 0.37103617 0.45267555 0.42705192 0.29933848 0.142315762
PORCE II 0.38444930 0.37751046 0.52981811 0.33263293 0.120404143
PORCE III 0.36949596 0.30325670 0.45546836 0.28283160 0.117399171
PRADO 0.23100262 0.49908944 0.14580332 0.18889093 0.116759994
PUNCHINA 0.35720084 0.31348811 0.37166481 0.26343987 0.167198178
RIOGRANDE2 0.79748940 0.85493221 0.45241645 0.72763589 0.116874921
SALVAJINA 0.21991821 0.50432315 0.07854398 0.10060431 0.049517452
SAN LORENZO 1.00000000 0.79710656 0.48266125 0.77545330 0.127249131
TOPOCORO 0.79710656 1.00000000 0.44415180 0.73611776 0.108537181
TRONERAS 0.48266125 0.44415180 1.00000000 0.34304283 0.183150446
URRA1 0.77545330 0.73611776 0.34304283 1.00000000 0.084690914
ALTOANCHICAYA 0.12724913 0.10853718 0.18315045 0.08469091 1.000000000
library(corrplot)
# Plot the reservoir correlation matrix with corrplot() defaults.
datos_ancho |>
  select(`AGREGADO BOGOTA`:ALTOANCHICAYA) |>
  cor(use = "pairwise.complete.obs") |>
  corrplot()
# Plot the reservoir correlation matrix as pie glyphs, lower triangle only,
# without the diagonal, with black labels rotated 35 degrees.
datos_ancho |>
  select(`AGREGADO BOGOTA`:ALTOANCHICAYA) |>
  cor(use = "pairwise.complete.obs") |>
  corrplot(
    method = "pie",
    type = "lower",
    diag = FALSE,
    tl.col = "black",
    tl.srt = 35
  )
library(corrr)
# Compute the correlations between reservoir columns with corrr and draw
# them as a network plot.
datos_ancho |>
  select(`AGREGADO BOGOTA`:ALTOANCHICAYA) |>
  correlate() |>
  network_plot()