# Code
library(tidyverse)
library(janitor)
library(readxl)
library(ggsci) # colores preestablecidos
library(plotly) # Visualización interactiva
# Estadística
library(tidyverse)
library(janitor)
library(readxl)
library(ggsci) # colores preestablecidos
library(plotly) # Visualización interactiva
# Load the three example datasets used throughout the script and preview
# the first rows of each one.

# Reservoir data (columns Name, Date and Value are used further down).
df_embalses <- read_csv("datos-ejemplos/PorcVoluUtilDiar.csv")
df_embalses |> head()

# Agricultural evaluations; the first 6 rows of the sheet are skipped and
# column names are normalised with janitor::clean_names().
df_evas <-
  read_excel("datos-ejemplos/Base agrícola 2019 - 2023.xlsx", skip = 6) |>
  clean_names()
df_evas |> head()

# Agricultural credit placements, with normalised column names.
df_creditos <-
  read_csv("datos-ejemplos/Colocaciones_de_Cr_dito_Sector_Agropecuario_-_2021-_2024_20240910.csv") |>
  clean_names()
df_creditos |> head()
# Number of credit records per year, built up in four steps.

# 1. Bare column chart.
df_creditos |>
  count(ano, name = "total") |>
  ggplot(aes(x = ano, y = total)) +
  geom_col()

# 2. Custom outline/fill colours, axis labels, titles and theme.
df_creditos |>
  count(ano, name = "total") |>
  ggplot(aes(x = ano, y = total)) +
  geom_col(color = "cornflowerblue", fill = "#F87A53", alpha = 0.5) +
  labs(
    x = "Año",
    y = "Total (n)",
    title = "Créditos agropecuarios en Colombia",
    subtitle = "Años 2021 a 2024",
    caption = "*2024: información incompleta"
  ) +
  theme_bw()

# 3. Add the count as plain text on each bar (label mapped globally).
df_creditos |>
  count(ano, name = "total") |>
  ggplot(aes(x = ano, y = total, label = total)) +
  geom_col(color = "cornflowerblue", fill = "#F87A53", alpha = 0.5) +
  geom_text() +
  labs(
    x = "Año",
    y = "Total (n)",
    title = "Créditos agropecuarios en Colombia",
    subtitle = "Años 2021 a 2024",
    caption = "*2024: información incompleta"
  ) +
  theme_bw()

# 4. Same, using a boxed label mapped only inside geom_label().
df_creditos |>
  count(ano, name = "total") |>
  ggplot(aes(x = ano, y = total)) +
  geom_col(color = "cornflowerblue", fill = "#F87A53", alpha = 0.5) +
  geom_label(aes(label = total), color = "white", fill = "#3B1E54") +
  labs(
    x = "Año",
    y = "Total (n)",
    title = "Créditos agropecuarios en Colombia",
    subtitle = "Años 2021 a 2024",
    caption = "*2024: información incompleta"
  ) +
  theme_bw()
# Number of agricultural evaluation records per department, in three
# variants. The x aesthetic is `departamento`, so the x label reads
# "Departamento" (the original text reused "Año" from the per-year charts).

# 1. Default orientation; department names sit on the x axis.
df_evas |>
  count(departamento, name = "total") |>
  ggplot(aes(x = departamento, y = total)) +
  geom_col(color = "cornflowerblue", fill = "#F87A53", alpha = 0.5) +
  geom_label(aes(label = total), color = "white", fill = "#3B1E54") +
  labs(
    x = "Departamento",
    y = "Total (n)",
    title = "Evaluaciones agropecuarias por departamento",
    subtitle = "Colombia 2019 a 2023"
  ) +
  theme_bw()

# 2. Smaller labels and x-axis text rotated 45 degrees.
df_evas |>
  count(departamento, name = "total") |>
  ggplot(aes(x = departamento, y = total)) +
  geom_col(color = "cornflowerblue", fill = "#F87A53", alpha = 0.5) +
  geom_label(
    aes(label = total),
    color = "white", fill = "#3B1E54", size = 1.7
  ) +
  labs(
    x = "Departamento",
    y = "Total (n)",
    title = "Evaluaciones agropecuarias por departamento",
    subtitle = "Colombia 2019 a 2023"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# 3. Departments ordered by their count and the axes flipped.
df_evas |>
  count(departamento, name = "total") |>
  ggplot(aes(x = reorder(departamento, total), y = total)) +
  geom_col(color = "cornflowerblue", fill = "#F87A53", alpha = 0.5) +
  geom_label(
    aes(label = total),
    color = "white", fill = "#3B1E54", size = 1.7
  ) +
  labs(
    x = "Departamento",
    y = "Total (n)",
    title = "Evaluaciones agropecuarias por departamento",
    subtitle = "Colombia 2019 a 2023"
  ) +
  theme_bw() +
  coord_flip()
# Median of `Value` for each `Name`, drawn as points ordered by the median.
df_embalses |>
  group_by(Name) |>
  reframe(mediana = median(Value)) |>
  ggplot(aes(x = reorder(Name, mediana), y = mediana)) +
  geom_point(color = "red") +
  coord_flip() +
  labs(x = "")

# Median of `Value` over the whole table, used as a reference line below.
mediana_general_vol <- df_embalses$Value |> median()
mediana_general_vol

# Same chart plus a dashed blue line at the overall median.
df_embalses |>
  group_by(Name) |>
  reframe(mediana = median(Value)) |>
  ggplot(aes(x = reorder(Name, mediana), y = mediana)) +
  geom_point(color = "red") +
  labs(
    x = "",
    caption = "Línea azul: mediana general"
  ) +
  geom_hline(
    yintercept = mediana_general_vol,
    color = "blue",
    linetype = 2
  ) +
  coord_flip()
# Credits per year, one panel per investment department
# (each panel gets its own y scale).
df_creditos |>
  count(departamento_inversion, ano, name = "total") |>
  ggplot(aes(x = ano, y = total)) +
  facet_wrap(~ departamento_inversion, ncol = 3, scales = "free_y") +
  geom_col()

# Credits per year coloured by gender, one panel per producer type.
# Default position: bars are stacked.
df_creditos |>
  count(ano, genero, tipo_productor) |>
  ggplot(aes(x = ano, y = n, color = genero, fill = genero)) +
  facet_wrap(~ tipo_productor, scales = "free_y") +
  geom_col(alpha = 0.5)

# Side-by-side bars instead of stacked ones.
df_creditos |>
  count(ano, genero, tipo_productor) |>
  ggplot(aes(x = ano, y = n, color = genero, fill = genero)) +
  facet_wrap(~ tipo_productor, scales = "free_y") +
  geom_col(alpha = 0.5, position = "dodge")

# Manual colour palette, applied to both outline and fill.
df_creditos |>
  count(ano, genero, tipo_productor) |>
  ggplot(aes(x = ano, y = n, color = genero, fill = genero)) +
  facet_wrap(~ tipo_productor, scales = "free_y") +
  geom_col(alpha = 0.5, position = "dodge") +
  scale_color_manual(values = c("orangered", "magenta2", "gray40")) +
  scale_fill_manual(values = c("orangered", "magenta2", "gray40"))

# ggsci "npg" palette, legend on top and full labelling.
df_creditos |>
  count(ano, genero, tipo_productor) |>
  ggplot(aes(x = ano, y = n, color = genero, fill = genero)) +
  facet_wrap(~ tipo_productor, scales = "free_y") +
  geom_col(alpha = 0.5, position = "dodge") +
  scale_color_npg() +
  scale_fill_npg() +
  theme(legend.position = "top") + # left top right bottom
  labs(
    x = "Año",
    y = "Total (n)",
    title = "Créditos agropecuarios en Colombia por producto y género",
    subtitle = "Años 2021 a 2024",
    caption = "*2024: información incompleta",
    color = "Género",
    fill = "Género"
  )
# Total invested value per year, gender and producer type.
df_creditos |>
  group_by(ano, genero, tipo_productor) |>
  reframe(total = sum(valor_inversion, na.rm = TRUE)) |>
  ggplot(aes(x = ano, y = total, color = genero, fill = genero)) +
  facet_wrap(~ tipo_productor, scales = "free_y") +
  geom_col(alpha = 0.5, position = "dodge")

# Average invested value per credit: group total divided by the number of
# rows in the group.
df_creditos |>
  group_by(ano, genero, tipo_productor) |>
  reframe(
    total = sum(valor_inversion, na.rm = TRUE),
    creditos = n()
  ) |>
  mutate(promedio = total / creditos) |>
  ggplot(aes(x = ano, y = promedio, color = genero, fill = genero)) +
  facet_wrap(~ tipo_productor, scales = "free_y") +
  geom_col(alpha = 0.5, position = "dodge")
# Interactive (plotly) version of the credits-per-year column chart.
ggplotly(
  df_creditos |>
    count(ano, name = "total") |>
    ggplot(aes(x = ano, y = total)) +
    geom_col()
)
# Proportional bar charts: position = "fill" rescales every bar to the
# same height so the groups are shown as shares of the total.

# Gender share per year, one panel per producer type.
df_creditos |>
  count(ano, genero, tipo_productor) |>
  ggplot(aes(x = ano, y = n, color = genero, fill = genero)) +
  facet_wrap(~ tipo_productor, scales = "free_y") +
  geom_col(alpha = 0.5, position = "fill") +
  coord_flip()

# Gender share per year, panels by department and producer type.
df_creditos |>
  count(ano, genero, tipo_productor, departamento_inversion) |>
  ggplot(aes(x = ano, y = n, color = genero, fill = genero)) +
  facet_wrap(~ departamento_inversion ~ tipo_productor, ncol = 4) +
  geom_col(position = "fill", alpha = 0.5) +
  theme(legend.position = "top") +
  coord_flip()

# Producer-type share per year, panels by department and gender.
df_creditos |>
  count(ano, genero, tipo_productor, departamento_inversion) |>
  ggplot(aes(x = ano, y = n, color = tipo_productor, fill = tipo_productor)) +
  facet_wrap(~ departamento_inversion ~ genero, ncol = 3) +
  geom_col(position = "fill", alpha = 0.5) +
  theme(legend.position = "top") +
  coord_flip()

# Gender share per year using facet_grid() instead of facet_wrap().
df_creditos |>
  count(ano, genero, tipo_productor, departamento_inversion) |>
  ggplot(aes(x = ano, y = n, color = genero, fill = genero)) +
  facet_grid(~ departamento_inversion ~ tipo_productor) +
  geom_col(position = "fill", alpha = 0.5) +
  theme(legend.position = "top") +
  coord_flip()
# Keep only the crop of interest.
df_eva_cultivo <-
  df_evas |>
  filter(cultivo == "Café")

# First compute the historical mean yield of that crop.
# NOTE(review): mean() is called without na.rm; if rendimiento_t_ha holds
# any NA the mean is NA and every `resultado` below becomes NA — confirm
# the column is complete for this crop.
media_rto_cultivo <-
  df_eva_cultivo$rendimiento_t_ha |> mean()

# Finally derive the "resultado" variable, which tells whether the yield of
# each agricultural evaluation is above ("Superior") or not above
# ("Inferior") the historical mean.
df_resumen_rto <-
  df_eva_cultivo |>
  mutate(
    resultado = if_else(
      condition = rendimiento_t_ha > media_rto_cultivo,
      true = "Superior",
      false = "Inferior"
    )
  )

# Share of "Superior" / "Inferior" evaluations per year, one panel per
# department.
df_resumen_rto |>
  count(departamento, ano, resultado) |>
  ggplot(aes(x = ano, y = n, color = resultado, fill = resultado)) +
  facet_wrap(~ departamento, ncol = 4) +
  geom_col(position = "fill", alpha = 0.5) +
  coord_flip() +
  theme(legend.position = "top")
# Classify every reservoir record by whether Value exceeds 0.5 and extract
# the year of the record from Date.
df_embalses2 <-
  df_embalses |>
  mutate(
    resultado = if_else(
      condition = Value > 0.5,
      true = "Superior",
      false = "Inferior"
    ),
    year_es = year(Date)
  )

# Share of each class per year (year treated as a discrete factor).
df_embalses2 |>
  count(year_es, resultado) |>
  mutate(year_es = as.factor(year_es)) |>
  ggplot(aes(x = year_es, y = n, fill = resultado)) +
  geom_col(position = "fill") +
  coord_flip()

# Same chart split into one panel per Name.
df_embalses2 |>
  count(year_es, resultado, Name) |>
  mutate(year_es = as.factor(year_es)) |>
  ggplot(aes(x = year_es, y = n, fill = resultado)) +
  geom_col(position = "fill") +
  coord_flip() +
  facet_wrap(~ Name) +
  theme(legend.position = "top")
# Interactive (plotly) version of the gender-share chart by producer type.
ggplotly(
  df_creditos |>
    count(ano, genero, tipo_productor) |>
    ggplot(aes(x = ano, y = n, color = genero, fill = genero)) +
    facet_wrap(~ tipo_productor, scales = "free_y") +
    geom_col(alpha = 0.5, position = "fill") +
    coord_flip()
)
# Distribution of `Value` in the reservoir data, shown several ways.

# Histogram with 30 bins.
df_embalses |>
  ggplot(aes(x = Value)) +
  geom_histogram(
    color = "red",
    fill = "gray70",
    bins = 30
  )

# One histogram per Name, stacked in a single column.
df_embalses |>
  ggplot(aes(x = Value)) +
  geom_histogram(
    color = "red",
    fill = "gray70",
    bins = 30
  ) +
  facet_wrap(~ Name, ncol = 1)

# Kernel density estimate.
df_embalses |>
  ggplot(aes(x = Value)) +
  geom_density(
    color = "red",
    fill = "gray70"
  )

# Single boxplot of all values.
df_embalses |>
  ggplot(aes(x = Value)) +
  geom_boxplot(color = "red", fill = "gray70")

# One boxplot per Name, with the axes flipped.
df_embalses |>
  ggplot(aes(x = Name, y = Value)) +
  geom_boxplot(color = "red", fill = "gray70") +
  coord_flip()

# Overlaid density curves, one colour per Name.
df_embalses |>
  ggplot(aes(x = Value, color = Name)) +
  geom_density()

# Interactive version of the overlaid densities.
ggplotly(
  df_embalses |>
    ggplot(aes(x = Value, color = Name)) +
    geom_density()
)

# One density panel per Name; the legend is dropped because the panel
# titles already identify each curve.
df_embalses |>
  ggplot(aes(x = Value, color = Name)) +
  geom_density(show.legend = FALSE) +
  facet_wrap(~Name, ncol = 4, scales = "free_y")
# First compute the lost area as a fraction of the sown area
# ((sown - harvested) / sown; note it is not multiplied by 100) and keep
# only the records where that fraction is positive.
df_eva_cacao <-
  df_evas |>
  filter(cultivo == "Cacao") |>
  mutate(
    area_perdida_porc =
      (area_sembrada_ha - area_cosechada_ha) / area_sembrada_ha
  ) |>
  filter(area_perdida_porc > 0)

# Yield against lost-area fraction, with a linear fit.
df_eva_cacao |>
  ggplot(aes(x = area_perdida_porc, y = rendimiento_t_ha)) +
  geom_point() +
  geom_smooth(method = "lm")

# Yield against sown area on a log10 x scale, with a linear fit.
df_eva_cacao |>
  ggplot(aes(x = area_sembrada_ha, y = rendimiento_t_ha)) +
  geom_point() +
  scale_x_log10() +
  geom_smooth(method = "lm")

# Correlation on the raw scale, using only complete pairs.
cor(
  x = df_eva_cacao$area_sembrada_ha,
  y = df_eva_cacao$rendimiento_t_ha,
  use = "pairwise.complete.obs"
)
#> [1] 0.02916119

# Correlation after a log1p transform of both variables.
cor(
  x = log1p(df_eva_cacao$area_sembrada_ha),
  y = log1p(df_eva_cacao$rendimiento_t_ha),
  use = "pairwise.complete.obs"
)
#> [1] 0.209491
# Value over time for a single Name, with a smoothed trend in red.
df_embalses |>
  filter(Name == "AGREGADO BOGOTA") |>
  ggplot(aes(x = Date, y = Value)) +
  geom_line() +
  geom_smooth(color = "red")

# All records binned in 2D (Date x Value), with a smoothed trend in red.
df_embalses |>
  ggplot(aes(x = Date, y = Value)) +
  geom_bin_2d() +
  geom_smooth(color = "red")
# Correlation matrix between the series: spread the table to one column
# per Name, keep the numeric columns and correlate them using pairwise
# complete observations.
matriz_cor1 <-
  df_embalses |>
  pivot_wider(names_from = Name, values_from = Value) |>
  select(where(is.numeric)) |>
  cor(use = "pairwise.complete.obs")
library(corrplot)
# Correlation plot with corrplot's default settings.
matriz_cor1 |>
  corrplot()

# Customised version: lower triangle only, no diagonal, pie glyphs,
# rotated small black labels and variables ordered by hierarchical
# clustering.
matriz_cor1 |>
  corrplot(
    type = "lower",
    diag = FALSE,
    method = "pie",
    tl.srt = 45,
    tl.cex = 0.7,
    tl.col = "black",
    order = "hclust"
  )
library(corrr)
# Same correlations computed with corrr::correlate(), then drawn as a
# network plot.
mtx_cor2 <-
  df_embalses |>
  pivot_wider(names_from = Name, values_from = Value) |>
  select(where(is.numeric)) |>
  correlate()

mtx_cor2 |> network_plot()
# Mean cacao yield per department with +/- one standard deviation.

# Point plus error bar.
df_eva_cacao |>
  group_by(departamento) |>
  reframe(
    promedio = mean(rendimiento_t_ha, na.rm = TRUE),
    desviacion = sd(rendimiento_t_ha, na.rm = TRUE)
  ) |>
  ggplot(aes(
    x = departamento, y = promedio,
    ymin = promedio - desviacion,
    ymax = promedio + desviacion
  )) +
  geom_point() +
  geom_errorbar() +
  coord_flip()

# Same information drawn with a single geom_pointrange() layer.
df_eva_cacao |>
  group_by(departamento) |>
  reframe(
    promedio = mean(rendimiento_t_ha, na.rm = TRUE),
    desviacion = sd(rendimiento_t_ha, na.rm = TRUE)
  ) |>
  ggplot(aes(
    x = departamento, y = promedio,
    ymin = promedio - desviacion,
    ymax = promedio + desviacion
  )) +
  geom_pointrange() +
  coord_flip()
library(geodata)
library(sf)
library(ggspatial)
# GADM boundaries for Colombia at three administrative levels, fetched
# with geodata::gadm() using "datos-ejemplos/" as the storage path.
colombia_pais <- gadm(country = "COL", level = 0, path = "datos-ejemplos/")
colombia_pais |> plot()

colombia_deptos <- gadm(country = "COL", level = 1, path = "datos-ejemplos/")
colombia_deptos |> plot()

colombia_mpios <- gadm(country = "COL", level = 2, path = "datos-ejemplos/")
colombia_mpios |> plot()

# Read the level-0 .rds file directly from the gadm/ folder under that path.
mapa_colombia <- readRDS("datos-ejemplos/gadm/gadm41_COL_0_pk.rds")
mapa_colombia |> plot()
# Department polygons; DPTO_CCDGO is converted to numeric so it can be
# used as a numeric join key.
colombia_deptos_shp <-
  st_read("datos-ejemplos/MGN2021_DPTO_POLITICO/MGN_DPTO_POLITICO.shp") |>
  mutate(DPTO_CCDGO = as.numeric(DPTO_CCDGO))
#> Reading layer `MGN_DPTO_POLITICO' from data source
#>   `D:\Otros\UdeA\2024-02\01-estadistica\estadistica-202402\datos-ejemplos\MGN2021_DPTO_POLITICO\MGN_DPTO_POLITICO.shp'
#>   using driver `ESRI Shapefile'
#> Simple feature collection with 33 features and 9 fields
#> Geometry type: MULTIPOLYGON
#> Dimension:     XY
#> Bounding box:  xmin: -81.73562 ymin: -4.229406 xmax: -66.84722 ymax: 13.39473
#> Geodetic CRS:  MAGNA-SIRGAS

# Department map.
colombia_deptos_shp |>
  ggplot() +
  geom_sf()

# Municipality polygons.
colombia_mpios_shp <-
  st_read("datos-ejemplos/MGN2021_MPIO_POLITICO/MGN_MPIO_POLITICO.shp")
#> Reading layer `MGN_MPIO_POLITICO' from data source
#>   `D:\Otros\UdeA\2024-02\01-estadistica\estadistica-202402\datos-ejemplos\MGN2021_MPIO_POLITICO\MGN_MPIO_POLITICO.shp'
#>   using driver `ESRI Shapefile'
#> Simple feature collection with 1121 features and 12 fields
#> Geometry type: MULTIPOLYGON
#> Dimension:     XY
#> Bounding box:  xmin: -81.73562 ymin: -4.229406 xmax: -66.84722 ymax: 13.39473
#> Geodetic CRS:  MAGNA-SIRGAS

# Municipality map of the whole country.
colombia_mpios_shp |>
  ggplot() +
  geom_sf()

# Municipalities of a single department.
colombia_mpios_shp |>
  filter(DPTO_CNMBR == "ANTIOQUIA") |>
  ggplot() +
  geom_sf()
# Maize evaluations. codigo_dane_departamento is converted to numeric and
# renamed to DPTO_CCDGO so it matches the key used in the join with
# colombia_deptos_shp below.
df_eva_maiz <-
  df_evas |>
  filter(cultivo == "Maíz") |>
  mutate(codigo_dane_departamento = as.numeric(codigo_dane_departamento)) |>
  rename(DPTO_CCDGO = codigo_dane_departamento)

# Mean maize yield per department code.
df_resumen_maiz <-
  df_eva_maiz |>
  group_by(DPTO_CCDGO) |>
  reframe(promedio_rto = mean(rendimiento_t_ha, na.rm = TRUE))
df_resumen_maiz

# Attach the summary to the department polygons; the left join keeps every
# polygon, including those with no matching summary row.
df_maiz_mapa <-
  colombia_deptos_shp |>
  left_join(df_resumen_maiz, by = "DPTO_CCDGO")
df_maiz_mapa

# Choropleth of the mean yield.
df_maiz_mapa |>
  ggplot(mapping = aes(fill = promedio_rto)) +
  geom_sf() +
  scale_fill_viridis_c() +
  theme_void() +
  annotation_north_arrow(location = "tl") +
  labs(
    title = "Rendimiento del maíz en Colombia",
    subtitle = "Departamental",
    fill = "Rendimiento (t/ha)"
  )
# Credits from 2023 with id_rubro 165000.
df_filtro_creditos <-
  df_creditos |>
  filter(ano == 2023) |>
  filter(id_rubro == 165000)
df_filtro_creditos |> head()

# Department polygons as a base map, with one point per filtered credit at
# its longitud/latitud, coloured by invested value.
colombia_deptos_shp |>
  ggplot() +
  geom_sf() +
  geom_point(
    data = df_filtro_creditos,
    mapping = aes(x = longitud, y = latitud, color = valor_inversion)
  ) +
  scale_color_viridis_c() +
  theme_void() +
  annotation_north_arrow(location = "tl") +
  labs(
    title = "Créditos agropecuarios en Colombia - Año 2023",
    subtitle = "Capital de trabajo microcrédito rural",
    color = "Valor (COP)"
  )
library(terra)
# Elevation raster read from a GeoTIFF file.
elevacion <- rast("datos-ejemplos/wc2.1_2.5m_elev/wc2.1_2.5m_elev.tif")
elevacion |> plot()

# Mask the raster with the department polygons, then crop it to them.
elevacion_colombia <-
  elevacion |>
  mask(colombia_deptos_shp) |>
  crop(colombia_deptos_shp)
elevacion_colombia |> plot()