# Setup ----
# Packages used by the script; each one is attached exactly once.
library(tidyverse)
library(janitor)
library(readxl)
library(ggsci) # preset colour palettes
library(plotly) # interactive visualisation

# Data import ----
# Reservoir data. This assignment must be its own statement: later sections
# read df_embalses$Value, Name and Date.
df_embalses <- read_csv("datos-ejemplos/PorcVoluUtilDiar.csv")
df_embalses |> head()

# Agricultural evaluations workbook: the first 6 rows are skipped and the
# column names are normalised with janitor::clean_names().
df_evas <- read_excel("datos-ejemplos/Base agrícola 2019 - 2023.xlsx", skip = 6) |>
  clean_names()
df_evas |> head()

# Agricultural credit records, also with cleaned column names.
df_creditos <- read_csv("datos-ejemplos/Colocaciones_de_Cr_dito_Sector_Agropecuario_-_2021-_2024_20240910.csv") |>
  clean_names()
df_creditos |> head()

# Bar charts ----
# Simplest version: number of credit records per year.
df_creditos |>
  count(ano, name = "total") |>
  ggplot(aes(x = ano, y = total)) +
  geom_col()
# Credit records per year, with custom bar colours, axis labels and titles.
df_creditos |>
  count(ano, name = "total") |>
  ggplot(mapping = aes(x = ano, y = total)) +
  geom_col(
    colour = "cornflowerblue",
    fill = "#F87A53",
    alpha = 0.5
  ) +
  labs(
    title = "Créditos agropecuarios en Colombia",
    subtitle = "Años 2021 a 2024",
    caption = "*2024: información incompleta",
    x = "Año",
    y = "Total (n)"
  ) +
  theme_bw()

# Same chart with the yearly total drawn as plain text on each bar.
df_creditos |>
  count(ano, name = "total") |>
  ggplot(mapping = aes(x = ano, y = total, label = total)) +
  geom_col(
    colour = "cornflowerblue",
    fill = "#F87A53",
    alpha = 0.5
  ) +
  geom_text() +
  labs(
    title = "Créditos agropecuarios en Colombia",
    subtitle = "Años 2021 a 2024",
    caption = "*2024: información incompleta",
    x = "Año",
    y = "Total (n)"
  ) +
  theme_bw()

# Same chart with the total drawn as a boxed label; here the label
# aesthetic is set on the label layer only.
df_creditos |>
  count(ano, name = "total") |>
  ggplot(mapping = aes(x = ano, y = total)) +
  geom_col(
    colour = "cornflowerblue",
    fill = "#F87A53",
    alpha = 0.5
  ) +
  geom_label(
    mapping = aes(label = total),
    colour = "white",
    fill = "#3B1E54"
  ) +
  labs(
    title = "Créditos agropecuarios en Colombia",
    subtitle = "Años 2021 a 2024",
    caption = "*2024: información incompleta",
    x = "Año",
    y = "Total (n)"
  ) +
  theme_bw()
# Number of agricultural evaluation records per department.
# The x aesthetic is `departamento`, so the axis is labelled "Departamento"
# (it was previously labelled "Año", which did not match the mapped variable).
df_evas |>
  count(departamento, name = "total") |>
  ggplot(aes(x = departamento, y = total)) +
  geom_col(color = "cornflowerblue", fill = "#F87A53", alpha = 0.5) +
  geom_label(aes(label = total), color = "white", fill = "#3B1E54") +
  labs(
    x = "Departamento",
    y = "Total (n)",
    title = "Evaluaciones agropecuarias por departamento",
    subtitle = "Colombia 2019 a 2023"
  ) +
  theme_bw()

# Same chart with smaller labels and the department names rotated 45 degrees.
df_evas |>
  count(departamento, name = "total") |>
  ggplot(aes(x = departamento, y = total)) +
  geom_col(color = "cornflowerblue", fill = "#F87A53", alpha = 0.5) +
  geom_label(aes(label = total), color = "white", fill = "#3B1E54", size = 1.7) +
  labs(
    x = "Departamento",
    y = "Total (n)",
    title = "Evaluaciones agropecuarias por departamento",
    subtitle = "Colombia 2019 a 2023"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Departments ordered by their total and drawn as horizontal bars.
# labs(x = ...) still names the department axis after coord_flip(), because
# it labels the x aesthetic rather than the horizontal position.
df_evas |>
  count(departamento, name = "total") |>
  ggplot(aes(x = reorder(departamento, total), y = total)) +
  geom_col(color = "cornflowerblue", fill = "#F87A53", alpha = 0.5) +
  geom_label(aes(label = total), color = "white", fill = "#3B1E54", size = 1.7) +
  labs(
    x = "Departamento",
    y = "Total (n)",
    title = "Evaluaciones agropecuarias por departamento",
    subtitle = "Colombia 2019 a 2023"
  ) +
  theme_bw() +
  coord_flip()
# Median of Value for each Name, drawn as red points ordered by that median
# on a flipped (horizontal) layout.
df_embalses |>
  group_by(Name) |>
  reframe(mediana = median(Value)) |>
  ggplot(mapping = aes(x = reorder(Name, mediana), y = mediana)) +
  geom_point(colour = "red") +
  labs(x = "") +
  coord_flip()

# Median of Value over the whole data set, used as a reference line below.
mediana_general_vol <- median(df_embalses$Value)

# Same dot plot plus a dashed blue line at the overall median.
df_embalses |>
  group_by(Name) |>
  reframe(mediana = median(Value)) |>
  ggplot(mapping = aes(x = reorder(Name, mediana), y = mediana)) +
  geom_point(colour = "red") +
  geom_hline(
    yintercept = mediana_general_vol,
    colour = "blue",
    linetype = 2
  ) +
  labs(
    x = "",
    caption = "Línea azul: mediana general"
  ) +
  coord_flip()
# Credit records per year, one panel per investment department,
# each panel with its own y scale.
df_creditos |>
  count(departamento_inversion, ano, name = "total") |>
  ggplot(mapping = aes(x = ano, y = total)) +
  geom_col() +
  facet_wrap(~ departamento_inversion, ncol = 3, scales = "free_y")

# Records per year split by gender, one panel per producer type
# (bars use the default position).
df_creditos |>
  count(ano, genero, tipo_productor) |>
  ggplot(mapping = aes(x = ano, y = n, colour = genero, fill = genero)) +
  geom_col(alpha = 0.5) +
  facet_wrap(~ tipo_productor, scales = "free_y")

# Same data with the gender bars placed side by side.
df_creditos |>
  count(ano, genero, tipo_productor) |>
  ggplot(mapping = aes(x = ano, y = n, colour = genero, fill = genero)) +
  geom_col(alpha = 0.5, position = "dodge") +
  facet_wrap(~ tipo_productor, scales = "free_y")

# Side-by-side bars with a hand-picked palette for outline and fill.
df_creditos |>
  count(ano, genero, tipo_productor) |>
  ggplot(mapping = aes(x = ano, y = n, colour = genero, fill = genero)) +
  geom_col(alpha = 0.5, position = "dodge") +
  facet_wrap(~ tipo_productor, scales = "free_y") +
  scale_colour_manual(values = c("orangered", "magenta2", "gray40")) +
  scale_fill_manual(values = c("orangered", "magenta2", "gray40"))

# Side-by-side bars with the ggsci "npg" palette, the legend on top and
# full labelling.
df_creditos |>
  count(ano, genero, tipo_productor) |>
  ggplot(mapping = aes(x = ano, y = n, colour = genero, fill = genero)) +
  geom_col(alpha = 0.5, position = "dodge") +
  facet_wrap(~ tipo_productor, scales = "free_y") +
  scale_color_npg() +
  scale_fill_npg() +
  labs(
    title = "Créditos agropecuarios en Colombia por producto y género",
    subtitle = "Años 2021 a 2024",
    caption = "*2024: información incompleta",
    x = "Año",
    y = "Total (n)",
    colour = "Género",
    fill = "Género"
  ) +
  theme(legend.position = "top") # options: left, top, right, bottom

# Sum of valor_inversion (missing values ignored) per year, gender and
# producer type.
df_creditos |>
  group_by(ano, genero, tipo_productor) |>
  reframe(total = sum(valor_inversion, na.rm = TRUE)) |>
  ggplot(mapping = aes(x = ano, y = total, colour = genero, fill = genero)) +
  geom_col(alpha = 0.5, position = "dodge") +
  facet_wrap(~ tipo_productor, scales = "free_y")

# Average investment per record: group total divided by the number of
# rows in the group.
df_creditos |>
  group_by(ano, genero, tipo_productor) |>
  reframe(
    total = sum(valor_inversion, na.rm = TRUE),
    creditos = n()
  ) |>
  mutate(promedio = total / creditos) |>
  ggplot(mapping = aes(x = ano, y = promedio, colour = genero, fill = genero)) +
  geom_col(alpha = 0.5, position = "dodge") +
  facet_wrap(~ tipo_productor, scales = "free_y")
# Interactive version of the basic records-per-year bar chart.
ggplotly(
  df_creditos |>
    count(ano, name = "total") |>
    ggplot(aes(x = ano, y = total)) +
    geom_col()
)

# Proportion charts ----
# position = "fill" stacks the bars and rescales each one to the same height,
# so every bar shows the share of each gender.
df_creditos |>
  count(ano, genero, tipo_productor) |>
  ggplot(aes(x = ano, y = n, color = genero, fill = genero)) +
  facet_wrap(~ tipo_productor, scales = "free_y") +
  geom_col(alpha = 0.5, position = "fill") +
  coord_flip()

# Gender shares, one panel per department / producer-type combination.
df_creditos |>
  count(ano, genero, tipo_productor, departamento_inversion) |>
  ggplot(aes(x = ano, y = n, color = genero, fill = genero)) +
  facet_wrap(~ departamento_inversion ~ tipo_productor, ncol = 4) +
  geom_col(position = "fill", alpha = 0.5) +
  theme(legend.position = "top") +
  coord_flip()

# Producer-type shares, one panel per department / gender combination.
df_creditos |>
  count(ano, genero, tipo_productor, departamento_inversion) |>
  ggplot(aes(x = ano, y = n, color = tipo_productor, fill = tipo_productor)) +
  facet_wrap(~ departamento_inversion ~ genero, ncol = 3) +
  geom_col(position = "fill", alpha = 0.5) +
  theme(legend.position = "top") +
  coord_flip()

# Gender shares laid out with facet_grid() instead of facet_wrap().
df_creditos |>
  count(ano, genero, tipo_productor, departamento_inversion) |>
  ggplot(aes(x = ano, y = n, color = genero, fill = genero)) +
  facet_grid(~ departamento_inversion ~ tipo_productor) +
  geom_col(position = "fill", alpha = 0.5) +
  theme(legend.position = "top") +
  coord_flip()
# Keep only the crop of interest.
df_eva_cultivo <-
  df_evas |>
  filter(cultivo == "Café")

# Historical mean yield of the crop. Missing yields are ignored, as in the
# other summaries of rendimiento_t_ha in this script; without na.rm a single
# NA would turn the mean, and therefore every comparison below, into NA.
media_rto_cultivo <-
  df_eva_cultivo$rendimiento_t_ha |> mean(na.rm = TRUE)

# Add the variable "resultado", which flags whether the yield of each
# evaluation is above ("Superior") or not above ("Inferior") the historical
# mean. Rows with a missing yield get a missing resultado.
df_resumen_rto <-
  df_eva_cultivo |>
  mutate(
    resultado = if_else(
      condition = rendimiento_t_ha > media_rto_cultivo,
      true = "Superior",
      false = "Inferior"
    )
  )

# Share of "Superior" / "Inferior" evaluations per year, one panel per
# department.
df_resumen_rto |>
  count(departamento, ano, resultado) |>
  ggplot(aes(x = ano, y = n, color = resultado, fill = resultado)) +
  facet_wrap(~ departamento, ncol = 4) +
  geom_col(position = "fill", alpha = 0.5) +
  coord_flip() +
  theme(legend.position = "top")
# Flag each record as "Superior" when Value is above 0.5 and "Inferior"
# otherwise, and extract the year of Date into year_es.
df_embalses2 <-
  df_embalses |>
  mutate(
    resultado = if_else(Value > 0.5, "Superior", "Inferior"),
    year_es = year(Date)
  )

# Share of each resultado per year (year treated as a category).
df_embalses2 |>
  count(year_es, resultado) |>
  mutate(year_es = as.factor(year_es)) |>
  ggplot(mapping = aes(x = year_es, y = n, fill = resultado)) +
  geom_col(position = "fill") +
  coord_flip()

# Same shares, one panel per Name, with the legend on top.
df_embalses2 |>
  count(year_es, resultado, Name) |>
  mutate(year_es = as.factor(year_es)) |>
  ggplot(mapping = aes(x = year_es, y = n, fill = resultado)) +
  geom_col(position = "fill") +
  facet_wrap(~ Name) +
  theme(legend.position = "top") +
  coord_flip()
# Interactive version of the gender-share chart by producer type.
ggplotly(
  df_creditos |>
    count(ano, genero, tipo_productor) |>
    ggplot(aes(x = ano, y = n, color = genero, fill = genero)) +
    facet_wrap(~ tipo_productor, scales = "free_y") +
    geom_col(alpha = 0.5, position = "fill") +
    coord_flip()
)

# Distributions ----
# Histogram of Value over the whole data set (30 bins).
df_embalses |>
  ggplot(aes(x = Value)) +
  geom_histogram(
    color = "red",
    fill = "gray70",
    bins = 30
  )

# One histogram per Name, stacked in a single column.
df_embalses |>
  ggplot(aes(x = Value)) +
  geom_histogram(
    color = "red",
    fill = "gray70",
    bins = 30
  ) +
  facet_wrap(~ Name, ncol = 1)

# Density estimate of Value.
df_embalses |>
  ggplot(aes(x = Value)) +
  geom_density(
    color = "red",
    fill = "gray70"
  )

# Single box plot of Value.
df_embalses |>
  ggplot(aes(x = Value)) +
  geom_boxplot(color = "red", fill = "gray70")

# One box plot per Name, drawn horizontally.
df_embalses |>
  ggplot(aes(x = Name, y = Value)) +
  geom_boxplot(color = "red", fill = "gray70") +
  coord_flip()

# Overlaid density curves, one colour per Name.
df_embalses |>
  ggplot(aes(x = Value, color = Name)) +
  geom_density()

# Interactive version of the overlaid densities.
ggplotly(
  df_embalses |>
    ggplot(aes(x = Value, color = Name)) +
    geom_density()
)

# One density panel per Name; the legend is hidden because the panel
# titles already identify each curve.
df_embalses |>
  ggplot(aes(x = Value, color = Name)) +
  geom_density(show.legend = FALSE) +
  facet_wrap(~Name, ncol = 4, scales = "free_y")
# Relationships between variables ----
# First compute the lost area relative to the sown area. The result is a
# fraction (it is not multiplied by 100). Only rows with a positive loss are
# kept, which also drops rows where the ratio is missing.
df_eva_cacao <-
  df_evas |>
  filter(cultivo == "Cacao") |>
  mutate(area_perdida_porc = (area_sembrada_ha - area_cosechada_ha) / area_sembrada_ha) |>
  filter(area_perdida_porc > 0)

# Yield against lost-area fraction, with a linear fit.
df_eva_cacao |>
  ggplot(aes(x = area_perdida_porc, y = rendimiento_t_ha)) +
  geom_point() +
  geom_smooth(method = "lm")

# Yield against sown area on a log10 x axis, with a linear fit.
df_eva_cacao |>
  ggplot(aes(x = area_sembrada_ha, y = rendimiento_t_ha)) +
  geom_point() +
  scale_x_log10() +
  geom_smooth(method = "lm")

# Correlation between sown area and yield on the original scale.
cor(
  x = df_eva_cacao$area_sembrada_ha,
  y = df_eva_cacao$rendimiento_t_ha,
  use = "pairwise.complete.obs"
)
# Output recorded when the document was rendered: [1] 0.02916119

# Correlation after applying log1p() to both variables.
cor(
  x = log1p(df_eva_cacao$area_sembrada_ha),
  y = log1p(df_eva_cacao$rendimiento_t_ha),
  use = "pairwise.complete.obs"
)
# Output recorded when the document was rendered: [1] 0.209491
# Value over time for the "AGREGADO BOGOTA" series, with a smoothed trend.
df_embalses |>
  filter(Name == "AGREGADO BOGOTA") |>
  ggplot(aes(x = Date, y = Value)) +
  geom_line() +
  geom_smooth(color = "red")

# All series together: 2D bin counts of Date vs Value plus a smoothed trend.
df_embalses |>
  ggplot(aes(x = Date, y = Value)) +
  geom_bin_2d() +
  geom_smooth(color = "red")

# Correlation matrix ----
# Spread the series into one column per Name, keep the numeric columns and
# correlate them using pairwise-complete observations.
matriz_cor1 <-
  df_embalses |>
  pivot_wider(names_from = Name, values_from = Value) |>
  select(where(is.numeric)) |>
  cor(use = "pairwise.complete.obs")

library(corrplot)

# Default correlation plot.
matriz_cor1 |>
  corrplot()

# Lower triangle only, pie glyphs, rotated black labels, variables ordered
# by hierarchical clustering.
matriz_cor1 |>
  corrplot(
    type = "lower",
    diag = FALSE,
    method = "pie",
    tl.srt = 45,
    tl.cex = 0.7,
    tl.col = "black",
    order = "hclust"
  )

library(corrr)

# Same correlations computed with corrr and drawn as a network.
mtx_cor2 <-
  df_embalses |>
  pivot_wider(names_from = Name, values_from = Value) |>
  select(where(is.numeric)) |>
  correlate()

mtx_cor2 |> network_plot()
# Mean yield per department with bars spanning one standard deviation on
# each side (missing yields ignored), drawn as a point plus an error bar.
df_eva_cacao |>
  group_by(departamento) |>
  reframe(
    promedio = mean(rendimiento_t_ha, na.rm = TRUE),
    desviacion = sd(rendimiento_t_ha, na.rm = TRUE)
  ) |>
  ggplot(
    mapping = aes(
      x = departamento,
      y = promedio,
      ymin = promedio - desviacion,
      ymax = promedio + desviacion
    )
  ) +
  geom_point() +
  geom_errorbar() +
  coord_flip()

# Same summary drawn with a single point-range layer.
df_eva_cacao |>
  group_by(departamento) |>
  reframe(
    promedio = mean(rendimiento_t_ha, na.rm = TRUE),
    desviacion = sd(rendimiento_t_ha, na.rm = TRUE)
  ) |>
  ggplot(
    mapping = aes(
      x = departamento,
      y = promedio,
      ymin = promedio - desviacion,
      ymax = promedio + desviacion
    )
  ) +
  geom_pointrange() +
  coord_flip()
library(geodata)
library(sf)
library(ggspatial)
# Boundaries for "COL" requested through gadm() at levels 0, 1 and 2, using
# "datos-ejemplos/" as the path argument; each result is plotted right away.
# NOTE(review): judging by the variable names, the levels are presumably
# country, departments and municipalities - confirm against the geodata docs.
colombia_pais <- gadm(country = "COL", level = 0, path = "datos-ejemplos/")
plot(colombia_pais)

colombia_deptos <- gadm(country = "COL", level = 1, path = "datos-ejemplos/")
plot(colombia_deptos)

colombia_mpios <- gadm(country = "COL", level = 2, path = "datos-ejemplos/")
plot(colombia_mpios)

# Read one of the .rds files under datos-ejemplos/gadm/ directly and plot it.
mapa_colombia <- readRDS("datos-ejemplos/gadm/gadm41_COL_0_pk.rds")
plot(mapa_colombia)
# Department polygons from a shapefile. DPTO_CCDGO is converted to numeric
# so it can be used as a numeric join key later in the script.
colombia_deptos_shp <-
  st_read("datos-ejemplos/MGN2021_DPTO_POLITICO/MGN_DPTO_POLITICO.shp") |>
  mutate(DPTO_CCDGO = as.numeric(DPTO_CCDGO))
# Console output recorded when the document was rendered:
#   Reading layer `MGN_DPTO_POLITICO' from data source
#     `D:\Otros\UdeA\2024-02\01-estadistica\estadistica-202402\datos-ejemplos\MGN2021_DPTO_POLITICO\MGN_DPTO_POLITICO.shp'
#     using driver `ESRI Shapefile'
#   Simple feature collection with 33 features and 9 fields
#   Geometry type: MULTIPOLYGON
#   Dimension:     XY
#   Bounding box:  xmin: -81.73562 ymin: -4.229406 xmax: -66.84722 ymax: 13.39473
#   Geodetic CRS:  MAGNA-SIRGAS

# Plain map of the department polygons.
colombia_deptos_shp |>
  ggplot() +
  geom_sf()

# Municipality polygons from a second shapefile.
colombia_mpios_shp <- st_read("datos-ejemplos/MGN2021_MPIO_POLITICO/MGN_MPIO_POLITICO.shp")
# Console output recorded when the document was rendered:
#   Reading layer `MGN_MPIO_POLITICO' from data source
#     `D:\Otros\UdeA\2024-02\01-estadistica\estadistica-202402\datos-ejemplos\MGN2021_MPIO_POLITICO\MGN_MPIO_POLITICO.shp'
#     using driver `ESRI Shapefile'
#   Simple feature collection with 1121 features and 12 fields
#   Geometry type: MULTIPOLYGON
#   Dimension:     XY
#   Bounding box:  xmin: -81.73562 ymin: -4.229406 xmax: -66.84722 ymax: 13.39473
#   Geodetic CRS:  MAGNA-SIRGAS

# Plain map of every municipality.
colombia_mpios_shp |>
  ggplot() +
  geom_sf()

# Only the municipalities whose DPTO_CNMBR is "ANTIOQUIA".
colombia_mpios_shp |>
  filter(DPTO_CNMBR == "ANTIOQUIA") |>
  ggplot() +
  geom_sf()
# Choropleth of maize yield ----
# Maize evaluations. The column codigo_dane_departamento is converted to
# numeric and renamed to DPTO_CCDGO so that it matches the key column of
# colombia_deptos_shp.
df_eva_maiz <-
  df_evas |>
  filter(cultivo == "Maíz") |>
  mutate(codigo_dane_departamento = as.numeric(codigo_dane_departamento)) |>
  rename(DPTO_CCDGO = codigo_dane_departamento)

# Mean yield per department code (missing yields ignored).
df_resumen_maiz <-
  df_eva_maiz |>
  group_by(DPTO_CCDGO) |>
  reframe(promedio_rto = mean(rendimiento_t_ha, na.rm = TRUE))
df_resumen_maiz

# Attach the summary to the department polygons. A left join keeps every
# polygon, so departments without maize data stay on the map with a missing
# promedio_rto.
df_maiz_mapa <-
  colombia_deptos_shp |>
  left_join(df_resumen_maiz, by = "DPTO_CCDGO")
df_maiz_mapa

# Map filled by mean yield, with a north arrow in the top-left corner.
df_maiz_mapa |>
  ggplot(mapping = aes(fill = promedio_rto)) +
  geom_sf() +
  scale_fill_viridis_c() +
  theme_void() +
  annotation_north_arrow(location = "tl") +
  labs(
    title = "Rendimiento del maíz en Colombia",
    subtitle = "Departamental",
    fill = "Rendimiento (t/ha)"
  )
# Point map of credits ----
# Credit records from 2023 whose id_rubro is 165000.
df_filtro_creditos <-
  df_creditos |>
  filter(ano == 2023) |>
  filter(id_rubro == 165000)
df_filtro_creditos |> head()

# Department polygons as the base layer, with one point per filtered record
# placed at its longitud / latitud and coloured by valor_inversion.
colombia_deptos_shp |>
  ggplot() +
  geom_sf() +
  geom_point(
    data = df_filtro_creditos,
    mapping = aes(x = longitud, y = latitud, color = valor_inversion)
  ) +
  scale_color_viridis_c() +
  theme_void() +
  annotation_north_arrow(location = "tl") +
  labs(
    title = "Créditos agropecuarios en Colombia - Año 2023",
    subtitle = "Capital de trabajo microcrédito rural",
    color = "Valor (COP)"
  )
library(terra)

# Elevation raster read from a GeoTIFF and plotted as is.
# NOTE(review): the file name suggests a global WorldClim layer - confirm.
elevacion <- rast("datos-ejemplos/wc2.1_2.5m_elev/wc2.1_2.5m_elev.tif")
elevacion |> plot()

# Restrict the raster to the department polygons. Cropping to the polygons'
# extent first means mask() only has to process the cells inside that
# bounding box instead of the whole input raster; the resulting raster is
# the same as masking first and cropping afterwards.
elevacion_colombia <-
  elevacion |>
  crop(colombia_deptos_shp) |>
  mask(colombia_deptos_shp)
elevacion_colombia |> plot()