14.6 Preparando insumos para STAN
- Lectura y adecuación de covariables
<-
statelevel_predictors_df readRDS('Recursos/Día5/Sesion1/Data/statelevel_predictors_df_dam2.rds')
## Estandarizando las variables para controlar el efecto de la escala.
%<>%
statelevel_predictors_df mutate_at(vars("luces_nocturnas",
"cubrimiento_cultivo",
"cubrimiento_urbano",
"modificacion_humana",
"accesibilidad_hospitales",
"accesibilidad_hosp_caminado"),
function(x)as.numeric(scale(x)))
head(statelevel_predictors_df,10) %>% tba()
dam2 | modificacion_humana | accesibilidad_hospitales | accesibilidad_hosp_caminado | cubrimiento_cultivo | cubrimiento_urbano | luces_nocturnas | area1 | sexo2 | edad2 | edad3 | edad4 | edad5 | anoest2 | anoest3 | anoest4 | anoest99 | tiene_sanitario | tiene_acueducto | tiene_gas | eliminar_basura | tiene_internet | piso_tierra | material_paredes | material_techo | rezago_escolar | alfabeta | hacinamiento | tasa_desocupacion | id_municipio |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
00101 | 3.6127 | -1.1835 | -1.5653 | -1.1560 | 7.2782 | 4.9650 | 1.0000 | 0.5224 | 0.2781 | 0.2117 | 0.1808 | 0.0725 | 0.2000 | 0.3680 | 0.2286 | 0.0193 | 0.0119 | 0.7946 | 0.0673 | 0.0810 | 0.6678 | 0.0033 | 0.0109 | 0.0111 | 0.3694 | 0.9247 | 0.1962 | 0.0066 | 100101 |
00201 | -0.0553 | 0.4449 | 0.2100 | 0.0684 | -0.0682 | -0.1511 | 0.8904 | 0.4933 | 0.2726 | 0.1849 | 0.1520 | 0.0614 | 0.3149 | 0.3022 | 0.0775 | 0.0082 | 0.1005 | 0.7220 | 0.2261 | 0.1300 | 0.9276 | 0.0664 | 0.0812 | 0.0249 | 0.1501 | 0.7975 | 0.3014 | 0.0007 | 050201 |
00202 | -0.3758 | 0.0000 | 0.1482 | -0.2345 | -0.2855 | -0.4234 | 0.6799 | 0.4697 | 0.2804 | 0.1895 | 0.1430 | 0.0515 | 0.3757 | 0.2405 | 0.0148 | 0.0014 | 0.1322 | 0.9230 | 0.2693 | 0.2884 | 0.9759 | 0.0625 | 0.0986 | 0.0673 | 0.0278 | 0.7140 | 0.3454 | 0.0001 | 050202 |
00203 | -0.9259 | 0.5732 | -0.1402 | -0.5511 | -0.3822 | -0.5612 | 0.5814 | 0.4601 | 0.2665 | 0.1733 | 0.1586 | 0.0713 | 0.3778 | 0.2463 | 0.0219 | 0.0052 | 0.2579 | 0.7602 | 0.4824 | 0.2589 | 0.9919 | 0.1937 | 0.2342 | 0.1238 | 0.0485 | 0.7104 | 0.2755 | 0.0001 | 050203 |
00204 | -1.3166 | 1.1111 | 0.4438 | -0.5027 | -0.3835 | -0.6042 | 0.5708 | 0.4663 | 0.2647 | 0.1683 | 0.1673 | 0.0757 | 0.3306 | 0.2402 | 0.0440 | 0.0049 | 0.1672 | 0.6375 | 0.5040 | 0.3837 | 0.9759 | 0.1403 | 0.1354 | 0.0176 | 0.0873 | 0.6737 | 0.2671 | 0.0002 | 050204 |
00205 | -0.7474 | 2.1155 | 1.2271 | -0.5838 | -0.3345 | -0.5909 | 0.6937 | 0.4633 | 0.2849 | 0.2107 | 0.1473 | 0.0583 | 0.2794 | 0.2821 | 0.0562 | 0.0067 | 0.3800 | 0.6596 | 0.5014 | 0.2852 | 0.9894 | 0.2309 | 0.2498 | 0.0459 | 0.1016 | 0.6751 | 0.4973 | 0.0001 | 050205 |
00206 | 0.5157 | -0.1468 | -0.1811 | 1.1894 | -0.1191 | -0.4022 | 0.9563 | 0.4557 | 0.2910 | 0.1814 | 0.1495 | 0.0626 | 0.3793 | 0.2815 | 0.0427 | 0.0052 | 0.1301 | 0.8817 | 0.2565 | 0.1495 | 0.9659 | 0.0629 | 0.0472 | 0.0337 | 0.0835 | 0.8027 | 0.2200 | 0.0001 | 050206 |
00207 | 1.7368 | -0.7648 | -0.4861 | 0.7170 | -0.0609 | 0.0042 | 0.5201 | 0.4783 | 0.2898 | 0.1675 | 0.1464 | 0.0531 | 0.3552 | 0.2901 | 0.0328 | 0.0061 | 0.2434 | 0.5775 | 0.2758 | 0.0950 | 0.9911 | 0.0717 | 0.2004 | 0.1304 | 0.0714 | 0.7778 | 0.3936 | 0.0001 | 050207 |
00208 | -0.5942 | 0.3212 | -0.1697 | -0.3627 | -0.3044 | -0.4750 | 0.6625 | 0.4334 | 0.2943 | 0.1875 | 0.1523 | 0.0654 | 0.3557 | 0.2486 | 0.0250 | 0.0054 | 0.1908 | 0.8251 | 0.4152 | 0.1450 | 0.9907 | 0.1458 | 0.1517 | 0.0852 | 0.0509 | 0.6897 | 0.3051 | 0.0001 | 050208 |
00209 | -1.5280 | 3.0192 | 1.9428 | -0.8078 | -0.4046 | -0.6423 | 0.6798 | 0.4311 | 0.2858 | 0.1687 | 0.1628 | 0.0701 | 0.3648 | 0.2645 | 0.0752 | 0.0061 | 0.1893 | 0.5760 | 0.4096 | 0.3557 | 0.9978 | 0.1097 | 0.0941 | 0.0292 | 0.1357 | 0.7680 | 0.2189 | 0.0001 | 050209 |
- Seleccionar las variables del modelo y crear matriz de covariables.
<-
names_cov c(
"dam2",
"tasa_desocupacion",
"hacinamiento",
"piso_tierra",
"luces_nocturnas",
"cubrimiento_cultivo",
"modificacion_humana"
)<-
X_pred anti_join(statelevel_predictors_df %>% select(all_of(names_cov)),
%>% select(dam2)) indicador_dam1
En el bloque de código se identifican que dominios serán los predichos.
%>% select(dam2) %>%
X_pred saveRDS(file = "Recursos/Día5/Sesion1/Data/dam_pred.rds")
Creando la matriz de covariables para los dominios no observados (X_pred
) y los observados (X_obs
)
## Obteniendo la matrix
%<>%
X_pred data.frame() %>%
select(-dam2) %>% as.matrix()
## Identificando los dominios para realizar estimación del modelo
<- inner_join(indicador_dam1 %>% select(dam2, id_orden),
X_obs %>% select(all_of(names_cov))) %>%
statelevel_predictors_df arrange(id_orden) %>%
data.frame() %>%
select(-dam2, -id_orden) %>% as.matrix()
- Calculando el n_efectivo y el \(\tilde{y}\)
<- nrow(indicador_dam1)
D <- 3 # Ocupado, desocupado, inactivo.
P <- matrix(NA, D, P)
Y_tilde <- matrix(NA, D, P)
n_tilde <- matrix(NA, D, P)
Y_hat
# n efectivos ocupado
1] <- (indicador_dam1$Ocupado*(1 - indicador_dam1$Ocupado))/indicador_dam1$Ocupado_var
n_tilde[,1] <- n_tilde[,1]* indicador_dam1$Ocupado
Y_tilde[,
# n efectivos desocupado
2] <- (indicador_dam1$Desocupado*(1 - indicador_dam1$Desocupado))/indicador_dam1$Desocupado_var
n_tilde[,2] <- n_tilde[,2]* indicador_dam1$Desocupado
Y_tilde[,
# n efectivos Inactivo
3] <- (indicador_dam1$Inactivo*(1 - indicador_dam1$Inactivo))/indicador_dam1$Inactivo_var
n_tilde[,3] <- n_tilde[,3]* indicador_dam1$Inactivo Y_tilde[,
Ahora, validamos la coherencia de los cálculos realizados
= rowSums(Y_tilde)
ni_hat 1] <- ni_hat* indicador_dam1$Ocupado
Y_hat[,2] <- ni_hat* indicador_dam1$Desocupado
Y_hat[,3] <- ni_hat* indicador_dam1$Inactivo
Y_hat[,<- ceiling(Y_hat)
Y_hat
<- Y_hat/rowSums(Y_hat)
hat_p par(mfrow = c(1,3))
plot(hat_p[,1],indicador_dam1$Ocupado)
abline(a = 0,b=1,col = "red")
plot(hat_p[,2],indicador_dam1$Desocupado)
abline(a = 0,b=1,col = "red")
plot(hat_p[,3],indicador_dam1$Inactivo)
abline(a = 0,b=1,col = "red")
- Compilando el modelo
<- cbind(matrix(1,nrow = D,ncol = 1),X_obs)
X1_obs = ncol(X1_obs)
K <- nrow(X_pred)
D1 <- cbind(matrix(1,nrow = D1,ncol = 1),X_pred)
X1_pred
<- list(D = D,
sample_data P = P,
K = K,
y_tilde = Y_hat,
X_obs = X1_obs,
X_pred = X1_pred,
D1 = D1)
library(rstan)
<- stan(
fit_mcmc2 file = "Recursos/Día5/Sesion1/Data/modelosStan/00 Multinomial_simple_no_cor.stan", # Stan program
data = sample_data, # named list of data
verbose = TRUE,
warmup = 1000, # number of warmup iterations per chain
iter = 2000, # total number of iterations per chain
cores = 4, # number of cores (could use one per chain)
)
saveRDS(fit_mcmc2,
"Recursos/Día5/Sesion1/Data/fit_multinomial_no_cor.Rds")