# Monitoria de Econometria 2 - 08/04/2022 # Monitor: Alan Leal/ Professor: Raphael Corbi ##### Pacotes: if(!require(tidyverse)){install.packages("tidyverse");require(tidyverse)} if(!require(haven)){install.packages("haven");require(haven)} if(!require(boot)){install.packages("boot");require(boot)} if(!require(clue)){install.packages("clue");require(clue)} ## Exact matching pm_df <- read.csv("C:\\Users\\Alanm\\OneDrive\\PAE\\2022I\\Dados\\perfect_matching_cunningham.CSV",sep=";") # Vamos agora fazer matching separando as observa??es do grupo de controle que se juntam ao grupo de tratamento pm_df_control <- pm_df %>% filter(Trainees==0) %>% rename(Earnings_counterfactual=Earnings) %>% select(Age,Earnings_counterfactual) %>% group_by(Age) %>% summarise(Earnings_counterfactual=mean(Earnings_counterfactual)) pm_df_treated <- pm_df %>% filter(Trainees==1) pm_df_matched <- left_join(pm_df_treated,pm_df_control,by="Age") # Calculando a diferen?a: pm_df_matched <- pm_df_matched %>% mutate(y1_y0=Earnings-Earnings_counterfactual) pm_df_matched %>% select(y1_y0) %>% colMeans() ## Propensity score: cattaneo2 <- read_dta("http://www.stata-press.com/data/r13/cattaneo2.dta") # Vamos agpra estimar o propensity score: probit_intermediario <- glm(mbsmoke~mmarried+mage+I(mage^2)+fbaby+medu, family = binomial(link = "probit"),data=cattaneo2) cattaneo2 <- cattaneo2 %>% mutate(pscore=probit_intermediario$fitted.values) # Agora, seguindo Cunningham (2021), temos algumas op??es do que fazer aqui: cattaneo2 <- cattaneo2 %>% mutate( d1=mbsmoke/pscore, d0=(1-mbsmoke)/(1-pscore), s1=sum(d1), s0=sum(d0), y1=mbsmoke*bweight/pscore, y0=(1-mbsmoke)*bweight/(1-pscore), ht=y1-y0, y1=(mbsmoke*bweight/pscore)/(s1/n()), y0=((1-mbsmoke)*bweight/(1-pscore))/(s0/n()), norm=y1-y0 ) # Vendo se o resultado?? o mesmo do do Stata: cattaneo2 %>% select(ht,norm) %>% colMeans() # Agora, vamos realizar o trimmimg do propensity score: cattaneo2 <- cattaneo2 %>% filter(pscore>0.1 & pscore<0.9) cattaneo2 <- cattaneo2 %>% mutate( d1=mbsmoke/pscore, d0=(1-mbsmoke)/(1-pscore), s1=sum(d1), s0=sum(d0), y1=mbsmoke*bweight/pscore, y0=(1-mbsmoke)*bweight/(1-pscore), ht=y1-y0, y1=(mbsmoke*bweight/pscore)/(s1/n()), y0=((1-mbsmoke)*bweight/(1-pscore))/(s0/n()), norm=y1-y0 ) # Vendo se o resultado?? o mesmo do do Stata: cattaneo2 %>% select(ht,norm) %>% colMeans() ## Propensity score matching: # Propensity score: cattaneo2 <- read_dta("http://www.stata-press.com/data/r13/cattaneo2.dta") # Vamos agora estimar o propensity score: probit_intermediario <- glm(mbsmoke~mmarried+mage+I(mage^2)+fbaby+medu, family = binomial(link = "probit"),data=cattaneo2) cattaneo2 <- cattaneo2 %>% mutate(pscore=probit_intermediario$fitted.values) # Dividir a minha base em controle e tratamento: x <- cattaneo2 %>% filter(mbsmoke==1) y <- cattaneo2 %>% filter(mbsmoke==0) # 1) Calcular uma matriz de dist?ncia: distance_matrix <- function(x,y){ dist <- matrix(0,nrow=dim(x)[1],ncol=dim(y)[1]) for (i in 1:dim(x)[1]){ dist[i,] <- abs(rep(x$pscore[i],dim(y)[1])-y$pscore) } return(dist) } # Mudar o ordenamento das observa??es no grupo de tratamento: x <- x %>% arrange(desc(pscore)) # Agora, calculando a matriz de dist?ncias: teste <- distance_matrix(x,y) ## 2) Matriz ordenada: matrix_order <- matrix(0,nrow=dim(x)[1],ncol=dim(y)[1]) for (i in 1:dim(x)[1]){ matrix_order[i,] <- rank(teste[i,]) } ## 3) Aplica??o do algoritmo h?ngaro (algoritmo que implementa o matching, na pr?tica) controles_matched <- solve_LSAP(matrix_order, maximum = FALSE) # Agora, vamos fazer o matching propriamente dito: bweight_matched <- rep(0,dim(x)[1]) for (i in 1:dim(x)[1]){ bweight_matched[i] <- y$bweight[controles_matched[i]] } x_matched <- x %>% mutate(bweight_counterfactual=bweight_matched) # Agora, vamos calcular o ATT: mean(x_matched$bweight-x_matched$bweight_counterfactual) # Estimando o erro-padr?o do ATT por bootstrap: media_x <- function(x,indices){ return(mean(x_matched$bweight[indices]-x_matched$bweight_counterfactual[indices])) } set.seed(12345) boot(x,media_x,R=100)