big.sim <- function(n1, n2, rho1, rho2, q = 10, beta = 1, gamma = rep(0.2, q), nsim = 1000) {

    one.sim <- function(){

        Z1 <- mvrnorm(n1, rep(1, q), Sigma1)
        Z1 <- matrix(2 * as.numeric(Z1 > 0) - 1, nrow = n1)
        UV1 <- mvrnorm(n1, rep(0, 2), matrix(c(1, 0.5, 0.5, 1), 2))
        X1 <- as.numeric(Z1 %*% gamma + UV1[, 1] > 0)
        Y1 <- X1 * beta + UV1[, 2]

        Z2 <- mvrnorm(n2, rep(1, q), Sigma2)
        Z2 <- matrix(2 * as.numeric(Z2 > 0) - 1, nrow = n2)
        UV2 <- mvrnorm(n2, rep(0, 2), matrix(c(2, 0.5 * sqrt(2), 0.5 * sqrt(2), 1), 2))
        X2 <- as.numeric(Z2 %*% gamma + UV2[, 1] > 0)
        Y2 <- X2 * beta + UV2[, 2]

        Z1 <- scale(Z1, center = TRUE, scale = FALSE)
        Z2 <- scale(Z2, center = TRUE, scale = FALSE)
        X1 <- X1 - mean(X1)
        X2 <- X2 - mean(X2)
        Y1 <- Y1 - mean(Y1)
        Y2 <- Y2 - mean(Y2)

        ## Only Z1, X1, Z2, Y2 are observed
        SZZ1 <- t(Z1) %*% Z1 / n1
        SZZ2 <- t(Z2) %*% Z2 / n2
        SZX1 <- t(Z1) %*% X1 / n1
        SZY2 <- t(Z2) %*% Y2 / n2
        SYY2 <- t(Y2) %*% Y2 / n2

        ## Two stage least squares
        W <- SZZ2
        gamma.hat <- solve(SZZ1) %*% SZX1
        beta.hat <- solve(t(SZX1) %*% solve(SZZ1) %*% W %*% solve(SZZ1) %*% SZX1) %*% (t(SZX1) %*% solve(SZZ1) %*% W %*% solve(SZZ2) %*% SZY2)
        Omega.hat <- (1/n2) * solve(SZZ2) * (SYY2 - beta.hat^2 * t(gamma.hat) %*% SZZ2 %*% gamma.hat)[, , drop = TRUE] + (1/n1) * solve(SZZ1) * (beta.hat^2 * var(X1 - Z1 %*% gamma.hat))[, , drop = TRUE]
        beta.var <- solve(t(SZX1) %*% solve(SZZ1) %*% W %*% solve(SZZ1) %*% SZX1) %*% t(SZX1) %*% solve(SZZ1) %*% W %*% Omega.hat %*% W %*% solve(SZZ1) %*% SZX1 %*% solve(t(SZX1) %*% solve(SZZ1) %*% W %*% solve(SZZ1) %*% SZX1)
        beta.ci <- beta.hat + c(-1.96, 1.96) * sqrt(beta.var)

        ## Optimal IV estimator
        W <- solve(Omega.hat)
        beta2.hat <- solve(t(SZX1) %*% solve(SZZ1) %*% W %*% solve(SZZ1) %*% SZX1) %*% (t(SZX1) %*% solve(SZZ1) %*% W %*% solve(SZZ2) %*% SZY2)
        beta2.var <- solve(t(SZX1) %*% solve(SZZ1) %*% W %*% solve(SZZ1) %*% SZX1) %*% t(SZX1) %*% solve(SZZ1) %*% W %*% Omega.hat %*% W %*% solve(SZZ1) %*% SZX1 %*% solve(t(SZX1) %*% solve(SZZ1) %*% W %*% solve(SZZ1) %*% SZX1)
        beta2.ci <- beta2.hat + c(-1.96, 1.96) * sqrt(beta2.var)

        ## Angrist-Krueger (estimator is biased!)
        phi <- cov(Z2*matrix(Y2, nrow = n2, ncol = q)) ## I am not sure if this is correct...
        omega <- cov(Z1*matrix(X1, nrow = n1, ncol = q)*beta)
        Phi <- phi + (n2/n1) * omega
        beta3.hat <- solve(t(SZX1) %*% solve(Phi) %*% SZX1) %*% t(SZX1) %*% solve(Phi) %*% SZY2
        beta3.var <- solve(t(SZX1) %*% solve(Phi) %*% SZX1) ## This is definitely wrong...

        ## TSLS naive variance

        c(beta.hat, beta.var, beta2.hat, beta2.var, beta3.hat, beta3.var, summary(lm(X1 ~ Z1))$fstatistic[1], summary(lm(X2 ~ Z2))$fstatistic[1])

    }

    library(MASS)
    ## beta <- 1
    ## q <- 10
    ## gamma <- rep(0.1, q)

    ## n <- 10000
    ## n1 <- n
    ## n2 <- n
    ## rho1 <- 0.5
    ## rho2 <- 0.5
    Sigma1 <- outer(1:q, 1:q, function(i, j) rho1^abs(i - j))
    Sigma2 <- outer(1:q, 1:q, function(i, j) rho2^abs(i - j))
    ## Sigma2 <- outer(1:q, 1:q, function(i, j) (-0.3)^abs(i - j))

    out <- replicate(nsim, one.sim())

    rownames(out) <- c("beta.hat1", "beta.var1", "beta.hat2", "beta.var2", "beta.hat3", "beta.var3", "fstat.sample1", "fstat.sample2")

    rmse1 <- sqrt(mean((out[1, ] - beta)^2)) ## RMSE of TSLS
    rmse2 <- sqrt(mean((out[3, ] - beta)^2)) ## RMSE of optimal IV
    ## sqrt(mean((out[5, ] - beta)^2)) ## RMSE of A-K

    bias1 <- mean(out[1, ] - beta) ## bias of TSLS
    bias2 <- mean(out[3, ] - beta) ## bias of optimal IV
    ## mean(out[5, ] - beta) ## bias of A-K

    beta.ci1 <- cbind(out[1, ] - 1.96 * sqrt(out[2, ]), out[1, ] + 1.96 * sqrt(out[2, ]))
    beta.ci2 <- cbind(out[3, ] - 1.96 * sqrt(out[4, ]), out[3, ] + 1.96 * sqrt(out[4, ]))
    coverage1 <- mean(beta.ci1[, 1] <= beta & beta.ci1[, 2] >= beta) ## coverage of TSLS
    coverage2 <- mean(beta.ci2[, 1] <= beta & beta.ci2[, 2] >= beta) ## coverage of optimal IV

    sd1 <- sd(out[1, ])
    sd2 <- sd(out[3, ])

    se1 <- mean(sqrt(out[2, ]))
    se2 <- mean(sqrt(out[4, ]))

    c(rmse1, rmse2, bias1, bias2, sd1, sd2, se1, se2, coverage1, coverage2, mean(out[7, ]), mean(out[8, ]))

}

settings <- expand.grid(n1 = c(1000, 5000, 20000),
                        n2 = c(1000, 5000, 20000),
                        rho1 = c(0.5),
                        rho2 = c(0.5))

library(parallel)
output <- mclapply(1:nrow(settings), function(i) {print(i); big.sim(settings[i, 1], settings[i, 2], settings[i, 3], settings[i, 4], nsim = 1000)}, mc.cores = 3)

save(output, file = "sim3.rda")

load("sim3.rda")

output <- do.call(rbind, output)
colnames(output) <- c("rmse1", "rmse2", "bias1", "bias2", "sd1", "sd2", "se1", "se2", "coverage1", "coverage2", "fstat1", "fstat2")
output <- cbind(settings, output)

tmp <- subset(output, rho1 == 0.5)

library(tables)
tmp$beta <- 1
tmp$beta <- factor(tmp$beta)
tmp$rho1 <- factor(tmp$rho1)
tmp$rho2 <- factor(tmp$rho2)
tmp$n1 <- factor(tmp$n1)
tmp$n2 <- factor(tmp$n2)

t <- tabular(rho1 * rho2 * n1 * n2 ~ (bias1 * Format(digits = 1) + sd1 * Format(digits = 2) + se1 * Format(digits = 2) + coverage1 * Format(digits = 3) + bias2 * Format(digits = 1) + sd2 * Format(digits = 2) + se2 * Format(digits = 2) + coverage2 * Format(digits = 3)) * Heading() * identity, data = tmp)

latex(t)
