Tietokoneharjoitus 6

              storage  display     value
variable name   type   format      label      variable label
-------------------------------------------------------------------------------
county          int    %9.0g                  county identifier
year            byte   %9.0g                  81 to 87
crmrte          float  %9.0g                  crimes committed per person
prbarr          float  %9.0g                  'probability' of arrest
prbconv         float  %9.0g                  'probability' of conviction
prbpris         float  %9.0g                  'probability' of prison sentenc
avgsen          float  %9.0g                  avg. sentence, days
polpc           float  %9.0g                  police per capita
density         float  %9.0g                  people per sq. mile
taxpc           float  %9.0g                  tax revenue per capita
west            byte   %9.0g                  =1 if in western N.C.
central         byte   %9.0g                  =1 if in central N.C.
urban           byte   %9.0g                  =1 if in SMSA
pctmin80        float  %9.0g                  perc. minority, 1980
wcon            float  %9.0g                  weekly wage, construction
wtuc            float  %9.0g                  wkly wge, trns, util, commun
wtrd            float  %9.0g                  wkly wge, whlesle, retail trade
wfir            float  %9.0g                  wkly wge, fin, ins, real est
wser            float  %9.0g                  wkly wge, service industry
wmfg            float  %9.0g                  wkly wge, manufacturing
wfed            float  %9.0g                  wkly wge, fed employees
wsta            float  %9.0g                  wkly wge, state employees
wloc            float  %9.0g                  wkly wge, local gov emps
mix             float  %9.0g                  offense mix: face-to-face/other
pctymle         float  %9.0g                  percent young male
d82             byte   %9.0g                  =1 if year == 82
d83             byte   %9.0g                  =1 if year == 83
d84             byte   %9.0g                  =1 if year == 84
d85             byte   %9.0g                  =1 if year == 85
d86             byte   %9.0g                  =1 if year == 86
d87             byte   %9.0g                  =1 if year == 87
lcrmrte         float  %9.0g                  log(crmrte)
lprbarr         float  %9.0g                  log(prbarr)
lprbconv        float  %9.0g                  log(prbconv)
lprbpris        float  %9.0g                  log(prbpris)
lavgsen         float  %9.0g                  log(avgsen)
lpolpc          float  %9.0g                  log(polpc)
ldensity        float  %9.0g                  log(density)
ltaxpc          float  %9.0g                  log(taxpc)
lwcon           float  %9.0g                  log(wcon)
lwtuc           float  %9.0g                  log(wtuc)
lwtrd           float  %9.0g                  log(wtrd)
lwfir           float  %9.0g                  log(wfir)
lwser           float  %9.0g                  log(wser)
lwmfg           float  %9.0g                  log(wmfg)
lwfed           float  %9.0g                  log(wfed)
lwsta           float  %9.0g                  log(wsta)
lwloc           float  %9.0g                  log(wloc)
lmix            float  %9.0g                  log(mix)
lpctymle        float  %9.0g                  log(pctymle)
lpctmin         float  %9.0g                  log(pctmin)
clcrmrte        float  %9.0g                  lcrmrte - lcrmrte[_n-1]
clprbarr        float  %9.0g                  lprbarr - lprbarr[_n-1]
clprbcon        float  %9.0g                  lprbconv - lprbconv[_n-1]
clprbpri        float  %9.0g                  lprbpri - lprbpri[t-1]
clavgsen        float  %9.0g                  lavgsen - lavgsen[t-1]
clpolpc         float  %9.0g                  lpolpc - lpolpc[t-1]
cltaxpc         float  %9.0g                  ltaxpc - ltaxpc[t-1]
clmix           float  %9.0g                  lmix - lmix[t-1]

Tehtävä 5a

Estimoi lineaarinen malli SGLS estimaattorilla käyttäen kaikkia vuosia 81-87. Mallissa vastemuuttuja on log(crmrte) ja selittävät muuttujat ovat log(prbarr), log(prbconv), log(prbpris), log(avgsen) ja log(polpc).

file<-"http://cc.oulu.fi/~jklemela/panel/cornwell.raw"
data<-read.table(file=file)

y<-log(data[,3])

x1<-log(data[,4])
x2<-log(data[,5])
x3<-log(data[,6])
x4<-log(data[,7])
x5<-log(data[,8])

y<-matrix(y,length(y),1)
K<-6
x<-matrix(1,length(y),K)
x[,2]<-x1
x[,3]<-x2
x[,4]<-x3
x[,5]<-x4
x[,6]<-x5

# Estimoidaan ensin Omega

I<-diag(1,K)
xtxinv<-solve(t(x)%*%x,I)

betahat<-xtxinv%*%t(x)%*%y

county<-data[,1]
year<-data[,2]
uc<-unique(county)
uy<-unique(year)
N<-length(uc)
T<-length(uy)

U<-matrix(0,T,N)
for (i in 1:N){
   now<-uc[i]
   ind<-(county==now)
   yi<-y[ind]
   xi<-x[ind,]
   U[,i]<-yi-xi%*%betahat
}

omegahat<-U%*%t(U)/N

          [,1]       [,2]       [,3]       [,4]       [,5]       [,6]
[1,] 0.13295214 0.10691554 0.08725805 0.09418204 0.09834043 0.07883118
[2,] 0.10691554 0.13246877 0.09984562 0.10790282 0.10569842 0.08345536
[3,] 0.08725805 0.09984562 0.12093801 0.10635122 0.10554241 0.08992647
[4,] 0.09418204 0.10790282 0.10635122 0.15925729 0.12980230 0.10205059
[5,] 0.09834043 0.10569842 0.10554241 0.12980230 0.16121446 0.10989033
[6,] 0.07883118 0.08345536 0.08992647 0.10205059 0.10989033 0.12945747
[7,] 0.09169674 0.10168747 0.08737220 0.10057430 0.10401350 0.10791154
           [,7]
[1,] 0.09169674
[2,] 0.10168747
[3,] 0.08737220
[4,] 0.10057430
[5,] 0.10401350
[6,] 0.10791154
[7,] 0.15928928

---------------------------------------------------

I<-diag(1,T)
omegahatinv<-solve(omegahat,I)

C<-kronecker(diag(1,N),omegahatinv)
D<-t(x)%*%C%*%x
I<-diag(1,K)
E<-solve(D,I)
F<-t(x)%*%C%*%y
betahat2<-E%*%F

> betahat2
            [,1]
[1,] -2.11957332
[2,] -0.46933248
[3,] -0.35171733
[4,] -0.15307485
[5,] -0.01635828
[6,]  0.37220681

> betahat
           [,1]
[1,] -2.20673453
[2,] -0.72151130
[3,] -0.54927675
[4,]  0.23797055
[5,] -0.06519926
[6,]  0.36252309

Tehtävä 5b

Estimoi SGLS-estimaattorin asymptoottinen varianssi

county<-data[,1]
year<-data[,2]
uc<-unique(county)
uy<-unique(year)
N<-length(uc)
T<-length(uy)
K<-6

# y, x, betahat2 ja omegahatinv saadaan edellisesta tehtavasta

A<-matrix(0,K,K)
B<-matrix(0,K,K)
for (i in 1:N){
   now<-uc[i]
   ind<-(county==now)
   yi<-y[ind]
   xi<-x[ind,]
   ui<-yi-xi%*%betahat2
   A<-A+t(xi)%*%omegahatinv%*%xi
   B<-B+t(xi)%*%omegahatinv%*%ui%*%t(ui)%*%omegahatinv%*%xi
}

I<-diag(1,K)
invA<-solve(A,I)

Avar<-invA%*%B%*%invA

> Avar
             [,1]         [,2]          [,3]          [,4]          [,5]
[1,]  0.434900037 0.0143218326  0.0020173775  0.0020327013 -0.0027025419
[2,]  0.014321833 0.0048180432  0.0019395374  0.0010046334  0.0001545897
[3,]  0.002017378 0.0019395374  0.0017648969  0.0003902649  0.0001107171
[4,]  0.002032701 0.0010046334  0.0003902649  0.0015021298  0.0001704906
[5,] -0.002702542 0.0001545897  0.0001107171  0.0001704906  0.0005580448
[6,]  0.061083218 0.0008838097 -0.0002661697 -0.0001044661 -0.0003144059
              [,6]
[1,]  0.0610832182
[2,]  0.0008838097
[3,] -0.0002661697
[4,] -0.0001044661
[5,] -0.0003144059
[6,]  0.0089666681

---------------------------------------------------------------------

sdhats<-matrix(0,K,1)
tstats<-matrix(0,K,1)
pvals<-matrix(0,K,1)
for (k in 1:K){
   sdhats[k]<-sqrt(Avar[k,k])
   tstats[k]<-betahat2[k]/sdhats[k]
   pvals[k]<-2*(1-pnorm(abs(tstats[k])))
}
sdhats
tstats
pvals

> sdhats
           [,1]
[1,] 0.65946951
[2,] 0.06941213
[3,] 0.04201068
[4,] 0.03875732
[5,] 0.02362297
[6,] 0.09469249
> tstats
           [,1]
[1,] -3.2140581
[2,] -6.7615345
[3,] -8.3720941
[4,] -3.9495726
[5,] -0.6924735
[6,]  3.9306898
> pvals
             [,1]
[1,] 1.308732e-03
[2,] 1.365374e-11
[3,] 0.000000e+00
[4,] 7.829087e-05
[5,] 4.886401e-01
[6,] 8.470250e-05

Tehtävä 5c

Estimoi RE-estimaattorin asymptoottinen varianssi

y<-matrix(y,length(y),1)
K<-6
x<-matrix(1,length(y),K)
x[,2]<-x1
x[,3]<-x2
x[,4]<-x3
x[,5]<-x4
x[,6]<-x5

I<-diag(1,K)
xtxinv<-solve(t(x)%*%x,I)

betahat<-xtxinv%*%t(x)%*%y

county<-data[,1]
year<-data[,2]
uc<-unique(county)
uy<-unique(year)
N<-length(uc)
T<-length(uy)

U<-matrix(0,T,N)
for (i in 1:N){
   now<-uc[i]
   ind<-(county==now)
   yi<-y[ind]
   xi<-x[ind,]
   U[,i]<-yi-xi%*%betahat
}

sigmav2<-sum(U^2)/(N*T)
sigmac2<-0
for (i in 1:N){
    for (t in 1:(T-1)){
        for (s in (t+1):T){
            sigmac2<-sigmac2+U[t,i]*U[s,i]
        }
     }
}
sigmac2<-2*sigmac2/(N*T*(T-1))
sigmau2<-sigmav2-sigmac2

omegahat.re<-sigmav2*diag(1,T)+sigmac2*matrix(1,T,T)
omegahat.re

           [,1]       [,2]       [,3]       [,4]       [,5]       [,6]
[1,] 0.24218956 0.09996421 0.09996421 0.09996421 0.09996421 0.09996421
[2,] 0.09996421 0.24218956 0.09996421 0.09996421 0.09996421 0.09996421
[3,] 0.09996421 0.09996421 0.24218956 0.09996421 0.09996421 0.09996421
[4,] 0.09996421 0.09996421 0.09996421 0.24218956 0.09996421 0.09996421
[5,] 0.09996421 0.09996421 0.09996421 0.09996421 0.24218956 0.09996421
[6,] 0.09996421 0.09996421 0.09996421 0.09996421 0.09996421 0.24218956
[7,] 0.09996421 0.09996421 0.09996421 0.09996421 0.09996421 0.09996421
           [,7]
[1,] 0.09996421
[2,] 0.09996421
[3,] 0.09996421
[4,] 0.09996421
[5,] 0.09996421
[6,] 0.09996421
[7,] 0.24218956


I<-diag(1,T)
omegahat.reinv<-solve(omegahat.re,I)
C<-kronecker(diag(1,N),omegahat.reinv)
D<-t(x)%*%C%*%x
I<-diag(1,K)
E<-solve(D,I)
F<-t(x)%*%C%*%y
betahat3<-E%*%F

> betahat3
             [,1]
[1,] -2.113129731
[2,] -0.580583053
[3,] -0.434592361
[4,] -0.124297846
[5,]  0.002135269
[6,]  0.408412699


A<-matrix(0,K,K)
for (i in 1:N){
   now<-uc[i]
   ind<-(county==now)
   yi<-y[ind]
   xi<-x[ind,]
   ui<-yi-xi%*%betahat3
   A<-A+t(xi)%*%omegahat.reinv%*%xi
}
I<-diag(1,K)
invA<-solve(A,I)
Avar<-invA

> Avar
             [,1]          [,2]          [,3]          [,4]          [,5]
[1,]  0.130805612  0.0021037749 -0.0028029041  0.0059488011 -8.606430e-03
[2,]  0.002103775  0.0039319043  0.0010784974  0.0008204338  1.942074e-04
[3,] -0.002802904  0.0010784974  0.0017757501  0.0006190966  1.131474e-04
[4,]  0.005948801  0.0008204338  0.0006190966  0.0064058001 -1.066457e-04
[5,] -0.008606430  0.0001942074  0.0001131474 -0.0001066457  4.154132e-03
[6,]  0.016172703 -0.0006095679 -0.0008794141 -0.0002131709  1.643495e-05
              [,6]
[1,]  1.617270e-02
[2,] -6.095679e-04
[3,] -8.794141e-04
[4,] -2.131709e-04
[5,]  1.643495e-05
[6,]  2.739562e-03

sdhats<-matrix(0,K,1)
tstats<-matrix(0,K,1)
pvals<-matrix(0,K,1)
for (k in 1:K){
   sdhats[k]<-sqrt(Avar[k,k])
   tstats[k]<-betahat[k]/sdhats[k]
   pvals[k]<-2*(1-pnorm(abs(tstats[k])))
}
sdhats
tstats
pvals

> sdhats
           [,1]
[1,] 0.36167059
[2,] 0.06270490
[3,] 0.04213965
[4,] 0.08003624
[5,] 0.06445256
[6,] 0.05234082
> tstats
           [,1]
[1,]  -6.101504
[2,] -11.506458
[3,] -13.034678
[4,]   2.973285
[5,]  -1.011585
[6,]   6.926201
> pvals
             [,1]
[1,] 1.050750e-09
[2,] 0.000000e+00
[3,] 0.000000e+00
[4,] 2.946308e-03
[5,] 3.117364e-01
[6,] 4.322986e-12

#####################
SGLS-tulokset:

> sdhats
           [,1]
[1,] 0.65946951
[2,] 0.06941213
[3,] 0.04201068
[4,] 0.03875732
[5,] 0.02362297
[6,] 0.09469249
> tstats
           [,1]
[1,] -3.2140581
[2,] -6.7615345
[3,] -8.3720941
[4,] -3.9495726
[5,] -0.6924735
[6,]  3.9306898
> pvals
             [,1]
[1,] 1.308732e-03
[2,] 1.365374e-11
[3,] 0.000000e+00
[4,] 7.829087e-05
[5,] 4.886401e-01
[6,] 8.470250e-05