Home 2SLS with double clustered standard errors
Post
Cancel

2SLS with double clustered standard errors

This program was written for my research on hedge fund capacity constraints, where we use recursive demeaning to estimate the returns to scale effect, following Pastor, Stambaugh, and Taylor (2015).

********** SAS code for 2SLS with double-clustered standard errors **********;
/* Author:  Raisa Velthuis */
/* Created: January 22, 2016 */

*** Make adjustments below as needed. For example, in my base case scenario, the model has one regressor and one instrument, and does not include intercepts ***; 

%let data=sample; *name of dataset;

* get White cov matrix;
proc model data=&data noprint;   
where Z ne .;

  parms b_1st b_2nd;
  Y = b_2nd*X;   X = b_1st*Z;

  fit Y X / 2sls outest=V2W covout hccme=0;
  instruments Z / noint;
run; 
quit;
data v2w; set v2w; where _name_ = "b_2nd"; run;

* add ID for double-clustered observations;
proc sort data=&data; by fund time; run;
proc sort data=&data out=cluster2 nodupkey; by fund time; run;
data cluster2;
set cluster2;
if _n_=1 then cluster2 = 0;
cluster2 +1; 
keep fund time cluster2;
run;
data &data; 
merge &data cluster2; 
by fund time; 
run;

* plain first stage regression w White s.e.;
proc reg data=&data plots=none; 
model X = Z /hcc HCCMETHOD=0 acov noint;
ods output acovest=both ;
run;
quit;
data both; set both; drop Variable  Dependent  Model; run;

* estimate RD w double-clustered s.e.;
proc iml;
* import data;
use &data;
read all var {Y} into y where (Z ^= .);
 read all var {Z} into Z where (Z ^= .); 
read all var {X} into X where (Z ^= .); 
read all var {time} into time where (Z ^= .); 
read all var {fund} into time where (Z ^= .); 
read all var {cluster2} into cluster2 where (Z ^= .);
close &data; 
use both; 
read all var {Z} into V1W;
close both; 
use V2W;
read all var {b_2nd} into V2W; 
close V2W;

* counters and degrees of freedom adjustements;
N=nrow(y); 
f=unique(time); 
t=unique(time); 
d=unique(cluster2); 
M1=ncol(unique(time));
M2=ncol(unique(time));
M3=ncol(unique(cluster2));
M=min(M1,M2);
K=ncol(X); 
qc1 = (N-1)/(N-K)*M1/(M1-1); 
qc2 = (N-1)/(N-K)*M2/(M2-1); 
qc3 = (N-1)/(N-K)*M3/(M3-1); 
qc  = (N-1)/(N-K)*M/(M-1);

*** first stage estimation ***; 
b1 = inv(Z`*Z)*Z`*X; 
e = X - Z*b1;
* cluster 1 (time);
SUM=0; 
do g = f[><] to f[<>];     
h = loc(f=g);     
if nrow(h)>0 then do;        
 h = loc(time=f[h]);         
eg = e[h];         
zg = Z[h,:];
SUM=SUM+zg`*eg*eg`*zg;
end; 
end;
V11 = inv(Z`*Z)*SUM*inv(Z`*Z)*qc1;
* cluster 2 (time);
SUM=0; 
do g = t[><] to t[<>];     
h = loc(t=g);     
if nrow(h)>0 then do;         
h = loc(time=t[h]);        
eg = e[h];         
zg = Z[h,:];
SUM=SUM+zg`*eg*eg`*zg;
end; 
end;
V12 = inv(Z`*Z)*SUM*inv(Z`*Z)*qc2;
* double clustered V;
V1 = (V11+V12-V1W);**qc; 
se1 = V1##.5;
/*print V11;print V12;print V1W; print V1; print se1; */

 *** second stage estimation ***; 
b_iv = inv(Z`*X)*Z`*y; 
u = y - X*b_iv;; 
* cluster 1 (time);
SUM=0; 
do g = f[><] to f[<>];     
h = loc(f=g);    
if nrow(h)>0 then do;        
h = loc(time=f[h]);         
ug = u[h];         
zg = Z[h,:];
SUM=SUM+zg`*ug*ug`*zg;
end; 
end;
V21 = inv(Z`*X)*SUM*inv(X`*Z)*qc1;
* cluster 2 (time);
SUM=0; 
do g = t[><] to t[<>];     
h = loc(t=g);     
if nrow(h)>0 then do;         
h = loc(time=t[h]);         
ug = u[h];         
zg = Z[h,:];
SUM=SUM+zg`*ug*ug`*zg;
end; 
end;
V22 = inv(Z`*X)*SUM*inv(X`*Z)*qc2;
* double clustered V;
Vclu = (V21+V22-V2W);**qc; 
seclu = Vclu##.5; 
/*print V21;print V22;print V2W; print Vclu; print seclu; */

* store results;
create results var {b1 se1 b_iv seclu N M M1 M2 M3 K};
append; 
close results; 
quit;

* output;
data results; 
set results;
* second stage t-stat and p-value;
tvalue=b_iv/seclu; 
probt=(1-probt(abs(tvalue),M-1))*2; 
* first stage t-stat and p-value;
tvalue1=b1/se1;
probt1=(1-probt(abs(tvalue1),M-1))*2;
run;
This post is licensed under CC BY 4.0 by the author.