Home Import Excel files
Post
Cancel

Import Excel files

This program is helpful to import a large set of similarly structured Excel files, for example to read a set of files containing Russell Index constituent data.

/* Program to import set of Excel files obtained from Russell */
/* Each Excel file has the same format, but covers different years */
/* The file names indicate whether the data concerns the Russell 1000, 2000, or Micro index */
/* Variable names (column headers) are repeated throughout the file, and eliminated after importing *
/* Finally, CRSP permnos are matched to the index constituent data by ticker-date pairs */

/* Author:  Raisa Velthuis */
/* Created: November 22, 2013 */

* initialize;
%let path = C:\Users\Me\Documents\Russell Data\;
libname crsp 'D:\WRDS\2013'; /* location of crsp.msf table */

* import files;
%macro get_filenames(location);
/* From: http://stackoverflow.com/questions/1409543/using-sas-macro-to-pipe-a-list-of-filenames-from-
filename _dir_ "%bquote(&location.)";
data filenames(keep=memname);       
handle=dopen( '_dir_' );       
if handle > 0 then do;         
count=dnum(handle);         
do i=1 to count;           
memname=dread(handle,i);           
output filenames;
end;       
end;
rc=dclose(handle);
run;
filename _dir_ clear; 
%mend;

%get_filenames(&path);

%macro get_files(out=);
    /* Adapted from: http://www.nesug.org/Proceedings/nesug11/cc/cc17.pdf */

    /* Select index */
    data files; set filenames; where find(memname,"&out.")>0; i = _n_; run;

    /* Number of files */
    proc sql noprint; select count(memname) into :nfiles from files; quit;     %put Number of files in directory: &nfiles;

    /* Import files */
    %do i = 1 %to &nfiles;
        %* Get file name;
        proc sql noprint; select %trim(%left(memname)) into :file from files where i = &i; quit;
        %put File: &file;
        %put File: "&path.%trim(%left(&file.))";         
        filename fileloc "&path.%trim(%left(&file.))";         
        LIBNAME XLSLIB "&path.%trim(%left(&file.))";

        proc sql noprint;
        /***Get total Number of Sheets***/
            select count(distinct(MEMNAME)) into: tot
         from sashelp.vtable
         where LIBNAME ='XLSLIB' AND INDEX(MEMNAME,'General')=0;
        /**Get the sheet names in to macro variables***/          
         select distinct(MEMNAME) into: v1 - :v%trim (%left(&tot))
         from sashelp.vtable
         where LIBNAME ='XLSLIB' AND INDEX (MEMNAME,'General')=0;

        %* import data;
        PROC IMPORT OUT= WORK.Russell_&i
                    DATAFILE= fileloc
                    DBMS=EXCEL REPLACE;
         RANGE="&v1.";
         GETNAMES=YES;
         MIXED=NO;
         SCANTEXT=YES;
         USEDATE=YES;
         SCANTIME=YES;
        RUN;
    %end;

    %* combine files;     
    data &out;
        format date $10. ticker $8. desc $20. iwght best12.;
        set Russell_:;
        date2 = input(date,mmddyy10.); format date2 date9.;
        if not(missing(date2));
        drop date;         
        rename date2=date;     
    run;

    %* clean up;
    proc datasets nolist; delete files Russell_:; quit; 

%mend;

%get_files(out=R1);     /* Read in R1000 data  */
%get_files(out=R2);     /* Read in R2000 data  */
%get_files(out=RMicro); /* Read in RMicro data */

********* Add Permnos to Russell Membership data *********;
%macro add_permnos(out=);
    * add permnos based on ticker at specific date;
    proc sql; /* create single entry per ticker (sum weights of duplicate tickers) */
    create table &out._id as
    select distinct a.date, a.ticker, a.desc, sum(a.iwght) as iwght
    from &out. as a
    where a.ticker is not missing     group by a.date, a.ticker     order by a.date, a.ticker
    ;     
    quit;     
    proc sql;
    create table &out._id as
    select intnx('month',a.date,0,'e') as date format date9., a.*,
        b.permno, b.comnam, b.exchcd, b.shrcd, b.shrcls, abs(c.prc)*c.shrout as mcap
    from &out._id as a
    left join (select * from crsp.stocknames where ticker is not missing) as b     
    on a.ticker = b.ticker and a.date between b.namedt and b.nameenddt
    left join crsp.msf as c
    on b.permno = c.permno and intnx('month',a.date,0,'e') = intnx('month',c.date,0,'e')
    order by a.date, a.ticker, mcap desc
    ;     
    quit;
    data &out._id; * remove duplicate permno matches;
    set &out._id;
    where permno is not missing;     
    by date ticker descending mcap;
    if first.ticker;
    keep date permno iwght comnam;
    run; %mend;
%add_permnos(out=R1);
%add_permnos(out=R2);
%add_permnos(out=RMicro);

* crsp with russell identifiers;
proc sql;
    create table crsp_w_Russell
    as select intnx('month',a.date,0,'e') as date format date9., a.permno,
        case when b.R1000 = 1 then 1 else 0 end as R1000,
        case when missing(b.iwght) then 0 else b.iwght end as R1wght,
        case when c.R2000 = 1 then 1 else 0 end as R2000,
        case when missing(c.iwght) then 0 else c.iwght end as R2wght,
        case when d.RMicro = 1 then 1 else 0 end as RMicro,
        case when missing(d.iwght) then 0 else d.iwght end as RMwght
    from crsp.msf as a
    left join (select *, 1 as R1000 from R1_id) as b
    on a.permno=b.permno and intnx('month',a.date,0,'e') = b.date
    left join (select *, 1 as R2000 from R2_id) as c
    on a.permno=c.permno and intnx('month',a.date,0,'e') = c.date
    left join (select *, 1 as RMicro from RMicro_id) as d
    on a.permno=d.permno and intnx('month',a.date,0,'e') = d.date
    where a.date >= '01DEC1978'd     order by permno, date; quit;

**** some checks *****;
* summ stats;
proc means data=R1_id nway noprint;
class date; var iwght;
output out=nmembers1 sum=iwght_R1;
run;
proc means data=R2_id nway noprint;
class date; var iwght;
output out=nmembers2 sum=iwght_R2;
run;
proc means data=RMicro_id nway noprint;
class date; var iwght;
output out=nmembers3 sum=iwght_RM;
run; data nmembers;
merge nmembers1(drop=_type_ rename=(_freq_=nR1)) nmembers2(drop=_type_  rename=(_freq_=nR2))
     nmembers3(drop=_type_  rename=(_freq_=nRM));
by date; run;

*time series plot - number of funds; 
ods pdf file="&path.Russell N iwght.pdf" ; 
goptions reset=all; options orientation=landscape;
legend label=none value=(h=2 'Russell 1000' 'Russell 2000' 'Russell Micro') cborder=black;
axis1 label=(angle=90 'Sum of index weights') order= 0 to 1.1 by .1; 
axis2 label=(angle=90 'Number of stocks') order= 0 to 2200 by 200;
   proc gplot data=nmembers;
       symbol i=j v=none h=1 w=3;
       format date year4.;
    title "Total index weight of stocks in Russell index per month";
      plot ( iwght_R1 iwght_R2 iwght_RM) * date
        / overlay legend=legend1 vaxis=axis1
      ;
    run;
    title "Number of stocks in Russell index per month";
      plot ( nR1 nR2 nRM) * date
        / overlay legend=legend1 vaxis=axis2
      ;
    run;
    quit;
    quit;
ods pdf close;

* clean up; 
proc datasets nolist;
delete filenames Membersmcap Nmembers: R1 R1_id R2 R2_id RMicro RMicro_id;
quit;
This post is licensed under CC BY 4.0 by the author.