set more 1
clear
set mem 500m

/*
File:   cepr_org_master.do
Date:   October 12, 2007
        CEPR ORG Version 1.2.2
Desc:   Master do-file for creating CEPR consistent extract of CPS ORG
Note:   See copyright notice at the end of this program.
*/

/* Acknowledgements

The CEPR ORG extract uses the NBER's "CPS Labor Extracts 1979 - 2001"
(updated through 2002) as a base. Complete details and all underlying data
are available from the NBER (www.nber.org). I am grateful to Jean Roth for
assistance with the data and to Jean Roth and others at the NBER for their
efforts to make the CPS data, their programs, and their documentation
widely available.

I am also grateful to Jared Bernstein, Danielle Gao, Larry Mishel,
David Webster, and others at the Economic Policy Institute (www.epinet.org)
for extensive conversations and assistance over the years with the ORG data.

I thank Dean Baker, Heather Boushey, and Helene Jorgensen for extensive
comments.
*/

/* Notice

The underlying Current Population Survey data referenced here are in the
public domain. This program and related programs are distributed under the
GNU General Public License (GPL). See end of this file and
http://www.gnu.org/licenses/ for details.
*/


/* Part 1: set directories */

/* Every path and external executable used by this file is held in a
   global macro. Exactly one of the two branches below runs, selected by
   the gnulin switch, and both branches must define the same set of names
   because the rest of the file refers to them without checking the
   platform. */

/*Windows vs. GNU/Linux*/

global gnulin = 1 /*Set gnulin=0 if you run Windows; 1 if GNU/Linux*/

if $gnulin==1 {
    global do "/ceprdata/CPS_ORG/CEPR/Do" /* do files for NBER extracts */
    global locbdo "/ceprdata/CPS_Basic/CEPR/DoFiles" /* do files for Basic CPS */
    global locin "/ceprdata/CPS_ORG/NBER" /* original NBER ORG extracts */
    global locbas "/ceprdata/CPS_Basic/NBER" /* Basic CPS extracts (from NBER) */
    global lochv "/ceprdata/CPS_ORG/CEPR/hoursvary" /* "hours vary" extracts */
    global locotc "/ceprdata/CPS_ORG/CEPR/otc" /* overtime, etc. extracts */
    global loctmp "/ceprdata/CPS_ORG/CEPR/temp" /* temporary files */
    global locout "/ceprdata/CPS_ORG/CEPR" /* final CEPR extracts */
    /*executables*/
    global gzip "/usr/bin/gzip"
    global unzip "/usr/bin/unzip"
    global copy "/bin/cp"
    global erase "/bin/rm"
}

if $gnulin==0 {
    /* The rest of this file changes directory with the global named "do".
       This branch previously defined only "locwork", which left "do" empty
       on Windows. Define "do" here; "locwork" is kept with the same value
       in case other do-files still read it. */
    global do "F:\DATA\CPS_ORG\CEPR\Do" /* do files for NBER extracts */
    global locwork "F:\DATA\CPS_ORG\CEPR\Do" /* same directory, older name */
    global locbdo "F:\DATA\CPS_Basic\CEPR\DoFiles" /* do files for Basic CPS */
    global locin "F:\DATA\CPS_ORG\NBER" /* original NBER ORG extracts */
    global locbas "F:\DATA\CPS_Basic\NBER" /* Basic CPS extracts (from NBER) */
    global lochv "F:\DATA\CPS_ORG\CEPR\HoursVary" /* "hours vary" extracts */
    global locotc "F:\DATA\CPS_ORG\CEPR\otc" /* overtime, etc. extracts */
    global loctmp "F:\DATA\CPS_ORG\CEPR\Temp" /* temporary files */
    global locout "F:\DATA\CPS_ORG\CEPR" /* final CEPR extracts */
    /*executables*/
    global gzip "F:\Software\Gzip\gzip.exe"
    global unzip "F:\Software\PKWARE\PKZIPC\pkzipc.exe -extract"
    global copy "copy"
    global erase "erase"
}


/* Part 2: some preliminaries */

/* Lines that begin with an asterisk directly followed by a command are
   switched-off steps; remove the asterisk to run them. */

/* a. convert original NBER files from 2-digit to 4-digit years */

*cd "$do"
*do "cepr_org_y2k.do"

/* b. prepare CPS Basic data sets for merging with NBER */

/* i. Hours-vary data from Basic CPS 1994 - 2002
      use extracts created w/ cepr_org_extracts.do */

*cd "$do"
*do "cepr_org_extracts.do"

*cd "$do"
*do "cepr_org_prep_hv.do" /* load program "hv" */
*hv 1994 1995 1996 1997 1998 1999 2000 2001 2002

/* ii. Overtime, tips, commissions data from Basic CPS 1994 - 2002
       use extracts created w/ cepr_org_extracts.do */

*cd "$do"
*do "cepr_org_prep_otc.do" /* load program "otc" */
*otc 1994 1995 1996 1997 1998 1999 2000 2001 2002

/* c. read raw data 2003- */

cd "$locbdo"
do "cepr_basic_read_all.do"
/* must change this when adding new months of data */

/* program switches */
*b2003 /* read 2003 data months 1-12 */
*b2004 /* read 2004 data months 1-12 */
*b2005 /* read 2005 data months 1-12 */
*b2006 /* read 2006 data months 1-12 */

/* d.
convert monthly files into annual (or near annual) data sets */

/*
fullyr: cut each monthly CPS Basic file down to its ORG observations.

Syntax:  fullyr datayear firstmonth lastmonth [datayear firstmonth lastmonth ...]

For every month from firstmonth through lastmonth of datayear the program
loads cps_basic_raw_<datayear>_<month>.dta from the <datayear> subdirectory
of the locbas directory, keeps records with hrmis equal to 4 or 8, keeps
non-missing peage of 16 or more, drops records with negative pwsswgt or
pworwgt, and saves the result as cepr_org_<datayear>_<month>.dta in the
locin directory. Arguments are consumed three at a time, so several years
can be given in one call.
*/
capture program drop fullyr
program define fullyr
    version 7.0
    * fullyr `1' `2' `3'
    * syntax fullyr datayear firstmonth lastmonth
    *
    while "`1'"~="" {
        local month=`2'
        while `month'<=`3' {
            /* One forward-slash path serves both platforms: Stata accepts
               forward slashes on Windows. The former Windows-only path put
               a backslash directly in front of a local macro reference,
               which Stata reads as an escape that blocks macro expansion,
               so the year was never substituted into the path. */
            use "$locbas/`1'/cps_basic_raw_`1'_`month'.dta", clear
            keep if hrmis==4 | hrmis==8 /* keep only ORG observations */
            keep if 16<=peage & peage~=. /* for consistency with earlier extracts */
            drop if pwsswgt<0 /* drop if observation has missing CPS weight */
            drop if pworwgt<0 /* drop if observation has missing ORG weight */
            lab var year "Year"
            notes: Age 16 and older only
            *
            compress
            *
            cd "$locin" /* save with NBER extracts */
            save "cepr_org_`1'_`month'.dta", replace
            local month=`month'+1
        }
        mac shift 3
    }
end

/*
combcps: stack the monthly files written by fullyr into one file per year.

Syntax:  combcps datayear firstmonth lastmonth [datayear firstmonth lastmonth ...]

Loads cepr_org_<datayear>_<firstmonth>.dta from the locin directory, appends
the files for the following months through lastmonth, and saves the result
as morg<datayear>.dta in the same directory. Arguments are consumed three
at a time, as in fullyr.
*/
capture program drop combcps
program define combcps
    version 7.0
    * combcps `1' `2' `3'
    * syntax combcps datayear firstmonth lastmonth
    *
    while "`1'"~="" {
        /* Nothing below changes directory, so a single cd per year covers
           the use, every append, and the final save. */
        cd "$locin" /* saved with NBER extracts */
        local month=`2'
        use "cepr_org_`1'_`month'.dta", clear
        local month=`month'+1
        while `month'<=`3' {
            append using "cepr_org_`1'_`month'.dta"
            local month=`month'+1
        }
        save "morg`1'.dta", replace
        mac shift 3
    }
end


/* Part 3: create consistent set of core variables, by topic */

cd "$do"
do "cepr_org_keepord.do" /* load program to keep and order output */

/* Part 3.a.
for 1979-2002, use NBER ORG extract */

/*
orgnber: build the CEPR ORG extract for each year given on the command line,
starting from that year's morg<year>.dta file in the locin directory.

Syntax:  orgnber year [year ...]

Each year is run through the cepr_org_* topic do-files in the "do"
directory, trimmed and ordered by keepord, sorted, labelled, and written to
the locout directory twice: once with save and once with saveold.
*/
capture program drop orgnber
program define orgnber
    version 7.0
    * one pass of the loop per year argument; macro shift moves to the next
    while "`1'" != "" {
        local yr "`1'"

        cd "$locin"
        use "morg`yr'.dta", clear
        label variable year "Year"
        notes: Age 16 and older only

        * topic do-files, run in this fixed order
        cd "$do"
        foreach step in idvar demog family empstat geog educ ind occ hours topcode_lognormal topcode_pareto wages {
            do "cepr_org_`step'.do"
        }

        keepord /* keeps and orders consistent variables */

        compress
        sort month state minsamp hhid hhnum lineno
        label data "CEPR ORG Extract, Version 1.2.2, `yr', `c(current_date)'"
        cd "$locout"
        save "cepr_org_`yr'.dta", replace
        saveold "cepr_org_`yr'_ver7.dta", replace

        macro shift
    }
end


/* Part 3.b. for 2003-, use monthly CPS Basic files */

/*
orgcpsb: same output as orgnber, for years whose morg<year>.dta file was
assembled from monthly CPS Basic files.

Syntax:  orgcpsb year [year ...]

The topic do-files are the cepr_basic_* versions in the locbdo directory;
only the wage step is shared with orgnber and is run from the "do"
directory.
*/
capture program drop orgcpsb
program define orgcpsb
    version 7.0
    * one pass of the loop per year argument; macro shift moves to the next
    while "`1'" != "" {
        local yr "`1'"

        cd "$locin"
        use "morg`yr'.dta", clear
        label variable year "Year"
        notes: Age 16 and older only

        * CPS Basic topic do-files, run in this fixed order
        cd "$locbdo"
        foreach step in idvar demog family empstat geog educ ind occ hours topcode_lognormal topcode_pareto {
            do "cepr_basic_`step'.do"
        }

        * the wage step is the one used for the NBER-based years
        cd "$do"
        do "cepr_org_wages.do"

        keepord /* keeps and orders consistent variables */

        compress
        sort month state minsamp hhid hhnum lineno
        label data "CEPR ORG Extract, Version 1.2.2, `yr', `c(current_date)'"
        cd "$locout"
        save "cepr_org_`yr'.dta", replace
        saveold "cepr_org_`yr'_ver7.dta", replace

        macro shift
    }
end


/* program switches */

/* 1979 - 2002 NBER */
orgnber 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 /*
    */ 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002

/* 2003 - 2006 CPS Basic */

/* read raw CPS data */
*fullyr 2003 1 12 2004 1 12 2005 1 12 2006 1 12

/* combine monthly data sets created in fullyr */
*combcps 2003 1 12 2004 1 12 2005 1 12 2006 1 12

/* process ORG data */
orgcpsb 2003 2004 2005 2006

display


/* Release notes

1.2.2   Oct 12, 2007

    1. Corrected error in decimal places in
       cepr_basic_topcode_lognormal.do and cepr_basic_topcode_pareto.do
       affecting topcoded wages in 2003-2006

1.2.1   Jul 24, 2007

    1. Temporarily removed substate geography variables 2004-2006.
    2. Corrected hhnum for 2004+ (thanks Jeff Wenger; previously
       incorrect, due to dictionary change in May 2004)
    3. Added proxy reporting variables for later years.

1.2     Jan 24, 2007

    1. Added Jan-Aug 2006 data
    2. Corrected nilf variable in cepr_org_empstat.do (thanks Jeff Wenger)
    3. Corrected vet, rural, centcity, suburb variables for months 8-12
       of 2005 (error due to dictionary change in Aug 2005)
    4. Corrected wbho for August-December 2005 (previously missing,
       due to dictionary change in Aug 2005)
    5. Corrected missing industry variables for 2003+

1.1     Feb 27, 2006

    1. Added 2005 data
    2. Converted programs for dual GNU/Linux, Windows use
    3. Changed procedure for adding new months of CPS
    4. Added refper (reference person) variable, 1994-2005
    5. Set procedure for trimming real wages (rw, rw_ot) to fix cutoffs
       at $1 and $100 in constant 1989 dollars
    6. Shortened label length of some industry/occupation labels to
       satisfy 80-column label requirement for Stata 8.

0.96    Nov, 2005

    1. Updated to include 2003, 2004 data from CPS Basic files
       (not NBER ORG extract as in earlier years)
    2. Manufacturing (manuf) variable now excludes construction;
       service (servs) variable now includes construction
    3. Added new industry (ind03, ind_2d) and occupation codes
       (occ03, docc03, manag03) to reflect switch in Jan 2003 CPS
       from SIC 1987 to NAICS 2002 and from SOC 1980 to SOC 2000;
       variable manag83 replaces previous variable manager
    4. Correction to children's age variables in 1998 and 1999 in
       cepr_org_family.do
    5. Correction to label for famrel94 in cepr_org_family.do
    6. Correction to coding of publoc in all years in
       cepr_org_empstat.do
    7. Added trimmed, top-code-adjusted, real wage variables (rw, rw_ot)
    8. Changed name of year-arrived-in-US variable from peinusyr to
       prinusyr
    9. Added new race and ethnicity variable (wbhom), to reflect changes
       in CPS race and ethnicity options beginning in 2003
   10. Correction to agric variable in cepr_org_ind.do

0.91    Nov 8, 2003

    All NBER city-related codes added, without labeling or documentation
    to basic extracts

0.9     Sep 1, 2003

    Beta release
*/

/*
Copyright 2003 CEPR and John Schmitt

Center for Economic and Policy Research
1621 Connecticut Avenue, NW
Washington, DC 20009
Tel: (202) 293-5380
Fax: (202) 588-1356
http://www.cepr.net

This program and all programs referenced in it are free software. You
can redistribute the program or modify it under the terms of the GNU
General Public License as published by the Free Software Foundation;
either version 2 of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
Public License for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
*/