** Do-file to clean the EGOALT BHPS data

* --- Session housekeeping ------------------------------------------------------
version 15.1
clear all
set more off
capture log close

* --- Paths ---------------------------------------------------------------------
* The project root is held in a local so that it is written only once; the
* globals below expand to exactly the same full paths as before.
local root "D:\Mario files\box\PhD Health Research\Effects of parental unemployment on children health"

cd "`root'\Data Analysis"

global main_data "`root'\Datasets\Understanding Societies (UKHLS)\UKDA-6614-stata\stata\stata13_se"
global dataout "`root'\Data Analysis"
global results "`root'\(Preliminary) Results"
global graphs ""
global tables ""

* --- Wave bookkeeping ----------------------------------------------------------
* W:  letter suffix of every wave (a = wave 1, ..., r = wave 18)
* N:  the matching wave numbers
* nW: number of elements in W, i.e. the number of waves
global W "a b c d e f g h i j k l m n o p q r"
global N " 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18"
global nW : word count $W

* --- Graphs --------------------------------------------------------------------
* Black-and-white scheme for all graphs
set scheme s1mono

* Start from an empty session with no log open
clear all
capture log close
 | 
  
    |  | 
 | 
  
    ********************************************************************************
    * Creating the dataset: EGOALT (BHPS)                                          *
    ********************************************************************************

    * Build one long panel from the wave-specific EGOALT files. Each wave file is
    * loaded, its wave prefix (ba_, bb_, ...) is stripped so that variable names
    * line up across waves, and a numeric wave counter is added.
    *
    * The per-wave extracts are held in tempfiles: Stata deletes them when the
    * do-file ends, so no intermediate .dta files are left in $dataout even if the
    * run stops half-way, and no separate erase step is needed.

    local n = 0
    foreach w of global W {
        * Wave number = position of the letter in $W (a -> 1, b -> 2, ...)
        local ++n
        display `n'

        use "$main_data/bhps_w`n'/b`w'_egoalt.dta", clear
        rename b`w'_* *                      // remove the wave prefix from variable names
        generate wave = `n'
        label variable wave "wave"

        tempfile wave`n'
        save "`wave`n''"
    }

    * Stack all waves, starting from wave 1
    use "`wave1'", clear
    forvalues i = 2/`n' {
        append using "`wave`i''"
    }

    * Inspect, order as a panel and shrink storage types
    describe
    sort pidp wave
    compress

    * Recode the values -9 to -1 to Stata system missing for all variables
    quietly mvdecode _all, mv(-9/-1)

    save "$dataout/egoaltPanelSmall", replace
 | 
  
    |  | 
 | 
  
    *******************************************************************
    *  OTHER IMPORTANT VARIABLES:
    *******************************************************************
    use "$dataout\egoaltPanelSmall", clear

    * Year: wave 1 corresponds to 1991 and each later wave is one year on,
    * up to wave 18 = 2008. Any other value of wave leaves year missing.
    generate year = .
    forvalues w = 1/18 {
        replace year = 1990 + `w' if wave == `w'
    }

    fre year
 | 
  
    |  | 
 | 
  
    |  | 
 | 
  
    * GETTING TO KNOW THE EGOALT FILE
    **************************************

    * First look at the data (alternatively, browse interactively).
    * Note that the sort below also fixes the row order of the data in memory.
    describe
    summarize
    sort hidp pno apno
    list in 1/10, sepby(hidp)
    fre relationship_bh lwstat nwstat
    tabulate relationship_bh esex

    * Q: Which variable or variables uniquely identify each row?
    * Report duplicates for each candidate key in turn.
    foreach key in "pidp" "pidp apidp" "hidp pno" "hidp pno apno" {
        duplicates report `key'
    }

    * Q: How many men (esex==1) and women (esex==2) are living with their
    * husband/wife, partner/cohabitee or civil partner?
    forvalues s = 1/2 {
        count if esex == `s' & inlist(relationship_bh, 1, 2, 3)
    }

    * Q: How many are living in same-sex partnerships?
    tabulate asex esex if inlist(relationship_bh, 1, 2, 3)

    * Q: How many OSM children were born between last wave and this one?
    * How can the new entrants in the dataset be identified?
    fre lwstat
 | 
  
    |  | 
 | 
  
    |  | 
 | 
  
    // Digression
    // w_egoalt records the relationship of each household member to every other
    // household member, so single-person households are excluded from it. To
    // verify this, one could compute household size from w_indall, merge it onto
    // this dataset and compare household size for matched and unmatched cases.

    ** CREATE UNIQUE IDENTIFIERS FOR SPECIFIC FAMILY MEMBERS
    ************************************************************

    // Cross-wave identifier of EGO's husband/wife, partner/cohabitee or civil
    // partner (the same information as ppid in the INDALL file).
    //
    // All relationship types are kept in memory here, so the identifier is filled
    // only on rows where ALTER is EGO's partner (relationship_bh 1, 2 or 3) and is
    // missing on every other row. Without this condition every ALTER - children,
    // parents, siblings - would be recorded as a partner.
    generate long partner_pidp = apidp if inlist(relationship_bh, 1, 2, 3)
    label variable partner_pidp "cross-wave identifier of ego's spouse/partner"

    // Optional single-wave checks, left disabled:
    //
    // Keep only those observations where EGO is the husband/wife or
    // partner/cohabitee of ALTER.
    * keep if inlist(relationship_bh,1,2,3)

    // Are there any people with more than one partner?
    * bys pidp: g num_partners=_N
    * fre num_partners

    // Such cases are either data errors or genuine multiple partners. Identifying
    // multiple partners requires different code, so the simple route is to drop them.
    * drop if num_partners==2
    * fre num_partners

    // To compare with the identifiers provided with the data, merge with the
    // b_indall file. The online documentation shows which variable holds the
    // identifier of the spouse or partner: w_ppid, or w_hidp together with w_ppno.
    * merge m:1 pidp using "$inpath/ukhls_w2/b_indall", keepusing(pidp b_ppid)

    // Drop the unmatched cases.
    * drop if _m==2

    // Check that the partner pidp created here equals the one provided with the
    // data. The answer should be ZERO.
    * count if partner_pidp != b_ppid

    // Cross-wave identifiers of the father, mother, grandfather and grandmother
    // of EGO. relationship_bh codes 4-7 are used for parents and code 20 for
    // grandparents; asex (ALTER's sex) separates female (2) from male (1).
    * use temp_egoalt, clear

    generate long mother_pidp      = apidp if relationship_bh >= 4 & relationship_bh <= 7 & asex == 2
    generate long father_pidp      = apidp if relationship_bh >= 4 & relationship_bh <= 7 & asex == 1
    generate long grandmother_pidp = apidp if relationship_bh == 20 & asex == 2
    generate long grandfather_pidp = apidp if relationship_bh == 20 & asex == 1
 | 
  
    |  | 
 | 
  
    |  | 
 | 
  
    * IDENTIFYING JOINERS, LEAVERS
    **********************************

    * Joiners are identified from ALTER's status relative to the LAST wave
    * (lwstat), leavers from the status relative to the NEXT wave (nwstat).
    * Building both flags from nwstat would mark every leaver as a joiner too.
    * NOTE(review): the code range 2-6 is taken over unchanged from the previous
    * version of this flag - confirm it against the value labels (fre lwstat).
    * Rows with missing status are coded 0 in both flags.
    generate JoinHH = inlist(lwstat, 2, 3, 4, 5, 6)
    label variable JoinHH "Alter joined ego's HH this wave"

    generate LeftHH = (nwstat == 2)
    label variable LeftHH "Alter left ego's HH next wave"

    fre JoinHH LeftHH

    ** Save data so far:
    save "$dataout/temp_egoalt", replace
 | 
  
    |  | 
 | 
  
    **  A (WIDE FORMAT) DATA SET OF CO-RESIDENT CHILDREN INCLUDING THEIR
    **  IDENTIFIER, AGE AND SEX
    ************************************************************************
    * The data are a panel, so everything below is done per ego AND wave:
    * a child's age differs by wave and the set of co-resident children can
    * change from wave to wave.
    use "$dataout/egoaltPanelSmall", clear

    // Only keep rows where ALTER is a child of EGO (relationship_bh 9 to 12)
    keep if inrange(relationship_bh, 9, 12)
    count

    fre wave

    // Attach the age of ALTER (the child). wave is kept because it is part of
    // the merge key.
    keep pidp apidp wave relationship_bh esex asex
    rename pidp  xpidp      // xpidp: identifier of EGO, the parent of these children
    rename apidp pidp       // pidp now holds the identifier of the child

    // Several parents can point to the same child in a wave (m), while the
    // individual-level file is expected to hold one row per person and wave (1).
    // merge stops with an error if the using file is not unique on pidp wave,
    // instead of pairing rows arbitrarily as merge m:m would.
    merge m:1 pidp wave using "$dataout/indallPanelSmallVar", keepusing(age_indall)
    drop if _merge == 2
    drop _merge

    rename pidp kpidp
    label variable kpidp "cross-wave identifier of child"

    rename age_indall kage
    label variable kage "child's age"

    rename asex ksex
    label variable ksex "child's sex"

    rename xpidp pidp

    // Check the data
    sort pidp wave kpidp
    list pidp wave kpidp relationship_bh esex ksex kage in 1/10, sepby(pidp) noobs

    // Convert to a wide file of EGO's children: number the children of each ego
    // within a wave from youngest to oldest; kpidp breaks ties in age (e.g. twins)
    // so that the numbering is reproducible.
    bysort pidp wave (kage kpidp): generate k_id = _n

    // Q: What is the maximum no. of children in a household?
    fre k_id

    reshape wide kpidp ksex kage relationship_bh, i(pidp wave) j(k_id)

    duplicates report pidp wave

    // How many individuals have an adult child (18+) living with them?
    // Loop over whatever kage* variables the reshape created rather than
    // assuming a fixed maximum number of children.
    generate adult_children_HH = 0
    foreach v of varlist kage* {
        replace adult_children_HH = 1 if `v' >= 18 & `v' < .
    }
    fre adult_children_HH

    // Merge with the ego-alter file saved earlier: one row per ego and wave here,
    // many ego-alter rows per ego and wave in temp_egoalt. Egos without
    // co-resident children come from temp_egoalt only, so the child variables
    // (including adult_children_HH) are missing for them.
    merge 1:m pidp wave using "$dataout/temp_egoalt"
    drop _merge

    // Clean up: delete the temporary file, addressed by the folder it was saved in
    erase "$dataout/temp_egoalt.dta"

    // Save dataset
    compress
    save "$dataout/egoaltPanelSmallVar", replace

    // No log is opened in this do-file; capture avoids an error when none is open
    capture log close
    exit
 |