** Do-file to clean the EGOALT BHPS data, version 15.1
**
** This first part sets up paths and wave lists, extracts the EGOALT file of
** every BHPS wave into a small per-wave dataset, and stacks those datasets
** into one long panel held in memory.

clear all
set more off
cap log close

cd "D:\Mario files\box\PhD Health Research\Effects of parental unemployment on children health\Data Analysis"

* Locations of the raw data, the derived datasets and the results
global main_data "D:\Mario files\box\PhD Health Research\Effects of parental unemployment on children health\Datasets\Understanding Societies (UKHLS)\UKDA-6614-stata\stata\stata13_se"
global dataout "D:\Mario files\box\PhD Health Research\Effects of parental unemployment on children health\Data Analysis"
global results "D:\Mario files\box\PhD Health Research\Effects of parental unemployment on children health\(Preliminary) Results"
global graphs ""
global tables ""

* Wave letters, the matching wave numbers, and the number of waves
global W "a b c d e f g h i j k l m n o p q r"
global N " 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18"
global nW : word count $W

* Black-and-white scheme for graphs
set scheme s1mono

* Second reset, kept from the original script
clear all
capture log close

********************************************************************************
* Creating the dataset: EGOALT (BHPS)                                          *
********************************************************************************
* EGOALT holds one row per (ego, alter) pair of co-resident household members
* in a given wave. Each wave file is reduced to a small temporary dataset.

forvalues n = 1/$nW {
    * Wave letter that belongs to wave number `n' (1 -> a, 2 -> b, ...)
    local w : word `n' of $W
    di `n'

    use "$main_data/bhps_w`n'/b`w'_egoalt.dta", clear

    * Strip the wave-specific prefix so variable names agree across waves
    rename b`w'_* *

    gen wave = `n'
    lab var wave "wave"

    save "$dataout\egoaltSmall`n'", replace
}

* Stack the per-wave datasets: start from wave 1 and append waves 2 to nW
use "$dataout\egoaltSmall1", clear
forvalues n = 2/$nW {
    append using "$dataout\egoaltSmall`n'"
}

* Inspect, order by person and wave, and compress to save space
describe
sort pidp wave
compress

// Next step: recode values from -1 to -9 to Stata system missing for all variables
quietly mvdecode _all, mv(-9/-1)

save "$dataout\egoaltPanelSmall", replace

* The per-wave temporary files are no longer needed once the panel is saved.
* The loop runs over 1..$nW, the same range used when the files were appended.
forvalues n = 1/$nW {
    erase "$dataout\egoaltSmall`n'.dta"
}

*******************************************************************
* OTHER IMPORTANT VARIABLES:
*******************************************************************
use "$dataout\egoaltPanelSmall", clear

* Year: wave 1 is 1991, wave 2 is 1992, ..., wave 18 is 2008.
* Waves outside 1-18 are left missing.
gen year = 1990 + wave if inrange(wave, 1, 18)
* NOTE: fre is a user-written command (ssc install fre), not built into Stata.
fre year

* GETTING TO KNOW THE EGOALT FILE
**************************************
// Take a look at the data by using different methods
describe
summ
sort hidp pno apno
li in 1/10, sepby(hidp)
// or you could browse interactively

fre relationship_bh lwstat nwstat
ta relationship_bh esex

// Q: Which variable or variables uniquely identify each row?
duplicates report pidp
duplicates report pidp apidp
duplicates report hidp pno
duplicates report hidp pno apno

// Q: How many men and women are living with their husband/wife,
//    partner/cohabitee or civil partner?
// NOTE(review): this is now a pooled panel, so the counts are over all waves,
// not a single wave.
count if esex==1 & inlist(relationship_bh,1,2,3)
count if esex==2 & inlist(relationship_bh,1,2,3)

// Q: How many are living in same-sex partnerships?
ta asex esex if inlist(relationship_bh,1,2,3)

// Q: How many OSM children were born between last wave and this?
//    How can you identify the new entrants in the dataset?
fre lwstat

// Digression
// Note that w_egoalt is a file which shows the relationship between each
// household member and every other household member. So, single-person
// households are excluded from w_egoalt. To verify this, one could compute the
// household size from b_indall, merge it with this dataset, and then compare
// household size for matched and unmatched cases.

** CREATE UNIQUE IDENTIFIERS FOR SPECIFIC FAMILY MEMBERS
************************************************************
// Create a variable that records the unique cross-wave identifier of the EGO's
// husband/wife or partner/cohabitee and compare that with the identifiers
// provided with the data.

// Keep only those observations where the EGO is the husband/wife or
// partner/cohabitee of the ALTER.
* keep if inlist(relationship_bh,1,2,3)

// Are there any people with more than one partner?
* bys pidp: g num_partners=_N
* fre num_partners

// If there are such cases, they are either data errors or actual cases of
// multiple partners. As identifying multiple partners would require different
// code, we keep things simple and drop these cases.
* drop if num_partners==2
* fre num_partners

// Generate pidp of the spouse or partner of EGO (the same as ppid in the
// INDALL file). Because the "keep if" above is commented out, the assignment
// must be restricted to partner rows; otherwise every alter (child, parent,
// sibling, ...) would be recorded as EGO's partner.
g long partner_pidp = apidp if inlist(relationship_bh,1,2,3)

// To compare with the identifiers provided with the data, merge with the
// b_indall file. You can take a look at the online documentation to find out
// which variable represents the identifiers of the spouse or partner.
// As you will find out, the answer is w_ppid, or w_hidp and w_ppno.
* merge m:1 pidp using "$inpath/ukhls_w2/b_indall", keepusing(pidp b_ppid)

// Let us drop the unmatched cases.
* drop if _m==2

// Check if the partner pidp created here is the same as the one provided with
// the data. The answer should be ZERO.
* count if partner_pidp != b_ppid

// Create unique cross-wave identifiers of the father, mother, grandfather and
// grandmother of EGO.
* use temp_egoalt, clear

* Cross-wave identifiers of EGO's mother, father, grandmother and grandfather,
* taken from the ALTER's pidp on rows where EGO is the (grand)child of ALTER.
* NOTE(review): codes 4-7 (EGO is child of ALTER) and 20 (EGO is grandchild of
* ALTER) are assumed from how this script uses them -- confirm against the
* value labels of relationship_bh.
g long mother_pidp      = apidp if relationship_bh>=4 & relationship_bh<=7 & asex==2
g long father_pidp      = apidp if relationship_bh>=4 & relationship_bh<=7 & asex==1
g long grandmother_pidp = apidp if relationship_bh==20 & asex==2
g long grandfather_pidp = apidp if relationship_bh==20 & asex==1

* IDENTIFYING JOINERS, LEAVERS
**********************************
* NOTE(review): JoinHH is built from nwstat (the variable also used for
* LeftHH) and includes code 2, so every row flagged LeftHH is also flagged
* JoinHH. "Joined this wave" would normally be derived from the last-wave
* status (lwstat). Left unchanged here -- confirm the intended variable and
* codes against the lwstat/nwstat value labels.
generat JoinHH = 0
replace JoinHH = 1 if inlist(nwstat,2,3,4,5,6)
lab var JoinHH "Alter joined ego's HH this wave"

generat LeftHH = 0
replace LeftHH = 1 if inlist(nwstat,2)
lab var LeftHH "Alter left ego's HH next wave"

fre JoinHH LeftHH

** Save data so far:
save "$dataout\temp_egoalt", replace

** A (WIDE FORMAT) DATA SET OF CO-RESIDENT CHILDREN INCLUDING THEIR
** IDENTIFIER, AGE AND SEX
************************************************************************
use "$dataout\egoaltPanelSmall", clear

// Only keep rows where ALTER is a child of EGO
keep if relationship_bh>=9 & relationship_bh<=12
count
fre wave

// Attach the age of ALTER (the child). wave is kept because the data are a
// panel: a child's age must come from the same wave as the ego-alter row.
keep pidp apidp wave relationship_bh esex asex
rename pidp xpidp      // xpidp: identifier of EGO, the parent of these children
rename apidp pidp      // pidp: now the identifier of the child

// m:1 on (pidp wave): many parents can point to the same child in a wave.
// The original m:m merge on pidp alone paired rows arbitrarily across waves.
// Assumes indallPanelSmallVar has one row per pidp and wave; merge stops with
// an error if that is not the case.
merge m:1 pidp wave using "$dataout\indallPanelSmallVar", keepusing(age_indall)
drop if _merge==2
drop _merge

rename pidp kpidp
label var kpidp "cross-wave identifier of child"
rename age_indall kage
label var kage "child's age"
rename asex ksex
lab var ksex "child's sex"
rename xpidp pidp

// Check the data
sort pidp wave kpidp
li pidp wave kpidp relationship_bh esex ksex kage in 1/10, sepby(pidp) noobs

// Number the children of each EGO within a wave, youngest first; kpidp breaks
// ties (e.g. twins) so the numbering is reproducible.
bys pidp wave (kage kpidp): g k_id=_n

// Q: What is the maximum no. of children in a household?
fre k_id

// How many individuals have their adult child living with them?
// Computed before the reshape so that no maximum number of children has to be
// hard-coded (the original looped over kage1-kage11).
egen byte adult_children_HH = max(kage>=18 & kage<.), by(pidp wave)

// Convert the data into a wide-format file of EGO's children: one row per
// EGO and wave.
reshape wide kpidp ksex kage relationship_bh, i(pidp wave) j(k_id)
duplicates report pidp wave

fre adult_children_HH

// Merge with the ego-alter file saved above: one wide row per (pidp wave)
// here, many ego-alter rows per (pidp wave) in temp_egoalt.
merge 1:m pidp wave using "$dataout\temp_egoalt"
// Egos with no co-resident child have, by construction, no adult child in the
// household.
replace adult_children_HH = 0 if _merge==2
drop _merge

// Clean up: delete the temporary file, using its full path rather than
// relying on the working directory.
erase "$dataout\temp_egoalt.dta"

// Save dataset:
compress
save "$dataout\egoaltPanelSmallVar", replace

// No log is opened in this do-file, so a plain "log close" would stop with
// an error; capture makes the close a no-op in that case.
capture log close
exit