|
**Do-File to clean the EGOALT BHPS data
|
|
|
|
* ------------------------------------------------------------------------------
* Session set-up: Stata version, clean memory, folder globals and wave lists
* ------------------------------------------------------------------------------
version 15.1
clear all
set more off
capture log close

* Folder globals: raw data, derived data, results (graphs/tables left empty)
global main_data "D:\Mario files\box\PhD Health Research\Effects of parental unemployment on children health\Datasets\Understanding Societies (UKHLS)\UKDA-6614-stata\stata\stata13_se"
global dataout "D:\Mario files\box\PhD Health Research\Effects of parental unemployment on children health\Data Analysis"
global results "D:\Mario files\box\PhD Health Research\Effects of parental unemployment on children health\(Preliminary) Results"
global graphs ""
global tables ""

* The working directory is the derived-data folder
cd "$dataout"

* Wave bookkeeping
global W "a b c d e f g h i j k l m n o p q r" // List of all wave letters
global N " 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18" // Matching wave numbers
global nW : word count $W // Number of elements in W (here: number of waves)

* Black-and-white scheme for all graphs
set scheme s1mono
|
|
|
|
********************************************************************************
|
|
* Creating the dataset: EGOALT (BHPS) *
|
|
********************************************************************************
|
|
|
|
* Here I get the information I need from EGOALT: the data file that records the relationship between each household member (EGO) and every other member of the same household (ALTER) in a given wave
|
|
|
|
* Step 1: write one small EGOALT file per wave.
* The loop runs over the wave number; the matching wave letter is looked up in W.
forvalues n = 1/$nW {
    local w : word `n' of $W

    di `n'
    use "$main_data/bhps_w`n'/b`w'_egoalt.dta", clear

    * Strip the wave-specific prefix so variable names agree across waves
    rename b`w'_* *

    gen wave = `n'
    lab var wave "wave"

    save "$dataout\egoaltSmall`n'", replace
}

* Step 2: stack the per-wave files, starting from wave 1
use "$dataout\egoaltSmall1", clear
forvalues n = 2/$nW {
    append using "$dataout\egoaltSmall`n'"
}

* Step 3: inspect, order by person and wave, and shrink storage types
describe
sort pidp wave
compress

* Step 4: recode values from -9 to -1 to Stata system missing for all variables
quietly mvdecode _all, mv(-9/-1)

save "$dataout\egoaltPanelSmall", replace

* Step 5: remove the per-wave files, which are no longer needed
forvalues n = 1/$nW {
    erase "$dataout\egoaltSmall`n'.dta"
}
|
|
|
|
*******************************************************************
|
|
* OTHER IMPORTANT VARIABLES:
|
|
*******************************************************************
|
|
use "$dataout\egoaltPanelSmall", clear

* Year: wave 1 maps to 1991, wave 2 to 1992, ... wave 18 to 2008,
* i.e. year = 1990 + wave. Any other value of wave leaves year missing.
gen year = 1990 + wave if inrange(wave, 1, 18) & wave == int(wave)

fre year
|
|
|
|
|
|
* GETTING TO KNOW THE EGOALT FILE
|
|
**************************************
|
|
|
|
// Take a look at the data by using different methods
|
|
// First look at the data: structure, summary statistics and a few rows
describe
summarize
sort hidp pno apno
list in 1/10, sepby(hidp)
// (alternatively, browse the data interactively)

fre relationship_bh lwstat nwstat
tabulate relationship_bh esex

// Q: Which variable or variables uniquely identify each row?
// Try increasingly specific candidate keys.
foreach key in "pidp" "pidp apidp" "hidp pno" "hidp pno apno" {
    duplicates report `key'
}

// Q: How many men and women are living with their husband/wife,
// partner/cohabitee or civil partner (relationship codes 1-3)?
// Counts are over all rows in memory, first esex==1, then esex==2.
forvalues s = 1/2 {
    count if inlist(relationship_bh, 1, 2, 3) & esex == `s'
}

// Q: How many are living in same sex partnerships?
tabulate asex esex if inlist(relationship_bh, 1, 2, 3)

// Q: How many OSM children were born between last wave and this?
// How can you identify the new entrants in the dataset?
fre lwstat
|
|
|
|
|
|
// Digression
|
|
// Note the w_egoalt is a file which shows the relationship between each
|
|
// household member with every other household member. So, single person
|
|
// households are excluded from w_egoalt. To verify this, one would compute the
|
|
// household size from b_indall and merge it with this dataset and then compare
|
|
// household size for matched and unmatched cases.
|
|
|
|
|
|
** CREATE UNIQUE IDENTIFIERS FOR SPECIFIC FAMILY MEMBERS
|
|
************************************************************
|
|
|
|
// Create a variable that records the unique cross-wave identifier of the EGO's
|
|
// husband/wife or partner/cohabitee and compare that with the identifiers
|
|
// provided with the data.
|
|
|
|
// keep only those observations where the EGO is the husband/wife or
|
|
// partner/cohabitee of the ALTER.
|
|
* keep if inlist(relationship_bh,1,2,3)
|
|
|
|
// Are there any people with more than one partner?
|
|
* bys pidp: g num_partners=_N
|
|
* fre num_partners
|
|
|
|
// If there are such cases either these are data errors or actual cases of
|
|
// multiple partners. As identifying multiple partners will require different code,
|
|
// We will keep things simple and drop this case
|
|
* drop if num_partners==2
|
|
* fre num_partners
|
|
|
|
// generate pidp of spouse or partner of EGO (this is the same as ppid in the INDALL file)
|
|
// Partner identifier. The file keeps every ego-alter row (the partner-only
// "keep if" filter is commented out), so the identifier must be restricted
// to rows where EGO is the husband/wife, partner/cohabitee or civil partner
// of ALTER (relationship codes 1-3). Without the condition, partner_pidp
// would hold the id of every alter: children, parents, siblings, etc.
// On all other rows partner_pidp is missing.
g long partner_pidp = apidp if inlist(relationship_bh, 1, 2, 3)
|
|
|
|
// to compare with identifiers provided with the data merge with b_indall file
|
|
// You can take a look at the online documentation to find out which variable
|
|
// represents identifiers of spouse or partner
|
|
// As you will find out the answer is w_ppid or w_hidp and w_ppno
|
|
* merge m:1 pidp using "$inpath/ukhls_w2/b_indall", keepusing(pidp b_ppid)
|
|
|
|
// Let us drop the unmatched cases.
|
|
* drop if _m==2
|
|
|
|
// check if partner pidp created here is the same as the one provided with the
|
|
// data. The answer should be ZERO.
|
|
* count if partner_pidp != b_ppid
|
|
|
|
// Create unique cross-wave identifiers of the father, mother, grandfather and
|
|
// grandmother of EGO.
|
|
* use temp_egoalt, clear
|
|
|
|
// Identifiers of EGO's parents and grandparents, taken from the ALTER's pidp.
// Each entry pairs the new variable's prefix with the asex code it selects
// (2 for mother/grandmother, 1 for father/grandfather).

// Parents: relationship codes 4 to 7
foreach pair in "mother 2" "father 1" {
    gettoken who sex : pair
    g long `who'_pidp = apidp if inrange(relationship_bh, 4, 7) & asex == `sex'
}

// Grandparents: relationship code 20
foreach pair in "grandmother 2" "grandfather 1" {
    gettoken who sex : pair
    g long `who'_pidp = apidp if relationship_bh == 20 & asex == `sex'
}
|
|
|
|
|
|
* IDENTIFYING JOINERS, LEAVERS
|
|
**********************************
|
|
|
|
* Joiners: whether the alter joined ego's household THIS wave depends on the
* alter's status relative to the LAST wave, i.e. lwstat (the same variable the
* exploratory section inspects to identify new entrants). Using nwstat here, as
* before, made JoinHH overlap with LeftHH on nwstat==2.
* NOTE(review): the codes 2-6 are carried over unchanged from the previous
* nwstat-based condition -- confirm them against the lwstat value labels.
* Rows with missing lwstat are coded 0 (not a joiner).
generate JoinHH = 0
replace JoinHH = 1 if inlist(lwstat, 2, 3, 4, 5, 6)
lab var JoinHH "Alter joined ego's HH this wave"

* Leavers: status of the alter relative to the NEXT wave (nwstat).
* Rows with missing nwstat are coded 0 (not a leaver).
generate LeftHH = 0
replace LeftHH = 1 if inlist(nwstat, 2)
lab var LeftHH "Alter left ego's HH next wave"

fre JoinHH LeftHH

** Save data so far:
save "$dataout\temp_egoalt", replace
|
|
|
|
** A (WIDE FORMAT) DATA SET OF CO-RESIDENT CHILDREN INCLUDING THEIR
|
|
** IDENTIFIER, AGE AND SEX
|
|
************************************************************************
|
|
use "$dataout\egoaltPanelSmall", clear

// Only keep rows where ALTER is a child of EGO (relationship codes 9 to 12)
keep if inrange(relationship_bh, 9, 12)
count

fre wave

// Attach the age of ALTER (the child).
// wave is kept: the data are a person-wave panel, so a child's age must be
// taken from the same wave as the ego-alter row. Dropping wave and merging
// m:m on pidp alone paired rows across waves arbitrarily.
keep pidp apidp wave relationship_bh esex asex
rename pidp xpidp   // xpidp: ego identifier (the parents of these children)
rename apidp pidp   // pidp: now the identifier of the child

// NOTE(review): assumes indallPanelSmallVar holds one row per pidp and wave,
// with wave coded 1-18 as here; merge m:1 stops with an error if it does not.
merge m:1 pidp wave using "$dataout\indallPanelSmallVar", ///
    keepusing(age_indall) keep(master match) nogenerate

rename pidp kpidp
label var kpidp "cross-wave identifier of child"

rename age_indall kage
label var kage "child's age"

rename asex ksex
lab var ksex "child's sex"

rename xpidp pidp

// check the data
sort pidp wave kpidp
li pidp wave kpidp relationship_bh esex ksex kage in 1/10, sepby(pidp) noobs

// Convert the data into a wide format file of EGO's children: one row per
// ego and wave. Children are numbered by age within ego-wave; kpidp breaks
// ties (e.g. twins) so the numbering is reproducible.
bysort pidp wave (kage kpidp): g k_id = _n

// Q: What is the maximum no. of children in a household?
fre k_id

reshape wide kpidp ksex kage relationship_bh, i(pidp wave) j(k_id)

duplicates report pidp wave
isid pidp wave

// How many individuals have their adult child living with them?
// Loop over whichever kage# variables the reshape created instead of a
// hard-coded maximum of 11 children.
g adult_children_HH = 0
foreach v of varlist kage* {
    replace adult_children_HH = 1 if `v' >= 18 & `v' < .
}
fre adult_children_HH

// Merge with previous: the wide children file has one row per ego-wave,
// temp_egoalt has one row per ego-alter-wave, hence 1:m on pidp wave.
// Egos without co-resident children come from temp_egoalt only and keep
// missing values in the child variables (including adult_children_HH).
merge 1:m pidp wave using "$dataout\temp_egoalt"
drop _merge

// Clean up: delete temporary files no longer needed.
// Use the full path so this does not depend on the working directory.
erase "$dataout\temp_egoalt.dta"

// save dataset:
compress
save "$dataout\egoaltPanelSmallVar", replace

// No log is opened in this do-file, so a plain "log close" would stop with
// an error; capture makes the close a no-op when no log is open.
capture log close

exit
|