**Do-File to clean the EGOALT BHPS data

version 15.1

* ------------------------------------------------------------------------------
* Session housekeeping
* ------------------------------------------------------------------------------
clear all
capture log close
set more off
set scheme s1mono                                                               // Black-and-white graph scheme

* ------------------------------------------------------------------------------
* Working directory and folder globals
* ------------------------------------------------------------------------------
cd "D:\Mario files\box\PhD Health Research\Effects of parental unemployment on children health\Data Analysis"

global main_data "D:\Mario files\box\PhD Health Research\Effects of parental unemployment on children health\Datasets\Understanding Societies (UKHLS)\UKDA-6614-stata\stata\stata13_se"
global dataout "D:\Mario files\box\PhD Health Research\Effects of parental unemployment on children health\Data Analysis"
global results "D:\Mario files\box\PhD Health Research\Effects of parental unemployment on children health\(Preliminary) Results"
global graphs ""                                                                // Left empty in this do-file
global tables ""                                                                // Left empty in this do-file

* ------------------------------------------------------------------------------
* Wave bookkeeping
* ------------------------------------------------------------------------------
global W "a b c d e f g h i j k l m n o p q r"                                   // Letter prefix of each wave
global N " 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18"
global nW : word count $W                                                       // Number of waves (length of the letter list)

********************************************************************************
* Creating the dataset: EGOALT (BHPS)                                          *
********************************************************************************

* Each wave's EGOALT file is read, its wave-specific variable prefix removed,
* and the rows tagged with the wave number. The per-wave copies are then
* appended into one long panel and the temporary copies deleted.

* Step 1: one temporary file per wave
local n = 0
foreach w of global W {
	local ++n                                                                      // Position of the letter in the list = wave number
	display `n'
	use "$main_data/bhps_w`n'/b`w'_egoalt.dta", clear
	rename b`w'_* *                                                                // Strip the wave prefix from every variable name
	generate wave = `n'
	label variable wave "wave"
	save "$dataout\egoaltSmall`n'", replace
}

* Step 2: stack the per-wave files, starting from wave 1
use "$dataout\egoaltSmall1", clear
forvalues k = 2/$nW {
	append using "$dataout\egoaltSmall`k'"
}

* Step 3: inspect, order and shrink the pooled file
describe
sort pidp wave
compress

* Step 4: values -9 to -1 become Stata system missing in every variable
quietly mvdecode _all, mv(-9/-1)

save "$dataout\egoaltPanelSmall", replace

* Step 5: remove the temporary per-wave files
forvalues k = 1/$nW {
	erase "$dataout\egoaltSmall`k'.dta"
}

*******************************************************************
*  OTHER IMPORTANT VARIABLES:
*******************************************************************
use "$dataout\egoaltPanelSmall", clear

* Year: wave 1 maps to 1991, wave 2 to 1992, ... wave 18 to 2008.
* Any wave value outside 1-18 is left missing.
generate year = 1990 + wave if inrange(wave, 1, 18)

fre year


* GETTING TO KNOW THE EGOALT FILE
**************************************

// Inspect the data in several ways. The file in memory pools all waves,
// so every count and table below is taken over all waves together.
describe
summarize
sort hidp pno apno
list in 1/10, sepby(hidp)
// (alternatively, browse the data interactively)
fre relationship_bh lwstat nwstat
tabulate relationship_bh esex


// Q: Which variable or variables uniquely identify each row?
// Person-level keys first ...
duplicates report pidp
duplicates report pidp apidp

// ... then household-level keys.
duplicates report hidp pno
duplicates report hidp pno apno


// Q: How many men and women are living with their husband/wife,
// partner/cohabitee or civil partner?
count if inlist(relationship_bh,1,2,3) & esex==1
count if inlist(relationship_bh,1,2,3) & esex==2


// Q: How many are living in same-sex partnerships?
tabulate asex esex if inlist(relationship_bh,1,2,3)


// Q: How many OSM children were born between the last wave and this one?
// How can you identify the new entrants in the dataset?
fre lwstat


// Digression
// Note that w_egoalt is a file which shows the relationship of each
// household member to every other household member, so single-person
// households are excluded from w_egoalt. One way to verify this is to compute
// household size from b_indall, merge it with this dataset and then compare
// household size for matched and unmatched cases (not done in this do-file).


** CREATE UNIQUE IDENTIFIERS FOR SPECIFIC FAMILY MEMBERS 
************************************************************

// Each identifier below is filled in only on the rows where the ALTER has the
// corresponding relationship to the EGO, and is missing on all other rows.

// Create a variable that records the unique cross-wave identifier of the EGO's 
// husband/wife or partner/cohabitee, so that it can be compared with the
// identifiers provided with the data.

// The steps below (restricting the file to partner rows and dropping egos
// with more than one partner) are disabled so that all EGO-ALTER rows are
// retained for the parent/grandparent identifiers further down.
// keep only those observations where the EGO is the husband/wife or 
// partner/cohabitee of the ALTER.
* keep if inlist(relationship_bh,1,2,3)

// Are there any people with more than one partner?
* bys pidp: g num_partners=_N
* fre num_partners

// If there are such cases, they are either data errors or actual cases of 
// multiple partners. As identifying multiple partners would require different
// code, the simple approach is to drop these cases.
* drop if num_partners==2
* fre num_partners

// pidp of the spouse or partner of EGO (the same as ppid in the INDALL file).
// Because the partner-only filter above is disabled, the assignment has to be
// restricted to partner rows (relationship codes 1-3, the same codes used for
// the partner counts elsewhere in this file); without the condition every
// alter, whatever the relationship, would be recorded as a partner.
generate long partner_pidp = apidp if inlist(relationship_bh,1,2,3)
 
// To compare with the identifiers provided with the data, merge with the
// b_indall file. The online documentation shows which variable holds the
// identifier of the spouse or partner: w_ppid, or w_hidp together with w_ppno.
* merge m:1 pidp using "$inpath/ukhls_w2/b_indall", keepusing(pidp b_ppid)

// Drop the unmatched cases.
* drop if _m==2

// Check that the partner pidp created here is the same as the one provided
// with the data. The answer should be ZERO.
* count if partner_pidp != b_ppid

// Create unique cross-wave identifiers of the father, mother, grandfather and 
// grandmother of EGO.
* use temp_egoalt, clear

// Relationship codes 4-7: the ALTER is treated as a parent of EGO; split by the alter's sex.
generate long mother_pidp = apidp if inrange(relationship_bh,4,7) & asex==2
generate long father_pidp = apidp if inrange(relationship_bh,4,7) & asex==1
// Relationship code 20: the ALTER is treated as a grandparent of EGO.
generate long grandmother_pidp = apidp if relationship_bh==20 & asex==2
generate long grandfather_pidp = apidp if relationship_bh==20 & asex==1

* IDENTIFYING JOINERS, LEAVERS 
**********************************

// Both flags are 0/1 indicators; rows with missing nwstat get 0.
// NOTE(review): JoinHH is derived from nwstat although its label refers to
// "this wave", and its value list includes 2, which is also the LeftHH
// criterion, so LeftHH==1 always implies JoinHH==1. Possibly lwstat was
// intended for JoinHH - confirm against the lwstat/nwstat value labels.
generate JoinHH = inlist(nwstat,2,3,4,5,6)
label variable JoinHH "Alter joined ego's HH this wave"

generate LeftHH = (nwstat == 2)
label variable LeftHH "Alter left ego's HH next wave"

fre JoinHH LeftHH

** Keep the data built so far in a temporary file for the final merge
save  "$dataout\temp_egoalt", replace

**  A (WIDE FORMAT) DATA SET OF CO-RESIDENT CHILDREN INCLUDING THEIR 
** IDENTIFIER, AGE AND SEX
************************************************************************
* Builds one row per EGO and wave holding the identifier, sex, age and
* relationship code of each co-resident child, then attaches these columns to
* the EGO-ALTER file saved earlier and writes the final dataset.
use "$dataout\egoaltPanelSmall", clear

// Only keep rows where the ALTER is a child of EGO (relationship codes 9-12)
keep if inrange(relationship_bh, 9, 12)
count 

fre wave

// Attach the age of the ALTER (the child).
// wave must be kept: the file is a panel, so the child's age has to be taken
// from the same wave as the EGO-ALTER row.
keep pidp apidp wave relationship_bh esex asex
rename pidp xpidp // xpidp: identifier of the ego, i.e. the parent of these children
rename apidp pidp // pidp: now the identifier of the child, used as the merge key

// m:1 on person and wave: several egos (e.g. both parents) can point to the
// same child in a wave, while the using file is assumed to hold one row per
// person and wave. Stata stops with an error here if that assumption fails,
// instead of silently pairing rows in sort order as an m:m merge would.
merge m:1 pidp wave using "$dataout\indallPanelSmallVar", keepusing(age_indall)
drop if _merge == 2
drop _merge

rename pidp kpidp
label variable kpidp "cross-wave identifier of child"

rename age_indall kage
label variable kage "child's age"

rename asex ksex
label variable ksex "child's sex"

rename xpidp pidp


// check the data
sort pidp wave kpidp
list pidp wave kpidp relationship_bh esex ksex kage in 1/10, sepby(pidp) noobs

// Convert the data into a wide-format file of EGO's children.
// Children are numbered within ego and wave from youngest to oldest; kpidp
// breaks ties in age so that the numbering is reproducible.
bysort pidp wave (kage kpidp): generate k_id = _n

// Q: What is the maximum no. of children in a household?
fre k_id

reshape wide kpidp ksex kage relationship_bh, i(pidp wave) j(k_id)

// After the reshape there must be exactly one row per ego and wave
duplicates report pidp wave

// How many individuals have their adult child living with them?
// Loop over whatever kage columns the reshape produced rather than assuming
// a fixed maximum number of children.
generate adult_children_HH = 0
foreach v of varlist kage* {
	replace adult_children_HH = 1 if `v' >= 18 & `v' < .
}
fre adult_children_HH

// Merge with the EGO-ALTER file saved earlier.
// The wide file has one row per ego and wave; the EGO-ALTER file has one row
// per ego, alter and wave, hence 1:m on pidp and wave.
// NOTE(review): egos with no co-resident child in a wave come only from the
// using file, so their child columns and adult_children_HH are missing (not 0).
merge 1:m pidp wave using "$dataout\temp_egoalt"

drop _merge


// Clean up: delete the temporary file from the folder it was saved in
erase "$dataout\temp_egoalt.dta"

// save dataset:
compress
save "$dataout\egoaltPanelSmallVar", replace 
// No log is opened in this do-file, so a plain log close would stop with an error
capture log close
exit
