**Do-File to clean the EGOALT BHPS data

* ------------------------------------------------------------------------------
* Session set-up: Stata version, empty memory, working directory, and the global
* macros (folder paths and wave lists) that the rest of this do-file relies on.
* ------------------------------------------------------------------------------
version 15.1
clear all
set more off
capture log close
cd "D:\Mario files\box\PhD Health Research\Effects of parental unemployment on children health\Data Analysis"

* Folder locations. These are machine-specific absolute paths; edit them when the project moves.
global main_data "D:\Mario files\box\PhD Health Research\Effects of parental unemployment on children health\Datasets\Understanding Societies (UKHLS)\UKDA-6614-stata\stata\stata13_se"
global dataout "D:\Mario files\box\PhD Health Research\Effects of parental unemployment on children health\Data Analysis"
global results "D:\Mario files\box\PhD Health Research\Effects of parental unemployment on children health\(Preliminary) Results"
global graphs ""                                                                // Defined but left empty here
global tables ""                                                                // Defined but left empty here

* Wave bookkeeping
global W "a b c d e f g h i j k l m n o p q r"                                   // List of all wave letters (used in file names and variable prefixes)
global N " 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18"                         // The matching wave numbers
global nW : word count $W                                                       // Number of elements in the list of letters (i.e. number of waves)

set scheme s1mono                                                               // Scheme for graphs (black and white graphs)

********************************************************************************
* Creating the dataset:                                       *
********************************************************************************

* Step 1: make a per-wave working copy of the egoalt file.
* For each wave the raw file is loaded, the wave-specific variable prefix is
* stripped so that names are identical across waves, and the wave number is stored.

forvalues n = 1/$nW {
	local w : word `n' of $W                                                       // Wave letter that belongs to this wave number
	display `n'
	use "$main_data/bhps_w`n'/b`w'_egoalt.dta", clear
	rename b`w'_* *                                                                // Drop the wave prefix from every variable name
	generate wave = `n'
	label variable wave "wave"
	save "$dataout\egoaltSmall`n'", replace
}

		
* Step 2: make a per-wave working copy of the indall file.
* Same treatment as the egoalt files; in addition age is renamed to age_indall.

forvalues n = 1/$nW {
	local w : word `n' of $W                                                       // Wave letter that belongs to this wave number
	display `n'
	use "$main_data/bhps_w`n'/b`w'_indall.dta", clear
	rename b`w'_* *                                                                // Drop the wave prefix from every variable name
	generate wave = `n'
	rename age age_indall
	label variable wave "wave"
	save "$dataout\indallSmall`n'", replace
}
		
* Step 3: build, wave by wave, a link file with one row per (ego, alter) pair
* whose relationship_bh code is 9-12. In the output, pidp is the ego and the
* k* variables describe the alter, which this script treats as the ego's child.
* NOTE(review): the meaning of codes 9-12 is taken from how this script uses
* them; confirm against the relationship_bh value labels.

forvalues i = 1/$nW {
	use "$dataout\egoaltSmall`i'", clear
	keep if inrange(relationship_bh, 9, 12)
	count

	* wave is kept from the egoalt file itself. Taking it only from the indall
	* file would leave it missing for any row without an indall match, and those
	* rows would then vanish in the later per-wave splits.
	keep pidp apidp relationship_bh esex asex wave

	* Temporarily swap the identifiers so the alter's id is the merge key
	rename pidp xpidp                                                              // xpidp: the ego's id
	rename apidp pidp                                                              // pidp: now the alter's (child's) id

	* Attach the alter's age and birth year; indall rows without a matching
	* pair are not kept, and no _merge variable is left behind
	merge m:1 pidp using "$dataout\indallSmall`i'.dta", ///
		keepusing(age_indall birthy) keep(master match) nogenerate

	* Give the alter's variables their k-prefixed names and labels
	rename pidp kpidp
	label variable kpidp "cross-wave identifier of child"
	rename age_indall kage
	label variable kage "child's age"
	rename asex ksex
	label variable ksex "child's sex"
	rename birthy kbirthy
	label variable kbirthy "child's year of birth"
	rename relationship_bh krelationship_bh
	label variable krelationship_bh "Relationship of ego with the children"

	* Restore the ego's id under its usual name
	rename xpidp pidp

	* Number the rows within ego by ascending age (missing ages sort last);
	* kpidp breaks ties so the numbering is reproducible when ages are equal
	bysort pidp (kage kpidp): generate k_id = _n

	save "$dataout\childrenSmall`i'", replace
}
	
* Stack the per-wave child link files (waves 1 to $nW) into one long file
use "$dataout\childrenSmall1", clear
foreach n of numlist 2/$nW {
	append using "$dataout\childrenSmall`n'"
}

* Show the contents, order the rows by pidp and wave, and shrink storage types
describe
sort pidp wave
compress

* Values -9 to -1 become Stata system missing in every variable
quietly mvdecode _all, mv(-9/-1)

* Store the stacked file
save "$dataout\childrenPanelSmall", replace

*******************************************************
// Mother, father and grandparent identifiers, one file per wave.
// Each id column holds the alter's apidp on the rows where relationship_bh and
// the alter's sex (asex) meet the condition, and is missing on all other rows.

forvalues i = 1/18 {
	use "$dataout\egoaltSmall`i'", clear
	keep pidp apidp relationship_bh esex asex

	// relationship_bh 4-7: alter is recorded as mother (asex 2) or father (asex 1)
	generate long mnpid_bh = apidp if inrange(relationship_bh, 4, 7) & asex == 2
	generate long fnpid_bh = apidp if inrange(relationship_bh, 4, 7) & asex == 1

	// relationship_bh 20: alter is recorded as grandmother (asex 2) or grandfather (asex 1)
	generate long grandmother_pidp = apidp if relationship_bh == 20 & asex == 2
	generate long grandfather_pidp = apidp if relationship_bh == 20 & asex == 1

	label variable mnpid_bh "cross-wave identifier of mother"
	label variable fnpid_bh "cross-wave identifier of father"
	label variable grandmother_pidp "cross-wave identifier of grandmother"
	label variable grandfather_pidp "cross-wave identifier of grandfather"

	// wave was removed by the keep above, so it is re-created here
	generate wave = `i'
	label variable wave "wave"

	save "$dataout\parentSmall`i'", replace
}
	
// Stack the per-wave parent-id files (waves 1 to $nW) into one long file
use "$dataout\parentSmall1", clear
foreach n of numlist 2/$nW {
	append using "$dataout\parentSmall`n'"
}

// Show the contents, order the rows by pidp and wave, and shrink storage types
describe
sort pidp wave
compress

// Values -9 to -1 become Stata system missing in every variable
quietly mvdecode _all, mv(-9/-1)

// Store the stacked file
save "$dataout\parentPanelSmall", replace

**** MERGE CHILD, PARENT AND EGOALT DATASETS
* Starts from egoaltPanelSmallVar, which is not created anywhere in this section
* and must already exist in the dataout folder, then attaches the parent-id panel
* and the child-id panel using pidp as the only key.
* NOTE(review): both merges are m:m on pidp alone. The parent and child panels
* are stacked per-wave files that are never reduced to one row per pidp, so pidp
* looks non-unique on the using side, and wave is not part of the key. An m:m
* merge pairs rows by their position within each pidp group, so rows from
* different waves or different alters can end up side by side. Confirm the row
* structure of egoaltPanelSmallVar and consider a 1:1 or m:1 merge on a key
* that includes wave, or joinby, before relying on the merged file.

use "$dataout\egoaltPanelSmallVar", clear

// Merge the parent-id panel (the _merge indicator is discarded unchecked):
merge m:m pidp using "$dataout\parentPanelSmall"
drop _m

// Merge the child-id panel (the _merge indicator is discarded unchecked):
merge m:m pidp using "$dataout\childrenPanelSmall"
drop _m

* Year
* Wave 1 corresponds to 1991 and each later wave adds one year, up to wave 18
* = 2008, so year = 1990 + wave. The condition restricts the assignment to the
* whole-number waves 1-18; any other value of wave (including missing) leaves
* year missing.
gen year = 1990 + wave if inrange(wave, 1, 18) & wave == int(wave)

* Frequency table of year. fre is not a built-in command; it has to be
* installed beforehand.
fre year

//Save data for use (saved once; the per-wave split that follows reads this file):
sa  "$dataout\egoaltPanelSmallVarID", replace


**** PREPARE THE DATA TO CREATE PARENTS VARIABLES:


// Write one .dta per wave for each of the three panels. Every output file holds
// the rows of its panel whose wave equals the number appended to the file name.

// (1) Merged ego-alter panel with ids
foreach i of numlist 1/18 {
	use if wave == `i' using "$dataout\egoaltPanelSmallVarID", clear
	save "$dataout\egoaltPanelSmallVarID`i'", replace
}

// (2) Parent-id panel on its own, i.e. without the children id (kpidp)
foreach i of numlist 1/18 {
	use if wave == `i' using "$dataout\parentPanelSmall", clear
	save "$dataout\parentPanelSmall`i'", replace
}

// (3) Child-id panel on its own
foreach i of numlist 1/18 {
	use if wave == `i' using "$dataout\childrenPanelSmall", clear
	save "$dataout\childrenPanelSmall`i'", replace
}


		
// Remove intermediate datasets that are no longer needed.
// erase stops with an error if a file does not exist, so a missing intermediate
// file is reported rather than silently ignored.

// Per-wave working files created earlier in this do-file
foreach n in $N {
	erase "$dataout\egoaltSmall`n'.dta"
	erase "$dataout\indallSmall`n'.dta"
	erase "$dataout\childrenSmall`n'.dta"
	erase "$dataout\parentSmall`n'.dta"
}

// Stacked panels, already split into per-wave files above. The full path is
// given so that the result does not depend on the current working directory.
erase "$dataout\parentPanelSmall.dta"
erase "$dataout\childrenPanelSmall.dta"
