Project

General

Profile

Support #1498 » merge_childrenBHPS.do

Create the mother, father and child id - Mario Martinez-Jimenez, 02/08/2021 04:45 PM

 
* Do-file to clean the EGOALT BHPS data.
* This first section fixes the Stata version, resets the session and defines
* the path and wave-list globals that the remaining sections refer to.

version 15.1
clear all
set more off
capture log close

* Root folder shared by every path below (local: it is only needed here).
local project "D:\Mario files\box\PhD Health Research\Effects of parental unemployment on children health"

cd "`project'\Data Analysis"

* Input data, output data and results folders.
global main_data "`project'\Datasets\Understanding Societies (UKHLS)\UKDA-6614-stata\stata\stata13_se"
global dataout   "`project'\Data Analysis"
global results   "`project'\(Preliminary) Results"
global graphs    ""
global tables    ""

* Wave letters, the matching wave numbers, and the number of waves.
global W "a b c d e f g h i j k l m n o p q r"
global N " 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18"
global nW : word count $W

* Black-and-white scheme for graphs.
set scheme s1mono

* Second reset, kept from the original script (has no further effect here).
clear all
capture log close

********************************************************************************
* Creating the dataset: *
********************************************************************************

* Creating child id, plus their age and sex variables across 18 Waves.

* Per-wave working copies of the EGOALT files: the wave prefix is stripped from
* every variable name and the wave number is stored in -wave-.
forvalues num = 1/$nW {
    local letter : word `num' of $W   // wave letter that goes with wave number `num'
    display `num'
    use "$main_data/bhps_w`num'/b`letter'_egoalt.dta", clear
    rename b`letter'_* *
    generate wave = `num'
    label variable wave "wave"
    save "$dataout\egoaltSmall`num'", replace
}

* Same treatment for the INDALL files; -age- is renamed to -age_indall- so that
* its origin stays visible after later merges.
forvalues num = 1/$nW {
    local letter : word `num' of $W
    display `num'
    use "$main_data/bhps_w`num'/b`letter'_indall.dta", clear
    rename b`letter'_* *
    generate wave = `num'
    rename age age_indall
    label variable wave "wave"
    save "$dataout\indallSmall`num'", replace
}
* For every wave, build one row per (parent, child) pair:
*   pidp  = the parent (the "ego" of the selected EGOALT rows)
*   kpidp = the child  (the "alter" of those rows)
* Age and year of birth of the child are looked up in that wave's INDALL file.
*
* Changes with respect to the previous version of this loop:
*   - -wave- is set from the loop index instead of being copied from INDALL,
*     so it is never missing for a child that is not found in INDALL.
*   - negative missing-value codes in the child's age are turned into Stata
*     missings before ranking, so an unknown age sorts last instead of first.
*   - children of the same age (e.g. twins) are ordered by kpidp, so that
*     k_id is the same every time the do-file is run.
forvalues i = 1/18 {
    use "$dataout\egoaltSmall`i'", clear

    * Keep the rows this script treats as "ego is a parent of alter"
    * (relationship_bh codes 9 to 12; NOTE(review): confirm against the codebook).
    keep if relationship_bh>=9 & relationship_bh<=12
    count
    keep pidp apidp relationship_bh esex asex

    * Temporarily give the child's id the name pidp so it can be matched to INDALL.
    rename pidp xpidp     // xpidp: the parent
    rename apidp pidp     // pidp : now the child
    merge m:1 pidp using "$dataout\indallSmall`i'.dta", keepusing(age_indall birthy)
    drop if _merge==2     // people in INDALL who are not a child in any selected row
    drop _merge

    generate wave = `i'
    label variable wave "wave"

    * Child-level variables get a k prefix; the parent gets pidp back.
    rename pidp kpidp
    label variable kpidp "cross-wave identifier of child"
    rename age_indall kage
    label variable kage "child's age"
    rename asex ksex
    label variable ksex "child's sex"
    rename xpidp pidp
    rename relationship_bh krelationship_bh
    label variable krelationship_bh "Relationship of ego with the children"
    rename birthy kbirthy
    label variable kbirthy "child's year of birth"

    * Number the children of each parent from youngest to oldest.
    quietly mvdecode kage, mv(-9/-1)
    bysort pidp (kage kpidp): generate k_id = _n

    save "$dataout\childrenSmall`i'", replace
}
* Stack the per-wave child files into one long file, starting from wave 1.
use "$dataout\childrenSmall1", clear
local k = 2
while `k' <= $nW {
    append using "$dataout\childrenSmall`k'"
    local ++k
}

* Inspect, order by parent and wave, and shrink storage types.
describe
sort pidp wave
compress

* Values -9 to -1 become Stata system missing in every variable.
quietly mvdecode _all, mv(-9/-1)

* Child panel used by the merge further down.
save "$dataout\childrenPanelSmall", replace

*******************************************************
// Create mother and father id:

* For every wave, flag on each ego-alter row whether the alter is the ego's
* mother, father, grandmother or grandfather, and store the alter's id in the
* matching variable (missing on all other rows). All EGOALT rows are kept.
forvalues wv = 1/18 {
    use pidp apidp relationship_bh esex asex using "$dataout\egoaltSmall`wv'", clear

    * relationship_bh 4 to 7 is what this script treats as "alter is a parent of ego".
    local parentRow "inrange(relationship_bh, 4, 7)"
    generate long mnpid_bh = apidp if `parentRow' & asex == 2
    generate long fnpid_bh = apidp if `parentRow' & asex == 1

    * relationship_bh 20 is what this script treats as "alter is a grandparent of ego".
    generate long grandmother_pidp = apidp if relationship_bh == 20 & asex == 2
    generate long grandfather_pidp = apidp if relationship_bh == 20 & asex == 1

    label variable mnpid_bh "cross-wave identifier of mother"
    label variable fnpid_bh "cross-wave identifier of father"
    label variable grandmother_pidp "cross-wave identifier of grandmother"
    label variable grandfather_pidp "cross-wave identifier of grandfather"

    generate wave = `wv'
    label variable wave "wave"
    save "$dataout\parentSmall`wv'", replace
}
* Stack the per-wave parent files into one long file, starting from wave 1.
use "$dataout\parentSmall1", clear
foreach wv of numlist 2/$nW {
    append using "$dataout\parentSmall`wv'"
}

* Inspect, order by person and wave, and shrink storage types.
describe
sort pidp wave
compress

* Values -9 to -1 become Stata system missing in every variable.
quietly mvdecode _all, mv(-9/-1)

* Parent panel used by the merge further down.
save "$dataout\parentPanelSmall", replace

**** MERGE CHILD, PARENT AND EGOALT DATASETS
*
* The previous version used -merge m:m pidp-. An m:m merge pairs the 1st row
* of a person in the master with the 1st row of that person in the using file,
* the 2nd with the 2nd, and so on; because wave was not part of the key, ids
* from one wave could land on rows of another wave. The links are now made on
* (pidp, wave):
*   - parents : parentPanelSmall is reduced to one row per (pidp, wave) and
*               merged m:1.
*   - children: joined with -joinby-, which repeats each master row once per
*               child of that person in that wave.
* As before, observations that are found on one side only are kept.
*
* NOTE(review): the master file is created outside this do-file; this section
* assumes it contains -pidp- and -wave-. If it has several rows per
* (pidp, wave), each of them is repeated once per child.
* NOTE(review): the alter-level variables of parentPanelSmall (apidp,
* relationship_bh, esex, asex) are no longer copied into the merged file; under
* the m:m merge they ended up on arbitrary rows. esex still arrives from the
* child file for matched rows.

* (1) One row per person and wave holding the parent / grandparent ids.
use "$dataout\parentPanelSmall", clear
foreach v of varlist mnpid_bh fnpid_bh grandmother_pidp grandfather_pidp {
    * Missing values sort last, so the first row of each group holds a
    * non-missing id whenever the person has one in that wave.
    bysort pidp wave (`v'): replace `v' = `v'[1]
}
bysort pidp wave: keep if _n == 1
keep pidp wave mnpid_bh fnpid_bh grandmother_pidp grandfather_pidp
tempfile parentLookup
save "`parentLookup'"

* (2) Attach the parent ids to the main panel.
use "$dataout\egoaltPanelSmallVar", clear
confirm variable pidp wave
merge m:1 pidp wave using "`parentLookup'"
drop _merge

* (3) Attach the children of each person in each wave.
joinby pidp wave using "$dataout\childrenPanelSmall", unmatched(both)
drop _merge

* Calendar year attached to each wave: wave 1 -> 1991, ..., wave 18 -> 2008.
* Any other value of wave leaves year missing.
generate year = .
forvalues wv = 1/18 {
    replace year = 1990 + `wv' if wave == `wv'
}

* Frequency table of the new variable (-fre- is a user-written command).
fre year

* Merged panel with parent and child identifiers.
save "$dataout\egoaltPanelSmallVarID", replace


**** PREPARE THE DATA TO CREATE THE PARENTS' VARIABLES

* Store the data currently in memory as the merged panel.
save "$dataout\egoaltPanelSmallVarID", replace

* Write one file per wave for each of the three panels: the merged panel, the
* parent-id panel and the child-id panel. Only the rows of the requested wave
* are read from each panel file.
forvalues wv = 1/18 {
    use if wave == `wv' using "$dataout\egoaltPanelSmallVarID", clear
    save "$dataout\egoaltPanelSmallVarID`wv'", replace

    use if wave == `wv' using "$dataout\parentPanelSmall", clear
    save "$dataout\parentPanelSmall`wv'", replace

    use if wave == `wv' using "$dataout\childrenPanelSmall", clear
    save "$dataout\childrenPanelSmall`wv'", replace
}


* Remove the intermediate datasets that are no longer needed.
* Every file is addressed through $dataout, so the clean-up does not depend on
* the current working directory (the two panel files were previously erased by
* bare file name, which only worked while the working directory was $dataout).

* Per-wave working files created earlier in this do-file.
foreach n in $N {
    erase "$dataout\egoaltSmall`n'.dta"
    erase "$dataout\indallSmall`n'.dta"
    erase "$dataout\childrenSmall`n'.dta"
    erase "$dataout\parentSmall`n'.dta"
}

* Long parent and child panels (their per-wave splits are kept).
erase "$dataout\parentPanelSmall.dta"
erase "$dataout\childrenPanelSmall.dta"
(3-3/5)