// CITIZENSHIP ANALYSIS

/* NOTE:

Respondents born in the UK are not asked the citizenship questions.
However, anyone born in the UK in or after 1983 whose parents were neither British citizens nor settled in the UK
was NOT automatically granted UK citizenship, so it is not possible to identify from this data whether such
persons are UK citizens.

Across six waves of data (and 81540 respondents), the number of individuals born in the UK in or after 1983 with
both parents born outside the UK is 1590. It is not possible to identify whether these 2% are UK citizens.
Note that, among these, anyone with at least one parent who had acquired UK citizenship or had become a permanent
UK resident before their birth (there are a few other such clauses) would have UK citizenship.

Also, foreign-born respondents who are UK citizens are not asked again about their citizenship in subsequent waves.

*/

cd "N:\DATA\Understanding Society\indresp files\"

******************************************************************************************************
** 1st step: I merge h_indresp_protect + xwavedat_protect + citizenship variables from previous waves:
******************************************************************************************************

* Waves a-g: cut every individual-response file down to the person identifier,
* the person number and the citizenship items, and store the result as ctz_<wave>.dta.
foreach wv in a b c d e f g {

    use "`wv'_indresp_protect.dta", clear
    keep pidp `wv'_pno `wv'_citzn1 `wv'_citzn2 `wv'_citzn3 `wv'_citzn_code `wv'_citzn2_code
    sort pidp
    save ctz_`wv', replace

}

* Wave h: same citizenship items, plus the survey design variables and weights
* that the tabulations at the end of this file rely on.
use "h_indresp_protect.dta", clear

keep pidp ///
    h_pno h_citzn1 h_citzn2 h_citzn3 h_citzn_code h_citzn2_code /// citizenship items
    h_strata h_psu /// survey design
    h_indpxub_xw h_indinub_xw h_indscub_xw h_indpxui_xw h_indinui_xw h_indscui_xw h_ind5mus_xw /// _xw weights
    h_indnsub_lw h_indbdub_lw h_ind5mus_lw h_indbd91_lw h_indin01_lw h_indin91_lw /// _lw weights
    h_indinub_lw h_indinui_lw h_indinus_lw h_indns91_lw h_indpxub_lw ///
    h_indpxui_lw h_indpxus_lw h_indscub_lw h_indscui_lw h_indscus_lw ///
    h_xtra5minosm_dv

sort pidp
save ctz_h, replace

* Build citizen.dta: the cross-wave file (xwavedat_protect) with the per-wave
* citizenship extracts ctz_a ... ctz_h attached, one observation per pidp.
* "clear" makes the load independent of whatever is currently in memory.
use "xwavedat_protect.dta", clear
sort pidp

* Waves a-g: attach each extract in turn and discard the merge indicator,
* because merge refuses to run while a variable named _merge already exists.
foreach wv in a b c d e f g {
    merge 1:1 pidp using "ctz_`wv'.dta"
    drop _merge
    sort pidp
}

* Wave h is attached last. Its _merge indicator is left in the saved file,
* exactly as before, so it stays available to anything that reads citizen.dta.
merge 1:1 pidp using "ctz_h.dta"
sort pidp

save "citizen.dta", replace

* This is the name of my dataset ( h_indresp_protect + xwavedat_protect + citizenship variables from previous waves)
* The option that lets -use- discard the data in memory is "clear"; "replace" is not accepted by -use-.
use "citizen.dta", clear

****************************************************************************************
** 2nd step: I generate a country of birth variable for respondents and their parents
****************************************************************************************

* For the respondent (plbornc_all), the mother (macob_all) and the father (pacob_all)
* a grouped country-of-birth variable <name>_det with values 1-12 is created.
* The groups are assigned in sequence, so a code listed in two groups ends up in the later one.
foreach cob in plbornc_all macob_all pacob_all {

    gen `cob'_det = .

    // UK born
    replace `cob'_det = 1 if inlist(`cob', 195, 206, 1, 2, 3, 4)

    // Ireland
    replace `cob'_det = 2 if `cob' == 5

    // EU-13
    replace `cob'_det = 3 if inlist(`cob', 6, 7, 8, 9, 353, 123, 132, 312, 346, ///
        179, 198, 209, 296, 317)

    // EU-8 EU-2 EU Other
    replace `cob'_det = 4 if inlist(`cob', 10, 11, 147, 318, 332, 333, ///
        173, 176, 269, 259, 252, 192, 223, 177)

    // Other Europe (non EU)
    replace `cob'_det = 5 if inlist(`cob', 12, 347, 365, 108, 140, 320, 327, 385, ///
        248, 263, 205, 278, 225, 281, 131, 279, 118, 160, 214, 233, 238)

    // MENA and Central Asia
    replace `cob'_det = 6 if inlist(`cob', 348, 361, 368, 384, 110, 314, 325, 127, ///
        188, 301, 256, 230, 231, 241, 243, 253, 249, 283, 234, 101, 350, 184, 104, 102)

    // India (kashmir included here)
    replace `cob'_det = 7 if inlist(`cob', 18, 242)

    // South Asia except India
    replace `cob'_det = 8 if inlist(`cob', 19, 20, 21, 288)

    // East Asia & South East Asia (code 17 = China hong kong)
    replace `cob'_det = 9 if inlist(`cob', 17, 349, 352, 377, 331, 146, 309, 266, ///
        285, 228, 236, 274, 247, 151, 251)

    // Sub-Saharan Africa
    // NOTE(review): code 330 is listed here and again under Other below. Other is assigned
    // later, so 330 ends up in group 12, as it always did. Confirm which group is intended.
    replace `cob'_det = 10 if inlist(`cob', 22, 351, 355, 386, 387, 23, 24, 25, ///
        103, 105, 106, 112, 335, 343, 326, 330, 149, 152, 154, 321, 168, 178, 191, ///
        264, 265, 284, 286, 193, 202, 203, 215, 216, 255, 235, 273, 328, 26, 148, ///
        141, 159, 323)

    // US, Canada, Australia, New Zealand
    replace `cob'_det = 11 if inlist(`cob', 13, 14, 15, 16)

    // Other
    replace `cob'_det = 12 if inlist(`cob', 376, 382, 388, 997, 27, 113, 114, 116, ///
        126, 129, 137, 359, 142, 165, 161, 336, 341, 339, 338, 337, 135, 145, 183, ///
        182, 187, 189, 330, 275, 303, 174, 307, 197, 211, 213, 217, 282, 305, 334, ///
        364, 133, 344, 291)

}

// Author note: when country of birth in plbornc_all = -9 (missing), the person was born in the UK.
// UK birth is therefore taken from bornuk_dv and overrides any group assigned in the loop.
// (This statement used to sit inside the loop and was re-run on every pass; the end result is the same.)
replace plbornc_all_det = 1 if bornuk_dv == 1

rename plbornc_all_det cob_det
rename macob_all_det macob_det
rename pacob_all_det pacob_det

label define cob_det ///
    1 "UK" ///
    2 "Ireland" ///
    3 "EU-13" ///
    4 "EU-8, EU-2, EU Other" ///
    5 "Other Europe" ///
    6 "MENA & Central Asia" ///
    7 "India" ///
    8 "Pakistan & other South Asia" ///
    9 "East & Southeast Asia" ///
    10 "Sub-Saharan Africa" ///
    11 "Australia, NZ Canada, US" ///
    12 "Other countries", replace

lab val cob_det macob_det pacob_det cob_det

// Fallback: where the detailed code gave no group, use the coarser plbornc variable.
// This runs BEFORE the .a/.b tagging below. Previously it ran afterwards, when foreign-born
// respondents without a group had already been set to .b and "cob_det==." could no longer match them.
replace cob_det = 2  if cob_det == . & plbornc == 5
replace cob_det = 3  if cob_det == . & inlist(plbornc, 6, 7, 8, 9)
// Parentheses matter: & binds tighter than |, so without them every plbornc==11 case was
// overwritten with 4 regardless of the value cob_det already had.
replace cob_det = 4  if cob_det == . & (plbornc == 10 | plbornc == 11)
replace cob_det = 5  if cob_det == . & plbornc == 12
replace cob_det = 7  if cob_det == . & plbornc == 18
replace cob_det = 8  if cob_det == . & inlist(plbornc, 19, 20, 21)
replace cob_det = 9  if cob_det == . & plbornc == 17
replace cob_det = 10 if cob_det == . & inrange(plbornc, 22, 26)
replace cob_det = 11 if cob_det == . & inlist(plbornc, 13, 14, 15, 16)
replace cob_det = 12 if cob_det == . & plbornc == 27

// Author counts (taken before the fallback above was moved):
// There are still 661 respondents with no info in country of birth (bornuk_dv==-9)
// There are also 318 we know they are not UK born, but we don't know the country (bornuk_dv==2 & cob_det==.)
replace cob_det = .a if bornuk_dv == -9
replace cob_det = .b if bornuk_dv == 2 & cob_det == .

// NOTE(review): labelmiss is not a built-in Stata command; presumably a user-written
// add-on that labels extended missing values. Confirm it is installed.
labelmiss cob_det .a "No info on cob" .b "Foreign born but unknown country", modify

// Parents: flag the negative source codes as extended missings.
// The test is on the source variables macob_all / pacob_all: the derived *_det variables
// only ever hold 1-12 or ., so testing them against -9 / -20 could never match.
replace macob_det = .a if macob_all == -9
replace macob_det = .c if macob_all == -20
labelmiss macob_det .a "No info" .c "No info from BHPS", modify

replace pacob_det = .a if pacob_all == -9
replace pacob_det = .c if pacob_all == -20
labelmiss pacob_det .a "No info" .c "No info from BHPS", modify

// Three-way summary of the respondent's country of birth: UK / EU / non-EU.
recode cob_det (1=1 "UK born") (2 3 4=2 "EU born") (5/12=3 "Non-EU born"), gen(cobeu)

**************************************************************************************
* 3rd step: Generating a summary variable for respondents' citizenship(s) in each wave
**************************************************************************************

* One summary variable per wave (a_citizenship ... h_citizenship).
* The rules are applied top to bottom and a later rule overwrites an earlier one,
* so the order of the statements inside the loop must not be changed.
foreach wv in a b c d e f g h {

    // Born in the UK and thus not asked about citizenship
    gen `wv'_citizenship = 1 if cob_det==1 & `wv'_pno!=.

    // Proxy respondents
    replace `wv'_citizenship = 2 if `wv'_citzn1==-7

    // Foreign born with UK citizenship ONLY
    replace `wv'_citizenship = 3 if `wv'_citzn1==1 & `wv'_citzn2==0 & `wv'_citzn3==0

    // Foreign born with UK and Other citizenship
    replace `wv'_citizenship = 4 if `wv'_citzn1==1 & (`wv'_citzn2==1 | `wv'_citzn3==1)

    // Foreign born without UK citizenship
    replace `wv'_citizenship = 5 if `wv'_citzn1==0

    // Inapplicable: present in the wave, question not applicable, nothing assigned so far
    replace `wv'_citizenship = 6 if `wv'_citizenship==. & `wv'_pno!=. & `wv'_citzn1==-8

}

lab define citizenship ///
    1 "Born in the UK" ///
    2 "Proxy respondents" ///
    3 "Foreign born UK citizens ONLY" ///
    4 "Foreign born UK + Other citizenship" ///
    5 "Foreign born Other citizenship ONLY" ///
    6 "Inapplicable", replace

lab val *_citizenship citizenship

** Here I am just identifying respondents mentioning British citizenship (x_citizenshipUK), EU citizenship (x_citizenshipEU) and Non-EU citizenship (x_citizenshipNONEU):

* Each flag is 1 when that kind of citizenship is mentioned and missing otherwise.
foreach wv in a b c d e f g h {

    // UK: citzn1 ticked, or code 6 / 44 recorded in either citizenship code variable
    gen `wv'_citizenshipUK = 1 if `wv'_citzn1==1
    replace `wv'_citizenshipUK = 1 if inlist(`wv'_citzn_code, 6, 44) | inlist(`wv'_citzn2_code, 6, 44)

    // EU / non-EU via citzn2, classified by the respondent's country-of-birth group (cobeu)
    gen `wv'_citizenshipEU    = 1 if `wv'_citzn2==1 & cobeu==2
    gen `wv'_citizenshipNONEU = 1 if `wv'_citzn2==1 & cobeu==3

    // EU / non-EU via citzn3, classified by the recorded code;
    // the same code ranges are checked on both code variables.
    foreach code in `wv'_citzn_code `wv'_citzn2_code {

        replace `wv'_citizenshipEU = 1 if `wv'_citzn3==1 & ( ///
            inrange(`code', 1, 5) | inrange(`code', 7, 12) | ///
            inlist(`code', 30, 32, 38, 46) | ///
            inrange(`code', 53, 68) | inlist(`code', 900, 901) )

        replace `wv'_citizenshipNONEU = 1 if `wv'_citzn3==1 & ( ///
            inrange(`code', 14, 28) | inlist(`code', 36, 37, 43, 45, 52) | ///
            inrange(`code', 70, 890) | inrange(`code', 902, 997) )

    }

    ** This is a summary variable that takes into account whether respondents mentioned a UK, EU or non-EU citizenship:
    local uk  `wv'_citizenshipUK
    local eu  `wv'_citizenshipEU
    local non `wv'_citizenshipNONEU

    gen `wv'_citizenship2 = .
    replace `wv'_citizenship2 = 1 if `uk'==1 & `eu'==. & `non'==.    // UK only
    replace `wv'_citizenship2 = 2 if `uk'==1 & `eu'==1 & `non'==.    // UK and EU
    replace `wv'_citizenship2 = 3 if `uk'==1 & `eu'==. & `non'==1    // UK and non-EU
    replace `wv'_citizenship2 = 4 if `uk'==1 & `eu'==1 & `non'==1    // UK , EU and Non-EU
    replace `wv'_citizenship2 = 5 if `uk'==. & `eu'==1 & `non'==.    // EU only
    replace `wv'_citizenship2 = 6 if `uk'==. & `eu'==. & `non'==1    // Non-EU only
    replace `wv'_citizenship2 = 7 if `uk'==. & `eu'==1 & `non'==1    // EU and non-EU

}

lab define citizenship2 ///
    1 "UK only" ///
    2 "UK & EU" ///
    3 "UK & Non-EU" ///
    4 "UK, EU & Non-EU" ///
    5 "EU only" ///
    6 "Non-EU only" ///
    7 "EU & non-EU", replace

lab val *_citizenship2 citizenship2

***********************************************************
* 4th step: Citizenship variable for wave h_ respondents:
***********************************************************

// Wave h_ is the reference because the interest is in wave h_ respondents.
// The wave a_ value is used only where wave h_ is coded 6 ("Inapplicable"),
// i.e. for respondents who are not asked the question in wave h_.
gen w_citizenship = cond(h_citizenship==6, a_citizenship, h_citizenship)
label values w_citizenship citizenship

// Nationality detail: start from the wave h_ x_citizenship2 summary generated above,
// and fall back to wave a_ when wave h_ has none but w_citizenship is known.
gen w_citizenship2 = h_citizenship2
replace w_citizenship2 = a_citizenship2 if w_citizenship2==. & w_citizenship!=.

// Two extra categories carried over from w_citizenship
replace w_citizenship2 = 8 if w_citizenship==1    // Born in the UK
replace w_citizenship2 = 9 if w_citizenship==2    // Proxy respondents

lab define w_citizenship2 ///
    1 "UK only" ///
    2 "UK & EU" ///
    3 "UK & Non-EU" ///
    4 "UK, EU & Non-EU" ///
    5 "EU only" ///
    6 "Non-EU only" ///
    7 "EU & non-EU" ///
    8 "Born in the UK" ///
    9 "Proxy respondents" , replace

lab values w_citizenship2 w_citizenship2

******************
** TABULATION ****
******************

// WEIGHTS --> longitudinal weights are needed because information from wave a_ is used, not only from wave h_.
// Two alternative designs are declared one after the other; the second svyset replaces the
// first, so the svy: command below runs with h_indinui_lw.
svyset h_psu [pw=h_indpxui_lw], strata(h_strata) singleunit(scaled)    // when including proxy respondents
svyset h_psu [pw=h_indinui_lw], strata(h_strata) singleunit(scaled)    // when NOT including proxy respondents

// This shows an unusually high share of EU born holding UK nationality only.
svy: tabulate w_citizenship2 cobeu if w_citizenship2!=9, column

// I examine this wave by wave to understand what is going on:
foreach wv in a b c d e f g h {

    tabulate `wv'_citizenship2 cobeu if `wv'_citizenship2!=9, column

}