********************************************************************************
********************************************************************************
* Date: November 2025
** Do: Code to reproduce Chapter 4 of World Inequality Report 2026
*** Author: Ricardo Gómez-Carrera
	* 1. Figure 4.1. The gender gap is still large considering several dimensions
	* 2. Figure 4.2 We are working fewer hours and being more productive
	* 3. Figure 4.3. Women work more in all regions
	* 4. Figure 4.4. Female average incomes are smaller than males' everywhere
	* 5. Figure 4.5. Female labor income shares are very low almost everywhere
	* 6. Figure 4.6. The gender gap is wider considering domestic work
	* 7. Figure 4.7. The gender gap is larger when accounting for domestic labor hours
	* 8. Figure 4.8. Women are less likely than men to hold a job in the labor market
	* 9. Figure 4.9. Employed women earn less than employed men everywhere
	*10. Figure 4.10. The high school enrollment gender gap has decreased in the last 25 years
	*11. Figure 4.11. Education alone cannot fully close the gap

********************************************************************************
********************************************************************************

/*
For information on how to use the widcode and the World Inequality Database, visit:
	*1. https://wid.world/codes-dictionary/
	*2. https://github.com/world-inequality-database/wid-stata-tool
	*3. https://github.com/world-inequality-database/wid-r-tool
	*4. https://wid.world/document/distributional-national-accounts-dina-guidelines-2025-methods-and-concepts-used-in-the-world-inequality-database/
	
	For other data inputs visit:
	*1. https://github.com/world-inequality-database/wid-world/tree/master/data-input
	
*/


**# Figure 4.1. The gender gap is still large considering several dimensions
* ---------------------------------------------------------------------------- *
* ---------------------------------------------------------------------------- * 
/*
Constructed using data from:
 -Figure 4.3 in this do-file
 -Figure 4.4 in this do-file
 -Figure 4.8 in this do-file
 -Figure 4.9 in this do-file 
 -Figure 4.10 in this do-file 
*/



**# Figure 4.2 We are working fewer hours and being more productive
* ---------------------------------------------------------------------------- *
* ---------------------------------------------------------------------------- * 
/*
Data from Andreescu et al. (2025)
Figures 4 and 23 in Andreescu et al. (2025).

Paper
https://wid.world/www-site/uploads/2025/05/WorldInequalityLab_WP2025_08_Global-Labour-Hours-in-Paid-and-Unpaid-Work_Inequality-Productivity-and-Structural-Transformation1800-2100_Final.pdf

Replication package
https://wid.world/document/data-file-to-global-labour-hours-in-paid-and-unpaid-work-inequality-productivity-and-structural-transformation-1800-2100-appendix-to-wil-technical-note-2025-01/
*/



**# Figure 4.3. Women work more in all regions
* ---------------------------------------------------------------------------- *
* ---------------------------------------------------------------------------- * 
/*
Data from Andreescu et al. (2025)
Table 3 in Andreescu et al. (2025).

Paper
https://wid.world/www-site/uploads/2025/05/WorldInequalityLab_WP2025_08_Global-Labour-Hours-in-Paid-and-Unpaid-Work_Inequality-Productivity-and-Structural-Transformation1800-2100_Final.pdf

Replication package
https://wid.world/document/data-file-to-global-labour-hours-in-paid-and-unpaid-work-inequality-productivity-and-structural-transformation-1800-2100-appendix-to-wil-technical-note-2025-01/
*/



**# Figure 4.4. Female average incomes are smaller than males' everywhere
* ---------------------------------------------------------------------------- *
* ---------------------------------------------------------------------------- * 
* Purpose: load spllin992f (p0p100) for the world and the eight regional
* aggregates, reshape to one column per region and export the table to sheet
* "data-F4.4" of the chapter workbook.
* Source is chosen by $use_widcode: "YES" downloads through the wid command,
* anything else reads the local extract $wid2024.
* Both branches keep the same benchmark years (1990, 1995, ..., 2020 and
* $year_output) so the exported table does not depend on the data source.
clear

if "$use_widcode"=="YES"{
	* Download the whole range, then keep the benchmark years only
	wid, ind(spllin) age(992) pop(f)  p(p0p100) area(WO XN XF QE XB XL XR XS QL) year(1990/$year_output) clear
	* Harmonize names with the layout of the local WID extract
	ren (variable country percentile) (widcode iso p)
	drop age pop
	g currency=""
	keep if inlist(widcode,"spllin992f")
	keep if inlist(year, 1990,1995,2000,2005,2010,2015,2020,$year_output)
}
else{
	use if inlist(year, 1990,1995,2000,2005,2010,2015,2020,$year_output) & inlist(widcode, "spllin992f") & p == "p0p100" & inlist(iso, "WO", "XN", "XF", "QE", "XB", "XL", "XR", "XS", "QL") using $wid2024, clear
}

* One row per year, one column per region (valueWO -> WO, ...)
keep iso value year
reshape wide value, i(year) j(iso) string
renvars value*, predrop(5)

* Variable labels become the column headers in the export (first(varl))
label var WO	"World"
label var QL 	"East Asia"
label var QE	  "Europe"
label var XL	 "Latin America"
label var XN	 "MENA"
label var XB 	"North America & Oceania"
label var XS 	"South & Southeast Asia"
label var XR	 "Russia & Central Asia"
label var XF "Sub-Saharan Africa"
order year QE XL XN XB XR XS XF QL WO

export excel using "$outputs/WIR_2026_Chapter4.xlsx", sheet("data-F4.4") sheetmod cell(A1) first(varl)
* Display the data cells as percentages
putexcel set "$outputs/WIR_2026_Chapter4.xlsx", modify sheet("data-F4.4")
putexcel (B2:J45), nformat(percent)


**# Figure 4.5. Female labor income shares are very low almost everywhere
* ---------------------------------------------------------------------------- *
* ---------------------------------------------------------------------------- * 
* Purpose: country-level spllin992f (p0p100) for $year_output, restricted to
* the countries listed in the eight region globals, sorted from highest to
* lowest value and saved to $work_data/flis.dta.
* Source is chosen by $use_widcode: "YES" downloads through the wid command,
* anything else reads the local extract $wid2024.
clear

if "$use_widcode"=="YES" {
	wid, ind(spllin) age(992) pop(f)  p(p0p100) year($year_output) clear
	* Harmonize names with the layout of the local WID extract
	rename (variable country percentile) (widcode iso p)
	drop age pop
	generate currency=""
	keep if widcode=="spllin992f"
}
else {
	use if year==$year_output & widcode=="spllin992f" & p=="p0p100" using $wid2024, clear
}

* Kosovo: recode KV to XK before matching against the region lists
replace iso="XK" if iso=="KV"

* Tag each country with the region global that lists it; countries that are
* in none of the lists are dropped
generate region=""
foreach reg in MENA RUCA SSAF SSEA EURO LATA NAOC EASA {
	foreach c of global `reg' {
		replace region="`reg'" if iso=="`c'"
	}
}
drop if region==""

* Descending order of the share, then keep only the code and the value
gsort -value
keep iso value

rename iso iso_a2_eh
save "$work_data/flis.dta", replace


**# Figure 4.6. The gender gap is wider considering domestic work
* ---------------------------------------------------------------------------- *
* ---------------------------------------------------------------------------- * 
/*
Data from Andreescu et al. (2025)
Figure 19 (updated) in Andreescu et al. (2025).

Revised values for the Gender Gap in Hourly Income for all regions using the decadal estimates for economic labor hours. 
Note that we still use the 2000-2025 estimate for domestic labor hours since we do not have enough data to estimate that statistic per decade.


Paper
https://wid.world/www-site/uploads/2025/05/WorldInequalityLab_WP2025_08_Global-Labour-Hours-in-Paid-and-Unpaid-Work_Inequality-Productivity-and-Structural-Transformation1800-2100_Final.pdf

Replication package
https://wid.world/document/data-file-to-global-labour-hours-in-paid-and-unpaid-work-inequality-productivity-and-structural-transformation-1800-2100-appendix-to-wil-technical-note-2025-01/
*/



**# Figure 4.7. The gender gap is larger when accounting for domestic labor hours
* ---------------------------------------------------------------------------- *
* ---------------------------------------------------------------------------- * 
/*
Data from Andreescu et al. (2025)
Figures 20 and 21 (updated) in Andreescu et al. (2025).

Revised values for the Gender Gap in Hourly Income for all regions using the decadal estimates for economic labor hours. 
Note that we still use the 2000-2025 estimate for domestic labor hours since we do not have enough data to estimate that statistic per decade.


Paper
https://wid.world/www-site/uploads/2025/05/WorldInequalityLab_WP2025_08_Global-Labour-Hours-in-Paid-and-Unpaid-Work_Inequality-Productivity-and-Structural-Transformation1800-2100_Final.pdf

Replication package
https://wid.world/document/data-file-to-global-labour-hours-in-paid-and-unpaid-work-inequality-productivity-and-structural-transformation-1800-2100-appendix-to-wil-technical-note-2025-01/
*/



**# Figures 4.8. and 4.9. 
* ---------------------------------------------------------------------------- *
* Data provided by Valentina Gabrielli (Flis_Ratios_june2025.csv)
* Note: https://wid.world/document/2024-update-for-female-labor-income-share/
* ---------------------------------------------------------------------------- *
* Purpose: population-weighted regional and world averages of gtotalempratio
* (read from Flis_Ratios_june2025.csv), with a 2024 value projected from each
* country's average yearly growth over 2014-2023, exported to sheet
* "data-F4.8" of the chapter workbook.
* NOTE(review): gearningsratio is cleaned below but dropped before the
* aggregation, so this block exports a single series. Presumably the
* Figure 4.9 series needs the same pipeline run on gearningsratio - confirm.
* NOTE(review): the projection year 2024 is hard-coded; confirm it matches
* $year_output and that the csv has no 2024 rows (merge 1:1 would fail).

*Populations (for weights)
use if inrange(year, 1990,$year_output) & inlist(widcode, "npopul999i") using $wid2024, clear
drop currency  p widcode
ren value npopul999i
tempfile population
save `population',replace

*Data provided by Valentina Gabrielli
import delimited using "$raw/Flis_Ratios_june2025.csv",  clear

*Destring ("NA" strings are blanked first so destring can convert the columns)
foreach v in gearningsratio gtotalempratio {
	replace `v'="" if `v'=="NA"
}
destring gearningsratio gtotalempratio, replace

keep gtotalempratio iso year
ren gtotalempratio value
tempfile original
save `original',replace

*Forecast
keep if inrange(year, 2013, 2023)
sort iso year

* Year-on-year growth in percent (missing for each country's first year)
bys iso (year): gen growth = (value - value[_n-1]) / value[_n-1] * 100

* Average growth used for the 2024 projection (previous 10 years: 2014-2023)
egen avg_growth_2024 = mean(growth) if inrange(year, 2014, 2023), by(iso)

* Create the 2024 observations by duplicating each country's 2023 row
expand 2 if year==2023
bys iso (year): replace year = 2024 if year==2023 & _n==_N

* 2024 value = 2023 value grown at the average rate
sort iso year
bys iso (year): gen value_2024 = value[_n-1] * (1 + avg_growth_2024/100)
replace value = value_2024 if year==2024
drop value_2024 growth avg_growth_2024

* Keep the projected rows only and stack them under the original series
keep if year==2024
append using `original'

ren value gtotalempratio
* Only country-years present in both the ratio and the population data remain
merge 1:1 iso year using `population'
keep if _merge==3
drop _merge


*Aggregate at the regional level
* Map every country listed in a region global to that region's WID code
gen region2=""
local region_globals EURO NAOC LATA MENA SSAF RUCA EASA SSEA
local region_codes   QE   XB   XL   XN   XF   XR   QL   XS
forvalues k = 1/8 {
	local reg  : word `k' of `region_globals'
	local code : word `k' of `region_codes'
	foreach c of global `reg' {
		replace region2="`code'" if iso=="`c'"
	}
}

* Additional codes assigned to a region by hand
replace region2="QE" if inlist(iso, "QM","OC")
replace region2="XB" if inlist(iso, "OH")
replace region2="XL" if inlist(iso, "OD")
replace region2="XN" if inlist(iso, "OE")
replace region2="XF" if inlist(iso, "OJ")
replace region2="XR" if inlist(iso, "OA")
replace region2="QL" if inlist(iso, "OB")
replace region2="XS" if inlist(iso, "OI")
ren region2 region
drop if iso=="CK"

*World
* NOTE(review): rows with an empty region are still part of the world
* average below - confirm this is intended.
preserve
collapse (mean)	gtotalempratio [w=npopul999i],by(year )
g region ="WO"
tempfile world
save `world',replace
restore

*Regions
collapse (mean)	gtotalempratio [w=npopul999i],by(year region)

append using 	`world'

ren (gtotalempratio region ) (value iso)
keep if  inlist(iso, "WO", "XN", "XF", "QE", "XB", "XL", "XR", "XS", "QL")

* One row per year, one column per region (valueWO -> WO, ...)
keep iso value year
greshape wide value, i(year) j(iso) string
renvars value*, predrop(5)

* Variable labels become the column headers in the export (first(varl))
label var WO	"World"
label var QL 	"East Asia"
label var QE	  "Europe"
label var XL	 "Latin America"
label var XN	 "MENA"
label var XB 	"North America & Oceania"
label var XS 	"South & Southeast Asia"
label var XR	 "Russia & Central Asia"
label var XF "Sub-Saharan Africa"
order year QE XL XN XB XR XS XF QL WO
keep if inlist(year, 1990,1995,2000,2005,2010,2015,2020,$year_output)

* Write to this figure's own sheet: the sheet name "data-F4.4" used before
* overwrote the Figure 4.4 data in the same workbook
export excel using "$outputs/WIR_2026_Chapter4.xlsx", sheet("data-F4.8") sheetmod cell(A1) first(varl)
putexcel set "$outputs/WIR_2026_Chapter4.xlsx", modify sheet("data-F4.8")
putexcel (B2:J45), nformat(percent)




**# Figure 4.10. The high school enrollment gender gap has decreased in the last 25 years
* ---------------------------------------------------------------------------- *
*  Unesco enrollment: https://databrowser.uis.unesco.org/browser           
* ---------------------------------------------------------------------------- *

* Purpose: female-to-male ratio of the UNESCO indicators NERT.3.F.CP and
* NERT.3.M.CP by World Bank income group and for the world, one column per
* group, with gaps in 2000-2024 filled forward, exported to sheet "data-F4.10"
* of the chapter workbook.
* NOTE(review): presumably NERT.3.*.CP is an enrollment rate by sex (per the
* figure title) - confirm against the UNESCO data browser.

import delimited "$work_data/unesco_enrollment.csv", clear
// tab geo
* Keep the World Bank income groups and the world aggregate only
keep if  ///
inlist(geounit,"WB: High income (July 2024)","WB: Low & middle income (July 2024)","WB: Low income (July 2024)") | ///
inlist(geounit,"WB: Lower middle income (July 2024)","WB: Middle income (July 2024)", ///
"WB: Upper middle income (July 2024)")  | inlist(geounit,"WB: World")
misstable summarize

* Recode the indicator id into a gender suffix used by the reshape below
replace indicatorid="F" if indicatorid=="NERT.3.F.CP"
replace indicatorid="M" if indicatorid=="NERT.3.M.CP"
ren indicatorid gender

* Short region codes, later used as variable names
keep gender geounit year value
g region=""
replace region="HIC" if geounit=="WB: High income (July 2024)"
replace region="LMIC" if geounit=="WB: Low & middle income (July 2024)"
replace region="LIC" if geounit=="WB: Low income (July 2024)"
replace region="LowerMIC" if geounit=="WB: Lower middle income (July 2024)"
replace region="MIC" if geounit=="WB: Middle income (July 2024)"
replace region="UpperMIC" if geounit=="WB: Upper middle income (July 2024)"
replace region="World" if geounit=="WB: World"
tab region, m

* Female / male ratio per region-year
keep gender region year value
reshape wide value, i(year region) j(gender) string
g ratio=valueF/valueM
drop valueF valueM

* One row per year, one column per region (ratioHIC -> HIC, ...)
reshape wide ratio, i(year ) j(region) string
renvars ratio*, predrop(5)

* Append an empty row for 2024 so it can be filled by the loop below
insobs 1
replace year=2024 if year==.

sort year

* Fill missing values with the mean of the two preceding rows; years are
* processed in ascending order, so filled values feed later fills.
* LIC is deliberately left out of the fill.
foreach var in HIC /*LIC*/ LMIC LowerMIC MIC UpperMIC World{
	forv x=2000/2024{
		replace `var'       = (`var'[_n-1] + `var'[_n-2]) / 2       if year == `x' & `var'==.
	}
}

* Variable labels become the column headers in the export (first(varl))
label var HIC "High income"
label var LIC "Low income"
label var LMIC "Low & middle income"
label var LowerMIC "Lower middle income"
label var MIC "Middle income"
label var UpperMIC "Upper middle income"
label var World "World"

* Same workbook as the other figures of the chapter (the path previously had
* a doubled dot and slash, which sent this sheet to a separate file)
export excel using "$outputs/WIR_2026_Chapter4.xlsx", sheet("data-F4.10") sheetmod cell(A1) first(varl)




**# Figure 4.11. Education alone cannot fully close the gap
* ---------------------------------------------------------------------------- *
* ---------------------------------------------------------------------------- * 
/*
Data from Gethin (2024).
Figure 10 – Returns to Schooling by Gender and World Region in Gethin (2024).

Paper
https://amory-gethin.fr/files/pdf/Gethin2024.pdf

* Data provided by Amory Gethin
*/

