# Access to Cooling in NCR

This notebook explores the census dataset to understand access to cooling in different provinces of NCR

## Data

The data for this was obtained from the census dataset provided by the Filipino govt. The locations of the latest barangays are taken from an open repository on [GitHub](https://github.com/altcoder/philippines-psgc-shapefiles?tab=readme-ov-file). 

## Description of the Census Data
There are 17 provinces, 30 municipalities and 1299 barangays in the NCR region. There are 13.4 million responses in the NCR file of the Census which is roughly the population of the region. In the Form 3 responses, which captures 10% of the representative population, there are 2.5 million responses. The access to cooling data is obatined from the representative sample. 

In [4]:
import pandas as pd
import pyreadstat
from matplotlib import pyplot as plt
import geopandas

Reading shapefiles of Manila

In [5]:
PHILIPPINES  = geopandas.read_file('../../data/shapefiles/philippines/phl_adminboundaries_candidate_exclude_adm3/phl_admbnda_adm2_psa_namria_20200529.shp')
MANILA = PHILIPPINES[PHILIPPINES['ADM2_EN'].isin(['NCR, City of Manila, First District', 'NCR, Second District', 'NCR, Third District', 'NCR, Fourth District'])]
#PHILIPPINES_ADM3 = geopandas.read_file('../../data/shapefiles/philippines/phl_adminboundaries_candidate_adm3/phl_admbnda_adm3_psa_namria_20200529.shp')

In [154]:
PHILIPPINES_BARANGAY = geopandas.read_file('../../data/shapefiles/philippines/phl_adm4_barangay/PH_Adm4_BgySubMuns.shp.shp')
PHILIPPINES_ADM4 = geopandas.read_file('../../data/shapefiles/philippines/phl_adminboundaries_candidate_exclude_adm3/phl_admbndl_admALL_psa_namria_itos_20200529.shp')
MANILA_BARANGAY = PHILIPPINES_BARANGAY.to_crs('EPSG:4326').sjoin(MANILA)

In [6]:
PHILIPPINES_MUNICIPALITY = geopandas.read_file('../../data/shapefiles/philippines/phl_adm3_municipality/PH_Adm3_MuniCities.shp.shp')
MANILA_MUNICIPALITY = PHILIPPINES_MUNICIPALITY.to_crs('EPSG:4326').sjoin(MANILA)

In [31]:
MANILA_BARANGAY.to_file('../../data/shapefiles/philippines/manila_barangay.shp', format = 'ESRI Shapefile')

  MANILA_BARANGAY.to_file('../../data/shapefiles/philippines/manila_barangay.shp', format = 'ESRI Shapefile')


In [7]:
form3 , meta = pyreadstat.read_dta('../../data/census/ncr_cph2020_form3.dta')
#form2 , meta = pyreadstat.read_dta('../../data/census/ncr_cph2020_form2.dta')

Read Census data

In [5]:
building_type_questions = ['b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'b7', 'b8']
education = ['p16']
pii = ['reg', 'prv', 'prv_name', 'mun', 'mun_name', 'bgy', 'bgy_name',
       'psgc_prv', 'psgc_mun', 'psgc_bgy', 'urb', 'husn', 'hsn']

occupation = ['p21']

In [167]:
MANILA_BARANGAY.rename(columns = {'adm4_psgc':'psgc_bgy', 'adm3_psgc': 'psgc_mun'}, inplace=True)

In [8]:
PHILIPPINES_MUNICIPALITY.rename(columns = {'adm4_psgc':'psgc_bgy', 'adm3_psgc': 'psgc_mun'}, inplace=True)

In [38]:
air_conditioning = form3.groupby(['prv_name', 'h15e']).size().unstack(fill_value=0).reset_index()
fan_cooling_equipment = form3.groupby(['prv_name', 'h15f']).size().unstack(fill_value=0).reset_index()

In [10]:
import numpy as np
air_conditioning['psgc_mun'] = air_conditioning['psgc_mun'].astype(np.int64)
fan_cooling_equipment['psgc_mun'] = fan_cooling_equipment['psgc_mun'].astype(np.int64)

In [39]:
air_conditioning.rename(columns = {1:'AC_Access', 2:'No_AC_Access', 9:'Unknown_AC'}, inplace=True)
fan_cooling_equipment.rename(columns = {1:'Cooling_Equipment_Access', 2:'No_Cooling_Equipment_Access', 9:'Unknown_CE'}, inplace=True)

In [184]:
form3['psgc_mun']=form3['psgc_mun'].astype(np.int64)

In [89]:
MANILA_MUNICIPALITY.to_file('../../data/shapefiles/philippines/manila_municipality.shp', file_format = 'ESRI Shapefile')

In [49]:
df = air_conditioning.merge(fan_cooling_equipment)#.to_csv('../../data/census/access_to_cooling.csv')

In [42]:
air_conditioning['total_AC'] = air_conditioning['AC_Access'] + air_conditioning['No_AC_Access'] + air_conditioning['Unknown_AC']
air_conditioning['% without AC Access'] = 100*air_conditioning['No_AC_Access']/air_conditioning['total_AC']
air_conditioning['% with AC Access'] = 100*air_conditioning['AC_Access']/air_conditioning['total_AC']

In [48]:
fan_cooling_equipment['total_CE'] = fan_cooling_equipment['Cooling_Equipment_Access'] + fan_cooling_equipment['No_Cooling_Equipment_Access'] + fan_cooling_equipment['Unknown_CE']
fan_cooling_equipment['% without CE Access'] = 100*fan_cooling_equipment['No_Cooling_Equipment_Access']/fan_cooling_equipment['total_CE']
fan_cooling_equipment['% with CE Access'] = 100*fan_cooling_equipment['Cooling_Equipment_Access']/fan_cooling_equipment['total_CE']

In [72]:
df['prv_name']= df['prv_name'].str.title()

In [91]:
from bokeh.io import  show
from bokeh.plotting import figure
from bokeh.palettes import Category10
from bokeh.transform import factor_cmap
from bokeh.plotting import figure,  show, output_notebook
import pandas as pd

output_notebook()

# Define the provinces and stack categories
provinces = df['prv_name'].tolist()
stack_categories = ['AC_Access', 'No_AC_Access',
       'Unknown_AC']

# Define the colors for each stack category
colors = Category10[len(stack_categories)]

# Create a figure
p = figure(x_range=provinces, title="% Respondents with Access to Air Conditioning by Province", tools="",  width=1000, height=500, toolbar_location='above')

# Stack the bars
p.vbar(x='prv_name', top='% with AC Access', width=0.5,  source=df)

# Configure legend
#p.legend.location = "top_right"

# Add labels
p.xaxis.axis_label = "Provinces"
p.yaxis.axis_label = "% Respondents"

p.xaxis.major_label_orientation = 45
# Set font style for x-axis labels
p.xaxis.major_label_text_font_size = "10pt"
p.xaxis.axis_label_text_font_style = "normal"

p.yaxis.axis_label_text_font_style = "normal"

p.yaxis.axis_label_text_font_style = "bold"
p.xaxis.axis_label_text_font_style = "bold"

p.title.text_font_size = "18pt"


# Set y-axis range from 0 to 100
p.y_range.start = 0
p.y_range.end = 100

# Show the plot
show(p)


In [92]:
from bokeh.io import  show
from bokeh.plotting import figure
from bokeh.palettes import Category10
from bokeh.transform import factor_cmap
from bokeh.plotting import figure,  show, output_notebook
import pandas as pd

output_notebook()

# Define the provinces and stack categories
provinces = df['prv_name'].tolist()
stack_categories = ['AC_Access', 'No_AC_Access',
       'Unknown_AC']

# Define the colors for each stack category
colors = Category10[len(stack_categories)]

# Create a figure
p = figure(x_range=provinces, title="% Respondents with Access to Other Cooling Equipment by Province", tools="",  width=1000, height=500, toolbar_location='above')

# Stack the bars
p.vbar(x='prv_name', top='% with CE Access', width=0.5,  source=df)

# Configure legend
#p.legend.location = "top_right"

# Add labels
p.xaxis.axis_label = "Provinces"
p.yaxis.axis_label = "% Respondents"

p.xaxis.major_label_orientation = 45
# Set font style for x-axis labels
p.xaxis.major_label_text_font_size = "10pt"
p.xaxis.axis_label_text_font_style = "normal"

p.yaxis.axis_label_text_font_style = "normal"

p.yaxis.axis_label_text_font_style = "bold"
p.xaxis.axis_label_text_font_style = "bold"

p.title.text_font_size = "18pt"


# Set y-axis range from 0 to 100
p.y_range.start = 0
p.y_range.end = 100

# Show the plot
show(p)
