# Using Nighttime Lights, NO2 and EVI as proxies for GDP

In [3]:
import pandas as pd
import geopandas as gpd

In [4]:
# Nighttime-lights (NTL) aggregates, one CSV per admin level (0-3):
# annual tables first, then monthly tables.
(
    ntl_admin0_annual,
    ntl_admin1_annual,
    ntl_admin2_annual,
    ntl_admin3_annual,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../../data/ntl/adm0_annual.csv",
        "../../data/ntl/adm1_annual.csv",
        "../../data/ntl/adm2_annual.csv",
        "../../data/ntl/adm3_annual.csv",
    )
)

(
    ntl_admin0_monthly,
    ntl_admin1_monthly,
    ntl_admin2_monthly,
    ntl_admin3_monthly,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../../data/ntl/adm0_monthly.csv",
        "../../data/ntl/adm1_monthly.csv",
        "../../data/ntl/adm2_monthly.csv",
        "../../data/ntl/adm3_monthly.csv",
    )
)

In [19]:
# Administrative boundary shapefiles for Syria, admin levels 1-3 and then 0.
syria_adm1_shp, syria_adm2_shp, syria_adm3_shp, syria_adm0_shp = (
    gpd.read_file(shp_path)
    for shp_path in (
        "../../data/boundaries/syr_admin1.shp",
        "../../data/boundaries/syr_admin2.shp",
        "../../data/boundaries/syr_admin3.shp",
        "../../data/boundaries/syr_admin0.shp",
    )
)

In [17]:
# ``Path`` is imported here because this cell is the first one (in file order)
# that needs it; without the import a fresh "Restart & Run All" fails with
# NameError.
from pathlib import Path

# Data folders are resolved relative to the current working directory, two
# levels below the project root (the same layout the "../../data" paths use).
PROJECT_ROOT = Path.cwd().parent.parent  # cwd() is a classmethod; no instance needed
DATA_PATH = PROJECT_ROOT / "data"
BOUNDARIES_PATH = DATA_PATH / "boundaries"
EVI_PATH = DATA_PATH / "evi"

In [13]:
from pathlib import Path


def preprocess_evi(evi_file: str | Path) -> pd.DataFrame:
    """Preprocess EVI CSV file."""
    evi_df = pd.read_csv(evi_file)

    metadata_cols = [
        col
        for col in evi_df.columns
        for word in ["PCODE", "_AR", "_EN", ".geo"]
        if word in col
    ]
    evi_df = (
        evi_df.rename(columns=lambda col: col[-14:] if col.endswith("_EVI") else col)
        .drop(columns=["system:index", "UPDATE_DAT"])
        .melt(
            id_vars=metadata_cols,
            var_name="band_date",
            value_name="EVI",
        )
        .assign(
            date=lambda df: pd.to_datetime(
                df["band_date"].str.extract(r"(\d{4}_\d{2}_\d{2})")[0],
                format="%Y_%m_%d",
            ),
        )
    )
    return evi_df

In [84]:
def filter_growing_season(df: pd.DataFrame) -> pd.DataFrame:
    """Keep only growing-season rows and tag each with its season year.

    The growing season is defined as February through June (inclusive) of
    each calendar year.

    Parameters
    ----------
    df
        Frame with a datetime ``date`` column.

    Returns
    -------
    pd.DataFrame
        Rows of ``df`` whose ``date`` falls in February-June, ordered by year
        (original row order is preserved within a year), with a fresh
        ``RangeIndex`` and a ``year`` column set to 1 January of the row's
        year. An existing ``year`` column is overwritten. Rows with a missing
        date are dropped. Empty input yields an empty frame that still has the
        ``year`` column.
    """
    # Selecting on the month keeps timestamps on 30 June that carry a time of
    # day, and copes with empty input; a missing date has no month and
    # therefore fails the test.
    in_season = df["date"].dt.month.between(2, 6)
    season_rows = df.loc[in_season]

    return (
        season_rows.assign(
            year=season_rows["date"].dt.to_period("Y").dt.to_timestamp()
        )
        # Stable sort: group rows by year without reshuffling rows inside a year.
        .sort_values("year", kind="stable")
        .reset_index(drop=True)
    )

In [86]:
# Yearly EVI exports to load (2010 through 2025).
EVI_YEARS = range(2010, 2026)


def load_evi(admin_level: int, boundaries: pd.DataFrame) -> pd.DataFrame:
    """Load every yearly EVI export of one admin level into a single long frame.

    Parameters
    ----------
    admin_level
        Admin level (0-3); selects both the ``Admin level N`` folder and the
        ``syria_admN_evi_stats_<year>.csv`` file names under ``EVI_PATH``.
    boundaries
        Boundary table for the same admin level; its ``PCODE`` and
        ``geometry`` columns are left-joined on ``PCODE``.

    Returns
    -------
    pd.DataFrame
        Rows sorted by ``date`` then ``NAME_EN`` with a fresh ``RangeIndex``,
        plus ``month`` and ``year`` columns holding the first day of the
        row's month / year.
    """
    yearly_frames = [
        preprocess_evi(
            EVI_PATH
            / f"Admin level {admin_level}"
            / f"syria_adm{admin_level}_evi_stats_{year}.csv"
        )
        for year in EVI_YEARS
    ]
    return (
        pd.concat(yearly_frames)
        .drop(columns=[".geo"])
        .merge(boundaries.filter(["PCODE", "geometry"]), on="PCODE", how="left")
        .sort_values(["date", "NAME_EN"])
        .reset_index(drop=True)
        .assign(
            month=lambda df: df["date"].dt.to_period("M").dt.to_timestamp(),
            year=lambda df: df["date"].dt.to_period("Y").dt.to_timestamp(),
        )
    )


def growing_season_median(evi_df: pd.DataFrame, group_cols: list[str]) -> pd.DataFrame:
    """Median growing-season EVI per group, rounded to three decimals.

    ``evi_df`` is first restricted with ``filter_growing_season`` (which also
    sets the ``year`` column), then grouped by ``group_cols``.
    """
    return (
        evi_df.pipe(filter_growing_season)
        .groupby(group_cols, as_index=False)
        .agg(EVI=("EVI", "median"))
        .assign(EVI=lambda df: df["EVI"].round(3))
    )


# Long-format EVI per admin level. Each level is joined to the boundaries of
# that SAME level: admin 0 uses ``syria_adm0_shp``, since joining it to the
# admin-1 boundaries would not line up the PCODEs of the two levels.
evi_adm0_df = load_evi(0, syria_adm0_shp).set_index("date")  # national table is date-indexed
evi_adm1_df = load_evi(1, syria_adm1_shp)
evi_adm2_df = load_evi(2, syria_adm2_shp)
evi_adm3_df = load_evi(3, syria_adm3_shp)

# Growing-season medians per year and administrative unit.
ADMIN_UNIT_KEYS = ["year", "PCODE", "NAME_EN"]

evi_adm1_median = growing_season_median(evi_adm1_df, ADMIN_UNIT_KEYS).rename(
    columns={"NAME_EN": "Name"}
)
evi_adm2_median = growing_season_median(evi_adm2_df, ADMIN_UNIT_KEYS).rename(
    columns={"NAME_EN": "Name"}
)
evi_adm3_median = growing_season_median(evi_adm3_df, ADMIN_UNIT_KEYS).rename(
    columns={"NAME_EN": "Name"}
)

# National series: one value per year, with the year column exposed as ``date``.
evi_adm0_median = growing_season_median(evi_adm0_df.reset_index(), ["year"]).rename(
    columns={"year": "date"}
)

In [44]:
# Monthly NO2 statistics per admin level; the generic "mean" column is renamed
# to "mean_no2" as each file is loaded.
NO2_COLUMN_NAMES = {"mean": "mean_no2"}

no2_adm1_monthly = pd.read_csv(
    "../../data//airpollution/admin1/syria_adm1_no2_monthly_combined.csv"
).rename(columns=NO2_COLUMN_NAMES)

no2_adm0_monthly = pd.read_csv(
    "../../data/airpollution/admin0/syr_admin0_no2_monthly_combined.csv"
).rename(columns=NO2_COLUMN_NAMES)

no2_adm2_monthly = pd.read_csv(
    "../../data//airpollution/admin2/syr_admin2_no2_monthly_combined.csv"
).rename(columns=NO2_COLUMN_NAMES)

no2_adm3_monthly = pd.read_csv(
    "../../data//airpollution/admin3/syr_admin3_no2_monthly_combined.csv"
).rename(columns=NO2_COLUMN_NAMES)

In [45]:
# Parse the period start dates, then average the monthly NO2 means within each
# calendar year (bins anchored on year start) for every NAME_EN.
no2_adm0_monthly["start_date"] = pd.to_datetime(no2_adm0_monthly["start_date"])

yearly_bins = pd.Grouper(key="start_date", freq="YS")
no2_adm0_annual = (
    no2_adm0_monthly.groupby(["NAME_EN", yearly_bins])["mean_no2"].mean().reset_index()
)

In [57]:
# GDP workbook: the first column is read as "Unnamed: 0" and holds the year.
gdp = pd.read_excel("../../data/GDP Syria LCU.xlsx").rename(
    columns={"Unnamed: 0": "year"}
)

In [None]:
# The custom visualization functions (combined_visuals) are imported in the plotting cell below.

In [88]:
# Give every national-level table an integer ``year_int`` column so the charts
# can line the series up on calendar year.

# GDP: take the year out of the datetime ``year`` column
gdp["year_int"] = gdp["year"].dt.year

# NO2: ``start_date`` marks the start of each annual bin
no2_adm0_annual["date"] = pd.to_datetime(no2_adm0_annual["start_date"])
no2_adm0_annual["year_int"] = no2_adm0_annual["date"].dt.year

# NTL: the ``date`` column is used directly as the year
ntl_admin0_annual["year_int"] = ntl_admin0_annual["date"]

# EVI: ``date`` holds the growing-season year
evi_adm0_median["date"] = pd.to_datetime(evi_adm0_median["date"])
evi_adm0_median["year_int"] = evi_adm0_median["date"].dt.year

print("Data preparation complete!")
for label, frame in (
    ("GDP years:", gdp),
    ("NO2 years:", no2_adm0_annual),
    ("NTL years:", ntl_admin0_annual),
    ("EVI years:", evi_adm0_median),
):
    print(label, sorted(frame["year_int"].unique()))

# Years covered by each source, used to report the overlap with GDP
gdp_years, no2_years, ntl_years, evi_years = (
    set(frame["year_int"])
    for frame in (gdp, no2_adm0_annual, ntl_admin0_annual, evi_adm0_median)
)

print(f"\nOverlapping years GDP-NO2: {sorted(gdp_years & no2_years)}")
print(f"Overlapping years GDP-NTL: {sorted(gdp_years & ntl_years)}")
print(f"Overlapping years GDP-EVI: {sorted(gdp_years & evi_years)}")

Data preparation complete!
GDP years: [np.int32(2008), np.int32(2009), np.int32(2010), np.int32(2011), np.int32(2012), np.int32(2013), np.int32(2014), np.int32(2015), np.int32(2016), np.int32(2017), np.int32(2018), np.int32(2019), np.int32(2020), np.int32(2021), np.int32(2022), np.int32(2023), np.int32(2024)]
NO2 years: [np.int32(2019), np.int32(2020), np.int32(2021), np.int32(2022), np.int32(2023), np.int32(2024)]
NTL years: [np.int64(2012), np.int64(2013), np.int64(2014), np.int64(2015), np.int64(2016), np.int64(2017), np.int64(2018), np.int64(2019), np.int64(2020), np.int64(2021), np.int64(2022), np.int64(2023), np.int64(2024)]
EVI years: [np.int32(2010), np.int32(2011), np.int32(2012), np.int32(2013), np.int32(2014), np.int32(2015), np.int32(2016), np.int32(2017), np.int32(2018), np.int32(2019), np.int32(2020), np.int32(2021), np.int32(2022), np.int32(2023), np.int32(2024), np.int32(2025)]

Overlapping years GDP-NO2: [2019, 2020, 2021, 2022, 2023, 2024]
Overlapping years GDP-NTL: [

In [99]:
# Plot GDP per capita and population against the NO2, NTL and EVI series.
import importlib
import sys
from pathlib import Path

import altair as alt

# Locate the folder holding ``combined_visuals.py`` relative to the working
# directory instead of a user-specific absolute path.
# NOTE(review): assumes the layout <root>/notebooks/combined with this notebook
# run from two levels below <root> (the same assumption the "../../data" paths
# make) - adjust if the module lives elsewhere.
VISUALS_DIR = Path.cwd().parent.parent / "notebooks" / "combined"
if str(VISUALS_DIR) not in sys.path:  # avoid stacking duplicates on re-runs
    sys.path.append(str(VISUALS_DIR))

import combined_visuals

# Reload so edits to combined_visuals.py are picked up without a kernel restart
importlib.reload(combined_visuals)
from combined_visuals import create_three_part_gdp_line_charts

# Use Altair's "json" data transformer: chart data is written to JSON files
# and referenced by URL instead of being embedded in the chart specification.
alt.data_transformers.enable("json")

# Arguments shared by both figures.
# Layout: 2 charts in top row, 1 chart in bottom row.
# Chart width reduced by 30% (400 * 0.7 = 280).
shared_chart_kwargs = dict(
    gdp_data=gdp,
    no2_data=no2_adm0_annual,
    ntl_data=ntl_admin0_annual,
    evi_data=evi_adm0_median,
    year_column="year_int",
    chart_width=280,
    chart_height=300,
)

# First visualization: GDP per capita
gdp_chart = create_three_part_gdp_line_charts(
    gdp_column="GDP per capita (constant SYP)",
    **shared_chart_kwargs,
)

print("GDP vs Economic/Environmental Indicators:")
display(gdp_chart)

print("\n" + "=" * 60 + "\n")

# Second visualization: population (passed through the same ``gdp_column`` argument)
pop_chart = create_three_part_gdp_line_charts(
    gdp_column="pop",
    **shared_chart_kwargs,
)

print("Population vs Economic/Environmental Indicators:")
display(pop_chart)

GDP vs Economic/Environmental Indicators:




Population vs Economic/Environmental Indicators:
