In [1]:
import pandas as pd
import geopandas as gpd

import bokeh
from bokeh.layouts import column
from bokeh.models import Legend, TabPanel, Tabs

from bokeh.core.validation.warnings import EMPTY_LAYOUT, MISSING_RENDERERS

bokeh.core.validation.silence(EMPTY_LAYOUT, True)
bokeh.core.validation.silence(MISSING_RENDERERS, True)
from bokeh.plotting import figure, show, output_notebook

In [2]:
color_palette = [
    "#4E79A7",  # Blue
    "#F28E2B",  # Orange
    "#E15759",  # Red
    "#76B7B2",  # Teal
    "#59A14F",  # Green
    "#EDC948",  # Yellow
    "#B07AA1",  # Purple
    "#FF9DA7",  # Pink
    "#9C755F",  # Brown
    "#BAB0AC",  # Gray
    "#7C7C7C",  # Dark gray
    "#6B4C9A",  # Violet
    "#D55E00",  # Orange-red
    "#CC61B0",  # Magenta
    "#0072B2",  # Bright blue
    "#329262",  # Peacock green
    "#9E5B5A",  # Brick red
    "#636363",  # Medium gray
    "#CD9C00",  # Gold
    "#5D69B1",  # Medium blue
]

In [47]:
from bokeh.plotting import ColumnDataSource
from bokeh.io import output_notebook
from bokeh.core.validation import silence
from bokeh.core.validation.warnings import EMPTY_LAYOUT

# Use the silence function to ignore the EMPTY_LAYOUT warning
silence(EMPTY_LAYOUT, True)


def get_bar_chart(
    dataframe,
    title,
    source,
    subtitle=None,
    measure="measure",
    category="category",
    color_code=None,
):
    # Initialize the figure
    p2 = figure(
        x_axis_type="datetime", width=1000, height=400, toolbar_location="above"
    )
    p2.add_layout(Legend(), "right")

    # Define the color palette (make sure this has enough colors for the categories)
    color_palette = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd", "#8c564b"]

    # Loop through each unique category and plot a bar
    for id, unique_category in enumerate(dataframe[category].unique()):
        # Filter the DataFrame for each category
        category_df = dataframe[dataframe[category] == unique_category].copy()
        category_df.sort_values(
            by="event_date", inplace=True
        )  # Ensure the DataFrame is sorted by date
        category_source = ColumnDataSource(category_df)

        # Plot the bars
        p2.vbar(
            x="event_date",
            top=measure,
            width=86400000 * 1.5,
            source=category_source,
            color=color_code,
        )

    # Configure legend
    p2.legend.click_policy = "hide"
    p2.legend.location = "top_right"

    # Set the subtitle as the title of the plot if it exists
    if subtitle:
        p2.title.text = subtitle

    # Create title and subtitle text using separate figures
    title_fig = figure(title=title, toolbar_location=None, width=800, height=40)
    title_fig.title.align = "left"
    title_fig.title.text_font_size = "20pt"
    title_fig.border_fill_alpha = 0
    title_fig.outline_line_color = None

    sub_title_fig = figure(title=source, toolbar_location=None, width=800, height=40)
    sub_title_fig.title.align = "left"
    sub_title_fig.title.text_font_size = "10pt"
    sub_title_fig.title.text_font_style = "normal"
    sub_title_fig.border_fill_alpha = 0
    sub_title_fig.outline_line_color = None

    # Combine the title, plot, and subtitle into a single layout
    layout = column(title_fig, p2, sub_title_fig)

    return layout


# Example usage:
# result = get_bar_chart(dataframe=my_dataframe, title="My Bar Chart", source="Data Source", subtitle="My Subtitle")
# show(result)  # or curdoc().add_root(result) if using bokeh server

In [43]:
bokeh.core.validation.silence(EMPTY_LAYOUT, True)


def get_line_plot(
    ooklaUsers,
    title,
    source,
    earthquakes=False,
    subtitle=None,
    measure="conflictIndex",
    category="ADM4_EN",
):
    p2 = figure(
        x_axis_type="datetime", width=1000, height=400, toolbar_location="above"
    )
    p2.add_layout(Legend(), "right")

    for id, adm2 in enumerate(ooklaUsers[category].unique()):
        df = ooklaUsers[ooklaUsers[category] == adm2][
            ["event_date", measure]
        ].reset_index(drop=True)
        p2.vbar(
            x="event_date",
            top=measure,
            width=86400000 * 1.5,
            source=source,
            color=color_palette[id],
            legend_label=adm2,
        )

    p2.legend.click_policy = "hide"
    if subtitle is not None:
        p2.title = subtitle

    title_fig = figure(
        title=title,
        toolbar_location=None,
        width=800,
        height=40,
    )
    title_fig.title.align = "left"
    title_fig.title.text_font_size = "20pt"
    title_fig.border_fill_alpha = 0
    title_fig.outline_line_width = 0

    # with silence(MISSING_RENDERERS):
    sub_title = figure(
        title=source,
        toolbar_location=None,
        width=800,
        height=40,
    )
    sub_title.title.align = "left"
    sub_title.title.text_font_size = "10pt"
    sub_title.title.text_font_style = "normal"
    sub_title.border_fill_alpha = 0
    sub_title.outline_line_width = 0

    layout = column(title_fig, p2, sub_title)

    return layout

In [4]:
from shapely.geometry import Point


def convert_to_gdf(df):
    geometry = [Point(xy) for xy in zip(df.longitude, df.latitude)]
    gdf = gpd.GeoDataFrame(df, crs="EPSG:4326", geometry=geometry)

    return gdf

In [5]:
syria_adm3 = gpd.read_file(
    "../../data/shapefiles/syr_pplp_adm4_unocha_20210113/syr_admbnda_adm3_uncs_unocha_20201217.json"
)
syria_adm4 = gpd.read_file(
    "../../data/shapefiles/syr_pplp_adm4_unocha_20210113/syr_pplp_adm4_unocha_20210113.json"
)
region_control = pd.read_excel("../../data/shapefiles/IntensityAoC_ADM4_v12.xlsx")
intensity = pd.read_csv(
    "../../data/earthquake-intensity/syria_adm4_earthquake_intensity.csv"
)
syria_adm4 = syria_adm4.merge(
    intensity[
        [
            "ADM1_EN",
            "ADM2_EN",
            "ADM3_EN",
            "ADM4_EN",
            "category_max_feb06",
            "max_intensity_feb06",
        ]
    ],
    on=["ADM1_EN", "ADM2_EN", "ADM3_EN", "ADM4_EN"],
)

region_control = region_control[["ADM4_EN", "Population", "aoc"]]
syria_adm4 = syria_adm4.merge(region_control, on=["ADM4_EN"])

syria_adm2 = gpd.read_file(
    "../../data/shapefiles/syr_pplp_adm4_unocha_20210113/syr_admbnda_adm2_uncs_unocha_20201217.json"
)

# Armed Conflict Location and Event Data Analysis

The Armed Conflict Location & Event Data Project (ACLED) is a disaggregated data collection, analysis, and crisis mapping project. ACLED collects information on the dates, actors, locations, fatalities, and types of all reported political violence and protest events around the world. The raw data is available through a license obtained by the World Bank

In [6]:
from datetime import datetime

acled = pd.concat(
    [
        pd.read_csv("../../data/acled/2017-01-01-2023-06-28-Syria.csv"),
        pd.read_csv("../../data/acled/2023-06-01-2023-10-03-Syria.csv"),
        pd.read_csv("../../data/acled/2023-06-01-2023-11-01-Syria.csv"),
    ]
)
acled.drop_duplicates(inplace=True)
# acled['timestamp'] = acled['timestamp'].apply(lambda x: datetime.fromtimestamp(x))
acled["event_date"] = acled["event_date"].apply(
    lambda x: datetime.strptime(x, "%d %B %Y")
)

In [7]:
syria_adm4_crs = syria_adm4.to_crs("EPSG:32632")
syria_adm3_crs = syria_adm3.to_crs("EPSG:32632")
syria_adm2_crs = syria_adm2.to_crs("EPSG:32632")

## Calculating Conflict Index

Conflict Index is calculated as a geometric mean of conflict events and fatalities at admin 2 level

In [8]:
from scipy.stats import gmean


def get_acled_by_admin(
    adm, acled, columns=["ADM4_EN", "ADM3_EN", "ADM2_EN", "ADM1_EN"], nearest=False
):
    acled_adm2 = convert_to_gdf(acled)
    acled_adm2 = acled_adm2.to_crs("EPSG:32632")
    if nearest == True:
        acled_adm2 = (
            adm.sjoin_nearest(acled_adm2, max_distance=2000)[
                ["event_date", "fatalities", "event_type"] + columns
            ]
            .groupby([pd.Grouper(key="event_date", freq="M", closed="left")] + columns)[
                "fatalities"
            ]
            .agg(["sum", "count"])
            .reset_index()
        )
    else:
        acled_adm2 = (
            adm.sjoin(acled_adm2)[["event_date", "fatalities", "event_type"] + columns]
            .groupby([pd.Grouper(key="event_date", freq="M", closed="left")] + columns)[
                "fatalities"
            ]
            .agg(["sum", "count"])
            .reset_index()
        )
    acled_adm2.rename(columns={"sum": "fatalities", "count": "nrEvents"}, inplace=True)
    acled_adm2["conflictIndex"] = acled_adm2.apply(
        lambda row: gmean([row["nrEvents"], row["fatalities"]]), axis=1
    )
    acled_adm2["conflictIndexLog"] = np.log(acled_adm2["conflictIndex"])
    # acled_adm2['event_date_map'] = acled_adm2['event_date'].apply(lambda x: x.date().replace(day=1))

    return acled_adm2.reset_index()

In [9]:
import numpy as np

acled_adm4 = get_acled_by_admin(syria_adm4_crs, acled)
df1 = acled_adm4[acled_adm4["conflictIndex"] > 0]
df1["conflictIndexLog"] = np.log(df1["conflictIndex"])

In [10]:
acled_adm4 = get_acled_by_admin(syria_adm4_crs, acled, nearest=True)
acled_adm3 = get_acled_by_admin(
    syria_adm3_crs, acled, columns=["ADM3_EN", "ADM2_EN", "ADM1_EN"], nearest=True
)
acled_aoc = get_acled_by_admin(syria_adm4_crs, acled, columns=["aoc"], nearest=True)
acled_intensity = get_acled_by_admin(
    syria_adm4_crs, acled, columns=["category_max_feb06"], nearest=True
)

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


In [11]:
acled_adm4[
    ["event_date", "ADM1_EN", "ADM2_EN", "ADM3_EN", "ADM4_EN", "fatalities", "nrEvents"]
].to_csv("../../data/acled/acled_admin4_2017_2023.csv")

In [12]:
import numpy as np

syria_adm0 = gpd.read_file(
    "../../data/shapefiles/syr_pplp_adm4_unocha_20210113/syr_admbnda_adm0_uncs_unocha_20201217.json"
)
acled_adm0 = get_acled_by_admin(
    syria_adm0.to_crs("EPSG:32632"), acled, columns=["ADM0_EN"]
)

In [13]:
# acled = convert_to_gdf(acled)
acled_moi = acled[
    (acled["event_date"] >= "2023-06-01") & (acled["event_date"] <= "2023-10-31")
]

In [19]:
import pandas as pd


def normalize_column(df, column_name):
    min_val = df[column_name].min()
    max_val = df[column_name].max()

    df[column_name + "_normalized"] = (df[column_name] - min_val) / (max_val - min_val)
    return df

In [20]:
acled_moi = (
    acled_moi.groupby(
        ["latitude", "longitude", pd.Grouper(key="event_date", freq="M"), "event_type"]
    )["fatalities"]
    .agg(["sum", "count"])
    .reset_index()
)
acled_moi.rename(columns={"sum": "fatalities", "count": "nr_events"}, inplace=True)

In [34]:
acled_moi = normalize_column(acled_moi, "fatalities")
acled_moi = normalize_column(acled_moi, "nr_events")

In [22]:
acled_moi = convert_to_gdf(acled_moi)
# data = aug_acled.set_index('geometry')['nr_events'].to_dict()

In [23]:
syria_adm1 = gpd.read_file(
    "../../data/shapefiles/syr_pplp_adm4_unocha_20210113/syr_admbnda_adm1_uncs_unocha_20201217.json"
)
syria_adm1_json = syria_adm1[["ADM1_EN", "ADM0_EN", "geometry"]].to_json()

In [25]:
syria_adm2.sjoin(acled_moi)[
    [
        "latitude",
        "longitude",
        "ADM2_EN",
        "fatalities",
        "nr_events",
        "event_date",
        "event_type",
    ]
].to_excel("../../data/acled/acled_moi.xlsx")

In [48]:
output_notebook()
bokeh.core.validation.silence(EMPTY_LAYOUT, True)
bokeh.core.validation.silence(MISSING_RENDERERS, True)

tabs = []
measure_names = {
    "nrEvents": "Number of Conflict Events",
    "fatalities": "Number of Fatalities",
}
measure_colors = {"nrEvents": "#4E79A7", "fatalities": "#F28E2B"}
# acled_adm0 = get_acled_by_admin(syria_adm2_crs, acled, columns = ['ADM2_EN', 'ADM1_EN'])
for measure in ["nrEvents", "fatalities"]:
    tabs.append(
        TabPanel(
            child=get_bar_chart(
                acled_adm0,
                f"National Trend in {measure_names[measure]}",
                "Source: ACLED",
                subtitle="",
                category="ADM0_EN",
                measure=measure,
                color_code=measure_colors[measure],
            ),
            title=measure_names[measure].capitalize(),
        )
    )

tabs = Tabs(tabs=tabs, sizing_mode="scale_both")
show(tabs, warn_on_missing_glyphs=False)

In [22]:
acled_adm0.groupby(["ADM0_EN", pd.Grouper(key="event_date", freq="Y")]).sum(
    ["fatalities", "nrEvents"]
).reset_index()

Unnamed: 0,ADM0_EN,event_date,index,fatalities,nrEvents,conflictIndex,conflictIndexLog
0,Syrian Arab Republic,2017-12-31,66,54345,31395,41221.880982,97.523622
1,Syrian Arab Republic,2018-12-31,210,29988,20820,24854.683841,90.810144
2,Syrian Arab Republic,2019-12-31,354,15617,20037,17528.538179,87.025012
3,Syrian Arab Republic,2020-12-31,498,8186,12385,10023.405372,80.350493
4,Syrian Arab Republic,2021-12-31,642,5849,10721,7901.533451,77.747495
5,Syrian Arab Republic,2022-12-31,786,5901,12259,8479.471919,78.577275
6,Syrian Arab Republic,2023-12-31,684,4842,9141,6589.771383,58.905973


In [24]:
output_notebook()

show(
    get_line_plot(
        acled_intensity,
        "Conflict index by earthquake intensity",
        "Source: ACLED",
        earthquakes=True,
        subtitle="",
        category="category_max_feb06",
        measure="conflictIndex",
    )
)

### Observations
It is seen from the below image that earthquake intensity had little to do with conflict intensity in Syria i.e., the areas where there was earthquake impact do not coincide with areas of high conflict. The conflict is agnostic to earthquakes and has peristsed before the earthquake. 

In [25]:
output_notebook()
bokeh.core.validation.silence(EMPTY_LAYOUT, True)
bokeh.core.validation.silence(MISSING_RENDERERS, True)

tabs = []

acled_adm2 = get_acled_by_admin(syria_adm2_crs, acled, columns=["ADM2_EN", "ADM1_EN"])
for adm in list(acled_adm2["ADM1_EN"].unique()):
    df = acled_adm2[acled_adm2["ADM1_EN"] == adm]

    tabs.append(
        TabPanel(
            child=get_line_plot(
                df,
                "Conflict Index by admin 2",
                "Source: ACLED",
                earthquakes=True,
                subtitle="",
                category="ADM2_EN",
                measure="conflictIndex",
            ),
            title=adm.capitalize(),
        )
    )

tabs = Tabs(tabs=tabs, sizing_mode="scale_both")
show(tabs, warn_on_missing_glyphs=False)

  result = getattr(ufunc, method)(*inputs, **kwargs)


### Observations
- The Aleppo and Idleb regions have high conflict compared to the rest of the country
- The conflict has reduced in Aleppo and Idleb over time

In [26]:
output_notebook()
title = "Monthly conflict related fatalities by Area of Control"
source = "ACLED"

show(
    get_line_plot(
        acled_aoc[acled_aoc["event_date"].dt.year > 2016],
        title=title,
        source=source,
        category="aoc",
    )
)

### Observations
* The conflcit index is the highest in government and allied force controlled areas 
* It used to be much higher in Non state armed group controlled areas but went down significantly in early 2022.