{ "cells": [ { "cell_type": "markdown", "id": "3528957d", "metadata": {}, "source": [ "# Data Extraction" ] }, { "cell_type": "markdown", "id": "2fe7c5ae-f4f2-4b15-8c53-6c2b38e9e090", "metadata": {}, "source": [ "The following notebook retrieves AIS data from the [UN Global Platform](https://unstats.un.org/wiki/display/AIS/AIS+Handbook+Outline)." ] }, { "cell_type": "markdown", "id": "0d2116c5-6dd4-468a-a0d9-883a8b5df980", "metadata": {}, "source": [ "## Setup" ] }, { "cell_type": "code", "execution_count": 1, "id": "c33c3961", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import geopandas as gpd\n", "from shapely.geometry import mapping\n", "\n", "import json\n", "\n", "import pyspark.sql.functions as F\n", "from ais import functions as af\n", "from tqdm import tqdm" ] }, { "cell_type": "code", "execution_count": 2, "id": "99cebdb6", "metadata": {}, "outputs": [], "source": [ "# Let one cell display several outputs: with \"all\", every expression\n", "# statement in the cell is echoed, not only the final one.\n", "from IPython.core.interactiveshell import InteractiveShell\n", "\n", "InteractiveShell.ast_node_interactivity = \"all\"" ] }, { "cell_type": "code", "execution_count": 3, "id": "8477a219", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
SparkSession - in-memory
\n", " \n", "SparkContext
\n", "\n", " \n", "\n", "v3.5.0
k8s://https://10.100.0.1:443
nb-d4ee6998-3c82-444f-931c-18193e6eb5b4-0f658
\n", " | port_boundary | \n", "Port_name | \n", "Country | \n", "Continent | \n", "
---|---|---|---|---|
0 | \n", "POLYGON ((-149.93214 61.25829, -149.83906 61.2... | \n", "Anchorage | \n", "U.S.A. | \n", "North-America | \n", "
1 | \n", "POLYGON ((-148.68875 60.78098, -148.65289 60.7... | \n", "Whittier | \n", "U.S.A. | \n", "North-America | \n", "
2 | \n", "POLYGON ((-146.42760 61.11690, -146.38752 61.1... | \n", "Swanport | \n", "U.S.A. | \n", "North-America | \n", "
3 | \n", "POLYGON ((-130.38185 54.34481, -130.24392 54.3... | \n", "Prince Rupert | \n", "U.S.A. | \n", "North-America | \n", "
4 | \n", "POLYGON ((-123.13286 49.26700, -123.13218 49.3... | \n", "Vancouver | \n", "Canada | \n", "North-America | \n", "
\n", " | hex_id | \n", "polygon_name | \n", "hex_resolution | \n", "
---|---|---|---|
6415962 | \n", "624811593067102207 | \n", "chokepoint:Cape of Good Hope | \n", "10 | \n", "
6415963 | \n", "625181089372012543 | \n", "chokepoint:Cape of Good Hope | \n", "10 | \n", "
6415964 | \n", "625181119436783615 | \n", "chokepoint:Cape of Good Hope | \n", "10 | \n", "
6415965 | \n", "625180943611559935 | \n", "chokepoint:Cape of Good Hope | \n", "10 | \n", "
6415966 | \n", "625180973944766463 | \n", "chokepoint:Cape of Good Hope | \n", "10 | \n", "
\n", " | hex_id | \n", "polygon_name | \n", "hex_resolution | \n", "
---|---|---|---|
6415965 | \n", "625180943611559935 | \n", "chokepoint:Cape of Good Hope | \n", "10 | \n", "
6415966 | \n", "625180973944766463 | \n", "chokepoint:Cape of Good Hope | \n", "10 | \n", "