5. 02-weather#

  • collects weather data from NASA GPM API

  • Step 20 of methodology

import geopandas as gpd
from rasterstats import zonal_stats
import rasterio
import requests
import pandas as pd
import numpy as np
import os

#time
import pytz
from datetime import date, datetime, timedelta

functions compiled

#make api url
def get_url(startTime, endTime):
    url = "https://pmmpublisher.pps.eosdis.nasa.gov/opensearch?q=precip_30mn&" + \
    "lat=30" + \
    "&" +\
    "lon=78" + \
    "&limit=&" + \
    "startTime=" + startTime + \
    "&" +\
    "endTime=" + endTime
    return url
def get_raster_list(url, start, end):   
    r = requests.get(url)    
    items = r.json()['items']
    turl = None
    df = pd.DataFrame(index=range(0,len(items)), columns=['date', 'url', 'inRange'])
    ind = 0
    for i in items:
        #convert date string to datetime
        date = datetime.strptime(i['properties']['date']['@value'], '%Y-%m-%dT%H:%M:%S+00:00')
        date = date.astimezone(pytz.utc) #set timezone
        df.iloc[ind, 0] = date
        
        #add info on whether or not it is in time range
        df.iloc[ind, 2] = time_in_range(start, end, date)
        
        #add urls if they are tifs
        mediaType = i['action'][1]['using'][1]['mediaType']
        if mediaType == 'image/tiff':
            df.iloc[ind, 1] = i['action'][1]['using'][1]['url']
        else:
            df.iloc[ind, 1] = i['action'][1]['using'][2]['url']
        ind += 1
    return df
def time_in_range(start, end, x):
    """Return true if x is in the range [start, end]"""
    if start <= end:
        return start < x <= end
    else:
        return start < x or x <= end
def get_precip_1d(df, line):
    total = 0

    for index, row in df.iterrows():
        #print(row['url'])
        src = rasterio.open(row['url'])
        #print(size(src.read(1)))
        rast_stats = zonal_stats(line, 
            src.read(1), affine=src.transform,
            stats="count min mean max median")
        mean = rast_stats[0]['mean']
        #print(rast_stats)
        total += mean
    
    return total
def add_time(file,tz):
    #assume all the closures occur or are reported at 6pm, or earlier
    dateTime = datetime.strptime(file['date'][0] + ' 17-59-59', '%Y-%m-%d %H-%M-%S')
    timezone = pytz.timezone(tz)
    
    dateTime_tz = timezone.localize(dateTime) #time in timezone
    dateTime_utc = dateTime_tz.astimezone(pytz.utc) #time in utc

    file['datetime'] = dateTime_utc
    return file, dateTime_utc
def add_precip_1d(file, tz):
    file, dateTime_utc = add_time(file, tz)
    
    #start and end time as strings
    endTime = dateTime_utc.strftime('%Y-%m-%d')
    startTime = (dateTime_utc - timedelta(days=1)).strftime('%Y-%m-%d')
    
    #get the urls to help get weather data !!
    url = get_url(startTime, endTime)
    df = get_raster_list(url, dateTime_utc - timedelta(days=1), dateTime_utc)
    
    #some processing
    in_range = df.loc[df['inRange'] == True]
    #add precipitation column if it doesn't already exist
    try:
        file.insert(len(road.columns),'precip_1d', np.nan)
    except ValueError:
        pass
    
    #add to the list
    for index, row in road.iterrows():
        col = file.columns.get_loc("precip_1d")
        precip_1d = get_precip_1d(in_range, row['geometry'])
        file.iloc[index,col] = precip_1d
    
    print(endTime)
    
    return file

5.1. START HERE#

  • change filepath to appropriate geopackage (that you want to add weather data to), run rest of file

filepath = '/Users/emilyfang/Desktop/road_flooding2/data/a-closures/closures-agg/2021-07-08.gpkg'
road = gpd.read_file(filepath)
road = road.to_crs(epsg=4326)
data = add_precip_1d(road, 'Asia/Kolkata')
data.to_file(filepath, driver='GPKG')
2021-07-08