02-weather
Contents
5. 02-weather#
collects weather data from NASA GPM API
Step 20 of methodology
import geopandas as gpd
from rasterstats import zonal_stats
import rasterio
import requests
import pandas as pd
import numpy as np
import os
#time
import pytz
from datetime import date, datetime, timedelta
functions compiled
#make api url
def get_url(startTime, endTime):
url = "https://pmmpublisher.pps.eosdis.nasa.gov/opensearch?q=precip_30mn&" + \
"lat=30" + \
"&" +\
"lon=78" + \
"&limit=&" + \
"startTime=" + startTime + \
"&" +\
"endTime=" + endTime
return url
def get_raster_list(url, start, end):
r = requests.get(url)
items = r.json()['items']
turl = None
df = pd.DataFrame(index=range(0,len(items)), columns=['date', 'url', 'inRange'])
ind = 0
for i in items:
#convert date string to datetime
date = datetime.strptime(i['properties']['date']['@value'], '%Y-%m-%dT%H:%M:%S+00:00')
date = date.astimezone(pytz.utc) #set timezone
df.iloc[ind, 0] = date
#add info on whether or not it is in time range
df.iloc[ind, 2] = time_in_range(start, end, date)
#add urls if they are tifs
mediaType = i['action'][1]['using'][1]['mediaType']
if mediaType == 'image/tiff':
df.iloc[ind, 1] = i['action'][1]['using'][1]['url']
else:
df.iloc[ind, 1] = i['action'][1]['using'][2]['url']
ind += 1
return df
def time_in_range(start, end, x):
"""Return true if x is in the range [start, end]"""
if start <= end:
return start < x <= end
else:
return start < x or x <= end
def get_precip_1d(df, line):
total = 0
for index, row in df.iterrows():
#print(row['url'])
src = rasterio.open(row['url'])
#print(size(src.read(1)))
rast_stats = zonal_stats(line,
src.read(1), affine=src.transform,
stats="count min mean max median")
mean = rast_stats[0]['mean']
#print(rast_stats)
total += mean
return total
def add_time(file,tz):
#assume all the closures occur or are reported at 6pm, or earlier
dateTime = datetime.strptime(file['date'][0] + ' 17-59-59', '%Y-%m-%d %H-%M-%S')
timezone = pytz.timezone(tz)
dateTime_tz = timezone.localize(dateTime) #time in timezone
dateTime_utc = dateTime_tz.astimezone(pytz.utc) #time in utc
file['datetime'] = dateTime_utc
return file, dateTime_utc
def add_precip_1d(file, tz):
file, dateTime_utc = add_time(file, tz)
#start and end time as strings
endTime = dateTime_utc.strftime('%Y-%m-%d')
startTime = (dateTime_utc - timedelta(days=1)).strftime('%Y-%m-%d')
#get the urls to help get weather data !!
url = get_url(startTime, endTime)
df = get_raster_list(url, dateTime_utc - timedelta(days=1), dateTime_utc)
#some processing
in_range = df.loc[df['inRange'] == True]
#add precipitation column if it doesn't already exist
try:
file.insert(len(road.columns),'precip_1d', np.nan)
except ValueError:
pass
#add to the list
for index, row in road.iterrows():
col = file.columns.get_loc("precip_1d")
precip_1d = get_precip_1d(in_range, row['geometry'])
file.iloc[index,col] = precip_1d
print(endTime)
return file
5.1. START HERE#
change filepath to appropriate geopackage (that you want to add weather data to), run rest of file
filepath = '/Users/emilyfang/Desktop/road_flooding2/data/a-closures/closures-agg/2021-07-08.gpkg'
road = gpd.read_file(filepath)
road = road.to_crs(epsg=4326)
data = add_precip_1d(road, 'Asia/Kolkata')
data.to_file(filepath, driver='GPKG')
2021-07-08