Introduction to Geospatial Python#
A Tufts University Data Lab Tutorial
Written by Uku-Kaspar Uustalu
Contact: uku-kaspar.uustalu@tufts.edu
Last updated: 2023-02-27
WORK IN PROGRESS
Reading Data with Geographic Coordinates#
import pandas as pd
import geopandas as gpd
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[1], line 2
1 import pandas as pd
----> 2 import geopandas as gpd
ModuleNotFoundError: No module named 'geopandas'
stops = pd.read_csv('data/mbta-transit-stations.csv')
stops.head()
stops = gpd.GeoDataFrame(data=stops,
geometry=gpd.points_from_xy(x=stops.stop_lon,
y=stops.stop_lat))
stops.head()
type(stops)
type(stops.geometry)
type(stops.geometry[0])
Coordinate Reference Systems#
stops.crs
stops = stops.set_crs(epsg=4326)
stops.crs
stops.crs.name
stops.to_crs(epsg=26986, inplace=True)
stops.head()
stops.crs
stops.crs.name
Creating Static Maps#
import matplotlib.pyplot as plt
import seaborn as sns
import contextily as cx
stops.geometry.x
stops.geometry.y
plt.scatter(stops.geometry.x, stops.geometry.y)
plt.show()
stops.plot()
plt.show()
ax = stops.plot(figsize=(10, 10), color='black', markersize=50)
cx.add_basemap(ax=ax, crs=stops.crs)
plt.show()
ax = stops.plot(figsize=(10, 10), color='black', markersize=50)
cx.add_basemap(ax=ax, crs=stops.crs, source=cx.providers.OpenStreetMap.Mapnik)
plt.show()
fig, ax = plt.subplots(figsize=(10, 10))
sns.kdeplot(x=stops.geometry.x,
y=stops.geometry.y,
fill=True,
alpha=0.75,
cmap='magma',
ax=ax)
cx.add_basemap(ax=ax, crs=stops.crs)
plt.show()
ax = stops[stops.accessible == 'yes'].plot(figsize=(10, 10),
color='blue',
label='accessible',
marker='o',
markersize=50)
stops[stops.accessible == 'no'].plot(color='red',
label='not accessible',
marker='x',
markersize=100,
ax=ax)
cx.add_basemap(ax=ax, crs=stops.crs)
plt.legend()
plt.show()
Creating Interactive Maps#
import folium
import hvplot.pandas
import plotly.express as px
stops.explore(column='accessible',
tooltip='stop_name',
tooltip_kwds=dict(labels=False),
marker_kwds=dict(radius=5, fill=True),
popup=True,
legend=True,
cmap='bwr_r',
tiles='CartoDB positron')
stops_wgs84 = stops.to_crs(epsg=4326)
stops_wgs84_ctr = stops_wgs84.unary_union.centroid
m = folium.Map(location=(stops_wgs84_ctr.y, stops_wgs84_ctr.x),
zoom_start=12)
def style_function(feature):
color = 'blue' if feature['properties']['accessible']=='yes' else 'red'
return {'color': color, 'fillColor': color}
folium.GeoJson(data=stops_wgs84.to_json(),
style_function = style_function,
tooltip=folium.GeoJsonTooltip(fields=['stop_name'],
labels=False),
marker=folium.CircleMarker(radius=5,
fill=True,
fill_opacity=0.5)).add_to(m)
m
stops.hvplot(crs=stops.crs.to_epsg(),
tiles='StamenTerrainRetina',
hover_cols=['stop_name'],
c='accessible',
cmap='bwr',
alpha=0.7,
size=200,
width=1200,
height=600)
fig = px.scatter_mapbox(stops_wgs84,
lat=stops_wgs84.geometry.y,
lon=stops_wgs84.geometry.x,
color='accessible',
color_discrete_map={'yes': 'blue', 'no': 'red'},
hover_name='stop_name',
mapbox_style='carto-positron',
opacity=0.7,
zoom=10,
width=1200,
height=600)
fig.update_traces(marker={'size': 15})
fig.show()
Exploring Shapely Objects#
from shapely.geometry import Polygon, LineString
from shapely.ops import transform
import pyproj
stops[stops.stop_name == 'Medford/Tufts']
stops[stops.stop_name == 'Medford/Tufts'].geometry
medford = stops[stops.stop_name == 'Medford/Tufts'].geometry.values[0]
medford
type(medford)
medford.x
medford.y
plt.scatter(medford.x, medford.y)
plt.show()
boston = stops[stops.stop_name == 'Tufts Medical Center'].geometry.values[0]
smfa = stops[stops.stop_name == 'Museum of Fine Arts'].geometry.values[0]
ax = stops.plot(figsize=(10, 10), color='black', markersize=25)
ax.scatter(medford.x, medford.y, color='blue', marker='*', s=200)
ax.scatter(boston.x, boston.y, color='red', marker='X', s=100)
ax.scatter(smfa.x, smfa.y, color='green', marker='s', s=50)
cx.add_basemap(ax, crs=stops.crs, source=cx.providers.CartoDB.Positron)
plt.show()
triangle = Polygon([medford, boston, smfa])
triangle
type(triangle)
triangle.exterior
type(triangle.exterior)
triangle.length
triangle.exterior.length
triangle.area
triangle.exterior.area
medford.distance(boston)
medford.distance(smfa)
triangle.exterior.xy
x, y = triangle.exterior.xy
plt.plot(x, y)
plt.show()
x, y = triangle.exterior.xy
plt.fill(x, y)
plt.show()
x, y = triangle.exterior.xy
ax = stops.plot(figsize=(10, 10), color='black', markersize=50, zorder=3)
plt.plot(x, y, color='blue', linewidth=3, zorder=2)
plt.fill(x, y, color='blue', alpha=0.5, zorder=1)
cx.add_basemap(ax, crs=stops.crs, source=cx.providers.CartoDB.Positron)
plt.show()
transformer = pyproj.Transformer.from_crs(stops.crs, 'epsg:4326').transform
m = folium.Map(location=transform(transformer, triangle).centroid.coords[0],
tiles='CartoDB positron',
zoom_start=12)
folium.Polygon(locations=transform(transformer, triangle.exterior).coords,
color='blue', fill=True).add_to(m)
folium.Marker(location=transform(transformer, medford).coords[0],
tooltip='Medford/Tufts').add_to(m)
folium.Marker(location=transform(transformer, boston).coords[0],
tooltip='Tufts Medical Center').add_to(m)
folium.Marker(location=transform(transformer, smfa).coords[0],
tooltip='Museum of Fine Arts').add_to(m)
m
Geocoding with GeoPy#
import random
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
glx_stops = ['Medford/Tufts', 'Ball Square', 'Magoun Square', 'Gilman Square',
'East Somerville', 'Lechmere', 'Union Square']
glx = pd.read_csv('data/mbta-transit-stations.csv')
glx = glx[glx.stop_name.isin(glx_stops)].reset_index(drop=True)
glx.drop(columns=['stop_lat', 'stop_lon', 'accessible'], inplace=True)
glx
token = 'uep239-python-spatial-{:04}'.format(random.randint(0,9999))
print(token)
geolocator = Nominatim(user_agent=token)
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)
glx['location'] = glx.stop_address.apply(geocode)
glx
glx['stop_lat'] = glx.location.apply(lambda loc: loc.latitude if loc else None)
glx['stop_lon'] = glx.location.apply(lambda loc: loc.longitude if loc else None)
glx.drop(columns='location', inplace=True)
glx
glx = gpd.GeoDataFrame(data=glx,
geometry=gpd.points_from_xy(x=glx.stop_lon,
y=glx.stop_lat,
crs='epsg:4326'))
glx.to_crs(epsg=26986, inplace=True)
ax = glx.plot(color='darkgreen', markersize=50)
cx.add_basemap(ax=ax, crs=glx.crs, source=cx.providers.CartoDB.Positron)
plt.show()
glx_line = LineString(glx.set_index('stop_name').loc[glx_stops, 'geometry'])
glx_line
x, y = glx_line.xy
ax = glx.plot(color='darkgreen', markersize=50, zorder=2)
plt.plot(x, y, color='green', zorder=1)
cx.add_basemap(ax=ax, crs=glx.crs, source=cx.providers.CartoDB.Positron)
plt.show()
m = folium.Map(location=transform(transformer, glx_line).centroid.coords[0],
tiles='CartoDB positron',
zoom_start=13)
folium.PolyLine(locations=transform(transformer, glx_line).coords,
color='green').add_to(m)
folium.GeoJson(data=glx.to_crs(epsg=4326).to_json(),
tooltip=folium.GeoJsonTooltip(fields=['stop_name'],
labels=False),
marker=folium.Marker(icon=folium.Icon(color='green',
icon='train-tram',
prefix='fa',))).add_to(m)
m
Exercise#
entries = (pd.read_csv('data/mbta-gated-entries-2022.csv')
.groupby('stop_id')
.gated_entries.sum()
.to_frame('total_entries')
.reset_index())
entries
stops = stops.merge(entries, how='left', on='stop_id')
stops
ax = stops.plot(figsize=(10, 10),
column='total_entries',
cmap='Greens',
alpha=0.8,
edgecolor='darkgreen',
legend=True,
markersize=100,
missing_kwds=dict(color='gray',
edgecolor=None,
alpha=0.5,
marker='x',
markersize=50))
cx.add_basemap(ax, crs=stops.crs, source=cx.providers.CartoDB.Positron)
plt.show()