Introduction to Geospatial Python#


A Tufts University Data Lab Tutorial
Written by Uku-Kaspar Uustalu

Contact: uku-kaspar.uustalu@tufts.edu

Last updated: 2023-02-27

WORK IN PROGRESS


Reading Data with Geographic Coordinates#

import pandas as pd
import geopandas as gpd
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[1], line 2
      1 import pandas as pd
----> 2 import geopandas as gpd

ModuleNotFoundError: No module named 'geopandas'
stops = pd.read_csv('data/mbta-transit-stations.csv')
stops.head()
stops = gpd.GeoDataFrame(data=stops,
                         geometry=gpd.points_from_xy(x=stops.stop_lon,
                                                     y=stops.stop_lat))
stops.head()
type(stops)
type(stops.geometry)
type(stops.geometry[0])

Coordinate Reference Systems#

stops.crs
stops = stops.set_crs(epsg=4326)
stops.crs
stops.crs.name
stops.to_crs(epsg=26986, inplace=True)
stops.head()
stops.crs
stops.crs.name

Creating Static Maps#

import matplotlib.pyplot as plt
import seaborn as sns
import contextily as cx
stops.geometry.x
stops.geometry.y
plt.scatter(stops.geometry.x, stops.geometry.y)
plt.show()
stops.plot()
plt.show()
ax = stops.plot(figsize=(10, 10), color='black', markersize=50)
cx.add_basemap(ax=ax, crs=stops.crs)
plt.show()
ax = stops.plot(figsize=(10, 10), color='black', markersize=50)
cx.add_basemap(ax=ax, crs=stops.crs, source=cx.providers.OpenStreetMap.Mapnik)
plt.show()
fig, ax = plt.subplots(figsize=(10, 10))

sns.kdeplot(x=stops.geometry.x,
            y=stops.geometry.y,
            fill=True,
            alpha=0.75,
            cmap='magma',
            ax=ax)

cx.add_basemap(ax=ax, crs=stops.crs)

plt.show()
ax = stops[stops.accessible == 'yes'].plot(figsize=(10, 10),
                                           color='blue',
                                           label='accessible',
                                           marker='o',
                                           markersize=50)

stops[stops.accessible == 'no'].plot(color='red',
                                     label='not accessible',
                                     marker='x',
                                     markersize=100,
                                     ax=ax)

cx.add_basemap(ax=ax, crs=stops.crs)
plt.legend()
plt.show()

Creating Interactive Maps#

import folium
import hvplot.pandas
import plotly.express as px
stops.explore(column='accessible',
              tooltip='stop_name',
              tooltip_kwds=dict(labels=False),
              marker_kwds=dict(radius=5, fill=True),
              popup=True,
              legend=True,
              cmap='bwr_r',
              tiles='CartoDB positron')
stops_wgs84 = stops.to_crs(epsg=4326)
stops_wgs84_ctr = stops_wgs84.unary_union.centroid
m = folium.Map(location=(stops_wgs84_ctr.y, stops_wgs84_ctr.x),
               zoom_start=12)

def style_function(feature):
    color = 'blue' if feature['properties']['accessible']=='yes' else 'red'
    return {'color': color, 'fillColor': color}

folium.GeoJson(data=stops_wgs84.to_json(),
               style_function = style_function,
               tooltip=folium.GeoJsonTooltip(fields=['stop_name'],
                                             labels=False),
               marker=folium.CircleMarker(radius=5,
                                          fill=True,
                                          fill_opacity=0.5)).add_to(m)

m
stops.hvplot(crs=stops.crs.to_epsg(),
             tiles='StamenTerrainRetina',
             hover_cols=['stop_name'],
             c='accessible',
             cmap='bwr',
             alpha=0.7,
             size=200,
             width=1200,
             height=600)
fig = px.scatter_mapbox(stops_wgs84,
                        lat=stops_wgs84.geometry.y,
                        lon=stops_wgs84.geometry.x,
                        color='accessible',
                        color_discrete_map={'yes': 'blue', 'no': 'red'},
                        hover_name='stop_name',
                        mapbox_style='carto-positron',
                        opacity=0.7,
                        zoom=10,
                        width=1200,
                        height=600)

fig.update_traces(marker={'size': 15})
fig.show()

Exploring Shapely Objects#

from shapely.geometry import Polygon, LineString
from shapely.ops import transform
import pyproj
stops[stops.stop_name == 'Medford/Tufts']
stops[stops.stop_name == 'Medford/Tufts'].geometry
medford = stops[stops.stop_name == 'Medford/Tufts'].geometry.values[0]
medford
type(medford)
medford.x
medford.y
plt.scatter(medford.x, medford.y)
plt.show()
boston = stops[stops.stop_name == 'Tufts Medical Center'].geometry.values[0]
smfa = stops[stops.stop_name == 'Museum of Fine Arts'].geometry.values[0]
ax = stops.plot(figsize=(10, 10), color='black', markersize=25)
ax.scatter(medford.x, medford.y, color='blue', marker='*', s=200)
ax.scatter(boston.x, boston.y, color='red', marker='X', s=100)
ax.scatter(smfa.x, smfa.y, color='green', marker='s', s=50)
cx.add_basemap(ax, crs=stops.crs, source=cx.providers.CartoDB.Positron)
plt.show()
triangle = Polygon([medford, boston, smfa])
triangle
type(triangle)
triangle.exterior
type(triangle.exterior)
triangle.length
triangle.exterior.length
triangle.area
triangle.exterior.area
medford.distance(boston)
medford.distance(smfa)
triangle.exterior.xy
x, y = triangle.exterior.xy
plt.plot(x, y)
plt.show()
x, y = triangle.exterior.xy
plt.fill(x, y)
plt.show()
x, y = triangle.exterior.xy
ax = stops.plot(figsize=(10, 10), color='black', markersize=50, zorder=3)
plt.plot(x, y, color='blue', linewidth=3, zorder=2)
plt.fill(x, y, color='blue', alpha=0.5, zorder=1)
cx.add_basemap(ax, crs=stops.crs, source=cx.providers.CartoDB.Positron)
plt.show()
transformer = pyproj.Transformer.from_crs(stops.crs, 'epsg:4326').transform
m = folium.Map(location=transform(transformer, triangle).centroid.coords[0],
               tiles='CartoDB positron',
               zoom_start=12)
folium.Polygon(locations=transform(transformer, triangle.exterior).coords,
                color='blue', fill=True).add_to(m)
folium.Marker(location=transform(transformer, medford).coords[0],
              tooltip='Medford/Tufts').add_to(m)
folium.Marker(location=transform(transformer, boston).coords[0],
              tooltip='Tufts Medical Center').add_to(m)
folium.Marker(location=transform(transformer, smfa).coords[0],
              tooltip='Museum of Fine Arts').add_to(m)
m

Geocoding with GeoPy#

import random
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
glx_stops = ['Medford/Tufts', 'Ball Square', 'Magoun Square', 'Gilman Square',
             'East Somerville', 'Lechmere', 'Union Square']
glx = pd.read_csv('data/mbta-transit-stations.csv')
glx = glx[glx.stop_name.isin(glx_stops)].reset_index(drop=True)
glx.drop(columns=['stop_lat', 'stop_lon', 'accessible'], inplace=True)
glx
token = 'uep239-python-spatial-{:04}'.format(random.randint(0,9999))
print(token)
geolocator = Nominatim(user_agent=token)
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)
glx['location'] = glx.stop_address.apply(geocode)
glx
glx['stop_lat'] = glx.location.apply(lambda loc: loc.latitude if loc else None)
glx['stop_lon'] = glx.location.apply(lambda loc: loc.longitude if loc else None)
glx.drop(columns='location', inplace=True)
glx
glx = gpd.GeoDataFrame(data=glx,
                       geometry=gpd.points_from_xy(x=glx.stop_lon,
                                                   y=glx.stop_lat,
                                                   crs='epsg:4326'))
glx.to_crs(epsg=26986, inplace=True)
ax = glx.plot(color='darkgreen', markersize=50)
cx.add_basemap(ax=ax, crs=glx.crs, source=cx.providers.CartoDB.Positron)
plt.show()
glx_line = LineString(glx.set_index('stop_name').loc[glx_stops, 'geometry'])
glx_line
x, y = glx_line.xy
ax = glx.plot(color='darkgreen', markersize=50, zorder=2)
plt.plot(x, y, color='green', zorder=1)
cx.add_basemap(ax=ax, crs=glx.crs, source=cx.providers.CartoDB.Positron)
plt.show()
m = folium.Map(location=transform(transformer, glx_line).centroid.coords[0],
               tiles='CartoDB positron',
               zoom_start=13)
folium.PolyLine(locations=transform(transformer, glx_line).coords,
                color='green').add_to(m)
folium.GeoJson(data=glx.to_crs(epsg=4326).to_json(),
               tooltip=folium.GeoJsonTooltip(fields=['stop_name'],
                                             labels=False),
               marker=folium.Marker(icon=folium.Icon(color='green',
                                                     icon='train-tram',
                                                     prefix='fa',))).add_to(m)
m

Exercise#

entries = (pd.read_csv('data/mbta-gated-entries-2022.csv')
             .groupby('stop_id')
             .gated_entries.sum()
             .to_frame('total_entries')
             .reset_index())
entries
stops = stops.merge(entries, how='left', on='stop_id')
stops
ax = stops.plot(figsize=(10, 10),
                column='total_entries',
                cmap='Greens',
                alpha=0.8,
                edgecolor='darkgreen',
                legend=True,
                markersize=100,
                missing_kwds=dict(color='gray',
                                  edgecolor=None,
                                  alpha=0.5,
                                  marker='x',
                                  markersize=50))
cx.add_basemap(ax, crs=stops.crs, source=cx.providers.CartoDB.Positron)
plt.show()