8. Advanced Spatial Joins#
8.1. Introduction#
8.2. Learning Objectives#
8.3. Sample Datasets#
8.4. Installation#
# %pip install duckdb leafmap
8.5. Library Import and Configuration#
import duckdb
import leafmap
url = "https://data.gishub.org/duckdb/nyc_data.db.zip"
leafmap.download_file(url, unzip=True)
8.6. Connecting to DuckDB#
con = duckdb.connect("nyc_data.db")
8.6.1. Loading the Spatial Extension#
con.install_extension("spatial")
con.load_extension("spatial")
8.6.2. Exploring the Database Contents#
con.sql("SHOW TABLES;")
8.7. Intersection Joins#
8.7.1. Examining the Input Tables#
con.sql("SELECT * FROM nyc_neighborhoods LIMIT 5;")
con.sql("SELECT * FROM nyc_subway_stations LIMIT 5;")
8.7.2. Performing a Basic Spatial Join#
con.sql(
"""
SELECT
subways.name AS subway_name,
neighborhoods.name AS neighborhood_name,
neighborhoods.boroname AS borough
FROM nyc_neighborhoods AS neighborhoods
JOIN nyc_subway_stations AS subways
ON ST_Intersects(neighborhoods.geom, subways.geom)
WHERE subways.NAME = 'Broad St';
"""
)
8.7.3. Scaling to All Features#
con.sql(
"""
SELECT
subways.name AS subway_name,
neighborhoods.name AS neighborhood_name,
neighborhoods.boroname AS borough
FROM nyc_neighborhoods AS neighborhoods
JOIN nyc_subway_stations AS subways
ON ST_Intersects(neighborhoods.geom, subways.geom)
"""
)
8.7.4. Combining Spatial and Attribute Filters#
con.sql(
"""
SELECT DISTINCT COLOR FROM nyc_subway_stations;
"""
)
con.sql(
"""
SELECT
subways.name AS subway_name,
neighborhoods.name AS neighborhood_name,
neighborhoods.boroname AS borough
FROM nyc_neighborhoods AS neighborhoods
JOIN nyc_subway_stations AS subways
ON ST_Intersects(neighborhoods.geom, subways.geom)
WHERE subways.color = 'RED';
"""
)
8.7.5. Pattern Summary for Point-in-Polygon Joins#
8.8. Distance Within Joins#
8.8.1. Understanding ST_DWithin Syntax#
8.8.2. Analyzing Demographics Near Transit: A Case Study#
con.sql("FROM nyc_census_blocks LIMIT 5")
con.sql(
"""
SELECT
100.0 * Sum(popn_white) / Sum(popn_total) AS white_pct,
100.0 * Sum(popn_black) / Sum(popn_total) AS black_pct,
Sum(popn_total) AS popn_total
FROM nyc_census_blocks;
"""
)
con.sql(
"""
SELECT DISTINCT routes FROM nyc_subway_stations;
"""
)
con.sql(
"""
SELECT DISTINCT routes
FROM nyc_subway_stations AS subways
WHERE strpos(subways.routes,'A') > 0;
"""
)
con.sql(
"""
SELECT
100.0 * Sum(popn_white) / Sum(popn_total) AS white_pct,
100.0 * Sum(popn_black) / Sum(popn_total) AS black_pct,
Sum(popn_total) AS popn_total
FROM nyc_census_blocks AS census
JOIN nyc_subway_stations AS subways
ON ST_DWithin(census.geom, subways.geom, 200)
WHERE strpos(subways.routes,'A') > 0;
"""
)
8.9. Advanced Join#
con.sql(
"""
CREATE OR REPLACE TABLE subway_lines ( route char(1) );
INSERT INTO subway_lines (route) VALUES
('A'),('B'),('C'),('D'),('E'),('F'),('G'),
('J'),('L'),('M'),('N'),('Q'),('R'),('S'),
('Z'),('1'),('2'),('3'),('4'),('5'),('6'),
('7');
"""
)
con.sql(
"""
SELECT
lines.route,
100.0 * Sum(popn_white) / Sum(popn_total) AS white_pct,
100.0 * Sum(popn_black) / Sum(popn_total) AS black_pct,
Sum(popn_total) AS popn_total
FROM nyc_census_blocks AS census
JOIN nyc_subway_stations AS subways
ON ST_DWithin(census.geom, subways.geom, 200)
JOIN subway_lines AS lines
ON strpos(subways.routes, lines.route) > 0
GROUP BY lines.route
ORDER BY black_pct DESC;
"""
)
8.10. Coordinate System Transformations#
8.10.1. Transforming Geographic to Projected Coordinates#
url = "https://data.gishub.org/duckdb/cities.parquet"
con.sql(f"SELECT * FROM '{url}'")
8.10.2. Applying the Transformation#
con.sql(
f"""
SELECT * EXCLUDE geometry, ST_Transform(geometry, 'EPSG:4326', 'EPSG:5070', true) AS geometry FROM '{url}'
"""
)
8.10.3. When and How to Use ST_Transform#
8.10.4. Practical Transformation Example in Joins#
con.sql(
"""
-- Join web-sourced cities (EPSG:4326) with local NYC neighborhoods (EPSG:26918)
SELECT
cities.name AS city_name,
neighborhoods.name AS neighborhood_name
FROM nyc_neighborhoods AS neighborhoods
JOIN (
SELECT name, ST_Transform(geometry, 'EPSG:4326', 'EPSG:26918', true) AS geom
FROM read_parquet('cities.parquet')
) AS cities
ON ST_Intersects(neighborhoods.geom, cities.geom);
"""
)