6. Geometry Operations and Functions#
6.1. Introduction#
6.2. Learning Objectives#
6.3. Sample Datasets#
6.4. Installation and Setup#
# %pip install duckdb leafmap
6.4.1. Library Import and Configuration#
import duckdb
import leafmap
print(f"DuckDB version: {duckdb.__version__}")
6.4.2. Downloading the Sample Data#
url = "https://data.gishub.org/duckdb/nyc_data.db.zip"
leafmap.download_file(url, unzip=True)
6.5. Connecting to DuckDB and Loading Extensions#
con = duckdb.connect("nyc_data.db")
6.5.1. Installing and Loading the Spatial Extension#
con.install_extension("spatial")
con.load_extension("spatial")
6.5.2. Exploring the Database Contents#
con.sql("SHOW TABLES;")
con.sql("SELECT * FROM nyc_subway_stations LIMIT 5;")
6.6. Understanding Geometry Types#
6.6.1. Creating Sample Geometries from WKT#
con.sql(
"""
CREATE OR REPLACE TABLE samples (name VARCHAR, geom GEOMETRY);
INSERT INTO samples VALUES
('Point', ST_GeomFromText('POINT(-100 40)')),
('Linestring', ST_GeomFromText('LINESTRING(0 0, 1 1, 2 1, 2 2)')),
('Polygon', ST_GeomFromText('POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))')),
('PolygonWithHole', ST_GeomFromText('POLYGON((0 0, 10 0, 10 10, 0 10, 0 0),(1 1, 1 2, 2 2, 2 1, 1 1))')),
('Collection', ST_GeomFromText('GEOMETRYCOLLECTION(POINT(2 0),POLYGON((0 0, 1 0, 1 1, 0 1, 0 0)))'));
SELECT * FROM samples;
"""
)
6.6.2. Converting Geometries to Readable Text#
con.sql("SELECT name, ST_AsText(geom) AS geometry FROM samples;")
6.6.3. Exporting Sample Geometries#
con.sql(
"""
COPY samples TO 'samples.geojson' (FORMAT GDAL, DRIVER 'GeoJSON');
"""
)
6.7. Working with Points#
6.7.1. Examining Point Geometry Structure#
con.sql(
"""
SELECT ST_AsText(geom)
FROM samples
WHERE name = 'Point';
"""
)
6.7.2. Extracting Coordinate Values#
con.sql(
"""
SELECT name,
ST_X(geom) AS x_coordinate,
ST_Y(geom) AS y_coordinate
FROM samples
WHERE name = 'Point';
"""
)
6.7.3. Analyzing Real-World Point Data#
con.sql(
"""
SELECT * FROM nyc_subway_stations LIMIT 5;
"""
)
con.sql(
"""
SELECT name,
ROUTES,
ST_AsText(geom) AS wkt_geometry,
ST_X(geom) AS easting,
ST_Y(geom) AS northing
FROM nyc_subway_stations
LIMIT 10;
"""
)
6.8. Working with Linestrings#
6.8.1. Examining Linestring Geometry Structure#
con.sql(
"""
SELECT ST_AsText(geom)
FROM samples
WHERE name = 'Linestring';
"""
)
6.8.2. Extracting Linestring Properties#
con.sql(
"""
SELECT ST_Length(geom)
FROM samples
WHERE name = 'Linestring';
"""
)
6.9. Working with Polygons#
6.9.1. Examining Polygon Geometry Structure#
con.sql(
"""
SELECT name, ST_AsText(geom)
FROM samples
WHERE name LIKE 'Polygon%';
"""
)
6.9.2. Analyzing Polygon Properties#
con.sql(
"""
SELECT name, ST_Area(geom) AS area_sq_units
FROM samples
WHERE name LIKE 'Polygon%';
"""
)
6.10. Working with Collections#
6.10.1. Examining Collection Geometry Structure#
con.sql(
"""
SELECT name, ST_AsText(geom)
FROM samples
WHERE name = 'Collection';
"""
)
6.11. Visualizing NYC Spatial Data#
con.sql("SHOW TABLES;")
6.11.1. Visualizing Subway Stations (Points)#
subway_stations_df = con.sql(
"SELECT * EXCLUDE geom, ST_AsText(geom) as geometry FROM nyc_subway_stations"
).df()
subway_stations_df.head()
subway_stations_gdf = leafmap.df_to_gdf(
subway_stations_df, src_crs="EPSG:26918", dst_crs="EPSG:4326"
)
subway_stations_gdf.head()
subway_stations_gdf.explore()
6.11.2. Visualizing Streets (LineStrings)#
nyc_streets_df = con.sql(
"SELECT * EXCLUDE geom, ST_AsText(geom) as geometry FROM nyc_streets"
).df()
nyc_streets_df.head()
nyc_streets_gdf = leafmap.df_to_gdf(
nyc_streets_df, src_crs="EPSG:26918", dst_crs="EPSG:4326"
)
nyc_streets_gdf.head()
nyc_streets_gdf.explore()
6.12. Geometry Processing#
6.12.1. Creating Buffers and Finding Centroids#
con.sql(
"""
SELECT name,
ST_AsText(ST_Centroid(geom)) AS centroid,
ST_AsText(ST_Buffer(geom, 50)) AS buffer_50
FROM samples
WHERE name IN ('Point','Linestring')
"""
)
6.12.2. Simplification and Envelopes#
con.sql(
"""
SELECT name,
ST_AsText(ST_Simplify(geom, 0.05)) AS simplified,
ST_AsText(ST_Envelope(geom)) AS envelope
FROM samples
WHERE name LIKE 'Polygon%'
"""
)
6.12.3. Overlay Operations#
con.sql(
"""
WITH a AS (
SELECT ST_GeomFromText('POLYGON((0 0, 3 0, 3 3, 0 3, 0 0))') AS g
), b AS (
SELECT ST_GeomFromText('POLYGON((2 -1, 5 -1, 5 2, 2 2, 2 -1))') AS g
)
SELECT ST_AsText(ST_Intersection(a.g, b.g)) AS inter,
ST_AsText(ST_Difference(a.g, b.g)) AS only_a,
ST_AsText(ST_Union(a.g, b.g)) AS unioned
FROM a, b;
"""
)
6.13. Geometry Validity and Robustness#
6.13.1. Understanding Geometry Validity#
6.13.2. Testing and Repairing Geometry Validity#
6.13.2.1. Example 1: Self-Intersecting Polygon (Bow-Tie Shape)#
con.sql(
"""
-- Create a table with a valid and an invalid polygon
CREATE TABLE polygons AS
SELECT
1 AS id,
ST_GeomFromText('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))') AS geom
UNION ALL
SELECT
2 AS id,
ST_GeomFromText('POLYGON((0 0, 1 1, 1 0, 0 1, 0 0))') AS geom; -- self-intersecting "bow-tie"
"""
)
con.sql(
"""
-- Check which geometries are valid
SELECT
id,
ST_AsText(geom) AS wkt,
ST_IsValid(geom) AS is_valid
FROM polygons;
"""
)
con.sql(
"""
-- Fix invalid geometries using ST_MakeValid()
SELECT
id,
ST_IsValid(geom) AS before_valid,
ST_IsValid(ST_MakeValid(geom)) AS after_valid,
ST_AsText(ST_MakeValid(geom)) AS repaired_geom
FROM polygons;
"""
)