-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlanduse_without_buildings.sql
478 lines (438 loc) · 14.5 KB
/
landuse_without_buildings.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
\echo >>> Keep only residential landuse
-- There are many residential landuse areas that are large and that are
-- overlapped by parks, forests, etc. We need to subtract them first.
DROP TABLE IF EXISTS landuse_residential;
CREATE TABLE landuse_residential AS
SELECT area_id, landuse, geom FROM landuse
WHERE landuse IN ('residential', 'farmyard')
;
CREATE INDEX ON landuse_residential USING GIST(geom);
DROP TABLE IF EXISTS landuse_unwanted;
CREATE TABLE landuse_unwanted AS
SELECT area_id, landuse, geom FROM landuse
WHERE landuse NOT IN ('residential', 'farmyard')
UNION ALL
SELECT area_id, kind AS unwanted, geom FROM unwanted
;
CREATE INDEX ON landuse_unwanted USING GIST(geom);
DROP TABLE IF EXISTS landuse_trimmed;
CREATE TABLE landuse_trimmed AS
SELECT
area_id,
landuse,
COALESCE(
ST_Difference(
l.geom,
unwanted.geom
),
l.geom
) geom
FROM landuse_residential l
CROSS JOIN LATERAL
(
SELECT ST_Union(u.geom) AS geom
FROM landuse_unwanted u
WHERE ST_Intersects(l.geom, u.geom)
) unwanted
;
CREATE INDEX ON landuse_trimmed USING GIST(geom);
\echo >>> Reproject landuse and calculate area
-- This takes way too long when using geography, so instead use LAEA
-- pseudo-meters. Should be good enough when only looking at the ratio of
-- pseudo-squaremeters to pseudo-squaremeters on a local scale.
-- UPDATE and adding a column will rewrite the table anyway. It's faster to
-- create a new table.
--
-- Need to add a "0 buffer" trick to fix some invalid geometries after
-- subtracting. Otherwise any of the next steps might fail randomly.
BEGIN;
CREATE TABLE landuse_area AS
SELECT area_id, landuse, ST_Area(ST_Transform(geom, 3035)) AS area,
ST_Buffer(ST_Transform(geom, 3035), 0) AS geom FROM landuse_trimmed;
ALTER TABLE landuse_area ADD PRIMARY KEY (area_id);
CREATE INDEX ON landuse_area USING GIST(geom);
DROP TABLE landuse_trimmed;
ALTER TABLE landuse_area RENAME TO landuse_trimmed;
COMMIT;
\echo >>> Reproject buildings and calculate area
BEGIN;
CREATE TABLE building_area AS
SELECT area_id, building, ST_Area(ST_Transform(geom, 3035)) AS area,
ST_Transform(geom, 3035) AS geom FROM building;
ALTER TABLE building_area ADD PRIMARY KEY (area_id);
CREATE INDEX ON building_area USING GIST(geom);
DROP TABLE building;
ALTER TABLE building_area RENAME TO building;
COMMIT;
\echo >> Delete highways that should not be used for splitting, especially footways and cycleways
-- Using a positive list, as there are many weird values
DELETE FROM highway
WHERE highway NOT IN (
'motorway', 'motorway_link', 'trunk', 'trunk_link',
'primary', 'primary_link', 'secondary', 'secondary_link',
'tertiary', 'tertiary_link', 'unclassified', 'residential', 'service',
'track', 'living_street', 'road', 'busway'
)
;
\echo >>> Reproject highways
BEGIN;
CREATE TABLE highway_reprojected AS
SELECT way_id, highway, ST_Transform(geom, 3035) AS geom FROM highway;
ALTER TABLE highway_reprojected ADD PRIMARY KEY (way_id);
CREATE INDEX ON highway_reprojected USING GIST(geom);
DROP TABLE highway;
ALTER TABLE highway_reprojected RENAME TO highway;
COMMIT;
\echo >>> Split landuse areas by highways
DROP TABLE IF EXISTS landuse_split CASCADE;
CREATE TABLE landuse_split AS
WITH split AS (
SELECT
l.area_id,
CASE
WHEN l.area_id >= 0 THEN 'https://osm.org/way/' || l.area_id
ELSE 'https://osm.org/relation/' || -l.area_id
END AS osm_id,
(ST_Dump(ST_Split(l.geom, ST_Collect(h.geom)))).geom,
ST_Centroid(l.geom) AS centroid
FROM
landuse_trimmed l
CROSS JOIN LATERAL (
SELECT * FROM highway h WHERE ST_Intersects(l.geom, h.geom)
) h
WHERE l.area >= 25000
GROUP BY l.area_id, l.geom
UNION ALL
SELECT
l.area_id,
CASE
WHEN l.area_id >= 0 THEN 'https://osm.org/way/' || l.area_id
ELSE 'https://osm.org/relation/' || -l.area_id
END AS osm_id,
l.geom,
ST_Centroid(l.geom) AS centroid
FROM
landuse_trimmed l
LEFT JOIN highway h ON ST_Intersects(l.geom, h.geom)
WHERE
l.area >= 25000 AND
h.geom IS NULL
)
SELECT
ROW_NUMBER() OVER (ORDER BY s.area_id, ST_Azimuth(s.centroid, ST_Centroid(s.geom))) - 1 AS id,
s.osm_id,
s.geom
FROM split s
;
ALTER TABLE landuse_split ADD PRIMARY KEY (id);
CREATE INDEX ON landuse_split(osm_id);
CREATE INDEX ON landuse_split USING GIST(geom);
VACUUM ANALYZE landuse_split;
\echo >>> Join sliver polygons (very narrow polygons with small area) with their neighbor
-- Step 1: For each sliver polygon (defined by ST_MaximumInscribedCircle) find
-- the non-sliver neighbor with the longest shared border
DROP TABLE IF EXISTS sliver_neighbor;
CREATE TABLE sliver_neighbor AS
SELECT
id_small_poly,
id_neighbor
FROM
(
SELECT
a.id AS id_small_poly,
b.id AS id_neighbor,
-- ST_Intersection is guaranteed to be a (multi)linestring because
-- we check for (only) overlapping borders in ST_Relate
RANK() OVER (
PARTITION BY a.id
ORDER BY ST_Length(ST_Intersection(a.geom, b.geom)) DESC
) AS neighbor_rank
FROM landuse_split a
CROSS JOIN LATERAL (
SELECT *
FROM landuse_split b
WHERE
-- coming from the same source OSM area
a.osm_id = b.osm_id AND
-- Shared boundary line, but not just a corner
ST_Relate(a.geom, b.geom, 'FF2F1*212')
) b
WHERE
-- Requires PostGIS >= 3.1
(ST_MaximumInscribedCircle(a.geom)).radius < 30
) all_neighbors
WHERE
neighbor_rank = 1;
-- Now we want to merge slivers with their "largest" neighbor. We need to
-- identify the transitive closures first, because there may be chains of
-- neighbors sliver -> sliver -> regular, or circles sliver <-> sliver.
DROP TABLE IF EXISTS transitive_group;
-- Label every polygon with the ID of their transitive group.
-- Label 0 = Not yet touched in the algorithm
CREATE TABLE transitive_group (
id INTEGER PRIMARY KEY,
label INTEGER NOT NULL DEFAULT 0
);
CREATE INDEX ON transitive_group(label);
DO $funcBody$
DECLARE
label1 integer;
label2 integer;
nextlabel integer;
loop_counter integer;
total_count integer;
pair sliver_neighbor%rowtype;
BEGIN
DELETE FROM transitive_group;
INSERT INTO transitive_group(id)
SELECT DISTINCT unnest(array[id_small_poly, id_neighbor])
FROM sliver_neighbor ORDER BY 1;
nextlabel := 0;
loop_counter := 0;
SELECT COUNT(*) INTO total_count FROM sliver_neighbor;
FOR pair IN SELECT * FROM sliver_neighbor
LOOP
IF loop_counter % 10000 = 0 THEN
RAISE NOTICE '>>> Sliver neighbors progress: % %%',
(loop_counter::double precision / total_count * 100.0)::int;
END IF;
loop_counter := loop_counter + 1;
SELECT label INTO label1 FROM transitive_group WHERE id = pair.id_small_poly;
SELECT label INTO label2 FROM transitive_group WHERE id = pair.id_neighbor;
IF label1 = 0 AND label2 = 0 THEN
-- Both not yet seen: Start a new group
nextlabel := nextlabel+1;
UPDATE transitive_group SET label = nextlabel WHERE id in (pair.id_small_poly, pair.id_neighbor);
ELSIF label1 = 0 AND label2 != 0 THEN
-- One of them seen: Assign the same label to the other one
UPDATE transitive_group SET label = label2 WHERE id = pair.id_small_poly;
ELSIF label1 != 0 AND label2 = 0 THEN
-- One of them seen: Assign the same label to the other one
UPDATE transitive_group SET label = label1 WHERE id = pair.id_neighbor;
ELSIF label1 != label2 THEN
-- Found the connection between two groups: Merge them
UPDATE transitive_group SET label = label1 WHERE label = label2;
END IF;
END LOOP;
END;
$funcBody$ LANGUAGE plpgsql;
DROP TABLE IF EXISTS max_per_group;
CREATE TABLE max_per_group AS
SELECT max(id) AS max_id, label
FROM transitive_group t
GROUP BY LABEL
;
ALTER TABLE max_per_group ADD PRIMARY KEY (label);
CREATE INDEX ON max_per_group(max_id);
-- Now merge all polygons that are in the same group. Use the polygon with the
-- largest ID (it's not important which one out of the group) and overwrite its
-- geometry with the union of the group. Delete all other polygons in the group.
UPDATE landuse_split l
SET geom = merged.geom
FROM
max_per_group m
CROSS JOIN LATERAL
(
SELECT m.max_id AS id, ST_Union(s.geom) AS geom
FROM landuse_split s, transitive_group t
WHERE
m.label = t.label AND
t.id = s.id
GROUP BY m.max_id
) merged
WHERE
l.id = m.max_id
;
DELETE FROM landuse_split l
USING
max_per_group m,
transitive_group t
WHERE
l.id = t.id AND
t.label = m.label AND
l.id != m.max_id
;
\echo >>> Calculate area of polygons
ALTER TABLE landuse_split ADD COLUMN area float;
UPDATE landuse_split SET area = ST_Area(geom);
\echo >>> Cut up too large polygons
-- Add sequence to generate new, unique IDs when inserting split polygons
CREATE SEQUENCE landuse_split_id_seq
OWNED BY landuse_split.id
;
SELECT setval('landuse_split_id_seq', (SELECT MAX(id) FROM landuse_split));
CREATE OR REPLACE FUNCTION split_polygon_in_half(
geom geometry
)
RETURNS geometry(Geometrycollection)
AS $funcBody$
DECLARE
bbox geometry(Linestring);
blade geometry(Linestring);
blade_elongated geometry(Linestring);
elongation_factor double precision := 1.5;
BEGIN
SELECT ST_Boundary(ST_OrientedEnvelope(geom)) INTO bbox;
SELECT
ST_OffsetCurve(
ST_MakeLine( -- short edge
ST_PointN(bbox, 1),
ST_PointN(bbox, 2)
),
ST_Distance(
-- long edge
ST_PointN(bbox, 2),
ST_PointN(bbox, 3)
) / 2
)
INTO blade;
-- Need to extend the cut line in case it ends exactly on the polygon
-- boundary. In that case no cut would be performed.
SELECT
ST_Affine(blade,
elongation_factor, 0,
0, elongation_factor,
(elongation_factor - 1) * -ST_X(ST_LineInterpolatePoint(blade, 0.5)),
(elongation_factor - 1) * -ST_Y(ST_LineInterpolatePoint(blade, 0.5))
)
INTO blade_elongated;
RETURN ST_Split(geom, blade_elongated);
END
$funcBody$
LANGUAGE plpgsql
;
CREATE OR REPLACE FUNCTION split_too_large_polygons()
RETURNS void
AS $funcBody$
BEGIN
DROP TABLE IF EXISTS landuse_too_big;
CREATE TABLE landuse_too_big AS
SELECT
osm_id,
id AS original_id,
-- new_id doesn't have to be unique. It's only used to get a
-- reproducable order when regenerating the IDs in the next step.
id + (dump).path[1] AS new_id,
(dump).geom
FROM (
SELECT
osm_id,
id,
geom,
ST_Dump(split_polygon_in_half(geom)) AS dump
FROM landuse_split
WHERE area > 100000
) polygon_halves
;
DELETE FROM landuse_split s
USING landuse_too_big b
WHERE
s.id = b.original_id
;
INSERT INTO landuse_split(osm_id, geom, id, area)
SELECT
osm_id,
geom,
nextval('landuse_split_id_seq') AS id,
ST_Area(geom) AS area
FROM landuse_too_big
ORDER BY osm_id, new_id
;
END;
$funcBody$
LANGUAGE plpgsql
;
-- Split too big polygons recursively, until none remain
DO $loopBody$
DECLARE
polygon_count integer;
BEGIN
SELECT COUNT(*) INTO polygon_count FROM landuse_split WHERE area >= 100000;
WHILE polygon_count > 0
LOOP
RAISE NOTICE '>>> Polygons to split: %', polygon_count;
PERFORM split_too_large_polygons();
SELECT COUNT(*) INTO polygon_count FROM landuse_split WHERE area >= 100000;
END LOOP;
END;
$loopBody$ LANGUAGE plpgsql
;
\echo >>> Delete any areas outside Germany
-- Using a temporary table was way faster than a sub-select
CREATE TABLE germany AS
SELECT ST_Transform(geom, 3035) AS geom
FROM administrative
WHERE admin_level='2' AND "name"='Deutschland'
;
CREATE INDEX ON germany USING GIST(geom);
DELETE
FROM landuse_split l
USING germany g
WHERE NOT ST_Intersects(l.geom, g.geom)
;
\echo >>> Regenerate IDs after removals
ALTER TABLE landuse_split ADD COLUMN new_id text;
UPDATE landuse_split l
SET new_id = l.osm_id || '//' || rank_in_group
FROM (
SELECT l2.id,
row_number() OVER (PARTITION BY l2.osm_id ORDER BY l2.osm_id, l2.id) AS rank_in_group
FROM landuse_split l2
) renumber
WHERE l.id = renumber.id
;
ALTER TABLE landuse_split DROP COLUMN id CASCADE;
ALTER TABLE landuse_split RENAME COLUMN new_id TO id;
ALTER TABLE landuse_split ADD PRIMARY KEY(id);
\echo >>> Intersect with buildings to get the area fraction
ALTER TABLE landuse_split ADD COLUMN building_fraction float;
UPDATE landuse_split l
SET building_fraction = f.building_fraction
FROM (
SELECT
l2.id,
SUM(b.area) / l2.area AS building_fraction
FROM
landuse_split l2,
building b
WHERE ST_Intersects(l2.geom, b.geom)
GROUP BY l2.id
) f
WHERE l.id = f.id
;
UPDATE landuse_split SET building_fraction = 0 WHERE building_fraction IS NULL;
\echo >>> Number of landuse areas by area and building fraction:
SELECT
CASE
WHEN area_rounded >= 100000 THEN '≥ 100,000'
ELSE to_char(area_rounded, '999,999')
END AS "area [m²]",
CASE
WHEN building_fraction_rounded >= 0.1 THEN '≥ 10 %'
ELSE ROUND(building_fraction_rounded * 100) || ' %'
END AS building_fraction,
COUNT(*)
FROM (
SELECT
CASE
WHEN area > 100000 THEN 100000
ELSE floor(area / 5000) * 5000
END AS area_rounded,
CASE
WHEN building_fraction > 0.1 THEN 0.1
ELSE floor(building_fraction / 0.01) * 0.01
END AS building_fraction_rounded
FROM
landuse_split ls
WHERE
area >= 500
) rounded
GROUP BY area_rounded, building_fraction_rounded
ORDER BY area_rounded, building_fraction_rounded
\crosstabview
;
\echo >>> Median size of landuse areas with building fraction = 0:
SELECT percentile_cont(0.5) WITHIN GROUP(ORDER BY area) AS median
FROM landuse_split
WHERE building_fraction = 0
;