- We create a random set of the points (n=1000000)
Drop table if exists points;
CREATE TABLE points AS
select ST_GeoHash(the_geom) as geohash, ST_SetSRID(the_geom,4326) as the_geom from
(select (st_dump(ST_GeneratePoints(ST_MakeEnvelope(-180, -90, 180, 90),1000000))).geom as the_geom ) t1 ;
- Lets create a random set of the boxes (n=1000)
- We can generate a random number between min/max
- select rand()*(@upper-@lower)+@lower;
SELECT random()*(180+180)-180;
SELECT random()*(90+90)-90;
Drop table if exists boxes;
CREATE TABLE boxes AS
SELECT ST_SetSRID(ST_MakeEnvelope(x-5, y-5, x+5, y+5),4326) AS the_geom
FROM (SELECT 360*random()-180 AS x, 180*random()-90 AS y
FROM generate_series(1, 1000)) as t1
- Check if we already have spatial indexes enabled on our table
SELECT *
FROM pg_indexes
WHERE tablename = 'boxes';
SELECT *
FROM pg_indexes
WHERE tablename = 'points';
List of all the functions that can make use of a spatial index:
ST_Intersects()
ST_Contains()
ST_Within()
ST_DWithin()
ST_ContainsProperly()
ST_CoveredBy()
ST_Covers()
ST_Overlaps()
ST_Crosses()
ST_DFullyWithin()
ST_3DIntersects()
ST_3DDWithin()
ST_3DDFullyWithin()
ST_LineCrossingDirection()
ST_OrderingEquals()
ST_Equals()
And for a bonus there are also a few operators that access spatial indexes.
- geom_a && geom_b returns true if the bounding box of geom_a intersects the bounding box of geom_b in 2D space.
- geom_a &&& geom_b returns true if the bounding box of geom_a intersects the bounding box of geom_b in ND space (an ND index is required for this to be index assisted),
SELECT Count(*)
FROM points, boxes
WHERE points.the_geom && boxes.the_geom;
WAIT!!!!
For the PostGIS geometry type alone, there are actually 10 different operator classes!
Read More here if you are interested: https://habr.com/en/company/postgrespro/blog/446624/
- create GIST index
CREATE INDEX boxes_geom_x
ON boxes
USING GIST (the_geom);
CREATE INDEX points_geom_x
ON points
USING GIST (the_geom);
SELECT *
FROM pg_indexes
WHERE tablename = 'boxes';
SELECT *
FROM pg_indexes
WHERE tablename = 'points';
-- Run the same query again
SELECT Count(*)
FROM points, boxes
WHERE points.the_geom && boxes.the_geom;
lets drop the index from the tables and create another index
DROP INDEX boxes_geom_x;
DROP INDEX points_geom_x;
CREATE INDEX boxes_geom_x
ON boxes
USING SPGIST (the_geom);
CREATE INDEX points_geom_x
ON points
USING SPGIST (the_geom);
And finally test the queries again
SELECT Count(*)
FROM points, boxes
WHERE points.the_geom && boxes.the_geom;
read more:https://www.postgresql.org/docs/current/spgist.html
- Some basic considerations:
-GIST has full operator support, including (k)NN searches
-SP-GIST doesn't support (k)NN as of yet, and supports fewer operators (which is probably not a real issue, though)
-GIST isn't overly sensitive to the spatial distribution (homogeneous/consistently spaced vs heterogeneous/blobs of geometries) and the topology (many overlaps vs isolated distribution) of your geometries
-SP_GIST is most effective for non-overlapping geometries, and boost searches for spatially homogeneous distributions, due to its Spatial Partitioning
-GIST creation time is rising slightly non-linear with the amount of data it has to ingest, but has a an overall stable increase (ballpark figure: 20 minutes for 100 million rows (points; global distribution))
-SP_GIST is likely faster for smaller amount of data, but tends to have a significant performance drop after a few hundred million geometries compared to GIST
-GIST indexes have a non-trivial storage impact (ballpark figure: 5GB for 100 million geometries), but only BRIN indexes really make a difference here
-SP-GIST has a few percentages less space requirement
credits :https://gis.stackexchange.com/questions/374091/when-to-use-gist-and-when-to-use-sp-gist-index
lets create a geohash column
create table geohash_tbl as ( select ST_GeoHash(the_geom), * from public.points);
select distinct st_geohash, id, ST_GeomFromGeoHash(st_geohash,8) as poly from geohash_tbl
select distinct st_geohash, id, ST_GeomFromGeoHash(st_geohash,8) as poly from geohash_tbl
select * from geohash_tbl where st_geohash ~>=~ 'dpwz5cdj7g5t39t4ydyw' and st_geohash ~<=~ 'dpwzjh0wq3cuvkgpsgzm';
select distinct LEFT(st_geohash, 4) as gh, ST_Collect(geohash_tbl.the_geom) AS geom from geohash_tbl
group by gh
SELECT COUNT(geohash), ST_GeoHash((the_geom),2)
FROM (
SELECT geohash, the_geom FROM public.points
WHERE ST_Contains((ST_MakeEnvelope(-29.79, 16.38, 64.05, 90.26, 4326)), the_geom)
) AS bbox
GROUP BY ST_GeoHash((the_geom),2)
CREATE OR REPLACE FUNCTION ST_GeoHashAdjacent(
IN ref_hash TEXT,
IN direction TEXT,
OUT neighbor TEXT
) LANGUAGE 'plpgsql' IMMUTABLE STRICT AS
$FUNCTION$
DECLARE
_lu_d TEXT := 'n s e w';
_b_32 TEXT[] := ARRAY[
'0','1','2','3','4','5','6','7','8','9','b','c','d','e','f','g','h','j','k','m','n','p','q','r','s','t','u','v','w','x','y','z'
];
_g_nb TEXT[] := ARRAY[
'p0r21436x8zb9dcf5h7kjnmqesgutwvy', 'bc01fg45238967deuvhjyznpkmstqrwx',
'14365h7k9dcfesgujnmqp0r2twvyx8zb', '238967debc01fg45kmstqrwxuvhjyznp',
'bc01fg45238967deuvhjyznpkmstqrwx', 'p0r21436x8zb9dcf5h7kjnmqesgutwvy',
'238967debc01fg45kmstqrwxuvhjyznp', '14365h7k9dcfesgujnmqp0r2twvyx8zb'
];
_g_bd TEXT[] := ARRAY[
'prxz', 'bcfguvyz',
'028b', '0145hjnp',
'bcfguvyz', 'prxz',
'0145hjnp', '028b'
];
_h_pf TEXT := LEFT($1, -1);
_h_lc TEXT := RIGHT($1, 1);
_h_tp INT := LENGTH($1) % 2;
__ai INT := STRPOS(_lu_d, $2) + _h_tp;
BEGIN
If STRPOS(_g_bd[__ai], _h_lc) > 0 AND _h_pf <> ''
THEN _h_pf := ST_GeoHashAdjacent(_h_pf, $2);
END IF;
neighbor := _h_pf || _b_32[STRPOS(_g_nb[__ai], _h_lc)];
END;
$FUNCTION$
;
CREATE OR REPLACE FUNCTION ST_GeoHashNeighbors(
IN center_hash TEXT,
OUT neighbors TEXT[]
) LANGUAGE 'plpgsql' IMMUTABLE STRICT AS
$FUNCTION$
BEGIN
neighbors := ARRAY[
ST_GeoHashAdjacent(ST_GeoHashAdjacent($1, 'n'), 'w'),
ST_GeoHashAdjacent($1, 'n'),
ST_GeoHashAdjacent(ST_GeoHashAdjacent($1, 'n'), 'e'),
ST_GeoHashAdjacent($1, 'e'),
ST_GeoHashAdjacent(ST_GeoHashAdjacent($1, 's'), 'e'),
ST_GeoHashAdjacent($1, 's'),
ST_GeoHashAdjacent(ST_GeoHashAdjacent($1, 's'), 'w'),
ST_GeoHashAdjacent($1, 'w')
];
END;
$FUNCTION$
;
select ST_GeoHashAdjacent('dpwz5cd', 'n')