Last active
May 9, 2024 13:52
-
-
Save asinghvi17/6cd96854b315f1beeec68cda953d0663 to your computer and use it in GitHub Desktop.
Hook into RCall.jl to convert SF geometries to GeoInterface wrappers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#= | |
This file provides integration for R's SF objects | |
which are essentially geo-dataframes, | |
into Julia DataFrames with GeoInterface wrapper | |
geometries. | |
This requires https://github.com/JuliaInterop/RCall.jl/pull/528, | |
so run `Pkg.add((; url = "https://github.com/asinghvi17/RCall.jl", rev = "patch-1"))` to get that. | |
TODO: | |
- Evaluate all of this in a separate R environment (see `RCall.newEnvironment` in https://github.com/JuliaInterop/RCall.jl/blob/15178034400d449fb7500663717a9165c77ba020/src/methods.jl#L564-L574) | |
- Test on all sorts of shapefiles/input | |
- Examples of using `tigris` for this. | |
=# | |
using RCall | |
import RCall: rcopy, rcopytype, sexp | |
import OrderedCollections | |
R"library(sf)" | |
# Import geographic libraries | |
import GeoFormatTypes as GFT | |
import GeoInterface as GI | |
# Set up data | |
nc = R""" | |
st_read(system.file("shape/nc.shp", package="sf")) | |
""" | |
nc |> RCall.getclass # 2 element vector | |
g1 = R"$nc$geometry[1]" | |
# tests | |
R"library(tigris)" | |
R"library(tidycensus)" | |
R"""census_api_key("$(ENV["CENSUS_API_KEY"])")""" | |
manhattan_roads = rcopy((R"""roads("NY", "New York")""")) | |
orange = R""" | |
get_acs( | |
state = "CA", | |
county = "Orange", | |
geography = "tract", | |
variables = "B19013_001", | |
geometry = TRUE, | |
year = 2020 | |
) | |
""" |> rcopy | |
using DataFrames | |
nm_orange = dropmissing(orange) | |
f, a, p = poly(nm_orange.geometry; color = nm_orange.estimate, axis = (; title = "B19013_001 variable for Orange County, CA")) | |
Colorbar(f[1, 2], p; label = "Estimate") | |
f | |
# Now, you can copy any R geom directly... | |
R""" | |
p <- rbind(c(3.2,4), c(3,4.6), c(3.8,4.4), c(3.5,3.8), c(3.4,3.6), c(3.9,4.5)) | |
(mp <- st_multipoint(p)) | |
## MULTIPOINT ((3.2 4), (3 4.6), (3.8 4.4), (3.5 3.8), (3.4 3.6), (3.9 4.5)) | |
s1 <- rbind(c(0,3),c(0,4),c(1,5),c(2,5)) | |
(ls <- st_linestring(s1)) | |
## LINESTRING (0 3, 0 4, 1 5, 2 5) | |
s2 <- rbind(c(0.2,3), c(0.2,4), c(1,4.8), c(2,4.8)) | |
s3 <- rbind(c(0,4.4), c(0.6,5)) | |
(mls <- st_multilinestring(list(s1,s2,s3))) | |
p1 <- rbind(c(0,0), c(1,0), c(3,2), c(2,4), c(1,4), c(0,0)) | |
p2 <- rbind(c(1,1), c(1,2), c(2,2), c(1,1)) | |
pol <-st_polygon(list(p1,p2)) | |
p3 <- rbind(c(3,0), c(4,0), c(4,1), c(3,1), c(3,0)) | |
p4 <- rbind(c(3.3,0.3), c(3.8,0.3), c(3.8,0.8), c(3.3,0.8), c(3.3,0.3))[5:1,] | |
p5 <- rbind(c(3,3), c(4,2), c(4,3), c(3,3)) | |
(mpol <- st_multipolygon(list(list(p1,p2), list(p3,p4), list(p5)))) | |
## MULTIPOLYGON (((0 0, 1 0, 3 2, 2 4, 1 4, 0 0), (1 1, 1 2, 2 2, 1 1)), ((3 0, 4 0, 4 1, 3 1, 3 0), (3.3 0.3, 3.3 0.8, 3.8 0.8, 3.8 0.3, 3.3 0.3)), ((3 3, 4 2, 4 3, 3 3))) | |
(gc <- st_geometrycollection(list(mp, mpol, ls))) | |
""" | |
mp = R"mp" | |
ls = R"ls" | |
mls = R"mls" | |
pol = R"pol" | |
mpol = R"mpol" | |
# The way RCall works is that it iterates over the available classes, | |
# see the code in https://github.com/JuliaInterop/RCall.jl/blob/15178034400d449fb7500663717a9165c77ba020/src/convert/default.jl#L8-L22, | |
# and checks to see if any of those return a method which it can use in Julia. | |
# So, we will override first the RCall checking function, for `sf` class. | |
# | |
# Below is an example of how to do this for a multipolygon: | |
RCall.rcopytype(::Type{RCall.RClass{:sfc_MULTIPOLYGON}}, s::Ptr{RCall.VecSxp}) = Vector{GI.MultiPolygon} | |
RCall.rcopytype(::Type{RCall.RClass{:sfc_POLYGON}}, s::Ptr{RCall.VecSxp}) = Vector{GI.Polygon} | |
RCall.rcopytype(::Type{RCall.RClass{:sfc_LINESTRING}}, s::Ptr{RCall.VecSxp}) = Vector{GI.LineString} | |
RCall.rcopytype(::Type{RCall.RClass{:sfc_MULTILINESTRING}}, s::Ptr{RCall.VecSxp}) = Vector{GI.MultiLineString} | |
RCall.rcopytype(::Type{RCall.RClass{:sfc_MULTIPOINT}}, s::Ptr{RCall.VecSxp}) = Vector{GI.MultiPoint} | |
RCall.rcopytype(::Type{RCall.RClass{:sfc_POINT}}, s::Ptr{RCall.VecSxp}) = Vector{GI.Point} | |
# We can distinguish between `sfc` and `sfg` types, | |
# by specifying Z and M in different places. | |
# For sfc, we leave it unspecified, or the alternative | |
# which is better is that we can specify that they should be | |
# copied to vectors. | |
_tuplepointtype(::GI.Point{hasZ, hasM}) where {hasZ, hasM} = GI.Point{hasZ, hasM, NTuple{2+hasZ+hasM, Float64}, Nothing} | |
_tuplepointtype(::GI.LineString{hasZ, hasM}, crs::CRSType = nothing) where {hasZ, hasM, CRSType} = GI.LineString{hasZ, hasM, Vector{GI.Point{hasZ, hasM, Tuple{Float64, Float64}, Nothing}}, Nothing, Nothing} | |
function RCall.rcopy(::Type{Vector{GI.MultiPolygon}}, mpol_sfc::Ptr{VecSxp}) | |
# First, get the metadata and understand what's going on | |
crs = get_sf_crs(mpol_sfc) | |
# Now, get the `sfg` object out from the `sfc` object: | |
hasZ, hasM = _has_z_m_from_first_geometry_of_sfc(mpol_sfc) | |
LineStringType = GI.LineString{hasZ, hasM, Vector{GI.Point{hasZ, hasM, Tuple{Float64, Float64}, Nothing}}, Nothing, Nothing} | |
PolyType = GI.Polygon{hasZ, hasM, Vector{LineStringType}, Nothing, typeof(crs)} | |
multipolys = [ # The reason we need this is that GI wrapper geoms don't currently support empty geometries... | |
GI.MultiPolygon{hasZ, hasM, Vector{PolyType}, Nothing, typeof(crs)}( | |
[PolyType(LineStringType[ | |
GI.LineString(_pointify_matrix_by_row(GI.Point{hasZ, hasM}, rcopy(Matrix{Float64}, ls))) | |
for ls in pol | |
], | |
nothing, crs | |
) | |
for pol in mpol | |
], | |
nothing, crs | |
) for mpol in mpol_sfc | |
] | |
return multipolys | |
end | |
function RCall.rcopy(::Type{Vector{GI.Polygon}}, pol_sfc::Ptr{VecSxp}) | |
# First, get the metadata and understand what's going on | |
crs = get_sf_crs(pol_sfc) | |
# Now, get the `sfg` object out from the `sfc` object: | |
hasZ, hasM = _has_z_m_from_first_geometry_of_sfc(pol_sfc) | |
polys = [GI.Polygon([ GI.LineString(_pointify_matrix_by_row(GI.Point{hasZ, hasM}, rcopy(Matrix{Float64}, ls))) for ls in pol]; crs) for pol in pol_sfc] | |
return polys | |
end | |
function RCall.rcopy(::Type{Vector{GI.Polygon}}, mls_sfc::Ptr{VecSxp}) | |
# First, get the metadata and understand what's going on | |
crs = get_sf_crs(mls_sfc) | |
# Now, get the `sfg` object out from the `sfc` object: | |
hasZ, hasM = _has_z_m_from_first_geometry_of_sfc(mls_sfc) | |
mls = [GI.MultiLineString([ GI.LineString(_pointify_matrix_by_row(GI.Point{hasZ, hasM}, rcopy(Matrix{Float64}, ls))) for ls in mls_sfc]; crs) for pol in mls_sfc] | |
return mls | |
end | |
function RCall.rcopy(::Type{Vector{GI.LineString}}, ls_sfc::Ptr{VecSxp}) | |
# First, get the metadata and understand what's going on | |
crs = get_sf_crs(ls_sfc) | |
# Now, get the `sfg` object out from the `sfc` object: | |
hasZ, hasM = _has_z_m_from_first_geometry_of_sfc(ls_sfc) | |
linestrings = [ GI.LineString(_pointify_matrix_by_row(GI.Point{hasZ, hasM}, rcopy(Matrix{Float64}, ls)); crs) for ls in ls_sfc] | |
return linestrings | |
end | |
function RCall.rcopy(::Type{Vector{GI.MultiPoint}}, ls_sfc::Ptr{VecSxp}) | |
# First, get the metadata and understand what's going on | |
crs = get_sf_crs(ls_sfc) | |
# Now, get the `sfg` object out from the `sfc` object: | |
hasZ, hasM = _has_z_m_from_first_geometry_of_sfc(ls_sfc) | |
linestrings = [ GI.MultiPoint(_pointify_matrix_by_row(GI.Point{hasZ, hasM}, rcopy(Matrix{Float64}, ls)); crs) for ls in ls_sfc] | |
return linestrings | |
end | |
function RCall.rcopy(::Type{Vector{GI.Point}}, point_sfc::Ptr{VecSxp}) | |
# First, get the metadata and understand what's going on | |
crs = get_sf_crs(ls_sfc) | |
# Now, get the `sfg` object out from the `sfc` object: | |
hasZ, hasM = _has_z_m_from_first_geometry_of_sfc(ls_sfc) | |
points = [ _pointify_matrix_by_row(GI.Point{hasZ, hasM}, rcopy(Matrix{Float64}, ls)) for ls in ls_sfc] | |
return GI.Point.(points; crs) # re-wrap the points, but with CRS. | |
end | |
function _has_z_m_from_first_geometry_of_sfc(geom) | |
fg = first(geom) # the sf object can hold only one sfg geometry in this case! | |
ndims_string = rcopy(String, first(RCall.getclass(fg))) # this is where dimensions are always held | |
hasZ, hasM = if ndims_string == "XY" | |
false, false | |
elseif ndims_string == "XYZ" | |
true, false | |
elseif ndims_string == "XYZM" | |
true, true | |
elseif ndims_string == "XYM" | |
false, true | |
else | |
error("Unknown dimensions: $ndims_string") | |
end | |
return hasZ, hasM | |
end | |
# The constraints should evaluate statically, and the allocations | |
# should also be constrained to a single point here. | |
# Ideally, we would actually do this at the time of copying...but oh well. | |
# That can always be optimized later! | |
function _pointify_matrix_by_row(point_type::Type{GI.Point{hasZ, hasM}},mat::Matrix) where {hasZ, hasM} | |
if hasZ && hasM | |
return @views point_type.(mat[:, 1], mat[:, 2], mat[:, 3], mat[:, 4]) | |
elseif hasZ | |
return @views point_type.(mat[:, 1], mat[:, 2], mat[:, 3]) | |
elseif hasM | |
return @views point_type.(mat[:, 1], mat[:, 2], mat[:, 3]) | |
else | |
return @views point_type.(mat[:, 1], mat[:, 2]) | |
end | |
end | |
function get_sf_crs(o::RCall.OrderedDict) | |
ks = keys(o) | |
return if :wkt in ks | |
GFT.ESRIWellKnownText(GFT.CRS(), o[:wkt]) | |
elseif :esri in ks | |
GFT.EPSG(o[:esri]) | |
else | |
@warn "No good CRS found in the dict, setting it to `nothing`" | |
nothing | |
end | |
end | |
function get_sf_crs(s::Ptr{RCall.VecSxp}) | |
crs_dict= RCall.rcopy(R"st_crs($(s))") | |
return get_sf_crs(crs_dict) | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment