Created
January 26, 2024 01:09
-
-
Save kissmygritts/cf6f847e598254541859cab32db9eca9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import annotations

from abc import ABC, abstractmethod, abstractstaticmethod
from dataclasses import dataclass
from enum import Enum

import geopandas as gpd
# The current feature catalog class is doing a lot and feels overloaded. | |
# it's trying to be a factory/formatter class and a dataset class. I think | |
# we can split the dataset classes into a few separate concrete implementations | |
# of a FeatureCatalog abstract base class. This class will have its own
# loader (as an abstract method) that creates an instance of itself. | |
# | |
# My current understanding is each FeatureCatalog class uses the same inventory | |
# file, but parses that inventory file differently for each class. This makes | |
# a lot of the code more difficult to reason about. For example the LiveDeadCatalog | |
# needs several months of sentinel data in the FeatureCatalog. The CanopyCoverCatalog | |
# needs only 2 months of sentinel data. | |
# | |
# Then create a FeatureCatalogFactory to do the initial loading of the S3 inventory
# file. Parsing this inventory file into each of the catalogs is delegated to | |
# the correct FeatureCatalog class. The factory will instantiate the dataset class. | |
class CatalogType(Enum):
    """Closed set of feature catalog kinds a caller can ask for.

    Used as the lookup key when selecting a concrete catalog class, so a
    misspelled kind fails at attribute access instead of at a string compare.
    """

    # Canopy cover catalog.
    CANOPY_COVER = "canopy_cover"

    # Canopy height model (CHM) catalog.
    CHM = "chm"

    # Live/dead catalog.
    LIVE_DEAD = "live_dead"
@dataclass
class FeatureCatalog(ABC):
    """Abstract base class for a dataset-specific feature catalog.

    Holds the GeoDataFrame of features and the behavior shared by every
    catalog. Each concrete subclass supplies ``format``, which carries the
    formatting logic that builds that particular kind of catalog.
    """

    # Features that make up this catalog.
    gdf: gpd.GeoDataFrame

    # ``abstractstaticmethod`` has been deprecated since Python 3.3; stacking
    # ``staticmethod`` on top of ``abstractmethod`` is the supported spelling.
    @staticmethod
    @abstractmethod
    def format() -> FeatureCatalog:
        """Build and return a catalog of the concrete subclass's kind."""

    def n_features(self) -> int:
        """Return the number of features, i.e. ``len(self.gdf)``."""
        # Behavior common to all feature catalogs lives on the base class.
        return len(self.gdf)
@dataclass
class CanopyCoverFeatureCatalog(FeatureCatalog):
    """Feature catalog for the canopy cover dataset."""

    # ``format`` takes no ``self``; without ``staticmethod`` it could only be
    # called on the class and would raise ``TypeError`` on an instance.
    @staticmethod
    def format() -> FeatureCatalog:
        """Create a canopy cover feature catalog.

        Placeholder: the formatting logic is not written yet, so this
        currently returns ``None``.
        """
        # TODO: add the concrete implementation here, i.e. the formatting
        # logic that creates a canopy cover feature catalog.
        pass
@dataclass
class CHMFeatureCatalog(FeatureCatalog):
    """Feature catalog for the CHM dataset."""

    # ``format`` takes no ``self``; without ``staticmethod`` it could only be
    # called on the class and would raise ``TypeError`` on an instance.
    @staticmethod
    def format() -> FeatureCatalog:
        """Create a CHM feature catalog.

        Placeholder: the formatting logic is not written yet, so this
        currently returns ``None``.
        """
        # TODO: add the concrete implementation here, i.e. the formatting
        # logic that creates a chm feature catalog.
        pass
class FeatureCatalogFactory:
    """Load the inventory file and build feature catalogs on request.

    The factory owns the (optional) inventory and delegates the actual
    formatting to the catalog class registered for the requested
    ``CatalogType``.
    """

    def __init__(self, inventory_url: str | None = None):
        """Store the inventory location and load it when one is given.

        Args:
            inventory_url: Location of a custom inventory file. When omitted
                or empty, no inventory is loaded.
        """
        self._full_inventory = None
        self._inventory_url = inventory_url
        self.load_custom_inventory()

    def load_custom_inventory(self) -> None:
        """Read the inventory file into ``_full_inventory`` if a URL is set.

        Public so it can be used by ``__init__`` or by outside callers.
        """
        if self._inventory_url:
            self._full_inventory = gpd.read_file(self._inventory_url)

    def create(self, catalog_type: CatalogType) -> FeatureCatalog:
        """Build the catalog registered for ``catalog_type``.

        Args:
            catalog_type: Kind of catalog to create.

        Returns:
            Whatever the registered class's ``format()`` returns.

        Raises:
            ValueError: If no catalog class is registered for ``catalog_type``.
        """
        # TODO: do something with the inventory URL if needed.
        try:
            catalog_class = self._catalog_class_map()[catalog_type]
        except KeyError:
            raise ValueError(
                f"Unsupported catalog type: {catalog_type!r}"
            ) from None
        return catalog_class.format()

    @staticmethod
    def _catalog_class_map() -> dict[CatalogType, type[FeatureCatalog]]:
        """Return the mapping from catalog type to concrete catalog class."""
        # Keyed by the enum rather than raw strings for better type safety.
        return {
            CatalogType.CANOPY_COVER: CanopyCoverFeatureCatalog,
            CatalogType.CHM: CHMFeatureCatalog,
        }
# usage
# default behavior: no custom inventory file is loaded
catalog_formatter = FeatureCatalogFactory(inventory_url=None)
# The factory instance is not callable; catalogs are built through create().
canopy_cover_catalog: CanopyCoverFeatureCatalog = catalog_formatter.create(
    CatalogType.CANOPY_COVER
)

# use a custom inventory file
catalog_formatter = FeatureCatalogFactory("s3://vp-eng-test-data/inventory/inventory.csv")
chm_catalog: CHMFeatureCatalog = catalog_formatter.create(CatalogType.CHM)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment