Skip to content

Commit

Permalink
major refactor for better use of collections
Browse files Browse the repository at this point in the history
  • Loading branch information
sid_terrafloww committed Jan 6, 2025
1 parent 97f34fa commit b0bca1c
Show file tree
Hide file tree
Showing 9 changed files with 550 additions and 291 deletions.
48 changes: 29 additions & 19 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ bbox = aoi1_polygon.union(aoi2_polygon).bounds

2. Configure Rasteret

Set up basic parameters for data collection:
Set up basic parameters for data collection, and check your workspace directory
for any existing collections that were created earlier.

```python
# Collection configuration
Expand All @@ -88,44 +89,51 @@ data_source = DataSources.LANDSAT
workspace_dir = Path.home() / "rasteret_workspace"
workspace_dir.mkdir(exist_ok=True)
)

# List existing collections
collections = Rasteret.list_collections()
for c in collections:
print(f"- {c['name']}: {c['data_source']}, {c['date_range']}, {c['size']} scenes")

```
3. Initialize and Create Collection

Set up Rasteret processor and create a local collection:
Create or load a local collection, which contains the internal COG metadata
of the scenes along with their STAC metadata:

```python
# Initialize processor
processor = Rasteret(
data_source=data_source,
output_dir=workspace_dir,
custom_name=custom_name,
date_range=date_range
)

# Create local collection if not exists
if processor._collection is None:
# Try loading existing collection
try:
# example name
processor = Rasteret.load_collection("bangalore_202401-12_landsat")
except ValueError:
# Create new collection
processor = Rasteret(
custom_name="bangalore",
data_source=DataSources.LANDSAT,
date_range=("2024-01-01", "2024-01-31")
)
processor.create_collection(
bbox=bbox,
date_range=date_range,
cloud_cover_lt=90,
platform={"in": ["LANDSAT_8"]}
cloud_cover_lt=20,
platform={"in": ["LANDSAT_8"]}
)
```

4. Query and Process Data
4. Query collection and Process Data

Query the collection and process data:

```python
# Query collection with filters
# Query collection with filters to get the data you want
ds = processor.get_xarray(
geometries=[aoi1_polygon,aoi2_polygon],
bands=["B4", "B5"],
cloud_cover_lt=20,
date_range=["2024-01-10", "2024-01-30"]
)

# returns an xarray dataset with the data for the geometries and bands specified

# Calculate NDVI
ndvi_ds = (ds.B5 - ds.B4) / (ds.B5 + ds.B4)
ndvi_ds = xr.Dataset(
Expand All @@ -134,13 +142,15 @@ ndvi_ds = xr.Dataset(
attrs=ds.attrs,
)

# Save results from xarray to geotiff files
# Save results from xarray to GeoTIFF files; each geometry's data is stored in
# its own folder
output_files = save_per_geometry(ndvi_ds, output_dir, file_prefix="ndvi", data_var="NDVI")

for geom_id, filepath in output_files.items():
print(f"Geometry {geom_id}: {filepath}")
```


## Why this library?

Details on why this library was made, and how it reads multiple COGs efficiently and fast -
Expand Down
91 changes: 47 additions & 44 deletions examples/basic_workflow_gdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,62 +3,65 @@
from shapely.geometry import Polygon

from rasteret import Rasteret

from rasteret.constants import DataSources

def main():

"""Demonstrate core workflows with Rasteret."""
# 1. Define parameters

custom_name = "bangalore3"
date_range = ("2024-01-01", "2024-01-31")
data_source = "landsat-c2l2-sr"

# 1. Setup workspace and parameters

workspace_dir = Path.home() / "rasteret_workspace"
workspace_dir.mkdir(exist_ok=True)

print("1. Defining Area of Interest")
print("--------------------------")

# Define area and time of interest
aoi_polygon = Polygon(
[(77.55, 13.01), (77.58, 13.01), (77.58, 13.08), (77.55, 13.08), (77.55, 13.01)]
)

aoi_polygon2 = Polygon(
[(77.56, 13.02), (77.59, 13.02), (77.59, 13.09), (77.56, 13.09), (77.56, 13.02)]
)

# get total bounds of all polygons above
bbox = aoi_polygon.union(aoi_polygon2).bounds

print("\n2. Creating and Loading Collection")
print("--------------------------")

# 2. Initialize processor - name generated automatically
processor = Rasteret(
custom_name=custom_name,
data_source=data_source,
output_dir=workspace_dir,
date_range=date_range,
)

# Create index if needed
if processor._collection is None:
processor.create_index(
bbox=bbox, date_range=date_range, query={"cloud_cover_lt": 20}
)
custom_name = "bangalore"
date_range = ("2024-01-01", "2024-01-31")
data_source = DataSources.LANDSAT

# List existing collections
collections = Rasteret.list_collections(dir=workspace_dir)
print("Available collections:")
# 2. List existing collections
print("1. Available Collections")
print("----------------------")
collections = Rasteret.list_collections(workspace_dir=workspace_dir)
for c in collections:
print(f"- {c['name']}: {c['size']} scenes")
print(f"- {c['name']}: {c['data_source']}, {c['date_range']}, {c['size']} scenes")

# 3. Define areas of interest
print("\n2. Defining Areas of Interest")
print("---------------------------")
aoi1_polygon = Polygon([
(77.55, 13.01), (77.58, 13.01), (77.58, 13.08),
(77.55, 13.08), (77.55, 13.01)
])
aoi2_polygon = Polygon([
(77.56, 13.02), (77.59, 13.02), (77.59, 13.09),
(77.56, 13.09), (77.56, 13.02)
])
bbox = aoi1_polygon.union(aoi2_polygon).bounds

# 4. Load or create collection
print("\n3. Loading/Creating Collection")
print("---------------------------")
try:
processor = Rasteret.load_collection(f"{custom_name}_202401_landsat")
except ValueError:
processor = Rasteret(
custom_name=custom_name,
data_source=data_source,
output_dir=workspace_dir,
date_range=date_range
)
processor.create_collection(
bbox=bbox,
date_range=date_range,
cloud_cover_lt=20,
platform={"in": ["LANDSAT_8"]}
)

print("\n3. Processing Data")
# 5. Process data
print("\n4. Processing Data")
print("----------------")

df = processor.get_gdf(
geometries=[aoi_polygon, aoi_polygon2], bands=["B4", "B5"], cloud_cover_lt=20
geometries=[aoi1_polygon, aoi2_polygon], bands=["B4", "B5"], cloud_cover_lt=20
)

print(f"Columns: {df.columns}")
Expand Down
55 changes: 25 additions & 30 deletions examples/basic_workflow_xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,14 @@


def main():

# 1. Define parameters
custom_name = "bangalore"
date_range = ("2024-01-01", "2024-01-31")
data_source = DataSources.LANDSAT # or SENTINEL2

"""Example of Rasteret workflow with xarray output."""
# 1. Setup workspace and parameters
workspace_dir = Path.home() / "rasteret_workspace"
workspace_dir.mkdir(exist_ok=True)

print("1. Defining Area of Interest")
print("--------------------------")
custom_name = "bangalore"
date_range = ("2024-01-01", "2024-03-31")
data_source = DataSources.LANDSAT

# Define area and time of interest
aoi1_polygon = Polygon(
Expand All @@ -33,35 +30,33 @@ def main():
# get total bounds of all polygons above for stac search and stac index creation
bbox = aoi1_polygon.union(aoi2_polygon).bounds

print("\n2. Creating and Loading Collection")
print("--------------------------")

# 2. Initialize processor - name generated automatically
processor = Rasteret(
custom_name=custom_name,
data_source=data_source,
output_dir=workspace_dir,
date_range=date_range,
)

# Create index if collection is not present
if processor._collection is None:
# 2. List existing collections
print("1. Available Collections")
print("----------------------")
collections = Rasteret.list_collections(workspace_dir=workspace_dir)
for c in collections:
print(f"- {c['name']}: {c['data_source']}, {c['date_range']}, {c['size']} scenes")

# 3. Try loading existing collection or create new
try:
processor = Rasteret.load_collection(f"{custom_name}_202401-03_landsat")
except ValueError:
print("\n2. Creating New Collection")
print("-------------------------")
processor = Rasteret(
custom_name=custom_name,
data_source=data_source,
output_dir=workspace_dir,
date_range=date_range
)
processor.create_collection(
bbox=bbox,
date_range=date_range,
cloud_cover_lt=20,
# add platform filter for Landsat 9, 8, 7, 5, 4 if needed,
# else remove it for all platforms
# This is unique to Landsat STAC endpoint
platform={"in": ["LANDSAT_8"]},
platform={"in": ["LANDSAT_8"]}
)

# List existing collections
collections = Rasteret.list_collections(dir=workspace_dir)
print("Available collections:")
for c in collections:
print(f"- {c['name']}: {c['size']} scenes")

print("\n3. Processing Data")
print("----------------")

Expand Down
Loading

0 comments on commit b0bca1c

Please sign in to comment.