Merge pull request #28 from Sydney-Informatics-Hub/model-upgrade
Model loading and full use case
hlydecker committed Mar 28, 2024
2 parents 5f4d539 + cfd2d13 commit 7d054d3
Showing 6 changed files with 262 additions and 28 deletions.
76 changes: 75 additions & 1 deletion aigis/annotate/utils.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# aerialannotation/utils.py
# aigis.annotate.utils.py

import geopandas as gpd
import numpy as np
from shapely.geometry import Polygon
@@ -61,3 +61,77 @@ def geojson_csv_filter(geojson_path, csv_path):
    filtered_gdf = gdf[gdf["id"].isin(ids_to_keep)]

    return filtered_gdf


def read_boundary_file(file_path):
    """Read a boundary file, reproject it to EPSG:3857, and dissolve it into a single geometry."""
    try:
        boundary_data = gpd.read_file(file_path)
        boundary_data = boundary_data.to_crs(epsg=3857)  # Convert to EPSG 3857
        boundary_data = boundary_data.dissolve()  # Merge all features into one geometry
        return boundary_data
    except Exception as e:
        print(f"Error reading boundary file: {e}")
        return None

def create_grid(boundary_data, grid_size):
    """Create a square grid covering the bounds of boundary_data and save it to grid.geojson."""
    try:
        # Calculate the bounding box of the boundary data
        bbox = boundary_data.total_bounds

        # Calculate the number of grid cells in each dimension
        num_cells_x = int((bbox[2] - bbox[0]) / grid_size)
        num_cells_y = int((bbox[3] - bbox[1]) / grid_size)

        # Build the grid cell polygons; collecting them in a list and constructing
        # the GeoDataFrame once avoids DataFrame.append (removed in pandas 2.0)
        cells = []
        for i in range(num_cells_x):
            for j in range(num_cells_y):
                # Calculate the coordinates of the grid cell
                minx = bbox[0] + i * grid_size
                miny = bbox[1] + j * grid_size
                maxx = minx + grid_size
                maxy = miny + grid_size
                cells.append(Polygon([(minx, miny), (minx, maxy), (maxx, maxy), (maxx, miny)]))

        grid = gpd.GeoDataFrame(geometry=cells, crs="EPSG:3857")

        # Create the GeoJSON structure
        features = []
        for index, row in grid.iterrows():
            # geometry.bounds is (minx, miny, maxx, maxy), so top is bounds[3]
            # and bottom is bounds[1]
            feature = {
                "type": "Feature",
                "properties": {
                    "id": index,
                    "left": row.geometry.bounds[0],
                    "top": row.geometry.bounds[3],
                    "right": row.geometry.bounds[2],
                    "bottom": row.geometry.bounds[1],
                    "row_index": index // num_cells_y,
                    "col_index": index % num_cells_y
                },
                "geometry": {
                    "type": "Polygon",
                    "coordinates": [list(row.geometry.exterior.coords)]
                }
            }
            features.append(feature)

        geojson = {
            "type": "FeatureCollection",
            "name": "GSU_grid_1",
            "crs": {"type": "name", "properties": {"name": "urn:ogc:def:crs:EPSG::3857"}},
            "features": features
        }

        # Save the grid as a GeoJSON file
        grid.to_file("grid.geojson", driver="GeoJSON")

        return geojson
    except Exception as e:
        print(f"Error creating grid: {e}")
        return None
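
A minimal usage sketch for the two new helpers (the boundary path and the 250 m cell size are illustrative, not part of the commit):

from aigis.annotate.utils import read_boundary_file, create_grid

boundary = read_boundary_file("suburb_boundary.geojson")  # hypothetical input file
if boundary is not None:
    grid = create_grid(boundary, grid_size=250)  # cell size in EPSG:3857 units; also writes grid.geojson
    print(f"Grid contains {len(grid['features'])} cells")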
80 changes: 80 additions & 0 deletions aigis/segment/models.py
@@ -0,0 +1,80 @@
# -*- coding: utf-8 -*-
import os
import wget

def download_detectron2_model_weights(model_type):
    """
    Downloads the model weights and configuration file for the specified model type.
    Args:
        model_type (str or tuple): The type of model to download. If 'trees' or 'buildings', the corresponding
            model weights and configuration file will be downloaded. If a tuple is provided, it should contain
            the URLs for the model weights and configuration file.
    Returns:
        tuple: A tuple containing the filenames of the downloaded model weights and configuration file.
    Raises:
        ValueError: If an invalid model_type is provided.
    """
    if model_type == "trees":
        model_weights_url = "https://huggingface.co/spaces/SIH/aerial-segmentation-model-selection/resolve/main/tree_model_weights/treev3model_0012499.pth"
        config_url = "https://huggingface.co/spaces/SIH/aerial-segmentation-model-selection/resolve/main/tree_model_weights/treev3_tms_sixmaps_cfg.yaml"
    elif model_type == "buildings":
        model_weights_url = "https://huggingface.co/spaces/SIH/building-segmentation/resolve/main/model_weights/model_final.pth"
        config_url = "https://huggingface.co/spaces/SIH/building-segmentation/resolve/main/model_weights/buildings_poc_cfg.yml"
    else:
        # Assume a (model_weights_url, config_url) tuple was supplied
        model_weights_url, config_url = model_type

    # Download model weights
    model_weights_filename = os.path.basename(model_weights_url)
    wget.download(model_weights_url, model_weights_filename)

    # Download config file
    config_filename = os.path.basename(config_url)
    wget.download(config_url, config_filename)

    return model_weights_filename, config_filename

def download_vit_model_weights(model_type, config_url=None, model_url=None, preprocessor_url=None, training_args_url=None):
    """
    Downloads the files from the specified URLs using wget.
    Args:
        model_type (str): The type of model to download. If 'lczs', the corresponding
            files will be downloaded. If any other value is provided, the user can
            specify the URLs for the config, model, preprocessor, and training_args.
        config_url (str): The URL for the config file.
        model_url (str): The URL for the model file.
        preprocessor_url (str): The URL for the preprocessor file.
        training_args_url (str): The URL for the training_args file.
    Returns:
        list: A list of filenames of the downloaded files.
    """
    if model_type == "lczs":
        config_url = "https://huggingface.co/spaces/SIH/lcz-classification/resolve/main/ViT_LCZs_v3/config.json"
        model_url = "https://huggingface.co/spaces/SIH/lcz-classification/resolve/main/ViT_LCZs_v3/model.safetensors"
        preprocessor_url = "https://huggingface.co/spaces/SIH/lcz-classification/resolve/main/ViT_LCZs_v3/preprocessor_config.json"
        training_args_url = "https://huggingface.co/spaces/SIH/lcz-classification/resolve/main/ViT_LCZs_v3/training_args.bin"

    # Download config file
    config_filename = os.path.basename(config_url)
    wget.download(config_url, config_filename)
    # Download model file
    model_filename = os.path.basename(model_url)
    wget.download(model_url, model_filename)
    # Download preprocessor file
    preprocessor_filename = os.path.basename(preprocessor_url)
    wget.download(preprocessor_url, preprocessor_filename)
    # Download training args file
    training_args_filename = os.path.basename(training_args_url)
    wget.download(training_args_url, training_args_filename)

    return [config_filename, model_filename, preprocessor_filename, training_args_filename]
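
A short sketch of how the download helpers are meant to be called (the custom URLs are placeholders, not real endpoints):

from aigis.segment.models import download_detectron2_model_weights, download_vit_model_weights

# Built-in presets
weights_file, config_file = download_detectron2_model_weights("trees")

# Custom Detectron2 model: pass a (weights_url, config_url) tuple instead
weights_file, config_file = download_detectron2_model_weights(
    ("https://example.com/my_weights.pth", "https://example.com/my_cfg.yaml")
)

# ViT preset for local climate zone (LCZ) classification
vit_files = download_vit_model_weights("lczs")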
Empty file added aigis/utils/__init__.py
47 changes: 47 additions & 0 deletions aigis/utils/analysis.py
@@ -0,0 +1,47 @@
# -*- coding: utf-8 -*-

import geopandas as gpd
import matplotlib.pyplot as plt

def calculate_coverage(boundary_file, building_file, tree_file):
    # Load boundary file and reproject everything to a metric CRS (EPSG:3857,
    # matching the rest of the pipeline) so the area ratios are meaningful
    boundary_data = gpd.read_file(boundary_file).to_crs(epsg=3857)

    # Load building outline geoparquet
    building_data = gpd.read_parquet(building_file).to_crs(epsg=3857)

    # Load tree outline geoparquet
    tree_data = gpd.read_parquet(tree_file).to_crs(epsg=3857)

    # Calculate coverage percentage for buildings
    # (note: summing areas double-counts overlapping polygons)
    building_coverage = (building_data.geometry.area.sum() / boundary_data.geometry.area.sum()) * 100

    # Calculate coverage percentage for trees
    tree_coverage = (tree_data.geometry.area.sum() / boundary_data.geometry.area.sum()) * 100

    # Generate histogram for building sizes
    building_data['area'] = building_data.geometry.area
    building_data['area'].plot.hist(bins=10)
    plt.xlabel('Building Area')
    plt.ylabel('Frequency')
    plt.title('Distribution of Building Sizes')
    plt.show()

    # Generate histogram for tree sizes
    tree_data['area'] = tree_data.geometry.area
    tree_data['area'].plot.hist(bins=10)
    plt.xlabel('Tree Area')
    plt.ylabel('Frequency')
    plt.title('Distribution of Tree Sizes')
    plt.show()

    return building_coverage, tree_coverage

# Usage example (guarded so that importing this module does not run it)
if __name__ == "__main__":
    boundary_file = 'path/to/boundary_file.shp'
    building_file = 'path/to/building_outline.parquet'
    tree_file = 'path/to/tree_outline.parquet'

    building_coverage, tree_coverage = calculate_coverage(boundary_file, building_file, tree_file)
    print(f"Building coverage: {building_coverage}%")
    print(f"Tree coverage: {tree_coverage}%")
37 changes: 37 additions & 0 deletions scripts/predict_area.py
@@ -0,0 +1,37 @@
import argparse
import subprocess
from aigis.annotate.utils import read_boundary_file, create_grid
from aigis.segment.models import download_detectron2_model_weights
from aigis.utils.analysis import calculate_coverage

def main(args):
    # Dissolve the boundary (EPSG:3857) and write the tile grid to grid.geojson
    boundary = read_boundary_file(args.boundary)
    create_grid(boundary, args.grid_size)

    download_detectron2_model_weights("trees")
    download_detectron2_model_weights("buildings")

    subprocess.run(["python", "aigis/scripts/get_raster_jpeg.py", "grid.geojson"])
    subprocess.run(["python", "aigis/scripts/jpeg2tiff.py", "output_tiles", "grid.geojson"])

    # Trees: predict on the downloaded raster tiles
    subprocess.run(["python", "aigis/scripts/prediction_batch_detectron2.py", "--indir", "tiff_tiles/", "-p", "*.tif", "-c", "treev3_tms_sixmaps_cfg.yaml", "-w", "treev3model_0012499.pth", "-t", "0.3", "--coco-out", "trees.json", "-s", "0.0"])
    subprocess.run(["python", "aigis/scripts/coco2geojson.py", "tiff_tiles/", "trees.json", "--simplify-tolerance", "3.0", "--geoparquet-output", "trees.geoparquet", "--geojson-output", "trees.geojson"])

    # Buildings: predict on the downloaded raster tiles
    subprocess.run(["python", "aigis/scripts/prediction_batch_detectron2.py", "--indir", "tiff_tiles/", "-p", "*.tif", "-c", "buildings_poc_cfg.yml", "-w", "model_final.pth", "-t", "0.1", "--coco-out", "buildings.json", "-s", "0.0"])
    subprocess.run(["python", "aigis/scripts/coco2geojson.py", "tiff_tiles/", "buildings.json", "--simplify-tolerance", "0.1", "--geoparquet-output", "buildings.geoparquet", "--geojson-output", "buildings.geojson"])

    # calculate_coverage expects file paths, so pass the original boundary path
    building_coverage, tree_coverage = calculate_coverage(args.boundary, "buildings.geoparquet", "trees.geoparquet")
    print(f"Building coverage: {building_coverage}%")
    print(f"Tree coverage: {tree_coverage}%")

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--boundary", required=True, help="Path to boundary file")
    parser.add_argument("--grid-size", type=int, required=True, help="Grid cell size in EPSG:3857 units (metres)")
    args = parser.parse_args()

    main(args)
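
A hypothetical end-to-end invocation of the new script (the boundary path and grid size are illustrative):

python scripts/predict_area.py --boundary suburb_boundary.geojson --grid-size 250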
50 changes: 23 additions & 27 deletions scripts/vit_predict.py
@@ -5,16 +5,29 @@
from transformers import ViTImageProcessor, ViTForImageClassification
from PIL import Image
import argparse
-import rasterio
-from shapely.geometry import Polygon
-from aigis.convert.coordinates import wkt_parser
-import geopandas as gpd

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

processor = ViTImageProcessor.from_pretrained("ViT_LCZs_v2", local_files_only=True)
model = ViTForImageClassification.from_pretrained("ViT_LCZs_v2", local_files_only=True).to(device)

def get_image_files(directory):
    """
    Recursively finds all image files in the given directory and its subdirectories.
    Args:
        directory (str): The path to the directory.
    Returns:
        List[str]: A list of image file paths.
    """
    image_files = []
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp')):
                image_files.append(os.path.join(root, file))
    return image_files

def predict(image_path):
    image = Image.open(image_path)
    inputs = processor(images=image, return_tensors="pt").to(device)
@@ -25,7 +38,7 @@ def predict(image_path):
    label = model.config.id2label[predicted_class_idx].split(",")[0]
    return label, float(predicted_class_prob)

-def predict_images(input_dir, output_name,user_crs=None):
def predict_images(input_dir, output_csv):
"""
Predicts the labels and confidences for a set of images in the given input directory,
and writes the results to a CSV file specified by the output_csv parameter.
@@ -41,39 +54,22 @@ def predict_images(input_dir, output_name,user_crs=None):
    predictions = []
    for image_file in image_files:
        image_path = os.path.join(input_dir, image_file)
-        # get the image bounds form the geotiff
-        with rasterio.open(image_path) as src:
-            bounds = src.bounds
-            if user_crs is None:
-                user_crs = src.crs.to_wkt()
-                user_crs = wkt_parser(user_crs)
-        # make a polygon out of bounds
-        polygon = Polygon([(bounds.left, bounds.bottom), (bounds.right, bounds.bottom), (bounds.right, bounds.top), (bounds.left, bounds.top)])

        label, confidence = predict(image_path)
-        predictions.append((image_file, label, confidence, polygon))
        predictions.append((image_file, label, confidence))

-    with open(output_name+".csv", 'w', newline='') as csvfile:
    with open(output_csv, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Image Filename', 'Prediction', 'Confidence'])
        writer.writerows(predictions)

-    # create a geodataframe from the predictions
-    gdf = gpd.GeoDataFrame(predictions, columns=['Image Filename', 'Prediction', 'Confidence', 'geometry'])
-    gdf.set_crs(user_crs, inplace=True)
-    gdf.to_parquet(output_name+".geoparquet")
-    gdf.to_file(output_name+".geojson", driver="GeoJSON")

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='ViT LCZ Classification')
    parser.add_argument('--input_dir', type=str, help='Path to input directory containing images')
-    parser.add_argument('--output_name', type=str, help='Path to output csv, geojson, and geparquet files')
-    parser.add_argument('--user_crs', type=str, help='User defined crs')
    parser.add_argument('--output_csv', type=str, help='Path to output CSV file')
    args = parser.parse_args()

    input_dir = args.input_dir
-    output_name = args.output_name
-    user_crs = args.user_crs
    output_csv = args.output_csv

-    predict_images(input_dir, output_name, user_crs)
    predict_images(input_dir, output_csv)
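
With the geospatial output removed, a hypothetical invocation of the simplified script looks like this (paths are illustrative):

python scripts/vit_predict.py --input_dir tiff_tiles/ --output_csv lcz_predictions.csv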
