Merge pull request #28 from Sydney-Informatics-Hub/model-upgrade
Model loading and full use case
hlydecker committed Mar 28, 2024
2 parents 5f4d539 + cfd2d13 commit 7d054d3
Showing 6 changed files with 262 additions and 28 deletions.
76 changes: 75 additions & 1 deletion aigis/annotate/utils.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# aerialannotation/utils.py
# aigis.annotate.utils.py

import geopandas as gpd
import numpy as np
from shapely.geometry import Polygon
@@ -61,3 +61,77 @@ def geojson_csv_filter(geojson_path, csv_path):
    filtered_gdf = gdf[gdf["id"].isin(ids_to_keep)]

    return filtered_gdf


def read_boundary_file(file_path):
    """Read a boundary file, reproject it to EPSG:3857, and dissolve it into a single geometry."""
    try:
        boundary_data = gpd.read_file(file_path)
        boundary_data = boundary_data.to_crs(epsg=3857)  # Convert to EPSG 3857
        boundary_data = boundary_data.dissolve()  # Merge all features into one geometry
        return boundary_data
    except Exception as e:
        print(f"Error reading boundary file: {e}")
        return None

def create_grid(boundary_data, grid_size):
    """Create a square grid covering the bounds of boundary_data and save it to grid.geojson."""
    try:
        # Calculate the bounding box of the boundary data
        bbox = boundary_data.total_bounds

        # Calculate the number of grid cells in each dimension
        num_cells_x = int((bbox[2] - bbox[0]) / grid_size)
        num_cells_y = int((bbox[3] - bbox[1]) / grid_size)

        # Build the grid cell polygons; collecting them in a list and constructing
        # the GeoDataFrame once avoids DataFrame.append (removed in pandas 2.0)
        cells = []
        for i in range(num_cells_x):
            for j in range(num_cells_y):
                # Calculate the coordinates of the grid cell
                minx = bbox[0] + i * grid_size
                miny = bbox[1] + j * grid_size
                maxx = minx + grid_size
                maxy = miny + grid_size
                cells.append(Polygon([(minx, miny), (minx, maxy), (maxx, maxy), (maxx, miny)]))

        grid = gpd.GeoDataFrame(geometry=cells, crs="EPSG:3857")

        # Create the GeoJSON structure
        features = []
        for index, row in grid.iterrows():
            # geometry.bounds is (minx, miny, maxx, maxy), so top is bounds[3]
            # and bottom is bounds[1]
            feature = {
                "type": "Feature",
                "properties": {
                    "id": index,
                    "left": row.geometry.bounds[0],
                    "top": row.geometry.bounds[3],
                    "right": row.geometry.bounds[2],
                    "bottom": row.geometry.bounds[1],
                    "row_index": index // num_cells_y,
                    "col_index": index % num_cells_y
                },
                "geometry": {
                    "type": "Polygon",
                    "coordinates": [list(row.geometry.exterior.coords)]
                }
            }
            features.append(feature)

        geojson = {
            "type": "FeatureCollection",
            "name": "GSU_grid_1",
            "crs": {"type": "name", "properties": {"name": "urn:ogc:def:crs:EPSG::3857"}},
            "features": features
        }

        # Save the grid as a GeoJSON file
        grid.to_file("grid.geojson", driver="GeoJSON")

        return geojson
    except Exception as e:
        print(f"Error creating grid: {e}")
        return None
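
A minimal usage sketch for the two new helpers (the boundary path and the 250 m cell size are illustrative, not part of the commit):

from aigis.annotate.utils import read_boundary_file, create_grid

boundary = read_boundary_file("suburb_boundary.geojson")  # hypothetical input file
if boundary is not None:
    grid = create_grid(boundary, grid_size=250)  # cell size in EPSG:3857 units; also writes grid.geojson
    print(f"Grid contains {len(grid['features'])} cells")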
80 changes: 80 additions & 0 deletions aigis/segment/models.py
@@ -0,0 +1,80 @@
# -*- coding: utf-8 -*-
import os
import wget

def download_detectron2_model_weights(model_type):
    """
    Downloads the model weights and configuration file for the specified model type.
    Args:
        model_type (str or tuple): The type of model to download. If 'trees' or 'buildings', the corresponding
            model weights and configuration file will be downloaded. If a tuple is provided, it should contain
            the URLs for the model weights and configuration file.
    Returns:
        tuple: A tuple containing the filenames of the downloaded model weights and configuration file.
    Raises:
        ValueError: If an invalid model_type is provided.
    """
    if model_type == "trees":
        model_weights_url = "https://huggingface.co/spaces/SIH/aerial-segmentation-model-selection/resolve/main/tree_model_weights/treev3model_0012499.pth"
        config_url = "https://huggingface.co/spaces/SIH/aerial-segmentation-model-selection/resolve/main/tree_model_weights/treev3_tms_sixmaps_cfg.yaml"
    elif model_type == "buildings":
        model_weights_url = "https://huggingface.co/spaces/SIH/building-segmentation/resolve/main/model_weights/model_final.pth"
        config_url = "https://huggingface.co/spaces/SIH/building-segmentation/resolve/main/model_weights/buildings_poc_cfg.yml"
    else:
        # Assume a (model_weights_url, config_url) tuple was supplied
        model_weights_url, config_url = model_type

    # Download model weights
    model_weights_filename = os.path.basename(model_weights_url)
    wget.download(model_weights_url, model_weights_filename)

    # Download config file
    config_filename = os.path.basename(config_url)
    wget.download(config_url, config_filename)

    return model_weights_filename, config_filename

def download_vit_model_weights(model_type, config_url=None, model_url=None, preprocessor_url=None, training_args_url=None):
    """
    Downloads the files from the specified URLs using wget.
    Args:
        model_type (str): The type of model to download. If 'lczs', the corresponding
            files will be downloaded. If any other value is provided, the user can
            specify the URLs for the config, model, preprocessor, and training_args.
        config_url (str): The URL for the config file.
        model_url (str): The URL for the model file.
        preprocessor_url (str): The URL for the preprocessor file.
        training_args_url (str): The URL for the training_args file.
    Returns:
        list: A list of filenames of the downloaded files.
    """
    if model_type == "lczs":
        config_url = "https://huggingface.co/spaces/SIH/lcz-classification/resolve/main/ViT_LCZs_v3/config.json"
        model_url = "https://huggingface.co/spaces/SIH/lcz-classification/resolve/main/ViT_LCZs_v3/model.safetensors"
        preprocessor_url = "https://huggingface.co/spaces/SIH/lcz-classification/resolve/main/ViT_LCZs_v3/preprocessor_config.json"
        training_args_url = "https://huggingface.co/spaces/SIH/lcz-classification/resolve/main/ViT_LCZs_v3/training_args.bin"

    # Download config file
    config_filename = os.path.basename(config_url)
    wget.download(config_url, config_filename)
    # Download model file
    model_filename = os.path.basename(model_url)
    wget.download(model_url, model_filename)
    # Download preprocessor file
    preprocessor_filename = os.path.basename(preprocessor_url)
    wget.download(preprocessor_url, preprocessor_filename)
    # Download training args file
    training_args_filename = os.path.basename(training_args_url)
    wget.download(training_args_url, training_args_filename)

    return [config_filename, model_filename, preprocessor_filename, training_args_filename]
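
A short sketch of how the download helpers are meant to be called (the custom URLs are placeholders, not real endpoints):

from aigis.segment.models import download_detectron2_model_weights, download_vit_model_weights

# Built-in presets
weights_file, config_file = download_detectron2_model_weights("trees")

# Custom Detectron2 model: pass a (weights_url, config_url) tuple instead
weights_file, config_file = download_detectron2_model_weights(
    ("https://example.com/my_weights.pth", "https://example.com/my_cfg.yaml")
)

# ViT preset for local climate zone (LCZ) classification
vit_files = download_vit_model_weights("lczs")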
Empty file added aigis/utils/__init__.py
47 changes: 47 additions & 0 deletions aigis/utils/analysis.py
@@ -0,0 +1,47 @@
# -*- coding: utf-8 -*-

import geopandas as gpd
import matplotlib.pyplot as plt

def calculate_coverage(boundary_file, building_file, tree_file):
    # Load boundary file and reproject everything to a metric CRS (EPSG:3857,
    # matching the rest of the pipeline) so the area ratios are meaningful
    boundary_data = gpd.read_file(boundary_file).to_crs(epsg=3857)

    # Load building outline geoparquet
    building_data = gpd.read_parquet(building_file).to_crs(epsg=3857)

    # Load tree outline geoparquet
    tree_data = gpd.read_parquet(tree_file).to_crs(epsg=3857)

    # Calculate coverage percentage for buildings
    # (note: summing areas double-counts overlapping polygons)
    building_coverage = (building_data.geometry.area.sum() / boundary_data.geometry.area.sum()) * 100

    # Calculate coverage percentage for trees
    tree_coverage = (tree_data.geometry.area.sum() / boundary_data.geometry.area.sum()) * 100

    # Generate histogram for building sizes
    building_data['area'] = building_data.geometry.area
    building_data['area'].plot.hist(bins=10)
    plt.xlabel('Building Area')
    plt.ylabel('Frequency')
    plt.title('Distribution of Building Sizes')
    plt.show()

    # Generate histogram for tree sizes
    tree_data['area'] = tree_data.geometry.area
    tree_data['area'].plot.hist(bins=10)
    plt.xlabel('Tree Area')
    plt.ylabel('Frequency')
    plt.title('Distribution of Tree Sizes')
    plt.show()

    return building_coverage, tree_coverage

# Usage example (guarded so that importing this module does not run it)
if __name__ == "__main__":
    boundary_file = 'path/to/boundary_file.shp'
    building_file = 'path/to/building_outline.parquet'
    tree_file = 'path/to/tree_outline.parquet'

    building_coverage, tree_coverage = calculate_coverage(boundary_file, building_file, tree_file)
    print(f"Building coverage: {building_coverage}%")
    print(f"Tree coverage: {tree_coverage}%")
37 changes: 37 additions & 0 deletions scripts/predict_area.py
@@ -0,0 +1,37 @@
import argparse
import subprocess
from aigis.annotate.utils import read_boundary_file, create_grid
from aigis.segment.models import download_detectron2_model_weights
from aigis.utils.analysis import calculate_coverage

def main(args):
    # Dissolve the boundary (EPSG:3857) and write the tile grid to grid.geojson
    boundary = read_boundary_file(args.boundary)
    create_grid(boundary, args.grid_size)

    download_detectron2_model_weights("trees")
    download_detectron2_model_weights("buildings")

    subprocess.run(["python", "aigis/scripts/get_raster_jpeg.py", "grid.geojson"])
    subprocess.run(["python", "aigis/scripts/jpeg2tiff.py", "output_tiles", "grid.geojson"])

    # Trees: predict on the downloaded raster tiles
    subprocess.run(["python", "aigis/scripts/prediction_batch_detectron2.py", "--indir", "tiff_tiles/", "-p", "*.tif", "-c", "treev3_tms_sixmaps_cfg.yaml", "-w", "treev3model_0012499.pth", "-t", "0.3", "--coco-out", "trees.json", "-s", "0.0"])
    subprocess.run(["python", "aigis/scripts/coco2geojson.py", "tiff_tiles/", "trees.json", "--simplify-tolerance", "3.0", "--geoparquet-output", "trees.geoparquet", "--geojson-output", "trees.geojson"])

    # Buildings: predict on the downloaded raster tiles
    subprocess.run(["python", "aigis/scripts/prediction_batch_detectron2.py", "--indir", "tiff_tiles/", "-p", "*.tif", "-c", "buildings_poc_cfg.yml", "-w", "model_final.pth", "-t", "0.1", "--coco-out", "buildings.json", "-s", "0.0"])
    subprocess.run(["python", "aigis/scripts/coco2geojson.py", "tiff_tiles/", "buildings.json", "--simplify-tolerance", "0.1", "--geoparquet-output", "buildings.geoparquet", "--geojson-output", "buildings.geojson"])

    # calculate_coverage expects file paths, so pass the original boundary path
    building_coverage, tree_coverage = calculate_coverage(args.boundary, "buildings.geoparquet", "trees.geoparquet")
    print(f"Building coverage: {building_coverage}%")
    print(f"Tree coverage: {tree_coverage}%")

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--boundary", required=True, help="Path to boundary file")
    parser.add_argument("--grid-size", type=int, required=True, help="Grid cell size in EPSG:3857 units (metres)")
    args = parser.parse_args()

    main(args)
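
A hypothetical end-to-end invocation of the new script (the boundary path and grid size are illustrative):

python scripts/predict_area.py --boundary suburb_boundary.geojson --grid-size 250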
50 changes: 23 additions & 27 deletions scripts/vit_predict.py
@@ -5,16 +5,29 @@
from transformers import ViTImageProcessor, ViTForImageClassification
from PIL import Image
import argparse
-import rasterio
-from shapely.geometry import Polygon
-from aigis.convert.coordinates import wkt_parser
-import geopandas as gpd

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

processor = ViTImageProcessor.from_pretrained("ViT_LCZs_v2", local_files_only=True)
model = ViTForImageClassification.from_pretrained("ViT_LCZs_v2", local_files_only=True).to(device)

def get_image_files(directory):
    """
    Recursively finds all image files in the given directory and its subdirectories.
    Args:
        directory (str): The path to the directory.
    Returns:
        List[str]: A list of image file paths.
    """
    image_files = []
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp')):
                image_files.append(os.path.join(root, file))
    return image_files

def predict(image_path):
    image = Image.open(image_path)
    inputs = processor(images=image, return_tensors="pt").to(device)
@@ -25,7 +38,7 @@ def predict(image_path):
    label = model.config.id2label[predicted_class_idx].split(",")[0]
    return label, float(predicted_class_prob)

-def predict_images(input_dir, output_name,user_crs=None):
def predict_images(input_dir, output_csv):
"""
Predicts the labels and confidences for a set of images in the given input directory,
and writes the results to a CSV file specified by the output_csv parameter.
@@ -41,39 +54,22 @@ def predict_images(input_dir, output_name,user_crs=None):
    predictions = []
    for image_file in image_files:
        image_path = os.path.join(input_dir, image_file)
-        # get the image bounds form the geotiff
-        with rasterio.open(image_path) as src:
-            bounds = src.bounds
-            if user_crs is None:
-                user_crs = src.crs.to_wkt()
-                user_crs = wkt_parser(user_crs)
-        # make a polygon out of bounds
-        polygon = Polygon([(bounds.left, bounds.bottom), (bounds.right, bounds.bottom), (bounds.right, bounds.top), (bounds.left, bounds.top)])

        label, confidence = predict(image_path)
-        predictions.append((image_file, label, confidence, polygon))
        predictions.append((image_file, label, confidence))

-    with open(output_name+".csv", 'w', newline='') as csvfile:
    with open(output_csv, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Image Filename', 'Prediction', 'Confidence'])
        writer.writerows(predictions)

-    # create a geodataframe from the predictions
-    gdf = gpd.GeoDataFrame(predictions, columns=['Image Filename', 'Prediction', 'Confidence', 'geometry'])
-    gdf.set_crs(user_crs, inplace=True)
-    gdf.to_parquet(output_name+".geoparquet")
-    gdf.to_file(output_name+".geojson", driver="GeoJSON")

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='ViT LCZ Classification')
    parser.add_argument('--input_dir', type=str, help='Path to input directory containing images')
-    parser.add_argument('--output_name', type=str, help='Path to output csv, geojson, and geparquet files')
-    parser.add_argument('--user_crs', type=str, help='User defined crs')
    parser.add_argument('--output_csv', type=str, help='Path to output CSV file')
    args = parser.parse_args()

    input_dir = args.input_dir
-    output_name = args.output_name
-    user_crs = args.user_crs
    output_csv = args.output_csv

-    predict_images(input_dir, output_name, user_crs)
    predict_images(input_dir, output_csv)
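
With the geospatial output removed, a hypothetical invocation of the simplified script looks like this (paths are illustrative):

python scripts/vit_predict.py --input_dir tiff_tiles/ --output_csv lcz_predictions.csv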
