From d6fd84bdb58ffc5bd5e9420ff09d74bc46d729ce Mon Sep 17 00:00:00 2001
From: Cheng-Chia <20410209+cheng-chia@users.noreply.github.com>
Date: Mon, 27 Feb 2023 10:16:57 -0800
Subject: [PATCH] Update data.py

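Add get_perlin_data_set_0_n_1000_k_10(), a loader for the perlin synthetic test data set (perlin_data_set_0_n_1000_k_10.csv)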
---
 mgwr/tests/data.py | 36 +++++++++++++++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/mgwr/tests/data.py b/mgwr/tests/data.py
index 59582c6..1419879 100644
--- a/mgwr/tests/data.py
+++ b/mgwr/tests/data.py
@@ -8,6 +8,40 @@
 
 data_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data")
 
+def get_perlin_data_set_0_n_1000_k_10(nx=11):
+    """Load perlin_data_set_0_n_1000_k_10.csv as standardized regression inputs.
+
+    :param nx: number of x columns, intercept included; clamped to the range 2..11
+    :return: (x, y, coords, data_path); x[:, 0] is the intercept, x[:, 1:] and y are (v - mean) / std
+    """
+    depVarName = 'Y_new'
+    indVarNames = ['X0','X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'X9']
+    # Clamp nx so that nx - 1 lies between 1 and the number of available X columns.
+    if nx-1 > len(indVarNames):
+        nx = len(indVarNames) + 1
+    if nx < 2:
+        nx = 2
+
+    data_path = os.path.join(data_dir, "perlin_data_set_0_n_1000_k_10.csv") # other sizes of the synthetic data can be substituted, e.g. perlin_data_set_0_n_40000_k_10.csv
+    df = pd.read_csv(data_path)
+    # Keep only the first nx - 1 predictor names.
+    indVarNames = indVarNames[:(nx-1)]
+    # NOTE(review): NUM and pd are imported outside this hunk -- presumably numpy and pandas.
+    n = df.shape[0]
+    k = len(indVarNames)
+    # y becomes an (n, 1) column; x starts as all ones so column 0 remains the intercept.
+    y = df[depVarName].values.reshape((-1,1))
+    x = NUM.ones((n, k+1), dtype=float)
+    # Copy each selected predictor into columns 1..k of x.
+    for column, variable in enumerate(indVarNames):
+        x[:, column + 1] = df[[variable]].values.flatten()
+    # Build coords, then standardize y and x[:, 1:]; NOTE(review): a constant column gives std == 0 here.
+    coords = list(zip(df['x_coord_earth'], df['y_coord_earth']))
+    coords = NUM.asarray(coords)
+    y = (y - y.mean(axis=0)) / y.std(axis=0)
+    x[:, 1:] = (x[:, 1:] - x[:, 1:].mean(axis=0)) / x[:, 1:].std(axis=0)
+    return (x, y, coords, data_path)
+
 def get_test2021_sub_xxx_10(xxx, nx=11):
     depVarName = 'Y_new'
     indVarNames = 'X0, X1, X2, X3, X4, X5, X6, X7, X8, X9'.split(", ")
@@ -567,4 +601,4 @@ def get_covid_data():
         x[:, column + 1] = varData.flatten()
 
     coords = list(zip(df['Long'], df['Lat']))
-    return (x, y, coords, n, k)
\ No newline at end of file
+    return (x, y, coords, n, k)