Add Flux variables CSV and enhance micromet functionality

Added a comprehensive CSV file for Flux variables, enhancing data tracking. Updated multiple scripts in the micromet package to improve configuration handling, CSV path management, and file compilation. Moreover, introduced new dependencies in the environment file and pyproject.toml to support enhanced data visualization and statistical analysis features.
inkenbrandt · Sep 15, 2024 · b964b01 · b964b01
1 parent f952b54
commit b964b01
Show file tree

Hide file tree

Showing 5 changed files with 280 additions and 9 deletions.
diff --git a/Notebooks/DL_test.ipynb b/Notebooks/DL_test.ipynb
diff --git a/micromet/__init__.py b/micromet/__init__.py
@@ -1,7 +1,8 @@
 from .ec import CalcFlux
 
 # from .Larry_Flux_Calc import *
-
+from .cs_wrapper import *
+from .licor_wrapper import *
 from .converter import *
 from .tools import *
 __version__ = '0.2.0'

diff --git a/micromet/licor_wrapper.py b/micromet/licor_wrapper.py
@@ -0,0 +1,74 @@
+import argparse
+import subprocess
+import os
+import sys
+
+
+def run_eddypro(system="win", mode="desktop", caller="console", environment=None, proj_file=None):
+    """
+    Launch the ``eddypro_rp`` executable and report the outcome on stdout.
+
+    An option flag is only placed on the command line when its value differs
+    from the default listed below; ``environment`` and ``proj_file`` are only
+    passed when they are non-empty.
+
+    Args:
+    system (str): Operating system. Options: 'win', 'linux', 'mac'. Default is 'win'.
+    mode (str): Running mode. Options: 'embedded', 'desktop'. Default is 'desktop'.
+    caller (str): Caller type. Options: 'gui', 'console'. Default is 'console'.
+    environment (str): Working directory for embedded mode. Default is None.
+    proj_file (str): Path to the project file (*.eddypro). Default is None.
+
+    Returns:
+    subprocess.CompletedProcess: Result of the run when the process exits with status 0.
+    subprocess.CalledProcessError: The caught exception object (returned, not raised)
+        when the process exits with a non-zero status.
+    """
+    # (flag, supplied value, default value): emitted only when the value is non-default.
+    optional_flags = (
+        ("-s", system, "win"),
+        ("-m", mode, "desktop"),
+        ("-c", caller, "console"),
+    )
+
+    cmd = ["eddypro_rp"]
+    for flag, value, default in optional_flags:
+        if value != default:
+            cmd += [flag, value]
+    if environment:
+        cmd += ["-e", environment]
+    if proj_file:
+        cmd.append(proj_file)
+
+    # check=True turns a non-zero exit status into CalledProcessError.
+    try:
+        completed = subprocess.run(cmd, check=True, capture_output=True, text=True)
+    except subprocess.CalledProcessError as err:
+        print(f"Error executing EddyPro: {err}")
+        print(err.stderr)
+        return err
+
+    print("EddyPro executed successfully.")
+    print(completed.stdout)
+    return completed
+
+
+def main():
+    """
+    Command-line entry point: parse the options and hand them to ``run_eddypro``.
+
+    Exits with status 1 (after printing an error) when no ``eddypro_rp``
+    executable can be located on the system PATH.
+    """
+    # Local import so this function does not depend on the module's import block.
+    import shutil
+
+    parser = argparse.ArgumentParser(description="Run EddyPro engine from command line")
+    parser.add_argument("-s", "--system", choices=["win", "linux", "mac"], default="win",
+                        help="Operating system (default: win)")
+    parser.add_argument("-m", "--mode", choices=["embedded", "desktop"], default="desktop",
+                        help="Running mode (default: desktop)")
+    parser.add_argument("-c", "--caller", choices=["gui", "console"], default="console",
+                        help="Caller type (default: console)")
+    parser.add_argument("-e", "--environment", help="Working directory for embedded mode")
+    parser.add_argument("proj_file", nargs="?", help="Path to project file (*.eddypro)")
+
+    args = parser.parse_args()
+
+    # Ensure eddypro_rp is in the system PATH. shutil.which also tries the
+    # PATHEXT suffixes on Windows (so "eddypro_rp.exe" is found) and does not
+    # raise when the PATH environment variable is unset.
+    if shutil.which("eddypro_rp") is None:
+        print(
+            "Error: eddypro_rp is not found in the system PATH. Please add the EddyPro binary directory to your PATH.")
+        sys.exit(1)
+
+    run_eddypro(args.system, args.mode, args.caller, args.environment, args.proj_file)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/micromet/outliers.py b/micromet/outliers.py
@@ -0,0 +1,110 @@
+import numpy as np
+from sklearn.cluster import KMeans
+from sklearn.preprocessing import StandardScaler
+from typing import Tuple
+
+from sklearn.neighbors import LocalOutlierFactor
+
+def detect_outliers_kmeans(timeseries: np.ndarray, n_clusters: int = 3, threshold: float = 2.0) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    Detect outliers in a time series using K-means clustering.
+
+    The series is standardized, clustered, and each point's distance to its
+    nearest cluster center is computed. A point is flagged when that distance
+    exceeds ``mean(distances) + threshold * std(distances)``, where the mean
+    and standard deviation are taken over all points.
+
+    Args:
+    timeseries (np.ndarray): 1D array-like (numpy array, list, pandas Series, ...)
+        containing the time series data.
+    n_clusters (int): Number of clusters to use in K-means. Default is 3.
+    threshold (float): Number of standard deviations above the mean
+        nearest-center distance beyond which a point is an outlier. Default is 2.0.
+
+    Returns:
+    Tuple[np.ndarray, np.ndarray]: A tuple containing two 1D numpy arrays:
+        - Boolean array where True indicates an outlier.
+        - Array of distances (in standardized units) from each point to its
+          nearest cluster center.
+    """
+    # Coerce to an ndarray first so inputs without a .reshape method
+    # (lists, pandas Series) are accepted, then make it a single-feature column.
+    X = np.asarray(timeseries).reshape(-1, 1)
+    scaler = StandardScaler()
+    X_scaled = scaler.fit_transform(X)
+
+    # Apply K-means clustering (fixed random_state for repeatable results)
+    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
+    kmeans.fit(X_scaled)
+
+    # transform() gives the distance to every center; keep the smallest per point
+    distances = np.min(kmeans.transform(X_scaled), axis=1)
+
+    # Calculate mean and standard deviation of distances
+    mean_distance = np.mean(distances)
+    std_distance = np.std(distances)
+
+    # Identify outliers
+    is_outlier = distances > mean_distance + threshold * std_distance
+
+    return is_outlier, distances
+
+# Example usage:
+# import numpy as np
+# import matplotlib.pyplot as plt
+#
+# # Generate sample data
+# np.random.seed(42)
+# timeseries = np.random.randn(1000)
+# timeseries[500:510] += 5  # Add some outliers
+#
+# # Detect outliers
+# is_outlier, distances = detect_outliers_kmeans(timeseries)
+#
+# # Print results
+# print(f"Number of outliers detected: {np.sum(is_outlier)}")
+# print(f"Indices of outliers: {np.where(is_outlier)[0]}")
+#
+# # Plot the results
+# plt.figure(figsize=(12, 6))
+# plt.plot(timeseries, label='Time Series')
+# plt.scatter(np.where(is_outlier)[0], timeseries[is_outlier], color='red', label='Outliers')
+# plt.legend()
+# plt.title('Time Series with Detected Outliers')
+# plt.show()
+
+
+def detect_outliers_lof(timeseries: np.ndarray, n_neighbors: int = 20, contamination: float = 0.08) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    Detect outliers in a high-frequency time series using the Local Outlier Factor (LOF) algorithm.
+
+    Args:
+    timeseries (np.ndarray): 1D array-like (numpy array, list, pandas Series, ...)
+        containing the time series data.
+    n_neighbors (int): Number of neighbors to consider for each point. Default is 20.
+    contamination (float): The proportion of outliers in the data set. Default is 0.08.
+
+    Returns:
+    Tuple[np.ndarray, np.ndarray]: A tuple containing two 1D numpy arrays:
+        - Boolean array where True indicates an outlier.
+        - Array of outlier scores (negative LOF values). Per scikit-learn,
+          values near -1 are typical of inliers and more negative values
+          are more anomalous.
+    """
+    # Coerce to an ndarray first so inputs without a .reshape method
+    # (lists, pandas Series) are accepted, then make it a single-feature column.
+    X = np.asarray(timeseries).reshape(-1, 1)
+
+    # Initialize and fit the LOF model; fit_predict labels outliers as -1
+    lof = LocalOutlierFactor(n_neighbors=n_neighbors, contamination=contamination)
+    y_pred = lof.fit_predict(X)
+
+    # Get the outlier scores
+    outlier_scores = lof.negative_outlier_factor_
+
+    # Create a boolean mask for outliers
+    is_outlier = y_pred == -1
+
+    return is_outlier, outlier_scores
+
+# Example usage:
+# import numpy as np
+#
+# # Generate sample data
+# np.random.seed(42)
+# timeseries = np.random.randn(10000)
+# timeseries[5000:5010] += 10  # Add some outliers
+#
+# # Detect outliers
+# is_outlier, outlier_scores = detect_outliers_lof(timeseries)
+#
+# # Print results
+# print(f"Number of outliers detected: {np.sum(is_outlier)}")
+# print(f"Indices of outliers: {np.where(is_outlier)[0]}")
diff --git a/station_config/US-UTE.ini b/station_config/US-UTE.ini
@@ -0,0 +1,66 @@
+[METADATA]
+climate_file_path = ../station_data/US-UTE_amfluxeddy.csv
+station_latitude = 37.7353
+station_longitude = -111.5708
+station_elevation = 1729.3
+missing_data_value = -9999
+skiprows = 0
+date_parser = %Y%m%d%H%M
+site_id = US-UTE
+country = USA
+flux_measurements_method = Eddy Covariance
+flux_measurements_variable = CO2
+flux_measurements_operations = Continuous operation
+site_name = UFLUX Escalante
+igbp = CRO
+network = AmeriFlux
+research_topic = Validate remote-sensing data
+terrain = Flat
+aspect = FLAT
+site_funding = Colorado River Authority of Utah
+team_member_name = Paul Inkenbrandt
+team_member_role = Technician
+team_member_email = paulinkenbrandt@utah.gov
+team_member_institution = Utah Geological Survey
+url_ameriflux = http://ameriflux.lbl.gov/sites/siteinfo/US-UTE
+utc_offset = -7
+mat = 10.83
+map = 258.318
+climate_koeppen = Bsk
+url = https://geology.utah.gov
+site_snow_cover_days = 0.0
+state = UT
+
+[DATA]
+datestring_col = TIMESTAMP_START
+net_radiation_col = NETRAD
+net_radiation_units = w/m2
+sensible_heat_flux_col = H
+sensible_heat_flux_units = w/m2
+latent_heat_flux_col = LE
+latent_heat_flux_units = w/m2
+ground_flux_col = G
+ground_flux_units = w/m2
+shortwave_in_col = SW_IN
+shortwave_in_units = w/m2
+shortwave_out_col = SW_OUT
+shortwave_out_units = w/m2
+longwave_in_col = LW_IN
+longwave_in_units = w/m2
+longwave_out_col = LW_OUT
+longwave_out_units = w/m2
+vap_press_def_col = VPD
+vap_press_def_units = hPa
+avg_temp_col = T_SONIC
+avg_temp_units = C
+rel_humidity_col = RH_1_1_1
+rel_humidity_units = (%): Relative humidity, range 0-100
+wind_spd_col = WS
+wind_spd_units = m/s
+wind_dir_col = WD
+wind_dir_units = azimuth (degrees)
+theta_1 = SWC_1_1_1
+theta_1_units = (%): Soil water content (volumetric), range 0-100
+theta_2 = SWC_2_1_1
+theta_2_units = (%): Soil water content (volumetric), range 0-100
+