Skip to content

Commit

Permalink
Add Flux variables CSV and enhance micromet functionality
Browse files Browse the repository at this point in the history
Added a comprehensive CSV file for Flux variables, enhancing data tracking. Updated multiple scripts in the micromet package to improve configuration handling, CSV path management, and file compilation. Moreover, introduced new dependencies in the environment file and pyproject.toml to support enhanced data visualization and statistical analysis features.
  • Loading branch information
inkenbrandt committed Sep 15, 2024
1 parent f952b54 commit b964b01
Show file tree
Hide file tree
Showing 5 changed files with 280 additions and 9 deletions.
36 changes: 28 additions & 8 deletions Notebooks/DL_test.ipynb

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion micromet/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from .ec import CalcFlux

# from .Larry_Flux_Calc import *

from .cs_wrapper import *
from .licor_wrapper import *
from .converter import *
from .tools import *
__version__ = '0.2.0'
Expand Down
74 changes: 74 additions & 0 deletions micromet/licor_wrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import argparse
import subprocess
import os
import sys


def run_eddypro(system="win", mode="desktop", caller="console", environment=None, proj_file=None):
    """
    Run the EddyPro engine (``eddypro_rp``) as a subprocess.

    A command-line flag is only emitted when its value differs from the
    default listed below, so calling this function with no arguments runs
    a bare ``eddypro_rp``.

    Args:
        system (str): Operating system. Options: 'win', 'linux', 'mac'. Default is 'win'.
        mode (str): Running mode. Options: 'embedded', 'desktop'. Default is 'desktop'.
        caller (str): Caller type. Options: 'gui', 'console'. Default is 'console'.
        environment (str): Working directory for embedded mode. Default is None.
        proj_file (str): Path to the project file (*.eddypro). Default is None.

    Returns:
        subprocess.CompletedProcess: When EddyPro exits with status 0. Its
            ``stdout``/``stderr`` attributes hold the captured text output.
        subprocess.CalledProcessError: When EddyPro exits with a non-zero
            status. The exception object is *returned*, not raised, so
            callers must check the type (or ``returncode``) of the result.

    Raises:
        OSError: Propagated from ``subprocess.run`` when the ``eddypro_rp``
            executable cannot be started (typically ``FileNotFoundError``
            when it is not on the PATH).
    """
    command = ["eddypro_rp"]

    # (flag, requested value, default value): non-default values only.
    flag_table = (
        ("-s", system, "win"),
        ("-m", mode, "desktop"),
        ("-c", caller, "console"),
    )
    for flag, value, default in flag_table:
        if value != default:
            command.extend([flag, value])

    if environment:
        command.extend(["-e", environment])

    # The project file is positional and must come after all options.
    if proj_file:
        command.append(proj_file)

    try:
        # check=True turns a non-zero exit status into CalledProcessError;
        # text=True decodes the captured stdout/stderr to str.
        result = subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"Error executing EddyPro: {e}")
        print(e.stderr)
        return e

    print("EddyPro executed successfully.")
    print(result.stdout)
    return result


def main(argv=None):
    """
    Command-line entry point: parse options and run the EddyPro engine.

    Args:
        argv (list[str] | None): Argument list to parse, without the program
            name. ``None`` (the default) makes argparse read ``sys.argv[1:]``.

    Exits with status 1 when ``eddypro_rp`` cannot be found on the PATH, and
    with EddyPro's own non-zero status when the run fails.
    """
    # Local import keeps this block self-contained; shutil is stdlib.
    import shutil

    parser = argparse.ArgumentParser(description="Run EddyPro engine from command line")
    parser.add_argument("-s", "--system", choices=["win", "linux", "mac"], default="win",
                        help="Operating system (default: win)")
    parser.add_argument("-m", "--mode", choices=["embedded", "desktop"], default="desktop",
                        help="Running mode (default: desktop)")
    parser.add_argument("-c", "--caller", choices=["gui", "console"], default="console",
                        help="Caller type (default: console)")
    parser.add_argument("-e", "--environment", help="Working directory for embedded mode")
    parser.add_argument("proj_file", nargs="?", help="Path to project file (*.eddypro)")

    args = parser.parse_args(argv)

    # shutil.which honours PATHEXT on Windows (so eddypro_rp.exe is found)
    # and copes with an unset PATH, unlike a manual os.access() scan.
    if shutil.which("eddypro_rp") is None:
        print(
            "Error: eddypro_rp is not found in the system PATH. Please add the EddyPro binary directory to your PATH.")
        sys.exit(1)

    result = run_eddypro(args.system, args.mode, args.caller, args.environment, args.proj_file)

    # run_eddypro returns (rather than raises) CalledProcessError on failure;
    # surface that to the shell instead of always exiting with status 0.
    if isinstance(result, subprocess.CalledProcessError):
        sys.exit(result.returncode or 1)


# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    sys.exit(main())
110 changes: 110 additions & 0 deletions micromet/outliers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from typing import Tuple

from sklearn.neighbors import LocalOutlierFactor

def detect_outliers_kmeans(timeseries: np.ndarray, n_clusters: int = 3, threshold: float = 2.0) -> Tuple[np.ndarray, np.ndarray]:
    """
    Detect outliers in a time series using K-means clustering.

    The values are standardized, clustered with K-means, and each point's
    distance to its nearest cluster center is computed. A point is flagged
    when that distance exceeds ``mean + threshold * std`` of the distances
    of all points.

    Args:
        timeseries (np.ndarray): Time series values. Any array-like (list,
            pandas Series, ndarray) is accepted; it is flattened to 1D.
        n_clusters (int): Number of clusters to use in K-means. Default is 3.
        threshold (float): How many standard deviations above the mean
            nearest-center distance a point must lie to be flagged as an
            outlier. Default is 2.0.

    Returns:
        Tuple[np.ndarray, np.ndarray]: A tuple containing two 1D numpy arrays:
            - Boolean array where True indicates an outlier.
            - Array of distances (in standardized units) from each point to
              its nearest cluster center.

    Note:
        Missing values are not filtered here. NOTE(review): scikit-learn's
        KMeans is expected to reject NaN input - drop or fill NaNs first.
    """
    # np.asarray lets lists and pandas Series through (a Series has no
    # .reshape); sklearn expects a 2D (n_samples, n_features) array.
    X = np.asarray(timeseries).reshape(-1, 1)
    X_scaled = StandardScaler().fit_transform(X)

    # Fixed random_state keeps the clustering reproducible between calls.
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    kmeans.fit(X_scaled)

    # transform() gives the distance to every center; keep the nearest one.
    distances = np.min(kmeans.transform(X_scaled), axis=1)

    # Flag points unusually far from their own cluster center.
    cutoff = np.mean(distances) + threshold * np.std(distances)
    is_outlier = distances > cutoff

    return is_outlier, distances

# Example usage:
# import numpy as np
# import matplotlib.pyplot as plt
#
# # Generate sample data
# np.random.seed(42)
# timeseries = np.random.randn(1000)
# timeseries[500:510] += 5 # Add some outliers
#
# # Detect outliers
# is_outlier, distances = detect_outliers_kmeans(timeseries)
#
# # Print results
# print(f"Number of outliers detected: {np.sum(is_outlier)}")
# print(f"Indices of outliers: {np.where(is_outlier)[0]}")
#
# # Plot the results
# plt.figure(figsize=(12, 6))
# plt.plot(timeseries, label='Time Series')
# plt.scatter(np.where(is_outlier)[0], timeseries[is_outlier], color='red', label='Outliers')
# plt.legend()
# plt.title('Time Series with Detected Outliers')
# plt.show()


def detect_outliers_lof(timeseries: np.ndarray, n_neighbors: int = 20, contamination: float = 0.08) -> Tuple[np.ndarray, np.ndarray]:
    """
    Detect outliers in a high-frequency time series using the Local Outlier Factor (LOF) algorithm.

    Args:
        timeseries (np.ndarray): Time series values. Any array-like (list,
            pandas Series, ndarray) is accepted; it is flattened to 1D.
        n_neighbors (int): Number of neighbors to consider for each point. Default is 20.
        contamination (float): The proportion of outliers in the data set. Default is 0.08.

    Returns:
        Tuple[np.ndarray, np.ndarray]: A tuple containing two 1D numpy arrays:
            - Boolean array where True indicates an outlier.
            - Array of outlier scores (negative LOF values, taken from the
              fitted model's ``negative_outlier_factor_``). Per the
              scikit-learn documentation, values near -1 are inliers and
              more negative values are more anomalous.

    Note:
        Missing values are not filtered here. NOTE(review): scikit-learn's
        LocalOutlierFactor is expected to reject NaN input - drop or fill
        NaNs first.
    """
    # np.asarray lets lists and pandas Series through (a Series has no
    # .reshape); sklearn expects a 2D (n_samples, n_features) array.
    X = np.asarray(timeseries).reshape(-1, 1)

    lof = LocalOutlierFactor(n_neighbors=n_neighbors, contamination=contamination)

    # fit_predict labels each sample: -1 for outliers, 1 for inliers.
    labels = lof.fit_predict(X)
    is_outlier = labels == -1

    outlier_scores = lof.negative_outlier_factor_

    return is_outlier, outlier_scores

# Example usage:
# import numpy as np
#
# # Generate sample data
# np.random.seed(42)
# timeseries = np.random.randn(10000)
# timeseries[5000:5010] += 10 # Add some outliers
#
# # Detect outliers
# is_outlier, outlier_scores = detect_outliers_lof(timeseries)
#
# # Print results
# print(f"Number of outliers detected: {np.sum(is_outlier)}")
# print(f"Indices of outliers: {np.where(is_outlier)[0]}")
66 changes: 66 additions & 0 deletions station_config/US-UTE.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
[METADATA]
climate_file_path = ../station_data/US-UTE_amfluxeddy.csv
station_latitude = 37.7353
station_longitude = -111.5708
station_elevation = 1729.3
missing_data_value = -9999
skiprows = 0
date_parser = %Y%m%d%H%M
site_id = US-UTE
country = USA
flux_measurements_method = Eddy Covariance
flux_measurements_variable = CO2
flux_measurements_operations = Continuous operation
site_name = UFLUX Escalante
igbp = CRO
network = AmeriFlux
research_topic = Validate remote-sensing data
terrain = Flat
aspect = FLAT
site_funding = Colorado River Authority of Utah
team_member_name = Paul Inkenbrandt
team_member_role = Technician
team_member_email = paulinkenbrandt@utah.gov
team_member_institution = Utah Geological Survey
url_ameriflux = http://ameriflux.lbl.gov/sites/siteinfo/US-UTE
utc_offset = -7
mat = 10.83
map = 258.318
climate_koeppen = Bsk
url = https://geology.utah.gov
site_snow_cover_days = 0.0
state = UT

[DATA]
datestring_col = TIMESTAMP_START
net_radiation_col = NETRAD
net_radiation_units = w/m2
sensible_heat_flux_col = H
sensible_heat_flux_units = w/m2
latent_heat_flux_col = LE
latent_heat_flux_units = w/m2
ground_flux_col = G
ground_flux_units = w/m2
shortwave_in_col = SW_IN
shortwave_in_units = w/m2
shortwave_out_col = SW_OUT
shortwave_out_units = w/m2
longwave_in_col = LW_IN
longwave_in_units = w/m2
longwave_out_col = LW_OUT
longwave_out_units = w/m2
vap_press_def_col = VPD
vap_press_def_units = hPa
avg_temp_col = T_SONIC
avg_temp_units = C
rel_humidity_col = RH_1_1_1
rel_humidity_units = (%): Relative humidity, range 0-100
wind_spd_col = WS
wind_spd_units = m/s
wind_dir_col = WD
wind_dir_units = azimuth (degrees)
theta_1 = SWC_1_1_1
theta_1_units = (%): Soil water content (volumetric), range 0-100
theta_2 = SWC_2_1_1
theta_2_units = (%): Soil water content (volumetric), range 0-100

0 comments on commit b964b01

Please sign in to comment.