-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add Flux variables CSV and enhance micromet functionality
Added a comprehensive CSV file for Flux variables, enhancing data tracking. Updated multiple scripts in the micromet package to improve configuration handling, CSV path management, and file compilation. Moreover, introduced new dependencies in the environment file and pyproject.toml to support enhanced data visualization and statistical analysis features.
- Loading branch information
1 parent
f952b54
commit b964b01
Showing
5 changed files
with
280 additions
and
9 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
import argparse | ||
import subprocess | ||
import os | ||
import sys | ||
|
||
|
||
def run_eddypro(system="win", mode="desktop", caller="console", environment=None, proj_file=None):
    """
    Launch the ``eddypro_rp`` executable and report its outcome.

    Only options that differ from their defaults are placed on the command
    line, so calling this with no arguments runs plain ``eddypro_rp``.

    Args:
        system (str): Operating system, one of 'win', 'linux', 'mac'. Passed as ``-s`` unless it is 'win'.
        mode (str): Running mode, 'embedded' or 'desktop'. Passed as ``-m`` unless it is 'desktop'.
        caller (str): Caller type, 'gui' or 'console'. Passed as ``-c`` unless it is 'console'.
        environment (str): Working directory for embedded mode. Passed as ``-e`` when truthy.
        proj_file (str): Path to the project file (*.eddypro). Appended last when truthy.

    Returns:
        subprocess.CompletedProcess: When the process exits with status 0.
        subprocess.CalledProcessError: When the process exits non-zero; the
        exception is caught, reported on stdout and returned (not raised).
    """
    # (flag, supplied value, default value): a flag is emitted only when the
    # supplied value differs from the default.
    optional_flags = (
        ("-s", system, "win"),
        ("-m", mode, "desktop"),
        ("-c", caller, "console"),
    )

    argv = ["eddypro_rp"]
    for flag, value, default in optional_flags:
        if value != default:
            argv += [flag, value]
    if environment:
        argv += ["-e", environment]
    if proj_file:
        argv.append(proj_file)

    try:
        completed = subprocess.run(argv, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"Error executing EddyPro: {e}")
        print(e.stderr)
        return e

    print("EddyPro executed successfully.")
    print(completed.stdout)
    return completed
|
||
|
||
def main():
    """
    Command-line entry point for running the EddyPro engine.

    Parses the ``-s/--system``, ``-m/--mode``, ``-c/--caller`` and
    ``-e/--environment`` options plus an optional positional project file,
    checks that ``eddypro_rp`` can be found on the PATH, and hands the
    parsed values to ``run_eddypro``.

    Exits with status 1 when the executable cannot be located, and with
    EddyPro's own non-zero status when the run fails. Returns normally
    (``None``) when the run succeeds.
    """
    # Function-scope import: shutil is standard library and only needed here.
    import shutil

    parser = argparse.ArgumentParser(description="Run EddyPro engine from command line")
    parser.add_argument("-s", "--system", choices=["win", "linux", "mac"], default="win",
                        help="Operating system (default: win)")
    parser.add_argument("-m", "--mode", choices=["embedded", "desktop"], default="desktop",
                        help="Running mode (default: desktop)")
    parser.add_argument("-c", "--caller", choices=["gui", "console"], default="console",
                        help="Caller type (default: console)")
    parser.add_argument("-e", "--environment", help="Working directory for embedded mode")
    parser.add_argument("proj_file", nargs="?", help="Path to project file (*.eddypro)")

    args = parser.parse_args()

    # shutil.which applies PATHEXT on Windows (so eddypro_rp.exe is found) and
    # falls back to a default search path when PATH is unset; the previous
    # manual os.access scan did neither.
    if shutil.which("eddypro_rp") is None:
        print(
            "Error: eddypro_rp is not found in the system PATH. Please add the EddyPro binary directory to your PATH.")
        sys.exit(1)

    result = run_eddypro(args.system, args.mode, args.caller, args.environment, args.proj_file)

    # run_eddypro returns either a CompletedProcess or the caught
    # CalledProcessError; both expose `returncode`. Surface a failure through
    # the exit status so shells and schedulers can detect it.
    returncode = getattr(result, "returncode", 0)
    if returncode:
        sys.exit(returncode)


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
import numpy as np | ||
from sklearn.cluster import KMeans | ||
from sklearn.preprocessing import StandardScaler | ||
from typing import Tuple | ||
|
||
from sklearn.neighbors import LocalOutlierFactor | ||
|
||
def detect_outliers_kmeans(timeseries: np.ndarray, n_clusters: int = 3, threshold: float = 2.0) -> Tuple[np.ndarray, np.ndarray]:
    """
    Detect outliers in a time series using K-means clustering.

    The series is standardized, clustered with K-means, and each point is
    scored by its distance to the nearest cluster center. A point is flagged
    when that distance exceeds ``mean + threshold * std`` of all distances.

    Args:
        timeseries (np.ndarray): 1D array (or array-like, e.g. a list) containing the time series data.
        n_clusters (int): Number of clusters to use in K-means. Default is 3.
        threshold (float): How many standard deviations above the mean nearest-center
            distance a point's distance must be to count as an outlier. Default is 2.0.

    Returns:
        Tuple[np.ndarray, np.ndarray]: A tuple containing two 1D numpy arrays:
            - Boolean array where True indicates an outlier.
            - Array of distances from each point to its nearest cluster center,
              measured on the standardized (scaled) values.
    """
    # np.asarray leaves ndarrays untouched but also admits lists and other
    # array-likes, which have no .reshape method of their own.
    X = np.asarray(timeseries).reshape(-1, 1)
    X_scaled = StandardScaler().fit_transform(X)

    # Fixed random_state keeps the clustering reproducible between calls.
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    kmeans.fit(X_scaled)

    # transform() gives the distance to every center; keep the smallest per point.
    distances = np.min(kmeans.transform(X_scaled), axis=1)

    # Cut-off is relative to the spread of the distances themselves.
    cutoff = np.mean(distances) + threshold * np.std(distances)
    is_outlier = distances > cutoff

    return is_outlier, distances
|
||
# Example usage: | ||
# import numpy as np | ||
# import matplotlib.pyplot as plt | ||
# | ||
# # Generate sample data | ||
# np.random.seed(42) | ||
# timeseries = np.random.randn(1000) | ||
# timeseries[500:510] += 5 # Add some outliers | ||
# | ||
# # Detect outliers | ||
# is_outlier, distances = detect_outliers_kmeans(timeseries) | ||
# | ||
# # Print results | ||
# print(f"Number of outliers detected: {np.sum(is_outlier)}") | ||
# print(f"Indices of outliers: {np.where(is_outlier)[0]}") | ||
# | ||
# # Plot the results | ||
# plt.figure(figsize=(12, 6)) | ||
# plt.plot(timeseries, label='Time Series') | ||
# plt.scatter(np.where(is_outlier)[0], timeseries[is_outlier], color='red', label='Outliers') | ||
# plt.legend() | ||
# plt.title('Time Series with Detected Outliers') | ||
# plt.show() | ||
|
||
|
||
def detect_outliers_lof(timeseries: np.ndarray, n_neighbors: int = 20, contamination: float = 0.08) -> Tuple[np.ndarray, np.ndarray]:
    """
    Flag outliers in a high-frequency time series with Local Outlier Factor (LOF).

    Args:
        timeseries (np.ndarray): 1D numpy array containing the time series data.
        n_neighbors (int): Number of neighbors LOF considers for each point. Default is 20.
        contamination (float): Expected proportion of outliers in the data set. Default is 0.08.

    Returns:
        Tuple[np.ndarray, np.ndarray]: A tuple containing two 1D numpy arrays:
            - Boolean array where True indicates an outlier.
            - The fitted model's ``negative_outlier_factor_`` values, one per point.
    """
    # sklearn estimators expect a 2D (n_samples, n_features) input.
    column = timeseries.reshape(-1, 1)

    model = LocalOutlierFactor(n_neighbors=n_neighbors, contamination=contamination)
    labels = model.fit_predict(column)

    # fit_predict labels outliers as -1; the scores are read off the fitted model.
    return labels == -1, model.negative_outlier_factor_
|
||
# Example usage: | ||
# import numpy as np | ||
# | ||
# # Generate sample data | ||
# np.random.seed(42) | ||
# timeseries = np.random.randn(10000) | ||
# timeseries[5000:5010] += 10 # Add some outliers | ||
# | ||
# # Detect outliers | ||
# is_outlier, outlier_scores = detect_outliers_lof(timeseries) | ||
# | ||
# # Print results | ||
# print(f"Number of outliers detected: {np.sum(is_outlier)}") | ||
# print(f"Indices of outliers: {np.where(is_outlier)[0]}") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
[METADATA] | ||
climate_file_path = ../station_data/US-UTE_amfluxeddy.csv | ||
station_latitude = 37.7353 | ||
station_longitude = -111.5708 | ||
station_elevation = 1729.3 | ||
missing_data_value = -9999 | ||
skiprows = 0 | ||
date_parser = %Y%m%d%H%M | ||
site_id = US-UTE | ||
country = USA | ||
flux_measurements_method = Eddy Covariance | ||
flux_measurements_variable = CO2 | ||
flux_measurements_operations = Continuous operation | ||
site_name = UFLUX Escalante | ||
igbp = CRO | ||
network = AmeriFlux | ||
research_topic = Validate remote-sensing data | ||
terrain = Flat | ||
aspect = FLAT | ||
site_funding = Colorado River Authority of Utah | ||
team_member_name = Paul Inkenbrandt | ||
team_member_role = Technician | ||
team_member_email = paulinkenbrandt@utah.gov | ||
team_member_institution = Utah Geological Survey | ||
url_ameriflux = http://ameriflux.lbl.gov/sites/siteinfo/US-UTE | ||
utc_offset = -7 | ||
mat = 10.83 | ||
map = 258.318 | ||
climate_koeppen = Bsk | ||
url = https://geology.utah.gov | ||
site_snow_cover_days = 0.0 | ||
state = UT | ||
|
||
[DATA] | ||
datestring_col = TIMESTAMP_START | ||
net_radiation_col = NETRAD | ||
net_radiation_units = w/m2 | ||
sensible_heat_flux_col = H | ||
sensible_heat_flux_units = w/m2 | ||
latent_heat_flux_col = LE | ||
latent_heat_flux_units = w/m2 | ||
ground_flux_col = G | ||
ground_flux_units = w/m2 | ||
shortwave_in_col = SW_IN | ||
shortwave_in_units = w/m2 | ||
shortwave_out_col = SW_OUT | ||
shortwave_out_units = w/m2 | ||
longwave_in_col = LW_IN | ||
longwave_in_units = w/m2 | ||
longwave_out_col = LW_OUT | ||
longwave_out_units = w/m2 | ||
vap_press_def_col = VPD | ||
vap_press_def_units = hPa | ||
avg_temp_col = T_SONIC | ||
avg_temp_units = C | ||
rel_humidity_col = RH_1_1_1 | ||
rel_humidity_units = (%): Relative humidity, range 0-100 | ||
wind_spd_col = WS | ||
wind_spd_units = m/s | ||
wind_dir_col = WD | ||
wind_dir_units = azimuth (degrees) | ||
theta_1 = SWC_1_1_1 | ||
theta_1_units = (%): Soil water content (volumetric), range 0-100 | ||
theta_2 = SWC_2_1_1 | ||
theta_2_units = (%): Soil water content (volumetric), range 0-100 | ||
|