Skip to content

Commit

Permalink
Merge pull request #11 from AgPipeline/data_quality_checks
Browse files: browse the repository at this point in the history
Truncate calculated values to significant digits
  • Loading branch information
Chris-Schnaufer authored Nov 19, 2020
2 parents 5f76a6b + abf14de commit 04bec29
Showing 1 changed file with 14 additions and 4 deletions.
18 changes: 14 additions & 4 deletions transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import datetime
import logging
import math
import numbers
import os
import random
import time
Expand Down Expand Up @@ -58,6 +59,8 @@
FILE_NAME_GEO_CSV = "rgb_plot_geo.csv"
FILE_NAME_BETYDB_CSV = "rgb_plot_betydb.csv"

# The number of significant digits to keep
SIGNIFICANT_DIGITS = 3

class __internal__:
"""Class containing functions for this file only
Expand Down Expand Up @@ -759,7 +762,7 @@ def perform_process(self, environment: Environment, check_md: dict, transformer_
Return:
Returns a dictionary with the results of processing
"""
# pylint: disable=unused-argument
# pylint: disable=unused-argument, no-self-use
# The following pylint checks are disabled because satisfying them would make the code unreadable
# pylint: disable=too-many-statements, too-many-locals, too-many-branches

Expand Down Expand Up @@ -802,6 +805,7 @@ def perform_process(self, environment: Environment, check_md: dict, transformer_
num_image_files = 0
entries_written = 0
additional_files_list = []
significant_digits_format = '.' + str(SIGNIFICANT_DIGITS) + 'g'
for one_file in __internal__.filter_file_list_by_ext(check_md['list_files'](), KNOWN_IMAGE_FILE_EXTS):

plot_name = None
Expand Down Expand Up @@ -834,16 +838,22 @@ def perform_process(self, environment: Environment, check_md: dict, transformer_

# Write the data points geographically and otherwise
for idx, trait_name in enumerate(variable_names):
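# Format numeric values to SIGNIFICANT_DIGITS significant digits (the 'g' format rounds, it does not
# truncate); non-numeric values are converted with str().
# NOTE(review): integers are numbers.Number too, so large ones are written in exponent form
# (e.g. 12345 -> '1.23e+04') - confirm this is intended for count-like traits.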
if isinstance(values[idx], numbers.Number):
value_str = format(values[idx], significant_digits_format)
else:
value_str = str(values[idx])

# Geostreams can only handle one field at a time so we write out one row per field/value pair
geo_traits['trait'] = trait_name
geo_traits['value'] = str(values[idx])
geo_traits['value'] = value_str
if write_geostreams_csv:
__internal__.write_trait_csv(geostreams_csv_file, geo_csv_header, geo_fields, geo_traits)

# csv and BETYdb can handle wide rows with multiple values so we just set the field
# values here and write the single row after the loop
csv_traits[variable_names[idx]] = str(values[idx])
bety_traits[variable_names[idx]] = str(values[idx])
csv_traits[variable_names[idx]] = value_str
bety_traits[variable_names[idx]] = value_str

csv_traits['site'] = plot_name
csv_traits['timestamp'] = datestamp
Expand Down

0 comments on commit 04bec29

Please sign in to comment.