local commit
Merge branch 'master' of https://github.com/USEPA/LakeCat into master

# Conflicts:
#	.gitignore
mhweber committed Mar 30, 2021
2 parents 4d95071 + 6855ebd commit f1f3651
Showing 4 changed files with 73 additions and 37 deletions.
4 changes: 4 additions & 0 deletions .gitignore
@@ -1,4 +1,8 @@
<<<<<<< HEAD
.Rproj.user
.Rhistory
.RData
.Ruserdata
=======
lake_cat_config.py
>>>>>>> 6855ebd44f8253ab82168173af248a140c6b3ba6
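
Note that the hunk above commits the literal conflict markers; a resolved .gitignore would presumably keep both sides:

.Rproj.user
.Rhistory
.RData
.Ruserdata
lake_cat_config.py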
18 changes: 9 additions & 9 deletions LakeCat_functions.py
@@ -1131,15 +1131,15 @@ def main (nhd, out):
    os.mkdir("%s/shps" % out)
    os.mkdir("%s/joinTables" % out)
    os.mkdir("%s/LakeCat_npy" % out)
    # NHDbounds = gpd.read_file(
    #     "%s/NHDPlusGlobalData/BoundaryUnit.shp" % nhd).drop(
    #         ['AreaSqKM','DrainageID','Shape_Area',
    #          'Shape_Leng','UnitName'], axis=1)
    # if not os.path.exists("%s/Lake_QA.csv" % out):
    #     NHDtblMerge(nhd, NHDbounds, out)
    #     makeBasins(nhd, NHDbounds, out)


    NHDbounds = gpd.read_file(
        "%s/NHDPlusGlobalData/BoundaryUnit.shp" % nhd).drop(
            ['AreaSqKM','DrainageID','Shape_Area',
             'Shape_Leng','UnitName'], axis=1)
    if not os.path.exists("%s/Lake_QA.csv" % out):
        NHDtblMerge(nhd, NHDbounds, out)
        makeBasins(nhd, NHDbounds, out)
    makeNParrays('%s' % out)
##############################################################################

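For context, a minimal sketch of how the now-uncommented framework-building path in main(nhd, out) might be driven; the paths below are placeholders, not values from this commit:

# Hypothetical driver for LakeCat_functions.main (paths are assumptions).
# main() creates the shps/joinTables/LakeCat_npy output directories, reads
# the NHDPlus boundary units, runs NHDtblMerge and makeBasins when no
# Lake_QA.csv exists yet, and finishes by writing the .npy arrays with
# makeNParrays.
from LakeCat_functions import main

nhd = "D:/NHDPlusV21"        # top-level NHDPlus directory (placeholder)
out = "D:/Projects/LakeCat"  # framework output directory (placeholder)
main(nhd, out)
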
53 changes: 25 additions & 28 deletions MakeFinalTables_LakeCat.py
@@ -7,45 +7,40 @@
#

import sys, os
import zipfile
import pandas as pd
#from collections import OrderedDict
ctl = pd.read_csv(sys.argv[1]) #ctl = pd.read_csv('D:/Projects/LakeCat_scrap/ControlTable_LakeCat_RD.csv')
ctl = pd.read_csv(r'F:/GitProjects/LakeCat/ControlTable_LakeCat.csv')
ctl = pd.read_csv('F:/GitProjects/NARS/Landscape Metrics/ControlTable_LakeCat_NLA17.csv')
#inputs = OrderedDict([('10U','MS'),('10L','MS'),('07','MS'),('11','MS'),('06','MS'),('05','MS'),('08','MS'),\
# ('01','NE'),('02','MA'),('03N','SA'),('03S','SA'),('03W','SA'),('04','GL'),('09','SR'),\
# ('12','TX'),('13','RG'),('14','CO'),('15','CO'),('16','GB'),('17','PN'),('18','CA')])
inDir = ctl.DirectoryLocations.values[2]
outDir = ctl.DirectoryLocations.values[6]
tables = dict()
for row in range(len(ctl.Final_Table_Name)):
    if ctl.run[row] == 1 and len(ctl.Final_Table_Name[row]):
        tables[ctl.Final_Table_Name[row]] = ctl.FullTableName.loc[ctl.Final_Table_Name == ctl.Final_Table_Name[row]].tolist()
        tables[ctl.Final_Table_Name[row]].sort()

from lake_cat_config import FINAL_DIR, OUT_DIR

ctl = pd.read_csv("ControlTable_LakeCat.csv")

runners = ctl.query("run == 1").groupby("Final_Table_Name")
tables = runners["FullTableName"].unique().to_dict()
missing = []
for table in tables:
    for var in range(len(tables[table])):
        if not os.path.exists(inDir + '/%s.csv'%(tables[table][var])):
            missing.append(tables[table][var] + '.csv')
for table, metrics in tables.items():
    for metric in metrics:
        accumulated_file = OUT_DIR + "/{}.csv".format(metric)
        if not os.path.exists(accumulated_file):
            missing.append(metric)

if len(missing) > 0:
    for miss in missing:
        print 'Missing ' + miss
    print 'Check output from LakeCat.py'
        print('Missing ' + miss)
    print('Check output from LakeCat.py')
    sys.exit()
allStats = pd.DataFrame()
for table in tables:
    if not os.path.exists(outDir +'/' + table + '.csv'):
    if not os.path.exists(FINAL_DIR +'/' + table + '.csv'):
        print 'Running ' + table + ' .....'
        for var in range(len(tables[table])):
            print var
            accum = ctl.accum_type.loc[ctl.Final_Table_Name == table].any()
            metricName = ctl.MetricName.loc[ctl.FullTableName == tables[table][var]].item()
            metricType = ctl.MetricType.loc[ctl.FullTableName == tables[table][var]].item()
            appendMetric = ctl.AppendMetric.loc[ctl.FullTableName == tables[table][var]].item()
            if appendMetric == 'none':
                appendMetric = ''
            conversion = float(ctl.Conversion.loc[ctl.FullTableName == tables[table][var]].values[0])
            tbl = pd.read_csv(inDir + '/%s.csv'%(tables[table][var]))
            tbl = pd.read_csv(OUT_DIR + '/%s.csv'%(tables[table][var]))
            frontCols = [title for title in tbl.columns for x in ['COMID','AreaSqKm','PctFull','inStreamCat'] if x in title and not 'Up' in title]
            catArea = frontCols[1]
            catPct = frontCols[2]
@@ -125,12 +120,14 @@
'MIN': [final[c].min()],
'MAX':[final[c].max()]})])
allStats = pd.concat([allStats,statTbl])
print(statTbl)
final = final.set_index('COMID').fillna('NA')
final = final[final.columns.tolist()[:5] + [x for x in final.columns[5:] if 'Cat' in x] + [x for x in final.columns[5:] if 'Ws' in x]].fillna('NA')
final.to_csv('%s/%s.csv' % (outDir, table))

out_file = '%s/%s.csv' % (FINAL_DIR, table)
final.to_csv(out_file)
# zip up the file....
zf = zipfile.ZipFile("{}/zips/{}.zip".format(FINAL_DIR, table), mode="w")
zf.write(out_file, "{}.csv".format(table), compress_type=zipfile.ZIP_DEFLATED)
zf.close()
print 'All Done.....'


# if summaryfield != None:
# off.columns = [col.replace('M3','') for col in off.columns]
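
The refactor above replaces the hand-rolled row loop with a pandas groupby when building the tables dict. A toy sketch of what the new two-liner yields, with invented control-table rows:

# Toy illustration of the tables-dict construction (rows are invented).
import pandas as pd

ctl = pd.DataFrame({
    "run":              [1, 1, 0],
    "Final_Table_Name": ["AgMidHiSlopes", "AgMidHiSlopes", "Dams"],
    "FullTableName":    ["CDL_HiSlope", "CDL_MidSlope", "NABD"],
})
runners = ctl.query("run == 1").groupby("Final_Table_Name")
tables = runners["FullTableName"].unique().to_dict()
print(tables)
# {'AgMidHiSlopes': array(['CDL_HiSlope', 'CDL_MidSlope'], dtype=object)}
# rows with run == 0 ('Dams') drop out before grouping

Each finished table is then written to FINAL_DIR and compressed into FINAL_DIR/zips/<table>.zip by the new zipfile block at the end of the loop.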
35 changes: 35 additions & 0 deletions lake_cat_config.py.template
@@ -0,0 +1,35 @@


# location of the landscape layers to perform statistics on
LYR_DIR = 'C:/path/to/landscape/layers'

# path to the framework created for LakeCat
FRAMEWORK = ("L:/Priv/CORFiles/Geospatial_Library_Projects/LakeCat/"
"LakeCat_Framework")

# path to Allocated StreamCat files
STREAMCAT_DIR = ("L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/"
"Allocation_and_Accumulation")

# location of the NHDPlus, indicate the top-level directory
NHD_DIR = 'D:/NHDPlusV21'

# TIGER shapefile is needed for calculating the PctFull values of Point type
# metrics as well as organizing the final tables into separate files for states.
STATES_FILE = "path/to/file/tl_2008_us_state.shp"

# location to write out accumulated StreamCat data <- this is intermediate
# these files will hold statistics for...
# $Cat -> statistics for each catchment
# $UpCat -> the accumulated upstream catchment statistics
# $Ws -> statistics for the entire watershed
OUT_DIR = ('C:/path/to/write/out/files/to')

# location for the final tables
FINAL_DIR = "L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/FTP_Staging/Hydroregions"

# files that hold the pct_full data, created from ???
pct_full_file = "L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/ALL_BORDER_CATS.csv"
pct_full_file_RP100 = "L:/Priv/CORFiles/Geospatial_Library_Projects/StreamCat/ControlTables/ALL_BORDER_CATS_Rp100.csv"

#final_tables_dir L:/Priv/CORFiles/Geospatial_Library/Data/Project/StreamCat/FTP_Staging/StreamCat/HydroRegions
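
As a usage sketch (the copy step is an assumption implied by the .template suffix and the new .gitignore entry): copy the template to lake_cat_config.py, edit the paths for the local machine, and the scripts import it as a plain module:

# Hypothetical setup -- copy the template, then edit the paths locally:
#   copy lake_cat_config.py.template -> lake_cat_config.py
# The copy is git-ignored (see the .gitignore hunk above), keeping
# machine-specific paths out of version control.
from lake_cat_config import FINAL_DIR, OUT_DIR

print(OUT_DIR)    # intermediate accumulated tables are read from here
print(FINAL_DIR)  # final per-table CSVs and zips are written here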
