This repository has been archived by the owner on Aug 20, 2024. It is now read-only.

Add array support for samplesheets #128

Merged: 9 commits, Jan 30, 2024
@@ -97,18 +97,19 @@ class SamplesheetConverter {
def Boolean headerCheck = true
this.rows = []
resetCount()

def List outputs = samplesheetList.collect { Map<String,String> fullRow ->
increaseCount()

Map<String,String> row = fullRow.findAll { it.value != "" }
Map<String,Object> row = fullRow.findAll { it.value != "" }
def Set rowKeys = containsHeader ? row.keySet() : ["empty"].toSet()
def String yamlInfo = fileType == "yaml" ? " for entry ${this.getCount()}." : ""
def String entryInfo = fileType in ["yaml", "json"] ? " for entry ${this.getCount()}." : ""

// Check the header (CSV/TSV) or present fields (YAML)
if(headerCheck) {
def unexpectedFields = containsHeader ? rowKeys - allFields : []
if(unexpectedFields.size() > 0) {
this.warnings << "The samplesheet contains following unchecked field(s): ${unexpectedFields}${yamlInfo}".toString()
this.warnings << "The samplesheet contains following unchecked field(s): ${unexpectedFields}${entryInfo}".toString()
}

if(fileType != 'yaml'){
@@ -128,7 +129,7 @@ class SamplesheetConverter {

for( Map.Entry<String, Map> field : schemaFields ){
def String key = containsHeader ? field.key : "empty"
def String input = row[key]
def Object input = row[key]

// Check if the field is deprecated
if(field['value']['deprecated']){
@@ -159,7 +160,7 @@ class SamplesheetConverter {
if(input in booleanUniques[key] && input){
this.errors << addSample("The '${key}' value needs to be unique. '${input}' was found at least twice in the samplesheet.".toString())
}
booleanUniques[key].add(input)
booleanUniques[key].add(input as String)
}
else if(unique && uniqueIsList) {
def Map<String,String> newMap = (Map) row.subMap((List) [key] + (List) unique)
@@ -176,20 +177,20 @@ class SamplesheetConverter {
def List<String> metaNames = field['value']['meta'] as List<String>
if(metaNames) {
for(name : metaNames) {
meta[name] = (input != '' && input) ?
castToType(input, field) :
field['value']['default'] != null ?
castToType(field['value']['default'] as String, field) :
meta[name] = (input != '' && input != null) ?
castToNFType(input, field) :
field['value']['default'] != null ?
castToNFType(field['value']['default'], field) :
null
}
}
else {
def inputFile = (input != '' && input) ?
castToType(input, field) :
field['value']['default'] != null ?
castToType(field['value']['default'] as String, field) :
def inputVal = (input != '' && input != null) ?
castToNFType(input, field) :
field['value']['default'] != null ?
castToNFType(field['value']['default'], field) :
[]
output.add(inputFile)
output.add(inputVal)
}
}
// Add meta to the output when a meta field has been created
@@ -253,26 +254,36 @@ class SamplesheetConverter {
}

// Function to transform an input field from the samplesheet to its desired type
private static castToType(
String input,
private static castToNFType(
Object input,
Map.Entry<String, Map> field
) {
def String type = field['value']['type']
def String key = field.key

// Recursively call this function for each item in the array if the field is an array-type
// The returned values are collected into a single array
if (type == "array") {
def Map.Entry<String, Map> subfield = (Map.Entry<String, Map>) Map.entry(field.key, field['value']['items'])
log.debug "subfield = $subfield"
def ArrayList result = input.collect{ castToNFType(it, subfield) } as ArrayList
return result
}

def String inputStr = input as String
// Convert string values
if(type == "string" || !type) {
def String result = input as String
def String result = inputStr as String

// Check and convert to the desired format
def String format = field['value']['format']
if(format) {
if(format == "file-path-pattern") {
def ArrayList inputFiles = Nextflow.file(input) as ArrayList
def ArrayList inputFiles = Nextflow.file(inputStr) as ArrayList
return inputFiles
}
if(format.contains("path")) {
def Path inputFile = Nextflow.file(input) as Path
def Path inputFile = Nextflow.file(inputStr) as Path
return inputFile
}
}
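
The array branch above is the heart of this PR: when a field's type is "array", castToNFType rebuilds a field entry from the items sub-schema and maps itself over every element, so nested arrays are handled by the same recursion. Below is a standalone sketch of that behaviour, a simplified mimic with hypothetical field shapes rather than the full converter:

```groovy
// Simplified mimic of the recursion in castToNFType; only "integer" and
// "array" are handled here, and the field definitions are hypothetical.
def castSketch(Object input, Map fieldValue) {
    if (fieldValue.type == 'array') {
        // Recurse with the items sub-schema and collect results into one list
        return (input as List).collect { castSketch(it, fieldValue.items as Map) }
    }
    if (fieldValue.type == 'integer') {
        return input as int
    }
    return input as String
}

// "lanes": ["1", "2"] cast against {"type": "array", "items": {"type": "integer"}}
assert castSketch(['1', '2'], [type: 'array', items: [type: 'integer']]) == [1, 2]
// Nested arrays recurse level by level
assert castSketch([['3']], [type: 'array', items: [type: 'array', items: [type: 'integer']]]) == [[3]]
```

The inputStr cast that follows the array branch exists because YAML and JSON values now reach the converter as arbitrary objects rather than Strings, so every scalar branch normalises through a single String first.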
@@ -285,36 +296,36 @@ class SamplesheetConverter {
// Convert number values
else if(type == "number") {
try {
def int result = input as int
def int result = inputStr as int
return result
}
catch (NumberFormatException e) {
log.debug("Could not convert ${input} to an integer. Trying to convert to a float.")
}

try {
def float result = input as float
def float result = inputStr as float
return result
}
catch (NumberFormatException e) {
log.debug("Could not convert ${input} to a float. Trying to convert to a double.")
log.debug("Could not convert ${inputStr} to a float. Trying to convert to a double.")
}

def double result = input as double
def double result = inputStr as double
return result
}

// Convert integer values
else if(type == "integer") {

def int result = input as int
def int result = inputStr as int
return result
}

// Convert boolean values
else if(type == "boolean") {

if(input.toLowerCase() == "true") {
if(inputStr.toLowerCase() == "true") {
return true
}
return false
@@ -184,23 +184,29 @@ class SchemaValidator extends PluginExtensionPoint {
def String fileType = SamplesheetConverter.getFileType(samplesheetFile)
def String delimiter = fileType == "csv" ? "," : fileType == "tsv" ? "\t" : null
def List<Map<String,String>> fileContent
def List<Map<String,String>> fileContentCasted
def Boolean s3PathCheck = params.validationS3PathCheck ? params.validationS3PathCheck : false
def Map types = variableTypes(schemaFile.toString(), baseDir)
if (types.find{ it.value == "array" } as Boolean && fileType in ["csv", "tsv"]){
def msg = "Using \"type\": \"array\" in schema with a \".$fileType\" samplesheet is not supported\n"
log.error("ERROR: Validation of pipeline parameters failed!")
throw new SchemaValidationException(msg, [])
}
def Boolean containsHeader = !(types.keySet().size() == 1 && types.keySet()[0] == "")

if(!containsHeader){
types = ["empty": types[""]]
}
if(fileType == "yaml"){
fileContent = new Yaml().load((samplesheetFile.text)).collect {
fileContentCasted = new Yaml().load((samplesheetFile.text)).collect {
if(containsHeader) {
return it as Map
}
return ["empty": it] as Map
}
}
else if(fileType == "json"){
fileContent = new JsonSlurper().parseText(samplesheetFile.text).collect {
fileContentCasted = new JsonSlurper().parseText(samplesheetFile.text).collect {
if(containsHeader) {
return it as Map
}
@@ -209,8 +215,8 @@ class SchemaValidator extends PluginExtensionPoint {
}
else {
fileContent = samplesheetFile.splitCsv(header:containsHeader ?: ["empty"], strip:true, sep:delimiter, quote:'\"')
fileContentCasted = castToType(fileContent, types)
}
def List<Map<String,String>> fileContentCasted = castToType(fileContent, types)
if (validateFile(false, samplesheetFile.toString(), fileContentCasted, schemaFile.toString(), baseDir, s3PathCheck)) {
log.debug "Validation passed: '$samplesheetFile' with '$schemaFile'"
}
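
The guard above makes the limitation explicit: an array-typed field has to arrive as a real list, which the YAML and JSON parsers produce but a flat CSV/TSV row cannot. Note also that castToType is now applied only in the CSV/TSV branch, since YAML and JSON content is already typed by its parser. A minimal sketch of the distinction, using SnakeYAML as the plugin does (samplesheet content hypothetical):

```groovy
import org.yaml.snakeyaml.Yaml

// Hypothetical YAML samplesheet: the "fastq" field parses into a real List
def samplesheetText = '''
- sample: sample1
  fastq:
    - data/sample1_R1.fastq.gz
    - data/sample1_R2.fastq.gz
'''
def rows = new Yaml().load(samplesheetText).collect { it as Map }
assert rows[0].fastq instanceof List

// A CSV row is a flat Map<String,String>, so an array-typed field has
// nothing to bind to; hence the SchemaValidationException raised above.
```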
@@ -430,23 +436,29 @@ class SchemaValidator extends PluginExtensionPoint {
def String fileType = SamplesheetConverter.getFileType(file_path)
def String delimiter = fileType == "csv" ? "," : fileType == "tsv" ? "\t" : null
def List<Map<String,String>> fileContent
def List<Map<String,String>> fileContentCasted
def Map types = variableTypes(schema_name, baseDir)
if (types.find{ it.value == "array" } as Boolean && fileType in ["csv", "tsv"]){
def msg = "${colors.red}Using {\"type\": \"array\"} in schema with a \".$fileType\" samplesheet is not supported${colors.reset}\n"
log.error("ERROR: Validation of pipeline parameters failed!")
throw new SchemaValidationException(msg, [])
}
def Boolean containsHeader = !(types.keySet().size() == 1 && types.keySet()[0] == "")

if(!containsHeader){
types = ["empty": types[""]]
}

if(fileType == "yaml"){
fileContent = new Yaml().load(file_path.text).collect {
fileContentCasted = new Yaml().load(file_path.text).collect {
if(containsHeader) {
return it as Map
}
return ["empty": it] as Map
}
}
else if(fileType == "json"){
fileContent = new JsonSlurper().parseText(file_path.text).collect {
fileContentCasted = new JsonSlurper().parseText(file_path.text).collect {
if(containsHeader) {
return it as Map
}
@@ -455,8 +467,8 @@ class SchemaValidator extends PluginExtensionPoint {
}
else {
fileContent = file_path.splitCsv(header:containsHeader ?: ["empty"], strip:true, sep:delimiter, quote:'\"')
fileContentCasted = castToType(fileContent, types)
}
def List<Map<String,String>> fileContentCasted = castToType(fileContent, types)
if (validateFile(useMonochromeLogs, key, fileContentCasted, schema_name, baseDir, s3PathCheck)) {
log.debug "Validation passed: '$key': '$file_path' with '$schema_name'"
}
@@ -554,6 +566,8 @@ class SchemaValidator extends PluginExtensionPoint {
Boolean monochrome_logs, String paramName, Object fileContent, String schemaFilename, String baseDir, Boolean s3PathCheck = false

) {
// declare this once for the method
def colors = logColours(monochrome_logs)

// Load the schema
def String schema_string = Files.readString( Path.of(getSchemaPath(baseDir, schemaFilename)) )
@@ -591,7 +605,10 @@ class SchemaValidator extends PluginExtensionPoint {
pathsToCheck.each { String fieldName ->
for (int i=0; i < arrayJSON.size(); i++) {
def JSONObject entry = arrayJSON.getJSONObject(i)
if ( entry.has(fieldName) ) {
if ( entry.has(fieldName) && entry[fieldName] instanceof JSONArray ) {
entry[fieldName].collect{ pathExists(it.toString(), " Entry ${(i+1).toString()} - ${fieldName.toString()}", s3PathCheck) }
}
else if ( entry.has(fieldName) ) {
pathExists(entry[fieldName].toString(), " Entry ${(i+1).toString()} - ${fieldName.toString()}", s3PathCheck)
}
}
@@ -607,13 +624,11 @@ class SchemaValidator extends PluginExtensionPoint {
validator.performValidation(schema, arrayJSON);
if (this.hasErrors()) {
// Needed for custom errors such as pathExists() errors
def colors = logColours(monochrome_logs)
def msg = "${colors.red}The following errors have been detected:\n\n" + this.getErrors().join('\n').trim() + "\n${colors.reset}\n"
log.error("ERROR: Validation of '$paramName' file failed!")
throw new SchemaValidationException(msg, this.getErrors())
}
} catch (ValidationException e) {
def colors = logColours(monochrome_logs)
JSONObject exceptionJSON = (JSONObject) e.toJSON()
JSONObject objectJSON = new JSONObject();
objectJSON.put("objects",arrayJSON);
@@ -651,7 +666,10 @@ class SchemaValidator extends PluginExtensionPoint {
def Map properties = (Map) group.value['properties']
for (p in properties) {
def String key = (String) p.key
def Map property = properties[key] as Map
def Map<String,Object> property = properties[key] as Map
if(property.containsKey('items')){
property = property.items as Map
}
if (property.containsKey('exists') && property.containsKey('format')) {
if (property['exists'] && (property['format'] == 'file-path' || property['format'] == 'directory-path' || property['format'] == 'path') ) {
exists.push(key)
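
Together with the JSONArray branch added to validateFile above, this items unwrapping is what lets path checks reach inside arrays: a field joins pathsToCheck when its items sub-schema carries a path-like format plus exists, and every element of the array is then tested individually. A plausible shape for such a schema property (field name hypothetical):

```json
"fastq": {
  "type": "array",
  "items": {
    "type": "string",
    "format": "file-path",
    "exists": true
  }
}
```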
@@ -867,4 +867,28 @@ class PluginExtensionMethodsTest extends Dsl2Spec{
error.message == '''The following errors have been detected:\n\n* -- Entry 1: Missing required value: sample\n* -- Entry 2: Missing required value: sample\n\n'''
!stdout
}
}

def 'should fail because of arrays with csv' () {
given:
def schema = Path.of('src/testResources/nextflow_schema_with_samplesheet_converter_arrays.json').toAbsolutePath().toString()
def SCRIPT_TEXT = """
params.monochrome_logs = true
params.input = 'src/testResources/correct.csv'
include { validateParameters } from 'plugin/nf-validation'

validateParameters(parameters_schema: '$schema', monochrome_logs: params.monochrome_logs)
"""

when:
dsl_eval(SCRIPT_TEXT)
def stdout = capture
.toString()
.readLines()
.findResults {it.contains('WARN nextflow.validation.SchemaValidator') || it.startsWith('* --') ? it : null }

then:
def error = thrown(SchemaValidationException)
error.message == '''Using {"type": "array"} in schema with a ".csv" samplesheet is not supported\n'''
!stdout
}
}
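
The expected message here matches the guard added in SchemaValidator verbatim. The referenced nextflow_schema_with_samplesheet_converter_arrays.json is not shown in this diff; presumably it types at least one samplesheet column as an array, which is what trips the .csv check. For contrast, a hedged sketch of the passing counterpart (the .yaml path is hypothetical):

```groovy
// Same array-typed schema, but with a YAML samplesheet: only .csv/.tsv
// inputs are rejected, so this variant should validate cleanly.
// '$schema' interpolates from the enclosing feature method, as above.
def SCRIPT_TEXT = """
    params.monochrome_logs = true
    params.input = 'src/testResources/correct_arrays.yaml'
    include { validateParameters } from 'plugin/nf-validation'

    validateParameters(parameters_schema: '$schema', monochrome_logs: params.monochrome_logs)
"""
dsl_eval(SCRIPT_TEXT)   // expected: no SchemaValidationException
```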