This repository has been archived by the owner on Aug 20, 2024. It is now read-only.

Add array support for samplesheets #128

Merged: 9 commits, Jan 30, 2024
@@ -97,18 +97,19 @@ class SamplesheetConverter {
def Boolean headerCheck = true
this.rows = []
resetCount()

def List outputs = samplesheetList.collect { Map<String,String> fullRow ->
increaseCount()

Map<String,String> row = fullRow.findAll { it.value != "" }
Map<String,Object> row = fullRow.findAll { it.value != "" }
def Set rowKeys = containsHeader ? row.keySet() : ["empty"].toSet()
def String yamlInfo = fileType == "yaml" ? " for entry ${this.getCount()}." : ""
def String entryInfo = fileType in ["yaml", "json"] ? " for entry ${this.getCount()}." : ""

// Check the header (CSV/TSV) or present fields (YAML)
if(headerCheck) {
def unexpectedFields = containsHeader ? rowKeys - allFields : []
if(unexpectedFields.size() > 0) {
this.warnings << "The samplesheet contains following unchecked field(s): ${unexpectedFields}${yamlInfo}".toString()
this.warnings << "The samplesheet contains following unchecked field(s): ${unexpectedFields}${entryInfo}".toString()
}

if(fileType != 'yaml'){
@@ -128,7 +129,7 @@ class SamplesheetConverter {

for( Map.Entry<String, Map> field : schemaFields ){
def String key = containsHeader ? field.key : "empty"
def String input = row[key]
def Object input = row[key]

// Check if the field is deprecated
if(field['value']['deprecated']){
@@ -159,7 +160,7 @@ class SamplesheetConverter {
if(input in booleanUniques[key] && input){
this.errors << addSample("The '${key}' value needs to be unique. '${input}' was found at least twice in the samplesheet.".toString())
}
booleanUniques[key].add(input)
booleanUniques[key].add(input as String)
}
else if(unique && uniqueIsList) {
def Map<String,String> newMap = (Map) row.subMap((List) [key] + (List) unique)
@@ -176,20 +177,20 @@ class SamplesheetConverter {
def List<String> metaNames = field['value']['meta'] as List<String>
if(metaNames) {
for(name : metaNames) {
meta[name] = (input != '' && input) ?
castToType(input, field) :
field['value']['default'] != null ?
castToType(field['value']['default'] as String, field) :
meta[name] = (input != '' && input != null) ?
castToNFType(input, field) :
field['value']['default'] != null ?
castToNFType(field['value']['default'], field) :
null
}
}
else {
def inputFile = (input != '' && input) ?
castToType(input, field) :
field['value']['default'] != null ?
castToType(field['value']['default'] as String, field) :
def inputVal = (input != '' && input != null) ?
castToNFType(input, field) :
field['value']['default'] != null ?
castToNFType(field['value']['default'], field) :
[]
output.add(inputFile)
output.add(inputVal)
}
}
// Add meta to the output when a meta field has been created
@@ -253,26 +254,36 @@ class SamplesheetConverter {
}

// Function to transform an input field from the samplesheet to its desired type
private static castToType(
String input,
private static castToNFType(
Object input,
Map.Entry<String, Map> field
) {
def String type = field['value']['type']
def String key = field.key

// Recursively call this function for each item in the array if the field is an array-type
// The returned values are collected into a single array
if (type == "array") {
def Map.Entry<String, Map> subfield = (Map.Entry<String, Map>) Map.entry(field.key, field['value']['items'])
log.debug "subfield = $subfield"
def ArrayList result = input.collect{ castToNFType(it, subfield) } as ArrayList
return result
}

def String inputStr = input as String
// Convert string values
if(type == "string" || !type) {
def String result = input as String
def String result = inputStr as String

// Check and convert to the desired format
def String format = field['value']['format']
if(format) {
if(format == "file-path-pattern") {
def ArrayList inputFiles = Nextflow.file(input) as ArrayList
def ArrayList inputFiles = Nextflow.file(inputStr) as ArrayList
return inputFiles
}
if(format.contains("path")) {
def Path inputFile = Nextflow.file(input) as Path
def Path inputFile = Nextflow.file(inputStr) as Path
return inputFile
}
}
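
The array branch above is the heart of this PR: when a field's type is "array", castToNFType rebuilds a field entry from the items sub-schema and maps itself over every element, so nested arrays are handled by the same recursion. Below is a standalone sketch of that behaviour, a simplified mimic with hypothetical field shapes rather than the full converter:

```groovy
// Simplified mimic of the recursion in castToNFType; only "integer" and
// "array" are handled here, and the field definitions are hypothetical.
def castSketch(Object input, Map fieldValue) {
    if (fieldValue.type == 'array') {
        // Recurse with the items sub-schema and collect results into one list
        return (input as List).collect { castSketch(it, fieldValue.items as Map) }
    }
    if (fieldValue.type == 'integer') {
        return input as int
    }
    return input as String
}

// "lanes": ["1", "2"] cast against {"type": "array", "items": {"type": "integer"}}
assert castSketch(['1', '2'], [type: 'array', items: [type: 'integer']]) == [1, 2]
// Nested arrays recurse level by level
assert castSketch([['3']], [type: 'array', items: [type: 'array', items: [type: 'integer']]]) == [[3]]
```

The inputStr cast that follows the array branch exists because YAML and JSON values now reach the converter as arbitrary objects rather than Strings, so every scalar branch normalises through a single String first.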
@@ -285,36 +296,36 @@ class SamplesheetConverter {
// Convert number values
else if(type == "number") {
try {
def int result = input as int
def int result = inputStr as int
return result
}
catch (NumberFormatException e) {
log.debug("Could not convert ${input} to an integer. Trying to convert to a float.")
}

try {
def float result = input as float
def float result = inputStr as float
return result
}
catch (NumberFormatException e) {
log.debug("Could not convert ${input} to a float. Trying to convert to a double.")
log.debug("Could not convert ${inputStr} to a float. Trying to convert to a double.")
}

def double result = input as double
def double result = inputStr as double
return result
}

// Convert integer values
else if(type == "integer") {

def int result = input as int
def int result = inputStr as int
return result
}

// Convert boolean values
else if(type == "boolean") {

if(input.toLowerCase() == "true") {
if(inputStr.toLowerCase() == "true") {
return true
}
return false
@@ -184,23 +184,29 @@ class SchemaValidator extends PluginExtensionPoint {
def String fileType = SamplesheetConverter.getFileType(samplesheetFile)
def String delimiter = fileType == "csv" ? "," : fileType == "tsv" ? "\t" : null
def List<Map<String,String>> fileContent
def List<Map<String,String>> fileContentCasted
def Boolean s3PathCheck = params.validationS3PathCheck ? params.validationS3PathCheck : false
def Map types = variableTypes(schemaFile.toString(), baseDir)
if (types.find{ it.value == "array" } as Boolean && fileType in ["csv", "tsv"]){
def msg = "Using \"type\": \"array\" in schema with a \".$fileType\" samplesheet is not supported\n"
log.error("ERROR: Validation of pipeline parameters failed!")
throw new SchemaValidationException(msg, [])
}
def Boolean containsHeader = !(types.keySet().size() == 1 && types.keySet()[0] == "")

if(!containsHeader){
types = ["empty": types[""]]
}
if(fileType == "yaml"){
fileContent = new Yaml().load((samplesheetFile.text)).collect {
fileContentCasted = new Yaml().load((samplesheetFile.text)).collect {
if(containsHeader) {
return it as Map
}
return ["empty": it] as Map
}
}
else if(fileType == "json"){
fileContent = new JsonSlurper().parseText(samplesheetFile.text).collect {
fileContentCasted = new JsonSlurper().parseText(samplesheetFile.text).collect {
if(containsHeader) {
return it as Map
}
@@ -209,8 +215,8 @@ class SchemaValidator extends PluginExtensionPoint {
}
else {
fileContent = samplesheetFile.splitCsv(header:containsHeader ?: ["empty"], strip:true, sep:delimiter, quote:'\"')
fileContentCasted = castToType(fileContent, types)
}
def List<Map<String,String>> fileContentCasted = castToType(fileContent, types)
if (validateFile(false, samplesheetFile.toString(), fileContentCasted, schemaFile.toString(), baseDir, s3PathCheck)) {
log.debug "Validation passed: '$samplesheetFile' with '$schemaFile'"
}
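
The guard above makes the limitation explicit: an array-typed field has to arrive as a real list, which the YAML and JSON parsers produce but a flat CSV/TSV row cannot. Note also that castToType is now applied only in the CSV/TSV branch, since YAML and JSON content is already typed by its parser. A minimal sketch of the distinction, using SnakeYAML as the plugin does (samplesheet content hypothetical):

```groovy
import org.yaml.snakeyaml.Yaml

// Hypothetical YAML samplesheet: the "fastq" field parses into a real List
def samplesheetText = '''
- sample: sample1
  fastq:
    - data/sample1_R1.fastq.gz
    - data/sample1_R2.fastq.gz
'''
def rows = new Yaml().load(samplesheetText).collect { it as Map }
assert rows[0].fastq instanceof List

// A CSV row is a flat Map<String,String>, so an array-typed field has
// nothing to bind to; hence the SchemaValidationException raised above.
```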
@@ -430,23 +436,29 @@ class SchemaValidator extends PluginExtensionPoint {
def String fileType = SamplesheetConverter.getFileType(file_path)
def String delimiter = fileType == "csv" ? "," : fileType == "tsv" ? "\t" : null
def List<Map<String,String>> fileContent
def List<Map<String,String>> fileContentCasted
def Map types = variableTypes(schema_name, baseDir)
if (types.find{ it.value == "array" } as Boolean && fileType in ["csv", "tsv"]){
def msg = "${colors.red}Using {\"type\": \"array\"} in schema with a \".$fileType\" samplesheet is not supported${colors.reset}\n"
log.error("ERROR: Validation of pipeline parameters failed!")
throw new SchemaValidationException(msg, [])
}
def Boolean containsHeader = !(types.keySet().size() == 1 && types.keySet()[0] == "")

if(!containsHeader){
types = ["empty": types[""]]
}

if(fileType == "yaml"){
fileContent = new Yaml().load(file_path.text).collect {
fileContentCasted = new Yaml().load(file_path.text).collect {
if(containsHeader) {
return it as Map
}
return ["empty": it] as Map
}
}
else if(fileType == "json"){
fileContent = new JsonSlurper().parseText(file_path.text).collect {
fileContentCasted = new JsonSlurper().parseText(file_path.text).collect {
if(containsHeader) {
return it as Map
}
@@ -455,8 +467,8 @@ class SchemaValidator extends PluginExtensionPoint {
}
else {
fileContent = file_path.splitCsv(header:containsHeader ?: ["empty"], strip:true, sep:delimiter, quote:'\"')
fileContentCasted = castToType(fileContent, types)
}
def List<Map<String,String>> fileContentCasted = castToType(fileContent, types)
if (validateFile(useMonochromeLogs, key, fileContentCasted, schema_name, baseDir, s3PathCheck)) {
log.debug "Validation passed: '$key': '$file_path' with '$schema_name'"
}
@@ -554,6 +566,8 @@ class SchemaValidator extends PluginExtensionPoint {
Boolean monochrome_logs, String paramName, Object fileContent, String schemaFilename, String baseDir, Boolean s3PathCheck = false

) {
// declare this once for the method
def colors = logColours(monochrome_logs)

// Load the schema
def String schema_string = Files.readString( Path.of(getSchemaPath(baseDir, schemaFilename)) )
@@ -591,7 +605,10 @@ class SchemaValidator extends PluginExtensionPoint {
pathsToCheck.each { String fieldName ->
for (int i=0; i < arrayJSON.size(); i++) {
def JSONObject entry = arrayJSON.getJSONObject(i)
if ( entry.has(fieldName) ) {
if ( entry.has(fieldName) && entry[fieldName] instanceof JSONArray ) {
entry[fieldName].collect{ pathExists(it.toString(), " Entry ${(i+1).toString()} - ${fieldName.toString()}", s3PathCheck) }
}
else if ( entry.has(fieldName) ) {
pathExists(entry[fieldName].toString(), " Entry ${(i+1).toString()} - ${fieldName.toString()}", s3PathCheck)
}
}
@@ -607,13 +624,11 @@ class SchemaValidator extends PluginExtensionPoint {
validator.performValidation(schema, arrayJSON);
if (this.hasErrors()) {
// Needed for custom errors such as pathExists() errors
def colors = logColours(monochrome_logs)
def msg = "${colors.red}The following errors have been detected:\n\n" + this.getErrors().join('\n').trim() + "\n${colors.reset}\n"
log.error("ERROR: Validation of '$paramName' file failed!")
throw new SchemaValidationException(msg, this.getErrors())
}
} catch (ValidationException e) {
def colors = logColours(monochrome_logs)
JSONObject exceptionJSON = (JSONObject) e.toJSON()
JSONObject objectJSON = new JSONObject();
objectJSON.put("objects",arrayJSON);
@@ -651,7 +666,10 @@ class SchemaValidator extends PluginExtensionPoint {
def Map properties = (Map) group.value['properties']
for (p in properties) {
def String key = (String) p.key
def Map property = properties[key] as Map
def Map<String,Object> property = properties[key] as Map
if(property.containsKey('items')){
property = property.items as Map
}
if (property.containsKey('exists') && property.containsKey('format')) {
if (property['exists'] && (property['format'] == 'file-path' || property['format'] == 'directory-path' || property['format'] == 'path') ) {
exists.push(key)
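
Together with the JSONArray branch added to validateFile above, this items unwrapping is what lets path checks reach inside arrays: a field joins pathsToCheck when its items sub-schema carries a path-like format plus exists, and every element of the array is then tested individually. A plausible shape for such a schema property (field name hypothetical):

```json
"fastq": {
  "type": "array",
  "items": {
    "type": "string",
    "format": "file-path",
    "exists": true
  }
}
```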
@@ -867,4 +867,28 @@ class PluginExtensionMethodsTest extends Dsl2Spec{
error.message == '''The following errors have been detected:\n\n* -- Entry 1: Missing required value: sample\n* -- Entry 2: Missing required value: sample\n\n'''
!stdout
}
}

def 'should fail because of arrays with csv' () {
given:
def schema = Path.of('src/testResources/nextflow_schema_with_samplesheet_converter_arrays.json').toAbsolutePath().toString()
def SCRIPT_TEXT = """
params.monochrome_logs = true
params.input = 'src/testResources/correct.csv'
include { validateParameters } from 'plugin/nf-validation'

validateParameters(parameters_schema: '$schema', monochrome_logs: params.monochrome_logs)
"""

when:
dsl_eval(SCRIPT_TEXT)
def stdout = capture
.toString()
.readLines()
.findResults {it.contains('WARN nextflow.validation.SchemaValidator') || it.startsWith('* --') ? it : null }

then:
def error = thrown(SchemaValidationException)
error.message == '''Using {"type": "array"} in schema with a ".csv" samplesheet is not supported\n'''
!stdout
}
}
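
The expected message here matches the guard added in SchemaValidator verbatim. The referenced nextflow_schema_with_samplesheet_converter_arrays.json is not shown in this diff; presumably it types at least one samplesheet column as an array, which is what trips the .csv check. For contrast, a hedged sketch of the passing counterpart (the .yaml path is hypothetical):

```groovy
// Same array-typed schema, but with a YAML samplesheet: only .csv/.tsv
// inputs are rejected, so this variant should validate cleanly.
// '$schema' interpolates from the enclosing feature method, as above.
def SCRIPT_TEXT = """
    params.monochrome_logs = true
    params.input = 'src/testResources/correct_arrays.yaml'
    include { validateParameters } from 'plugin/nf-validation'

    validateParameters(parameters_schema: '$schema', monochrome_logs: params.monochrome_logs)
"""
dsl_eval(SCRIPT_TEXT)   // expected: no SchemaValidationException
```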