diff --git a/parsers/CKAN/profiler/profilers/quality/measures/completeness.js b/parsers/CKAN/profiler/profilers/quality/measures/completeness.js index b439b95..1974e2d 100644 --- a/parsers/CKAN/profiler/profilers/quality/measures/completeness.js +++ b/parsers/CKAN/profiler/profilers/quality/measures/completeness.js @@ -40,7 +40,7 @@ function completeness(parent, dataset) { * */ - checkMetaField("url", root, URLs); + if (_.has(root, "url") && root["url"]) URLs++ // Do the async loop on the resources and do the necessary checks completeness.async.eachSeries(root.resources,function(resource, asyncCallback){ @@ -55,10 +55,10 @@ function completeness(parent, dataset) { dataSerializations.push(resource.format); // Check if the format contains an exemplary URL if (exemplaryURLS.indexOf(resource.format) > -1) - profileTemplate.setQualityIndicatorScore("comprehensibility", "QI.37", 1); + profileTemplate.setQualityIndicatorScore("comprehensibility", "QI.37", 0); // Check if format contains void or dcat which are dataset descriptions vocabularies [format should be meta/void, meta/dcat] if (resource.format.indexOf("void") > -1 || resource.format.indexOf("dcat") > -1) - profileTemplate.setQualityIndicatorScore("completeness", "QI.4", 1); + profileTemplate.setQualityIndicatorScore("completeness", "QI.4", 0); } } @@ -87,8 +87,8 @@ function completeness(parent, dataset) { unreachableURLs++; if (!completeness.util.validator.isURL(resource.url)) inCorrectURLs++; - checkMetaField("size", resource, sizeInformation); - checkMetaField("mimetype", resource, MIMEInformation); + if (_.has(resource, "size") && resource["size"]) sizeInformation++; + if (_.has(resource, "mimetype") && resource["mimetype"]) MIMEInformation++ // Signal the async callback to switch to the next async.series asyncCallback(); @@ -99,15 +99,15 @@ function completeness(parent, dataset) { * The check we need to do now is related to completeness and availability since the URL is available */ - checkMetaField("size", resource, sizeInformation); - checkMetaField("mimetype", resource, MIMEInformation); + if (_.has(resource, "size") && resource["size"]) sizeInformation++; + if (_.has(resource, "mimetype") && resource["mimetype"]) MIMEInformation++ // check if there is a resource representing a data dump if ( (_.has(resource, "description") && resource.description) && resource.description.toLowerCase().indexOf("dump") > -1) - profileTemplate.setQualityIndicatorScore("availability", "QI.18", 1); + profileTemplate.setQualityIndicatorScore("availability", "QI.18", 0); // Check if there is a resource representing an API if (_.has(resource, "resource_type") && resource.resource_type && resource.resource_type.indexOf("api") > -1) - profileTemplate.setQualityIndicatorScore("availability", "QI.19", 1); + profileTemplate.setQualityIndicatorScore("availability", "QI.19", 0); // Check if we can extract a size and MIME type from the HTTP Head and check if they match the defined values if (_.has(resource, "size") && response.headers["content-length"]) { @@ -144,16 +144,15 @@ function completeness(parent, dataset) { var serializationsNumber = _.intersection(serializations, _.unique(dataSerializations)).length; if (accessPointsNumber < accessPoints.length) { - profileTemplate.setQualityIndicatorScore("completeness", "QI.3", (accessPoints.length - accessPointsNumber) / accessPoints.length); + profileTemplate.setQualityIndicatorScore("completeness", "QI.3", accessPointsNumber / accessPoints.length); } if (serializationsNumber < serializations.length) { - profileTemplate.setQualityIndicatorScore("completeness", "QI.2", (serializations.length - serializationsNumber) / serializations.length); + profileTemplate.setQualityIndicatorScore("completeness", "QI.2", serializationsNumber / serializations.length); } - profileTemplate.setQualityIndicatorScore("completeness", "QI.5", (num_resources - sizeInformation) / num_resources); profileTemplate.setQualityIndicatorScore("completeness", "QI.6", (num_resources - MIMEInformation) / num_resources); - profileTemplate.setQualityIndicatorScore("correctness", "QI.25", ((num_resources - MIMEInformation) - inCorrectMIME) / num_resources); - profileTemplate.setQualityIndicatorScore("correctness", "QI.26", ((num_resources - sizeInformation) - inCorrectSize) / num_resources); + profileTemplate.setQualityIndicatorScore("correctness", "QI.25", inCorrectMIME / num_resources); + profileTemplate.setQualityIndicatorScore("correctness", "QI.26", inCorrectSize / num_resources); if (_.has(root, "url")) { @@ -169,11 +168,11 @@ function completeness(parent, dataset) { // This function is executed to check the tags and categorization infomration aftet the dataset URL check function process() { // set the number of URLs defined - profileTemplate.setQualityIndicatorScore("completeness", "QI.9", URLs / num_resources); + profileTemplate.setQualityIndicatorScore("completeness", "QI.9", (num_resources - URLs) / num_resources); // Set the number of unreachable URLs in the completenss Score - profileTemplate.setQualityIndicatorScore("availability", "QI.20", (URLs - unreachableURLs) / URLs); + profileTemplate.setQualityIndicatorScore("availability", "QI.20", unreachableURLs / URLs); // Set the number of syntactically valid URLs in the completenss Score - profileTemplate.setQualityIndicatorScore("correctness", "QI.28", (URLs - inCorrectURLs) / URLs); + profileTemplate.setQualityIndicatorScore("correctness", "QI.28", inCorrectURLs / URLs); // Call the series of validation checks i want to run on the dataset completeness.async.series([checkTags, checkGroup], function(err){ profileTemplate.setQualityIndicatorScore("completeness", "QI.7", (groupsErrors + tagsErrors) / 2); @@ -194,7 +193,7 @@ function completeness(parent, dataset) { }); var totalTagFields = tagsKeys.length * num_tags; - tagsErrors = ((totalTagFields - tagsError) / totalTagFields); + tagsErrors = (tagsError / totalTagFields); callback(); } else callback(); } @@ -213,7 +212,7 @@ function completeness(parent, dataset) { },function(err){ var totalGroupFields = groupsKeys.length * num_groups; - groupsErrors = ((totalGroupFields - groupError) / totalGroupFields); + groupsErrors = (groupError / totalGroupFields); callback(); }); @@ -226,17 +225,6 @@ function completeness(parent, dataset) { // The quality checks have been completed qualityCallback(null, profileTemplate); } - - /* This function will check the existence of a field in a JSON section - * The checks will update a value that is passed ot the function - */ - function checkMetaField(field, section, value) { - if (_.has(section, field)) { - if (_.isUndefined(section[field]) || _.isNull(section[field]) || ( _.isString(section[field]) && section[field].length == 0)) { - value++; - } else value++; - } - } } } diff --git a/parsers/CKAN/profiler/profilers/quality/measures/licensing.js b/parsers/CKAN/profiler/profilers/quality/measures/licensing.js index 75fc3a0..ea93595 100644 --- a/parsers/CKAN/profiler/profilers/quality/measures/licensing.js +++ b/parsers/CKAN/profiler/profilers/quality/measures/licensing.js @@ -17,7 +17,7 @@ function licensing(parent, dataset) { if (_.has(root, "license_url") && root.license_url) { licensing.util.checkAddress(root.license_url, function(error, body, response) { if (!error){ - profileTemplate.setQualityIndicatorScore("licensing", "QI.22", 1); + profileTemplate.setQualityIndicatorScore("licensing", "QI.22", 0); } process(); }, "HEAD"); diff --git a/parsers/CKAN/profiler/profilers/quality/measures/provenance.js b/parsers/CKAN/profiler/profilers/quality/measures/provenance.js index 90b1c53..2f0d1ef 100644 --- a/parsers/CKAN/profiler/profilers/quality/measures/provenance.js +++ b/parsers/CKAN/profiler/profilers/quality/measures/provenance.js @@ -38,7 +38,7 @@ function provenance(parent, dataset) { profileTemplate.setQualityIndicatorScore("provenance", "QI.46", (provQualityCounter / provMetadtaKeys.length)); if (ownershipDetails) - profileTemplate.setQualityIndicatorScore("comprehensibility", "QI.40", 1); + profileTemplate.setQualityIndicatorScore("comprehensibility", "QI.40", 0); // The quality checks have been completed qualityCallback(null, profileTemplate); diff --git a/parsers/CKAN/profiler/profilers/quality/qualityProfiler.js b/parsers/CKAN/profiler/profilers/quality/qualityProfiler.js index 7906d16..3226443 100644 --- a/parsers/CKAN/profiler/profilers/quality/qualityProfiler.js +++ b/parsers/CKAN/profiler/profilers/quality/qualityProfiler.js @@ -24,7 +24,7 @@ function qualityProfiler(parent) { if (dataset) { // There is a valid dataset metadata file (Q1.1) - profileTemplate.setQualityIndicatorScore("completeness", "QI.1", 1); + profileTemplate.setQualityIndicatorScore("completeness", "QI.1", 0); var root = dataset.result ? dataset.result : dataset; diff --git a/util/qualityModel.js b/util/qualityModel.js index 69adc35..ce1d260 100644 --- a/util/qualityModel.js +++ b/util/qualityModel.js @@ -4,136 +4,136 @@ function qualityModel() { "QI.1": { "description": "Existence of supporting structured metadata", "weight": 1, - "score": 0 + "score": 1 }, "QI.2": { "description": "Supports multiple serializations", "weight": 1, - "score": 0 + "score": 1 }, "QI.3": { "description": "Has different data access points", "weight": 1, - "score": 0 + "score": 1 }, "QI.4": { "description": "Uses datasets description vocabularies", "weight": 1, - "score": 0 + "score": 1 }, "QI.5": { "description": "Existence of descriptions about its size", "weight": 1, - "score": 0 + "score": 1 }, "QI.6": { "description": "Existence of descriptions about its structure (MIME Type, Format)", "weight": 1, - "score": 0 + "score": 1 }, "QI.7": { "description": "Existence of descriptions about its organization and categorization", "weight": 1, - "score": 0 + "score": 1 }, "QI.9": { "description": "Existence of dereferencable links for the dataset and its resources", "weight": 1, - "score": 0 + "score": 1 } }, "availability": { "QI.18": { "description": "Existence of an RDF dump that can be downloaded by users", "weight": 1, - "score": 0 + "score": 1 }, "QI.19": { "description": "Existence of queryable endpoints that respond to direct queries", "weight": 1, - "score": 0 + "score": 1 }, "QI.20": { "description": "Existence of valid dereferencable URLs (respond to HTTP request)", "weight": 1, - "score": 0 + "score": 1 } }, "licensing": { "QI.21": { "description": "Existence of human and machine readable license information", "weight": 1, - "score": 0 + "score": 1 }, "QI.22": { "description": "Existence of dereferencable links to the full license information", "weight": 1, - "score": 0 + "score": 1 } }, "freshness": { "QI.24": { "description": "Existence of timestamps that can keep track of its modifications", "weight": 1, - "score": 0 + "score": 1 } }, "correctness": { "QI.25": { "description": "Includes the correct MIME type for the content", "weight": 1, - "score": 0 + "score": 1 }, "QI.26": { "description": "Includes the correct size for the content", "weight": 1, - "score": 0 + "score": 1 }, "QI.28": { "description": "Absence of Syntactic errors on the links level", "weight": 1, - "score": 0 + "score": 1 } }, "comprehensibility": { "QI.37": { "description": "Existence of at least one exemplary RDF file", "weight": 1, - "score": 0 + "score": 1 }, "QI.39": { "description": "Existence of general information (title, URL, description) for the dataset", "weight": 1, - "score": 0 + "score": 1 }, "QI.40": { "description": "Existence of mailing list, message board or point of contact", "weight": 1, - "score": 0 + "score": 1 } }, "provenance": { "QI.44": { "description": "Existence of metadata that describes its authoritative information", "weight": 1, - "score": 0 + "score": 1 }, "QI.46": { "description": "Usage of versioning", "weight": 1, - "score": 0 + "score": 1 } }, "security": { "QI.43": { "description": "Uses login credentials to restrict access", "weight": 1, - "score": 0 + "score": 1 }, "QI.44": { "description": "Uses SSL or SSH to provide access to the dataset", "weight": 1, - "score": 0 + "score": 1 } } }