From 2ba7f505affdaf0624431dc071c91620ca84aff3 Mon Sep 17 00:00:00 2001 From: Ahmad Assaf Date: Wed, 11 Feb 2015 05:19:00 +0100 Subject: [PATCH] fixing the various bugs in assessing single value quality indicators --- parsers/CKAN/profiler/profilers/profile.js | 4 +- .../quality/measures/completeness.js | 12 +++-- .../profilers/quality/measures/licensing.js | 4 +- .../profilers/quality/measures/provenance.js | 2 +- .../profilers/quality/qualityProfiler.js | 2 +- util/qualityModel.js | 48 +++++++++---------- util/utils.js | 4 +- 7 files changed, 39 insertions(+), 37 deletions(-) diff --git a/parsers/CKAN/profiler/profilers/profile.js b/parsers/CKAN/profiler/profilers/profile.js index 833f672..6570949 100644 --- a/parsers/CKAN/profiler/profilers/profile.js +++ b/parsers/CKAN/profiler/profilers/profile.js @@ -379,7 +379,7 @@ function profile(parent) { if (_.indexOf(excludeList, measureTitle) == -1) { var measureTotal = 0; _.each(qualityMeasure, function(qualityIndicator, indicatorTitle){ - if (qualityIndicator.score > 0 || (size && qualityIndicator.score < size)) + if (qualityIndicator.score < 1 || (size && qualityIndicator.score < size)) qualityIndicators.push({"description" : qualityIndicator.description, "score" : qualityIndicator.score}); measureTotal+= qualityIndicator.score; }); @@ -401,7 +401,7 @@ function profile(parent) { function printQualityIndicators(qualityIndicators) { _.each(qualityIndicators, function(indicator){ - var score = size ? indicator.score / size : indicator.score; + var score = size ? (indicator.score / size) : indicator.score; util.colorify(["green","red"], ["Quality Indicator : " + indicator.description + " : " ,parseFloat( score * 100).toFixed(2)+ "%"]); }); } diff --git a/parsers/CKAN/profiler/profilers/quality/measures/completeness.js b/parsers/CKAN/profiler/profilers/quality/measures/completeness.js index 1974e2d..22af325 100644 --- a/parsers/CKAN/profiler/profilers/quality/measures/completeness.js +++ b/parsers/CKAN/profiler/profilers/quality/measures/completeness.js @@ -55,10 +55,10 @@ function completeness(parent, dataset) { dataSerializations.push(resource.format); // Check if the format contains an exemplary URL if (exemplaryURLS.indexOf(resource.format) > -1) - profileTemplate.setQualityIndicatorScore("comprehensibility", "QI.37", 0); + profileTemplate.setQualityIndicatorScore("comprehensibility", "QI.37", 1); // Check if format contains void or dcat which are dataset descriptions vocabularies [format should be meta/void, meta/dcat] if (resource.format.indexOf("void") > -1 || resource.format.indexOf("dcat") > -1) - profileTemplate.setQualityIndicatorScore("completeness", "QI.4", 0); + profileTemplate.setQualityIndicatorScore("completeness", "QI.4", 1); } } @@ -104,10 +104,10 @@ function completeness(parent, dataset) { // check if there is a resource representing a data dump if ( (_.has(resource, "description") && resource.description) && resource.description.toLowerCase().indexOf("dump") > -1) - profileTemplate.setQualityIndicatorScore("availability", "QI.18", 0); + profileTemplate.setQualityIndicatorScore("availability", "QI.18", 1); // Check if there is a resource representing an API if (_.has(resource, "resource_type") && resource.resource_type && resource.resource_type.indexOf("api") > -1) - profileTemplate.setQualityIndicatorScore("availability", "QI.19", 0); + profileTemplate.setQualityIndicatorScore("availability", "QI.19", 1); // Check if we can extract a size and MIME type from the HTTP Head and check if they match the defined values if (_.has(resource, "size") && response.headers["content-length"]) { @@ -167,8 +167,10 @@ function completeness(parent, dataset) { // This function is executed to check the tags and categorization infomration aftet the dataset URL check function process() { + // The total number of URLs should be the total number of URIs in resources + the dataset URI + var totalResources = num_resources + 1; // set the number of URLs defined - profileTemplate.setQualityIndicatorScore("completeness", "QI.9", (num_resources - URLs) / num_resources); + profileTemplate.setQualityIndicatorScore("completeness", "QI.9", (totalResources - URLs) / totalResources); // Set the number of unreachable URLs in the completenss Score profileTemplate.setQualityIndicatorScore("availability", "QI.20", unreachableURLs / URLs); // Set the number of syntactically valid URLs in the completenss Score diff --git a/parsers/CKAN/profiler/profilers/quality/measures/licensing.js b/parsers/CKAN/profiler/profilers/quality/measures/licensing.js index 7362b7c..e61b856 100644 --- a/parsers/CKAN/profiler/profilers/quality/measures/licensing.js +++ b/parsers/CKAN/profiler/profilers/quality/measures/licensing.js @@ -14,13 +14,13 @@ function licensing(parent, dataset) { var root = dataset.result ? dataset.result : dataset; if (_.has(root, "license_information") && root.license_information) - profileTemplate.setQualityIndicatorScore("licensing", "QI.23", 0); + profileTemplate.setQualityIndicatorScore("licensing", "QI.23", 1); // Check if the dataset has a defined license URL if (_.has(root, "license_url") && root.license_url) { licensing.util.checkAddress(root.license_url, function(error, body, response) { if (!error){ - profileTemplate.setQualityIndicatorScore("licensing", "QI.22", 0); + profileTemplate.setQualityIndicatorScore("licensing", "QI.22", 1); } process(); }, "HEAD"); diff --git a/parsers/CKAN/profiler/profilers/quality/measures/provenance.js b/parsers/CKAN/profiler/profilers/quality/measures/provenance.js index 2f0d1ef..90b1c53 100644 --- a/parsers/CKAN/profiler/profilers/quality/measures/provenance.js +++ b/parsers/CKAN/profiler/profilers/quality/measures/provenance.js @@ -38,7 +38,7 @@ function provenance(parent, dataset) { profileTemplate.setQualityIndicatorScore("provenance", "QI.46", (provQualityCounter / provMetadtaKeys.length)); if (ownershipDetails) - profileTemplate.setQualityIndicatorScore("comprehensibility", "QI.40", 0); + profileTemplate.setQualityIndicatorScore("comprehensibility", "QI.40", 1); // The quality checks have been completed qualityCallback(null, profileTemplate); diff --git a/parsers/CKAN/profiler/profilers/quality/qualityProfiler.js b/parsers/CKAN/profiler/profilers/quality/qualityProfiler.js index 951bc53..8997cad 100644 --- a/parsers/CKAN/profiler/profilers/quality/qualityProfiler.js +++ b/parsers/CKAN/profiler/profilers/quality/qualityProfiler.js @@ -24,7 +24,7 @@ function qualityProfiler(parent) { if (dataset) { // There is a valid dataset metadata file (Q1.1) - profileTemplate.setQualityIndicatorScore("completeness", "QI.1", 0); + profileTemplate.setQualityIndicatorScore("completeness", "QI.1", 1); var root = dataset.result ? dataset.result : dataset; diff --git a/util/qualityModel.js b/util/qualityModel.js index ce1d260..69adc35 100644 --- a/util/qualityModel.js +++ b/util/qualityModel.js @@ -4,136 +4,136 @@ function qualityModel() { "QI.1": { "description": "Existence of supporting structured metadata", "weight": 1, - "score": 1 + "score": 0 }, "QI.2": { "description": "Supports multiple serializations", "weight": 1, - "score": 1 + "score": 0 }, "QI.3": { "description": "Has different data access points", "weight": 1, - "score": 1 + "score": 0 }, "QI.4": { "description": "Uses datasets description vocabularies", "weight": 1, - "score": 1 + "score": 0 }, "QI.5": { "description": "Existence of descriptions about its size", "weight": 1, - "score": 1 + "score": 0 }, "QI.6": { "description": "Existence of descriptions about its structure (MIME Type, Format)", "weight": 1, - "score": 1 + "score": 0 }, "QI.7": { "description": "Existence of descriptions about its organization and categorization", "weight": 1, - "score": 1 + "score": 0 }, "QI.9": { "description": "Existence of dereferencable links for the dataset and its resources", "weight": 1, - "score": 1 + "score": 0 } }, "availability": { "QI.18": { "description": "Existence of an RDF dump that can be downloaded by users", "weight": 1, - "score": 1 + "score": 0 }, "QI.19": { "description": "Existence of queryable endpoints that respond to direct queries", "weight": 1, - "score": 1 + "score": 0 }, "QI.20": { "description": "Existence of valid dereferencable URLs (respond to HTTP request)", "weight": 1, - "score": 1 + "score": 0 } }, "licensing": { "QI.21": { "description": "Existence of human and machine readable license information", "weight": 1, - "score": 1 + "score": 0 }, "QI.22": { "description": "Existence of dereferencable links to the full license information", "weight": 1, - "score": 1 + "score": 0 } }, "freshness": { "QI.24": { "description": "Existence of timestamps that can keep track of its modifications", "weight": 1, - "score": 1 + "score": 0 } }, "correctness": { "QI.25": { "description": "Includes the correct MIME type for the content", "weight": 1, - "score": 1 + "score": 0 }, "QI.26": { "description": "Includes the correct size for the content", "weight": 1, - "score": 1 + "score": 0 }, "QI.28": { "description": "Absence of Syntactic errors on the links level", "weight": 1, - "score": 1 + "score": 0 } }, "comprehensibility": { "QI.37": { "description": "Existence of at least one exemplary RDF file", "weight": 1, - "score": 1 + "score": 0 }, "QI.39": { "description": "Existence of general information (title, URL, description) for the dataset", "weight": 1, - "score": 1 + "score": 0 }, "QI.40": { "description": "Existence of mailing list, message board or point of contact", "weight": 1, - "score": 1 + "score": 0 } }, "provenance": { "QI.44": { "description": "Existence of metadata that describes its authoritative information", "weight": 1, - "score": 1 + "score": 0 }, "QI.46": { "description": "Usage of versioning", "weight": 1, - "score": 1 + "score": 0 } }, "security": { "QI.43": { "description": "Uses login credentials to restrict access", "weight": 1, - "score": 1 + "score": 0 }, "QI.44": { "description": "Uses SSL or SSH to provide access to the dataset", "weight": 1, - "score": 1 + "score": 0 } } } diff --git a/util/utils.js b/util/utils.js index 0b45350..f27a372 100644 --- a/util/utils.js +++ b/util/utils.js @@ -139,7 +139,7 @@ util.prototype.downloadWithProgress = function downloadWithProgress(cache, fileN if (!error) callback(false, file); else { // Initialize the request variable and the holders for data and progress bar - var download = request(url), bar, data = ""; + var download = request({url: url, maxRedirects: 5}), bar, data = ""; // The file is downloaded as "chuncks" that will be grouped together download.on('data', function(chunk) { bar = bar || new progressBar('Downloading... [:bar] :percent :etas', { @@ -185,7 +185,7 @@ util.prototype.download = function download(cache, fileName, url, callback){ cache.getCache(fileName, function(error, file) { if (!error) callback(false, file); else { - request(url, function(error, response, data){ + request({url: url, maxRedirects: 5}, function(error, response, data){ if (!error && response.statusCode == 200) { cache.setCache(fileName, data , function(error, success){ callback(false, JSON.parse(data));