Skip to content

Commit

Permalink
fixing the various bugs in assessing single value quality indicators
Browse files Browse the repository at this point in the history
  • Loading branch information
ahmadassaf committed Feb 11, 2015
1 parent a0a6a28 commit 2ba7f50
Show file tree
Hide file tree
Showing 7 changed files with 39 additions and 37 deletions.
4 changes: 2 additions & 2 deletions parsers/CKAN/profiler/profilers/profile.js
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,7 @@ function profile(parent) {
if (_.indexOf(excludeList, measureTitle) == -1) {
var measureTotal = 0;
_.each(qualityMeasure, function(qualityIndicator, indicatorTitle){
if (qualityIndicator.score > 0 || (size && qualityIndicator.score < size))
if (qualityIndicator.score < 1 || (size && qualityIndicator.score < size))
qualityIndicators.push({"description" : qualityIndicator.description, "score" : qualityIndicator.score});
measureTotal+= qualityIndicator.score;
});
Expand All @@ -401,7 +401,7 @@ function profile(parent) {

/**
 * Prints every collected quality indicator with its score formatted as a percentage.
 *
 * Reads the free variable `size`: when it is truthy the indicator's raw score is
 * divided by it before formatting; otherwise the raw score is used unchanged.
 *
 * @param {Array<{description: string, score: number}>} qualityIndicators - entries to print
 */
function printQualityIndicators(qualityIndicators) {
  _.each(qualityIndicators, function(indicator){
    // Declared once; the earlier duplicate `var score` computed the same value
    var score = size ? (indicator.score / size) : indicator.score;
    // Two decimal places, e.g. 0.25 -> "25.00%"
    util.colorify(["green","red"], ["Quality Indicator : " + indicator.description + " : " ,parseFloat( score * 100).toFixed(2)+ "%"]);
  });
}
Expand Down
12 changes: 7 additions & 5 deletions parsers/CKAN/profiler/profilers/quality/measures/completeness.js
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,10 @@ function completeness(parent, dataset) {
dataSerializations.push(resource.format);
// Check if the format contains an exemplary URL
if (exemplaryURLS.indexOf(resource.format) > -1)
profileTemplate.setQualityIndicatorScore("comprehensibility", "QI.37", 0);
profileTemplate.setQualityIndicatorScore("comprehensibility", "QI.37", 1);
// Check if format contains void or dcat which are dataset descriptions vocabularies [format should be meta/void, meta/dcat]
if (resource.format.indexOf("void") > -1 || resource.format.indexOf("dcat") > -1)
profileTemplate.setQualityIndicatorScore("completeness", "QI.4", 0);
profileTemplate.setQualityIndicatorScore("completeness", "QI.4", 1);
}
}

Expand Down Expand Up @@ -104,10 +104,10 @@ function completeness(parent, dataset) {

// check if there is a resource representing a data dump
if ( (_.has(resource, "description") && resource.description) && resource.description.toLowerCase().indexOf("dump") > -1)
profileTemplate.setQualityIndicatorScore("availability", "QI.18", 0);
profileTemplate.setQualityIndicatorScore("availability", "QI.18", 1);
// Check if there is a resource representing an API
if (_.has(resource, "resource_type") && resource.resource_type && resource.resource_type.indexOf("api") > -1)
profileTemplate.setQualityIndicatorScore("availability", "QI.19", 0);
profileTemplate.setQualityIndicatorScore("availability", "QI.19", 1);

// Check if we can extract a size and MIME type from the HTTP Head and check if they match the defined values
if (_.has(resource, "size") && response.headers["content-length"]) {
Expand Down Expand Up @@ -167,8 +167,10 @@ function completeness(parent, dataset) {

// This function is executed to check the tags and categorization information after the dataset URL check
function process() {
// The total number of URLs should be the total number of URIs in resources + the dataset URI
var totalResources = num_resources + 1;
// set the number of URLs defined
profileTemplate.setQualityIndicatorScore("completeness", "QI.9", (num_resources - URLs) / num_resources);
profileTemplate.setQualityIndicatorScore("completeness", "QI.9", (totalResources - URLs) / totalResources);
// Set the ratio of unreachable URLs in the availability score
profileTemplate.setQualityIndicatorScore("availability", "QI.20", unreachableURLs / URLs);
// Set the number of syntactically valid URLs in the completeness Score
Expand Down
4 changes: 2 additions & 2 deletions parsers/CKAN/profiler/profilers/quality/measures/licensing.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@ function licensing(parent, dataset) {
var root = dataset.result ? dataset.result : dataset;

if (_.has(root, "license_information") && root.license_information)
profileTemplate.setQualityIndicatorScore("licensing", "QI.23", 0);
profileTemplate.setQualityIndicatorScore("licensing", "QI.23", 1);

// Check if the dataset has a defined license URL
if (_.has(root, "license_url") && root.license_url) {
licensing.util.checkAddress(root.license_url, function(error, body, response) {
if (!error){
profileTemplate.setQualityIndicatorScore("licensing", "QI.22", 0);
profileTemplate.setQualityIndicatorScore("licensing", "QI.22", 1);
}
process();
}, "HEAD");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ function provenance(parent, dataset) {
profileTemplate.setQualityIndicatorScore("provenance", "QI.46", (provQualityCounter / provMetadtaKeys.length));

if (ownershipDetails)
profileTemplate.setQualityIndicatorScore("comprehensibility", "QI.40", 0);
profileTemplate.setQualityIndicatorScore("comprehensibility", "QI.40", 1);

// The quality checks have been completed
qualityCallback(null, profileTemplate);
Expand Down
2 changes: 1 addition & 1 deletion parsers/CKAN/profiler/profilers/quality/qualityProfiler.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ function qualityProfiler(parent) {
if (dataset) {

// There is a valid dataset metadata file (QI.1)
profileTemplate.setQualityIndicatorScore("completeness", "QI.1", 0);
profileTemplate.setQualityIndicatorScore("completeness", "QI.1", 1);

var root = dataset.result ? dataset.result : dataset;

Expand Down
48 changes: 24 additions & 24 deletions util/qualityModel.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,136 +4,136 @@ function qualityModel() {
"QI.1": {
"description": "Existence of supporting structured metadata",
"weight": 1,
"score": 1
"score": 0
},
"QI.2": {
"description": "Supports multiple serializations",
"weight": 1,
"score": 1
"score": 0
},
"QI.3": {
"description": "Has different data access points",
"weight": 1,
"score": 1
"score": 0
},
"QI.4": {
"description": "Uses datasets description vocabularies",
"weight": 1,
"score": 1
"score": 0
},
"QI.5": {
"description": "Existence of descriptions about its size",
"weight": 1,
"score": 1
"score": 0
},
"QI.6": {
"description": "Existence of descriptions about its structure (MIME Type, Format)",
"weight": 1,
"score": 1
"score": 0
},
"QI.7": {
"description": "Existence of descriptions about its organization and categorization",
"weight": 1,
"score": 1
"score": 0
},
"QI.9": {
"description": "Existence of dereferencable links for the dataset and its resources",
"weight": 1,
"score": 1
"score": 0
}
},
"availability": {
"QI.18": {
"description": "Existence of an RDF dump that can be downloaded by users",
"weight": 1,
"score": 1
"score": 0
},
"QI.19": {
"description": "Existence of queryable endpoints that respond to direct queries",
"weight": 1,
"score": 1
"score": 0
},
"QI.20": {
"description": "Existence of valid dereferencable URLs (respond to HTTP request)",
"weight": 1,
"score": 1
"score": 0
}
},
"licensing": {
"QI.21": {
"description": "Existence of human and machine readable license information",
"weight": 1,
"score": 1
"score": 0
},
"QI.22": {
"description": "Existence of dereferencable links to the full license information",
"weight": 1,
"score": 1
"score": 0
}
},
"freshness": {
"QI.24": {
"description": "Existence of timestamps that can keep track of its modifications",
"weight": 1,
"score": 1
"score": 0
}
},
"correctness": {
"QI.25": {
"description": "Includes the correct MIME type for the content",
"weight": 1,
"score": 1
"score": 0
},
"QI.26": {
"description": "Includes the correct size for the content",
"weight": 1,
"score": 1
"score": 0
},
"QI.28": {
"description": "Absence of Syntactic errors on the links level",
"weight": 1,
"score": 1
"score": 0
}
},
"comprehensibility": {
"QI.37": {
"description": "Existence of at least one exemplary RDF file",
"weight": 1,
"score": 1
"score": 0
},
"QI.39": {
"description": "Existence of general information (title, URL, description) for the dataset",
"weight": 1,
"score": 1
"score": 0
},
"QI.40": {
"description": "Existence of mailing list, message board or point of contact",
"weight": 1,
"score": 1
"score": 0
}
},
"provenance": {
"QI.44": {
"description": "Existence of metadata that describes its authoritative information",
"weight": 1,
"score": 1
"score": 0
},
"QI.46": {
"description": "Usage of versioning",
"weight": 1,
"score": 1
"score": 0
}
},
"security": {
"QI.43": {
"description": "Uses login credentials to restrict access",
"weight": 1,
"score": 1
"score": 0
},
"QI.44": {
"description": "Uses SSL or SSH to provide access to the dataset",
"weight": 1,
"score": 1
"score": 0
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions util/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ util.prototype.downloadWithProgress = function downloadWithProgress(cache, fileN
if (!error) callback(false, file);
else {
// Initialize the request variable and the holders for data and progress bar
var download = request(url), bar, data = "";
var download = request({url: url, maxRedirects: 5}), bar, data = "";
// The file is downloaded as "chunks" that will be grouped together
download.on('data', function(chunk) {
bar = bar || new progressBar('Downloading... [:bar] :percent :etas', {
Expand Down Expand Up @@ -185,7 +185,7 @@ util.prototype.download = function download(cache, fileName, url, callback){
cache.getCache(fileName, function(error, file) {
if (!error) callback(false, file);
else {
request(url, function(error, response, data){
request({url: url, maxRedirects: 5}, function(error, response, data){
if (!error && response.statusCode == 200) {
cache.setCache(fileName, data , function(error, success){
callback(false, JSON.parse(data));
Expand Down

0 comments on commit 2ba7f50

Please sign in to comment.