Skip to content

Commit

Permalink
fixing the completeness quality calculation
Browse files Browse the repository at this point in the history
  • Loading branch information
ahmadassaf committed Feb 10, 2015
1 parent 7b90fe7 commit ffa9c73
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 57 deletions.
48 changes: 18 additions & 30 deletions parsers/CKAN/profiler/profilers/quality/measures/completeness.js
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ function completeness(parent, dataset) {
*
*/

checkMetaField("url", root, URLs);
if (_.has(root, "url") && root["url"]) URLs++

// Do the async loop on the resources and do the necessary checks
completeness.async.eachSeries(root.resources,function(resource, asyncCallback){
Expand All @@ -55,10 +55,10 @@ function completeness(parent, dataset) {
dataSerializations.push(resource.format);
// Check if the format contains an exemplary URL
if (exemplaryURLS.indexOf(resource.format) > -1)
profileTemplate.setQualityIndicatorScore("comprehensibility", "QI.37", 1);
profileTemplate.setQualityIndicatorScore("comprehensibility", "QI.37", 0);
// Check if format contains void or dcat which are dataset descriptions vocabularies [format should be meta/void, meta/dcat]
if (resource.format.indexOf("void") > -1 || resource.format.indexOf("dcat") > -1)
profileTemplate.setQualityIndicatorScore("completeness", "QI.4", 1);
profileTemplate.setQualityIndicatorScore("completeness", "QI.4", 0);
}
}

Expand Down Expand Up @@ -87,8 +87,8 @@ function completeness(parent, dataset) {
unreachableURLs++;
if (!completeness.util.validator.isURL(resource.url)) inCorrectURLs++;

checkMetaField("size", resource, sizeInformation);
checkMetaField("mimetype", resource, MIMEInformation);
if (_.has(resource, "size") && resource["size"]) sizeInformation++;
if (_.has(resource, "mimetype") && resource["mimetype"]) MIMEInformation++
// Signal the async callback to switch to the next async.series
asyncCallback();

Expand All @@ -99,15 +99,15 @@ function completeness(parent, dataset) {
* The check we need to do now is related to completeness and availability since the URL is available
*/

checkMetaField("size", resource, sizeInformation);
checkMetaField("mimetype", resource, MIMEInformation);
if (_.has(resource, "size") && resource["size"]) sizeInformation++;
if (_.has(resource, "mimetype") && resource["mimetype"]) MIMEInformation++

// check if there is a resource representing a data dump
if ( (_.has(resource, "description") && resource.description) && resource.description.toLowerCase().indexOf("dump") > -1)
profileTemplate.setQualityIndicatorScore("availability", "QI.18", 1);
profileTemplate.setQualityIndicatorScore("availability", "QI.18", 0);
// Check if there is a resource representing an API
if (_.has(resource, "resource_type") && resource.resource_type && resource.resource_type.indexOf("api") > -1)
profileTemplate.setQualityIndicatorScore("availability", "QI.19", 1);
profileTemplate.setQualityIndicatorScore("availability", "QI.19", 0);

// Check if we can extract a size and MIME type from the HTTP Head and check if they match the defined values
if (_.has(resource, "size") && response.headers["content-length"]) {
Expand Down Expand Up @@ -144,16 +144,15 @@ function completeness(parent, dataset) {
var serializationsNumber = _.intersection(serializations, _.unique(dataSerializations)).length;

if (accessPointsNumber < accessPoints.length) {
profileTemplate.setQualityIndicatorScore("completeness", "QI.3", (accessPoints.length - accessPointsNumber) / accessPoints.length);
profileTemplate.setQualityIndicatorScore("completeness", "QI.3", accessPointsNumber / accessPoints.length);
}
if (serializationsNumber < serializations.length) {
profileTemplate.setQualityIndicatorScore("completeness", "QI.2", (serializations.length - serializationsNumber) / serializations.length);
profileTemplate.setQualityIndicatorScore("completeness", "QI.2", serializationsNumber / serializations.length);
}

profileTemplate.setQualityIndicatorScore("completeness", "QI.5", (num_resources - sizeInformation) / num_resources);
profileTemplate.setQualityIndicatorScore("completeness", "QI.6", (num_resources - MIMEInformation) / num_resources);
profileTemplate.setQualityIndicatorScore("correctness", "QI.25", ((num_resources - MIMEInformation) - inCorrectMIME) / num_resources);
profileTemplate.setQualityIndicatorScore("correctness", "QI.26", ((num_resources - sizeInformation) - inCorrectSize) / num_resources);
profileTemplate.setQualityIndicatorScore("correctness", "QI.25", inCorrectMIME / num_resources);
profileTemplate.setQualityIndicatorScore("correctness", "QI.26", inCorrectSize / num_resources);


if (_.has(root, "url")) {
Expand All @@ -169,11 +168,11 @@ function completeness(parent, dataset) {
// This function is executed to check the tags and categorization information after the dataset URL check
function process() {
// set the number of URLs defined
profileTemplate.setQualityIndicatorScore("completeness", "QI.9", URLs / num_resources);
profileTemplate.setQualityIndicatorScore("completeness", "QI.9", (num_resources - URLs) / num_resources);
// Set the number of unreachable URLs in the availability score
profileTemplate.setQualityIndicatorScore("availability", "QI.20", (URLs - unreachableURLs) / URLs);
profileTemplate.setQualityIndicatorScore("availability", "QI.20", unreachableURLs / URLs);
// Set the number of syntactically valid URLs in the correctness score
profileTemplate.setQualityIndicatorScore("correctness", "QI.28", (URLs - inCorrectURLs) / URLs);
profileTemplate.setQualityIndicatorScore("correctness", "QI.28", inCorrectURLs / URLs);
// Call the series of validation checks i want to run on the dataset
completeness.async.series([checkTags, checkGroup], function(err){
profileTemplate.setQualityIndicatorScore("completeness", "QI.7", (groupsErrors + tagsErrors) / 2);
Expand All @@ -194,7 +193,7 @@ function completeness(parent, dataset) {
});

var totalTagFields = tagsKeys.length * num_tags;
tagsErrors = ((totalTagFields - tagsError) / totalTagFields);
tagsErrors = (tagsError / totalTagFields);
callback();
} else callback();
}
Expand All @@ -213,7 +212,7 @@ function completeness(parent, dataset) {
},function(err){

var totalGroupFields = groupsKeys.length * num_groups;
groupsErrors = ((totalGroupFields - groupError) / totalGroupFields);
groupsErrors = (groupError / totalGroupFields);

callback();
});
Expand All @@ -226,17 +225,6 @@ function completeness(parent, dataset) {
// The quality checks have been completed
qualityCallback(null, profileTemplate);
}

/* This function checks the existence of a field in a JSON section and
 * returns the updated counter.
 *
 * JavaScript passes numbers by value, so incrementing the `value` parameter
 * never changes the caller's variable; the caller must assign the result:
 *   sizeInformation = checkMetaField("size", resource, sizeInformation);
 *
 * @param {string} field   - name of the field to look up
 * @param {Object} section - object to inspect (null/undefined counts as "no field")
 * @param {number} value   - current counter value
 * @returns {number} value + 1 when the field is an own property of the section
 *                   and is not undefined, null or an empty string;
 *                   otherwise value unchanged
 */
function checkMetaField(field, section, value) {
var hasField = section != null && Object.prototype.hasOwnProperty.call(section, field);
if (!hasField) return value;

var content = section[field];
// A declared field with no usable content is treated as missing
var isEmpty = content === undefined || content === null || (typeof content === "string" && content.length === 0);

return isEmpty ? value : value + 1;
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ function licensing(parent, dataset) {
if (_.has(root, "license_url") && root.license_url) {
licensing.util.checkAddress(root.license_url, function(error, body, response) {
if (!error){
profileTemplate.setQualityIndicatorScore("licensing", "QI.22", 1);
profileTemplate.setQualityIndicatorScore("licensing", "QI.22", 0);
}
process();
}, "HEAD");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ function provenance(parent, dataset) {
profileTemplate.setQualityIndicatorScore("provenance", "QI.46", (provQualityCounter / provMetadtaKeys.length));

if (ownershipDetails)
profileTemplate.setQualityIndicatorScore("comprehensibility", "QI.40", 1);
profileTemplate.setQualityIndicatorScore("comprehensibility", "QI.40", 0);

// The quality checks have been completed
qualityCallback(null, profileTemplate);
Expand Down
2 changes: 1 addition & 1 deletion parsers/CKAN/profiler/profilers/quality/qualityProfiler.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ function qualityProfiler(parent) {
if (dataset) {

// There is a valid dataset metadata file (Q1.1)
profileTemplate.setQualityIndicatorScore("completeness", "QI.1", 1);
profileTemplate.setQualityIndicatorScore("completeness", "QI.1", 0);

var root = dataset.result ? dataset.result : dataset;

Expand Down
48 changes: 24 additions & 24 deletions util/qualityModel.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,136 +4,136 @@ function qualityModel() {
"QI.1": {
"description": "Existence of supporting structured metadata",
"weight": 1,
"score": 0
"score": 1
},
"QI.2": {
"description": "Supports multiple serializations",
"weight": 1,
"score": 0
"score": 1
},
"QI.3": {
"description": "Has different data access points",
"weight": 1,
"score": 0
"score": 1
},
"QI.4": {
"description": "Uses datasets description vocabularies",
"weight": 1,
"score": 0
"score": 1
},
"QI.5": {
"description": "Existence of descriptions about its size",
"weight": 1,
"score": 0
"score": 1
},
"QI.6": {
"description": "Existence of descriptions about its structure (MIME Type, Format)",
"weight": 1,
"score": 0
"score": 1
},
"QI.7": {
"description": "Existence of descriptions about its organization and categorization",
"weight": 1,
"score": 0
"score": 1
},
"QI.9": {
"description": "Existence of dereferencable links for the dataset and its resources",
"weight": 1,
"score": 0
"score": 1
}
},
"availability": {
"QI.18": {
"description": "Existence of an RDF dump that can be downloaded by users",
"weight": 1,
"score": 0
"score": 1
},
"QI.19": {
"description": "Existence of queryable endpoints that respond to direct queries",
"weight": 1,
"score": 0
"score": 1
},
"QI.20": {
"description": "Existence of valid dereferencable URLs (respond to HTTP request)",
"weight": 1,
"score": 0
"score": 1
}
},
"licensing": {
"QI.21": {
"description": "Existence of human and machine readable license information",
"weight": 1,
"score": 0
"score": 1
},
"QI.22": {
"description": "Existence of dereferencable links to the full license information",
"weight": 1,
"score": 0
"score": 1
}
},
"freshness": {
"QI.24": {
"description": "Existence of timestamps that can keep track of its modifications",
"weight": 1,
"score": 0
"score": 1
}
},
"correctness": {
"QI.25": {
"description": "Includes the correct MIME type for the content",
"weight": 1,
"score": 0
"score": 1
},
"QI.26": {
"description": "Includes the correct size for the content",
"weight": 1,
"score": 0
"score": 1
},
"QI.28": {
"description": "Absence of Syntactic errors on the links level",
"weight": 1,
"score": 0
"score": 1
}
},
"comprehensibility": {
"QI.37": {
"description": "Existence of at least one exemplary RDF file",
"weight": 1,
"score": 0
"score": 1
},
"QI.39": {
"description": "Existence of general information (title, URL, description) for the dataset",
"weight": 1,
"score": 0
"score": 1
},
"QI.40": {
"description": "Existence of mailing list, message board or point of contact",
"weight": 1,
"score": 0
"score": 1
}
},
"provenance": {
"QI.44": {
"description": "Existence of metadata that describes its authoritative information",
"weight": 1,
"score": 0
"score": 1
},
"QI.46": {
"description": "Usage of versioning",
"weight": 1,
"score": 0
"score": 1
}
},
"security": {
"QI.43": {
"description": "Uses login credentials to restrict access",
"weight": 1,
"score": 0
"score": 1
},
"QI.44": {
"description": "Uses SSL or SSH to provide access to the dataset",
"weight": 1,
"score": 0
"score": 1
}
}
}
Expand Down

0 comments on commit ffa9c73

Please sign in to comment.