From ccc3fcb27df9208be3463e4e8c9489ead8fad67f Mon Sep 17 00:00:00 2001 From: zunhoho <wangzun666@gmail.com> Date: Sun, 16 Jan 2022 22:32:19 +0100 Subject: [PATCH 1/3] add new OrdinatesLiteralGenerator --- .../literals/OrdinatesLiteralGenerator.java | 122 ++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 src/main/java/org/aksw/simba/lemming/mimicgraph/literals/OrdinatesLiteralGenerator.java diff --git a/src/main/java/org/aksw/simba/lemming/mimicgraph/literals/OrdinatesLiteralGenerator.java b/src/main/java/org/aksw/simba/lemming/mimicgraph/literals/OrdinatesLiteralGenerator.java new file mode 100644 index 00000000..ffb9dfb5 --- /dev/null +++ b/src/main/java/org/aksw/simba/lemming/mimicgraph/literals/OrdinatesLiteralGenerator.java @@ -0,0 +1,122 @@ +package org.aksw.simba.lemming.mimicgraph.literals; + +import com.carrotsearch.hppc.BitSet; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +public class OrdinatesLiteralGenerator extends AbstractLiteralGenerator implements ILiteralGenerator{ + + private static final Logger LOGGER = LoggerFactory.getLogger(OrdinatesLiteralGenerator.class); + + /** + * 1.key: data typed edge colour 2.key: literal tail vertex colour value: a double array of size 2, + * first value is min value of first ordinate, second value is min value of second ordinate. + */ + private Map<BitSet, Map<BitSet, double[]>> mapOfMinValues; + + /** + * 1.key: data typed edge colour 2.key: literal tail vertex colour value: a double array of size 2, + * first value is max value of first ordinate, second value is max value of second ordinate. 
+ */ + private Map<BitSet, Map<BitSet, double[]>> mapOfMaxValues; + + public OrdinatesLiteralGenerator(Map<BitSet, Map<BitSet, Set<String>>> sampleData) { + super(sampleData); + mapOfMinValues = new HashMap<>(); + mapOfMaxValues = new HashMap<>(); + computeDataRange(); + } + + private void computeDataRange(){ + LOGGER.info("Start - computation of range for literals with type ordinates"); + Set<BitSet> dteColours = mBaseData.keySet(); + for(BitSet dteColour : dteColours){ + Map<BitSet, Set<String>> mapTColour2Literals = mBaseData.get(dteColour); + if(mapTColour2Literals != null && !mapTColour2Literals.isEmpty()){ + Map<BitSet, double[]> mapTColour2Mins = new HashMap<>(); + Map<BitSet, double[]> mapTColour2Maxes = new HashMap<>(); + + Set<BitSet> tColours = mapTColour2Literals.keySet(); + for(BitSet tColour : tColours){ + Set<String> literals = mapTColour2Literals.get(tColour); + if(literals != null && !literals.isEmpty()){ + double[] mins = new double[]{Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY}; + double[] maxes = new double[]{Double.NEGATIVE_INFINITY, Double.NEGATIVE_INFINITY}; + for(String literal : literals){ + double[] ordinates = getOrdinatesValues(literal); + mins[0] = Double.min(mins[0], ordinates[0]); + mins[1] = Double.min(mins[1], ordinates[1]); + maxes[0] = Double.max(maxes[0], ordinates[0]); + maxes[1] = Double.max(maxes[1], ordinates[1]); + } + mapTColour2Mins.put(tColour, mins); + mapTColour2Maxes.put(tColour, maxes); + } + } + mapOfMinValues.put(dteColour, mapTColour2Mins); + mapOfMaxValues.put(dteColour, mapTColour2Maxes); + } + } + LOGGER.info("End - computation of range for literals with type ordinates"); + } + + /** + * The method is used to change a literal with type ordinates into a double array. 
+ */ + private double[] getOrdinatesValues(String literal){ + double[] values = new double[2]; + String[] words = literal.split(" "); + int i= 0; + for(String word : words ){ + if(!word.isEmpty()){ + try{ + double value = Double.parseDouble(word); + values[i] = value; + i++; + }catch (Exception e){ + LOGGER.error("The given ordinates cannot be parsed!"); + return new double[]{0.0, 0.0}; + } + } + } + return values; + } + + + /** + * Get an ordinates for the given data typed edge colour and tail colour. + * Note: the numberOfValues plays no role in this method, because we always generate 2 doubles to form a literal + */ + @Override + public String getValue(BitSet tColo, BitSet dteColo, int numberOfValues){ + String literal = ""; + if(tColo != null && dteColo != null){ + Map<BitSet, double[]> mapTColour2Mins = mapOfMinValues.get(dteColo); + Map<BitSet, double[]> mapTColour2Maxes = mapOfMaxValues.get(dteColo); + if(mapTColour2Mins != null && !mapTColour2Mins.isEmpty() && + mapTColour2Maxes != null && !mapTColour2Maxes.isEmpty()){ + double[] mins = mapTColour2Mins.get(tColo); + double[] maxes = mapTColour2Maxes.get(tColo); + if(mins != null && maxes !=null){ + double ordinate1 = mins[0] + mRand.nextDouble()*(maxes[0] - mins[0]); + double ordinate2 = mins[1] + mRand.nextDouble()*(maxes[1] - mins[1]); + String string1 = String.format("%.4f", ordinate1); + String string2 = String.format("%.4f", ordinate2); + literal = string1 + " " + string2; + } + }else{ + LOGGER.error("Cannot generate valid ordinates literal!"); + return "0.0 0.0"; + } + }else { + LOGGER.error("Cannot generate valid ordinates literal!"); + return "0.0 0.0"; + } + return literal; + } +} + From 89986d26eda06156447cdf2dd255ae02bf0581d2 Mon Sep 17 00:00:00 2001 From: zunhoho <wangzun666@gmail.com> Date: Sun, 16 Jan 2022 22:35:35 +0100 Subject: [PATCH 2/3] add new generator into LiteralAnalysis and LiteralGenerator --- .../mimicgraph/literals/LiteralAnalysis.java | 188 +++++++++--------- 
.../literals/RDFLiteralGenertor.java | 34 ++-- 2 files changed, 115 insertions(+), 107 deletions(-) diff --git a/src/main/java/org/aksw/simba/lemming/mimicgraph/literals/LiteralAnalysis.java b/src/main/java/org/aksw/simba/lemming/mimicgraph/literals/LiteralAnalysis.java index dfc58d4c..e36ff396 100644 --- a/src/main/java/org/aksw/simba/lemming/mimicgraph/literals/LiteralAnalysis.java +++ b/src/main/java/org/aksw/simba/lemming/mimicgraph/literals/LiteralAnalysis.java @@ -18,35 +18,35 @@ * LiteralAnalysis takes responsibility to collect data type of literals, * collect potential content of each literal, and perform analysis for some * statistic metrics such as literal distribution. - * + * * @author nptsy */ public class LiteralAnalysis { - /* + /* * map of literals associated with the datatype edge colour and tail colours - * 1st key: dteColo, 2nd key: tColo and value is a set of literals + * 1st key: dteColo, 2nd key: tColo and value is a set of literals */ - + private Map<BitSet, Map<BitSet, Set<String>>> mValuesOfEachDTEColour; - + /* - * a map of average length of literals for each dteColo associated with tColo + * a map of average length of literals for each dteColo associated with tColo * The length of a literal is the number of words the literal may have - * + * * 1st key is the tail colour, 2nd key is the dteColo and value is the average length */ private Map<BitSet, ObjectDoubleOpenHashMap<BitSet>> mAvrgNoOfWordsPerDTEdgeColour; - + /* * map of literal and its potential type. * One problem is: a literal may have more than one type. 
*/ private Map<BitSet, String> mTypesOfDTEColours; - + /** * Constructor - * + * * @param origGrphs an array of the original RDF data graphs */ public LiteralAnalysis(ColouredGraph[] origGrphs){ @@ -54,10 +54,10 @@ public LiteralAnalysis(ColouredGraph[] origGrphs){ mValuesOfEachDTEColour = new HashMap<BitSet, Map<BitSet, Set<String>>>(); mAvrgNoOfWordsPerDTEdgeColour = new HashMap<BitSet, ObjectDoubleOpenHashMap<BitSet>>(); mTypesOfDTEColours = new HashMap<BitSet, String>(); - + analyze(origGrphs); } - + /** * get map of type of datatype properties * the key is the type of properties, value is a set of properties having the same type @@ -69,30 +69,30 @@ public Map<String, Set<BitSet>> getMapOfTypesAndDTEColours(){ Set<BitSet> setOfDTEColours = mTypesOfDTEColours.keySet(); for(BitSet dteColo : setOfDTEColours){ String type = mTypesOfDTEColours.get(dteColo); - - + + Set<BitSet> setOfTmpDTEColours = mapOfTypesAndDTEColours.get(type); if(setOfTmpDTEColours == null){ setOfTmpDTEColours = new HashSet<BitSet>(); mapOfTypesAndDTEColours.put(type, setOfTmpDTEColours); } - + setOfTmpDTEColours.add(dteColo); } } return mapOfTypesAndDTEColours; } - + /** * get map of literals based on the colour of datatype edges and of vertices - * + * * @param setOfDTEColours set of datatype edge's colours - * + * * @return a map containing literal of specific datatype edge colour for specific vertex colour */ public Map<BitSet, Map<BitSet, Set<String>>> getMapOfDTEColoursAndValues(Set<BitSet> setOfDTEColours){ if(setOfDTEColours!=null && setOfDTEColours.size() > 0){ - + Map<BitSet, Map<BitSet, Set<String>>> mapSampleData = new HashMap<BitSet, Map<BitSet, Set<String>>>(); for(BitSet dteColo: setOfDTEColours){ Map<BitSet, Set<String>> mapOfVColoAndSetValues = mValuesOfEachDTEColour.get(dteColo); @@ -104,34 +104,34 @@ public Map<BitSet, Map<BitSet, Set<String>>> getMapOfDTEColoursAndValues(Set<Bit } return null; } - + /** - * analyze and collect literals in the original RDF data graph + * 
analyze and collect literals in the original RDF data graph * @param origGrphs */ private void analyze(ColouredGraph [] origGrphs){ - + Map<BitSet, ObjectIntOpenHashMap<BitSet>> mapAppearTimesOfDTEColoursOverTColo = new HashMap<BitSet, ObjectIntOpenHashMap<BitSet>>(); - + //------------------------------------------------------ //collect type of literal first then collect literal //------------------------------------------------------ - - + + //Collect type of literals for(ColouredGraph grph: origGrphs){ // map of datatype edge colours to tail colours with their literals Map<BitSet, Map<BitSet, Set<String>>> mapDTEdgeColoursToLiterals = grph.getMapLiterals(); - + // set of datatype edge colours Set<BitSet> setOfDTEColours = mapDTEdgeColoursToLiterals.keySet(); - + for(BitSet dteColo : setOfDTEColours){ // type of literals associated to this datatype edge colour String type = grph.getLiteralType(dteColo); String orginalTypes = mTypesOfDTEColours.get(dteColo); - - // mapping of original type and new type are here + + // mapping of original type and new type are here if(orginalTypes == null || orginalTypes.isEmpty()){ mTypesOfDTEColours.put(dteColo, type); orginalTypes = type; @@ -143,17 +143,17 @@ private void analyze(ColouredGraph [] origGrphs){ } } } - + //collect values of literals for(ColouredGraph grph : origGrphs){ // map of datatype edge colours to tail colours with their literals Map<BitSet, Map<BitSet, Set<String>>> mapDTEdgeColoursToLiterals = grph.getMapLiterals(); - + // set of datatype edge colours Set<BitSet> setOfDTEColours = mapDTEdgeColoursToLiterals.keySet(); - + for(BitSet dteColo : setOfDTEColours){ - + // map of existing literals associated with tail colours Map<BitSet, Set<String>> origMapOfTColoAndLiterals = mValuesOfEachDTEColour.get(dteColo); if(origMapOfTColoAndLiterals == null){ @@ -163,47 +163,51 @@ private void analyze(ColouredGraph [] origGrphs){ //get type of the literal String orginalTypes = mTypesOfDTEColours.get(dteColo); - + // map 
of literals associated with tail colours Map<BitSet, Set<String>> mapOfTColoAndLiterals = mapDTEdgeColoursToLiterals.get(dteColo); - + if(mapOfTColoAndLiterals != null && mapOfTColoAndLiterals.size() > 0){ - + Set<BitSet> setOfTColours = mapOfTColoAndLiterals.keySet(); for(BitSet tColo : setOfTColours){ - + Set<String> setOfExistingLiterals = origMapOfTColoAndLiterals.get(tColo); if(setOfExistingLiterals == null){ setOfExistingLiterals = new HashSet<String>(); origMapOfTColoAndLiterals.put(tColo, setOfExistingLiterals); } - + Set<String> setOfLiterals= mapOfTColoAndLiterals.get(tColo); - + int totalNoOfWords = 0; - + for(String literal: setOfLiterals){ - + if(literal.isEmpty()) continue; - + // remove postfix of literal if any literal = normalizeLiterals(literal); - - String[] arrWords = literal.split(" "); - - for(String word: arrWords){ - - if(orginalTypes.contains("XMLSchema#string")){ - word = normalizeWords(word); - } - - if(!word.isEmpty()){ - totalNoOfWords ++; - setOfExistingLiterals.add(word); - } - } - } + + //if literal is in type ordinates + if(orginalTypes.equals("http://def.seegrid.csiro.au/isotc211/iso19103/2005/basic#ordinates")){ + totalNoOfWords = totalNoOfWords +2; + setOfExistingLiterals.add(literal); + }else { + String[] arrWords = literal.split(" "); + for(String word: arrWords){ + if(orginalTypes.contains("XMLSchema#string")){ + word = normalizeWords(word); + } + + if(!word.isEmpty()){ + totalNoOfWords ++; + setOfExistingLiterals.add(word); + } + } + } + } //update the average words of a literal double avrgNoOfWordsPerLiteral = totalNoOfWords/setOfLiterals.size(); ObjectDoubleOpenHashMap<BitSet> mapAvrgLengthOfLiterals = mAvrgNoOfWordsPerDTEdgeColour.get(tColo); @@ -212,42 +216,42 @@ private void analyze(ColouredGraph [] origGrphs){ mAvrgNoOfWordsPerDTEdgeColour.put(tColo, mapAvrgLengthOfLiterals); } mapAvrgLengthOfLiterals.putOrAdd(dteColo, avrgNoOfWordsPerLiteral, avrgNoOfWordsPerLiteral); - - + + ObjectIntOpenHashMap<BitSet> mapAppearTimes 
= mapAppearTimesOfDTEColoursOverTColo.get(tColo); if(mapAppearTimes == null ){ mapAppearTimes = new ObjectIntOpenHashMap<BitSet>(); mapAppearTimesOfDTEColoursOverTColo.put(tColo, mapAppearTimes); } - + mapAppearTimes.putOrAdd(dteColo, 1, 1); } } } } - + /* * compute average words of a literal associated with a specific data * typed property over all graphs */ - + Set<BitSet> setOfTColours = mapAppearTimesOfDTEColoursOverTColo.keySet(); for(BitSet tColo: setOfTColours){ ObjectIntOpenHashMap<BitSet> mapAppearTimeOfEachDTEColo = mapAppearTimesOfDTEColoursOverTColo.get(tColo); ObjectDoubleOpenHashMap<BitSet> mapAvrgLengthOfLiterals = mAvrgNoOfWordsPerDTEdgeColour.get(tColo); - - if(mapAppearTimeOfEachDTEColo!= null && mapAppearTimeOfEachDTEColo.size() > 0 + + if(mapAppearTimeOfEachDTEColo!= null && mapAppearTimeOfEachDTEColo.size() > 0 && mapAvrgLengthOfLiterals!= null && mapAvrgLengthOfLiterals.size() > 0){ - + Object[] arrDTEColours = mapAppearTimeOfEachDTEColo.keys; for(int i = 0 ; i< arrDTEColours.length ; i++){ if(mapAppearTimeOfEachDTEColo.allocated[i]){ BitSet dteColo = (BitSet)arrDTEColours[i]; - + int noOfAppearTimes = mapAppearTimeOfEachDTEColo.get(dteColo); - + double avrgWordsPerLiteral = mapAvrgLengthOfLiterals.get(dteColo); - + if(noOfAppearTimes != 0) mapAvrgLengthOfLiterals.put(dteColo, avrgWordsPerLiteral/noOfAppearTimes); else @@ -257,11 +261,11 @@ private void analyze(ColouredGraph [] origGrphs){ } } } - + public String getDataTypes(BitSet dteColo){ - return mTypesOfDTEColours.get(dteColo); + return mTypesOfDTEColours.get(dteColo); } - + /** * get a set of words associated to the data typed edge's colour * @param dteColo @@ -276,11 +280,11 @@ public Set<String> getSetOfValues(BitSet tColo, BitSet dteColo){ } return res; } - + /** * get the average number of words that a data typed edge's can hae * @param dteColo the data typed edge's colour - * + * * @return the average number of words */ public double getAvrgNoOfWords(BitSet tColo, BitSet dteColo){ 
@@ -291,23 +295,23 @@ public double getAvrgNoOfWords(BitSet tColo, BitSet dteColo){ } return 0; } - + private String normalizeLiterals(String originalLiteral){ - + if(originalLiteral.contains("^^")){ int endPos = originalLiteral.indexOf("^^"); - originalLiteral = originalLiteral.substring(0, endPos); + originalLiteral = originalLiteral.substring(0, endPos); } - + if(originalLiteral.startsWith("\"") && originalLiteral.contains("\"@")){ int endPos = originalLiteral.indexOf("\"@"); originalLiteral = originalLiteral.substring(0, endPos); - originalLiteral = originalLiteral.substring(1); + originalLiteral = originalLiteral.substring(1); } - + return originalLiteral; } - + /** * this function is only applied for string type values * @param word @@ -315,33 +319,33 @@ private String normalizeLiterals(String originalLiteral){ */ private String normalizeWords(String word){ Pattern special = Pattern.compile ("[!@#$%&*()_+=|<>?{}\\[\\]~-]"); - + word = word.trim(); - + //remove puntuation chars word = word.replaceAll("\\p{P}", ""); - + //remove new line chars word = word.replace("\n", "").replace("\r", ""); - + word = word.replace("\t", ""); - + //remove special chars in prefix - //word = word.replaceFirst("[^A-Za-z0-9]", ""); - + //word = word.replaceFirst("[^A-Za-z0-9]", ""); + //remove special chars in postfix //String reversedWord = new StringBuffer(word).reverse().toString(); - //reversedWord = reversedWord.replaceFirst("[^A-Za-z0-9]", ""); - //word = new StringBuffer(reversedWord).reverse().toString(); + //reversedWord = reversedWord.replaceFirst("[^A-Za-z0-9]", ""); + //word = new StringBuffer(reversedWord).reverse().toString(); Matcher hasSpecial = special.matcher(word); if(word.contains("-")) { System.err.println(""); } if(hasSpecial.find()){ - if(word.length() == 1){ - return ""; - } + if(word.length() == 1){ + return ""; + } } return word; } diff --git a/src/main/java/org/aksw/simba/lemming/mimicgraph/literals/RDFLiteralGenertor.java 
b/src/main/java/org/aksw/simba/lemming/mimicgraph/literals/RDFLiteralGenertor.java index 9b2f57ec..ba10c07c 100644 --- a/src/main/java/org/aksw/simba/lemming/mimicgraph/literals/RDFLiteralGenertor.java +++ b/src/main/java/org/aksw/simba/lemming/mimicgraph/literals/RDFLiteralGenertor.java @@ -9,37 +9,41 @@ import com.carrotsearch.hppc.BitSet; public class RDFLiteralGenertor { - + private LiteralAnalysis mLiteralAnalysis; private Map<String , ILiteralGenerator> mMapOfDataTypesAndGenerators; - + public RDFLiteralGenertor(ColouredGraph[] origGrphs){ // literal collection mLiteralAnalysis = new LiteralAnalysis(origGrphs); - mMapOfDataTypesAndGenerators = new HashMap<String, ILiteralGenerator>(); - + mMapOfDataTypesAndGenerators = new HashMap<>(); + initializeGenerators(); } - + private void initializeGenerators(){ if(mLiteralAnalysis != null){ - + Map<String, Set<BitSet>> mapOfTypesAndDTEColo = mLiteralAnalysis.getMapOfTypesAndDTEColours(); if(mapOfTypesAndDTEColo!= null && mapOfTypesAndDTEColo.size()> 0 ){ Set<String> setOfTypes = mapOfTypesAndDTEColo.keySet(); - + for(String dataType : setOfTypes){ Set<BitSet> setOfDTEColours = mapOfTypesAndDTEColo.get(dataType); // get sample data according to the types and dteColours; Map<BitSet, Map<BitSet, Set<String>>> mapOfDTEColoAndVColoValues = mLiteralAnalysis.getMapOfDTEColoursAndValues(setOfDTEColours); - + if(mapOfDTEColoAndVColoValues!=null){ - if(dataType.contains("#integer") || dataType.contains("#float") || dataType.contains("#long") || + if(dataType.equals("http://def.seegrid.csiro.au/isotc211/iso19103/2005/basic#ordinates")){ + ILiteralGenerator ordinatesGenerator = new OrdinatesLiteralGenerator(mapOfDTEColoAndVColoValues); + mMapOfDataTypesAndGenerators.put(dataType, ordinatesGenerator); + + } else if(dataType.contains("#integer") || dataType.contains("#float") || dataType.contains("#long") || dataType.contains("#double") || dataType.contains("#short") || dataType.contains("#char")){ //create a numeric generator 
ILiteralGenerator nummericGenerator = new NumericLiteralGenerator(mapOfDTEColoAndVColoValues); mMapOfDataTypesAndGenerators.put(dataType, nummericGenerator); - + }else if(dataType.contains("#boolean") || dataType.contains("#bool")){ //create a boolean generator ILiteralGenerator booleanGenerator = new BooleanLiteralGenerator(mapOfDTEColoAndVColoValues); @@ -58,11 +62,11 @@ private void initializeGenerators(){ }// end if of checking valid map types and edge's colours } } - + /** * get a string which includes 'noOfWords' words which are closest to the input set of words * associated with the dteColo (of a data typed proerty) - * + * * @return a string of words */ public String getValue(BitSet vColo, BitSet dteColo) { @@ -70,7 +74,7 @@ public String getValue(BitSet vColo, BitSet dteColo) { if(vColo != null && dteColo !=null){ double numOfValues = mLiteralAnalysis.getAvrgNoOfWords(vColo, dteColo); String typeOfData = mLiteralAnalysis.getDataTypes(dteColo); - + //System.out.println("\t\tGet "+numOfValues+" word(s) of type:" + typeOfData ); ILiteralGenerator literalGenerator = mMapOfDataTypesAndGenerators.get(typeOfData); //double currentTime = System.currentTimeMillis(); @@ -80,8 +84,8 @@ public String getValue(BitSet vColo, BitSet dteColo) { } return literal; } - - + + public String getLiteralType(BitSet dteColo){ String typeOfData = "http://www.w3.org/2001/XMLSchema#string"; if(dteColo !=null){ From 6e26556ed82500ffd6f4137bc0e38121657b49ee Mon Sep 17 00:00:00 2001 From: zunhoho <wangzun666@gmail.com> Date: Tue, 18 Jan 2022 20:25:54 +0100 Subject: [PATCH 3/3] add test case --- .../literals/OrdinatesLiteralGenerator.java | 34 +++++- .../OrdinatesLiteralGeneratorTest.java | 109 ++++++++++++++++++ 2 files changed, 142 insertions(+), 1 deletion(-) create mode 100644 src/test/java/org/aksw/simba/lemming/creation/literal/OrdinatesLiteralGeneratorTest.java diff --git a/src/main/java/org/aksw/simba/lemming/mimicgraph/literals/OrdinatesLiteralGenerator.java 
b/src/main/java/org/aksw/simba/lemming/mimicgraph/literals/OrdinatesLiteralGenerator.java index ffb9dfb5..b20d259e 100644 --- a/src/main/java/org/aksw/simba/lemming/mimicgraph/literals/OrdinatesLiteralGenerator.java +++ b/src/main/java/org/aksw/simba/lemming/mimicgraph/literals/OrdinatesLiteralGenerator.java @@ -31,6 +31,35 @@ public OrdinatesLiteralGenerator(Map<BitSet, Map<BitSet, Set<String>>> sampleDat computeDataRange(); } + /** + * Get the range of ordinates for a given data typed edge colour and a tail vertex colour. + * @return a double array of size 4: first two members form the range of the first part of ordinate, and the last + * two members form the range of the second part. + * + */ + public double[] getOrdinatesRange(BitSet dteColo, BitSet tColo){ + double[] ranges = new double[4]; + Map<BitSet, double[]> mTColo2Mins = mapOfMinValues.get(dteColo); + Map<BitSet, double[]> mTColo2Maxes = mapOfMaxValues.get(dteColo); + if(mTColo2Mins != null && !mTColo2Mins.isEmpty() && mTColo2Maxes != null && !mTColo2Maxes.isEmpty()){ + double[] mins = mTColo2Mins.get(tColo); + double[] maxes = mTColo2Maxes.get(tColo); + if(mins != null && mins.length>0 && maxes != null && maxes.length>0){ + ranges[0] = mins[0]; + ranges[1] = maxes[0]; + ranges[2] = mins[1]; + ranges[3] = maxes[1]; + }else{ + LOGGER.error("There's no valid ordinate's range!!"); + ranges = new double[]{0.0, 0.0, 0.0, 0.0}; + } + }else { + LOGGER.error("There's no valid ordinate's range!!"); + ranges = new double[]{0.0, 0.0, 0.0, 0.0}; + } + return ranges; + } + private void computeDataRange(){ LOGGER.info("Start - computation of range for literals with type ordinates"); Set<BitSet> dteColours = mBaseData.keySet(); @@ -93,7 +122,7 @@ private double[] getOrdinatesValues(String literal){ */ @Override public String getValue(BitSet tColo, BitSet dteColo, int numberOfValues){ - String literal = ""; + String literal; if(tColo != null && dteColo != null){ Map<BitSet, double[]> mapTColour2Mins = 
mapOfMinValues.get(dteColo); Map<BitSet, double[]> mapTColour2Maxes = mapOfMaxValues.get(dteColo); @@ -107,6 +136,9 @@ public String getValue(BitSet tColo, BitSet dteColo, int numberOfValues){ String string1 = String.format("%.4f", ordinate1); String string2 = String.format("%.4f", ordinate2); literal = string1 + " " + string2; + }else { + LOGGER.error("Cannot generate valid ordinates literal!"); + return "0.0 0.0"; } }else{ LOGGER.error("Cannot generate valid ordinates literal!"); diff --git a/src/test/java/org/aksw/simba/lemming/creation/literal/OrdinatesLiteralGeneratorTest.java b/src/test/java/org/aksw/simba/lemming/creation/literal/OrdinatesLiteralGeneratorTest.java new file mode 100644 index 00000000..0128f94e --- /dev/null +++ b/src/test/java/org/aksw/simba/lemming/creation/literal/OrdinatesLiteralGeneratorTest.java @@ -0,0 +1,109 @@ +package org.aksw.simba.lemming.creation.literal; + +import org.aksw.simba.lemming.mimicgraph.literals.OrdinatesLiteralGenerator; +import com.carrotsearch.hppc.BitSet; +import org.junit.Assert; +import org.junit.Test; + +import java.util.*; + +public class OrdinatesLiteralGeneratorTest { + + @Test + public void test(){ + OrdinatesLiteralGenerator generator = new OrdinatesLiteralGenerator(createSampleData()); + + BitSet b0 = new BitSet(); + b0.flip(0); + BitSet b1 = new BitSet(); + b1.flip(1); + BitSet b2 = new BitSet(); + b2.flip(2); + BitSet b3 = new BitSet(); + b3.flip(3); + BitSet b4 = new BitSet(); + b4.flip(4); + + double[] expectedRange02 = {-1.0, 1.0, -1.0, 1.0}; + double[] expectedRange03 = {-10.0, 1.0, 1.0, 10.0}; + double[] expectedRange14 = {0.0, 100.0, -100.0, 50.0}; + double[] expectedRange04 = {0.0, 0.0, 0.0, 0.0}; + double[] acturalRange02 = generator.getOrdinatesRange(b0, b2); + double[] acturalRange03 = generator.getOrdinatesRange(b0, b3); + double[] acturalRange14 = generator.getOrdinatesRange(b1, b4); + double[] acturalRange04 = generator.getOrdinatesRange(b0, b4); + + //test the range calculation + for(int i 
= 0 ; i < 4 ; i++){ + Assert.assertEquals(expectedRange02[i], acturalRange02[i], 0.001); + Assert.assertEquals(expectedRange03[i], acturalRange03[i], 0.001); + Assert.assertEquals(expectedRange14[i], acturalRange14[i], 0.001); + Assert.assertEquals(expectedRange04[i], acturalRange04[i], 0.001); + } + + //test the generated ordinates + for(int i = 0; i < 100 ; i++){ + String[] ordinate02 = generator.getValue(b2, b0, 0).split(" "); + Assert.assertTrue(Double.parseDouble(ordinate02[0]) >= -1.0 && Double.parseDouble(ordinate02[0]) <= 1.0 + && Double.parseDouble(ordinate02[1]) >= -1.0 && Double.parseDouble(ordinate02[1]) <= 1.0 ); + + String[] ordinate03 = generator.getValue(b3, b0, 0).split(" "); + Assert.assertTrue(Double.parseDouble(ordinate03[0]) >= -10.0 && Double.parseDouble(ordinate03[0]) <= 1.0 + && Double.parseDouble(ordinate03[1]) >= 1.0 && Double.parseDouble(ordinate03[1]) <= 10.0 ); + + String[] ordinate14 = generator.getValue(b4, b1, 0).split(" "); + Assert.assertTrue(Double.parseDouble(ordinate14[0]) >= 0.0 && Double.parseDouble(ordinate14[0]) <= 100.0 + && Double.parseDouble(ordinate14[1]) >= -100.0 && Double.parseDouble(ordinate14[1]) <= 50.0 ); + + String[] ordinate04 = generator.getValue(b4, b0, 0).split(" "); + Assert.assertTrue(Double.parseDouble(ordinate04[0]) == 0.0 && Double.parseDouble(ordinate04[0]) == 0.0 + && Double.parseDouble(ordinate04[1]) == 0.0 && Double.parseDouble(ordinate04[1]) == 0.0 ); + } + } + + /** + * create sample data for LiteralGenerator + */ + private Map<BitSet, Map<BitSet, Set<String>>> createSampleData(){ + Map<BitSet, Map<BitSet, Set<String>>> data = new HashMap<>(); + //create data typed edge color + BitSet dteColo0 = new BitSet(); + dteColo0.flip(0); + + //create the corresponding map + Map<BitSet, Set<String>> mTColo2Literal0 = new HashMap<>(); + BitSet tColo2 = new BitSet(); + tColo2.flip(2); + Set<String> lits0 = new HashSet<>(); + lits0.add("-1 1"); + lits0.add("0 0"); + lits0.add("1 -1"); + 
mTColo2Literal0.put(tColo2, lits0); + + BitSet tColo3 = new BitSet(); + tColo3.flip(3); + Set<String> lits1 = new HashSet<>(); + lits1.add("-10 1"); + lits1.add("-5 5"); + lits1.add("1 10"); + mTColo2Literal0.put(tColo3, lits1); + data.put(dteColo0, mTColo2Literal0); + + //create data typed edge color + BitSet dteColo1 = new BitSet(); + dteColo1.flip(1); + + //create the corresponding map + Map<BitSet, Set<String>> mTColo2Literal1 = new HashMap<>(); + BitSet tColo4 = new BitSet(); + tColo4.flip(4); + Set<String> lits2 = new HashSet<>(); + lits2.add("100 50"); + lits2.add("0 -100"); + lits2.add("1 -50"); + mTColo2Literal1.put(tColo4, lits2); + data.put(dteColo1, mTColo2Literal1); + + return data; + } +}