diff --git a/bindings/python/__init__standalone.py.in b/bindings/python/__init__standalone.py.in index 2f7612b9b..5e7ce36f3 100644 --- a/bindings/python/__init__standalone.py.in +++ b/bindings/python/__init__standalone.py.in @@ -70,6 +70,7 @@ storage_path.append(str(thispath / "share/dlite/storages/*.json")) # DLITE_TEMPLATE_DIRS: Search path for DLite templates. +from .dlite import * # noqa: F401, F403 from .factory import classfactory, objectfactory, instancefactory # noqa: F401 del thispath diff --git a/bindings/python/dlite-entity-python.i b/bindings/python/dlite-entity-python.i index 738954832..860e5c439 100644 --- a/bindings/python/dlite-entity-python.i +++ b/bindings/python/dlite-entity-python.i @@ -60,6 +60,13 @@ class Metadata(Instance): def __repr__(self): return f"" + def getprop(self, name): + """Returns the metadata property object with the given name.""" + lst = [p for p in self.properties["properties"] if p.name == name] + if lst: + return lst[0] + raise DLiteError(f"Metadata {self.uri} has no such property: {name}") + def standardise(v, prop, asdict=False): """Represent property value `v` as a standard python type. @@ -98,7 +105,10 @@ def get_instance(id: "str", metaid: "str"=None, check_storages: "bool"=True) -> Returns: DLite instance. 
""" - inst = _dlite.get_instance(id, metaid, check_storages) + if isinstance(id, dlite.Instance): + inst = id + else: + inst = _dlite.get_instance(id, metaid, check_storages) if inst is None: raise DLiteError(f"no such instance: {id}") elif inst.is_meta: @@ -284,6 +294,9 @@ def get_instance(id: "str", metaid: "str"=None, check_storages: "bool"=True) -> if isinstance(dims, dict): meta = get_instance(metaid) dims = [dims[dim.name] for dim in meta.properties['dimensions']] + # Allow metaid to be an Instance + if isinstance(metaid, dlite.Instance): + metaid = metaid.uri return Instance( metaid=metaid, dims=dims, id=id, dimensions=(), properties=() # arrays must not be None diff --git a/bindings/python/mappings.py b/bindings/python/mappings.py index 71d644b6a..7551e1a1f 100644 --- a/bindings/python/mappings.py +++ b/bindings/python/mappings.py @@ -1,18 +1,32 @@ """Implements mappings between entities. + +Units are currently handled with pint.Quantity. The benefit of this +compared to explicit unit conversions, is that units will be handled +transparently by mapping functions, without any need to specify units +of input and output parameters. + +Shapes are automatically handled by expressing non-scalar quantities +with numpy. 
+ """ from __future__ import annotations -import types import itertools +import types import warnings from collections import defaultdict from collections.abc import Sequence -from typing import Union, Dict, List +from enum import Enum +from typing import Any, Callable, Dict, List, Optional, Union + +from pint import Quantity import dlite +from dlite.triplestore import Triplestore, DM, FNO, MAP, RDF, RDFS +from dlite.utils import infer_dimensions -class MappingError(Exception): +class MappingError(dlite.DLiteError): """Base class for mapping errors.""" @@ -32,80 +46,628 @@ class InconsistentTriplesError(MappingError): """Inconsistcy in RDF triples.""" +class MissingRelationError(MappingError): + """There are missing relations in RDF triples.""" + + +class StepType(Enum): + """Type of mapping step when going from the output to the inputs.""" + MAPSTO = 1 + INV_MAPSTO = -1 + INSTANCEOF = 2 + INV_INSTANCEOF = -2 + SUBCLASSOF = 3 + INV_SUBCLASSOF = -3 + FUNCTION = 4 + + +class Value: + """Represents the value of an instance property. + + Arguments: + value: Property value. + unit: Property unit. + iri: IRI of ontological concept that this value is an instance of. + property_iri: IRI of datamodel property that this value is an + instance of. + cost: Cost of accessing this value. + """ + def __init__(self, value, unit=None, iri=None, property_iri=None, cost=0.0): + self.value = value + self.unit = unit + self.iri = iri + self.property_iri = property_iri + self.cost = cost + + def show(self, routeno=None, name=None, indent=0): + """Returns a string representation of the Value. + + Arguments: + routeno: Unused. The argument exists for consistency with + the corresponding method in Step. + name: Name of value. + indent: Indentation level. 
+ """ + s = [] + ind = ' '*indent + s.append(ind + f'{name if name else "Value"}:') + s.append(ind + f' iri: {self.iri}') + s.append(ind + f' property_iri: {self.property_iri}') + s.append(ind + f' unit: {self.unit}') + s.append(ind + f' cost: {self.cost}') + s.append(ind + f' value: {self.value}') + return '\n'.join(s) + + class MappingStep: - """A step in a mapping route from a target to one or more source properties. + """A step in a mapping route from a target to one or more sources. A mapping step corresponds to one or more RDF triples. In the - simple case of a `:mapsTo` or `rdfs:isSubclassOf` relation, it is + simple case of a `mo:mapsTo` or `rdfs:isSubclassOf` relation, it is only one triple. For transformations that has several input and output, a set of triples are expected. - Subproperty relations should be resolved using the ontology before - creating a mapping step. + Arguments: + output_iri: IRI of the output concept. + steptype: One of the step types from the StepType enum. + function: Callable that evaluates the output from the input. + cost: The cost related to this mapping step. Should be either a + float or a callable taking the same arguments as `function` as + input returning the cost as a float. + output_unit: Output unit. - Args: - inputs: Sequence of inputs to this mapping step. Should either be - MappingStep objects for the peceeding mapping steps or the URI - of source property. - output: IRI of the output concept. - predicate: Either "mapsTo", "subClassOf" or "hasInput", corresponding - to a mapping, inference or transformation step. - triples: Optional. Sequence of RDF triples for this step. Might be - used for visualisation. - unit: The output of this step. - config: Configuration of a transformation step. - - Attributes: - inputs: Sequence of inputs to this mapping step. Should either be - MappingStep objects for the peceeding mapping steps or the URI - of source property. 
- predicate: Either "mapsTo", "subClassOf" or "hasInput", corresponding - to a mapping, inference or transformation step. - output: IRI of the output concept. - triples: Copy of the `triples` argument. - unit: The output of this step. - config: Configuration of a transformation step. - next: List with the next mapping step. + The arguments can also be assigned as attributes. """ - def __init__(self, - inputs: Sequence[Union[str, MappingStep]], - output: str, - predicate: Union['mapsTo', 'subClassOf', 'hasInput'] = 'mapsTo', - triples: Sequence[(str, str, str)] = (), - unit: str = None, - config: dict = None, - ) -> None: - self.inputs = [] - self.predicate = predicate - self.output = output - self.triples = [(t[0], t[1], t[2]) for t in triples] - self.unit = unit - self.config = config - for input in inputs: - self.add_input(input) - self.next = [] - - def add_input(self, step: MappingStep) -> None: - """Adds an input mapping step.""" - self.inputs.append(input) + def __init__( + self, + output_iri: Optional[str] = None, + steptype: Optional[StepType] = None, + function: Optional[Callable] = None, + cost: Union[Any, Callable] = 1.0, + output_unit: Optional[str] = None, + ): + self.output_iri = output_iri + self.steptype = steptype + self.function = function + self.cost = cost + self.output_unit = output_unit + self.input_routes = [] # list of inputs dicts + self.join_mode = False # whether to join upcoming input + self.joined_input = {} + + def add_inputs(self, inputs): + """Add input dict for an input route.""" + assert isinstance(inputs, dict) + self.input_routes.append(inputs) + + def add_input(self, input, name=None): + """Add an input (MappingStep or Value), where `name` is the name + assigned to the argument. + + If the `join_mode` attribute is false, a new route is created with + only one input. + + If the `join_mode` attribute is true, the input is remembered, but + first added when join_input() is called. 
+ """ + assert isinstance(input, (MappingStep, Value)) + argname = name if name else f'arg{len(self.joined_input)+1}' + if self.join_mode: + self.joined_input[argname] = input + else: + self.add_inputs({argname: input}) + + def join_input(self): + """Join all input added with add_input() since `join_mode` was set true. + Resets `join_mode` to false.""" + if not self.join_mode: + raise MappingError('Calling join_input() when join_mode is false.') + self.join_mode = False + self.add_inputs(self.joined_input) + self.joined_input = {} + + def eval(self, routeno=None, unit=None, magnitude=False, quantity=Quantity): + """Returns the evaluated value of given input route number. + + Args: + routeno: The route number to evaluate. If None (default) + the route with the lowest cost is evalueated. + unit: return the result in the given unit. + Implies `magnitude=True`. + magnitude: Whether to only return the magitude of the evaluated + value (with no unit). + quantity: Quantity class to use for evaluation. Defaults to pint. 
+ """ + if routeno is None: + (_, routeno), = self.lowest_costs(nresults=1) + inputs, idx = self.get_inputs(routeno) + values = get_values(inputs, idx, quantity=quantity) + if self.function: + value = self.function(**values) + elif len(values) == 1: + value, = values.values() + else: + raise TypeError( + f"Expected inputs to be a single argument: {values}") + + if isinstance(value, quantity) and unit: + return value.m_as(unit) + elif isinstance(value, quantity) and magnitude: + return value.m + else: + return value + + def get_inputs(self, routeno): + """Returns input and input index `(inputs, idx)` for route number + `routeno`.""" + n = 0 + for inputs in self.input_routes: + n0 = n + n += get_nroutes(inputs) + if n > routeno: + return inputs, routeno - n0 + raise ValueError(f"routeno={routeno} exceeds number of routes") + + def get_input_iris(self, routeno): + """Returns a dict mapping input names to iris for the given route + number.""" + inputs, idx = self.get_inputs(routeno) + return {k: v.output_iri if isinstance(v, MappingStep) else v.iri + for k, v in inputs.items()} + + def number_of_routes(self): + """Returns total number of routes to this mapping step.""" + n = 0 + for inputs in self.input_routes: + n += get_nroutes(inputs) + return n + + def lowest_costs(self, nresults=5): + """Returns a list of `(cost, routeno)` tuples with up to the `nresult` + lowest costs and their corresponding route numbers.""" + result = [] + n = 0 + for inputs in self.input_routes: + owncost = 1 + for cost, idx in get_lowest_costs(inputs, nresults=nresults): + if isinstance(self.cost, Callable): + values = get_values(inputs, idx, magnitudes=True) + owncost = self.cost(**values) + else: + owncost = self.cost + result.append((cost + owncost, n + idx)) + n += get_nroutes(inputs) + return sorted(result)[:nresults] + + def show(self, routeno=None, name=None, indent=0): + """Returns a string representation of the mapping routes to this step. + + Arguments: + routeno: show given route. 
The default is to show all routes. + name: Name of the last mapping step (mainly for internal use). + indent: How of blanks to prepend each line with (mainly for + internal use). + """ + s = [] + ind = ' '*indent + s.append(ind + f'{name if name else "Step"}:') + s.append(ind + f' steptype: ' + f'{self.steptype.name if self.steptype else None}') + s.append(ind + f' output_iri: {self.output_iri}') + s.append(ind + f' output_unit: {self.output_unit}') + s.append(ind + f' cost: {self.cost}') + if routeno is None: + s.append(ind + f' routes:') + for inputs in self.input_routes: + t = '\n'.join([input_.show(name=name_, indent=indent+6) + for name_, input_ in inputs.items()]) + s.append(ind + ' - ' + t[indent+6:]) + else: + s.append(ind + f' inputs:') + inputs, idx = self.get_inputs(routeno) + t = '\n'.join([input_.show(routeno=idx, name=name_, indent=indent+6) + for name_, input_ in inputs.items()]) + s.append(ind + ' - ' + t[indent+6:]) + return '\n'.join(s) + + +def get_nroutes(inputs): + """Help function returning the number of routes for an input dict.""" + m = 1 + for input in inputs.values(): if isinstance(input, MappingStep): - input.next.append(self) - - def get_sources(self) -> List[str]: - """Returns a list of URIs of all source metadata.""" - sources = [] - if not self.inputs: - warnings.warn(f'') - for input in self.inputs: - if isinstance(input, MappingStep): - sources.extend(input.get_sources()) + m *= input.number_of_routes() + return m + + +def get_values(inputs, routeno, quantity=Quantity, magnitudes=False): + """Help function returning a dict mapping the input names to actual value + of expected input unit. + + There exists `get_nroutes(inputs)` routes to populate `inputs`. 
+ `routeno` is the index of the specific route we will use to obtain the + values.""" + values = {} + for k, v in inputs.items(): + if isinstance(v, MappingStep): + value = v.eval(routeno=routeno, quantity=quantity) + values[k] = ( + value.to(v.output_unit) + if v.output_unit and isinstance(v, quantity) else value + ) + else: + values[k] = quantity(v.value, v.unit) + + if magnitudes: + values = {k: v.m if isinstance(v, quantity) else v + for k, v in values.items()} + + return values + + +def get_lowest_costs(inputs, nresults=5): + """Returns a list of `(cost, routeno)` tuples with up to the `n` + lowest costs and their corresponding route numbers.""" + result = [] + vcost = 0 + for input in inputs.values(): + if isinstance(input, MappingStep): + result.extend(input.lowest_costs(nresults=nresults)) + else: + vcost += input.cost + if result: + result.sort() + result = [(cost + vcost, idx) for cost, idx in result[:nresults]] + else: + result.append((vcost, 0)) + return result + + +def fno_mapper(triplestore): + """Finds all function definitions in `triplestore` based on the function + ontololy (FNO). + + Return a dict mapping output IRIs to a list of + + (function_iri, [input_iris, ...]) + + tuples. 
+ """ + # Temporary dicts for fast lookup + Dfirst = {s: o for s, o in triplestore.subject_objects(RDF.first)} + Drest = {s: o for s, o in triplestore.subject_objects(RDF.rest)} + Dexpects = defaultdict(list) + Dreturns = defaultdict(list) + for s, o in triplestore.subject_objects(FNO.expects): + Dexpects[s].append(o) + for s, o in triplestore.subject_objects(FNO.returns): + Dreturns[s].append(o) + + d = defaultdict(list) + for func, lst in Dreturns.items(): + input_iris = [] + for exp in Dexpects.get(func, ()): + if exp in Dfirst: + while exp in Dfirst: + input_iris.append(Dfirst[exp]) + if exp not in Drest: + break + exp = Drest[exp] else: - sources.append(input) - return sources + # Support also misuse of FNO, where fno:expects refers + # directly to input individuals + input_iris.append(exp) + + for ret in lst: + if ret in Dfirst: + while ret in Dfirst: + d[Dfirst[ret]].append((func, input_iris)) + if ret not in Drest: + break + ret = Drest[ret] + else: + # Support also misuse of FNO, where fno:returns refers + # directly to the returned individual + d[ret].append((func, input_iris)) + + return d + + +def mapping_route( + target, + sources, + triplestore, + function_repo=None, + function_mappers=(fno_mapper, ), + default_costs={'function': 10.0, 'mapsTo': 2.0, 'instanceOf': 1.0, + 'subClassOf': 1.0, 'value': 0.0}, + mapsTo=MAP.mapsTo, + instanceOf=DM.instanceOf, + subClassOf=RDFS.subClassOf, + #description=DCTERMS.description, + label=RDFS.label, + hasUnit=DM.hasUnit, + hasCost=':hasCost', +): + """Find routes of mappings from any source in `sources` to `target`. + + This implementation supports transitivity, subclasses. + + Arguments: + target: IRI of the target in `triplestore`. + sources: Dict mapping source IRIs to source values. + triplestore: Triplestore instance. + It is safe to pass a generator expression too. + function_repo: Dict mapping function IRIs to corresponding Python + function. Default is to use `triplestore.function_repo`. 
+ function_mappers: Sequence of mapping functions that takes + `triplestore` as argument and return a dict mapping output IRIs + to a list of `(function_iri, [input_iris, ...])` tuples. + mapsTo: IRI of 'mapsTo' in `triplestore`. + instanceOf: IRI of 'instanceOf' in `triplestore`. + subClassOf: IRI of 'subClassOf' in `triples`. Set it to None if + subclasses should not be considered. + label: IRI of 'label' in `triplestore`. Used for naming function + input parameters. The default is to use rdfs:label. + hasUnit: IRI of 'hasUnit' in `triples`. + hasCost: IRI of 'hasCost' in `triples`. + + Returns: + A MappingStep instance. + """ + if function_repo is None: + function_repo = triplestore.function_repo + + # Create lookup tables for fast access to properties + # This only transverse `tiples` once + soMaps = defaultdict(list) # (s, mapsTo, o) ==> soMaps[s] -> [o, ..] + osMaps = defaultdict(list) # (o, mapsTo, s) ==> osMaps[o] -> [s, ..] + osSubcl = defaultdict(list) # (o, subClassOf, s) ==> osSubcl[o] -> [s, ..] + soInst = dict() # (s, instanceOf, o) ==> soInst[s] -> o + osInst = defaultdict(list) # (o, instanceOf, s) ==> osInst[o] -> [s, ..] 
+ for s, o in triplestore.subject_objects(mapsTo): + soMaps[s].append(o) + osMaps[o].append(s) + for s, o in triplestore.subject_objects(subClassOf): + osSubcl[o].append(s) + for s, o in triplestore.subject_objects(instanceOf): + if s in soInst: + raise InconsistentTriplesError( + f'The same individual can only relate to one datamodel ' + f'property via {instanceOf} relations.') + soInst[s] = o + osInst[o].append(s) + soName = {s: o for s, o in triplestore.subject_objects(label)} + soUnit = {s: o for s, o in triplestore.subject_objects(hasUnit)} + soCost = {s: o for s, o in triplestore.subject_objects(hasCost)} + + def walk(target, visited, step): + """Walk backward in rdf graph from `node` to sources.""" + if target in visited: return + visited.add(target) + + def addnode(node, steptype, stepname): + if node in visited: + return + step.steptype = steptype + step.cost = soCost.get(target, default_costs[stepname]) + if node in sources: + value = Value(value=sources[node], unit=soUnit.get(node), + iri=node, property_iri=soInst.get(node), + cost=soCost.get(node, default_costs['value'])) + step.add_input(value, name=soName.get(node)) + else: + prevstep = MappingStep(output_iri=node, + output_unit=soUnit.get(node)) + step.add_input(prevstep, name=soName.get(node)) + walk(node, visited, prevstep) + + for node in osInst[target]: + addnode(node, StepType.INV_INSTANCEOF, 'instanceOf') + + for node in soMaps[target]: + addnode(node, StepType.MAPSTO, 'mapsTo') + + for node in osMaps[target]: + addnode(node, StepType.INV_MAPSTO, "mapsTo") + + for node in osSubcl[target]: + addnode(node, StepType.INV_SUBCLASSOF, 'subClassOf') + + for fmap in function_mappers: + for func, input_iris in fmap(triplestore)[target]: + step.steptype = StepType.FUNCTION + step.cost = soCost.get(func, default_costs['function']) + step.function = function_repo[func] + step.join_mode = True + for i, input_iri in enumerate(input_iris): + step0 = MappingStep(output_iri=input_iri, + 
output_unit=soUnit.get(input_iri)) + step.add_input(step0, name=soName.get(input_iri)) + walk(input_iri, visited, step0) + step.join_input() + + visited = set() + step = MappingStep(output_iri=target, output_unit=soUnit.get(target)) + if target in soInst: + # It is only initially we want to follow instanceOf in forward + # direction. + visited.add(target) # do we really wan't this? + source = soInst[target] + step.steptype = StepType.INSTANCEOF + step.cost = soCost.get(source, default_costs['instanceOf']) + step0 = MappingStep(output_iri=source, output_unit=soUnit.get(source)) + step.add_input(step0, name=soName.get(target)) + step = step0 + target = source + if target not in soMaps: + raise MissingRelationError(f'Missing "mapsTo" relation on: {target}') + walk(target, visited, step) + + return step + + +def instance_routes(meta, instances, triplestore, allow_incomplete=False, + quantity=Quantity, **kwargs): + """Find all mapping routes for populating an instance of `meta`. + + Arguments: + meta: Metadata for the instance we will create. + instances: sequence of instances that the new intance will be + populated from. + triplestore: Triplestore containing the mappings. + allow_incomplete: Whether to allow not populating all properties + of the returned instance. + quantity: Class implementing quantities with units. Defaults to + pint.Quantity. + kwargs: Keyword arguments passed to mapping_route(). + + Returns: + A dict mapping property names to a MappingStep instance. 
+ """ + if isinstance(instances, dlite.Instance): + instances = [instances] + + sources = {} + for inst in instances: + props = {p.name: p for p in inst.meta['properties']} + for k, v in inst.properties.items(): + sources[f'{inst.meta.uri}#{k}'] = quantity(v, props[k].unit) + + routes = {} + for prop in meta['properties']: + target = f'{meta.uri}#{prop.name}' + try: + route = mapping_route(target, sources, triplestore, **kwargs) + except MissingRelationError: + if allow_incomplete: + continue + raise + if not allow_incomplete and not route.number_of_routes(): + raise InsufficientMappingError(f'no mappings for {target}') + routes[prop.name] = route + + return routes + + +def instantiate_route(meta, routes, routedict=None, id=None, quantity=Quantity): + """Create a new instance of `meta` from selected mapping route returned by + instance_routes(). + + Arguments: + meta: Metadata to instantiate. + routes: Dict returned by instance_routes(). It should map property + names to MappingStep instances. + routedict: Dict mapping property names to route number to select for + the given property. The default is to select the route with + lowest cost. + id: URI of instance to create. + quantity: Class implementing quantities with units. Defaults to + pint.Quantity. + + Returns: + New instance. 
+ """ + if isinstance(meta, str): + meta = dlite.get_instance(meta) + + if routedict is None: + routedict = {} + + values = {} + for prop in meta['properties']: + if prop.name in routes: + step = routes[prop.name] + values[prop.name] = step.eval(routeno=routedict.get(prop.name), + unit=prop.unit, + quantity=quantity) + dims = infer_dimensions(meta, values) + inst = meta(dims=dims, id=id) + + for k, v in routes.items(): + inst[k] = v.eval(magnitude=True, unit=meta.getprop(k).unit) + + return inst + + +def instantiate(meta, instances, triplestore, routedict=None, id=None, + allow_incomplete=False, quantity=Quantity, **kwargs): + """Create a new instance of `meta` populated with the selected mapping + routes. + + This is a convenient function that combines instance_routes() and + instantiate_route(). If you want to investigate the possible routes, + you will probably want to call instance_routes() and + instantiate_route() instead. + + Arguments: + meta: Metadata to instantiate. + instances: Sequence of instances with source values. + triplestore: Triplestore instance. + It is safe to pass a generator expression too. + routedict: Dict mapping property names to route number to select for + the given property. The default is to select the route with + lowest cost. + id: URI of instance to create. + allow_incomplete: Whether to allow not populating all properties + of the returned instance. + quantity: Class implementing quantities with units. Defaults to + pint.Quantity. + + Keyword arguments (passed to instance_routes()): + function_repo: Dict mapping function IRIs to corresponding Python + function. Default is to use `triplestore.function_repo`. + function_mappers: Sequence of mapping functions that takes + `triplestore` as argument and return a dict mapping output IRIs + to a list of `(function_iri, [input_iris, ...])` tuples. + mapsTo: IRI of 'mapsTo' in `triplestore`. + instanceOf: IRI of 'instanceOf' in `triplestore`. 
+ subClassOf: IRI of 'subClassOf' in `triplestore`. Set it to None if + subclasses should not be considered. + label: IRI of 'label' in `triplestore`. Used for naming function + input parameters. The default is to use rdfs:label. + hasUnit: IRI of 'hasUnit' in `triplestore`. + hasCost: IRI of 'hasCost' in `triplestore`. + + Returns: + New instance. + """ + if isinstance(meta, str): + meta = dlite.get_instance(meta) + + routes = instance_routes(meta=meta, + instances=instances, + triplestore=triplestore, + allow_incomplete=allow_incomplete, + quantity=quantity, + **kwargs) + return instantiate_route(meta=meta, routes=routes, routedict=routedict, + id=id, quantity=quantity) + + + + +# ------------- Old implementation ----------------- + +def unitconvert_pint(dest_unit, value, unit): + """Returns `value` converted to `dest_unit`. + + A unitconvert function based on Pint. Alternative functions + based on ontologies may be implemented. - def eval(self, input_values, input_units=None, unit=None): - """Returns the evaluated value.""" - pass + Args: + dest_unit: Destination unit that `value` should be converted to. + value: Source value. + unit: The unit of the source value. + """ + import pint + ureg = pint.UnitRegistry() + u1 = ureg(unit) + u2 = ureg(dest_unit) + return (value * u1).to(u2).m + + +unitconvert = unitconvert_pint def match_factory(triples, match_first=False): @@ -148,222 +710,7 @@ def match(s=None, p=None, o=None): return match -def mapping_route( - target, sources, triples, - mapsTo=':mapsTo', - subClassOf='http://www.w3.org/2000/01/rdf-schema#subClassOf', - subPropertyOf='http://www.w3.org/2000/01/rdf-schema#subPropertyOf', - hasInput=':hasInput', - hasOutput=':hasOutput'): - """Finds the route of mappings from any source in `sources` to `target`. - This implementation takes transitivity, subclasses and - subproperties into accaount. - - Args: - target: IRI of the target in `triples`. - sources: Sequence of source IRIs in `triples`. 
- triples: Sequence of (subject, predicate, object) triples. - It is safe to pass a generator expression too. - mapsTo: How 'mapsTo' is written in `triples`. - subClassOf: How 'subClassOf' is written in `triples`. Set it - to None if subclasses should not be considered. - subPropertyOf: How 'subPropertyOf' is written in `triples`. Set it - to None if subproperties of `mapsTo` should not be considered. - hasInput: How 'hasInput' is written in `triples`. - hasOutput: How 'hasOutput' is written in `triples`. - - Returns: - list: Names of all sources that maps to `target`. - - list: A nested list with different mapping routes from `target` - to a source, where a mapping route is expressed as a - list of triples. For example: - - [(target, mapsTo, 'onto:A'), - ('onto:A', mapsTo, 'onto:B'), - (source1, mapsTo, 'onto:B')] - - Bugs: - In the current implementation will the returned mapping route - report sub properties of `mapsTo` as `mapsTo`. Some - postprocessing is required to fix this. - """ - sources = set(sources) - - # Create a set of 'relations' to consider, consisting of mapsTo and - # its sub properties - if subPropertyOf: - def walk(src, d): - yield src - for s in d[src]: - yield from walk(s, d) - - def get_relations(rel): - """Returns a set of `rel` and its subproperties.""" - oSPs = defaultdict(set) - for s, p, o in triples: - if p == subPropertyOf: - oSPs[o].add(s) - return set(walk(rel, oSPs)) - - if isinstance(triples, types.GeneratorType): - # Convert generator to a list such that we can transverse it twice - triples = list(triples) - - #oSPs = defaultdict(set) # (o, subPropertyOf, s) ==> oSPs[o] -> {s, ..} - #for s, p, o in triples: - # if p == subPropertyOf: - # oSPs[o].add(s) - #relations = set(walk(mapsTo, oSPs)) - #del oSPs - relations = get_relations(mapsTo) - else: - relations = set([mapsTo]) - - # Create lookup tables for fast access to properties - # This only transverse `tiples` once - sRo = defaultdict(list) # (s, mapsTo, o) ==> sRo[s] -> [o, 
..] - oRs = defaultdict(list) # (o, mapsTo, s) ==> oRs[o] -> [s, ..] - sSCo = defaultdict(list) # (s, subClassOf, o) ==> sSCo[s] -> [o, ..] - oSCs = defaultdict(list) # (o, subClassOf, s) ==> oSCs[o] -> [s, ..] - for s, p, o in triples: - if p in relations: - sRo[s].append(o) - oRs[o].append(s) - elif p == subClassOf: - sSCo[s].append(o) - oSCs[o].append(s) - - # The lists to return, populated with walk_forward() and walk_backward() - mapped_sources = [] - mapped_routes = [] - - def walk_forward(entity, visited, route): - """Walk forward from `entity` in the direction of mapsTo.""" - if entity not in visited: - walk_backward(entity, visited, route) - for e in sRo[entity]: - walk_forward( - e, visited.union(set([entity])), - route + [(entity, mapsTo, e)]) - for e in oSCs[entity]: - walk_forward( - e, visited.union(set([entity])), - route + [(e, subClassOf, entity)]) - - def walk_backward(entity, visited, route): - """Walk backward from `entity` to a source, against the direction of - mapsTo.""" - if entity not in visited: - if entity in sources: - mapped_sources.append(entity) - mapped_routes.append(route) - else: - for e in oRs[entity]: - walk_backward( - e, visited.union(set([entity])), - route + [(e, mapsTo, entity)]) - for e in sSCo[entity]: - walk_backward( - e, visited.union(set([entity])), - route + [(entity, subClassOf, e)]) - - walk_forward(target, set(), []) - - return mapped_sources, mapped_routes - - - -# -# def mapping_targets(source, triples, mapsTo=':mapsTo', return_routes=False): -# """Finds all targets that `source` maps to. -# -# This implementation takes the transitivity of mapsTo into accaount. -# -# Args: -# source: IRI of source in `triples`. -# triples: Sequence of (subject, predicate, object) triples. -# mapsTo: How the 'mapsTo' predicate is written in `triples`. -# return_routes: Whether to also return a list of tuples showing the -# mapping route. -# -# Returns: -# list: Name of all targets that `source` maps to. 
-# list: (optional) If `return_route` is true, a list of mapping routes -# corresponding to the list of targets is also returned. -# Each route is expressed as a list of triples. For example -# can the route from 'a' to 'b' be expressed as -# -# [[('a', ':mapsTo', 'onto:A'), ('b', ':mapsTo', 'onto:A')]] -# """ -# import itertools -# match = match_factory(triples) # match function -# -# # Trivial implementation -# #for _, _, cls in match(s=source, p=mapsTo): -# # for target, _, _ in match(p=mapsTo, o=cls): -# # targets.append(target) -# # if return_routes: -# # routes.append([(source, mapsTo, cls), (target, mapsTo, cls)]) -# -# # Recursive implementation taking transitivity into account -# targets = [] -# routes = [] -# -# def add_target(target, route): -# targets.append(target) -# if return_routes: -# routes.append(route) -# -# def find_target(cls, route, visited): -# """Find all targets that maps to `cls`. -# Returns true if cls correspond to a final target, otherwise false.""" -# m = match(p=mapsTo, o=cls) -# try: -# s, p, o = m.__next__() -# except StopIteration: -# return True -# -# # Use itertools.chain() to put (s, p, o) in what we are iterating over -# for s, p, o in itertools.chain(iter([(s, p, o)]), m): -# if s in visited: -# pass -# else: -# if find_target(s, route + [s, p, o], visited + [s]): -# add_target(s, route + [s, p, o]) -# return False -# -# def find(s, route, visited): -# """Find all classes that `s` maps to.""" -# for s, p, o in match(s=s, p=mapsTo): -# find_target(o, route + [(s, p, o)], visited + [s]) -# find(o, route + [(s, p, o)], visited + [s]) -# -# find(source, [], []) -# -# if return_routes: -# return targets, routes -# else: -# return targets -# - -def unitconvert_pint(dest_unit, value, unit): - """Returns `value` converted to `dest_unit`. - - A unitconvert function based on Pint. Alternative functions - based on ontologies may be implemented. - - Args: - dest_unit: Destination unit that `value` should be converted to. 
- value: Source value. - unit: The unit of the source value. - """ - import pint - ureg = pint.UnitRegistry() - u1 = ureg(unit) - u2 = ureg(dest_unit) - return (value * u1).to(u2).m def assign_dimensions(dims: Dict, @@ -442,6 +789,11 @@ def make_instance(meta, instances, mappings=(), strict=True, cannot be misused for code injection. - Add a function that visualise the possible mapping paths. """ + warnings.warn( + "make_instance() is deprecated. Use instantiate() instead.", + DeprecationWarning, + ) + match = match_factory(mappings) # match function if isinstance(instances, dlite.Instance): diff --git a/bindings/python/tests/test_property_mappings.py b/bindings/python/tests/test_property_mappings.py index c555bd6fb..f9b704091 100755 --- a/bindings/python/tests/test_property_mappings.py +++ b/bindings/python/tests/test_property_mappings.py @@ -1,6 +1,14 @@ #!/usr/bin/env python +import sys +import importlib.util from pathlib import Path +import numpy as np + +import dlite +import dlite.mappings as dm +from dlite.triplestore import Triplestore + try: import pint except ImportError as exc: @@ -12,36 +20,230 @@ import dlite.mappings as dm -# Configure search paths +# Configure paths thisdir = Path(__file__).parent.absolute() -dlite.storage_path.append(f'{thisdir}/*.json') - - -mappings = [ - ('http://onto-ns.com/meta/0.1/Molecule#name', ':mapsTo', - 'chem:Identifier'), - ('http://onto-ns.com/meta/0.1/Molecule#groundstate_energy', ':mapsTo', - 'chem:GroundStateEnergy'), - ('http://onto-ns.com/meta/0.1/Substance#id', ':mapsTo', - 'chem:Identifier'), - ('http://onto-ns.com/meta/0.1/Substance#molecule_energy', ':mapsTo', - 'chem:GroundStateEnergy'), -] +#exdir = thisdir / '../../../examples/dehydrogenation' +# +## Import module with instances from dehydrogenation example +#module_name = 'molecular_energies' +#file_path = f'{exdir}/1-simple-workflow/molecular_energies.py' +# +#spec = importlib.util.spec_from_file_location(module_name, file_path) +#module = 
importlib.util.module_from_spec(spec) +#sys.modules[module_name] = module +#spec.loader.exec_module(module) +# +#CH4 = module.coll['CH4'] +#Molecule = CH4.meta +# +# +# +## Load entities and instantiate a molecule +#dlite.storage_path.append(f'{exdir}/entities/*.json') +#Molecule = dlite.get_instance('http://onto-ns.com/meta/0.1/Molecule') +#Substance = dlite.get_instance('http://onto-ns.com/meta/0.1/Substance') +# +#inst = Molecule(dims={'natoms': 3, 'ncoords': 3}) +#inst.name = '' +# +# +## Create triplestore using the rdflib backend +#ts = Triplestore('rdflib') +# +## Define some prefixed namespaces +#CHEM = ts.bind('chem', 'http://onto-ns.com/onto/chemistry#') +# +## Add mappings +#ts.add_mapsTo(CHEM.Identifier, Molecule, 'name') +#ts.add_mapsTo(CHEM.GroundStateEnergy, Molecule, 'groundstate_energy') +#ts.add_mapsTo(CHEM.Identifier, Substance, 'id') +#ts.add_mapsTo(CHEM.GroundStateEnergy, Substance, 'molecule_energy') +# +# +# +# +#mappings = [ +# ('http://onto-ns.com/meta/0.1/Molecule#name', ':mapsTo', +# 'chem:Identifier'), +# ('http://onto-ns.com/meta/0.1/Molecule#groundstate_energy', ':mapsTo', +# 'chem:GroundStateEnergy'), +# ('http://onto-ns.com/meta/0.1/Substance#id', ':mapsTo', +# 'chem:Identifier'), +# ('http://onto-ns.com/meta/0.1/Substance#molecule_energy', ':mapsTo', +# 'chem:GroundStateEnergy'), +#] +# +# +#match = dm.match_factory(mappings) +#match_first = dm.match_factory(mappings, match_first=True) -match = dm.match_factory(mappings) -match_first = dm.match_factory(mappings, match_first=True) +# Check unitconvert +assert dm.unitconvert('km', 34, 'm') == 0.034 +assert dm.unitconvert('s', 1, 'hour') == 3600 +# The Windows test has problems understanding the UTF-8 encoding "Å" below. +# Skip it on Windows for now... 
+if sys.platform != "win32": + assert dm.unitconvert("Å", 34, 'um') == 34e4 +# Test to manually set up mapping steps +v = dm.Value(3.0, 'm/s', 'emmo:Velocity', cost=1) +t = dm.Value(1.1, 's', 'emmo:Time', cost=2) +t2 = dm.Value(2.2, 's', 'emmo:Time', cost=4) +l = dm.Value(4.0, 'm', 'emmo:Length', cost=8) + +step1 = dm.MappingStep( + output_iri='emmo:Length', + steptype=dm.StepType.FUNCTION, + function=lambda v, t: v*t, + cost=lambda v, t: 2*v*t, + output_unit='m', +) +step1.add_inputs({'v': v, 't': t}) +step1.add_inputs({'v': v, 't': t2}) + +step2 = dm.MappingStep( + output_iri=':Length', + steptype=dm.StepType.MAPSTO, + cost=2, + output_unit='m', +) +step2.add_inputs({'l': step1}) + + +step3 = dm.MappingStep( + output_iri=':ReducedLength', + steptype=dm.StepType.FUNCTION, + function=lambda l: 0.7*l, + cost=10, + output_unit='m', +) +step3.add_inputs({'l': step1}) +step3.add_inputs({'l': step2}) +step3.add_inputs({'l': l}) + + +def isclose(a, b, rtol=1e-3): + """Returns true if the relative difference between `a` and `b` is less + than `rtol`.""" + return True if abs((b - a)/b) <= rtol else False + + +assert step1.number_of_routes() == 2 +assert step2.number_of_routes() == 2 +assert step3.number_of_routes() == 5 + +assert isclose(dm.Quantity(3*1.1, 'm'), step1.eval(0)) +assert isclose(dm.Quantity(3*2.2, 'm'), step1.eval(1)) +assert isclose(dm.Quantity(0.7*3*1.1, 'm'), step3.eval(0)) +assert isclose(dm.Quantity(0.7*3*2.2, 'm'), step3.eval(1)) +assert isclose(dm.Quantity(0.7*3*1.1, 'm'), step3.eval(2)) +assert isclose(dm.Quantity(0.7*3*2.2, 'm'), step3.eval(3)) +assert isclose(dm.Quantity(0.7*4.0, 'm'), step3.eval(4)) +assert isclose(dm.Quantity(0.7*4.0, 'm'), step3.eval()) + +costs = step3.lowest_costs(10) +assert len(costs) == 5 +assert [idx for cost, idx in costs] == [4, 0, 2, 1, 3] +assert isclose(18.0, costs[0][0]) +assert isclose(19.6, costs[1][0]) +assert isclose(21.6, costs[2][0]) +assert isclose(28.2, costs[3][0]) +assert isclose(30.2, costs[4][0]) + + + 
+#routes = dm.mapping_route( +# target='http://onto-ns.com/meta/0.1/Substance#molecule_energy', +# sources=['http://onto-ns.com/meta/0.1/Molecule#groundstate_energy'], +# triples=mappings) + + +# --------------------------------------- +r = np.array([10, 20, 30, 40, 50, 60]) # particle radius [nm] +n = np.array([1, 3, 7, 6, 2, 1]) # particle number density [1e21 #/m^3] + + +rv = dm.Value(r, 'nm', 'inst1') +nv = dm.Value(n, '1/m^3', 'inst2') + + +def average_radius(r, n): + return np.sum(r * n) / np.sum(n) + + +mapsTo = 'http://emmo.info/domain-mappings#mapsTo' +instanceOf = 'http://emmo.info/datamodel#instanceOf' +subClassOf = 'http://www.w3.org/2000/01/rdf-schema#subClassOf' +#description = 'http://purl.org/dc/terms/description' +label = 'http://www.w3.org/2000/01/rdf-schema#label' +hasUnit = 'http://emmo.info/datamodel#hasUnit' +hasCost = ':hasCost' +RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' +type = RDF + 'type' +next = RDF + 'next' +first = RDF + 'first' +rest = RDF + 'rest' +nil = RDF + 'nil' +expects = 'https://w3id.org/function/ontology#expects' +returns = 'https://w3id.org/function/ontology#returns' + + +triples = [ + # Mappings for data models + ('inst1', mapsTo, 'mo:ParticleRadius'), + ('inst2', mapsTo, 'mo:NumberDensity'), + ('inst3', mapsTo, 'mo:AverageParticleRadius'), + + ('inst1', hasUnit, 'um'), + ('inst2', hasUnit, '1/m**3'), + ('inst3', hasUnit, 'um'), + + # Mappings for the function + (':r', mapsTo, 'mo:ParticleRadius'), + (':n', mapsTo, 'mo:NumberDensity'), + (':ravg', mapsTo, 'mo:AverageParticleRadius'), + + ('average_radius_function', type, 'fno:Function'), + ('average_radius_function', expects, 'parameter_list'), + ('average_radius_function', returns, 'output_list'), + ('parameter_list', type, 'rdf:List'), + ('parameter_list', first, ':r'), + ('parameter_list', rest, 'lst2'), + ('lst2', type, 'rdf:List'), + ('lst2', first, ':n'), + ('lst2', rest, nil), + (':r', type, 'fno:Parameter'), + (':r', label, 'r'), + #(':r', hasUnit, 'um'), + 
(':n', type, 'fno:Parameter'), + (':n', label, 'n'), + #(':n', hasUnit, '1/m**3'), + ('output_list', type, 'rdf:List'), + ('output_list', first, ':ravg'), + ('output_list', rest, nil), + (':ravg', type, 'fno:Output'), + #(':ravg', hasUnit, 'm'), +] + +ts2 = Triplestore('rdflib') +ts2.add_triples(triples) + + +# Check fno_mapper +d = dm.fno_mapper(ts2) +assert d[':ravg'] == [('average_radius_function', [':r', ':n'])] -# Check unitconvert_pint -assert dm.unitconvert_pint("km", 34, 'm') == 0.034 -assert dm.unitconvert_pint("Å", 34, 'um') == 34e4 -assert dm.unitconvert_pint("s", 1, 'hour') == 3600 +step = dm.mapping_route( + target='inst3', + sources={'inst1': r, 'inst2': n}, + triplestore=ts2, + function_repo={'average_radius_function': average_radius}, +) -routes = dm.mapping_route( - target='http://onto-ns.com/meta/0.1/Substance#molecule_energy', - sources=['http://onto-ns.com/meta/0.1/Molecule#groundstate_energy'], - triples=mappings) +print(step.show()) +print(step.eval()) +assert step.eval(unit='m') == 34e-6 diff --git a/bindings/python/tests/test_utils.py b/bindings/python/tests/test_utils.py index 105721b30..e7dca122c 100644 --- a/bindings/python/tests/test_utils.py +++ b/bindings/python/tests/test_utils.py @@ -5,7 +5,8 @@ import dlite from dlite.utils import ( - instance_from_dict, to_metadata, HAVE_DATACLASSES, HAVE_PYDANTIC + instance_from_dict, to_metadata, infer_dimensions, + HAVE_DATACLASSES, HAVE_PYDANTIC ) @@ -190,3 +191,27 @@ Atoms2 = to_metadata(AtomsEntity2) assert Atoms2.is_meta assert Atoms2.meta.uri == dlite.ENTITY_SCHEMA + + +# Test infer_dimensions() +# TODO - test also exceptions +dims = infer_dimensions( + meta=inst.meta, + values={'a-string-array': [('a', 'b'), ('c', 'd'), ('e', 'f')]}, +) +assert dims == dict(N=3, M=2) + +dims = infer_dimensions( + meta=inst.meta, + values={'a-string-array': [('a', 'b'), ('c', 'd'), ('e', 'f')], + 'a-fixstring-array': [ + ('a', 'b', 'c'), ('a', 'b', 'c'), ('a', 'b', 'c')]}, +) +assert dims == dict(N=3, M=2) 
+ +dims = infer_dimensions( + meta=inst.meta, + values={'an-int-array': [1, 2, 3, 4], + 'a-fixstring-array': [('Al', 'Mg'), ('Si', 'Cu')]}, +) +assert dims == dict(N=2, M=4) diff --git a/bindings/python/triplestore/triplestore.py b/bindings/python/triplestore/triplestore.py index 26cff8f6b..581a2b1a0 100644 --- a/bindings/python/triplestore/triplestore.py +++ b/bindings/python/triplestore/triplestore.py @@ -141,7 +141,7 @@ def __getitem__(self, key): return self.uri + key def __repr__(self): - return f"Namespace({self.iri})" + return f"Namespace({self.uri})" def __str__(self): return self.uri @@ -245,7 +245,9 @@ def to_python(self): XSD.anyURI, XSD.language, XSD.Name, XSD.NMName, XSD.normalizedString, XSD.string, XSD.token, XSD.NMTOKEN, ): - warnings.warn(f"unknown datatype: {self.datatype} - assuming string") + warnings.warn( + f"unknown datatype: {self.datatype} - assuming string" + ) return v def n3(self): @@ -293,7 +295,8 @@ def __init__(self, name: str, base_iri: str = None, **kwargs): name: Module name for backend. base_iri: Base IRI used by the add_function() method when adding new triples. - kwargs: Keyword arguments passed to the backend's __init__() method. + kwargs: Keyword arguments passed to the backend's __init__() + method. """ module = import_module(name if "." in name else "dlite.triplestore.backends." + name) @@ -302,6 +305,9 @@ def __init__(self, name: str, base_iri: str = None, **kwargs): self.namespaces = {} self.backend_name = name self.backend = cls(**kwargs) + # Keep functions in the triplestore for convenience even though + # they usually do not belong to the triplestore per se. 
+ self.function_repo = {} for prefix, ns in self.default_namespaces.items(): self.bind(prefix, ns) @@ -394,8 +400,8 @@ def _check_method(self, name): """Check that backend implements the given method.""" if not hasattr(self.backend, name): raise NotImplementedError( - f"Triplestore backend \"{self.backend_name}\" doesn't implement " - f"a \"{name}()\" method.") + f'Triplestore backend "{self.backend_name}" do not ' + f'implement a "{name}()" method.') def add(self, triple: "Triple"): """Add `triple` to triplestore.""" @@ -545,9 +551,14 @@ def add_function(self, base_iri: standard: Name of ontology to use when describing the function. Defaults to the Function Ontology (FnO). + + Returns: + func_iri: IRI of the added function. """ method = getattr(self, f"_add_function_{standard}") - return method(func, expects, returns, base_iri) + func_iri = method(func, expects, returns, base_iri) + self.function_repo[func_iri] = func + return func_iri def _add_function_fno(self, func, expects, returns, base_iri): """Implementing add_function() for FnO.""" @@ -598,6 +609,8 @@ def _add_function_fno(self, func, expects, returns, base_iri): self.add((lst, RDF.rest, lst_next)) lst = lst_next + return func_iri + def infer_iri(obj): """Return IRI of the individual that stands for object `obj`.""" diff --git a/bindings/python/utils.py b/bindings/python/utils.py index 487517317..01a0bc52c 100644 --- a/bindings/python/utils.py +++ b/bindings/python/utils.py @@ -20,9 +20,19 @@ else: HAVE_PYDANTIC = True +import numpy as np + import dlite +class CannotInferDimensionError(dlite.DLiteError): + """Cannot infer instance dimensions.""" + + +class InvalidNumberOfDimensionsError(dlite.DLiteError): + """Invalid number of instance dimensions.""" + + def instance_from_dict(d, id=None, single=None, check_storages=True): """Returns a new DLite instance created from dict. 
@@ -215,4 +225,59 @@ class EntitySchema(BaseModel): def get_package_paths(): + """Returns a dict with all the DLite builtin path variables.""" return {k: v for k, v in dlite.__dict__.items() if k.endswith('_path')} + + +def infer_dimensions(meta, values, strict=True): + """Infer the dimensions if we should create an instance of `meta` with + the given `values`. + + Arguments: + meta: URI or metadata object. + values: Dict mapping property names to values. Not all property + names need to be mapped. + strict: Whether to require that all keys in `values` correspond + to a property name in `meta`. + + Returns: + Dict mapping dimension names to dimension values. + + Raises: + InvalidNumberOfDimensionsError: Inconsistent number of dimensions. + CannotInferDimensionError: Cannot infer instance dimensions. + """ + if isinstance(meta, str): + meta = dlite.get_instance(meta) + + if strict: + propnames = {propname for propname in values.keys()} + extra_props = propnames.difference( + {prop.name for prop in meta['properties']}) + if extra_props: + raise CannotInferDimensionError( + f'invalid property names in `values`: {extra_props}') + + dims = {} + for prop in meta['properties']: + if prop.name in values and prop.ndims: + v = np.array(values[prop.name]) + if len(v.shape) != prop.ndims: + raise InvalidNumberOfDimensionsError( + f'property {prop.name} has {prop.ndims} dimensions, but ' + f'{len(v.shape)} was provided') + for i, dimname in enumerate(prop.dims): + if dimname in dims and v.shape[i] != dims[dimname]: + raise CannotInferDimensionError( + f'inconsistent assignment of dimension "{dimname}" ' + f'when checking property "{prop.name}"') + dims[dimname] = v.shape[i] + + dimnames = {d.name for d in meta['dimensions']} + if len(dims) != len(meta['dimensions']): + missing_dims = dimnames.difference(dims.keys()) + raise CannotInferDimensionError( + f'insufficient number of properties provided to infer dimensions: ' + f'{missing_dims}') + + return dims diff --git
a/python/setup.py b/python/setup.py index f18ac35ed..e6d9ce39a 100644 --- a/python/setup.py +++ b/python/setup.py @@ -163,6 +163,7 @@ def build_extension(self, ext: CMakeExtension) -> None: "pandas", "pymongo", "rdflib", + "pint", ] requirements = ["numpy"] diff --git a/requirements.txt b/requirements.txt index 5e9eb2bc2..0424a16a0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ psycopg2-binary pandas pymongo rdflib +pint