Skip to content

Commit

Permalink
possibility to add object to an existing epc instance (#10)
Browse files Browse the repository at this point in the history
* possibility to add object to an existing epc instance

* bugfix for vertical Axis

* allow python 3.9
  • Loading branch information
valentin-gauthier-geosiris authored Jan 29, 2025
1 parent 5e6be2f commit 0e45d74
Show file tree
Hide file tree
Showing 9 changed files with 151 additions and 29 deletions.
16 changes: 12 additions & 4 deletions energyml-utils/example/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from dataclasses import fields

from energyml.eml.v2_3.commonv2 import *
from energyml.eml.v2_3.commonv2 import AbstractObject
from energyml.resqml.v2_0_1.resqmlv2 import DoubleHdf5Array
from energyml.resqml.v2_2.resqmlv2 import (
TriangulatedSetRepresentation,
Expand All @@ -14,8 +15,8 @@
AbstractColorMap,
)

from src.energyml.utils.data.hdf import *
from src.energyml.utils.data.helper import get_projected_uom
# from src.energyml.utils.data.hdf import *
from src.energyml.utils.data.helper import get_projected_uom, is_z_reversed
from src.energyml.utils.epc import *
from src.energyml.utils.introspection import *
from src.energyml.utils.manager import *
Expand All @@ -27,7 +28,7 @@
correct_dor,
)
from src.energyml.utils.xml import *
from utils.data.datasets_io import HDF5FileReader
from src.energyml.utils.data.datasets_io import HDF5FileReader

fi_cit = Citation(
title="An interpretation",
Expand Down Expand Up @@ -375,6 +376,12 @@ def test_local_depth_crs():
print(e)


def test_crs():
    """Print the result of is_z_reversed for a randomly generated LocalEngineeringCompoundCrs."""
    from energyml.eml.v2_3.commonv2 import LocalEngineeringCompoundCrs

    random_crs = random_value_from_class(LocalEngineeringCompoundCrs)
    z_reversed = is_z_reversed(random_crs)
    print(z_reversed)


def test_get_projected_uom():
# Fails because the xsi:type="VerticalCrsEpsgCode" doesn't
# contain the namespace : xsi:type="eml:VerticalCrsEpsgCode"
Expand Down Expand Up @@ -468,4 +475,5 @@ def class_field():
# test_obj_attribs()
# test_copy_values()
# class_field()
test_get_projected_uom()
# test_get_projected_uom()
test_crs()
7 changes: 6 additions & 1 deletion energyml-utils/example/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ def csv_to_dataset():
parser = argparse.ArgumentParser()
parser.add_argument("--csv", "-f", type=str, help="Csv file path")
parser.add_argument("--output", "-o", type=str, help="Output file path")
parser.add_argument("--prefix", "-p", type=str, default="", help="Output file path")
parser.add_argument("--prefix", "-p", type=str, default="", help="Dataset path prefix")
parser.add_argument("--csv-delimiter", "-d", type=str, default=",", help="CSV delimiter")
parser.add_argument(
"--mapping",
Expand All @@ -227,10 +227,15 @@ def csv_to_dataset():

args = parser.parse_args()

print(args.csv_delimiter)
print(args.mapping_line)

mapping = args.mapping_line or args.mapping
if mapping is not None:
mapping = json.loads(mapping)

print(mapping)

output_file_path = args.output
if output_file_path.lower().endswith(".parquet") or output_file_path.lower().endswith(".pqt"):
csv_to_parquet(
Expand Down
4 changes: 2 additions & 2 deletions energyml-utils/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ build-backend = "poetry_dynamic_versioning.backend"

[tool.poetry]
name = "energyml-utils"
version = "1.0.1.dev4" # Set at build time
version = "0.0.0" # Set at build time
description = "Energyml helper"
authors = [
"Valentin Gauthier <valentin.gauthier@geosiris.com>"
Expand Down Expand Up @@ -50,7 +50,7 @@ parquet = ["pyarrow", "numpy", "pandas"]
hdf5 = ["h5py"]

[tool.poetry.dependencies]
python = "^3.10"
python = "^3.9"
xsdata = {version = "^24.0", extras = ["cli", "lxml"]}
energyml-opc = "^1.12.0"
h5py = { version = "^3.7.0", optional = false }
Expand Down
38 changes: 19 additions & 19 deletions energyml-utils/src/energyml/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@

RGX_XML_HEADER = r"^\s*<\?xml(\s+(encoding\s*=\s*\"(?P<encoding>[^\"]+)\"|version\s*=\s*\"(?P<version>[^\"]+)\"|standalone\s*=\s*\"(?P<standalone>[^\"]+)\"))+" # pylint: disable=C0301

RGX_IDENTIFIER = f"{RGX_UUID}(.(?P<version>\w+)?)?"
RGX_IDENTIFIER = rf"{RGX_UUID}(.(?P<version>\w+)?)?"


# __ ______ ____
Expand Down Expand Up @@ -225,23 +225,21 @@ class EPCRelsRelationshipType(Enum):
EXTENDED_CORE_PROPERTIES = "extended-core-properties"

def get_type(self) -> str:
    """
    Return the relationship type URI of this member: a schema prefix followed by the member value.

    - EXTENDED_CORE_PROPERTIES uses the f2i-consulting package relationships prefix
    - CORE_PROPERTIES uses the openxmlformats package metadata relationships prefix
    - every other member uses the energistics package relationships prefix
    :return: the relationship type URI as a string
    """
    members = type(self)
    if self is members.EXTENDED_CORE_PROPERTIES:
        prefix = "http://schemas.f2i-consulting.com/package/2014/relationships/"
    elif self is members.CORE_PROPERTIES:
        # The comparison against self is required: a bare enum member is always truthy, so testing
        # the member alone would send every remaining relationship to this branch.
        prefix = "http://schemas.openxmlformats.org/package/2006/relationships/metadata/"
    else:
        prefix = "http://schemas.energistics.org/package/2012/relationships/"
    return prefix + str(self.value)


@dataclass
Expand Down Expand Up @@ -306,6 +304,8 @@ def parse_content_or_qualified_type(cqt: str) -> Optional[re.Match[str]]:
try:
parsed = parse_content_type(cqt)
except:
pass
if parsed is None:
try:
parsed = parse_qualified_type(cqt)
except:
Expand Down Expand Up @@ -335,7 +335,7 @@ def get_domain_version_from_content_or_qualified_type(cqt: str) -> Optional[str]

def split_identifier(identifier: str) -> Tuple[str, Optional[str]]:
    """
    Split an identifier into its uuid and its optional version, using RGX_IDENTIFIER.

    :param identifier: the identifier to split
    :return: a tuple (uuid, version) read from the URI_RGX_GRP_UUID and URI_RGX_GRP_VERSION groups
    """
    # NOTE(review): re.match returns None when the identifier does not match, in which case
    # the group access below raises AttributeError.
    parsed = re.match(RGX_IDENTIFIER, identifier)
    uuid = parsed.group(URI_RGX_GRP_UUID)
    version = parsed.group(URI_RGX_GRP_VERSION)
    return uuid, version


def now(time_zone=datetime.timezone.utc) -> float:
Expand Down
6 changes: 5 additions & 1 deletion energyml-utils/src/energyml/utils/data/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,11 @@ def is_z_reversed(crs: Optional[Any]) -> bool:
# resqml >= 22
vert_axis = search_attribute_matching_name(crs, "VerticalAxis.Direction")
if len(vert_axis) > 0:
reverse_z_values = vert_axis[0].lower() == "down"
vert_axis_str = str(vert_axis[0])
if "." in vert_axis_str:
vert_axis_str = vert_axis_str.split(".")[-1]

reverse_z_values = vert_axis_str.lower() == "down"

return reverse_z_values

Expand Down
70 changes: 69 additions & 1 deletion energyml-utils/src/energyml/utils/epc.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import datetime
import json
import logging
import os
import random
import re
import traceback
import zipfile
Expand Down Expand Up @@ -43,6 +45,7 @@
from .data.datasets_io import (
read_external_dataset_array,
)
from .exception import UnparsableFile
from .introspection import (
get_class_from_content_type,
get_obj_type,
Expand All @@ -69,7 +72,7 @@
serialize_xml,
read_energyml_xml_str,
read_energyml_xml_bytes,
read_energyml_json_str,
read_energyml_json_str, read_energyml_json_bytes, JSON_VERSION,
)
from .workspace import EnergymlWorkspace
from .xml import is_energyml_content_type
Expand Down Expand Up @@ -130,6 +133,71 @@ def __str__(self):
# + f"\n{[serialize_json(ar) for ar in self.additional_rels]}"
)

def add_file(self, obj: Union[List, bytes, BytesIO, str, RawFile]):
    """
    Add one or multiple files to the epc file.
    For non energyml files, it is better to use the RawFile class.
    The input can be a single file content, a file path, an energyml object, or a list of them.

    :param obj: bytes/BytesIO (xml or json content), a str (path of an existing file, or xml/json content),
        a RawFile, an object whose class lives in an "energyml." module, or a list of any of these
    :return: None
    :raises UnparsableFile: if a bytes/BytesIO content can be read neither as xml nor as json
    """
    if isinstance(obj, list):
        for o in obj:
            self.add_file(o)
    elif isinstance(obj, (bytes, BytesIO)):
        try:
            xml_obj = read_energyml_xml_bytes(obj)
            self.energyml_objects.append(xml_obj)
        except Exception:
            # Not readable as xml: retry as json before giving up.
            try:
                if isinstance(obj, BytesIO):
                    # The xml attempt may have consumed the stream.
                    obj.seek(0)
                json_obj = read_energyml_json_bytes(obj, json_version=JSON_VERSION.OSDU_OFFICIAL)
                self.add_file(json_obj)
            except Exception as json_error:
                raise UnparsableFile() from json_error
    elif isinstance(obj, RawFile):
        self.raw_files.append(obj)
    elif isinstance(obj, str):
        # Can be a path or a content
        if os.path.exists(obj):
            with open(obj, "rb") as f:
                file_content = f.read()
            f_name = os.path.basename(obj)
            f_ext = os.path.splitext(f_name)[1].lower()
            if f_ext in (".xml", ".json"):
                try:
                    self.add_file(file_content)
                except UnparsableFile:
                    # An xml/json file that is not an energyml object is kept as a raw file.
                    self.add_file(RawFile(f_name, BytesIO(file_content)))
            elif f_ext != ".rels":
                self.add_file(RawFile(f_name, BytesIO(file_content)))
            else:
                logging.error(f"Not supported file extension {f_name}")
        else:
            try:
                xml_obj = read_energyml_xml_str(obj)
                self.energyml_objects.append(xml_obj)
            except Exception:
                try:
                    json_obj = read_energyml_json_str(obj, json_version=JSON_VERSION.OSDU_OFFICIAL)
                    self.add_file(json_obj)
                except Exception:
                    # Neither xml nor json: keep the text as a raw file under a placeholder name.
                    # The content is wrapped in a BytesIO, like the raw files created from a path above.
                    self.add_file(
                        RawFile(
                            path=f"pleaseRenameThisFile_{str(random.random())}.txt",
                            content=BytesIO(obj.encode("utf-8")),
                        )
                    )
    elif str(type(obj).__module__).startswith("energyml."):
        # We should test "energyml.(resqml|witsml|prodml|eml|common)" but I didn't to avoid issues if
        # another specific package comes in the future
        self.energyml_objects.append(obj)
    else:
        logging.error(f"unsupported type {str(type(obj))}")

# EXPORT functions

def gen_opc_content_type(self) -> Types:
Expand Down
5 changes: 5 additions & 0 deletions energyml-utils/src/energyml/utils/exception.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,8 @@ def __init__(self, qt: Optional[str] = None):
class NotParsableType(Exception):
def __init__(self, t: Optional[str] = None):
super().__init__(f"type: {t}")


class UnparsableFile(Exception):
    """
    Error for a file content that cannot be parsed as an energyml object for an EPC file.

    :param t: optional detail about the unparsable input; when given it is appended to the message
    """

    def __init__(self, t: Optional[str] = None):
        message = "File is not parsable for an EPC file. Please use RawFile class for non energyml files."
        if t is not None:
            # Previously this argument was accepted but never reported.
            message = f"{message} ({t})"
        super().__init__(message)
31 changes: 30 additions & 1 deletion energyml-utils/src/energyml/utils/introspection.py
Original file line number Diff line number Diff line change
Expand Up @@ -1105,13 +1105,42 @@ def get_qualified_type_from_class(cls: Union[type, Any], print_dev_version=True)
)


def get_object_uri(obj: Any, dataspace: Optional[str] = None) -> Optional[Uri]:
    """
    Returns an ETP URI for the given object.

    The URI text is built as "eml:///dataspace('<dataspace>')/<qualified type>(<uuid>)" and then
    parsed with parse_uri.
    :param obj: the object to identify; its qualified type and uuid are read from it
    :param dataspace: the dataspace name; None is written as an empty name
    :return: the result of parse_uri on the built URI text
    """
    qualified_type = get_qualified_type_from_class(obj)
    uuid = get_obj_uuid(obj)
    return parse_uri(f"eml:///dataspace('{dataspace or ''}')/{qualified_type}({uuid})")


def dor_to_uris(dor: Any, dataspace: Optional[str] = None) -> Optional[Uri]:
    """
    Transform a DOR into an etp uri.

    The type information is read from the "qualified_type" attribute of the DOR, then from its
    "content_type" attribute if the first one is missing or cannot be parsed.
    :param dor: the data object reference
    :param dataspace: the dataspace to set in the uri
    :return: the uri, or None if neither attribute could be parsed
    """
    import logging

    parsed = None
    for attribute, parser in (
        ("qualified_type", parse_qualified_type),
        ("content_type", parse_content_type),
    ):
        try:
            parsed = parser(get_object_attribute_no_verif(dor, attribute))
        except Exception as e:
            # Best effort: a missing or malformed attribute only means the next one must be tried.
            logging.debug("Unable to read '%s' on the DOR: %s", attribute, e)
            parsed = None
        # A parser may also report a failure by returning None instead of raising.
        if parsed is not None:
            break

    if parsed is None:
        return None

    version = getattr(dor, "object_version", None)
    if version is None:
        # NOTE(review): DORs identified by content_type appear to expose "version_string"
        # rather than "object_version" - confirm against the generated classes.
        version = getattr(dor, "version_string", None)

    return Uri(
        dataspace=dataspace,
        domain=parsed.group("domain"),
        domain_version=parsed.group("domainVersion"),
        object_type=parsed.group("type"),
        uuid=dor.uuid,
        version=version,
    )


def get_content_type_from_class(cls: Union[type, Any], print_dev_version=True, nb_max_version_digits=2):
if not isinstance(cls, type):
cls = type(cls)
Expand Down
3 changes: 3 additions & 0 deletions energyml-utils/src/energyml/utils/uri.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ def is_object_uri(self):
and self.uuid is not None
)

def get_qualified_type(self):
    """
    Build the qualified type of this uri: domain and domain version joined, then a dot, then the object type.
    :return: the qualified type as a string
    """
    domain_and_version = f"{self.domain}{self.domain_version}"
    return f"{domain_and_version}.{self.object_type}"

def __str__(self):
res = "eml:///"
if self.dataspace is not None and len(self.dataspace) > 0:
Expand Down

0 comments on commit 0e45d74

Please sign in to comment.