From c87e2f5e6029aae69a1826935146881b4e726972 Mon Sep 17 00:00:00 2001 From: imcovangent <I.vanGent@tudelft.nl> Date: Thu, 30 Nov 2017 23:40:57 +0100 Subject: [PATCH] Development for knowledge base deprecation. Former-commit-id: 6d49b99ef34aeeaaf1f8b44229366d1a2b55cce8 --- kadmos/cmdows/cmdows.py | 131 ++++++++++++++++++++++++++++++++--- kadmos/graph/graph_kadmos.py | 96 +++++++++++++++++++++---- kadmos/utilities/strings.py | 14 ++++ 3 files changed, 220 insertions(+), 21 deletions(-) create mode 100644 kadmos/utilities/strings.py diff --git a/kadmos/cmdows/cmdows.py b/kadmos/cmdows/cmdows.py index 2b9cd664c..723756081 100644 --- a/kadmos/cmdows/cmdows.py +++ b/kadmos/cmdows/cmdows.py @@ -4,11 +4,11 @@ import os import copy from lxml import etree from lxml.etree import ElementTree -from collections import Counter - +from collections import Counter, OrderedDict # Settings for the logger -from kadmos.utilities.xml import get_uid_search_xpath +from kadmos.utilities.strings import find_between, find_until +from kadmos.utilities.xml import get_uid_search_xpath, Element logger = logging.getLogger(__name__) @@ -119,6 +119,84 @@ class CMDOWS(object): logger.warning('The following uIDs do not exist although they are referred to: ' + ', '.join(invalids)) return result + def add_contact(self, name, email, uid, company=None, department=None, function=None, address=None, telephone=None, country=None, roles=None): + """Method to add a contact element to the organization branch.""" + + # Assert that there is a path to the contacts element or add one + parent_element = self.ensure_abs_xpath('/cmdows/header/organization/contacts') + + # Assert that there is no existing element with the uid + try: + self.get_element_of_uid(uid) + raise AssertionError('UID {} is already used in the CMDOWS file at {}'.format(uid, self.get_xpath_of_uid(uid))) + except: + pass + + # Add the contact details + contact_element = Element('contact', uID=uid) + contact_element._add_element('name', name) + 
contact_element._add_element('email', email) + contact_element._add_element('company', company) if company else None + contact_element._add_element('department', department) if department else None + contact_element._add_element('function', function) if function else None + contact_element._add_element('address', address) if address else None + contact_element._add_element('telephone', telephone) if telephone else None + contact_element._add_element('country', country) if country else None + parent_element.append(contact_element) + + if roles: + if isinstance(roles, list): + [self.add_actor(uid, role) for role in roles] + elif isinstance(roles, basestring): + self.add_actor(uid, roles) + else: + raise IOError('Invalid type for roles provided: {}.'.format(type(roles))) + return + + def add_actor(self, contact_uid, role): + """Method to add a role element to the organization branch.""" + + # Input assertions + self.get_element_of_uid(contact_uid) + assert isinstance(role, basestring), 'Role should be of type string.' + assert role in ['architect', 'integrator', 'collaborativeEngineer', 'toolSpecialist', 'customer'], 'Role {} does not exist.'.format(role) + + # Assert that there is a path to the roles element or add one + parent_element = self.ensure_abs_xpath('/cmdows/header/organization/organigram/' + role + 's') + + # Add the role + contact_element = Element(role) + contact_element._add_element('contactUID', contact_uid) + parent_element.append(contact_element) + return + + def ensure_abs_xpath(self, xpath): + """Method to ensure that the elements given by an absolute XPath exist.""" + split_path = xpath.split('/') + assert not split_path[0] and split_path[-1] and len(split_path) > 1, \ + 'Invalid XPath ({}) provided. 
XPath should start with / sign and have at least one entry.'.format(xpath) + for idx, eltag in enumerate(split_path[1:]): + local_xpath = '/'.join(split_path[0:idx + 2]) + el = self.root.xpath(local_xpath) + assert len(el) <= 1, 'Non-unique XPath {} provided.'.format(local_xpath) + if not el: + previous_xpath = '/'.join(split_path[0:idx + 1]) + el_pr = self.root.xpath(previous_xpath) + self.add_subelement(el_pr[0], eltag) + return self.root.xpath(local_xpath)[0] + + def add_subelement(self, element, subelement_xpath_tag): + if "@uID" in subelement_xpath_tag: + el_tag = find_until(subelement_xpath_tag, '[') + uid_attr = find_between(subelement_xpath_tag, '[@uID="', '"]') + element.append(Element(el_tag, uID=uid_attr)) + elif "[" in subelement_xpath_tag: + el_tag = find_until(subelement_xpath_tag, '[') + element.append(Element(el_tag)) + else: + element.append(Element(subelement_xpath_tag)) + return + def resolve_uids(self): """Method to rename duplicate UIDs in a CMDOWS file""" logger.warning('The resolve_uids method is a hack and should not be used.') @@ -200,16 +278,53 @@ class CMDOWS(object): if el: el[0].getparent().remove(el[0]) + def remove_element_based_on_uid(self, uid, expected_tag=None): + """Method to remove an element based on its UID.""" + el = self.get_element_of_uid(uid) + if expected_tag: + assert el.tag == expected_tag, 'Element should have tag {}, but has tag: {}.'.format(expected_tag, el.tag) + el.getparent().remove(el) + + def remove_element_based_on_xpath(self, xpath, expected_amount=None, expected_text=None, higher_level_removal=None): + """Method to remove an element based on its XPath.""" + els = self.root.xpath(xpath) + if expected_amount: + assert len(els) == expected_amount, '{} element(s) expected, found {}.'.format(expected_amount, len(els)) + for el in els: + remove = True if (el.text == expected_text or expected_text is None) else False + if remove: + el_upper = el.getparent() + if higher_level_removal: + for i in 
[0]*higher_level_removal: + el = el_upper + el_upper = el.getparent() + el_upper.remove(el) + + def remove_contact(self, contact_uid): + """Method to remove a contact based on its UID.""" + self.remove_element_based_on_uid(contact_uid, expected_tag='contact') + def remove_parameter(self, param_uid): """Method to remove a parameter based on its UID.""" - el = self.get_element_of_uid(param_uid) - el.getparent().remove(el) + self.remove_element_based_on_uid(param_uid, expected_tag='parameter') def remove_parameters(self, params_uids): """Method to remove a list of parameters based on their UID.""" - for param in params_uids: - el = self.get_element_of_uid(param) - el.getparent().remove(el) + for param_uid in params_uids: + self.remove_element_based_on_uid(param_uid, expected_tag='parameter') + + def remove_actor(self, role, uid): + # Input assertions + assert isinstance(role, basestring), 'Role should be of type string.' + assert role in ['architect', 'integrator', 'collaborativeEngineer', 'toolSpecialist', + 'customer'], 'Role {} does not exist.'.format(role) + # Remove actor if found + self.remove_element_based_on_xpath('/cmdows/header/organization/organigram/' + role + 's/' + role + '/contactUID', expected_text=uid, higher_level_removal=1) + + # Remove parent element if empty + els = self.root.xpath('/cmdows/header/organization/organigram/' + role + 's/' + role) + if not els: + self.remove_element_based_on_xpath('/cmdows/header/organization/organigram/' + role + 's', expected_amount=1) def get_element_of_uid(self, uid): """Method to get the element based on a UID value.""" diff --git a/kadmos/graph/graph_kadmos.py b/kadmos/graph/graph_kadmos.py index 0aae34a28..af6d7ccf8 100644 --- a/kadmos/graph/graph_kadmos.py +++ b/kadmos/graph/graph_kadmos.py @@ -927,7 +927,7 @@ class KadmosGraph(nx.DiGraph, EquationMixin, VistomsMixin): # LOAD METHODS # # ---------------------------------------------------------------------------------------------------------------- # - def 
load_cmdows(self, cmdows): + def load_cmdows(self, cmdows, io_xsd_check): # Create organization node self._load_cmdows_header(cmdows) @@ -936,7 +936,7 @@ class KadmosGraph(nx.DiGraph, EquationMixin, VistomsMixin): self._load_cmdows_parameters(cmdows) # Create function nodes - self._load_cmdows_executables(cmdows) + self._load_cmdows_executables(cmdows, io_xsd_check) # Create problem formulation dict if hasattr(self, '_load_cmdows_problem_def'): @@ -957,12 +957,12 @@ class KadmosGraph(nx.DiGraph, EquationMixin, VistomsMixin): return - def _load_cmdows_executables(self, cmdows): + def _load_cmdows_executables(self, cmdows, io_xsd_check): - self._load_cmdows_competences(cmdows) + self._load_cmdows_competences(cmdows, io_xsd_check) self._load_cmdows_equations(cmdows) - def _load_cmdows_competences(self, cmdows): + def _load_cmdows_competences(self, cmdows, io_xsd_check): for function in cmdows.findall('executableBlocks/designCompetences/designCompetence'): self.add_node(function.get('uID'), @@ -986,7 +986,7 @@ class KadmosGraph(nx.DiGraph, EquationMixin, VistomsMixin): # Determine assumed input file location (same folder as CMDOWS file) input_file_path = os.path.join(os.path.split(function.base)[0], function.findtext('ID') + '-input.xml') if os.path.isfile(input_file_path): - inputs = _read_io_xml_file(input_file_path, function.findtext('modeID')) + inputs = _read_io_xml_file(input_file_path, function.findtext('modeID'), io_xsd_check) for input in inputs['leafNodes']: # Add new parameter if it does not exist yet if not self.has_node(input['xpath']): @@ -1006,7 +1006,7 @@ class KadmosGraph(nx.DiGraph, EquationMixin, VistomsMixin): # Determine assumed output file location (same folder as CMDOWS file) output_file_path = os.path.join(os.path.split(function.base)[0], function.findtext('ID') + '-output.xml') if os.path.isfile(output_file_path): - outputs = _read_io_xml_file(output_file_path, function.findtext('modeID')) + outputs = _read_io_xml_file(output_file_path, 
function.findtext('modeID'), io_xsd_check) for output in outputs['leafNodes']: # Add new parameter if it does not exist yet if not self.has_node(output['xpath']): @@ -3734,7 +3734,7 @@ class KadmosGraph(nx.DiGraph, EquationMixin, VistomsMixin): # noinspection PyUnboundLocalVariable -def load(file_name, file_type=None, file_check_critical=True, source_folder=None): +def load(file_name, file_type=None, file_check_critical=True, source_folder=None, io_xsd_check=False): """Function to load a KadmosGraph from a file. Different input file types are implemented. They are listed in the following. @@ -3751,6 +3751,9 @@ def load(file_name, file_type=None, file_check_critical=True, source_folder=None :type file_check_critical: bool :param source_folder: source folder which contains the input file :type source_folder: str + :param io_xsd_check: boolean to check XML input/output files against a schema file (XSD) in case of CMDOWS file in + combination with in- and output XML files. + :type io_xsd_check: bool :return: graph loaded from file :rtype: KadmosGraph @@ -3780,7 +3783,7 @@ def load(file_name, file_type=None, file_check_critical=True, source_folder=None # TODO: Load also header info and/or metadata of files logger.info('Loading the ' + file_type.upper() + ' file ' + file_path + '...') if file_type == 'cmdows': - graph, mpg = _load_cmdows(file_path) + graph, mpg = _load_cmdows(file_path, io_xsd_check) elif file_type == 'kdms': graph = _load_kdms(file_path) elif file_type == 'graphml': @@ -3848,7 +3851,7 @@ def _load_graphml(file_path): # noinspection PyUnboundLocalVariable -def _load_cmdows(file_path): +def _load_cmdows(file_path, io_xsd_check): from graph_data import RepositoryConnectivityGraph, FundamentalProblemGraph, MdaoDataGraph @@ -3878,7 +3881,7 @@ def _load_cmdows(file_path): IOError('The CMDOWS file ' + file_path + ' is missing basic elements and cannot be loaded.') # Load the graph (and MPG in case this one is provided) - mpg = graph.load_cmdows(cmdows) + mpg 
= graph.load_cmdows(cmdows, io_xsd_check) # Clean up graph try: @@ -3894,7 +3897,7 @@ def _load_cmdows(file_path): return graph, None -def _read_io_xml_file(file_path, mode): +def _read_io_xml_file(file_path, mode, xsd_check): # Check input assert os.path.isfile(file_path), 'File {} does not exist.'.format(file_path) @@ -3907,7 +3910,7 @@ def _read_io_xml_file(file_path, mode): if os.stat(file_path).st_size == 0: # check size of file return dataDict else: - tree = etree.parse(file_path) + tree = etree.parse(file_path, parser) # remove comments from tree comments = tree.xpath("//comment()") @@ -3915,12 +3918,20 @@ def _read_io_xml_file(file_path, mode): p = c.getparent() p.remove(c) + # Check file against XSD schema + if xsd_check: + schema_path = _get_data_schema(os.path.split(file_path)[0]) + _validate_file_against_schema(file_path, schema_path) + # iterate through tree and add data to dict, only touch leaf nodes leafNodesList = [] completeNodeList = [] for el in tree.iter(): data = {} path = tree.getpath(el) + if path == '/*': + raise NotImplementedError('The given XML file {} seems to contain namespaces, ' + 'this is not supported by KADMOS.'.format(os.path.split(file_path)[1])) # add uids to xpath path = _add_uids_to_xpath(path, el) @@ -4068,3 +4079,62 @@ def _check_execution_mode_for_element(element, tree, file, req_mode): else: return False + +def _get_data_schema(kb_dir): + """ + (PRIVATE) This function retrieves the data schema (.xsd) file from the KB folder and stores filename in instance. 
+ :return: data_schema_path + """ + + schema_pattern = "(.xsd)$" + + # Determine name of XML Schema file + xsd_schema_found = False + for file_name in os.listdir(kb_dir): + if os.path.isfile(os.path.join(kb_dir, file_name)): + match = re.search(schema_pattern, file_name) + if match and not xsd_schema_found: + data_schema = file_name + xsd_schema_found = True + elif match and xsd_schema_found: + raise IOError('Multiple XML Schemas (.xsd files) found in the knowledge base ({}). ' + 'Only one .xsd file is allowed per knowledge base.'.format(os.path.split(kb_dir)[1])) + if not xsd_schema_found: + raise IOError('No XML Schemas (.xsd files) found in the knowledge base ({}). ' + 'One .xsd file is required per knowledge base.'.format(os.path.split(kb_dir)[1])) + else: + logger.info("XML Schema '{}' found.".format(data_schema)) + + return os.path.join(kb_dir, data_schema) + + +def _validate_file_against_schema(file_path, schema_path): + """ + (PRIVATE) Check the read-write XML file in the knowledge base against the XML Schema. + Argument is list/tuple of nodes to ignore in validation. Root node can not be ignored. + + :rtype: Error + """ + + # Parse the XML file + tree = etree.parse(file_path) + + # Parse the XML Schema + xmlschema_doc = etree.parse(schema_path) + xmlschema = etree.XMLSchema(xmlschema_doc) + + # Perform file validation + validated = xmlschema.validate(tree) + if validated: + logger.info('The XML file {} has been validated against the schema {}.'.format(os.path.split(file_path)[1], + os.path.split(schema_path)[1])) + else: + logger.debug('Schema validation errors:') + for error in xmlschema.error_log: + logger.debug('ERROR ON LINE {} in file {}: {}'.format(error.line, os.path.split(file_path)[1], + error.message.encode("utf-8"))) + raise AssertionError('The provided file {} is not valid w.r.t. 
the schema {} (set logger to debug for ' + 'additional info).'.format(os.path.split(file_path)[1], os.path.split(schema_path)[1])) + return + + diff --git a/kadmos/utilities/strings.py b/kadmos/utilities/strings.py new file mode 100644 index 000000000..15436c486 --- /dev/null +++ b/kadmos/utilities/strings.py @@ -0,0 +1,14 @@ +def find_between(s, first, last): + try: + start = s.index(first)+len(first) + end = s.index(last, start) + return s[start:end] + except ValueError: + return "" + +def find_until(s, until): + try: + until = s.index(until) + return s[0:until] + except ValueError: + return "" -- GitLab