From c87e2f5e6029aae69a1826935146881b4e726972 Mon Sep 17 00:00:00 2001
From: imcovangent <I.vanGent@tudelft.nl>
Date: Thu, 30 Nov 2017 23:40:57 +0100
Subject: [PATCH] Development for knowledge base deprecation.

Former-commit-id: 6d49b99ef34aeeaaf1f8b44229366d1a2b55cce8
---
 kadmos/cmdows/cmdows.py      | 131 ++++++++++++++++++++++++++++++++---
 kadmos/graph/graph_kadmos.py |  96 +++++++++++++++++++++----
 kadmos/utilities/strings.py  |  14 ++++
 3 files changed, 220 insertions(+), 21 deletions(-)
 create mode 100644 kadmos/utilities/strings.py

diff --git a/kadmos/cmdows/cmdows.py b/kadmos/cmdows/cmdows.py
index 2b9cd664c..723756081 100644
--- a/kadmos/cmdows/cmdows.py
+++ b/kadmos/cmdows/cmdows.py
@@ -4,11 +4,11 @@ import os
 import copy
 from lxml import etree
 from lxml.etree import ElementTree
-from collections import Counter
-
+from collections import Counter, OrderedDict
 
 # Settings for the logger
-from kadmos.utilities.xml import get_uid_search_xpath
+from kadmos.utilities.strings import find_between, find_until
+from kadmos.utilities.xml import get_uid_search_xpath, Element
 
 logger = logging.getLogger(__name__)
 
@@ -119,6 +119,84 @@ class CMDOWS(object):
             logger.warning('The following uIDs do not exist although they are referred to: ' + ', '.join(invalids))
         return result
 
+    def add_contact(self, name, email, uid, company=None, department=None, function=None, address=None, telephone=None, country=None, roles=None):
+        """Method to add a contact element to the organization branch.
+
+        Optional details are only written when truthy. roles is one role name or a list of role names, each passed
+        to add_actor(). Raises AssertionError if the uid is already used and IOError for another roles type.
+        """
+        # Assert that there is no existing element with the uid (raise in else: inside the try it would be swallowed)
+        try:
+            self.get_element_of_uid(uid)
+        except Exception:  # NOTE(review): assumes a failing lookup means that the uid is still unused - confirm
+            pass
+        else:
+            raise AssertionError('UID {} is already used in the CMDOWS file at {}'.format(uid, self.get_xpath_of_uid(uid)))
+
+        # Assert that there is a path to the contacts element or add one
+        parent_element = self.ensure_abs_xpath('/cmdows/header/organization/contacts')
+
+        # Add the contact details
+        contact_element = Element('contact', uID=uid)
+        contact_element._add_element('name', name)
+        contact_element._add_element('email', email)
+        for tag, value in (('company', company), ('department', department), ('function', function),
+                           ('address', address), ('telephone', telephone), ('country', country)):
+            if value:
+                contact_element._add_element(tag, value)
+        parent_element.append(contact_element)
+
+        # Add the roles (a single role name is handled as a list with one item)
+        if roles:
+            if not isinstance(roles, (list, basestring)):
+                raise IOError('Invalid type for roles provided: {}.'.format(type(roles)))
+            for role in ([roles] if isinstance(roles, basestring) else roles):
+                self.add_actor(uid, role)
+
+    def add_actor(self, contact_uid, role):
+        """Method to add an actor (role element with the contact's UID) to the organigram."""
+
+        # Input assertions (the lookup result is unused; presumably it raises when the UID does not exist)
+        self.get_element_of_uid(contact_uid)
+        assert isinstance(role, basestring), 'Role should be of type string.'
+        assert role in ['architect', 'integrator', 'collaborativeEngineer', 'toolSpecialist', 'customer'], 'Role {} does not exist.'.format(role)
+
+        # Assert that there is a path to the roles element (role name + 's', e.g. architects) or add one
+        parent_element = self.ensure_abs_xpath('/cmdows/header/organization/organigram/' + role + 's')
+
+        # Add the role element holding the UID of the contact
+        contact_element = Element(role)
+        contact_element._add_element('contactUID', contact_uid)
+        parent_element.append(contact_element)
+        return
+
+    def ensure_abs_xpath(self, xpath):
+        """Method to ensure that all elements along an absolute XPath exist and to return the final element."""
+        split_path = xpath.split('/')
+        assert not split_path[0] and split_path[-1] and len(split_path) > 1, \
+            'Invalid XPath ({}) provided. XPath should start with / sign and have at least one entry.'.format(xpath)
+        for idx, eltag in enumerate(split_path[1:]):
+            local_xpath = '/'.join(split_path[0:idx + 2])
+            el = self.root.xpath(local_xpath)
+            assert len(el) <= 1, 'Non-unique XPath {} provided.'.format(local_xpath)
+            if not el:
+                previous_xpath = '/'.join(split_path[0:idx + 1])
+                el_pr = self.root.xpath(previous_xpath)
+                self.add_subelement(el_pr[0], eltag)
+        return self.root.xpath(local_xpath)[0]
+
+    def add_subelement(self, element, subelement_xpath_tag):
+        """Method to append a new child to element for one XPath step (a tag, optionally followed by a predicate)."""
+        step = subelement_xpath_tag
+        if "@uID" in step:
+            # Keep the uID from a [@uID="..."] predicate as attribute of the new element
+            new_element = Element(find_until(step, '['), uID=find_between(step, '[@uID="', '"]'))
+        elif "[" in step:
+            new_element = Element(find_until(step, '['))  # any other predicate is dropped
+        else:
+            new_element = Element(step)
+        element.append(new_element)
+
     def resolve_uids(self):
         """Method to rename duplicate UIDs in a CMDOWS file"""
         logger.warning('The resolve_uids method is a hack and should not be used.')
@@ -200,16 +278,53 @@ class CMDOWS(object):
         if el:
             el[0].getparent().remove(el[0])
 
+    def remove_element_based_on_uid(self, uid, expected_tag=None):
+        """Method to remove an element based on its UID (optionally asserting that it has the expected tag)."""
+        el = self.get_element_of_uid(uid)
+        if expected_tag:
+            assert el.tag == expected_tag, 'Element should have tag {}, but has tag: {}.'.format(expected_tag, el.tag)
+        el.getparent().remove(el)
+
+    def remove_element_based_on_xpath(self, xpath, expected_amount=None, expected_text=None, higher_level_removal=None):
+        """Method to remove the elements matched by an XPath (or their n-th ancestors if higher_level_removal=n).
+
+        Only matches whose text equals expected_text are removed (all matches if it is None). expected_amount
+        asserts the number of matches and is also enforced when it is 0 (only None disables the check).
+        """
+        els = self.root.xpath(xpath)
+        if expected_amount is not None:
+            assert len(els) == expected_amount, '{} element(s) expected, found {}.'.format(expected_amount, len(els))
+        for el in els:
+            if expected_text is None or el.text == expected_text:
+                for _ in range(higher_level_removal or 0):
+                    el = el.getparent()
+                el.getparent().remove(el)
+
+    def remove_contact(self, contact_uid):
+        """Method to remove a contact based on its UID (the element is asserted to have the tag 'contact')."""
+        self.remove_element_based_on_uid(contact_uid, expected_tag='contact')
+
     def remove_parameter(self, param_uid):
         """Method to remove a parameter based on its UID."""
-        el = self.get_element_of_uid(param_uid)
-        el.getparent().remove(el)
+        self.remove_element_based_on_uid(param_uid, expected_tag='parameter')
 
     def remove_parameters(self, params_uids):
         """Method to remove a list of parameters based on their UID."""
-        for param in params_uids:
-            el = self.get_element_of_uid(param)
-            el.getparent().remove(el)
+        for param_uid in params_uids:
+            self.remove_element_based_on_uid(param_uid, expected_tag='parameter')
+
+    def remove_actor(self, role, uid):
+        """Method to remove the actor(s) with the given role and contact UID from the organigram."""
+        assert isinstance(role, basestring), 'Role should be of type string.'
+        assert role in ['architect', 'integrator', 'collaborativeEngineer', 'toolSpecialist',
+                        'customer'], 'Role {} does not exist.'.format(role)
+        # Remove actor if found (the role element, i.e. the parent of the matching contactUID, is removed)
+        self.remove_element_based_on_xpath('/cmdows/header/organization/organigram/' + role + 's/' + role + '/contactUID', expected_text=uid, higher_level_removal=1)
+
+        # Remove the parent element (role name + 's') if no actor with this role is left
+        els = self.root.xpath('/cmdows/header/organization/organigram/' + role + 's/' + role)
+        if not els:
+            self.remove_element_based_on_xpath('/cmdows/header/organization/organigram/' + role + 's', expected_amount=1)
 
     def get_element_of_uid(self, uid):
         """Method to get the element based on a UID value."""
diff --git a/kadmos/graph/graph_kadmos.py b/kadmos/graph/graph_kadmos.py
index 0aae34a28..af6d7ccf8 100644
--- a/kadmos/graph/graph_kadmos.py
+++ b/kadmos/graph/graph_kadmos.py
@@ -927,7 +927,7 @@ class KadmosGraph(nx.DiGraph, EquationMixin, VistomsMixin):
     #                                             LOAD METHODS                                                         #
     # ---------------------------------------------------------------------------------------------------------------- #
 
-    def load_cmdows(self, cmdows):
+    def load_cmdows(self, cmdows, io_xsd_check):
 
         # Create organization node
         self._load_cmdows_header(cmdows)
@@ -936,7 +936,7 @@ class KadmosGraph(nx.DiGraph, EquationMixin, VistomsMixin):
         self._load_cmdows_parameters(cmdows)
 
         # Create function nodes
-        self._load_cmdows_executables(cmdows)
+        self._load_cmdows_executables(cmdows, io_xsd_check)
 
         # Create problem formulation dict
         if hasattr(self, '_load_cmdows_problem_def'):
@@ -957,12 +957,12 @@ class KadmosGraph(nx.DiGraph, EquationMixin, VistomsMixin):
 
         return
 
-    def _load_cmdows_executables(self, cmdows):
+    def _load_cmdows_executables(self, cmdows, io_xsd_check):
 
-        self._load_cmdows_competences(cmdows)
+        self._load_cmdows_competences(cmdows, io_xsd_check)
         self._load_cmdows_equations(cmdows)
 
-    def _load_cmdows_competences(self, cmdows):
+    def _load_cmdows_competences(self, cmdows, io_xsd_check):
 
         for function in cmdows.findall('executableBlocks/designCompetences/designCompetence'):
             self.add_node(function.get('uID'),
@@ -986,7 +986,7 @@ class KadmosGraph(nx.DiGraph, EquationMixin, VistomsMixin):
                 # Determine assumed input file location (same folder as CMDOWS file)
                 input_file_path = os.path.join(os.path.split(function.base)[0], function.findtext('ID') + '-input.xml')
                 if os.path.isfile(input_file_path):
-                    inputs = _read_io_xml_file(input_file_path, function.findtext('modeID'))
+                    inputs = _read_io_xml_file(input_file_path, function.findtext('modeID'), io_xsd_check)
                     for input in inputs['leafNodes']:
                         # Add new parameter if it does not exist yet
                         if not self.has_node(input['xpath']):
@@ -1006,7 +1006,7 @@ class KadmosGraph(nx.DiGraph, EquationMixin, VistomsMixin):
                 # Determine assumed output file location (same folder as CMDOWS file)
                 output_file_path = os.path.join(os.path.split(function.base)[0], function.findtext('ID') + '-output.xml')
                 if os.path.isfile(output_file_path):
-                    outputs = _read_io_xml_file(output_file_path, function.findtext('modeID'))
+                    outputs = _read_io_xml_file(output_file_path, function.findtext('modeID'), io_xsd_check)
                     for output in outputs['leafNodes']:
                         # Add new parameter if it does not exist yet
                         if not self.has_node(output['xpath']):
@@ -3734,7 +3734,7 @@ class KadmosGraph(nx.DiGraph, EquationMixin, VistomsMixin):
 
 
 # noinspection PyUnboundLocalVariable
-def load(file_name, file_type=None, file_check_critical=True, source_folder=None):
+def load(file_name, file_type=None, file_check_critical=True, source_folder=None, io_xsd_check=False):
     """Function to load a KadmosGraph from a file.
 
     Different input file types are implemented. They are listed in the following.
@@ -3751,6 +3751,9 @@ def load(file_name, file_type=None, file_check_critical=True, source_folder=None
     :type file_check_critical: bool
     :param source_folder: source folder which contains the input file
     :type source_folder: str
+    :param io_xsd_check: boolean to check XML input/output files against a schema file (XSD) in case of CMDOWS file in
+    combination with in- and output XML files.
+    :type io_xsd_check: bool
 
     :return: graph loaded from file
     :rtype: KadmosGraph
@@ -3780,7 +3783,7 @@ def load(file_name, file_type=None, file_check_critical=True, source_folder=None
     # TODO: Load also header info and/or metadata of files
     logger.info('Loading the ' + file_type.upper() + ' file ' + file_path + '...')
     if file_type == 'cmdows':
-        graph, mpg = _load_cmdows(file_path)
+        graph, mpg = _load_cmdows(file_path, io_xsd_check)
     elif file_type == 'kdms':
         graph = _load_kdms(file_path)
     elif file_type == 'graphml':
@@ -3848,7 +3851,7 @@ def _load_graphml(file_path):
 
 
 # noinspection PyUnboundLocalVariable
-def _load_cmdows(file_path):
+def _load_cmdows(file_path, io_xsd_check):
 
     from graph_data import RepositoryConnectivityGraph, FundamentalProblemGraph, MdaoDataGraph
 
@@ -3878,7 +3881,7 @@ def _load_cmdows(file_path):
         IOError('The CMDOWS file ' + file_path + ' is missing basic elements and cannot be loaded.')
 
     # Load the graph (and MPG in case this one is provided)
-    mpg = graph.load_cmdows(cmdows)
+    mpg = graph.load_cmdows(cmdows, io_xsd_check)
 
     # Clean up graph
     try:
@@ -3894,7 +3897,7 @@ def _load_cmdows(file_path):
         return graph, None
 
 
-def _read_io_xml_file(file_path, mode):
+def _read_io_xml_file(file_path, mode, xsd_check):
 
     # Check input
     assert os.path.isfile(file_path), 'File {} does not exist.'.format(file_path)
@@ -3907,7 +3910,7 @@ def _read_io_xml_file(file_path, mode):
     if os.stat(file_path).st_size == 0:  # check size of file
         return dataDict
     else:
-        tree = etree.parse(file_path)
+        tree = etree.parse(file_path, parser)
 
     # remove comments from tree
     comments = tree.xpath("//comment()")
@@ -3915,12 +3918,20 @@ def _read_io_xml_file(file_path, mode):
         p = c.getparent()
         p.remove(c)
 
+    # Check file against XSD schema
+    if xsd_check:
+        schema_path = _get_data_schema(os.path.split(file_path)[0])
+        _validate_file_against_schema(file_path, schema_path)
+
     # iterate through tree and add data to dict, only touch leaf nodes
     leafNodesList = []
     completeNodeList = []
     for el in tree.iter():
         data = {}
         path = tree.getpath(el)
+        if path == '/*':
+            raise NotImplementedError('The given XML file {} seems to contain namespaces, '
+                                      'this is not supported by KADMOS.'.format(os.path.split(file_path)[1]))
 
         # add uids to xpath
         path = _add_uids_to_xpath(path, el)
@@ -4068,3 +4079,62 @@ def _check_execution_mode_for_element(element, tree, file, req_mode):
     else:
         return False
 
+
+def _get_data_schema(kb_dir):
+    """
+    (PRIVATE) This function retrieves the path of the data schema (.xsd) file from the KB folder. The folder itself
+    has to contain exactly one such file (subfolders are not searched).
+
+    :param kb_dir: folder in which the schema file is looked up
+    :return: data_schema_path (kb_dir joined with the name of the schema file)
+    :raises IOError: if the folder contains no .xsd file or more than one
+    """
+
+    # Determine name of XML Schema file; a literal suffix test is used since the unescaped '.' of a pattern such as
+    # "(.xsd)$" matches any character, which would also accept a file called e.g. 'notes_xsd'
+    schemas = [file_name for file_name in os.listdir(kb_dir)
+               if file_name.endswith('.xsd') and os.path.isfile(os.path.join(kb_dir, file_name))]
+
+    # Exactly one schema file is allowed
+    if len(schemas) > 1:
+        raise IOError('Multiple XML Schemas (.xsd files) found in the knowledge base ({}). '
+                      'Only one .xsd file is allowed per knowledge base.'.format(os.path.split(kb_dir)[1]))
+    if not schemas:
+        raise IOError('No XML Schemas (.xsd files) found in the knowledge base ({}). '
+                      'One .xsd file is required per knowledge base.'.format(os.path.split(kb_dir)[1]))
+    data_schema = schemas[0]
+    logger.info("XML Schema '{}' found.".format(data_schema))
+
+    return os.path.join(kb_dir, data_schema)
+
+
+def _validate_file_against_schema(file_path, schema_path):
+    """
+    (PRIVATE) Check the XML file at file_path against the XML Schema (XSD) at schema_path.
+    Validation errors are logged at debug level before the exception is raised.
+
+    :raises AssertionError: if the file is not valid w.r.t. the schema
+    """
+
+    # Parse the XML file
+    tree = etree.parse(file_path)
+
+    # Parse the XML Schema
+    xmlschema_doc = etree.parse(schema_path)
+    xmlschema = etree.XMLSchema(xmlschema_doc)
+
+    # Perform file validation
+    validated = xmlschema.validate(tree)
+    if validated:
+        logger.info('The XML file {} has been validated against the schema {}.'.format(os.path.split(file_path)[1],
+                    os.path.split(schema_path)[1]))
+    else:
+        logger.debug('Schema validation errors:')
+        for error in xmlschema.error_log:
+            logger.debug('ERROR ON LINE {} in file {}: {}'.format(error.line, os.path.split(file_path)[1],
+                                                                  error.message.encode("utf-8")))
+        raise AssertionError('The provided file {} is not valid w.r.t. the schema {} (set logger to debug for '
+                             'additional info).'.format(os.path.split(file_path)[1], os.path.split(schema_path)[1]))
+    return
+
+
diff --git a/kadmos/utilities/strings.py b/kadmos/utilities/strings.py
new file mode 100644
index 000000000..15436c486
--- /dev/null
+++ b/kadmos/utilities/strings.py
@@ -0,0 +1,14 @@
+def find_between(s, first, last):
+    """Return the part of s between the first occurrence of first and the next occurrence of last ("" if absent)."""
+    try:
+        begin = s.index(first) + len(first)
+        return s[begin:s.index(last, begin)]
+    except ValueError:
+        return ""
+
+def find_until(s, until):
+    """Return the part of s before the first occurrence of until ("" if until does not occur in s)."""
+    try:
+        return s[:s.index(until)]
+    except ValueError:
+        return ""
-- 
GitLab