From c58ee368ce56d1f44767cbddebf79800a6ea8c6a Mon Sep 17 00:00:00 2001
From: e3fm8 <wolf.widdra@physik.uni-halle.de>
Date: Tue, 3 Nov 2020 13:08:37 +0100
Subject: [PATCH] Derive myLogbook from elog.Logbook instead of a local copy

---
 libhreels/expLogbook.py | 658 +++-------------------------------------
 1 file changed, 48 insertions(+), 610 deletions(-)

diff --git a/libhreels/expLogbook.py b/libhreels/expLogbook.py
index 0fba0f4..04ce7c4 100644
--- a/libhreels/expLogbook.py
+++ b/libhreels/expLogbook.py
@@ -1,610 +1,48 @@
-# print('Executing expLogbook.py')
-# from getComment4File import *
-import requests
-import urllib.parse
-import os
-import builtins
-import re
-from elog.logbook_exceptions import *
-from datetime import datetime
-from time import localtime
-
-# disable warnings about ssl verification
-from requests.packages.urllib3.exceptions import InsecureRequestWarning
-requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
-
-
-class myLogbook(object):
-    """
-    Logbook provides methods to interface with logbook on location: "server:port/subdir/logbook". User can create,
-    edit, delete logbook messages.
-    """
-
-    def __init__(self, hostname, logbook='', port=None, user=None, password=None, subdir='', use_ssl=True,
-                 encrypt_pwd=True):
-        """
-        :param hostname: elog server hostname. If whole url is specified here, it will be parsed and arguments:
-                         "logbook, port, subdir, use_ssl" will be overwritten by parsed values.
-        :param logbook: name of the logbook on the elog server
-        :param port: elog server port (if not specified will default to '80' if use_ssl=False or '443' if use_ssl=True
-        :param user: username (if authentication needed)
-        :param password: password (if authentication needed) Password will be encrypted with sha256 unless
-                         encrypt_pwd=False (default: True)
-        :param subdir: subdirectory of logbooks locations
-        :param use_ssl: connect using ssl (ignored if url starts with 'http://'' or 'https://'?
-        :param encrypt_pwd: To avoid exposing password in the code, this flag can be set to False and password
-                            will then be handled as it is (user needs to provide sha256 encrypted password with
-                            salt= '' and rounds=5000)
-        :return:
-        """
-        hostname = hostname.strip()
-        
-        # parse url to see if some parameters are defined with url
-        parsed_url = urllib.parse.urlsplit(hostname)
-
-        # ---- handle SSL -----
-        # hostname must be modified according to use_ssl flag. If hostname starts with https:// or http://
-        # the use_ssl flag is ignored
-        url_scheme = parsed_url.scheme
-        if url_scheme == 'http':
-            use_ssl = False
-
-        elif url_scheme == 'https':
-            use_ssl = True
-
-        elif not url_scheme:
-            # add http or https
-            if use_ssl:
-                url_scheme = 'https'
-            else:
-                url_scheme = 'http'
-
-        # ---- handle port -----
-        # 1) by default use port defined in the url
-        # 2) remove any 'default' ports such as 80 for http and 443 for https
-        # 3) if port not defined in url and not 'default' add it to netloc
-        
-        netloc = parsed_url.netloc
-        if netloc == "" and "localhost" in hostname:
-            netloc = 'localhost'
-        netloc_split = netloc.split(':')
-        if len(netloc_split) > 1:
-            # port defined in url --> remove if needed
-            port = netloc_split[1]
-            if (port == 80 and not use_ssl) or (port == 443 and use_ssl):
-                netloc = netloc_split[0]
-
-        else:
-            # add port info if needed
-            if port is not None and not (port == 80 and not use_ssl) and not (port == 443 and use_ssl):
-                netloc += ':{}'.format(port)
-
-        # ---- handle subdir and logbook -----
-        # parsed_url.path = /<subdir>/<logbook>/
-
-        # Remove last '/' for easier parsing
-        url_path = parsed_url.path
-        if url_path.endswith('/'):
-            url_path = url_path[:-1]
-
-        splitted_path = url_path.split('/')
-        if url_path and len(splitted_path) > 1:
-            # If here ... then at least some part of path is defined.
-
-            # If logbook defined --> treat path current path as subdir and add logbook at the end
-            # to define the full path. Else treat existing path as <subdir>/<logbook>.
-            # Put first and last '/' back on its place
-            if logbook:
-                url_path += '/{}'.format(logbook)
-            else:
-                logbook = splitted_path[-1]
-            
-        else:
-            # There is nothing. Use arguments.
-            url_path = subdir + '/' + logbook
-
-        # urllib.parse.quote replaces special characters with %xx escapes
-        # self._logbook_path = urllib.parse.quote('/' + url_path + '/').replace('//', '/')
-        self._logbook_path = ('/' + url_path + '/').replace('//', '/')
-        
-        self._url = url_scheme + '://' + netloc + self._logbook_path
-        self.logbook = logbook
-        self._user = user
-        self._password = _handle_pswd(password, encrypt_pwd)
-
-    def post(self, message, msg_id=None, reply=False, attributes=None, attachments=None, encoding=None,
-             **kwargs):
-        """
-        Posts message to the logbook. If msg_id is not specified new message will be created, otherwise existing
-        message will be edited, or a reply (if reply=True) to it will be created. This method returns the msg_id
-        of the newly created message.
-
-        :param message: string with message text
-        :param msg_id: ID number of message to edit or reply. If not specified new message is created.
-        :param reply: If 'True' reply to existing message is created instead of editing it
-        :param attributes: Dictionary of attributes. Following attributes are used internally by the elog and will be
-                           ignored: Text, Date, Encoding, Reply to, In reply to, Locked by, Attachment
-        :param attachments: list of:
-                                  - file like objects which read() will return bytes (if file_like_object.name is not
-                                    defined, default name "attachment<i>" will be used.
-                                  - paths to the files
-                            All items will be appended as attachment to the elog entry. In case of unknown
-                            attachment an exception LogbookInvalidAttachment will be raised.
-        :param encoding: Defines encoding of the message. Can be: 'plain' -> plain text, 'html'->html-text,
-                         'ELCode' --> elog formatting syntax
-        :param kwargs: Anything in the kwargs will be interpreted as attribute. e.g.: logbook.post('Test text',
-                       Author='Rok Vintar), "Author" will be sent as an attribute. If named same as one of the
-                       attributes defined in "attributes", kwargs will have priority.
-
-        :return: msg_id
-        """
-
-        attributes = attributes or {}
-        attributes = {**attributes, **kwargs}  # kwargs as attributes with higher priority
-
-        attachments = attachments or []
-
-        if encoding is not None:
-            if encoding not in ['plain', 'HTML', 'ELCode']:
-                raise LogbookMessageRejected('Invalid message encoding. Valid options: plain, HTML, ELCode.')
-            attributes['Encoding'] = encoding
-
-        attributes_to_edit = dict()
-        if msg_id:
-            # Message exists, we can continue
-            if reply:
-                # Verify that there is a message on the server, otherwise do not reply to it!
-                self._check_if_message_on_server(msg_id)  # raises exception in case of none existing message
-
-                attributes['reply_to'] = str(msg_id)
-
-            else:  # Edit existing
-                attributes['edit_id'] = str(msg_id)
-                attributes['skiplock'] = '1'
-
-                # Handle existing attachments
-                msg_to_edit, attributes_to_edit, attach_to_edit = self.read(msg_id)
-
-                i = 0
-                for attachment in attach_to_edit:
-                    if attachment:
-                        # Existing attachments must be passed as regular arguments attachment<i> with value= file name
-                        # Read message returnes full urls to existing attachments:
-                        # <hostname>:[<port>][/<subdir]/<logbook>/<msg_id>/<file_name>
-                        attributes['attachment' + str(i)] = os.path.basename(attachment)
-                        i += 1
-
-                for attribute, data in attributes.items():
-                    new_data = attributes.get(attribute)
-                    if new_data is not None:
-                        attributes_to_edit[attribute] = new_data
-        else:
-            # As we create a new message, specify creation time if not already specified in attributes
-            if 'When' not in attributes:
-                attributes['When'] = int(datetime.now().timestamp())
-
-        if not attributes_to_edit:
-            attributes_to_edit = attributes
-        # Remove any attributes that should not be sent
-        _remove_reserved_attributes(attributes_to_edit)
-
-        if attachments:
-            files_to_attach, objects_to_close = self._prepare_attachments(attachments)
-        else:
-            objects_to_close = list()
-            files_to_attach = list()
-
-        # Make requests module think that Text is a "file". This is the only way to force requests to send data as
-        # multipart/form-data even if there are no attachments. Elog understands only multipart/form-data
-        files_to_attach.append(('Text', ('', message)))
-
-        # Base attributes are common to all messages
-        self._add_base_msg_attributes(attributes_to_edit)
-        
-        # Keys in attributes cannot have certain characters like whitespaces or dashes for the http request
-        attributes_to_edit = _replace_special_characters_in_attribute_keys(attributes_to_edit)
-
-        try:
-            response = requests.post(self._url, data=attributes_to_edit, files=files_to_attach, allow_redirects=False,
-                                     verify=False)
-            # Validate response. Any problems will raise an Exception.
-            resp_message, resp_headers, resp_msg_id = _validate_response(response)
-
-            # Close file like objects that were opened by the elog (if  path
-            for file_like_object in objects_to_close:
-                if hasattr(file_like_object, 'close'):
-                    file_like_object.close()
-
-        except requests.RequestException as e:
-            # Check if message on server.
-            self._check_if_message_on_server(msg_id)  # raises exceptions if no message or no response from server
-
-            # If here: message is on server but cannot be downloaded (should never happen)
-            raise LogbookServerProblem('Cannot access logbook server to post a message, ' + 'because of:\n' +
-                                       '{0}'.format(e))
-
-        # Any error before here should raise an exception, but check again for nay case.
-        if not resp_msg_id or resp_msg_id < 1:
-            raise LogbookInvalidMessageID('Invalid message ID: ' + str(resp_msg_id) + ' returned')
-        return resp_msg_id
-
-    def read(self, msg_id):
-        """
-        Reads message from the logbook server and returns tuple of (message, attributes, attachments) where:
-        message: string with message body
-        attributes: dictionary of all attributes returned by the logbook
-        attachments: list of urls to attachments on the logbook server
-
-        :param msg_id: ID of the message to be read
-        :return: message, attributes, attachments
-        """
-
-        request_headers = dict()
-        if self._user or self._password:
-            request_headers['Cookie'] = self._make_user_and_pswd_cookie()
-
-        try:
-            self._check_if_message_on_server(msg_id)  # raises exceptions if no message or no response from server
-            response = requests.get(self._url + str(msg_id) + '?cmd=download', headers=request_headers,
-                                    allow_redirects=False, verify=False)
-
-            # Validate response. If problems Exception will be thrown.
-            resp_message, resp_headers, resp_msg_id = _validate_response(response)
-
-        except requests.RequestException as e:
-            # If here: message is on server but cannot be downloaded (should never happen)
-            raise LogbookServerProblem('Cannot access logbook server to read the message with ID: ' + str(msg_id) +
-                                       'because of:\n' + '{0}'.format(e))
-
-        # Parse message to separate message body, attributes and attachments
-        attributes = dict()
-        attachments = list()
-
-        returned_msg = resp_message.decode('utf-8', 'ignore').splitlines()
-        delimiter_idx = returned_msg.index('========================================')
-
-        message = '\n'.join(returned_msg[delimiter_idx + 1:])
-        for line in returned_msg[0:delimiter_idx]:
-            line = line.split(': ')
-            data = ''.join(line[1:])
-            if line[0] == 'Attachment':
-                attachments = data.split(',')
-                # Here are only attachment names, make a full url out of it, so they could be
-                # recognisable by others, and downloaded if needed
-                attachments = [self._url + '{0}'.format(i) for i in attachments]
-            else:
-                attributes[line[0]] = data
-
-        return message, attributes, attachments
-
-    def delete(self, msg_id):
-        """
-        Deletes message thread (!!!message + all replies!!!) from logbook.
-        It also deletes all of attachments of corresponding messages from the server.
-
-        :param msg_id: message to be deleted
-        :return:
-        """
-
-        request_headers = dict()
-        if self._user or self._password:
-            request_headers['Cookie'] = self._make_user_and_pswd_cookie()
-
-        try:
-            self._check_if_message_on_server(msg_id)  # check if something to delete
-
-            response = requests.get(self._url + str(msg_id) + '?cmd=Delete&confirm=Yes', headers=request_headers,
-                                    allow_redirects=False, verify=False)
-
-            _validate_response(response)  # raises exception if any other error identified
-
-        except requests.RequestException as e:
-            # If here: message is on server but cannot be downloaded (should never happen)
-            raise LogbookServerProblem('Cannot access logbook server to delete the message with ID: ' + str(msg_id) +
-                                       'because of:\n' + '{0}'.format(e))
-
-        # Additional validation: If successfully deleted then status_code = 302. In case command was not executed at
-        # all (not English language --> no download command supported) status_code = 200 and the content is just a
-        # html page of this whole message.
-        if response.status_code == 200:
-            raise LogbookServerProblem('Cannot process delete command (only logbooks in English supported).')
-
-    def search(self, search_term, n_results = 20, scope="subtext"):
-        """
-        Searches the logbook and returns the message ids.
-
-        """
-        request_headers = dict()
-        if self._user or self._password:
-            request_headers['Cookie'] = self._make_user_and_pswd_cookie()
-
-        # Putting n_results = 0 crashes the elog. also in the web-gui.
-        n_results = 1 if n_results < 1 else n_results
-
-        params = {
-            "mode": "full",
-            "reverse": "1",
-            "npp": n_results
-        }
-        if type(search_term) is dict:
-            params.update(search_term)
-        else:
-            params.update({scope : search_term})        
-
-        try:
-            response = requests.get(self._url, params=params, headers=request_headers,
-                                    allow_redirects=False, verify=False)
-
-            # Validate response. If problems Exception will be thrown.
-            _validate_response(response)
-            resp_message = response
-
-        except requests.RequestException as e:
-            # If here: message is on server but cannot be downloaded (should never happen)
-            raise LogbookServerProblem('Cannot access logbook server to read message ids '
-                                       'because of:\n' + '{0}'.format(e))
-
-        from lxml import html
-        tree = html.fromstring(resp_message.content)
-        message_ids = tree.xpath('(//tr/td[@class="list1" or @class="list2"][1])/a/@href')
-        message_ids = [int(m.split("/")[-1]) for m in message_ids]
-        return message_ids
-
-
-    def get_last_message_id(self):
-        ids = self.get_message_ids()
-        if len(ids) > 0:
-            return ids[0]
-        else:
-            return None
-
-    def get_message_ids(self):
-        request_headers = dict()
-        if self._user or self._password:
-            request_headers['Cookie'] = self._make_user_and_pswd_cookie()
-
-        try:
-            response = requests.get(self._url + 'page', headers=request_headers,
-                                    allow_redirects=False, verify=False)
-
-            # Validate response. If problems Exception will be thrown.
-            _validate_response(response)
-            resp_message = response
-
-        except requests.RequestException as e:
-            # If here: message is on server but cannot be downloaded (should never happen)
-            raise LogbookServerProblem('Cannot access logbook server to read message ids '
-                                       'because of:\n' + '{0}'.format(e))
-
-        from lxml import html
-        tree = html.fromstring(resp_message.content)
-        message_ids = tree.xpath('(//tr/td[@class="list1" or @class="list2"][1])/a/@href')
-        message_ids = [int(m.split("/")[-1]) for m in message_ids]
-        return message_ids
-
-    def _check_if_message_on_server(self, msg_id):
-        """Try to load page for specific message. If there is a htm tag like <td class="errormsg"> then there is no
-        such message.
-
-        :param msg_id: ID of message to be checked
-        :return:
-        """
-
-        request_headers = dict()
-        if self._user or self._password:
-            request_headers['Cookie'] = self._make_user_and_pswd_cookie()
-        try:
-            response = requests.get(self._url + str(msg_id), headers=request_headers, allow_redirects=False,
-                                    verify=False)
-
-            # If there is no message code 200 will be returned (OK) and _validate_response will not recognise it
-            # but there will be some error in the html code.
-            resp_message, resp_headers, resp_msg_id = _validate_response(response)
-            # If there is no message, code 200 will be returned (OK) but there will be some error indication in
-            # the html code.
-            if re.findall('<td.*?class="errormsg".*?>.*?</td>',
-                          resp_message.decode('utf-8', 'ignore'),
-                          flags=re.DOTALL):
-                raise LogbookInvalidMessageID('Message with ID: ' + str(msg_id) + ' does not exist on logbook.')
-
-        except requests.RequestException as e:
-            raise LogbookServerProblem('No response from the logbook server.\nDetails: ' + '{0}'.format(e))
-
-    def _add_base_msg_attributes(self, data):
-        """
-        Adds base message attributes which are used by all messages.
-        :param data: dict of current attributes
-        :return: content string
-        """
-        data['cmd'] = 'Submit'
-        data['exp'] = self.logbook
-        if self._user:
-            data['unm'] = self._user
-        if self._password:
-            data['upwd'] = self._password
-
-    def _prepare_attachments(self, files):
-        """
-        Parses attachments to content objects. Attachments can be:
-            - file like objects: must have method read() which returns bytes. If it has attribute .name it will be used
-              for attachment name, otherwise generic attribute<i> name will be used.
-            - path to the file on disk
-
-        Note that if attachment is is an url pointing to the existing Logbook server it will be ignored and no
-        exceptions will be raised. This can happen if attachments returned with read_method are resend.
-
-        :param files: list of file like objects or paths
-        :return: content string
-        """
-        prepared = list()
-        i = 0
-        objects_to_close = list()  # objects that are created (opened) by elog must be later closed
-        for file_obj in files:
-            if hasattr(file_obj, 'read'):
-                i += 1
-                attribute_name = 'attfile' + str(i)
-
-                filename = attribute_name  # If file like object has no name specified use this one
-                candidate_filename = os.path.basename(file_obj.name)
-
-                if filename:  # use only if not empty string
-                    filename = candidate_filename
-
-            elif isinstance(file_obj, str):
-                # Check if it is:
-                #           - a path to the file --> open file and append
-                #           - an url pointing to the existing Logbook server --> ignore
-
-                filename = ""
-                attribute_name = ""
-
-                if os.path.isfile(file_obj):
-                    i += 1
-                    attribute_name = 'attfile' + str(i)
-
-                    file_obj = builtins.open(file_obj, 'rb')
-                    filename = os.path.basename(file_obj.name)
-
-                    objects_to_close.append(file_obj)
-
-                elif not file_obj.startswith(self._url):
-                    raise LogbookInvalidAttachmentType('Invalid type of attachment: \"' + file_obj + '\".')
-            else:
-                raise LogbookInvalidAttachmentType('Invalid type of attachment[' + str(i) + '].')
-
-            prepared.append((attribute_name, (filename, file_obj)))
-
-        return prepared, objects_to_close
-
-    def _make_user_and_pswd_cookie(self):
-        """
-        prepares user name and password cookie. It is sent in header when posting a message.
-        :return: user name and password value for the Cookie header
-        """
-        cookie = ''
-        if self._user:
-            cookie += 'unm=' + self._user + ';'
-        if self._password:
-            cookie += 'upwd=' + self._password + ';'
-
-        return cookie
-
-    def getMessage4File(self, filename):
-        ids = self.search({'file':filename})
-        if len(ids) > 0:
-            message, _ , _ = self.read(ids[0])
-            return message
-        else:
-            print("File {} in eLog not found!".format(filename))
-            return None
-
-    def getShortMessage4File(self,filename):
-        stuff = self.getMessage4File(filename)
-        import html2text
-        h = html2text.HTML2Text()
-        # Ignore converting links from HTML
-        h.ignore_links = True
-        if stuff: return h.handle(stuff)
-        return
-
-def _remove_reserved_attributes(attributes):
-    """
-    Removes elog reserved attributes (from the attributes dict) that can not be sent.
-
-    :param attributes: dictionary of attributes to be cleaned.
-    :return:
-    """
-
-    if attributes:
-        attributes.get('$@MID@$', None)
-        attributes.pop('Date', None)
-        attributes.pop('Attachment', None)
-        attributes.pop('Text', None)  # Remove this one because it will be send attachment like
-
-
-def _replace_special_characters_in_attribute_keys(attributes):
-    """
-    Replaces special characters in elog attribute keys by underscore, otherwise attribute values will be erased in
-    the http request. This is using the same replacement elog itself is using to handle these cases
-
-    :param attributes: dictionary of attributes to be cleaned.
-    :return: attributes with replaced keys
-    """
-    return {re.sub('[^0-9a-zA-Z]', '_', key): value for key, value in attributes.items()}
-
-
-def _validate_response(response):
-    """ Validate response of the request."""
-
-    msg_id = None
-
-    if response.status_code not in [200, 302]:
-        # 200 --> OK; 302 --> Found
-        # Html page is returned with error description (handling errors same way as on original client. Looks
-        # like there is no other way.
-
-        err = re.findall('<td.*?class="errormsg".*?>.*?</td>',
-                         response.content.decode('utf-8', 'ignore'),
-                         flags=re.DOTALL)
-
-        if len(err) > 0:
-            # Remove html tags
-            # If part of the message has: Please go  back... remove this part since it is an instruction for
-            # the user when using browser.
-            err = re.sub('(?:<.*?>)', '', err[0])
-            if err:
-                raise LogbookMessageRejected('Rejected because of: ' + err)
-            else:
-                raise LogbookMessageRejected('Rejected because of unknown error.')
-
-        # Other unknown errors
-        raise LogbookMessageRejected('Rejected because of unknown error.')
-    else:
-        location = response.headers.get('Location')
-        if location is not None:
-            if 'has moved' in location:
-                raise LogbookServerProblem('Logbook server has moved to another location.')
-            elif 'fail' in location:
-                raise LogbookAuthenticationError('Invalid username or password.')
-            else:
-                # returned locations is something like: '<host>/<sub_dir>/<logbook>/<msg_id><query>
-                # with urllib.parse.urlparse returns attribute path=<sub_dir>/<logbook>/<msg_id>
-                msg_id = int(urllib.parse.urlsplit(location).path.split('/')[-1])
-
-        if b'form name=form1' in response.content or b'type=password' in response.content:
-            # Not to smart to check this way, but no other indication of this kind of error.
-            # C client does it the same way
-            raise LogbookAuthenticationError('Invalid username or password.')
-
-    return response.content, response.headers, msg_id
-
-
-def _handle_pswd(password, encrypt=True):
-    """
-    Takes password string and returns password as needed by elog. If encrypt=True then password will be
-    sha256 encrypted (salt='', rounds=5000). Before returning password, any trailing $5$$ will be removed
-    independent off encrypt flag.
-
-    :param password: password string
-    :param encrypt: encrypt password?
-    :return: elog prepared password
-    """
-    if encrypt and password:
-        from passlib.hash import sha256_crypt
-        return sha256_crypt.encrypt(password, salt='', rounds=5000)[4:]
-    elif password and password.startswith('$5$$'):
-        return password[4:]
-    else:
-        return password
-
-
-##########################################################################################################
-# ogf settings:
-try:
-    from libhreels.eLogCredentials import dummy, unsafe    # Defines User credentials
-    logbook = myLogbook('https://labor-ep3.physik.uni-halle.de/HREELS/', user=dummy, password=unsafe)
-    print('eLog available!')
-    available = True
-except:
-    available = False
-
+# print('Executing expLogbook.py')
+# from getComment4File import *
+import requests
+import urllib.parse
+import os
+import builtins
+import re
+from elog.logbook_exceptions import *
+import elog
+from datetime import datetime
+from time import localtime
+
+# disable warnings about ssl verification
+from requests.packages.urllib3.exceptions import InsecureRequestWarning
+requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
+
+
+class myLogbook(elog.Logbook):
+    """elog.Logbook extended with lookups of the entry found by searching 'file' for a file name."""
+
+    def getMessage4File(self, filename):
+        """Return the message of the first id from search({'file': filename}); None (plus a notice) if no id."""
+        found_ids = super().search({'file':filename})
+        if len(found_ids) == 0:
+            print("File {} in eLog not found!".format(filename))
+            return None
+        body, _attributes, _attachments = super().read(found_ids[0])
+        return body
+
+    def getShortMessage4File(self,filename):
+        """Return getMessage4File(filename) run through html2text (links ignored); None if it is empty."""
+        entry_text = self.getMessage4File(filename)
+        import html2text
+        converter = html2text.HTML2Text()
+        converter.ignore_links = True  # Ignore converting links from HTML
+        return converter.handle(entry_text) if entry_text else None
+
+
+##########################################################################################################
+# ogf settings: best-effort creation of the module-level `logbook`; `available` records whether it worked.
+try:
+    from libhreels.eLogCredentials import dummy, unsafe    # Defines User credentials
+    logbook = myLogbook('https://labor-ep3.physik.uni-halle.de/HREELS/', user=dummy, password=unsafe)
+    print('eLog available!')
+    available = True
+except Exception:  # deliberately not a bare except: KeyboardInterrupt/SystemExit must still propagate
+    available = False
+
-- 
GitLab