From 61a3ceaf9ab8f3a018d07040294e8ccbad5f384a Mon Sep 17 00:00:00 2001 From: John McNamara Date: Thu, 27 Mar 2025 17:58:06 +0000 Subject: [PATCH] test: update python test runner --- test/functional/helper_functions.py | 146 ++++++++++++++++++---------- 1 file changed, 93 insertions(+), 53 deletions(-) diff --git a/test/functional/helper_functions.py b/test/functional/helper_functions.py index 2cf7da84..8ee5e3c7 100644 --- a/test/functional/helper_functions.py +++ b/test/functional/helper_functions.py @@ -6,29 +6,26 @@ # Copyright 2014-2025, John McNamara, jmcnamara@cpan.org. # -import re -import sys import os.path -from zipfile import ZipFile -from zipfile import BadZipfile -from zipfile import LargeZipFile +import re +from zipfile import BadZipFile, LargeZipFile, ZipFile def _xml_to_list(xml_str): # Convert test generated XML strings into lists for comparison testing. # Split the XML string at tag boundaries. - parser = re.compile(r'>\s*<') + parser = re.compile(r">\s*<") elements = parser.split(xml_str.strip()) elements = [s.replace("\r", "") for s in elements] # Add back the removed brackets. for index, element in enumerate(elements): - if not element[0] == '<': - elements[index] = '<' + elements[index] - if not element[-1] == '>': - elements[index] = elements[index] + '>' + if not element[0] == "<": + elements[index] = "<" + elements[index] + if not element[-1] == ">": + elements[index] = elements[index] + ">" return elements @@ -43,7 +40,7 @@ def _vml_to_list(vml_str): vml_str = vml_str.replace("\r", "") vml = vml_str.split("\n") - vml_str = '' + vml_str = "" for line in vml: # Skip blank lines. @@ -61,11 +58,11 @@ def _vml_to_list(vml_str): line += " " # Add newline after element end. - if re.search('>$', line): + if re.search(">$", line): line += "\n" # Split multiple elements. - line = line.replace('><', ">\n<") + line = line.replace("><", ">\n<") # Put all of Anchor on one line. if line == "\n": @@ -109,28 +106,28 @@ def _compare_xlsx_files(got_file, exp_file, ignore_files, ignore_elements): # XML file into an list of XML elements. try: # Open the XlsxWriter as a zip file for testing. - got_zip = ZipFile(got_file, 'r') + got_zip = ZipFile(got_file, "r") except IOError: # For Python 2.5+ compatibility. e = sys.exc_info()[1] error = "XlsxWriter file error: " + str(e) - return error, '' + return error, "" except (BadZipfile, LargeZipFile): e = sys.exc_info()[1] error = "XlsxWriter zipfile error, '" + exp_file + "': " + str(e) - return error, '' + return error, "" try: # Open the Excel as a zip file for testing. - exp_zip = ZipFile(exp_file, 'r') + exp_zip = ZipFile(exp_file, "r") except IOError: e = sys.exc_info()[1] error = "Excel file error: " + str(e) - return error, '' + return error, "" except (BadZipfile, LargeZipFile): e = sys.exc_info()[1] error = "Excel zipfile error, '" + exp_file + "': " + str(e) - return error, '' + return error, "" # Get the filenames from the zip files. got_files = sorted(got_zip.namelist()) @@ -146,57 +143,58 @@ def _compare_xlsx_files(got_file, exp_file, ignore_files, ignore_elements): # Compare each file in the XLSX containers. for filename in exp_files: - got_xml_str = got_zip.read(filename) exp_xml_str = exp_zip.read(filename) # Compare binary files with string comparison based on extension. extension = os.path.splitext(filename)[1] - if extension in ('.png', '.jpeg', '.gif','.bmp', '.bin'): + if extension in (".png", ".jpeg", ".gif", ".bmp", ".wmf", ".emf", ".bin"): if got_xml_str != exp_xml_str: - return 'got: %s' % filename, 'exp: %s' % filename + return f"got: {filename}", f"exp: {filename}" continue - if sys.version_info >= (3, 0, 0): - got_xml_str = got_xml_str.decode('utf-8') - exp_xml_str = exp_xml_str.decode('utf-8') + got_xml_str = got_xml_str.decode("utf-8") + exp_xml_str = exp_xml_str.decode("utf-8") + + # Check for errant xml tags in the generated file. + if "<<" in got_xml_str: + return f"Double start tag in XlsxWriter file {filename}", "" # Remove dates and user specific data from the core.xml data. - if filename == 'docProps/core.xml': - exp_xml_str = re.sub(r' ?John', '', exp_xml_str) - exp_xml_str = re.sub(r'\d\d\d\d-\d\d-\d\dT\d\d\:\d\d:\d\dZ', - '', exp_xml_str) - got_xml_str = re.sub(r'\d\d\d\d-\d\d-\d\dT\d\d\:\d\d:\d\dZ', - '', got_xml_str) + if filename == "docProps/core.xml": + exp_xml_str = re.sub(r" ?John", "", exp_xml_str) + exp_xml_str = re.sub( + r"\d\d\d\d-\d\d-\d\dT\d\d\:\d\d:\d\dZ", "", exp_xml_str + ) + got_xml_str = re.sub( + r"\d\d\d\d-\d\d-\d\dT\d\d\:\d\d:\d\dZ", "", got_xml_str + ) # Remove workbookView dimensions which are almost always different # and calcPr which can have different Excel version ids. - if filename == 'xl/workbook.xml': - exp_xml_str = re.sub(r']*>', - '', exp_xml_str) - got_xml_str = re.sub(r']*>', - '', got_xml_str) - exp_xml_str = re.sub(r']*>', - '', exp_xml_str) - got_xml_str = re.sub(r']*>', - '', got_xml_str) + if filename == "xl/workbook.xml": + exp_xml_str = re.sub(r"]*>", "", exp_xml_str) + got_xml_str = re.sub(r"]*>", "", got_xml_str) + exp_xml_str = re.sub(r"]*>", "", exp_xml_str) + got_xml_str = re.sub(r"]*>", "", got_xml_str) # Remove printer specific settings from Worksheet pageSetup elements. - if re.match(r'xl/worksheets/sheet\d.xml', filename): - exp_xml_str = re.sub(r'horizontalDpi="200" ', '', exp_xml_str) - exp_xml_str = re.sub(r'verticalDpi="200" ', '', exp_xml_str) - exp_xml_str = re.sub(r'(]*) r:id="rId1"', - r'\1', exp_xml_str) + if re.match(r"xl/worksheets/sheet\d.xml", filename): + exp_xml_str = re.sub(r'horizontalDpi="200" ', "", exp_xml_str) + exp_xml_str = re.sub(r'verticalDpi="200" ', "", exp_xml_str) + exp_xml_str = re.sub(r'(]*) r:id="rId1"', r"\1", exp_xml_str) # Remove Chart pageMargin dimensions which are almost always different. - if re.match(r'xl/charts/chart\d.xml', filename): - exp_xml_str = re.sub(r']*>', - '', exp_xml_str) - got_xml_str = re.sub(r']*>', - '', got_xml_str) + if re.match(r"xl/charts/chart\d.xml", filename): + exp_xml_str = re.sub( + r"]*>", "", exp_xml_str + ) + got_xml_str = re.sub( + r"]*>", "", got_xml_str + ) # Convert the XML string to lists for comparison. - if re.search('.vml$', filename): + if re.search(".vml$", filename): got_xml = _xml_to_list(got_xml_str) exp_xml = _vml_to_list(exp_xml_str) else: @@ -212,10 +210,14 @@ def _compare_xlsx_files(got_file, exp_file, ignore_files, ignore_elements): got_xml = [tag for tag in got_xml if not re.match(pattern, tag)] # Reorder the XML elements in the XLSX relationship files. - if filename == '[Content_Types].xml' or re.search('.rels$', filename): + if filename == "[Content_Types].xml" or re.search(".rels$", filename): got_xml = _sort_rel_file_data(got_xml) exp_xml = _sort_rel_file_data(exp_xml) + # Indent the XML elements to make the visual comparison of failures easier. + got_xml = _indent_elements(got_xml) + exp_xml = _indent_elements(exp_xml) + # Compared the XML elements in each file. if got_xml != exp_xml: got_xml.insert(0, filename) @@ -223,4 +225,42 @@ def _compare_xlsx_files(got_file, exp_file, ignore_files, ignore_elements): return got_xml, exp_xml # If we got here the files are the same. - return 'Ok', 'Ok' + return "Ok", "Ok" + + +def compare_xlsx_files(file1, file2, ignore_files=None, ignore_elements=None): + """ + External wrapper function to allow simplified equality testing of two Excel + files. Note, this function doesn't test equivalence, only equality. + + """ + if ignore_files is None: + ignore_files = [] + + if ignore_elements is None: + ignore_elements = [] + + got, exp = _compare_xlsx_files(file1, file2, ignore_files, ignore_elements) + + return got == exp + + +# Indent XML elements to make the visual comparison of failures easier. +def _indent_elements(xml_elements): + indent_level = 0 + indented_elements = [] + + for element in xml_elements: + if element.startswith("") + ): + indent_level += 1 + + return indented_elements