A quick Python 2.x script to copy, unzip, and uniformly reformat the XML of an .odt file. It adds indentation and line breaks. Useful for debugging my invoicing script, which meddles with the XML files, by making the files diff-able, easier to read, and easier to search.
#!/usr/bin/env python
"""Copy, unzip and re-indent the XML files of an .odt document.

The .odt file is copied into a working folder named after the document,
extracted there, and every top-level ``*.xml`` file in that folder is
rewritten with line breaks and indentation so the files are diff-able
and easier to read and search.

Usage: run the script directly; an optional first command-line argument
overrides ``odt_path_and_file``.
"""
import os
import shutil
import sys
import xml.etree.ElementTree as ET
import zipfile

# Document processed when no path is given on the command line.
odt_path_and_file = 'path/to/file.odt'


# This function was adapted from http://effbot.org/zone/element-lib.htm
def indent(elem, level=0):
    """Add line breaks and indentation to ``elem`` and its subtree, in place.

    Only ``text``/``tail`` values that are empty or whitespace-only are
    overwritten, so real character data is left untouched.

    Args:
        elem: The ``xml.etree.ElementTree`` element to re-indent.
        level: Nesting depth of ``elem``; one space is added per level.
    """
    i = "\n" + level*" "
    if len(elem):
        if not elem.text or not elem.text.strip():
            elem.text = i + " "
        if not elem.tail or not elem.tail.strip():
            elem.tail = i
        for child in elem:
            indent(child, level+1)
        # ``child`` is now the last child: its tail precedes the parent's
        # closing tag, so it is dedented back to the parent's level.
        if not child.tail or not child.tail.strip():
            child.tail = i
    else:
        if level and (not elem.tail or not elem.tail.strip()):
            elem.tail = i


def folder_name_for(odt_path, dest_root='Desktop'):
    """Return the working folder path for ``odt_path`` under ``dest_root``.

    The folder is named after the document with a trailing ``.odt``
    removed. The suffix is sliced off explicitly because
    ``str.rstrip('.odt')`` strips any trailing '.', 'o', 'd' or 't'
    characters and would mangle names such as ``todo.odt``.
    """
    stem = os.path.basename(odt_path)
    if stem.endswith('.odt'):
        stem = stem[:-len('.odt')]
    return dest_root + '/' + stem


def reformat_odt(odt_path, dest_root='Desktop'):
    """Extract ``odt_path`` into its working folder and re-indent its XML.

    Any existing working folder is deleted first. A copy of the .odt file
    is kept inside the folder next to the extracted contents. Only XML
    files directly inside the folder are rewritten; sub-folders are not
    descended into.

    Args:
        odt_path: Path to the .odt file to unpack.
        dest_root: Folder under which the working folder is created.

    Returns:
        The path of the working folder.

    Raises:
        IOError: If ``odt_path`` is not an existing file.
        zipfile.BadZipfile: If ``odt_path`` is not a valid zip archive.
    """
    if not os.path.isfile(odt_path):
        # Fail before deleting the working files of a previous run.
        raise IOError('No such .odt file: ' + odt_path)

    folder_name = folder_name_for(odt_path, dest_root)
    if os.path.isdir(folder_name):
        shutil.rmtree(folder_name)  # Delete any old working files
    os.makedirs(folder_name)
    shutil.copy(odt_path, folder_name)

    archive = zipfile.ZipFile(odt_path)
    try:
        archive.extractall(folder_name)
    finally:
        archive.close()

    file_list = sorted(
        name for name in os.listdir(folder_name)
        if name.endswith('.xml')
        and os.path.isfile(os.path.join(folder_name, name))
    )
    for xml_name in file_list:
        xml_path = folder_name + '/' + xml_name
        print('Parsing ' + xml_path)
        tree = ET.parse(xml_path)
        indent(tree.getroot())
        tree.write(xml_path)
        print('Completed ' + xml_name)
    return folder_name


def main():
    """Reformat the document named on the command line, or the default."""
    odt_path = sys.argv[1] if len(sys.argv) > 1 else odt_path_and_file
    reformat_odt(odt_path)


if __name__ == '__main__':
    main()
No comments:
Post a Comment