import gc import sys import time import random import string from cStringIO import StringIO from lxml import etree as lxml_etree from xml.etree import ElementTree as etree_etree from xml.etree import cElementTree as cetree_etree UTF8_SKULL = u'\u2620' STRINGS = [] for i in range(100): STRINGS.append(UTF8_SKULL.join([random.choice(string.letters) for x in range(random.randint(3,12))])) META_TAG = 'ahfalksjfdasdjhfald' DATA_TAG = 'adsf' def generate(etree, num_data=2000): root = etree.Element('lxml-v-etree', foo='bar', bar='baz') meta = etree.SubElement(root, 'meta') data = etree.SubElement(root, 'data') for i in range(0, len(STRINGS), 2): meta_el = etree.SubElement(meta, META_TAG, test=STRINGS[i]) meta_el.text = STRINGS[i+1] for i in range(num_data): data_el = etree.SubElement(data, DATA_TAG, foo='bar') for i in range(0, len(STRINGS) - 3, 3): more = etree.SubElement(data_el, STRINGS[i][0], bar=STRINGS[i+1]) more.text = STRINGS[i+2] return root def main(): CONTEXT = 10 mods = sorted((lxml_etree, etree_etree, cetree_etree) * 3) old_str = '' old_txt = '' print 'name generate | tostring | total | write | parse | find | total' print '------------------------+----------+-------+-------+-------+------+------' # disable gc gc.disable() for i, mod in enumerate(mods): name = mod.__name__.replace('.etree', '') # manually collect garbage gc.collect() # disable async checks sys.setcheckinterval(99999) s = time.time() res = generate(mod, 3000) e = time.time() work = e - s s = time.time() tmp = mod.tostring(res, encoding='utf-8') e = time.time() tostring = e - s if old_str: old_str_len = len(old_str) assert old_str_len == len(tmp), 'Old: %s, New: %s' % ( old_str_len, len(tmp)) # Disabled because etree & lxml differ in attribute order: #Old: [[ l-v-etree bar="baz" ]] #New: [[ l-v-etree foo="bar" ]] """ for ii, (o, n) in enumerate(zip(old_str, tmp)): b = ii - CONTEXT if ii > CONTEXT else 0 e = ii + CONTEXT if ii < (old_str_len - CONTEXT) \ else old_str_len assert o == n, "\nOld: [[ %s ]]\nNew: [[ %s ]]" % ( old_str[b:e], tmp[b:e]) """ old_str = tmp f = StringIO() if hasattr(mod, 'write'): s = time.time() mod.write(f, encoding='utf-8') e = time.time() write = e - s else: s = time.time() f.write(tmp) e = time.time() write = (e - s) + tostring f.seek(0) s = time.time() tree = mod.parse(f) e = time.time() parse_time = e - s s = time.time() root = tree.getroot() last_child = root.getchildren()[-1].getchildren()[-1].getchildren()[-1] e = time.time() find = e - s # reenable async checks sys.setcheckinterval(0) if old_txt: assert last_child.text == old_txt old_txt = last_child.text f.close() print '%-16s %4d | %4d | %4d | %4d | %4d | %4d | %4d' % ( name, work * 1000, tostring * 1000, (work + tostring) * 1000, write * 1000, parse_time * 1000, find * 1000, (parse_time + find) * 1000) if __name__ == '__main__': main()