"""
xmlhelp.py, a wrapper for the amara xml bindery, by chaynes@indiana.edu
http://www.cs.indiana.edu/~chaynes/lib/xmlhelp.py
"""

# TODO: make namespace aware
# Amara bug: xpath does not like html attribute
#            xmlns="http://www.w3.org/1999/xhtml"

# NOTE(review): the XML markup inside the doctest examples of this module
# appears to have been stripped at some point (e.g. make_document('text')
# followed by doc.a).  It has been restored from the surrounding calls;
# confirm the expected outputs against amara's serializer by running _test().

import amara # http://uche.ogbuji.net/tech/4suite/amara/


def children(elt):
    """Returns a list of non-whitespace children of elt.

    A child is dropped only when it is a unicode (text) child consisting
    entirely of whitespace; every other child is kept, in order.
    """
    return [e for e in elt.xml_children
            if not (isinstance(e, unicode) and e.isspace())]


def child(elt, index=0):
    """Returns the child of elt indicated by index, ignoring whitespace
    children.

    Raises IndexError if there is no such child.
    """
    return children(elt)[index]


def attrs(elt):
    """Returns dictionary mapping attribute ns-qualified names to their
    values.
    """
    props = elt.xml_properties
    d = {}
    # xml_attributes is keyed like xml_properties; each value is unpacked
    # here as a (qualified name, namespace url) pair.
    for name, (ns_name, ns_url) in elt.xml_attributes.items():
        d[ns_name] = props[name]
    return d


def translate_element(elt, table, document=None, filter=lambda elt: True):
    """Return a modified deep copy of elt, renaming tags as given by table.

    table is a dictionary mapping old tags to new tags or pairs of the form
    (newtag, {attribute: value}).

    document indicates the containing document used to create the new
    elements (defaults to elt).

    Old element attributes are used if the table entry does not indicate
    new attributes.  When a table entry does supply an attribute dictionary,
    only the attributes named in it are set on the new element; if an
    attribute dictionary value is None, the old value for that attribute is
    used (and the attribute is omitted if the old element did not have it).

    Exclude child elements for which filter is false.

    Raises Exception if elt is a document rather than an element.

    >>> doc = make_document('<a><b>text</b></a>')
    >>> print translate_element(doc.a, {'b': 'new'}).xml()
    <a><new>text</new></a>
    >>> table = {'b': ('new', {'foo': 4, 'c': None})}
    >>> print translate_element(doc.a, table).xml()
    <a><new foo="4">text</new></a>
    """
    def walk(node):
        # Text children are copied through unchanged.
        if isinstance(node, unicode):
            return node
        old_tag = node.localName
        properties = attributes_dict(node.xml_properties)
        new_children = [walk(c) for c in node.xml_children if filter(c)]
        # Remove subelement properties: only unicode-valued properties are
        # attributes.  Built by filtering rather than by deleting from the
        # dictionary while looping over it.
        old_attributes = dict([(key, value)
                               for key, value in properties.items()
                               if isinstance(value, unicode)])
        new_tag = old_tag
        new_attributes = old_attributes
        if old_tag in table:
            entry = table[old_tag]
            if isinstance(entry, tuple):
                new_tag, requested = entry
                new_attributes = {}
                for key, value in dict(requested).items():
                    if value is not None:
                        new_attributes[key] = value
                    elif key in old_attributes:
                        # None means "carry the old value over".
                        new_attributes[key] = old_attributes[key]
            else:
                new_tag = entry
        return make_element(document, new_tag, *new_children,
                            **new_attributes)

    # Compare against None explicitly so the default does not depend on
    # the truth value of a bindery node.
    if document is None:
        document = elt
    if isinstance(elt, amara.bindery.root_base):
        raise Exception('Error: can translate elements, not documents')
    return walk(elt)


## Does not work because changes in the name of a node must be reflected in
## properties of the parent.
## def translate_element(elt, table):
##     """table is a list of tuples of the form
##     (xpath, newtag[, {attribute: value}]).
##     Perform the xpath searches from elt, replacing matching nodes with ones
##     build using the corresponding newtag and and attributes. If the
##     attributes are omitted, they default to the ones in the old node. If a
##     given attribute value is None, the old node value is used.
##     >>> doc = make_document('<a><b>text</b></a>')
##     >>> table = [('//b', 'new')]
##     >>> translate_element(doc, table).xml()
##     >>> print doc.xml()
##     <?xml version="1.0" encoding="UTF-8"?>
##     <a><new>text</new></a>
##     """
##     # TODO: generalize for content lists
##     for entry in table:
##         xpath, tag = entry[:2]
##         if len(entry) == 3:
##             attributes = entry[2]
##         else:
##             attributes = None
##         ## this does one xpath response at a time, but doesn't fix the problem
##         ## while True:
##         ##     elts = elt_or_doc.xml_xpath(xpath)
##         ##     if not elts:
##         ##         break
##         ##     replace_element(elts[0], tag=tag, attributes=attributes)
##         for elt in elt_or_doc.xml_xpath(xpath):
##             replace_element(elt, tag=tag, attributes=attributes)

## def replace_element(old, new=None, tag=None, content_list=None,
##                     attributes=None):
##     """Replace old element with a new one, returning new one.
##     Keywords:
##       new -- the replacement element, otherwise build a new one
##       tag -- the tag for the replacement (cannot be used with new keyword)
##       content_list -- indicates content of the new element: see make_content
##       attributes -- dictionary that indicates new element attributes
##     If tag, content_list, or attributes kewords are missing along with the
##     new keyword, their values are taken from the old element.
##     >>> doc = make_document('<a><b>text</b></a>')
##     >>> replace_element(doc.a.b, tag='c').xml()
##     '<c>text</c>'
##     >>> print doc.xml()
##     <?xml version="1.0" encoding="UTF-8"?>
##     <a><c>text</c></a>
##     >>> replace_element(doc.a.c, attributes={'new': 4}).xml()
##     '<c new="4">text</c>'
##     >>> new = replace_element(doc.a, tag='d').xml()
##     >>> print doc.xml()
##     <?xml version="1.0" encoding="UTF-8"?>
##     <d><c new="4">text</c></d>
##     """
##     if new is None:
##         if tag is None:
##             tag = old.localName
##         if content_list is None:
##             content_list = old.xml_children
##         if attributes is None:
##             attributes = attributes_dict(old.xml_properties)
##             for key, value in attributes.items(): # remove subelement properties
##                 if not isinstance(value, unicode):
##                     del attributes[key]
##         new = make_element(old, tag, *content_list, **attributes)
##     elif tag or content_list or attributes:
##         raise Exception("new can't be used with other replace_element keywords")
##     old.parentNode.xml_insert_after(old, new)
##     old.parentNode.xml_remove_child_at(old.xml_index_on_parent)
##     return new


def unicode_dict(d):
    """Return corresponding dictionary with unicode keys and values.

    If a key ends with trailing underscores, the underscores are removed (to
    support the standard convention for parameter names that conflict with
    python keywords).
    """
    return dict([(unicode(key.rstrip('_')), unicode(value))
                 for key, value in d.iteritems()])


def attributes_dict(d):
    """Return corresponding dictionary with string keys.

    Keys are converted with str(); values are left untouched.
    """
    return dict([(str(key), value) for key, value in d.iteritems()])


def make_document(content_thing=None, namespace=None, **keywords):
    """Return a new amara document.

    If there are keywords (in addition to content_thing and namespace), a DTD
    declaration and corresponding element are created.  The keywords type,
    pubid, and sysid are then all required, and attributes is optional:
      type -- the DTD and element type
      pubid -- a basestring for creating the DTD
      sysid -- a basestring for creating the DTD
      attributes -- a dictionary for creating the element
    Only the presence of type is checked here; the remaining keywords are
    passed through to amara.create_document.

    The content_thing argument may be a string or list, and the indicated
    content is appended to the document or DTD element body.  If
    content_thing is a string, it is parsed.  Otherwise, make_content
    populates the new document as indicated by the content and namespace
    arguments.  An empty or omitted content_thing adds nothing.

    Raises Exception if keywords are given without type.
    """
    # Decide where content goes *before* 'type' is removed from keywords:
    # otherwise a call that passes only type would leave keywords empty and
    # the content would be appended to the document instead of the element.
    has_doc_element = bool(keywords)
    if has_doc_element:
        for key in keywords:
            if key == 'attributes':
                keywords[key] = unicode_dict(keywords[key])
            else:
                keywords[key] = unicode(keywords[key])
        if 'type' not in keywords:
            raise Exception('keyword "type" required')
        doc_type = keywords.pop('type')
        doc = amara.create_document(doc_type, ns=namespace, **keywords)
    else:
        doc = amara.create_document(ns=namespace)
    if content_thing:
        if has_doc_element:
            d_or_e = doc.childNodes[0]
        else:
            d_or_e = doc
        if isinstance(content_thing, basestring):
            # NOTE(review): str() fails on unicode input containing
            # non-ASCII characters -- confirm what xml_append_fragment
            # accepts before changing this.
            d_or_e.xml_append_fragment(str(content_thing))
        else:
            content = make_content(doc, content_thing, namespace=namespace)
            insert_content(d_or_e, content)
    return doc


def insert_content(doc_or_elt, content_list, namespace=None, index=None,
                   before=None, after=None):
    """Adds the content indicated by content_list to the document or element.

    See make_content.  At most one of the keywords index, before, or after
    may be used to specify the insertion position, which defaults to the end
    (appending).  index is a zero-based number, while before and after refer
    to existing child elements.  If more than one is given, index takes
    precedence over before, and before over after.

    Returns None.
    """
    content = make_content(doc_or_elt, content_list, namespace=namespace)
    if index is not None:
        before = doc_or_elt.xml_children[index]
    # Positions are compared against None explicitly, not by truth value.
    if before is not None:
        # Inserting each item before the same node keeps the items in order.
        for celt in content:
            doc_or_elt.xml_insert_before(before, celt)
    elif after is not None:
        for celt in content:
            doc_or_elt.xml_insert_after(after, celt)
            # Advance the anchor so the next item follows this one.
            after = celt
    else:
        for celt in content:
            doc_or_elt.xml_append(celt)


def make_content(document, content_list, namespace=None):
    """Returns a list of content objects given a content list.

    content_list is a list of PCDATA, element, processing instruction, xml
    object, and None values.  None values are skipped.

    Lists represent elements and are of the form
      [tag, [dictionary,] subcontent_list, ...]
    where the dictionary indicates attributes, whose keys and values are
    converted to unicode.  If a key ends with trailing underscores, the
    underscores are removed (to support the standard convention for parameter
    names that conflict with python keywords).  An empty dictionary is
    accepted and means "no attributes".

    Tuples represent processing instructions and are of the form
      ("tag text...", [dictionary])
    where the dictionary represents attributes, as in the element form.  The
    strings are converted to unicode.  The text part may be omitted ("tag").

    Bindery objects (instances of amara.bindery.element_base or
    amara.bindery.pi_base) are added unchanged.

    Other values are converted to unicode representing PCDATA.

    >>> me = lambda *content_list, **attrs: make_element(*content_list,
    ...     **attrs).xml()
    >>> doc = make_document()
    >>> me(doc, 'a', 3, b=1)
    '<a b="1">3</a>'
    >>> me(doc, 'a', 3, ['c', {'at': 5}, ['d', 4]], b=1)
    '<a b="1">3<c at="5"><d>4</d></c></a>'
    >>> insert_content(doc, make_content(doc, [['a'], 3, ['b', {'c':4}, 5]]))
    >>> print doc.xml()
    <?xml version="1.0" encoding="UTF-8"?>
    <a/>3<b c="4">5</b>
    >>> doc = make_document()
    >>> insert_content(doc, make_content(doc, [['a', {'href': u'#name'}, u'module_', ['strong', u'name'], '']]))
    >>> print doc.xml()
    <?xml version="1.0" encoding="UTF-8"?>
    <a href="#name">module_<strong>name</strong></a>
    """
    ##print content_list##
    def make_e(lst):
        # Use an explicit flag, not the truth value of the dictionary:
        # an empty attribute dictionary must still be consumed as attributes
        # and not fall through into the subcontent (where it would be
        # rendered as the text u'{}').
        has_attributes = len(lst) > 1 and isinstance(lst[1], dict)
        if has_attributes:
            attributes = lst[1]
            rest = lst[2:]
        else:
            attributes = {}
            rest = lst[1:]
        # NOTE(review): namespace is not passed down to nested content, so
        # nested elements are created without it -- see the module TODO.
        subcontent = make_content(document, rest)
        return make_element(document, lst[0], namespace=namespace,
                            *subcontent, **attributes)

    def make_p(tup):
        # "tag text..." -- the text is optional, so a bare "tag" must not
        # fail to unpack.
        parts = tup[0].split(' ', 1)
        tag = parts[0]
        if len(parts) > 1:
            text = parts[1]
        else:
            text = ''
        if len(tup) > 1 and tup[1]:
            attributes = tup[1]
        else:
            attributes = {}
        return make_pi(tag, text, **attributes)

    xml_content = []
    for value in content_list:
        if value is None:
            continue
        elif isinstance(value, list):
            xml_content.append(make_e(value))
        elif isinstance(value, tuple):
            xml_content.append(make_p(value))
        elif isinstance(value, (amara.bindery.pi_base,
                                amara.bindery.element_base)):
            xml_content.append(value)
        else:
            xml_content.append(unicode(value))
    return xml_content


def append_element(xml_element, tag, *content_list, **attributes):
    """Call make_element with xml_element's document and other given
    arguments, append the new element to xml_element, and return the new
    element.
    """
    new_elt = make_element(xml_element.rootNode, tag, *content_list,
                           **attributes)
    xml_element.xml_append(new_elt)
    return new_elt


def make_element(document, tag, *content_list, **attributes):
    """Make an element for the document with tag, content_list, and
    attributes.

    See make_content for the forms content_list items may take.

    The attribute 'namespace' is treated as a keyword and passed to the
    document's xml_create_element method (default None).

    If content_list starts with a dictionary, it is removed and its entries
    are added to the attributes (overriding keyword attributes of the same
    name).

    Returns the new element.  Raises Exception if tag is empty or consists
    only of whitespace.
    """
    if not tag:
        raise Exception('tag cannot be empty')
    if tag.isspace():
        raise Exception('tag cannot be whitespace')
    ns = attributes.pop('namespace', None)
    if content_list and isinstance(content_list[0], dict):
        content_list = list(content_list)
        for key, value in content_list.pop(0).items():
            attributes[key] = value
    # Named so as not to shadow the module-level attrs() function.
    unicode_attributes = unicode_dict(attributes)
    elt = document.xml_create_element(unicode(tag), ns,
                                      attributes=unicode_attributes)
    insert_content(elt, content_list, namespace=ns)
    return elt


def make_pi(tag, text, **attributes):
    """Make a processing instruction with tag and attributes.

    The instruction data is text followed by the attributes rendered as
    space-separated key=value items (values are not quoted here).  An empty
    or None text contributes nothing.
    """
    dict_data = ' '.join([unicode(key) + u'=' + unicode(value)
                          for key, value in attributes.items()])
    if text:
        text = ' ' + text
    else:
        # Covers None as well as '', so the concatenation below is safe.
        text = u''
    if dict_data:
        dict_data = ' ' + dict_data
    pi = amara.bindery.pi_base(unicode(tag), text + dict_data)
    pi.__iter__ = lambda: iter([pi]) # work around bug in amara
    return pi


def _test():
    """Run the doctest examples embedded in this module's docstrings."""
##    doc = make_document('<a><b>text</b></a>')
##    table = {'b': ('new', {'foo': 4, 'c': None})}
##    print translate_element(doc.a, table).xml()
    import doctest
    doctest.testmod(raise_on_error=False) # raise_on_error raises at wrong point


if __name__ == "__main__":
    _test()