"""
xmlhelp.py, a wrapper for the amara xml bindery, by chaynes@indiana.edu
http://www.cs.indiana.edu/~chaynes/lib/xmlhelp.py
"""
# TODO: make namespace aware
# Amara bug: xpath does not like html attribute xmlns="http://www.w3.org/1999/xhtml"
import amara # http://uche.ogbuji.net/tech/4suite/amara/
def children(elt):
    """Return a list of elt's children, leaving out whitespace-only text.

    The children are taken from elt.xml_children in their existing order.
    A child is dropped only when it is a unicode string made up entirely of
    whitespace; every other child (including an empty string) is kept.
    """
    kept = []
    for node in elt.xml_children:
        if isinstance(node, unicode) and node.isspace():
            continue  # whitespace-only text node
        kept.append(node)
    return kept
def child(elt, index=0):
    """Return the child of elt at position index (default: the first),
    counting only the children reported by children(), i.e. ignoring
    whitespace-only text.

    Raises IndexError when there is no child at that position.
    """
    significant = children(elt)
    return significant[index]
def attrs(elt):
    """Return a dictionary mapping attribute ns-qualified names to their
    values.

    elt.xml_attributes maps each property name to a (ns-qualified name,
    namespace URL) pair; the value of each attribute is looked up under the
    property name in elt.xml_properties.
    """
    values = elt.xml_properties
    return dict((qualified, values[prop_name])
                for prop_name, (qualified, _ns_url)
                in elt.xml_attributes.items())
def translate_element(elt, table, document=None, filter=lambda elt: True):
    """Return a modified deep copy of elt, translated according to table.

    table is a dictionary mapping old tags (element local names) to either
    a new tag or a pair of the form (newtag, {attribute: value}).

    Elements whose tag is not in table are copied with their tag and
    attributes unchanged.  When the table entry is just a new tag, the old
    attributes are kept.  When it is a pair, the given attribute dictionary
    replaces the old attributes, except that a value of None means "use the
    old value of that attribute" (the attribute is dropped if the old
    element did not have it).

    document is the object used to create the new elements; it defaults to
    elt.  Children (text included) for which filter returns false are left
    out of the copy.

    Raises Exception if elt is a document (amara.bindery.root_base) rather
    than an element.

    NOTE(review): the XML passed to make_document and the expected output
    in the examples below look truncated (the markup is missing); confirm
    against the original source before relying on them as doctests.

    >>> doc = make_document('text')
    >>> print translate_element(doc.a, {'b': 'new'}).xml()
    text
    >>> table = {'b': ('new', {'foo': 4, 'c': None})}
    >>> print translate_element(doc.a, table).xml()
    text
    """
    def copy_node(node):
        if isinstance(node, unicode):
            return node  # text is carried over as-is
        tag = node.localName
        attributes = attributes_dict(node.xml_properties)
        kids = [copy_node(sub) for sub in node.xml_children if filter(sub)]
        # Remove subelement properties: only unicode values are attributes.
        for key in [k for k, v in attributes.items()
                    if not isinstance(v, unicode)]:
            del attributes[key]
        if tag not in table:
            return make_element(document, tag, *kids, **attributes)
        entry = table[tag]
        if not isinstance(entry, tuple):
            # Plain rename: keep the old attributes.
            return make_element(document, entry, *kids, **attributes)
        new_tag, overrides = entry
        new_attributes = dict(overrides)  # never modify the caller's table
        for key, value in list(new_attributes.items()):
            if value is not None:
                continue
            # None means "inherit the old value", or drop if there is none.
            if key in attributes:
                new_attributes[key] = attributes[key]
            else:
                del new_attributes[key]
        return make_element(document, new_tag, *kids, **new_attributes)

    if not document:
        document = elt
    if isinstance(elt, amara.bindery.root_base):
        raise Exception('Error: can translate elements, not documents')
    return copy_node(elt)
## Does not work because changes in the name of a node must be reflected in
## properties of the parent.
## def translate_element(elt, table):
## """table is a list of tuples of the form
## (xpath, newtag[, {attribute: value}]).
## Perform the xpath searches from elt, replacing matching nodes with ones
## built using the corresponding newtag and attributes. If the attributes
## are omitted, they default to the ones in the old node. If a given attribute
## value is None, the old node value is used.
## >>> doc = make_document('text')
## >>> table = [('//b', 'new')]
## >>> translate_element(doc, table).xml()
## >>> print doc.xml()
##
## text
## """
## # TODO: generalize for content lists
## for entry in table:
## xpath, tag = entry[:2]
## if len(entry) == 3:
## attributes = entry[2]
## else:
## attributes = None
## ## this does one xpath response at a time, but doesn't fix the problem
## ## while True:
## ## elts = elt_or_doc.xml_xpath(xpath)
## ## if not elts:
## ## break
## ## replace_element(elts[0], tag=tag, attributes=attributes)
## for elt in elt_or_doc.xml_xpath(xpath):
## replace_element(elt, tag=tag, attributes=attributes)
## def replace_element(old, new=None, tag=None, content_list=None,
## attributes=None):
## """Replace old element with a new one, returning new one.
## Keywords:
## new -- the replacement element, otherwise build a new one
## tag -- the tag for the replacement (cannot be used with new keyword)
## content_list -- indicates content of the new element: see make_content
## attributes -- dictionary that indicates new element attributes
## If the tag, content_list, or attributes keywords are missing along with the
## new keyword, their values are taken from the old element.
## >>> doc = make_document('text')
## >>> replace_element(doc.a.b, tag='c').xml()
## 'text'
## >>> print doc.xml()
##
## text
## >>> replace_element(doc.a.c, attributes={'new': 4}).xml()
## 'text'
## >>> new = replace_element(doc.a, tag='d').xml()
## >>> print doc.xml()
##
## text
## """
## if new is None:
## if tag is None:
## tag = old.localName
## if content_list is None:
## content_list = old.xml_children
## if attributes is None:
## attributes = attributes_dict(old.xml_properties)
## for key, value in attributes.items(): # remove subelement properties
## if not isinstance(value, unicode):
## del attributes[key]
## new = make_element(old, tag, *content_list, **attributes)
## elif tag or content_list or attributes:
## raise Exception("new can't be used with other replace_element keywords")
## old.parentNode.xml_insert_after(old, new)
## old.parentNode.xml_remove_child_at(old.xml_index_on_parent)
## return new
def unicode_dict(d):
    """Return a new dictionary with d's keys and values converted to unicode.

    All trailing underscores are stripped from each key before conversion,
    to support the standard convention for parameter names that conflict
    with Python keywords (e.g. 'class_' becomes u'class').
    """
    converted = {}
    for key in d:
        name = unicode(key.rstrip('_'))
        converted[name] = unicode(d[key])
    return converted
def attributes_dict(d):
    """Return a new dictionary with d's keys converted by str(); the values
    are carried over unchanged.

    Callers in this module expand the result with ** as keyword arguments.
    """
    result = {}
    for key in d:
        result[str(key)] = d[key]
    return result
def make_document(content_thing=[], namespace=None, **keywords):
    """Return a new amara document.

    With no keywords beyond content_thing and namespace, an empty document
    is created.  If further keywords are given, they are converted to
    unicode and passed to amara.create_document to create a DTD declaration
    and corresponding element:
      type -- the DTD and element type (required; checked here)
      pubid -- a basestring for creating the DTD
      sysid -- a basestring for creating the DTD
      attributes -- a dictionary for creating the element (converted with
                    unicode_dict)
    pubid and sysid are expected alongside type, but this function does not
    verify that they are present.

    content_thing may be a string or a list.  Its content is appended to
    the element created for `type` when there is one, otherwise to the
    document itself.  A string is parsed as an XML fragment; anything else
    is built with make_content (using namespace) and inserted with
    insert_content.

    Raises Exception if extra keywords are given without "type".
    """
    # The list default of content_thing is only read, never mutated, so the
    # shared default object is harmless.
    #
    # Remember whether a root element is requested *before* 'type' is taken
    # out of keywords: testing keywords afterwards would send the content
    # to the document when 'type' was the only keyword supplied.
    has_root = bool(keywords)
    if has_root:
        if 'type' not in keywords:
            raise Exception('keyword "type" required')
        for key in list(keywords):
            if key == 'attributes':
                keywords[key] = unicode_dict(keywords[key])
            else:
                keywords[key] = unicode(keywords[key])
        root_type = keywords.pop('type')
        doc = amara.create_document(root_type, ns=namespace, **keywords)
    else:
        doc = amara.create_document(ns=namespace)
    if content_thing:
        if has_root:
            # Presumably the element created for `type` is the document's
            # first child node -- confirm against amara.create_document.
            target = doc.childNodes[0]
        else:
            target = doc
        if isinstance(content_thing, basestring):
            # NOTE(review): str() fails on unicode containing non-ASCII
            # characters; confirm what xml_append_fragment expects.
            target.xml_append_fragment(str(content_thing))
        else:
            content = make_content(doc, content_thing, namespace=namespace)
            insert_content(target, content)
    return doc
def insert_content(doc_or_elt, content_list, namespace=None,
                   index=None, before=None, after=None):
    """Build the content indicated by content_list (see make_content,
    which also receives namespace) and add it, in order, to the document
    or element doc_or_elt.

    With none of index, before, or after given, the content is appended.
    Otherwise the insertion position is given by one of:
      index -- zero-based position in doc_or_elt.xml_children; the content
               is inserted before the child currently at that position
      before -- existing child the content is inserted in front of
      after -- existing child the content is inserted behind
    At most one of these should be used.  This is not checked: index takes
    precedence over before, and before over after.
    """
    nodes = make_content(doc_or_elt, content_list, namespace=namespace)
    positioned = index is not None or before or after
    if not positioned:
        for node in nodes:
            doc_or_elt.xml_append(node)
        return
    if index is not None:
        before = doc_or_elt.xml_children[index]
    for node in nodes:
        if before:
            # Each node lands directly in front of `before`, which keeps
            # the nodes in their original order.
            doc_or_elt.xml_insert_before(before, node)
        else:
            doc_or_elt.xml_insert_after(after, node)
            after = node  # the next node goes behind the one just added
def make_content(document, content_list, namespace=None):
    """Return a list of content objects built from content_list.

    content_list is a sequence of PCDATA, element, processing instruction,
    xml object, and None values:

    None values are skipped.

    Lists represent elements and are of the form
        [tag, [dictionary,] subcontent, ...]
    where the optional dictionary indicates attributes, whose keys and
    values are converted to unicode.  If a key ends with trailing
    underscores, the underscores are removed (to support the standard
    convention for parameter names that conflict with Python keywords).
    An empty dictionary simply means "no attributes".  namespace is applied
    to the element and to all elements nested inside it.

    Tuples represent processing instructions and are of the form
        ("tag text...", [dictionary])
    where the dictionary represents attributes, as in the element form.
    The text after the tag may be omitted.

    Bindery objects (instances of amara.bindery.element_base or
    amara.bindery.pi_base) are added unchanged.

    Other values are converted to unicode representing PCDATA.

    NOTE(review): the expected output in the examples below looks truncated
    (the markup is missing); confirm against the original source before
    relying on them as doctests.

    >>> me = lambda *content_list, **attrs: make_element(*content_list,
    ...     **attrs).xml()
    >>> doc = make_document()
    >>> me(doc, 'a', 3, b=1)
    '3'
    >>> me(doc, 'a', 3, ['c', {'at': 5}, ['d', 4]], b=1)
    '34'
    >>> insert_content(doc, make_content(doc, [['a'], 3, ['b', {'c':4}, 5]]))
    >>> print doc.xml()
    35
    >>> doc = make_document()
    >>> insert_content(doc, make_content(doc, [['a', {'href': u'#name'}, u'module_', ['strong', u'name'], '']]))
    >>> print doc.xml()
    module_name
    """
    def make_e(spec):
        # Decide by type, not truthiness: an empty attribute dictionary
        # must still be consumed as the attribute slot, otherwise it would
        # end up in the content as the text "{}".
        if len(spec) > 1 and isinstance(spec[1], dict):
            attributes = spec[1]
            rest = spec[2:]
        else:
            attributes = {}
            rest = spec[1:]
        # Pass the namespace down so nested elements get it as well.
        subcontent = make_content(document, rest, namespace=namespace)
        return make_element(document, spec[0], namespace=namespace,
                            *subcontent, **attributes)

    def make_p(spec):
        # "tag text..." -- the text part is optional.
        parts = spec[0].split(' ', 1)
        tag = parts[0]
        text = ''
        if len(parts) > 1:
            text = parts[1]
        attributes = {}
        if len(spec) > 1 and spec[1]:
            attributes = spec[1]
        return make_pi(tag, text, **attributes)

    xml_content = []
    for value in content_list:
        if value is None:
            continue
        elif isinstance(value, list):
            xml_content.append(make_e(value))
        elif isinstance(value, tuple):
            xml_content.append(make_p(value))
        elif isinstance(value, (amara.bindery.pi_base,
                                amara.bindery.element_base)):
            xml_content.append(value)
        else:
            xml_content.append(unicode(value))
    return xml_content
def append_element(xml_element, tag, *content_list, **attributes):
    """Create an element with make_element, append it to xml_element, and
    return it.

    The new element is created through xml_element.rootNode; tag,
    content_list, and attributes are passed on to make_element unchanged.
    """
    owner = xml_element.rootNode
    created = make_element(owner, tag, *content_list, **attributes)
    xml_element.xml_append(created)
    return created
def make_element(document, tag, *content_list, **attributes):
    """Make an element for the document with tag, content_list, and
    attributes.  See make_content for the content_list format.

    The keyword 'namespace' is not treated as an attribute: it is removed
    from attributes and used as the namespace of the new element and of its
    content (default None).

    If content_list starts with a dictionary, that dictionary is removed
    from the content and merged into attributes, overriding keywords of the
    same name.

    Attribute names and values are converted with unicode_dict.

    Raises Exception if tag consists only of whitespace.
    """
    if tag.isspace():
        raise Exception('tag cannot be whitespace')
    ns = attributes.pop('namespace', None)
    if content_list and isinstance(content_list[0], dict):
        leading = content_list[0]
        content_list = list(content_list[1:])
        attributes.update(leading)
    new_elt = document.xml_create_element(
        unicode(tag), ns, attributes=unicode_dict(attributes))
    insert_content(new_elt, content_list, namespace=ns)
    return new_elt
def make_pi(tag, text, **attributes):
    """Make a processing instruction with tag, text, and attributes.

    The instruction's data is text followed by the attributes rendered as
    space-separated key=value pairs; each non-empty part is preceded by a
    single space.
    """
    pairs = []
    for key, value in attributes.items():
        pairs.append(unicode(key) + u'=' + unicode(value))
    suffix = ' '.join(pairs)
    if suffix:
        suffix = ' ' + suffix
    if text:
        text = ' ' + text
    pi = amara.bindery.pi_base(unicode(tag), text + suffix)

    def iterate_self():
        return iter([pi])

    pi.__iter__ = iterate_self  # work around bug in amara
    return pi
def _test():
    """Run the doctests via doctest.testmod()."""
    # Scratch code kept for debugging translate_element by hand:
    ## doc = make_document('text')
    ## table = {'b': ('new', {'foo': 4, 'c': None})}
    ## print translate_element(doc.a, table).xml()
    import doctest
    # raise_on_error stays off: when enabled it raises at the wrong point.
    doctest.testmod(raise_on_error=False)


if __name__ == "__main__":
    _test()