@@ -37,36 +37,39 @@ def _usage(this_file):
3737 return """SYNOPSIS: pretty print an XML document
3838USAGE: python %s <filename> \n """ % this_file
3939
def _pprint_line(indent_level, line, width=100, output=_sys.stdout, ignore_contents=False):
    """Write one markup line to ``output``, indented and wrapped at ``width``.

    Blank (whitespace-only) lines produce no output at all.

    Args:
        indent_level: Number of spaces written before the line.
        line: Text of a single tag, comment or similar markup item.
        width: Column budget; an attribute that would push the current
            output line past it is moved to a continuation line aligned one
            column after the end of the tag name.
        output: Any object with a ``write(str)`` method.
        ignore_contents: When true the line is written verbatim (indent +
            line + newline) and no attribute wrapping is attempted.

    The opening-tag pattern only recognises prefixed names (``prefix:name``).
    A line on which either the opening or the closing delimiter cannot be
    found is written verbatim instead of being reformatted.
    """
    if not line.strip():
        return

    start = " " * indent_level
    if ignore_contents:
        output.write(start + line + "\n")
        return

    try:
        # Only these two lookups can fail; keep the try body that narrow.
        elem_start = _re.findall(r"(\<\W{0,1}\w+:\w+) ?", line)[0]
        elem_finished = _re.findall(r"([?|\]\]/|\-\-]*\>)", line)[0]
    except IndexError:
        # Give up pretty printing this line and emit it unchanged.
        output.write(start + line + "\n")
        return

    attrs = _re.findall(r'(\S*?\=".*?")', line)
    head = start + elem_start
    continuation_column = len(head) + 1

    output.write(head)
    number_chars = len(head)
    last_position = len(attrs) - 1
    for position, attr in enumerate(attrs):
        # Position-based check: list.index() would report the first of two
        # textually identical attributes and miss the real last one.
        if position == last_position:
            # The closing delimiter shares the line with the last attribute.
            number_chars += len(elem_finished)
        if number_chars + len(attr) + 1 > width:
            output.write("\n" + " " * continuation_column)
            number_chars = continuation_column
        else:
            output.write(" ")
            number_chars += 1
        output.write(attr)
        number_chars += len(attr)
    output.write(elem_finished + "\n")
7174
7275
@@ -80,7 +83,8 @@ def _get_next_elem(data):
8083 start_pos = data .find ("<" )
8184 end_pos = data .find (">" ) + 1
8285 retval = data [start_pos :end_pos ]
83- stopper = retval .rfind ("/" )
86+ stopper = retval .rfind ("/" )
87+ ignore_contents = False
8488 if stopper < retval .rfind ("\" " ):
8589 stopper = - 1
8690 single = (stopper > - 1 and ((retval .find (">" ) - stopper ) < (stopper - retval .find ("<" ))))
@@ -89,23 +93,32 @@ def _get_next_elem(data):
8993 ignore_question = retval .find ("<?" ) > - 1
9094
9195 if ignore_excl :
96+ ignore_contents = True
9297 cdata = retval .find ("<![CDATA[" ) > - 1
9398 if cdata :
9499 end_pos = data .find ("]]>" )
95100 if end_pos > - 1 :
96101 end_pos = end_pos + len ("]]>" )
102+ stopper = end_pos
103+ else :
104+ end_pos = data .find ("-->" )
105+ if end_pos > - 1 :
106+ end_pos = end_pos + len ("-->" )
107+ stopper = end_pos
108+ retval = data [start_pos :end_pos ]
97109
98110 elif ignore_question :
99111 end_pos = data .find ("?>" ) + len ("?>" )
100112 ignore = ignore_excl or ignore_question
101113
102114 no_indent = ignore or single
103115
104- #print retval, end_pos, start_pos, stopper > -1, no_indent
116+
105117 return start_pos , \
106118 end_pos , \
107119 stopper > - 1 , \
108- no_indent
120+ no_indent , \
121+ ignore_contents
109122
110123def get_pprint (xml , indent = 4 , width = 80 ):
111124 """Returns the pretty printed xml """
@@ -116,6 +129,8 @@ def write(self, string):
116129 self .output += string
117130 out = out ()
118131 pprint (xml , output = out , indent = indent , width = width )
132+
133+
119134
120135 return out .output
121136
@@ -126,7 +141,7 @@ def pprint(xml, output=_sys.stdout, indent=4, width=80):
126141 Use indent to select indentation level. Default is 4 """
127142 data = xml
128143 indent_level = 0
129- start_pos , end_pos , is_stop , no_indent = _get_next_elem (data )
144+ start_pos , end_pos , is_stop , no_indent , ignore_contents = _get_next_elem (data )
130145 while ((start_pos > - 1 and end_pos > - 1 )):
131146 _pprint_elem_content (indent_level , data [:start_pos ].strip (),
132147 output = output )
@@ -136,15 +151,16 @@ def pprint(xml, output=_sys.stdout, indent=4, width=80):
136151 _pprint_line (indent_level ,
137152 data [:end_pos - start_pos ],
138153 width = width ,
139- output = output )
154+ output = output ,
155+ ignore_contents = ignore_contents )
140156 data = data [end_pos - start_pos :]
141157 if not is_stop and not no_indent :
142158 indent_level = indent_level + indent
143159
144160 if not data :
145161 break
146162 else :
147- start_pos , end_pos , is_stop , no_indent = _get_next_elem (data )
163+ start_pos , end_pos , is_stop , no_indent , ignore_contents = _get_next_elem (data )
148164
149165
150166if __name__ == "__main__" :
0 commit comments