3 Data-oriented XML parsing code.
7 #-- simplification wrapper around XML parser -------------------------------
8 # namespace-aware, folds URIs down to expected xmlnamespace prefixes;
9 # this is mostly handled internally, not by php-xml
16 #-- general xmlns= mappings -> we don't want to deal with URIs
17 # but get shortened abbreviations instead (and not unpredictable ones)
# Each entry maps a namespace URI (key) to the short prefix we prefer (value).
# The xmlns() method looks a URI up in $this->xmlns to pick that prefix
# (presumably these entries initialise the $xmlns property -- TODO confirm).
19 "http://www.w3.org/1999/02/22-rdf-syntax-ns#" => "rdf",
20 "http://purl.org/dc/elements/1.1/" => "dc",
21 "http://purl.org/rss/1.0/modules/wiki/" => "rss-wiki",
22 "http://purl.org/rss/1.0/" => "rss", // (at least it's similar enough)
23 "http://purl.org/atom/ns#" => "atom",
25 #<eee># "urn:uuid:c2f41010-65b3-11d1-a29f-00aa00c14882/" => "ms-dav-time",
26 "http://xmlns.com/foaf/0.1/" => "foaf",
28 #-- final xmlns= qualifier down-mapping,
29 # think of this as "local xmlns:tag renaming"
# tag() consults $this->map after namespace prefixes have been folded: when
# the resulting tag name is a key here, the tag is renamed to the mapped value
# (presumably these entries initialise the $map property -- TODO confirm).
34 //"xmlns-prefix:weird-tag-name" => "desired",
#-- parser position, maintained by start() and end():
# $current = name of the element being processed,
# $parent  = the name $current held before the latest start(),
# $stack   = earlier $current values pushed by start() and popped by end()
38 var $current, $parent, $stack=array();
#-- set up the XML parser and hook this object's handler methods into it
# $xml           XML source text (parse() later reads $this->xml; presumably
#                it is assigned from this argument -- TODO confirm)
# $charset       encoding handed to xml_parser_create(); NULL leaves the
#                choice to the XML extension
# $targetcoding  encoding of the strings the parser delivers to the handlers
# NOTE(review): this is a PHP4-style constructor (method named like its
# class; easy_xml_rss calls it as parent::easy_xml()). PHP 8 no longer
# treats such methods as constructors -- confirm the targeted PHP version.
42 function easy_xml($xml="", $charset=NULL, $targetcoding="ISO-8859-1") {
44 #-- init (no "_ns" parser, because we can handle that better)
45 $this->xp = xml_parser_create($charset);
# keep tag and attribute names in their original case
46 xml_parser_set_option($this->xp, XML_OPTION_CASE_FOLDING, false);
# skip values that consist of whitespace only
47 xml_parser_set_option($this->xp, XML_OPTION_SKIP_WHITE, true);
48 xml_parser_set_option($this->xp, XML_OPTION_TARGET_ENCODING, $targetcoding);
# callbacks are methods of this object: cdata() for text, start()/end() for tags
52 xml_set_character_data_handler($this->xp, array(&$this,"cdata"));
53 xml_set_element_handler($this->xp, array(&$this,"start"), array(&$this,"end"));
55 #-- we take care of namespaces ourselves
# $xmlns2 holds the prefix rewrites: filled by xmlns(), read by tag()
56 $this->xmlns2 = array();
#-- run the parser over the buffered XML plus $more_xml
# $this->xml and $more_xml are concatenated, trimmed and handed to
# xml_parse() as the final chunk. $r first holds the xml_parse() result and
# is later AND-ed with the result of xml_parser_free()
# (presumably $r is what gets returned -- TODO confirm).
61 function parse($more_xml="") {
62 $r = xml_parse($this->xp, trim($this->xml . $more_xml), $_is_final=TRUE);
# a non-zero error code means the document could not be parsed
63 if ($e = xml_get_error_code($this->xp)) {
# NOTE(review): trigger_error() only accepts the E_USER_* levels, so
# E_WARNING is not a valid level here; E_USER_WARNING is probably what
# was meant -- confirm and fix.
64 trigger_error("XML error #$e: " . xml_error_string($e), E_WARNING);
# drop bookkeeping properties once parsing is over (presumably so that only
# the collected data remains on the object -- TODO confirm)
68 unset($this->current);
72 unset($this->as_content);
73 unset($this->as_list);
# release the parser; $r stays true only if parsing and freeing both succeeded
74 $r = xml_parser_free($this->xp) && ($r);
80 #-- map ugly XMLNS urls to known identifiers/internal representation
# Called by start() and end() as $this->tag($tag) to normalise a tag name.
# The loop splits $tag at its LAST colon and continues only while the part
# before that colon is a key in $this->xmlns2 (the rewrites recorded by
# xmlns()). strrpos() yields false without a colon and 0 for a leading
# colon; both are falsy and end the loop.
82 while (($l = strrpos($tag, ":"))
83 and ($prefix = substr($tag, 0, $l))
84 and (isset($this->xmlns2[$prefix])) )
# $ns = the prefix we prefer in place of the one used by the document
86 $ns = $this->xmlns2[$prefix];
91 #-- strip known xmlns qualifier/moniker
92 $tag = substr($tag, $l+1);
# re-attach the preferred prefix (whether this always happens depends on
# the surrounding conditions -- TODO confirm)
94 $tag = $ns . ":" . $tag;
99 #-- and also rewrite to preferred tag names
# $this->map renames complete (prefixed) tag names
100 if (isset($this->map[$tag])) {
101 $tag = $this->map[$tag];
108 #-- log <opening> tags
# Start-element callback registered with the parser in the constructor.
# $xp    parser handle (passed on to xmlns())
# $tag   element name; taken by reference and replaced by its normalised form
# $attr  attribute name => value array of the element
109 function start($xp, &$tag, &$attr) {
110 #-- normalize attributes and discover namespaces
113 foreach ($attr as $i=>$v) {
# an attribute "xmlns:foo" declares prefix "foo" for namespace URI $v
114 if (strncmp($i, "xmlns:", 6) == 0) {
115 $this->xmlns($xp, substr($i, 6), $v);
125 #-- normalize tag names
126 $tag = $this->tag($tag);
128 #-- track where we are
# NOTE(review): the stack receives the previous $current (not the previous
# $parent); end() pops that value into $parent after copying $parent into
# $current, so both can end up holding the same name -- confirm this is
# intended for documents nested deeper than two levels.
129 if ($this->current) {
130 $this->stack[] = $this->current;
132 $this->parent = $this->current;
133 $this->current = $tag;
137 #-- track </end> tags
# End-element callback registered with the parser in the constructor.
# $tag is taken by reference and replaced by its normalised form, so a
# caller such as easy_xml_data::end() sees the rewritten name afterwards.
138 function end($xp, &$tag) {
139 $tag = $this->tag($tag);
# step back: the former parent becomes current again, and the value that
# start() pushed last becomes the new parent (NULL once the stack is empty)
140 $this->current = $this->parent;
141 $this->parent = array_pop($this->stack);
#-- character data callback registered with the parser in the constructor;
# easy_xml_data overrides it to collect the text
146 function cdata($xp, $data) {
150 #-- we handle namespaces ourselves to decipher silly URIs and
151 # get rid of uncommon prefixes
# Called by start() for every "xmlns:<short>" attribute.
# $xp     parser handle (not used in the statements documented here)
# $short  prefix the document declares for the namespace
# $uri    namespace URI
152 function xmlns($xp, $short, $uri) {
153 #-- setup back-mapping to OUR preferred xmlnamespace abbr
# NOTE(review): $this->xmlns[$uri] is read without isset(), so an unknown
# URI raises an undefined-index notice/warning -- confirm whether guarded.
154 if ($desired = $this->xmlns[$uri]) {
# record the rewrite only when the document's prefix differs from ours and
# the slot is either still free or was marked overwritable earlier
155 if (($short != $desired)
156 and (isset($this->xmlns2["rw:"][$short])
157 or !isset($this->xmlns2[$short])) )
159 $this->xmlns2[$short] = $desired;
160 $this->xmlns2["rw:"][$short] = 1; // mark as overwritable entry
162 // prevents 1:1-conversions
# remember the document's prefix for this URI (presumably the branch taken
# for URIs without a preferred prefix -- TODO confirm)
166 $this->xmlns[$uri] = $short; // log
167 // $this->xmlns2[$short] = $short;
176 #-- simple data/array XML file --------------------------------------------
177 # can decode only two-level data containers
# Collects text as $this->l[<parent tag>][<current tag>] while parsing and,
# when a container element closes, moves its collected entries to a property
# named after the (normalised) tag.
178 class easy_xml_data extends easy_xml {
180 #-- which elements always to expect multiple times (= becomes list)
# list of tag names; end() checks it with in_array()
181 var $as_list = array();
183 #-- which attributes to move into cdata
# tag name => attribute name; start() feeds that attribute's value to cdata()
184 var $as_content = array();
187 #-- append string data to collection
# NOTE(review): the statement below assigns with "=", so a second call for
# the same parent/current pair replaces the earlier text rather than
# appending to it -- confirm whether chunked text can occur here.
188 function cdata($xp, $data) {
190 $this->l[$this->parent][$this->current] = $data;
195 #-- extract single blocks from array collection list
# NOTE(review): $tag is taken by value here but by reference in
# easy_xml::end(); verify that the targeted PHP version accepts the differing
# signature.
196 function end($xp, $tag) {
# the parent normalises $tag in place (its parameter is a reference) and
# steps the position back
198 parent::end($xp, $tag);
# only elements for which child data was collected are exported
200 if (isset($this->l[$tag])) {
# a repeated tag, or one named in $as_list, is stored as a list of blocks
202 if (isset($this->{$tag}) || in_array($tag, $this->as_list)) {
203 #-- convert into list
# an existing value without index 0 is a single block, not yet a list
# (presumably it gets wrapped into a one-element array -- TODO confirm)
204 if (isset($this->{$tag}) && !isset($this->{$tag}[0])) {
205 $this->{$tag} = array(
209 $this->{$tag}[] = $this->l[$tag];
# otherwise the block is stored directly as the property value
213 $this->{$tag} = $this->l[$tag];
# the scratch entry is cleared for the next element with the same name
215 unset($this->l[$tag]);
220 #-- converts certain expected tag attributes into cdata
221 function start($xp, &$tag, &$attr) {
# the parent registers namespaces, normalises $tag and updates the position
222 parent::start($xp, $tag, $attr);
223 foreach ($attr as $i=>$content) {
# NOTE(review): $this->as_content[$tag] is read without isset() and compared
# loosely (==) -- confirm this is acceptable for tags missing from the map.
224 if ($this->as_content[$tag]==$i) {
# treat the attribute value as if it were the element's text
225 $this->cdata($xp, $content);
234 #-- special simplifications for RSS- and Atom- XML files --------------------
# Extends easy_xml_data with extra prefix rewrites, tag renames, attribute
# conversions and list tags so that both feed formats yield similar data.
235 class easy_xml_rss extends easy_xml_data {
237 #-- add a few mappings to make RSS and Atom look similar
# PHP4-style constructor: $xml, $cs (charset) and $tc (target encoding) are
# passed straight on to easy_xml::easy_xml()
238 function easy_xml_rss($xml, $cs=NULL, $tc="ISO-8859-1") {
239 parent::easy_xml($xml, $cs, $tc);
240 // kill some of _our_ namespace prefixes
# "+=" is an array union: keys that already exist keep their values
241 $this->xmlns2 += array(
247 // rename tags (mainly for Atom)
# (presumably an entry of the tag rename map used by tag() -- TODO confirm)
252 "content" => "description",
257 // tag attributes to auto-convert into content
258 $this->as_content += array(
261 // always make list-array of items
# plain assignment: replaces any list configured before
262 $this->as_list = array(
271 #-- decode into structs (fast, but annoying to work with) ------------------
272 class ewiki_xml_fast {
279 xml_parse_into_struct($this->xp, $this->xml, $this->data, $this->tags);
281 #-- fix xmlns, tag names
282 $data = &$this->data;
283 $tags = &$this->tags;
284 foreach ($data as $i=>$d) {
285 if ($new = $this->tag($data[$i]["tag"])) {
286 $data[$i]["tag"] = $new;
288 if (isset($data[$i]["attributes"])) {
289 foreach ($data[$i]["attributes"] as $key=>$val) {
290 if ($new = ewiki_short_xmlns($key, $xmlns)) {
291 unset($data[$i]["attributes"][$key]);
292 $data[$i]["attributes"][$new] = $val;
297 foreach ($tags as $key=>$val) {
298 if ($new = $this->tag($key)) {