4 This filter plugin implements minimal html tag balancing, and can also
5 convert ewiki_page() output into (hopefully) valid xhtml. It only works
6 around some markup problems found in ewiki and those that may arise from
7 Wiki markup abuse; however, it provides no fix for <ul> inside <ul> or even
8 <h2> inside <p> problems (these should rather be fixed in the ewiki_format
9 function). So the following code is not meant to fix any possible html file,
10 and it certainly won't make valid html files out of random binary data.
11 So for full html spec conformance you should rather use W3C Tidy (by
12 using your web server's "Filter" directive).
#-- EWIKI_XHTML defaults to 0; presumably it toggles the xhtml conversion
#   mentioned in the header comment (its use is not visible here - confirm).
#   The define is guarded so that a value set before this plugin is loaded
#   stays in effect without raising a "constant already defined" diagnostic.
if (!defined("EWIKI_XHTML")) {
   define("EWIKI_XHTML", 0);
}

#-- append the balancer's function name to the "page_final" entry
#   of the global $ewiki_plugins registry
$ewiki_plugins["page_final"][] = "ewiki_html_tag_balancer";
20 function ewiki_html_tag_balancer(&$html) {
23 $html_standalone = array(
25 "input", "meta", "link",
28 "a", "abbr", "acronym", "address", "applet", "area", "b", "base",
29 "basefont", "bdo", "big", "blockquote", "body", "br", "button",
30 "caption", "center", "cite", "code", "col", "colgroup", "dd", "del",
31 "dfn", "dir", "div", "dl", "dt", "em", "fieldset", "font", "form",
32 "h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html", "i",
33 "iframe", "img", "input", "ins", "kbd", "label", "legend", "li",
34 "link", "map", "menu", "meta", "noframes", "noscript", "object", "ol",
35 "optgroup", "option", "p", "param", "pre", "q", "s", "samp", "script",
36 "select", "small", "span", "strike", "strong", "style", "sub", "sup",
37 "table", "tbody", "td", "textarea", "tfoot", "th", "thead", "title",
38 "tr", "tt", "u", "ul", "var",
39 #-- H2.0 "nextid", "listing", "xmp", "plaintext",
40 #-- H3.2 "frame", "frameset",
41 #-- X1.1 "rb", "rbc", "rp", "rt", "rtc", "ruby",
43 $close_opened_when = array(
44 "p", "div", "ul", "td", "table", "tr",
47 $html_tags = array_merge( (array) $html_tags, array(
48 "bgsound", "embed", "layer", "multicol", "nobr", "noembed",
52 #-- walk through all tags
58 while (($pos < $len) && $loop--) {
61 $l = strpos($html, "<", $pos);
62 $r = strpos($html, ">", $l);
63 if (($l===false) or ($r===false)) {
65 $done .= substr($html, $pos);
69 #-- copy plain text part
71 $done .= substr($html, $pos, $l-$pos);
75 #-- analyze current html tag
78 $tag = substr($html, $l + 1, $r - $l - 1);
80 #-- split into name and attributes
81 $tname = strtolower(strtok($tag, " \t\n>")); // LOWERCASING not needed here really
82 ($tattr = strtok(">")) && ($tattr = " $tattr");
84 // attribute checking could go here
85 // (here we just assume good output from ewiki core)
89 if (substr($tname, 0, 3) == "!--") {
90 $r = strpos($html, "-->", $l+4);
92 $done .= substr($html, $l, $r-$l+3);
97 elseif ($tname[0] != "/") {
100 if (in_array($tname, $html_standalone)) {
101 $tattr = rtrim(rtrim($tattr, "/"));
108 if (in_array($tname, $html_tags)) {
112 $tattr .= " class=\"$tname\"";
115 array_push($tree, $tname);
118 $tag = "$tname$tattr";
122 $tname = substr($tname, 1);
124 if (!in_array($tname, $html_tags)) {
128 #-- check if this is allowed
130 continue; // ignore closing tag
132 $last = array_pop($tree);
133 if ($last != $tname) {
135 #-- close until last opened block element
136 if (in_array($tname, $close_opened_when)) {
140 while (($last = array_pop($tree)) && ($last!=$tname));
142 #-- close last, close current, reopen last
144 array_push($tree, $last);
145 $done .= "</$last></$tname><$last>";
153 #-- readd closing-slash to tag name
161 #-- close still open tags
162 while ($tree && ($last = array_pop($tree))) {
166 #-- copy back changes