This plugin allows uploading files, which then get inserted as wiki
pages. It accepts plain text files, but also ZIP files or tarballs
containing multiple ones (if your server can 'tar' or 'unzip'). It can
also parse HTML files and gives acceptable results for them.
It may also convert files from proprietary word processing formats, if
you have the corresponding programs available server-side. But only
enable this for filters you really have, as you could otherwise end up
with empty pages. This probably runs reliably on Unix systems only.
To make it run on Win/NT systems, you'll need to comment out the
unsupported text filters (often all of them), and set _UNTAR and _UNZIP
to more useful values. _UNZIP is also needed for reading OpenOffice
files.
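For example (an illustrative sketch only; whether "pkunzip.exe" or
another unzip tool is available and in the PATH depends on your setup):

   define("EWIKI_UNTAR", 0);                 #-- no tar on Windows
   define("EWIKI_UNZIP", "pkunzip.exe");     #-- DOS/Win unzip tool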
#-- are the following tools available?
define("EWIKI_UNTAR", "tar");      #-- GNU/Linux tar, Cygwin tar.exe
define("EWIKI_UNZIP", "unzip");    #-- unzip or pkunzip.exe

#-- fallback defaults (the @ keeps any earlier define authoritative)
@define("EWIKI_UNTAR", 0);
@define("EWIKI_UNZIP", 0);
#-- additional settings
if (DIRECTORY_SEPARATOR == "/") {
    define("EWIKI_DEV_STDOUT", "/dev/stdout");   #-- Unix
} else {
    define("EWIKI_DEV_STDOUT", "CON");           #-- DOS
}
#-- filter table (Unix rules!)
$ewiki_textfilters = array(
    array("text/plain",                "text/plain", "cat %f"),
    array("application/x-msword",      "text/wiki",  "wvWiki %f -"),
    array("application/x-msword",      "text/html",  "wvHtml %f -"),
    array("application/x-msword",      "text/html",  "word2x -f html %f"),
    array("application/x-msword",      "text/plain", "antiword -t %f"),
    array("application/x-wordperfect", "text/html",  "wpd2html %f"),
    array("application/pdf",           "text/html",  "pdftotext -htmlmeta -q %f"),
    array("application/x-latex",       "text/html",  "latex2html < %f"),
    array("x.file-ext/x.docbook",      "text/html",  "docbook2html --nochunks %f"),
    array("text/html",                 "text/html",  "tidy -quiet -latin1 %f"),
    array("text/xhtml",                "text/html",  "tidy -quiet -latin1 %f"),
    array("application/vnd.sun.xml.writer", "text/x.office.content.xml", EWIKI_UNZIP." -p %f content.xml"),
#   array("text/xml+docbook",          "text/wiki",  "db2html %f -"),   # ????
#   array("*/*",   "application/postscript",   "a2ps -q"),
    array("application/postscript",    "text/plain", "pstotext"),
);

$mime_ext["docbook"] = "x.file-ext/x.docbook";
$mime_ext["db"]      = "x.file-ext/x.docbook";
$mime_ext["sgml"]    = "x.file-ext/x.docbook";
$mime_ext["mar"]     = "x-multipart/parallel";
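#-- Example (illustrative only, not part of the shipped table): to accept
#   RTF uploads one could append a filter row and an extension mapping
#   like the following; "unrtf" is an assumption and must actually exist
#   server-side, or you will end up with empty pages:
#
#   $ewiki_textfilters[] = array("application/rtf", "text/html", "unrtf --html %f");
#   $mime_ext["rtf"] = "application/rtf";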
In each line, the accepted input MIME type and the resulting output type
are listed. The command (third entry) must read the file "%f" as input
(or read from stdin), and send its entire output to stdout or to "%o".
"%f" and "%o" are placeholders, which get substituted automatically
(commands may then effectively read "/dev/stdin" and write "/dev/stdout",
with "<" or ">" redirection added where needed).
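For illustration, this is roughly what ewiki_textfile_convert() further
below does with each command ($tmpf being the temporary copy of the
uploaded file):

   if (!strpos($f_prog, "%f")) {
      $f_prog .= " < '%f' ";                          # feed via stdin
   }
   $f_prog = str_replace("%o", EWIKI_DEV_STDOUT, $f_prog);
   $f_prog = str_replace("%f", $tmpf, $f_prog);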
Some of these filters are usually already available on modern UNIX
distros. As a fallback, text data gets ripped out of binary files
(garbage will remain in the page), or the file may be rejected.
#-- plugin registration
$ewiki_plugins["page"]["TextUpload"] = "ewiki_page_textupload";
function ewiki_page_textupload($id, $data, $action) {

    $o = ewiki_make_title($id, $id, 2);

    if (empty($_FILES["upload_text_file"])) {

        $ACCEPT = "text/plain,text/wiki,text/html,text/*,application/x-tar,application/x-gtar,application/x-ustar,application/zip";

        $url = ewiki_script("", $id);
Use this upload form to insert text files as pages into the Wiki. This
function also has super cow powers and can extract multiple files from a
ZIP archive or tarball (compressed or not).
<form action="$url" method="POST" enctype="multipart/form-data">
file <input type="file" name="upload_text_file" accept="$ACCEPT">
<small><br /><br /></small>
<input type="submit" value="store into Wiki">
<input type="checkbox" name="textfile_overwrite_pages" value="1" checked="checked"> overwrite existing page
<input type="checkbox" name="textfile_assume_text" value="1"> assume file is text/plain,
<input type="checkbox" name="textfile_noext_is_text" value="1" checked="checked"> if no .ext
<input type="checkbox" name="textfile_brute_force" value="1"> brute-force extract text data from binary file
strip <select name="textfile_strip_ext"><option value="0">no</option><option value="1" selected="selected">last</option><option value="2">all</option></select> file name extension(s), and use the remaining string as destination page name
or store file as <input type="text" name="textfile_saveas" size="22"> (page name)
        $o .= ewiki_textfile_save($_FILES["upload_text_file"]);
/* This code is responsible for checking the parameters of the uploaded
   $_FILES entry, unpacking ZIP archives and tarballs, and finally
   converting (from *.* into text/wiki) and storing the individual files
   as wiki pages.
*/
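/* Example call (hypothetical values, mirroring what PHP provides in
   $_FILES for an upload field):

   $o .= ewiki_textfile_save(array(
      "tmp_name" => "/tmp/phpA1b2C3",    # temporary upload copy
      "name"     => "ReadMe.txt",        # original client filename
      "type"     => "text/plain",        # client-supplied MIME type
   ));
*/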
function ewiki_textfile_save($file=array()) {

    #set_time_limit(+30);

    $fn      = $file["tmp_name"];
    $fn_orig = $file["name"];
    $mime    = $file["type"];

    #-- pre-guess content type
    if ($_REQUEST["textfile_assume_text"] && !strpos($fn_orig, ".") && ($mime == "application/octet-stream")) {
        $mime = "text/plain";
    }
    #-- is the current file an archive?
    if (strpos($fn_orig, ".sx")) {
        $mime = "application/vnd.sun.xml.writer";
    }
    $untar = (preg_match("#^application/(x-)?(g|us)tar$#", $mime) || preg_match("#\.tar|\.tgz#", $fn_orig)) ? EWIKI_UNTAR : "";
    $unzip = (($mime == "application/zip") || strpos($file["name"], ".zip")) ? EWIKI_UNZIP : "";
    $multimime = (strstr($mime, "multipart/"));
    #-- tarball or zip archive ------------------------------------------------
    if ($untar || $unzip) {

        #-- create temporary dir
        $tmp_dir = EWIKI_TMP."/ewiki-txtupl-$untar$unzip-".time()."-".rand(0,523555).".tmp.d";
        mkdir($tmp_dir);
        $cwd = getcwd();
        chdir($tmp_dir);

        #-- archive extraction
        if ($untar) {
            exec("$untar xzf '$fn'", $uu, $error);
            if ($error) { exec("$untar xjf '$fn'", $uu, $error); }
            if ($error) { exec("$untar xf '$fn'", $uu, $error); }
        }

        #-- go through directory
        $o .= ewiki_textupload_readdir($tmp_dir);

        #-- remove temporary directory
    #-- multipart/ mime archive -----------------------------------------------
    elseif ($multimime) {

    #-- plain file --------------------------------------------------------
        #-- extract wiki content from file
        $content = ewiki_textfile_convert(
            $fn, $fn_orig, $mime,
            $_REQUEST["textfile_brute_force"],
            $_REQUEST["textfile_assume_text"],
            $_REQUEST["textfile_noext_is_text"]
        );

        #-- make short filename
        $fn_orig = strtr($fn_orig, "\\", "/");
        if ($p = strrpos($fn_orig, "/")) {
            $fn_orig = substr($fn_orig, $p+1);
        }
        #-- destination filename
        $dest_id = trim($_REQUEST["textfile_saveas"]);
        if (empty($dest_id)) {
            $dest_id = trim(trim($fn_orig), ".");
            if ($_REQUEST["textfile_strip_ext"] == 2) {
                $dest_id = strtok($fn_orig, ".");
            }
            if ($_REQUEST["textfile_strip_ext"] == 1) {
                if ($p = strrpos($dest_id, ".")) {
                    $dest_id = substr($dest_id, 0, $p);
                }
            }
            $dest_id = trim($dest_id);
        }
            return($o . "· could not store '$fn_orig', please specify a page name to use as destination.<br />\n");
        #-- store -----------------------------------------------------------
        $ahref_dest = '<a href="' . ewiki_script("", $dest_id) . '">' . $dest_id . '</a>';

        $data = ewiki_db::GET($dest_id);
        if ($data && !$_REQUEST["textfile_overwrite_pages"]) {
            $o .= "· did not overwrite existing page '$ahref_dest' with content from file '$fn_orig'<br />\n";
        }
236 "flags" => EWIKI_DB_F_TEXT,
242 $data["lastmodified"] = time();
243 $data["author"] = ewiki_author("TextUpload");
244 $data["content"] = $content;
245 ewiki_scan_wikiwords($data["content"], $ewiki_links, "_STRIP_EMAIL=1");
246 $data["refs"] = "\n\n".implode("\n", array_keys($ewiki_links))."\n\n";
248 if (ewiki_db::WRITE($data)) {
249 $o .= "· extracted text from '$fn_orig' into page '$ahref_dest'<br />\n";
                #<debug># $o .= "<br /><br /><h1>src</h1>" . ($data["content"]) . "<h1>page</h1>" . ewiki_format($data["content"]);
            }
            else {
                $o .= "· a database error occurred when writing to '$ahref_dest' from file '$fn_orig'<br />\n";
            }

            $o .= "· couldn't detect the format (and text content) of '$fn_orig'<br />\n";
/* Reads a directory (from unpacked tarballs), and re-calls the
   _textfile_save() function for storing the individual files.
*/
function ewiki_textupload_readdir($tmp_dir) {
    $dh = opendir($tmp_dir);
    while ($fn = readdir($dh)) {

        elseif (is_dir("$tmp_dir/$fn")) {
            $o .= ewiki_textupload_readdir("$tmp_dir/$fn");
        }
        else {
            $o .= ewiki_textfile_save(array(
                "tmp_name" => "$tmp_dir/$fn",
                "name"     => $fn,
                "type"     => ewiki_get_mime_type("$tmp_dir/$fn"),
            ));
        }
#==========================================================================

/* Guesses a file's MIME type using magic data, or the file extension
   mapping list in /etc/mime.types.
*/
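/* For illustration (hypothetical call; the result depends on the PHP
   build and on /etc/mime.types being readable):

   echo ewiki_get_mime_type("report.pdf");   # -> "application/pdf"
*/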
function ewiki_get_mime_type($fn) {

    $mime = "application/octet-stream";

    #-- PHP's built-in magic-based detection, if available
    if (function_exists("mime_content_type")) {
        $mime = mime_content_type($fn);
    }

    #-- else look up the file extension in /etc/mime.types
    if (($mime == "application/octet-stream") && strpos($fn, ".")) {
        if (empty($mime_ext) && ($list = file("/etc/mime.types")))
        foreach ($list as $line) {
            $m = strtok($line, " \t");
            if (strpos($m, "/") && ($e = explode(" ", trim(strtr(strtok("\n"), ".\t", "  "))))) {
                foreach ($e as $ext) if ($ext) {
                    $mime_ext[$ext] = $m;
                }
            }
        }
        $ext = explode(".", $fn);
        unset($ext[0]);
        foreach ($ext as $e) {
            if ($m = $mime_ext[$e]) {
                $mime = $m;
            }
        }
    }
#==========================================================================

/* This function tries to convert an uploaded plain file into a text/plain
   (we here call it text/wiki) page. It uses some filters to convert from
   file format to file format (usually converting into text/html first,
   and then into WikiMarkup).
*/
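/* Example call (hypothetical values), as issued from ewiki_textfile_save()
   above; $content is then expected to hold the page body as WikiMarkup:

   $content = ewiki_textfile_convert("/tmp/phpA1b2C3", "Intro.html", "text/html");
*/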
function ewiki_textfile_convert($fn, $fn_orig, $mime, $extr_bin=0, $is_text=0, $noext2text=1) {

    global $ewiki_textfilters;

    #-- handled by ewiki_unformat()
    $html_variants = array(
        "text/html", "text/xhtml", "text/wap", "application/vnd.wap.wml",
        "text/x.office.content.xml",   # an extracted OpenOffice content.xml
    );

    #-- read in complete file
    if ($f = fopen($fn, "rb")) {
        $content = fread($f, 1<<18);   #-- max 256K
        fclose($f);
    }
    if ($mime == "application/octet-stream") {
        $mime = ewiki_get_mime_type($fn_orig);
    }
    if ($is_text && ($mime == "application/octet-stream")) {
        $mime = "text/plain";
    }
    if ($noext2text && !strpos($fn_orig, ".") && ($mime == "application/octet-stream")) {
        $mime = "text/plain";
    }
    #-- go through installed filters
    foreach ($ewiki_textfilters as $filter_row) {
        list($f_from, $f_into, $f_prog) = $filter_row;
        if (($f_from == $mime) || ($f_from == "*/*")) {

            #-- write content into a temporary file
            $tmpf = EWIKI_TMP."/ewiki-txtupl-filter-".time()."-".rand(0,564595).".tmp";
            if ($f = fopen($tmpf, "wb")) {
                fwrite($f, $content);
                fclose($f);
            }

            #-- replace placeholders "%f" and "%o"
            if (!strpos($f_prog, "%f")) {
                $f_prog .= " < '%f' ";
            }
            $f_prog = str_replace("%o", EWIKI_DEV_STDOUT, $f_prog);
            $f_prog = str_replace("%f", $tmpf, $f_prog);

            #-- exec, unlink temporary file
            $new_content = `$f_prog`;
            unlink($tmpf);

            $content = $new_content;
    #-- brute-force text extraction from binary files
    if ($extr_bin && (strtok($mime, "/") == "application")) {
        preg_match_all("/([\n\r\t\040-\176\177-\237\260-\377]{7,})/", $content, $uu);
        $content = implode("\n", $uu[1]);
        $mime = "text/plain";
    }
    #-- HTML -> Wiki source transformation
    if (in_array($mime, $html_variants)) {
        $content = ewiki_unformat($content);
    }

    #-- file reached text status
    if ($mime == "text/plain") {

    if ($mime == "text/wiki") {
#===========================================================================

/* test data for ewiki_unformat():

See also <a href="WikiText.html">WikiText</a> or somewhere __else__.

<span class="para">We'll expect some magic here?</span>

<li> list ==entry== 1
<li> list entry **2**
<ol><li> list entry 2-1</ol>

********************/
/* This function is believed to backconvert pages from .html into
   WikiMarkup. As it shall be used to convert any .html file (and not
   only ewiki-exported ones), it cannot convert tables back (think of
   layout tables).
   It has limited support for OpenOffice XML (for full support, this
   would need to be an XML parser).
*/
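/* For illustration (hypothetical input; the exact whitespace depends on
   the tag mapping rules below):

   $src = ewiki_unformat("<h2>Intro</h2><p>Some text.</p>");
   # roughly yields:  "\n\n!! Intro\n\nSome text.\n\n"
*/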
function ewiki_unformat($html) {

    $tagmap = array(
        "hr" => "\n\n----\n\n",
    );
    $standalone_tags = array(
        "img", "br", "hr", "input", "meta", "link",
    );
    #-- walk through all tags ----------------------------------------------
    $close_css = array();
    $len = strlen($html);
    $in_table = 0;   // ignore such??
    while (($pos < $len) && $loop--) {

        #-- decode step by step
        list($pretext, $tagstr, $tagattr) = ewiki_htmlparser_get($html, $pos, $len, $in_pre);
        $tagname = ltrim($tagstr, "/");

        #-- add pre-text (no linebreaks in it)
533 if ($tagstr) switch ($tagstr) {
538 $tag_level=0; $close_css=array();
547 $src .= "\n\n!!! "; break;
549 $src .= "\n\n!! "; break;
551 $src .= "\n\n! "; break;
554 $src .= "\n\n__"; break;
561 $src .= "\n\n"; break;
564 $src .= "__\n\n"; break;
574 $list .= ($tagstr=="ul") ? "*" : "#";
580 $list = substr($list, 0, $in_list);
587 $src .= "\n" . $list;
                $name = $tagattr["name"];
                $href = $tagattr["href"];
                if ($href || $name) {

                    $text = "";
                    do {
                        list($t, $tagstr, $tagattr) = ewiki_htmlparser_get($html, $pos, $len);
                        $text = trim("$text$t");
                    }
                    while ($tagstr != "/a");

                    $text = "$name$href";

                    $src .= "[#$name \"$text\"]";

                elseif ($href[0] == "#") {
                    $src .= "[.#$href \"$text\"]";
                    #-- check for InterWikiLink
                    foreach ($ewiki_config["interwiki"] as $abbr => $url) {
                        $url = str_replace("%s", "", $url);
                        if (substr($href, 0, strlen($url)) === $url) {
                            $href = "?id=" . $abbr . ":" . substr($href, strlen($url));
                        }
                    }

                    #-- binary link (should rarely happen)
                    if ($p = strpos($href, EWIKI_IDF_INTERNAL)) {
                        $href = strtok(substr($href, $p), "&");
                        $src .= "[$href \"$text\"]";
                    }
                elseif (strpos($href, "://")) {
                    if ($href == $text) {

                        $src .= "[$href \"$text\"]";

                if (preg_match('#\?(?:id|page|file|name)=(.+)(&(?!amp;)|$)#', urldecode($href), $uu)) {

                #-- ewiki .html export filenames
                elseif (preg_match('#^([^/:]+)(\.html?)?$#', urldecode($href), $uu)) {

                    #-- looks like a wikilink
                    if (strpos($wikilink, "view/") === 0) {
                        $wikilink = substr($wikilink, 5);
                        $src .= "[$text|$wikilink]";
                    }
                    if (($wikilink == $text) || ($wikilink == str_replace(" ", "", $text))) {
                        if (preg_match('/(['.EWIKI_CHARS_U.']+['.EWIKI_CHARS_L.']+){2}/', $wikilink)) {

                            $src .= "[$wikilink]";

                        $src .= "[$wikilink \"$text\"]";

                elseif ($href[0] == "/") {
                    $src .= "[url:$href \"$text\"]";
                }
                #-- should maybe drop this

                    $src .= "[$href \"$text\"]";
                if ($href = $tagattr["src"]) {
                    ($alt = $tagattr["alt"]) or ($alt = $tagattr["title"]) or ($alt = "<img>");
                    $src .= "[$alt|$href]";
                }
                if ($end = strpos($html, '</code', $pos)) {
                    $end = strpos($html, '>', $end);
                    $src .= "\n\n<code>" . substr($html, $pos, $end - $pos);

                $src .= "\n</pre>\n";
            #-- OpenOffice -----------------------------------------------------
            case "office:document-content":
                if ($tagattr["xmlns:office"] && $tagattr["xmlns:text"] && ($tagattr["office:class"] == "text")) {

            case "style:properties":
                if ($xml["office"]) {
                    if ($uu = $tagattr["style:name"]) {
                        $xml_i = $uu;   # style selector
                    }
                    if ("bold" == $tagattr["fo:font-weight"]) {
                        $xml[$xml_i][0] .= "__";
                    }
                    if ("italic" == $tagattr["fo:font-style"]) {
                        $xml[$xml_i][0] .= "''";
                    }
                    if (strpos($tagattr["style:parent-style-name"], "eadline")) {   # matches "Headline"/"headline"
                        $xml[$xml_i][1] = "\n!";
                    }
                $xml_i = $tagattr["text:style-name"];
                $src .= $xml[$xml_i][1];

                $src .= strrev($xml[$xml_i][0]);

                if (!$xml["list"] && ($tagstr == "/text:p")) {

                $level = $tagattr["text:level"];
                $src .= "\n" . str_repeat("!", 1 + (($level > 4) ? 1 : 0) + (($level > 6) ? 1 : 0));
            case "text:list-item":
                $src .= "\n" . $xml["list"] . " ";
                break;
            case "text:ordered-list":
                $xml["list"] .= "#";
                break;
            case "text:unordered-list":
                $xml["list"] .= "*";
                break;
            case "/text:ordered-list":
            case "/text:unordered-list":
                $xml["list"] = substr($xml["list"], 0, -1);
                $src .= "\n";   # there are no nested lists in OO anyhow
                break;
            #-- anything else --------------------------------------------------

                #-- one of the standard tags?
                if ($add = @$tagmap[$tagname]) {

                #-- track the tag nesting level
                if ($tagstr[0] == "/") {
                    $tag_level--;
                    if ($tag_level < 0) { $tag_level = 0; }
                }
                elseif (!in_array($tagname, $standalone_tags)) {

                    if ($css = @$tagattr["class"]) {
                        $css = strtr($css, "\r\t\n", "   ");   # whitespace -> spaces
                        $css = strtr($css, " ", "-");          # join multiple class names

                        $close_css[$tag_level-1]++;
                    }
                    if ($css = @$tagattr["style"]) {
                        $css = str_replace(" ", "", $css);

                        $close_css[$tag_level-1]++;
                    }

                while (@$close_css[$tag_level]) {

                    $close_css[$tag_level]--;
                }
/* Fetches (step by step) the next html <tag> from the input string, and
   also returns the text content preceding it.
*/
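/* Example of the calling convention (illustrative; mirrors the driver
   loop in ewiki_unformat() above):

   $pos = 0;  $len = strlen($html);
   while ($pos < $len) {
      list($text, $tag, $attrs) = ewiki_htmlparser_get($html, $pos, $len);
      # $text  = plain text before the tag
      # $tag   = lowercase tag name ("/p" for closing tags)
      # $attrs = array of attributes, if any were present
   }
*/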
function ewiki_htmlparser_get(&$html, &$pos, &$len, $pre=0) {

    $text = $tagstr = $tagattr = "";

    #-- locate the next tag
    $l = strpos($html, "<", $pos);
    $r = strpos($html, ">", $l);
    if (($l === false) or ($r === false)) {
        #-- no further tag: return the remaining text
        $text = substr($html, $pos);

    }
    else {
        $text = substr($html, $pos, $l - $pos);
        if (!$pre) {
            $text = strtr($text, "\r\n", "  ");
        }
        $tag = substr($html, $l + 1, $r - $l - 1);

        #-- split into name and attributes
        $tagstr = strtolower(rtrim(strtok($tag, " \t\n>"), "/"));
        if (($tattr = strtok(">")) && strpos($tattr, "=")) {
            $tagattr = array();
            preg_match_all('/([-:\w]+)=(\".*?\"|[^\s]+)/', $tag, $uu);
            foreach ($uu[1] as $i => $a) {
                $tagattr[$uu[1][$i]] = trim($uu[2][$i], '"');
            }
        }
    }

    return( array($text, $tagstr, $tagattr) );
}