This plugin allows uploading files, which then get inserted as wiki
pages. It accepts plain text files, but also ZIP files or tarballs
containing multiple ones (if your server can 'tar' or 'unzip'). It can
also parse HTML files and gives acceptable results for them.
It may also convert files from proprietary word processing formats, if
you have the corresponding programs available server-side. But only
enable this for filters you really have, as you could otherwise end up
with empty pages. This probably runs reliably on Unix systems only.
To make it run on Win/NT systems, you'll need to comment out the
unsupported text filters (often all of them), and set _UNTAR and _UNZIP
to more useful values. _UNZIP is also needed for reading OpenOffice
files.
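For example (an illustrative sketch only; whether "pkunzip.exe" or
another unzip tool is available and in the PATH depends on your setup):

   define("EWIKI_UNTAR", 0);                 #-- no tar on Windows
   define("EWIKI_UNZIP", "pkunzip.exe");     #-- DOS/Win unzip tool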
#-- are the following tools available?
define("EWIKI_UNTAR", "tar");      #-- GNU/Linux tar, Cygwin tar.exe
define("EWIKI_UNZIP", "unzip");    #-- unzip or pkunzip.exe

#-- fallback defaults (the @ keeps any earlier define authoritative)
@define("EWIKI_UNTAR", 0);
@define("EWIKI_UNZIP", 0);
#-- additional settings
if (DIRECTORY_SEPARATOR == "/") {
    define("EWIKI_DEV_STDOUT", "/dev/stdout");   #-- Unix
} else {
    define("EWIKI_DEV_STDOUT", "CON");           #-- DOS
}
#-- filter table (Unix rules!)
$ewiki_textfilters = array(
    array("text/plain",                "text/plain", "cat %f"),
    array("application/x-msword",      "text/wiki",  "wvWiki %f -"),
    array("application/x-msword",      "text/html",  "wvHtml %f -"),
    array("application/x-msword",      "text/html",  "word2x -f html %f"),
    array("application/x-msword",      "text/plain", "antiword -t %f"),
    array("application/x-wordperfect", "text/html",  "wpd2html %f"),
    array("application/pdf",           "text/html",  "pdftotext -htmlmeta -q %f"),
    array("application/x-latex",       "text/html",  "latex2html < %f"),
    array("x.file-ext/x.docbook",      "text/html",  "docbook2html --nochunks %f"),
    array("text/html",                 "text/html",  "tidy -quiet -latin1 %f"),
    array("text/xhtml",                "text/html",  "tidy -quiet -latin1 %f"),
    array("application/vnd.sun.xml.writer", "text/x.office.content.xml", EWIKI_UNZIP." -p %f content.xml"),
#   array("text/xml+docbook",          "text/wiki",  "db2html %f -"),   # ????
#   array("*/*",   "application/postscript",   "a2ps -q"),
    array("application/postscript",    "text/plain", "pstotext"),
);

$mime_ext["docbook"] = "x.file-ext/x.docbook";
$mime_ext["db"]      = "x.file-ext/x.docbook";
$mime_ext["sgml"]    = "x.file-ext/x.docbook";
$mime_ext["mar"]     = "x-multipart/parallel";
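#-- Example (illustrative only, not part of the shipped table): to accept
#   RTF uploads one could append a filter row and an extension mapping
#   like the following; "unrtf" is an assumption and must actually exist
#   server-side, or you will end up with empty pages:
#
#   $ewiki_textfilters[] = array("application/rtf", "text/html", "unrtf --html %f");
#   $mime_ext["rtf"] = "application/rtf";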
In each line, the accepted input MIME type and the resulting output type
are listed. The command (third entry) must read the file "%f" as input
(or read from stdin), and send its entire output to stdout or to "%o".
"%f" and "%o" are placeholders, which get substituted automatically
(commands may then effectively read "/dev/stdin" and write "/dev/stdout",
with "<" or ">" redirection added where needed).
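For illustration, this is roughly what ewiki_textfile_convert() further
below does with each command ($tmpf being the temporary copy of the
uploaded file):

   if (!strpos($f_prog, "%f")) {
      $f_prog .= " < '%f' ";                          # feed via stdin
   }
   $f_prog = str_replace("%o", EWIKI_DEV_STDOUT, $f_prog);
   $f_prog = str_replace("%f", $tmpf, $f_prog);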
Some of these filters are usually already available on modern UNIX
distros. As a fallback, text data gets ripped out of binary files
(garbage will remain in the page), or the file may be rejected.
#-- plugin registration
$ewiki_plugins["page"]["TextUpload"] = "ewiki_page_textupload";
function ewiki_page_textupload($id, $data, $action) {

    $o = ewiki_make_title($id, $id, 2);

    if (empty($_FILES["upload_text_file"])) {

        $ACCEPT = "text/plain,text/wiki,text/html,text/*,application/x-tar,application/x-gtar,application/x-ustar,application/zip";

        $url = ewiki_script("", $id);
Use this upload form to insert text files as pages into the Wiki. This
function also has super cow powers and can extract multiple files from a
ZIP archive or tarball (compressed or not).
<form action="$url" method="POST" enctype="multipart/form-data">
file <input type="file" name="upload_text_file" accept="$ACCEPT">
<small><br /><br /></small>
<input type="submit" value="store into Wiki">
<input type="checkbox" name="textfile_overwrite_pages" value="1" checked="checked"> overwrite existing page
<input type="checkbox" name="textfile_assume_text" value="1"> assume file is text/plain,
<input type="checkbox" name="textfile_noext_is_text" value="1" checked="checked"> if no .ext
<input type="checkbox" name="textfile_brute_force" value="1"> brute-force extract text data from binary file
strip <select name="textfile_strip_ext"><option value="0">no</option><option value="1" selected="selected">last</option><option value="2">all</option></select> file name extension(s), and use the remaining string as destination page name
or store file as <input type="text" name="textfile_saveas" size="22"> (page name)
        $o .= ewiki_textfile_save($_FILES["upload_text_file"]);
/* This code is responsible for checking the parameters of the uploaded
   $_FILES entry, unpacking ZIP archives and tarballs, and finally
   converting (from *.* into text/wiki) and storing the individual files
   as wiki pages.
*/
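/* Example call (hypothetical values, mirroring what PHP provides in
   $_FILES for an upload field):

   $o .= ewiki_textfile_save(array(
      "tmp_name" => "/tmp/phpA1b2C3",    # temporary upload copy
      "name"     => "ReadMe.txt",        # original client filename
      "type"     => "text/plain",        # client-supplied MIME type
   ));
*/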
function ewiki_textfile_save($file=array()) {

    #set_time_limit(+30);

    $fn      = $file["tmp_name"];
    $fn_orig = $file["name"];
    $mime    = $file["type"];

    #-- pre-guess content type
    if ($_REQUEST["textfile_assume_text"] && !strpos($fn_orig, ".") && ($mime == "application/octet-stream")) {
        $mime = "text/plain";
    }
    #-- is the current file an archive?
    if (strpos($fn_orig, ".sx")) {
        $mime = "application/vnd.sun.xml.writer";
    }
    $untar = (preg_match("#^application/(x-)?(g|us)tar$#", $mime) || preg_match("#\.tar|\.tgz#", $fn_orig)) ? EWIKI_UNTAR : "";
    $unzip = (($mime == "application/zip") || strpos($file["name"], ".zip")) ? EWIKI_UNZIP : "";
    $multimime = (strstr($mime, "multipart/"));
    #-- tarball or zip archive ------------------------------------------------
    if ($untar || $unzip) {

        #-- create temporary dir
        $tmp_dir = EWIKI_TMP."/ewiki-txtupl-$untar$unzip-".time()."-".rand(0,523555).".tmp.d";
        mkdir($tmp_dir);
        $cwd = getcwd();
        chdir($tmp_dir);

        #-- archive extraction
        if ($untar) {
            exec("$untar xzf '$fn'", $uu, $error);
            if ($error) { exec("$untar xjf '$fn'", $uu, $error); }
            if ($error) { exec("$untar xf '$fn'", $uu, $error); }
        }

        #-- go through directory
        $o .= ewiki_textupload_readdir($tmp_dir);

        #-- remove temporary directory
    #-- multipart/ mime archive -----------------------------------------------
    elseif ($multimime) {

    #-- plain file --------------------------------------------------------
        #-- extract wiki content from file
        $content = ewiki_textfile_convert(
            $fn, $fn_orig, $mime,
            $_REQUEST["textfile_brute_force"],
            $_REQUEST["textfile_assume_text"],
            $_REQUEST["textfile_noext_is_text"]
        );

        #-- make short filename
        $fn_orig = strtr($fn_orig, "\\", "/");
        if ($p = strrpos($fn_orig, "/")) {
            $fn_orig = substr($fn_orig, $p+1);
        }
        #-- destination filename
        $dest_id = trim($_REQUEST["textfile_saveas"]);
        if (empty($dest_id)) {
            $dest_id = trim(trim($fn_orig), ".");
            if ($_REQUEST["textfile_strip_ext"] == 2) {
                $dest_id = strtok($fn_orig, ".");
            }
            if ($_REQUEST["textfile_strip_ext"] == 1) {
                if ($p = strrpos($dest_id, ".")) {
                    $dest_id = substr($dest_id, 0, $p);
                }
            }
            $dest_id = trim($dest_id);
        }
            return($o . "· could not store '$fn_orig', please specify a page name to use as destination.<br />\n");
        #-- store -----------------------------------------------------------
        $ahref_dest = '<a href="' . ewiki_script("", $dest_id) . '">' . $dest_id . '</a>';

        $data = ewiki_db::GET($dest_id);
        if ($data && !$_REQUEST["textfile_overwrite_pages"]) {
            $o .= "· did not overwrite existing page '$ahref_dest' with content from file '$fn_orig'<br />\n";
        }
236 "flags" => EWIKI_DB_F_TEXT,
242 $data["lastmodified"] = time();
243 $data["author"] = ewiki_author("TextUpload");
244 $data["content"] = $content;
245 ewiki_scan_wikiwords($data["content"], $ewiki_links, "_STRIP_EMAIL=1");
246 $data["refs"] = "\n\n".implode("\n", array_keys($ewiki_links))."\n\n";
248 if (ewiki_db::WRITE($data)) {
249 $o .= "· extracted text from '$fn_orig' into page '$ahref_dest'<br />\n";
                #<debug># $o .= "<br /><br /><h1>src</h1>" . ($data["content"]) . "<h1>page</h1>" . ewiki_format($data["content"]);
            }
            else {
                $o .= "· a database error occurred when writing to '$ahref_dest' from file '$fn_orig'<br />\n";
            }

            $o .= "· couldn't detect the format (and text content) of '$fn_orig'<br />\n";
/* Reads a directory (from unpacked tarballs), and re-calls the
   _textfile_save() function for storing the individual files.
*/
function ewiki_textupload_readdir($tmp_dir) {
    $dh = opendir($tmp_dir);
    while ($fn = readdir($dh)) {

        elseif (is_dir("$tmp_dir/$fn")) {
            $o .= ewiki_textupload_readdir("$tmp_dir/$fn");
        }
        else {
            $o .= ewiki_textfile_save(array(
                "tmp_name" => "$tmp_dir/$fn",
                "name"     => $fn,
                "type"     => ewiki_get_mime_type("$tmp_dir/$fn"),
            ));
        }
#==========================================================================

/* Guesses a file's MIME type using magic data, or the file extension
   mapping list in /etc/mime.types.
*/
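/* For illustration (hypothetical call; the result depends on the PHP
   build and on /etc/mime.types being readable):

   echo ewiki_get_mime_type("report.pdf");   # -> "application/pdf"
*/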
function ewiki_get_mime_type($fn) {

    $mime = "application/octet-stream";

    #-- PHP's built-in magic-based detection, if available
    if (function_exists("mime_content_type")) {
        $mime = mime_content_type($fn);
    }

    #-- else look up the file extension in /etc/mime.types
    if (($mime == "application/octet-stream") && strpos($fn, ".")) {
        if (empty($mime_ext) && ($list = file("/etc/mime.types")))
        foreach ($list as $line) {
            $m = strtok($line, " \t");
            if (strpos($m, "/") && ($e = explode(" ", trim(strtr(strtok("\n"), ".\t", "  "))))) {
                foreach ($e as $ext) if ($ext) {
                    $mime_ext[$ext] = $m;
                }
            }
        }
        $ext = explode(".", $fn);
        unset($ext[0]);
        foreach ($ext as $e) {
            if ($m = $mime_ext[$e]) {
                $mime = $m;
            }
        }
    }
#==========================================================================

/* This function tries to convert an uploaded plain file into a text/plain
   (we here call it text/wiki) page. It uses some filters to convert from
   file format to file format (usually converting into text/html first,
   and then into WikiMarkup).
*/
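/* Example call (hypothetical values), as issued from ewiki_textfile_save()
   above; $content is then expected to hold the page body as WikiMarkup:

   $content = ewiki_textfile_convert("/tmp/phpA1b2C3", "Intro.html", "text/html");
*/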
function ewiki_textfile_convert($fn, $fn_orig, $mime, $extr_bin=0, $is_text=0, $noext2text=1) {

    global $ewiki_textfilters;

    #-- handled by ewiki_unformat()
    $html_variants = array(
        "text/html", "text/xhtml", "text/wap", "application/vnd.wap.wml",
        "text/x.office.content.xml",   # an extracted OpenOffice content.xml
    );

    #-- read in complete file
    if ($f = fopen($fn, "rb")) {
        $content = fread($f, 1<<18);   #-- max 256K
        fclose($f);
    }
    if ($mime == "application/octet-stream") {
        $mime = ewiki_get_mime_type($fn_orig);
    }
    if ($is_text && ($mime == "application/octet-stream")) {
        $mime = "text/plain";
    }
    if ($noext2text && !strpos($fn_orig, ".") && ($mime == "application/octet-stream")) {
        $mime = "text/plain";
    }
    #-- go through installed filters
    foreach ($ewiki_textfilters as $filter_row) {
        list($f_from, $f_into, $f_prog) = $filter_row;
        if (($f_from == $mime) || ($f_from == "*/*")) {

            #-- write content into a temporary file
            $tmpf = EWIKI_TMP."/ewiki-txtupl-filter-".time()."-".rand(0,564595).".tmp";
            if ($f = fopen($tmpf, "wb")) {
                fwrite($f, $content);
                fclose($f);
            }

            #-- replace placeholders "%f" and "%o"
            if (!strpos($f_prog, "%f")) {
                $f_prog .= " < '%f' ";
            }
            $f_prog = str_replace("%o", EWIKI_DEV_STDOUT, $f_prog);
            $f_prog = str_replace("%f", $tmpf, $f_prog);

            #-- exec, unlink temporary file
            $new_content = `$f_prog`;
            unlink($tmpf);

            $content = $new_content;
    #-- brute-force text extraction from binary files
    if ($extr_bin && (strtok($mime, "/") == "application")) {
        preg_match_all("/([\n\r\t\040-\176\177-\237\260-\377]{7,})/", $content, $uu);
        $content = implode("\n", $uu[1]);
        $mime = "text/plain";
    }
    #-- HTML -> Wiki source transformation
    if (in_array($mime, $html_variants)) {
        $content = ewiki_unformat($content);
    }

    #-- file reached text status
    if ($mime == "text/plain") {

    if ($mime == "text/wiki") {
#===========================================================================

/* test data for ewiki_unformat():

See also <a href="WikiText.html">WikiText</a> or somewhere __else__.

<span class="para">We'll expect some magic here?</span>

<li> list ==entry== 1
<li> list entry **2**
<ol><li> list entry 2-1</ol>

********************/
/* This function is believed to backconvert pages from .html into
   WikiMarkup. As it shall be used to convert any .html file (and not
   only ewiki-exported ones), it cannot convert tables back (think of
   layout tables).
   It has limited support for OpenOffice XML (for full support, this
   would need to be an XML parser).
*/
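/* For illustration (hypothetical input; the exact whitespace depends on
   the tag mapping rules below):

   $src = ewiki_unformat("<h2>Intro</h2><p>Some text.</p>");
   # roughly yields:  "\n\n!! Intro\n\nSome text.\n\n"
*/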
function ewiki_unformat($html) {

    $tagmap = array(
        "hr" => "\n\n----\n\n",
    );
    $standalone_tags = array(
        "img", "br", "hr", "input", "meta", "link",
    );
    #-- walk through all tags ----------------------------------------------
    $close_css = array();
    $len = strlen($html);
    $in_table = 0;   // ignore such??
    while (($pos < $len) && $loop--) {

        #-- decode step by step
        list($pretext, $tagstr, $tagattr) = ewiki_htmlparser_get($html, $pos, $len, $in_pre);
        $tagname = ltrim($tagstr, "/");

        #-- add pre-text (no linebreaks in it)
533 if ($tagstr) switch ($tagstr) {
538 $tag_level=0; $close_css=array();
547 $src .= "\n\n!!! "; break;
549 $src .= "\n\n!! "; break;
551 $src .= "\n\n! "; break;
554 $src .= "\n\n__"; break;
561 $src .= "\n\n"; break;
564 $src .= "__\n\n"; break;
574 $list .= ($tagstr=="ul") ? "*" : "#";
580 $list = substr($list, 0, $in_list);
587 $src .= "\n" . $list;
                $name = $tagattr["name"];
                $href = $tagattr["href"];
                if ($href || $name) {

                    $text = "";
                    do {
                        list($t, $tagstr, $tagattr) = ewiki_htmlparser_get($html, $pos, $len);
                        $text = trim("$text$t");
                    }
                    while ($tagstr != "/a");

                    $text = "$name$href";

                    $src .= "[#$name \"$text\"]";

                elseif ($href[0] == "#") {
                    $src .= "[.#$href \"$text\"]";
                    #-- check for InterWikiLink
                    foreach ($ewiki_config["interwiki"] as $abbr => $url) {
                        $url = str_replace("%s", "", $url);
                        if (substr($href, 0, strlen($url)) === $url) {
                            $href = "?id=" . $abbr . ":" . substr($href, strlen($url));
                        }
                    }

                    #-- binary link (should rarely happen)
                    if ($p = strpos($href, EWIKI_IDF_INTERNAL)) {
                        $href = strtok(substr($href, $p), "&");
                        $src .= "[$href \"$text\"]";
                    }
                elseif (strpos($href, "://")) {
                    if ($href == $text) {

                        $src .= "[$href \"$text\"]";

                if (preg_match('#\?(?:id|page|file|name)=(.+)(&(?!amp;)|$)#', urldecode($href), $uu)) {

                #-- ewiki .html export filenames
                elseif (preg_match('#^([^/:]+)(\.html?)?$#', urldecode($href), $uu)) {

                    #-- looks like a wikilink
                    if (strpos($wikilink, "view/") === 0) {
                        $wikilink = substr($wikilink, 5);
                        $src .= "[$text|$wikilink]";
                    }
                    if (($wikilink == $text) || ($wikilink == str_replace(" ", "", $text))) {
                        if (preg_match('/(['.EWIKI_CHARS_U.']+['.EWIKI_CHARS_L.']+){2}/', $wikilink)) {

                            $src .= "[$wikilink]";

                        $src .= "[$wikilink \"$text\"]";

                elseif ($href[0] == "/") {
                    $src .= "[url:$href \"$text\"]";
                }
                #-- should maybe drop this

                    $src .= "[$href \"$text\"]";
                if ($href = $tagattr["src"]) {
                    ($alt = $tagattr["alt"]) or ($alt = $tagattr["title"]) or ($alt = "<img>");
                    $src .= "[$alt|$href]";
                }
                if ($end = strpos($html, '</code', $pos)) {
                    $end = strpos($html, '>', $end);
                    $src .= "\n\n<code>" . substr($html, $pos, $end - $pos);

                $src .= "\n</pre>\n";
            #-- OpenOffice -----------------------------------------------------
            case "office:document-content":
                if ($tagattr["xmlns:office"] && $tagattr["xmlns:text"] && ($tagattr["office:class"] == "text")) {

            case "style:properties":
                if ($xml["office"]) {
                    if ($uu = $tagattr["style:name"]) {
                        $xml_i = $uu;   # style selector
                    }
                    if ("bold" == $tagattr["fo:font-weight"]) {
                        $xml[$xml_i][0] .= "__";
                    }
                    if ("italic" == $tagattr["fo:font-style"]) {
                        $xml[$xml_i][0] .= "''";
                    }
                    if (strpos($tagattr["style:parent-style-name"], "eadline")) {   # matches "Headline"/"headline"
                        $xml[$xml_i][1] = "\n!";
                    }
                $xml_i = $tagattr["text:style-name"];
                $src .= $xml[$xml_i][1];

                $src .= strrev($xml[$xml_i][0]);

                if (!$xml["list"] && ($tagstr == "/text:p")) {

                $level = $tagattr["text:level"];
                $src .= "\n" . str_repeat("!", 1 + (($level > 4) ? 1 : 0) + (($level > 6) ? 1 : 0));
            case "text:list-item":
                $src .= "\n" . $xml["list"] . " ";
                break;
            case "text:ordered-list":
                $xml["list"] .= "#";
                break;
            case "text:unordered-list":
                $xml["list"] .= "*";
                break;
            case "/text:ordered-list":
            case "/text:unordered-list":
                $xml["list"] = substr($xml["list"], 0, -1);
                $src .= "\n";   # there are no nested lists in OO anyhow
                break;
            #-- anything else --------------------------------------------------

                #-- one of the standard tags?
                if ($add = @$tagmap[$tagname]) {

                #-- track the tag nesting level
                if ($tagstr[0] == "/") {
                    $tag_level--;
                    if ($tag_level < 0) { $tag_level = 0; }
                }
                elseif (!in_array($tagname, $standalone_tags)) {

                    if ($css = @$tagattr["class"]) {
                        $css = strtr($css, "\r\t\n", "   ");   # whitespace -> spaces
                        $css = strtr($css, " ", "-");          # join multiple class names

                        $close_css[$tag_level-1]++;
                    }
                    if ($css = @$tagattr["style"]) {
                        $css = str_replace(" ", "", $css);

                        $close_css[$tag_level-1]++;
                    }

                while (@$close_css[$tag_level]) {

                    $close_css[$tag_level]--;
                }
/* Fetches (step by step) the next html <tag> from the input string, and
   also returns the text content preceding it.
*/
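/* Example of the calling convention (illustrative; mirrors the driver
   loop in ewiki_unformat() above):

   $pos = 0;  $len = strlen($html);
   while ($pos < $len) {
      list($text, $tag, $attrs) = ewiki_htmlparser_get($html, $pos, $len);
      # $text  = plain text before the tag
      # $tag   = lowercase tag name ("/p" for closing tags)
      # $attrs = array of attributes, if any were present
   }
*/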
function ewiki_htmlparser_get(&$html, &$pos, &$len, $pre=0) {

    $text = $tagstr = $tagattr = "";

    #-- locate the next tag
    $l = strpos($html, "<", $pos);
    $r = strpos($html, ">", $l);
    if (($l === false) or ($r === false)) {
        #-- no further tag: return the remaining text
        $text = substr($html, $pos);

    }
    else {
        $text = substr($html, $pos, $l - $pos);
        if (!$pre) {
            $text = strtr($text, "\r\n", "  ");
        }
        $tag = substr($html, $l + 1, $r - $l - 1);

        #-- split into name and attributes
        $tagstr = strtolower(rtrim(strtok($tag, " \t\n>"), "/"));
        if (($tattr = strtok(">")) && strpos($tattr, "=")) {
            $tagattr = array();
            preg_match_all('/([-:\w]+)=(\".*?\"|[^\s]+)/', $tag, $uu);
            foreach ($uu[1] as $i => $a) {
                $tagattr[$uu[1][$i]] = trim($uu[2][$i], '"');
            }
        }
    }

    return( array($text, $tagstr, $tagattr) );
}