2 /****************************************************************/
\r
4 /****************************************************************/
\r
5 /* Copyright (c) 2002-2008 by Greg Gay & Cindy Qi Li */
\r
6 /* Adaptive Technology Resource Centre / University of Toronto */
\r
7 /* http://atutor.ca */
\r
9 /* This program is free software. You can redistribute it and/or*/
\r
10 /* modify it under the terms of the GNU General Public License */
\r
11 /* as published by the Free Software Foundation. */
\r
12 /****************************************************************/
\r
13 // $Id: utf8conv.php 2008-01-23 14:49:24Z cindy $
\r
/**
 * This script only works when being included in the scripts
 * where vitals.inc.php is included as:
 *
 * define('AT_INCLUDE_PATH', '../../include/');
 * require (AT_INCLUDE_PATH.'vitals.inc.php');
 *
 * Structure of this document (in order):
 *
 * 1. Unzip uploaded file to module's content directory
 * 2. Read content folder recursively and search through all html and xml files
 *    to find "charset" defined in html "meta" tag and "charset" defined in xml files.
 *
 *    (1) Only 1 character set found:
 *        The module converts the files with file types to convert from found charset
 *        to UTF-8.
 *    (2) More than 1 character set found:
 *        The module displays a drop-down listbox with all found character sets. User
 *        selects one and clicks "Go" button. The module will do the conversion from
 *        the selected character set to UTF-8.
 *    (3) No character set found:
 *        The module displays a drop-down listbox with default character sets. User
 *        selects one and clicks "Go" button. The module will do the conversion from
 *        the selected character set to UTF-8.
 * 3. Zip converted files
 * 4. Force zipped converted file to download
 * 5. Clear all temp files
 */
\r
// Character set every file is converted to
$charset_to = "UTF-8";

// Extensions (without the dot) of the files whose contents are converted
$filetypes = array('html', 'xml', 'csv', 'txt', 'sql');

// Normalise the extensions to lower case, because convert() lower-cases a
// file's extension before looking it up in this list.
// array_change_key_case() only changes array KEYS, so it did nothing to
// this list of values; array_map('strtolower', ...) lower-cases the values.
$filetypes = array_map('strtolower', $filetypes);

// Character sets offered to the user when none is found in the uploaded files
$default_charsets = array(
	'ISO-8859-1','ISO-8859-2','ISO-8859-3','ISO-8859-4','ISO-8859-5','ISO-8859-6','ISO-8859-7',
	'ISO-8859-8','ISO-8859-9','ISO-8859-10','ISO-8859-11','ISO-8859-13','ISO-8859-14',
	'ISO-8859-15','ISO-8859-16','BIG5','EUC-JP','GB2312','US-ASCII','WINDOWS-874','WINDOWS-936',
	'WINDOWS-1250','WINDOWS-1251','WINDOWS-1252','WINDOWS-1253','WINDOWS-1254','WINDOWS-1255',
	'WINDOWS-1256','WINDOWS-1257','WINDOWS-1258');

// Character sets found in the uploaded html & xml files (starts empty)
$charsets = array();
\r
/**
 * This function finds charset definition from html & xml files.
 *
 * - "*.html" files: the charset named in the first <meta ... charset=...> tag.
 * - "*.xml" files:  the text of the first <charset>...</charset> element.
 * Files with any other extension are not read.
 *
 * @param  string $filename  The name of the file to find charset definition
 * @return string            The charset name in upper case, or an empty string
 *                           when the file is unreadable or declares no charset
 * @author Cindy Qi Li
 */
function find_charset($filename)
{
	// Always defined, so callers can rely on strlen() of the result
	$charset = '';

	if (preg_match("/\.html$/i", $filename))
	{
		$content = file_get_contents($filename);

		// Capture only the charset token itself. The previous pattern
		// (/<meta.*charset=(.*) /i) required a space after the value and
		// greedily swallowed the rest of the line, and $matches[1] was read
		// even when nothing matched.
		$pattern = '/<meta\b[^>]*?charset\s*=\s*["\']?\s*([A-Za-z0-9][A-Za-z0-9._:\-]*)/i';

		if ($content !== false && preg_match($pattern, $content, $matches))
		{
			$charset = strtoupper(trim($matches[1]));
		}
	}

	if (preg_match("/\.xml$/i", $filename))
	{
		$content = file_get_contents($filename);

		// Non-greedy so that two <charset> elements on one line do not merge
		if ($content !== false && preg_match("#<charset>(.*?)</charset>#i", $content, $matches))
		{
			$charset = strtoupper(trim($matches[1]));
		}
	}

	return $charset;
}
\r
/**
 * This function finds the charset defined in one html or xml file and saves
 * it in the global variable $charsets, unless the file declares no charset or
 * the charset is already in the list.
 *
 * It is registered as the 'readDir_file' event callback of readDir, so it is
 * called once per file found in the unzipped upload.
 *
 * @param string $path      The full path and name of the file to find charset definition
 * @param string $filename  Not used by this function.
 *                          NOTE(review): presumably the bare file name passed by
 *                          readDir - confirm against readDir.php
 * @author Cindy Qi Li
 */
function find_all_charsets($path, $filename)
{
	// Without this declaration the function would work on a local,
	// undefined $charsets and the global list would stay empty.
	global $charsets;

	$charset = (string) find_charset($path);

	if (strlen($charset) > 0 && !in_array($charset, $charsets, true))
	{
		$charsets[] = $charset;
	}
}
\r
/**
 * This function converts one file, in place, to the character set $charset_to:
 * 1. replaces the charset strings defined in html "meta" tag and "charset" tag in xml files to "UTF-8";
 * 2. convert files from old character set to UTF-8.
 *
 * Which source charset is used:
 * - html & xml files: the charset defined in the file (see find_charset());
 *   if none is defined, the global $charset_from.
 * - other files whose extension is listed in $filetypes: $charset_from.
 * Files of any other type are left untouched, and so is a file that cannot
 * be read or whose conversion fails.
 *
 * @param string $path      The full path and name of the file to convert
 * @param string $filename  Not used by this function (second argument of the
 *                          readDir 'readDir_file' event)
 * @author Cindy Qi Li
 */
function convert($path, $filename)
{
	// $filetypes must be imported as well, otherwise every in_array() below
	// looks into an undefined local variable.
	global $charset_from, $charset_to, $filetypes;

	$is_markup = (in_array('html', $filetypes) && preg_match("/\.html$/i", $path)) ||
	             (in_array('xml', $filetypes) && preg_match("/\.xml$/i", $path));

	$extension = strtolower(substr($path, (strrpos($path, '.')+1)));

	if (!$is_markup && !in_array($extension, $filetypes))
	{
		return; // not one of the file types to convert
	}

	$content = file_get_contents($path);

	if ($content === false)
	{
		return; // unreadable file: nothing to convert
	}

	$charset_in = $is_markup ? (string) find_charset($path) : '';

	if (strlen($charset_in) > 0)
	{
		// replace old charset in <meta> tag to new charset
		// NOTE(review): this replaces EVERY occurrence of the old charset name
		// in the file, not only the declaration - kept as it was.
		$content = str_ireplace($charset_in, $charset_to, $content);

		// convert file from old charset to new charset
		$converted = iconv($charset_in, $charset_to . '//IGNORE', $content);
	}
	else
	{
		$converted = iconv($charset_from, $charset_to . '//IGNORE', $content);
	}

	// iconv() returns false on failure (e.g. unknown charset name). Writing
	// that result would truncate the file, so keep the original instead.
	if ($converted === false)
	{
		return;
	}

	// file_put_contents() opens, writes and closes the file in one call
	file_put_contents($path, $converted);
}
\r
/**
 * This function displays all values in $charsets_array in a drop-down box
 * named "charfrom".
 *
 * @param array $charsets_array  The options to display
 * @author Cindy Qi Li
 */
function display_options($charsets_array)
{
	echo '<select name="charfrom" id="charfrom" class="input">' . "\n";

	foreach ($charsets_array as $charset)
	{
		// The values can come from the content of uploaded files, so they
		// must be escaped before being written into the page.
		echo '<option>' . htmlspecialchars($charset, ENT_QUOTES) . '</option>' . "\n";
	}

	echo '</select>' . "\n";
}
\r
/**
 * This function deletes the content of $dir recursively without deleting $dir itself.
 *
 * Files directly under $dir are unlinked; sub-directories are handed to
 * clr_dir() from lib/filemanager.inc.php.
 *
 * @param  string $dir  The name of the directory where all files and folders under needs to be deleted
 * @return boolean      false when $dir cannot be opened or an entry cannot be
 *                      removed, true otherwise.
 *                      NOTE(review): the failure branches were not legible in
 *                      the reviewed text; stopping at the first failure and
 *                      returning false is assumed. Callers in this file
 *                      ignore the result.
 * @author Cindy Qi Li
 */
function clear_dir($dir) {
	include_once(AT_INCLUDE_PATH . '/lib/filemanager.inc.php');

	if (!$handle = @opendir($dir)) {
		return false;
	}

	$cleared = true;

	while (($entry = readdir($handle)) !== false) {
		if (($entry === '..') || ($entry === '.')) {
			continue;
		}

		// Use the entry name exactly as returned: trimming it would make
		// names with leading/trailing whitespace point to a non-existent path.
		$target = $dir . '/' . $entry;

		clearstatcache(); /* especially needed for Windows machines: */

		if (is_file($target)) {
			if (!@unlink($target)) {
				$cleared = false;
				break;
			}
		} else if (is_dir($target)) {
			/* calls lib function to clear subdirectories recursively */
			if (!clr_dir($target)) {
				$cleared = false;
				break;
			}
		}
	}

	// Single exit point so the directory handle is released on failure too
	@closedir($handle);

	return $cleared;
}
\r
/**
 * Main convert process:
 * 1. Unzip uploaded file to module's content directory
 * 2. Convert unzipped files with file types to convert
 * 3. Zip converted files
 * 4. force zipped converted file to download
 * 5. clear all temp files
 *
 * Two form submissions reach this code:
 * - "Convert": a zip file was uploaded; it is unzipped and scanned for charsets.
 *   If exactly one charset is found the conversion runs immediately.
 * - "Go": the user picked the source charset ("charfrom") for files that were
 *   unzipped by a previous "Convert" request.
 *
 * @author Cindy Qi Li
 */
$module_content_folder = AT_CONTENT_DIR . "utf8conv";

include_once(AT_INCLUDE_PATH . '/classes/pclzip.lib.php');

// Which submit button was pressed. Reading $_POST['Go'] / $_POST['Convert']
// directly raises "undefined index" whenever the other button was used.
$is_convert = !empty($_POST['Convert']);
$is_go      = !empty($_POST['Go']);

if ($is_convert)
{
	// clean up module content folder
	clear_dir($module_content_folder);

	// 1. unzip uploaded file to module's content directory
	$archive = new PclZip($_FILES['userfile']['tmp_name']);

	if ($archive->extract(PCLZIP_OPT_PATH, $module_content_folder) == 0)
	{
		clear_dir($module_content_folder);
		// the error text can contain names taken from the archive: escape it
		die("Cannot unzip file " . $_FILES['userfile']['tmp_name'] . "<br>Error : ".htmlspecialchars($archive->errorInfo(true)));
	}
}

if ($is_convert || $is_go)
{
	// 2. Read content folder recursively to convert.
	include_once("readDir.php");

	$dir = new readDir();
	// set the directory to read
	if (!$dir->setPath( $module_content_folder ))
	{
		clear_dir($module_content_folder);
		die($dir->error());
	}

	// set recursive reading of sub folders
	$dir->readRecursive(true);

	// set a function to call when a new file is read
	if (!$dir->setEvent( 'readDir_file', 'find_all_charsets' ))
	{
		clear_dir($module_content_folder);
		die($dir->error());
	}

	// read the folder: fills $charsets through find_all_charsets()
	if ( !$dir->read() )
	{
		clear_dir($module_content_folder);
		die($dir->error());
	}

	$single_charset = (count($charsets) == 1);

	// Convert right away if only one character set is found in all html & xml
	// files, or if the user has just chosen the character set to convert from.
	if (($single_charset && $is_convert) || (!$single_charset && $is_go))
	{
		if ($single_charset)
			$charset_from = $charsets[0];
		else
			$charset_from = isset($_POST['charfrom']) ? $_POST['charfrom'] : '';

		// convert
		$dir = new readDir();
		$dir->setPath( $module_content_folder );
		$dir->readRecursive(true);
		$dir->setEvent( 'readDir_file', 'convert' );
		// NOTE(review): this call was not legible in the reviewed text; without
		// it the 'convert' callback is registered but never run.
		$dir->read();

		// 3. ZIP converted files
		if ($is_convert)
			$orig_filename = isset($_FILES['userfile']['name']) ? $_FILES['userfile']['name'] : '';
		else
			$orig_filename = isset($_POST['filename']) ? $_POST['filename'] : '';

		// Both values are supplied by the client. Keep only the last path
		// component so that "../" sequences cannot make the zip be written to,
		// and later unlinked from, a location outside AT_CONTENT_DIR.
		$orig_filename = basename(str_replace('\\', '/', (string) $orig_filename));

		// Strip the extension whatever its case (".ZIP" passes the browser-side
		// check too) and drop characters that would break the download header.
		$zip_stem = preg_replace('/\.zip$/i', '', $orig_filename);
		$zip_stem = preg_replace('/[\x00-\x1F\x7F"]/', '_', (string) $zip_stem);

		if ($zip_stem === null || $zip_stem === '')
		{
			$zip_stem = 'converted';
		}

		$zip_filename = AT_CONTENT_DIR . "/" . $zip_stem . '_' . $charset_to . '.zip';

		$archive = new PclZip($zip_filename);

		if ($archive->add($module_content_folder, PCLZIP_OPT_REMOVE_PATH, $module_content_folder) == 0) {
			clear_dir($module_content_folder);
			die("Cannot zip converted files. <br>Error : ".htmlspecialchars($archive->errorInfo(true)));
		}

		// 4. force zipped converted file to download
		// Discard anything already buffered so that only the zip bytes are sent.
		// NOTE(review): the statement preceding the headers was not legible in
		// the reviewed text - confirm.
		if (ob_get_level() > 0)
		{
			ob_end_clean();
		}

		header('Content-Type: application/x-zip');
		header('Content-transfer-encoding: binary');
		// HTML-escaping does not belong in an HTTP header (it would turn "&"
		// into "&amp;" in the saved name); the name was sanitised above.
		header('Content-Disposition: attachment; filename="' . basename($zip_filename) . '"');
		header('Expires: 0');
		header('Cache-Control: must-revalidate, post-check=0, pre-check=0');
		header('Pragma: public');

		readfile($zip_filename);

		// 5. clear all temp files
		unlink($zip_filename);
		clear_dir($module_content_folder);

		// Stop here: any page output after the zip bytes would corrupt the download.
		exit;
	}
}
// End of main convert process
\r
// Page shown when no download was sent: the upload form and, after a
// "Convert" request that did not find exactly one character set, the
// drop-down to choose the character set to convert from.
// NOTE(review): tag-only lines (script/div/form closings, PHP open/close tags)
// were not legible in the reviewed text and are reconstructed here.
include_once (AT_INCLUDE_PATH.'header.inc.php');

// Check ICONV library is installed and enabled
if (!extension_loaded('iconv') || !function_exists('iconv'))
{
	die ('<font color="red">Warning: This utility is not available as PHP ICONV module is not installed or not enabled.</font>');
}

// True when this page is displayed in response to the "Convert" button.
// Checked with empty() because the key does not exist on a plain page load.
$convert_posted = !empty($_POST['Convert']);

// The uploaded file name is written into a JavaScript string further down.
// It comes from the client, so encode it as a JS literal with the characters
// that could end the string or the <script> element hex-escaped.
$uploaded_name_js = '""';
if ($convert_posted && isset($_FILES['userfile']['name']))
{
	$encoded_name = json_encode((string) $_FILES['userfile']['name'], JSON_HEX_TAG | JSON_HEX_AMP | JSON_HEX_APOS | JSON_HEX_QUOT);

	// json_encode() returns false for names that are not valid UTF-8
	if ($encoded_name !== false)
	{
		$uploaded_name_js = $encoded_name;
	}
}
?>

<SCRIPT LANGUAGE="JavaScript">
<!--

String.prototype.trim = function() {
	return this.replace(/^\s+|\s+$/g,"");
}

// This function validates if and only if a zip file is given
function validate_filename() {
	// check file type
	var file = document.frm_upload.userfile.value;
	if (!file || file.trim()=='') {
		alert('Please give a zip file!');
		return false;
	}

	if(file.slice(file.lastIndexOf(".")).toLowerCase() != '.zip') {
		alert('Please upload ZIP file only!');
		return false;
	}

	return true;
}

//-->
</SCRIPT>

<FORM NAME="frm_upload" ENCTYPE="multipart/form-data" METHOD=POST ACTION="<?php echo htmlspecialchars($_SERVER['PHP_SELF'], ENT_QUOTES); ?>" >

<div class="input-form">

	<div class="row">
		Upload a zip file to convert the character set to UTF-8:
	</div>

	<div class="row">
		<INPUT TYPE="hidden" name="MAX_FILE_SIZE" VALUE="52428800">
		<INPUT TYPE="file" NAME="userfile" SIZE=50>
	</div>

	<div class="row buttons">
		<INPUT TYPE="submit" name="Convert" value="Convert" onClick="javascript: return validate_filename(); " class="submit" />
		<INPUT TYPE="hidden" name="filename">
	</div>

<?php
if ($convert_posted && count($charsets) != 1)
{
?>
	<div class="row">
<?php
	if (count($charsets) > 1)
	{
?>
		<font color="red">Multiple character sets are found, please select one to convert from:</font>
<?php
		display_options($charsets);
	}
	else
	{
?>
		<font color="red">No character set found in zip file, please choose one character set to convert from:</font>
<?php
		display_options($default_charsets);
	}
?>
	</div>

	<div class="row buttons">
		<INPUT TYPE="submit" name="Go" value="Go" class="submit" />
	</div>
<?php
}
?>

</div>

</FORM>

<SCRIPT LANGUAGE="JavaScript">
<!--
<?php
// store the upload zip file name in a hidden field for the future use for charsets selection
if ($convert_posted)
{
?>
document.frm_upload.filename.value = <?php echo $uploaded_name_js; ?>;
<?php
}
?>
//-->
</SCRIPT>

<?php include_once (AT_INCLUDE_PATH.'footer.inc.php'); ?>
\r