AT-4848: Added a condition to handle a false positive.
[atutor.git] / docs / mods / _core / imscp / ims_import.php
index f11e90d..103163e 100644 (file)
@@ -3,13 +3,13 @@
 /* ATutor                                                               */
 /************************************************************************/
 /* Copyright (c) 2002 - 2009                                            */
-/* Adaptive Technology Resource Centre / University of Toronto          */
+/* Inclusive Design Institute                                           */
 /*                                                                      */
 /* This program is free software. You can redistribute it and/or        */
 /* modify it under the terms of the GNU General Public License          */
 /* as published by the Free Software Foundation.                        */
 /************************************************************************/
-// $Id: ims_import.php 9081 2010-01-13 20:26:03Z cindy $
+// $Id$
 define('AT_INCLUDE_PATH', '../../../include/');
 require(AT_INCLUDE_PATH.'vitals.inc.php');
 
@@ -95,18 +95,18 @@ function checkResources($import_path){
 
        //generate a file tree
        $data = rscandir($import_path);
-
        //check if every file is presented in the manifest
        foreach($data as $filepath){
-               $filepath = substr($filepath, strlen($import_path));
-
+//     debug(preg_match('/(.*)\.xml/', substr($filepath, strlen($import_path))));
                //validate xml via its xsd/dtds
-               if (preg_match('/(.*)\.xml/', $filepath)){
+               if (preg_match('/(.*)\.xml/', substr($filepath, strlen($import_path)))){
                        libxml_use_internal_errors(true);
                        $dom = new DOMDocument();
-                       $dom->load(realpath($import_path.$filepath));
-                       if (!@$dom->schemaValidate('main.xsd')){
+                       $dom->load(realpath($filepath));
+//                     debug(realpath($filepath), 'filepath');
+                       if (!$dom->schemaValidate('main.xsd')){
                                $errors = libxml_get_errors();
+//                             debug($errors);exit;
                                foreach ($errors as $error) {
                                        //suppress warnings
                                        if ($error->level==LIBXML_ERR_WARNING){
@@ -116,6 +116,7 @@ function checkResources($import_path){
                                }
                                libxml_clear_errors();
                        }
+                       
                        //if this is the manifest file, we do not have to check for its existance.
 //                     if (preg_match('/(.*)imsmanifest\.xml/', $filepath)){
 //                             continue;
@@ -129,7 +130,10 @@ function checkResources($import_path){
                if(isset($fileinfo['file']) && is_array($fileinfo['file']) && !empty($fileinfo['file'])){
                        foreach($fileinfo['file'] as $fn){
                                if (!in_array(realpath($import_path.$fn), $filearray)){
-                                       $filearray[] = realpath($import_path. $fn);
+                                       //if url, skip
+                                       if (preg_match('/^http[s]?\:/', $fn) == 0){
+                                               $filearray[] = realpath($import_path. $fn);
+                                       }                                       
                                }
                        }
                }
@@ -154,8 +158,10 @@ function checkResources($import_path){
        //other arrays. 
        //Using sizeof make sure it's not a subset of array2.
        //-1 on data because it always contain the imsmanifest.xml file
-       if (!empty($result) || sizeof($data)-1>sizeof($filearray)){
-               $msg->addError(array('IMPORT_CARTRIDGE_FAILED', _AT('ims_missing_references')));
+       if (!$skip_ims_validation){
+           if (!empty($result) || sizeof($data)-1>sizeof($filearray)){
+                   $msg->addError(array('IMPORT_CARTRIDGE_FAILED', _AT('ims_missing_references')));
+           }
        }
        return true;
 }
@@ -264,6 +270,66 @@ function rehash($items){
        return $rehashed_items;
 }
 
+/**
+ * Find the leading directory path shared by every content file listed in
+ * $items[...]['file'], so it can be taken out of each $items['new_path'].
+ * This allows import/export repeatedly without duplicating its path.
+ * URLs (http/https) and *.xml files are left out of the comparison.
+ * @param   array   $items  the breakdown of all resources in the XML
+ * @return  string  the shared '/'-separated path, or '' if there is none
+ */
+function removeCommonPath($items){
+    $common_path = '';  //initialised so it is never read while undefined
+    $seeded = false;    //set once the first candidate path has been taken
+
+    //get all local (non-url) files listed in the manifest, without duplicates
+    $filearray = array();
+    foreach($items as $fileinfo){
+        if(!isset($fileinfo['file']) || !is_array($fileinfo['file'])){
+            continue;
+        }
+        foreach($fileinfo['file'] as $fn){
+            if (!in_array($fn, $filearray, true) && preg_match('/^http[s]?\:/', $fn) == 0){
+                $filearray[] = $fn;
+            }
+        }
+    }
+
+    foreach($filearray as $path){
+        //hack: XML most likely isn't a content resource, so skip it.
+        if(pathinfo($path, PATHINFO_EXTENSION)=='xml'){
+            continue;
+        }
+        //the first usable path seeds the comparison.
+        if (!$seeded){
+            $common_path = $path;
+            $seeded = true;
+            continue;
+        }
+        //split on '/' rather than DIRECTORY_SEPARATOR, which is '\' on Windows.
+        $common_array = explode('/', $common_path);
+        $path_array = explode('/', $path);
+        //collapse "dir/.." pairs; a leading '..' (index 0) is left as it is.
+        while($pos=array_search('..', $path_array, true)){
+            array_splice($path_array, $pos-1, 2);
+        }
+        //keep only the leading segments that both paths share, in order.
+        $shared = array();
+        foreach($common_array as $i=>$segment){
+            if (!isset($path_array[$i]) || $path_array[$i]!==$segment){
+                break;
+            }
+            $shared[] = $segment;
+        }
+        $common_path = implode('/', $shared);
+    }
+    // A result that is itself a listed file means nothing was shared: use the base root.
+    if(in_array($common_path, $filearray, true)) {
+        $common_path = '';
+    }
+    return $common_path;
+}
+
 
 /** 
  * This function will take the test accessment XML and add these to the database.
@@ -296,7 +362,7 @@ function rehash($items){
        /* builds the $path array which is the path from the root to the current element */
        function startElement($parser, $name, $attrs) {
                global $items, $path, $package_base_path, $all_package_base_path, $package_real_base_path;
-               global $element_path, $import_path;
+               global $element_path, $import_path, $skip_ims_validation;
                global $xml_base_path, $test_message, $content_type;
                global $current_identifier, $msg, $ns, $ns_cp;
 
@@ -318,7 +384,7 @@ function rehash($items){
 */
 
                //validate namespaces
-               if(isset($attrs['xsi:schemaLocation']) && $name=='manifest'){
+               if(!$skip_ims_validation && isset($attrs['xsi:schemaLocation']) && $name=='manifest'){
                        $schema_location = array();
                        $split_location = preg_split('/[\r\n\s]+/', trim($attrs['xsi:schemaLocation']));
 
@@ -354,7 +420,7 @@ function rehash($items){
                        $xml_base_path = $attrs['xml:base'];
                } else if ($name == 'file') {
                        // check if it misses file references
-                       if(!isset($attrs['href']) || $attrs['href']==''){
+                       if(!$skip_ims_validation && (!isset($attrs['href']) || $attrs['href']=='')){
                                //$msg->addError('MANIFEST_NOT_WELLFORM');
                                $msg->addError(array('IMPORT_CARTRIDGE_FAILED', _AT('ims_missing_references')));
                        }
@@ -368,15 +434,18 @@ function rehash($items){
                        }
 
                        $temp_path = pathinfo($attrs['href']);
-                       $temp_path = explode('/', $temp_path['dirname']);
-                       if (empty($package_base_path)){
-                           $package_base_path = $temp_path;
-            }
-                       if ($all_package_base_path!='' && empty($all_package_base_path)){
-                               $all_package_base_path = $temp_path;
+//                     if (!strpos($temp_path['dirname'], 'Share')) {
+            if ($temp_path['extension'] == 'html') {
+                           $temp_path = explode('/', $temp_path['dirname']);
+                           if (empty($package_base_path)){
+                               $package_base_path = $temp_path;
+                }
+                           if ($all_package_base_path!='' && empty($all_package_base_path)){
+                                   $all_package_base_path = $temp_path;
+                           }
+                           $package_base_path = array_intersect_assoc($package_base_path, $temp_path);
                        }
-                       $package_base_path = array_intersect_assoc($package_base_path, $temp_path);
-                       
+
                        //calculate the depths of relative paths
                        if ($all_package_base_path!=''){
                                $no_relative_temp_path = $temp_path;
@@ -396,15 +465,17 @@ function rehash($items){
                        if (in_array('..', $temp_path)){
                                $sizeofrp = array_count_values($temp_path);
                        }
-
-                       //for IMSCC, assume that all resources lies in the same folder, except styles.css
+                       //for IMSCC, assume that all resources lies in the same folder, except styles.css               
                        if ($items[$current_identifier]['type']=='webcontent' || $items[$current_identifier]['type']=='imsdt_xmlv1p0'){
                                //find the intersection of each item's related files, then that intersection is the content_path
                                if (isset($items[$current_identifier]['file'])){
                                        foreach ($items[$current_identifier]['file'] as $resource_path){
-                                               $temp_path = pathinfo($resource_path);
-                                               $temp_path = explode('/', $temp_path['dirname']);
-                                               $package_base_path = array_intersect_assoc($package_base_path, $temp_path);                                             
+                                           if (!strpos($resource_path, 'Share')) {
+                                                   $temp_path = pathinfo($resource_path);
+                                                   $temp_path = explode('/', $temp_path['dirname']);
+
+                                                   $package_base_path = array_intersect_assoc($package_base_path, $temp_path);                                         
+                                               }
                                        }
                                }
                        }
@@ -415,33 +486,15 @@ function rehash($items){
                                        array_pop($all_package_base_path);
                                }
                        }
-                       
-                       if (count($package_base_path) > 0) {
+                       if (!empty($package_base_path)) {
                                $items[$current_identifier]['new_path'] = implode('/', $package_base_path);
                        }
                                
-/* 
- * @harris, reworked the package_base_path 
-                               if ($package_base_path=="") {
-                                       $package_base_path = $temp_path;
-                               } 
-                               elseif (is_array($package_base_path) && $content_type != 'IMS Common Cartridge') {
-                                       //if this is a content package, we want only intersection
-                                       $package_base_path = array_intersect($package_base_path, $temp_path);
-                                       $temp_path = $package_base_path;
-                               }
-                               //added these 2 lines in so that pictures would load.  making the elseif above redundant.
-                               //if there is a bug for pictures not load, then it's the next 2 lines.
-                               $package_base_path = array_intersect($package_base_path, $temp_path);
-                               $temp_path = $package_base_path;
-                       }
-                       $items[$current_identifier]['new_path'] = implode('/', $temp_path);     
-*/
-                       if (    isset($_POST['allow_test_import']) && isset($items[$current_identifier]) 
+                       if (isset($_POST['allow_test_import']) && isset($items[$current_identifier]) 
                                                && preg_match('/((.*)\/)*tests\_[0-9]+\.xml$/', $attrs['href'])) {
                                $items[$current_identifier]['tests'][] = $attrs['href'];
                        } 
-                       if (    isset($_POST['allow_a4a_import']) && isset($items[$current_identifier])) {
+                       if (isset($_POST['allow_a4a_import']) && isset($items[$current_identifier])) {
                                $items[$current_identifier]['a4a_import_enabled'] = true;
                        }
                } else if (($name == 'item') && ($attrs['identifierref'] != '')) {
@@ -489,7 +542,7 @@ function rehash($items){
                        if(!isset($items[$current_identifier]) && $attrs['href']!=''){
                                $items[$current_identifier]['href']      = $attrs['href'];
                        }
-                       if (substr($attrs['href'], 0, 7) == 'http://' || substr($attrs['href'], 0, 8) == 'https://' || file_exists($import_path.$attrs['href'])){
+                       if (substr($attrs['href'], 0, 7) == 'http://' || substr($attrs['href'], 0, 8) == 'https://' || file_exists($import_path.$attrs['href']) || $skip_ims_validation){
                                $items[$current_identifier]['file'][] = $attrs['href'];
                        } else {
                                $msg->addError(array('IMPORT_CARTRIDGE_FAILED', _AT(array('ims_files_missing', $attrs['href']))));
@@ -661,6 +714,11 @@ if (!isset($_POST['submit']) && !isset($_POST['cancel'])) {
 
 $cid = intval($_POST['cid']);
 
+//If user chooses to ignore validation.
+if(isset($_POST['ignore_validation']) && $_POST['ignore_validation']==1) {
+       $skip_ims_validation = true;
+}
+
 if (isset($_POST['url']) && ($_POST['url'] != 'http://') ) {
        if ($content = @file_get_contents($_POST['url'])) {
                // save file to /content/
@@ -682,8 +740,9 @@ if (isset($_POST['url']) && ($_POST['url'] != 'http://') ) {
        $_FILES['file']['tmp_name'] = $full_filename;
        $_FILES['file']['size']     = strlen($content);
        unset($content);
-       $url_parts = pathinfo($_POST['url']);
-       $package_base_name_url = $url_parts['basename'];
+       //$url_parts = pathinfo($_POST['url']);
+       //$package_base_name_url = $url_parts['basename'];
+    $package_base_name_url = md5(time());
 }
 $ext = pathinfo($_FILES['file']['name']);
 $ext = $ext['extension'];
@@ -879,11 +938,7 @@ if (file_exists($import_path . $glossary_path . 'glossary.xml')){
 }
 
 // Check if all the files exists in the manifest, iff it's a IMS CC package.
-if ($content_type == 'IMS Common Cartridge') {
-       //If user chooses to ignore validation.
-       if(isset($_POST['ignore_validation']) && $_POST['ignore_validation']==1) {
-               $skip_ims_validation = true;
-       }
+if ($content_type == 'IMS Common Cartridge') { 
        checkResources($import_path);
 }
 
@@ -902,7 +957,7 @@ if ($msg->containsErrors()) {
 /* the 'content_path' field in the content table will be set to this path. */
 /* $package_base_name_url comes from the URL file name (NOT the file name of the actual file we open)*/
 if (!$package_base_name && $package_base_name_url) {
-       $package_base_name = substr($package_base_name_url, 0, -4);
+       $package_base_name = substr($package_base_name_url, -6);
 } else if (!$package_base_name) {
        $package_base_name = substr($_FILES['file']['name'], 0, -4);
 }
@@ -920,7 +975,6 @@ if ($package_base_path) {
 } elseif (empty($package_base_path)){
        $package_base_path = '';
 }
-
 if ($xml_base_path) {
        $package_base_path = $xml_base_path . $package_base_path;
 
@@ -935,6 +989,7 @@ $row        = mysql_fetch_assoc($result);
 $order_offset = intval($row['ordering']); /* it's nice to have a real number to deal with */
 $lti_offset = array(); //since we don't need lti tools, the ordering needs to be subtracted
 //reorder the items stack
+$common_path = removeCommonPath($items);
 $items = rehash($items);
 //debug($items);exit;
 foreach ($items as $item_id => $content_info) 
@@ -1167,8 +1222,12 @@ foreach ($items as $item_id => $content_info)
                $all_package_base_path = implode('/', $all_package_base_path);
        }
 
-       if ($all_package_base_path != '') {
-               $content_info['new_path'] = $package_base_name . substr($content_info['new_path'], strlen($all_package_base_path));
+    // If the manifest files share a common path, remove it from the content base_href.
+    // This prevents the path from growing longer each time the package is imported.
+       if ($common_path != '' 
+               && ($content_info['new_path'] === $common_path
+               || substr($content_info['new_path'], strlen($common_path)))) {
+               $content_info['new_path'] = $package_base_name . substr($content_info['new_path'], strlen($common_path));
        } else {
                $content_info['new_path'] = $package_base_name . '/' . $content_info['new_path'];
        }
@@ -1329,37 +1388,51 @@ foreach ($items as $item_id => $content_info)
                $dt_import->associateForum($items[$item_id]['real_content_id'], $added_dt[$item_id]);
        }
 }
-//exit;//harris
+
 if ($package_base_path == '.') {
        $package_base_path = '';
 }
 
 // loop through the files outside the package folder, and copy them to its relative path
+/**
 if (is_dir(AT_CONTENT_DIR . 'import/'.$_SESSION['course_id'].'/resources')) {
        $handler = opendir(AT_CONTENT_DIR . 'import/'.$_SESSION['course_id'].'/resources');
        while ($file = readdir($handler)){
                $filename = AT_CONTENT_DIR . 'import/'.$_SESSION['course_id'].'/resources/'.$file;
+debug($filename);
                if(is_file($filename)){
                        @rename($filename, AT_CONTENT_DIR .$_SESSION['course_id'].'/'.$package_base_name.'/'.$file);
                }
        }
        closedir($handler);
 }
+**/
+//--- harris edit for path thing
+$file = AT_CONTENT_DIR . 'import/'.$_SESSION['course_id'].DIRECTORY_SEPARATOR.$common_path;
+if (is_dir($file)) {
+    rename($file, AT_CONTENT_DIR .$_SESSION['course_id'].'/'.$package_base_name);
+}
+//--- end
 //takes care of the condition where the whole package doesn't have any contents but question banks
+//this is also the case when the paths are urls
 if(is_array($all_package_base_path)){
        $all_package_base_path = implode('/', $all_package_base_path);
+       if(strpos($all_package_base_path, 'http:/')===false){
+        if (@rename($import_path.$all_package_base_path, AT_CONTENT_DIR .$_SESSION['course_id'].'/'.$package_base_name) === false) {
+            if (!$msg->containsErrors()) {
+                $msg->addError('IMPORT_FAILED');
+            }
+        }
+    }
 }
-
-if (@rename($import_path.$all_package_base_path, AT_CONTENT_DIR .$_SESSION['course_id'].'/'.$package_base_name) === false) {
-       if (!$msg->containsErrors()) {
-               $msg->addError('IMPORT_FAILED');
-       }
-}
+//exit;//harris
 //check if there are still resources missing
+/*
 foreach($items as $idetails){
        $temp_path = pathinfo($idetails['href']);
        @rename(AT_CONTENT_DIR . 'import/'.$_SESSION['course_id'].'/'.$temp_path['dirname'], AT_CONTENT_DIR .$_SESSION['course_id'].'/'.$package_base_name . '/' . $temp_path['dirname']);
 }
+*/
 clr_dir(AT_CONTENT_DIR . 'import/'.$_SESSION['course_id']);
 
 if (file_exists($full_filename)) {