http://atutor.ca/atutor/mantis/view.php?id=4574
[acontent.git] / docs / include / lib / resources_parser.inc.php
1 <?php
2 /************************************************************************/
3 /* AContent                                                             */
4 /************************************************************************/
5 /* Copyright (c) 2010                                                   */
6 /* Inclusive Design Institute                                           */
7 /*                                                                      */
8 /* This program is free software. You can redistribute it and/or        */
9 /* modify it under the terms of the GNU General Public License          */
10 /* as published by the Free Software Foundation.                        */
11 /************************************************************************/
12
/* This file is only meant to be included by a script that has already defined TR_INCLUDE_PATH. */
if (!defined('TR_INCLUDE_PATH')) { exit; }

global $db;

/*
 * TR_INCLUDE_PATH is guaranteed to be defined once the guard above has passed,
 * so it is not defined again here: a second define() can never succeed and
 * only raises a "constant already defined" diagnostic.
 */
include_once(TR_INCLUDE_PATH.'../home/classes/ContentUtility.class.php');

/*
 * $stripslashes is called as a variable function; it is not set in this file
 * (presumably the including script provides it -- confirm).
 * A missing body_text field is treated as an empty body instead of reading an
 * undefined index.
 */
$body_text      = htmlspecialchars($stripslashes(isset($_POST['body_text']) ? $_POST['body_text'] : ''));
$body           = html_entity_decode($body_text);

require(TR_INCLUDE_PATH.'classes/XML/XML_HTMLSax/XML_HTMLSax.php');     /* for XML_HTMLSax */
require(TR_INCLUDE_PATH.'../home/imscc/include/ims_template.inc.php');  /* for ims templates + print_organizations() */
25
/*
the following resources are to be identified:
even if some of these can't be images, they can still be files in the content dir.
theoretically the only urls we wouldn't deal with would be for a <!DOCTYPE and <form>

img     => src
a       => href                 // ignored if href doesn't exist (ie. <a name>)
object  => data | classid       // probably only want data
applet  => classid | archive    // TODO: double check that each of these is a valid file (not a dir)
script  => src
input   => src
iframe  => src
embed   => src
*/
39         
/**
 * SAX element handler that collects the file references found in HTML
 * start tags (img/src, a/href, object/data, ...).
 *
 * Every reference found is appended to the global $my_files array.
 */
class MyHandler {
	/**
	 * Nothing to set up. Declared as __construct() because a method named
	 * after its class is deprecated as a constructor in PHP 7 and is no
	 * longer treated as one in PHP 8.
	 */
	function __construct() {}

	/**
	 * Called for every opening tag; records the files the tag refers to.
	 *
	 * @param object $parser the parser invoking the handler (not used)
	 * @param string $name   tag name, in any letter case
	 * @param array  $attrs  attribute name => value pairs, names in any letter case
	 */
	function openHandler(& $parser,$name,$attrs) {
		global $my_files;

		$name  = strtolower($name);
		$attrs = array_change_key_case($attrs, CASE_LOWER);

		/* tag => attribute (or list of attributes) holding a file reference */
		$elements = array(
			'img'    => 'src',
			'a'      => 'href',
			'object' => array('data', 'classid'),
			'applet' => array('classid', 'archive'),
			'script' => 'src',
			'input'  => 'src',
			'iframe' => 'src',
			'embed'  => 'src',
		);

		/* tags that cannot reference a file are ignored */
		if (!isset($elements[$name])) {
			return;
		}

		if (is_array($elements[$name])) {
			/* this tag specifies its files in different ways: (ie. java) */
			foreach ($elements[$name] as $item) {
				/* the attribute may be absent (ie. <object data> without classid) */
				if (!isset($attrs[$item]) || $attrs[$item] == '') {
					continue;
				}

				/* some attributes allow a listing of files separated by commas (ie. applet->archive). */
				if (strpos($attrs[$item], ',') !== false) {
					foreach (explode(',', $attrs[$item]) as $file) {
						$file = trim($file);
						/* "a.jar," or "a.jar,,b.jar" must not yield an empty file name */
						if ($file != '') {
							$my_files[] = $file;
						}
					}
				} else {
					$my_files[] = $attrs[$item];
				}
			}
			return;
		}

		/* we know exactly which attribute contains the reference to the file. */
		$item = $elements[$name];
		if (isset($attrs[$item]) && $attrs[$item] != '') {
			/* taken verbatim: a comma is legal inside a single URL */
			$my_files[] = $attrs[$item];
		}
	}

	/**
	 * Called for every closing tag; nothing needs to be recorded here.
	 *
	 * @param object $parser the parser invoking the handler (not used)
	 * @param string $name   tag name
	 */
	function closeHandler(& $parser,$name) { }
}
83
/* get all the content */
$handler = new MyHandler();
$parser  = new XML_HTMLSax();
$parser->set_object($handler);
$parser->set_element_handler('openHandler','closeHandler');

/* generate the resources and save the HTML files */

global $parser, $my_files;
global $course_id;

/* add the resource dependencies */
$my_files       = array();
$content_files  = "\n";

/*
 * Result list built by this file. It is reset here so that it is always
 * defined, and so that a previous inclusion cannot leave stale entries behind.
 */
$resources      = array();

//in order to control if some [media] is in the body_text
//$body = ContentUtility::embedMedia($body_t);

/* MyHandler::openHandler() appends every referenced file to $my_files */
$parser->parse($body);

// find all [media] resources
// The capture is non-greedy so that two [media] tags on the same line give
// two matches instead of one match spanning from the first tag to the last.
preg_match_all('/\[media[0-9a-z\|]*\](.*?)\[\/media\]/i', $body, $media_matches);
$my_files = array_merge($media_matches[1], $my_files);

// add by Cindy Li.
// This resolves the problem introduced by the [media] tag: when [media] is
// parsed into <object>, the same resource appears a few times in <object> in different
// formats to cater for different browsers or players, and all of these would be
// considered different resources. The array_unique() call solves this problem, but it
// introduces a new one: when the same resource appears at different places in the
// content and users want a different alternative for each place, the resource still
// shows up only once at "adapt content" and can only have one alternative associated
// with it. Table and scripts need to be re-designed to solve this, for example by
// including the line number in the table.
$my_files = array_unique($my_files);

/* handle @import */
$import_files   = get_import_files($body);

if (!empty($import_files)) {
	$my_files = array_merge($my_files, $import_files);
}

$i = 0;

foreach ($my_files as $file) {
	/*
	 * Full urls are not filtered out: the scheme check that used to skip them
	 * is disabled, and the youtube handling below relies on seeing them.
	 */

	/* file should be relative to content. let's double check */
	if (substr($file, 0, 1) == '/') {
		continue;
	}

	// A youtube movie URL may have been converted into its playable form,
	// for example http://www.youtube.com/watch?v=a0ryB0m0MiM into
	// http://www.youtube.com/v/a0ryB0m0MiM. The parsed-out URL then does not match
	// the URL saved in the content table, so it is converted back to the original here.
	// The dot in "youtube.com" is escaped so that it only matches a literal dot.
	$file = preg_replace('/(http:\/\/[a-z0-9\.]*)?youtube\.com\/v\/(.*)/',
	                     '\\1youtube.com/watch?v=\\2', $file);

	$resources[$i] = $file;
	$i++;
}
151 ?>