remove old readme
[atutor.git] / docs / include / lib / html_resource_parser.inc.php
1 <?php
2 /****************************************************************/
3 /* ATutor                                                                                                               */
4 /****************************************************************/
5 /* Copyright (c) 2002-2008 by Greg Gay, Cindy Qi Li,                    */
6 /* & Harris Wong                                                                                                */
7 /* Inclusive Design Institute                                   */
8 /* http://atutor.ca                                                                                             */
9 /*                                                              */
10 /* This program is free software. You can redistribute it and/or*/
11 /* modify it under the terms of the GNU General Public License  */
12 /* as published by the Free Software Foundation.                                */
13 /****************************************************************/
14 // $Id$
15
/**
 * Finds the local files referenced by a piece of HTML.
 *
 * The HTML is run through XML_HTMLSax with an XML_HTMLSax_Handler, which
 * collects the URL-bearing attributes of the tags it knows about. Full URLs
 * (anything with a scheme) and absolute paths (leading '/') are discarded.
 * Each remaining reference is kept only if it resolves to a regular file
 * located inside the course's content directory.
 *
 * @param   string  $text       the HTML to scan for referenced resources
 * @param   int     $course_id  course id, which should be in $_SESSION, but during export to AContent,
 *                              SESSION is not available, thus we will have to use parameters
 * @return  array   resource reference exactly as written in the HTML => resolved server path
 */
function get_html_resources($text, $course_id=0) {
	$resources = array();
	if ($course_id == 0) {
		$course_id = $_SESSION['course_id'];
	}

	$handler = new XML_HTMLSax_Handler();

	$parser = new XML_HTMLSax();
	$parser->set_object($handler);
	$parser->set_element_handler('openHandler','closeHandler');

	$parser->parse($text);

	// Resolve the course directory once. If it does not exist, nothing below it can exist either.
	$course_dir = realpath(AT_CONTENT_DIR . $course_id);
	if ($course_dir === false) {
		return $resources;
	}
	// The trailing separator stops "/content/12" from matching "/content/123/...".
	$course_prefix = rtrim($course_dir, '/\\') . DIRECTORY_SEPARATOR;
	$prefix_length = strlen($course_prefix);

	foreach ($handler->resources as $resource) {
		$url_parts = @parse_url($resource);

		if (isset($url_parts['scheme'])) {
			// we don't want full urls
			continue;
		}

		if (substr($resource, 0, 1) == '/') {
			// we don't want absolute urls
			continue;
		}

		// make sure this resource exists in this course's content directory:
		$resource_server_path = realpath(AT_CONTENT_DIR . $course_id . '/' . $resource);
		if ($resource_server_path === false) {
			// realpath() fails when the target does not exist
			continue;
		}

		// realpath() collapses "../" segments, so a reference can resolve to a
		// file outside the course directory; such references are rejected.
		if (strncmp($resource_server_path, $course_prefix, $prefix_length) !== 0) {
			continue;
		}

		if (is_file($resource_server_path)) {
			$resources[$resource] = $resource_server_path;
		}
	}
	return $resources;
}
56
/*
	the following resources are to be identified:
	even if some of these can't be images, they can still be files in the content dir.
	theoretically the only urls we wouldn't deal with would be for a <!DOCTYPE
	(note: a <form> action IS collected, see the map below).

	img     => src
	a       => href                 // ignore if href doesn't exist (ie. <a name>)
	object  => data | classid       // probably only want data
	applet  => classid | archive    // whatever these two are should double check to see if it's a valid file (not a dir)
	link    => href
	script  => src
	form    => action
	input   => src
	iframe  => src
	embed   => src
	param   => value
*/
/**
 * Element handler that records the values of URL-bearing attributes.
 *
 * openHandler() is called for each opening tag; every matching attribute
 * value is appended to $resources in the order encountered. No filtering of
 * the values happens here.
 */
class XML_HTMLSax_Handler {
	/**
	 * Lower-case tag name => attribute name, or a list of attribute names
	 * when a tag can reference files through more than one attribute.
	 */
	var $elements = array(	'img'    => 'src',
							'a'      => 'href',
							'object' => array('data',    'classid'),
							'applet' => array('classid', 'archive'),
							'link'   => 'href',
							'script' => 'src',
							'form'   => 'action',
							'input'  => 'src',
							'iframe' => 'src',
							'embed'  => 'src',
							'param'  => 'value');

	/** Attribute values collected so far, in document order. */
	var $resources = array();

	/**
	 * Starts with an empty resource list.
	 */
	function __construct() {
		$this->resources = array();
	}

	/**
	 * Legacy PHP4-style constructor, kept so explicit calls to it keep working.
	 * It simply delegates to __construct().
	 */
	function XML_HTMLSax_Handler() {
		$this->__construct();
	}

	/**
	 * Opening-tag callback: records the file references carried by the tag.
	 *
	 * @param   mixed   $parser  the parser invoking the callback (unused)
	 * @param   string  $name    tag name, in any case
	 * @param   array   $attrs   attribute name => value, names in any case
	 */
	function openHandler(& $parser,$name,$attrs) {
		$name = strtolower($name);

		if (!isset($this->elements[$name])) {
			// this tag never references a resource
			return;
		}

		$attrs  = array_change_key_case($attrs, CASE_LOWER);
		$source = $this->elements[$name];

		/* check if this attribute specifies the files in different ways: (ie. java) */
		if (is_array($source)) {
			foreach ($source as $item) {
				// isset() guards tags that carry only some of their possible attributes
				if (!isset($attrs[$item]) || $attrs[$item] == '') {
					continue;
				}

				/* some attributes allow a listing of files to include separated by commas (ie. applet->archive). */
				if (strpos($attrs[$item], ',') !== false) {
					foreach (explode(',', $attrs[$item]) as $file) {
						$this->resources[] = trim($file);
					}
				} else {
					$this->resources[] = $attrs[$item];
				}
			}
		} else if (isset($attrs[$source]) && ($attrs[$source] != '')) {
			/* we know exactly which attribute contains the reference to the file. */
			$this->resources[] = $attrs[$source];
		}
	}

	/**
	 * Closing-tag callback; nothing needs to be done when a tag closes.
	 */
	function closeHandler(& $parser,$name) { }
}
118 ?>