2 /************************************************************************/
4 /************************************************************************/
5 /* Copyright (c) 2010 */
6 /* Inclusive Design Institute */
8 /* This program is free software. You can redistribute it and/or */
9 /* modify it under the terms of the GNU General Public License */
10 /* as published by the Free Software Foundation. */
11 /************************************************************************/
/**
 * Collects the local files referenced by a fragment of HTML.
 *
 * The markup is fed through XML_HTMLSax with an XML_HTMLSax_Handler, which
 * gathers the values of the file-bearing attributes it knows about. Each
 * collected reference is then filtered:
 *   - references carrying a URL scheme (http:, mailto:, ...) are skipped;
 *   - references starting with "/" are skipped;
 *   - everything else is resolved against TR_CONTENT_DIR . $_course_id and
 *     kept only if it resolves to an existing regular file that lives inside
 *     that directory.
 *
 * @param string $text HTML to scan.
 * @return array Map of reference (exactly as written in the markup) to the
 *               resolved absolute path on the server.
 */
function get_html_resources($text) {
    // NOTE(review): the course id is not a parameter of this function, so it
    // has to come from global scope -- confirm that the including page sets it.
    global $_course_id;

    $resources = array();

    $handler = new XML_HTMLSax_Handler();

    $parser = new XML_HTMLSax();
    $parser->set_object($handler);
    $parser->set_element_handler('openHandler', 'closeHandler');

    $parser->parse($text);

    // Resolve the course directory once; it is the boundary every accepted
    // resource has to stay inside.
    $course_root = realpath(TR_CONTENT_DIR . $_course_id);
    if ($course_root === false) {
        // no content directory for this course, so nothing can match
        return $resources;
    }
    $course_prefix = rtrim($course_root, '/\\') . DIRECTORY_SEPARATOR;

    foreach ($handler->resources as $resource) {
        // parse_url() returns false for seriously malformed input; treat
        // that the same as "no scheme" and let the file checks decide.
        $url_parts = parse_url($resource);

        if (is_array($url_parts) && isset($url_parts['scheme'])) {
            // we don't want full urls
            continue;
        }

        if (strncmp($resource, '/', 1) === 0) {
            // we don't want absolute urls
            continue;
        }

        // make sure this resource exists in this course's content directory:
        $resource_server_path = realpath(TR_CONTENT_DIR . $_course_id . '/' . $resource);
        if ($resource_server_path === false) {
            // realpath() fails when the target does not exist
            continue;
        }

        // A reference such as "../../include/config.inc.php" resolves to a
        // real file outside the course directory; reject it.
        if (strncmp($resource_server_path, $course_prefix, strlen($course_prefix)) !== 0) {
            continue;
        }

        if (is_file($resource_server_path)) {
            $resources[$resource] = $resource_server_path;
        }
    }

    return $resources;
}
50 the following resources are to be identified:
51 even if some of these can't be images, they can still be files in the content dir.
52 theoretically the only urls we wouldn't deal with would be for a <!DOCTYPE and <form>
55 a => href // ignore if href doesn't exist (ie. <a name>)
56 object => data | classid // probably only want data
applet => classid | archive // whichever of these two is present, double-check that it points to a valid file (not a directory)
64 class XML_HTMLSax_Handler {
65 var $elements = array( 'img' => 'src',
67 'object' => array('data', 'classid'),
68 'applet' => array('classid', 'archive'),
76 var $resources = array();
/**
 * Starts the handler with an empty list of collected references.
 */
function __construct() {
    $this->resources = array();
}

/**
 * PHP 4-style constructor, kept so that existing explicit calls to it
 * keep working.
 *
 * A method named after its class is no longer invoked as the constructor
 * on PHP 8, so the real initialisation lives in __construct().
 */
function XML_HTMLSax_Handler() {
    $this->__construct();
}
/**
 * Start-tag callback: records the file references carried by a tag.
 *
 * The tag name is looked up (case-insensitively) in $this->elements. The
 * entry is either a single attribute name or a list of attribute names;
 * every non-empty value found is appended to $this->resources. For tags
 * whose entry is a list, a value containing commas is split and each
 * piece is stored trimmed.
 *
 * @param object $parser The parser invoking the callback (unused here).
 * @param string $name   Tag name as it appears in the markup.
 * @param array  $attrs  Attribute name => value map for the tag.
 * @return void
 */
function openHandler(& $parser, $name, $attrs) {
    $name  = strtolower($name);
    $attrs = array_change_key_case($attrs, CASE_LOWER);

    if (!isset($this->elements[$name])) {
        // not a tag we track; avoids an undefined-key warning on lookup
        return;
    }

    $spec = $this->elements[$name];

    /* check if this attribute specifies the files in different ways: (ie. java) */
    if (is_array($spec)) {
        foreach ($spec as $item) {
            // the attribute may simply be absent from this particular tag
            if (!isset($attrs[$item]) || $attrs[$item] == '') {
                continue;
            }

            /* some attributes allow a listing of files to include separated by commas (ie. applet->archive). */
            if (strpos($attrs[$item], ',') !== false) {
                foreach (explode(',', $attrs[$item]) as $file) {
                    $this->resources[] = trim($file);
                }
            } else {
                $this->resources[] = $attrs[$item];
            }
        }
        return;
    }

    /* we know exactly which attribute contains the reference to the file. */
    if (isset($attrs[$spec]) && $attrs[$spec] != '') {
        $this->resources[] = $attrs[$spec];
    }
}
/**
 * End-tag callback. Intentionally empty: nothing is done when a tag closes.
 *
 * @param object $parser The parser invoking the callback (unused).
 * @param string $name   Name of the tag being closed (unused).
 * @return void
 */
function closeHandler(& $parser, $name) {
}