remove old readme
[atutor.git] / docs / mods / _standard / rss_feeds / classes / lastRSS.php
1 <?php\r
2 /*\r
3  ======================================================================\r
4  lastRSS 0.9.1\r
5  \r
6  Simple yet powerful PHP class to parse RSS files.\r
7  \r
8  by Vojtech Semecky, webmaster @ webdot . cz\r
9  \r
10  Latest version, features, manual and examples:\r
11         http://lastrss.webdot.cz/\r
12 \r
13  ----------------------------------------------------------------------\r
14  LICENSE\r
15 \r
16  This program is free software; you can redistribute it and/or\r
17  modify it under the terms of the GNU General Public License (GPL)\r
18  as published by the Free Software Foundation; either version 2\r
19  of the License, or (at your option) any later version.\r
20 \r
21  This program is distributed in the hope that it will be useful,\r
22  but WITHOUT ANY WARRANTY; without even the implied warranty of\r
23  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
24  GNU General Public License for more details.\r
25 \r
26  To read the license please visit http://www.gnu.org/copyleft/gpl.html\r
27  ======================================================================\r
28 */\r
29 \r
30 /**\r
31 * lastRSS\r
32 * Simple yet powerful PHP class to parse RSS files.\r
33 */\r
class lastRSS {
    // -------------------------------------------------------------------
    // Public properties
    // -------------------------------------------------------------------
    // Source charset assumed when the feed does not declare one.
    var $default_cp = 'UTF-8';
    // CDATA handling: 'nochange', 'content' or 'strip' (see my_preg_match()).
    var $CDATA = 'nochange';
    // Target charset; when non-empty every extracted field is run through iconv().
    var $cp = '';
    // Maximum number of items Parse() extracts; 0 means no limit.
    var $items_limit = 0;
    // When true, tags and entities are stripped from item title/description.
    var $stripHTML = False;
    // date() format applied to lastBuildDate / pubDate; '' leaves them untouched.
    var $date_format = '';
    // Number of items Get() renders. Declared without a default so that
    // isset() stays false until a caller sets it and Get() can apply its default (5).
    var $num_results;
    // Whether Get() appends each item's description. Declared without a default
    // for the same reason; Get() defaults it to FALSE.
    var $description;

    // -------------------------------------------------------------------
    // Private variables
    // -------------------------------------------------------------------
    var $channeltags = array ('title', 'link', 'description', 'language', 'copyright', 'managingEditor', 'webMaster', 'lastBuildDate', 'rating', 'docs');
    var $itemtags = array('title', 'link', 'description', 'author', 'category', 'comments', 'enclosure', 'guid', 'pubDate', 'source');
    var $imagetags = array('title', 'url', 'link', 'width', 'height');
    var $textinputtags = array('title', 'description', 'name', 'link');
    // Charset of the feed currently being parsed; set by Parse(), read by my_preg_match().
    var $rsscp;

    // -------------------------------------------------------------------
    // Parse an RSS file and render its items as an HTML list.
    // Get() function modified by Heidi Hazelton
    //
    // $rss_url     - location passed to Parse() (anything fopen() can read)
    // $rss_feed_id - not used by this method; kept for caller compatibility
    //
    // Returns the "<ul class='rss_feed'>" markup, null when the feed was
    // read but holds no items, or '' when the feed could not be opened.
    // -------------------------------------------------------------------
    function Get ($rss_url, $rss_feed_id) {
        if (!isset($this->num_results)) {
            $this->num_results = 5;
        }
        if (!isset($this->description)) {
            $this->description = FALSE;
        }

        $result = $this->Parse($rss_url);
        if (!$result) {
            // Feed unreachable. The previous code touched/created a cache file
            // here through a variable that was never defined; that could only
            // fail (and throws on PHP 8), so nothing is written any more.
            return '';
        }
        if ($result['items_count'] == 0) {
            return null;
        }

        $shown = min($this->num_results, $result['items_count']);
        $output = "<ul class='rss_feed'>";
        for ($i = 0; $i < $shown; $i++) {
            $item = $result['items'][$i];
            // The link lands inside a single-quoted attribute: escape quotes and
            // angle brackets so feed content cannot break out of it.
            $link = isset($item['link']) ? $this->escape_attribute($item['link']) : '';
            // NOTE(review): title and description are emitted as delivered by the
            // feed (they may legitimately carry entities/markup). Only render
            // feeds you trust, or enable $stripHTML.
            $title = isset($item['title']) ? $item['title'] : '';
            $output .= "<li><a href='".$link."' target='_new'>".$title."</a>";
            if ($this->description) {
                $output .= "<br />".(isset($item['description']) ? $item['description'] : '');
            }
            $output .= "</li>\n";
        }
        $output .= "</ul>\n";

        return $output;
    }

    // -------------------------------------------------------------------
    // Escape the characters that could terminate an HTML attribute value
    // or open a tag. Works byte-wise, so it is independent of the feed
    // charset, and leaves '&' alone so existing entities are not doubled.
    // -------------------------------------------------------------------
    function escape_attribute ($value) {
        return str_replace(
            array("'", '"', '<', '>'),
            array('&#039;', '&quot;', '&lt;', '&gt;'),
            $value
        );
    }

    // -------------------------------------------------------------------
    // Modification of preg_match(): returns the trimmed capture group 1,
    // after optional CDATA handling and charset conversion, or '' when
    // the pattern does not match.
    // -------------------------------------------------------------------
    function my_preg_match ($pattern, $subject) {
        if (!preg_match($pattern, $subject, $out) || !isset($out[1])) {
            return '';
        }
        $value = $out[1];

        // Remove the CDATA wrapper but keep its content.
        // NOTE(review): 'strip' does exactly the same as 'content' here; if it
        // was meant to drop the whole CDATA section, confirm before changing.
        if ($this->CDATA == 'content' || $this->CDATA == 'strip') {
            $value = strtr($value, array('<![CDATA['=>'', ']]>'=>''));
        }

        // If a code page is set, convert the text to it.
        if ($this->cp != '') {
            // Fall back to the default when Parse() has not set a feed charset yet.
            $from = !empty($this->rsscp) ? $this->rsscp : $this->default_cp;
            $converted = iconv($from, $this->cp.'//TRANSLIT', $value);
            // A failed conversion returns false; keep the unconverted text
            // instead of silently discarding the field.
            if ($converted !== false) {
                $value = $converted;
            }
        }

        return trim($value);
    }

    // -------------------------------------------------------------------
    // Replace HTML entities &something; by real characters
    // -------------------------------------------------------------------
    function unhtmlentities ($string) {
        // Entity table, flipped so that entities map to characters
        $trans_tbl = array_flip(get_html_translation_table (HTML_ENTITIES, ENT_QUOTES));
        // Add support for &apos; entity (missing in HTML_ENTITIES)
        $trans_tbl += array('&apos;' => "'");
        return strtr ($string, $trans_tbl);
    }

    // -------------------------------------------------------------------
    // Reformat a feed date with $this->date_format. The input is returned
    // unchanged when no format is configured or it cannot be parsed.
    // -------------------------------------------------------------------
    function format_date ($value) {
        if ($this->date_format == '') {
            return $value;
        }
        $timestamp = strtotime($value);
        // strtotime() reports failure as false (-1 before PHP 5.1).
        if ($timestamp === false || $timestamp === -1) {
            return $value;
        }
        return date($this->date_format, $timestamp);
    }

    // -------------------------------------------------------------------
    // Parse() is private method used by Get() to load and parse RSS file.
    // Don't use Parse() in your scripts - use Get($rss_file) instead.
    //
    // Returns an associative array ('encoding', channel tags, 'image_*',
    // 'textinput_*', 'items', 'items_count'), or False when the source
    // cannot be opened.
    // -------------------------------------------------------------------
    function Parse ($rss_url) {
        // An empty path makes fopen() throw on PHP 8 even with '@'.
        if ((string) $rss_url === '') {
            return False;
        }
        $f = @fopen($rss_url, 'r');
        if (!$f) {
            return False;
        }

        // Stop on the first failed read: looping on feof() alone can spin
        // forever when the stream errors out.
        $rss_content = '';
        while (($chunk = fgets($f, 4096)) !== false) {
            $rss_content .= $chunk;
        }
        fclose($f);

        $result = array();

        // Document encoding. Matched directly rather than via my_preg_match(),
        // which would already try to convert using a charset not known yet.
        $result['encoding'] = preg_match("'encoding=[\'\"](.*?)[\'\"]'si", $rss_content, $out_encoding)
            ? trim($out_encoding[1])
            : '';
        // Used by my_preg_match() for every field extracted below.
        $this->rsscp = ($result['encoding'] != '') ? $result['encoding'] : $this->default_cp;

        // Parse CHANNEL info
        $channel = preg_match("'<channel.*?>(.*?)</channel>'si", $rss_content, $out_channel)
            ? $out_channel[1]
            : '';
        foreach ($this->channeltags as $channeltag) {
            $temp = $this->my_preg_match("'<$channeltag.*?>(.*?)</$channeltag>'si", $channel);
            if ($temp != '') $result[$channeltag] = $temp; // Set only if not empty
        }
        if (isset($result['lastBuildDate'])) {
            $result['lastBuildDate'] = $this->format_date($result['lastBuildDate']);
        }

        // Parse TEXTINPUT info.
        // The pattern accepts <textinput> with or without attributes but skips
        // the self-closing form <textinput />, which is not an opening tag.
        preg_match("'<textinput(|[^>]*[^/])>(.*?)</textinput>'si", $rss_content, $out_textinfo);
        if (isset($out_textinfo[2])) {
            foreach ($this->textinputtags as $textinputtag) {
                $temp = $this->my_preg_match("'<$textinputtag.*?>(.*?)</$textinputtag>'si", $out_textinfo[2]);
                if ($temp != '') $result['textinput_'.$textinputtag] = $temp; // Set only if not empty
            }
        }

        // Parse IMAGE info
        preg_match("'<image.*?>(.*?)</image>'si", $rss_content, $out_imageinfo);
        if (isset($out_imageinfo[1])) {
            foreach ($this->imagetags as $imagetag) {
                $temp = $this->my_preg_match("'<$imagetag.*?>(.*?)</$imagetag>'si", $out_imageinfo[1]);
                if ($temp != '') $result['image_'.$imagetag] = $temp; // Set only if not empty
            }
        }

        // Parse ITEMS
        preg_match_all("'<item(| .*?)>(.*?)</item>'si", $rss_content, $items);
        $rss_items = isset($items[2]) ? $items[2] : array();
        $i = 0;
        $result['items'] = array(); // create array even if there are no items
        foreach ($rss_items as $rss_item) {
            // Stop once the configured limit (0 = unlimited) is reached
            if ($this->items_limit != 0 && $i >= $this->items_limit) {
                break;
            }

            // Every counted item gets an entry, even if none of its tags match
            $result['items'][$i] = array();
            foreach ($this->itemtags as $itemtag) {
                $temp = $this->my_preg_match("'<$itemtag.*?>(.*?)</$itemtag>'si", $rss_item);
                if ($temp != '') $result['items'][$i][$itemtag] = $temp; // Set only if not empty
            }

            if ($this->stripHTML) {
                // Strip tags, decode entities, then strip again to catch tags
                // that were hidden behind entities.
                foreach (array('description', 'title') as $field) {
                    if (!empty($result['items'][$i][$field])) {
                        $result['items'][$i][$field] = strip_tags($this->unhtmlentities(strip_tags($result['items'][$i][$field])));
                    }
                }
            }

            if (isset($result['items'][$i]['pubDate'])) {
                $result['items'][$i]['pubDate'] = $this->format_date($result['items'][$i]['pubDate']);
            }

            // Item counter
            $i++;
        }

        $result['items_count'] = $i;
        return $result;
    }
}
225 \r
226 ?>