3 ======================================================================
\r
6 Simple yet powerful PHP class to parse RSS files.
\r
8 by Vojtech Semecky, webmaster @ webdot . cz
\r
10 Latest version, features, manual and examples:
\r
11 http://lastrss.webdot.cz/
\r
13 ----------------------------------------------------------------------
\r
16 This program is free software; you can redistribute it and/or
\r
17 modify it under the terms of the GNU General Public License (GPL)
\r
18 as published by the Free Software Foundation; either version 2
\r
19 of the License, or (at your option) any later version.
\r
21 This program is distributed in the hope that it will be useful,
\r
22 but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
24 GNU General Public License for more details.
\r
26 To read the license please visit http://www.gnu.org/copyleft/gpl.html
\r
27 ======================================================================
\r
32 * Simple yet powerful PHP class to parse RSS files.
\r
35 // -------------------------------------------------------------------
\r
36 // Public properties
\r
37 // -------------------------------------------------------------------
\r
38 var $default_cp = 'UTF-8';
\r
39 var $CDATA = 'nochange';
\r
41 var $items_limit = 0;
\r
42 var $stripHTML = False;
\r
43 var $date_format = '';
\r
45 // -------------------------------------------------------------------
\r
46 // Private variables
\r
47 // -------------------------------------------------------------------
\r
48 var $channeltags = array ('title', 'link', 'description', 'language', 'copyright', 'managingEditor', 'webMaster', 'lastBuildDate', 'rating', 'docs');
\r
49 var $itemtags = array('title', 'link', 'description', 'author', 'category', 'comments', 'enclosure', 'guid', 'pubDate', 'source');
\r
50 var $imagetags = array('title', 'url', 'link', 'width', 'height');
\r
51 var $textinputtags = array('title', 'description', 'name', 'link');
\r
53 // -------------------------------------------------------------------
\r
54 // Parse RSS file and returns associative array.
\r
55 // Get() function modified by Heidi Hazelton
\r
56 // -------------------------------------------------------------------
\r
57 function Get ($rss_url, $rss_feed_id) {
\r
61 if(!isset($this->num_results)) {
\r
62 $this->num_results = 5;
\r
64 if (!isset($this->description)) {
\r
65 $this->description = FALSE;
\r
68 $result = $this->Parse($rss_url);
\r
69 if ($result && ($result['items_count'] == 0)) {
\r
71 } else if ($result) {
\r
72 $output = "<ul class='rss_feed'>";
\r
73 for ($i=0; $i < min($this->num_results, $result['items_count']) ; $i++) {
\r
74 $output .= "<li><a href='".$result['items'][$i]['link']."' target='_new'>".$result['items'][$i]['title']."</a>";
\r
75 if ($this->description) { $output .= "<br />".$result['items'][$i]['description']; }
\r
76 $output .= "</li>\n";
\r
78 $output .= "</ul>\n";
\r
80 } elseif (file_exists($cache_file)) {
\r
83 //create an empty file
\r
84 if ($f = @fopen($cache_file, 'w')) {
\r
92 // -------------------------------------------------------------------
\r
93 // Modification of preg_match(); returns the trimmed field with index 1
\r
94 // from 'classic' preg_match() array output
\r
95 // -------------------------------------------------------------------
\r
/**
 * Run preg_match() and return the first capture group, post-processed.
 *
 * Post-processing, in order:
 *  1. CDATA markers are removed when $this->CDATA is 'content' or 'strip'
 *     (any other value leaves the text untouched).
 *  2. If $this->cp is set, the text is converted with iconv() from the
 *     feed encoding ($this->rsscp) to $this->cp, with transliteration.
 *  3. Surrounding whitespace is trimmed.
 *
 * @param string $pattern PCRE pattern with at least one capture group
 * @param string $subject Text to search
 * @return string Processed first capture group, or '' when nothing matched
 */
function my_preg_match ($pattern, $subject) {
    // Run the regular expression; $out[1] receives the first capture group
    preg_match($pattern, $subject, $out);

    // No match (or no first group): return empty string
    if (!isset($out[1])) {
        return '';
    }

    $value = $out[1];

    // Process CDATA (if present).
    // NOTE(review): 'content' and 'strip' behave identically here - both drop
    // only the CDATA markers and keep the enclosed text. Confirm whether
    // 'strip' was meant to remove the enclosed text as well.
    if ($this->CDATA == 'content' || $this->CDATA == 'strip') {
        $value = strtr($value, array('<![CDATA['=>'', ']]>'=>''));
    }

    // If a target code page is set, convert character encoding to it
    if (!empty($this->cp)) {
        // Parse() calls this method once (to read the encoding attribute)
        // before it has assigned $this->rsscp, so fall back to the default.
        $source_cp = !empty($this->rsscp) ? $this->rsscp : $this->default_cp;

        $converted = iconv($source_cp, $this->cp.'//TRANSLIT', $value);

        // iconv() returns false on failure; keep the unconverted text rather
        // than letting trim(false) silently turn the field into ''.
        if ($converted !== false) {
            $value = $converted;
        }
    }

    return trim($value);
}
\r
121 // -------------------------------------------------------------------
\r
122 // Replace HTML entities &something; by real characters
\r
123 // -------------------------------------------------------------------
\r
/**
 * Replace HTML entities (&something;) by the characters they stand for.
 *
 * Uses PHP's HTML_ENTITIES translation table (with ENT_QUOTES) in reverse,
 * extended with the apostrophe entities that table does not contain.
 * strtr() makes a single pass, so text is decoded one level only
 * ('&amp;lt;' becomes '&lt;', not '<').
 *
 * @param string $string Text that may contain HTML entities
 * @return string Text with known entities replaced by real characters
 */
function unhtmlentities ($string) {
    // The table maps character => entity; flip it to entity => character
    $trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_QUOTES));

    // Add the apostrophe spellings missing from HTML_ENTITIES (the table only
    // has &#039;). The += operator never overwrites an existing key.
    $trans_tbl += array('&apos;' => "'", '&#39;' => "'");

    // Replace entities by values
    return strtr($string, $trans_tbl);
}
\r
135 // -------------------------------------------------------------------
\r
136 // Parse() is private method used by Get() to load and parse RSS file.
\r
137 // Don't use Parse() in your scripts - use Get($rss_url, $rss_feed_id) instead.
\r
138 // -------------------------------------------------------------------
\r
139 function Parse ($rss_url) {
\r
140 // Open and load RSS file
\r
141 if ($f = @fopen($rss_url, 'r')) {
\r
143 while (!feof($f)) {
\r
144 $rss_content .= fgets($f, 4096);
\r
148 // Parse document encoding
\r
149 $result['encoding'] = $this->my_preg_match("'encoding=[\'\"](.*?)[\'\"]'si", $rss_content);
\r
150 // if document codepage is specified, use it
\r
151 if ($result['encoding'] != '')
\r
152 { $this->rsscp = $result['encoding']; } // This is used in my_preg_match()
\r
153 // otherwise use the default codepage
\r
155 { $this->rsscp = $this->default_cp; } // This is used in my_preg_match()
\r
157 // Parse CHANNEL info
\r
158 preg_match("'<channel.*?>(.*?)</channel>'si", $rss_content, $out_channel);
\r
159 foreach($this->channeltags as $channeltag)
\r
161 $temp = $this->my_preg_match("'<$channeltag.*?>(.*?)</$channeltag>'si", $out_channel[1]);
\r
162 if ($temp != '') $result[$channeltag] = $temp; // Set only if not empty
\r
164 // If date_format is specified and lastBuildDate is valid
\r
165 if ($this->date_format != '' && ($timestamp = strtotime($result['lastBuildDate'])) !==-1) {
\r
166 // convert lastBuildDate to specified date format
\r
167 $result['lastBuildDate'] = date($this->date_format, $timestamp);
\r
170 // Parse TEXTINPUT info
\r
171 preg_match("'<textinput(|[^>]*[^/])>(.*?)</textinput>'si", $rss_content, $out_textinfo);
\r
172 // This slightly strange regexp means:
\r
173 // Look for a <textinput> tag with or without attributes, but skip the self-closing form <textinput /> (it is not an opening tag)
\r
174 if (isset($out_textinfo[2])) {
\r
175 foreach($this->textinputtags as $textinputtag) {
\r
176 $temp = $this->my_preg_match("'<$textinputtag.*?>(.*?)</$textinputtag>'si", $out_textinfo[2]);
\r
177 if ($temp != '') $result['textinput_'.$textinputtag] = $temp; // Set only if not empty
\r
180 // Parse IMAGE info
\r
181 preg_match("'<image.*?>(.*?)</image>'si", $rss_content, $out_imageinfo);
\r
182 if (isset($out_imageinfo[1])) {
\r
183 foreach($this->imagetags as $imagetag) {
\r
184 $temp = $this->my_preg_match("'<$imagetag.*?>(.*?)</$imagetag>'si", $out_imageinfo[1]);
\r
185 if ($temp != '') $result['image_'.$imagetag] = $temp; // Set only if not empty
\r
189 preg_match_all("'<item(| .*?)>(.*?)</item>'si", $rss_content, $items);
\r
190 $rss_items = $items[2];
\r
192 $result['items'] = array(); // create array even if there are no items
\r
193 foreach($rss_items as $rss_item) {
\r
194 // If number of items is lower than limit: Parse one item
\r
195 if ($i < $this->items_limit || $this->items_limit == 0) {
\r
196 foreach($this->itemtags as $itemtag) {
\r
197 $temp = $this->my_preg_match("'<$itemtag.*?>(.*?)</$itemtag>'si", $rss_item);
\r
198 if ($temp != '') $result['items'][$i][$itemtag] = $temp; // Set only if not empty
\r
200 // Strip HTML tags and other bullshit from DESCRIPTION
\r
201 if ($this->stripHTML && $result['items'][$i]['description'])
\r
202 $result['items'][$i]['description'] = strip_tags($this->unhtmlentities(strip_tags($result['items'][$i]['description'])));
\r
203 // Strip HTML tags and other bullshit from TITLE
\r
204 if ($this->stripHTML && $result['items'][$i]['title'])
\r
205 $result['items'][$i]['title'] = strip_tags($this->unhtmlentities(strip_tags($result['items'][$i]['title'])));
\r
206 // If date_format is specified and pubDate is valid
\r
207 if ($this->date_format != '' && ($timestamp = strtotime($result['items'][$i]['pubDate'])) !==-1) {
\r
208 // convert pubDate to specified date format
\r
209 $result['items'][$i]['pubDate'] = date($this->date_format, $timestamp);
\r
216 $result['items_count'] = $i;
\r
219 else // Error in opening return False
\r