5 * phpDocumentor :: automatic documentation generator
\r
7 * PHP versions 4 and 5
\r
9 * Copyright (c) 2000-2007 Joshua Eichorn
\r
13 * This library is free software; you can redistribute it
\r
14 * and/or modify it under the terms of the GNU Lesser General
\r
15 * Public License as published by the Free Software Foundation;
\r
16 * either version 2.1 of the License, or (at your option) any
\r
19 * This library is distributed in the hope that it will be useful,
\r
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
\r
22 * Lesser General Public License for more details.
\r
24 * You should have received a copy of the GNU Lesser General Public
\r
25 * License along with this library; if not, write to the Free Software
\r
26 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
\r
28 * @category ToolsAndUtilities
\r
29 * @package phpDocumentor
\r
30 * @subpackage WordParsers
\r
31 * @author Joshua Eichorn <jeichorn@phpdoc.org>
\r
32 * @copyright 2000-2007 Joshua Eichorn
\r
33 * @license http://www.opensource.org/licenses/lgpl-license.php LGPL
\r
34 * @version CVS: $Id: WordParser.inc,v 1.5 2007/11/14 01:37:03 ashnazg Exp $
\r
35 * @link http://www.phpdoc.org
\r
36 * @link http://pear.php.net/PhpDocumentor
\r
38 * @todo CS cleanup - change package to PhpDocumentor
\r
42 * Retrieves tokens from source code for use by the Parser
\r
44 * @category ToolsAndUtilities
\r
45 * @package phpDocumentor
\r
46 * @subpackage WordParsers
\r
47 * @author Joshua Eichorn <jeichorn@phpdoc.org>
\r
48 * @copyright 2000-2007 Joshua Eichorn
\r
49 * @license http://www.opensource.org/licenses/lgpl-license.php LGPL
\r
50 * @version Release: 1.4.1
\r
51 * @link http://www.phpdoc.org
\r
52 * @link http://pear.php.net/PhpDocumentor
\r
54 * @todo CS cleanup - change package to PhpDocumentor
\r
59 New lines around the world
\r
69 * List of text that separates tokens, used to retrieve tokens
\r
72 var $wordseperators = array();
\r
75 * Position within input of the cursor pointing to the next text to be
\r
76 * retrieved as a token
\r
82 * Size of the input source code
\r
95 * Current line number
\r
100 * Position the cursor was at the last time line numbers were counted, used
\r
101 * to guarantee that line numbers are incremented
\r
104 var $linenumpos = 0;
\r
107 * Used for {@}source} tag, contains currently parsed function source
\r
112 * flag, determines whether tokens are added to {@link $source}
\r
115 var $getsource = false;
\r
118 * If true, then white space is returned as a part of tokens, otherwise
\r
119 * tokens are trimmed
\r
122 var $returnWhiteSpace = false;
\r
126 * Initialize the WordParser
\r
128 * @param string &$input source code
\r
132 function setup(&$input)
\r
134 $this->size = strlen($input);
\r
135 $this->data = & $input;
\r
137 $this->linenum = 0;
\r
138 $this->linenumpos = 0;
\r
139 $this->cache = array();
\r
141 //$this->word = WORD_PARSER_RET_WORD;
\r
145 * Retrieve source code for the last function/method
\r
149 function getSource()
\r
151 $source = $this->source;
\r
152 $this->source = '';
\r
153 $this->getsource = false;
\r
158 * Used to tell the WordParser to start retrieving source code
\r
160 * @param string $word source code
\r
165 function retrievesource($word = '')
\r
167 $this->source = $word;
\r
168 $this->getsource = true;
\r
172 * Retrieve a token from the token list
\r
174 * The {@link Parser} class relies upon this method to retrieve the next
\r
175 * token. The {@link $wordseperators} array is a collection of strings
\r
176 * that delineate tokens for the current parser state. $wordseperators
\r
177 * is set by the parser with a call to {@link Parser::configWordParser()}
\r
178 * every time a new parser state is reached.
\r
180 * For example, while parsing the source code for a class, the word
\r
181 * <code>var</code> is a token, and <code>global</code> is not,
\r
182 * but inside a function, the reverse is true. The parser state
\r
183 * {@link PARSER_STATE_CLASS} has a token list that includes whitespace,
\r
184 * code delimiters like ; and {}, and comment/DocBlock indicators
\r
186 * If the whitespace option has been turned off using
\r
187 * {@link setWhitespace()}, then no whitespace is returned with tokens
\r
190 * In the first segment of the function, the code attempts to find the next
\r
191 * token. A cache is used to speed repetitious tasks. The $tpos variable
\r
192 * is used to hold the position of the next token. $npos is used to
\r
193 * hold the end of the token, and so $npos - $tpos will give the length
\r
194 * of the token. This is used to allow tokens that contain whitespace,
\r
195 * should that option be desired.
\r
197 * {@link $data} is of course the string containing the PHP code to be
\r
198 * parsed, and {@link $pos} is the cursor, or current location within the
\r
202 * @return string|false the next token, an empty string if there are no
\r
203 * token separators in the $wordseperators array,
\r
204 * or false if the end of input has been reached
\r
208 //$st = $this->mtime();
\r
209 if ($this->size == $this->pos) {
\r
213 // assume, for starting, that the token is from $this->pos to the end
\r
214 $npos = $this->size;
\r
215 if (is_array($this->wordseperators)) {
\r
216 //$this->wordseperators = array();
\r
217 foreach ($this->wordseperators as $sep) {
\r
218 // cache is set if this separator has been tested
\r
219 if (isset($this->cache[$sep])) {
\r
220 $tpos = $this->cache[$sep];
\r
224 if ($tpos < $this->pos || !is_int($tpos)) {
\r
225 // find the position of the next token separator
\r
226 $tpos = strpos($this->data, $sep, $this->pos);
\r
229 // was a token separator found
\r
230 // that is closer to the current location?
\r
231 if ( ($tpos < $npos) && !($tpos === false)) {
\r
232 //echo trim($sep) . "=$tpos\n";
\r
233 // set the length of the token
\r
234 // to be from $this->pos to
\r
235 // the next token separator
\r
237 $seplen = strlen($sep);
\r
238 } else if (!($tpos === false)) {
\r
239 $this->cache[$sep] = $tpos;
\r
243 // no token separators, tell the parser to choose a new state
\r
247 $len = $npos - $this->pos;
\r
252 //$st3 = $this->mtime();
\r
253 $word = substr($this->data, $this->pos, $len);
\r
255 // Convert other OS newlines ("\r", "\r\n") to the Unix one
\r
256 if ($word == "\r" || $word == "\r\n") {
\r
260 if ($this->linenumpos <= $this->pos) {
\r
261 $this->linenumpos = $this->pos + $len;
\r
262 $this->linenum += count(explode("\n", $word)) - 1;
\r
265 if ($this->getsource) {
\r
266 $this->source .= $word;
\r
268 $this->pos = $this->pos + $len;
\r
269 //$this->word = WORD_PARSER_RET_SEP;
\r
271 // Things like // comments rely on the newline
\r
272 // to find their end, so I'm going to have to return them
\r
273 // never return worthless white space such as "\t" or ' '
\r
274 if ($this->returnWhiteSpace == false) {
\r
275 if (strlen(trim($word)) == 0 && $word != "\n") {
\r
276 $word = $this->getWord();
\r
279 //$this->time3 = $this->time3 + ($this->mtime() - $st3);
\r
280 //$this->time = $this->time + ($this->mtime() - $st);
\r
286 * Returns the current pointer position, or 1 character after the end of the word
\r
288 * @return int the position
\r
300 * @param integer $start starting position
\r
301 * @param integer $len length of block to retrieve
\r
303 * @return string the requested block of characters
\r
305 function getBlock($start, $len)
\r
307 return substr($this->data, $start, $len);
\r
311 * Sets the list of possible separator tokens
\r
313 * @param array &$seps array of strings that separate tokens
\r
316 * @uses $wordseperators
\r
318 function setSeperator(&$seps)
\r
320 $this->wordseperators = &$seps;
\r
324 * Set the internal cursor within the source code
\r
326 * @param integer $pos the position
\r
330 function setPos($pos)
\r
336 * Back up to the previous token so that it can be retrieved again in a new
\r
339 * Occasionally, a word will be passed to an event handler that should be
\r
340 * handled by another event handler. This method allows that to happen.
\r
342 * @param string $word token to back up to
\r
346 function backupPos($word)
\r
348 if ($this->getsource) $this->source =
\r
349 substr($this->source, 0, strlen($this->source) - 1);
\r
350 $this->pos = $this->pos - strlen($word);
\r
354 * set parser to return or strip whitespace
\r
356 * @param boolean $val flag to return or strip whitespace
\r
360 function setWhitespace($val = false)
\r
362 $this->returnWhiteSpace = $val;
\r