3 * tokenizer extension-based lexer for PHP code
\r
5 * phpDocumentor :: automatic documentation generator
\r
7 * PHP versions 4 and 5
\r
9 * Copyright (c) 2002-2006 Gregory Beaver
\r
13 * This library is free software; you can redistribute it
\r
14 * and/or modify it under the terms of the GNU Lesser General
\r
15 * Public License as published by the Free Software Foundation;
\r
16 * either version 2.1 of the License, or (at your option) any
\r
19 * This library is distributed in the hope that it will be useful,
\r
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
\r
22 * Lesser General Public License for more details.
\r
24 * You should have received a copy of the GNU Lesser General Public
\r
25 * License along with this library; if not, write to the Free Software
\r
26 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
\r
28 * @category ToolsAndUtilities
\r
29 * @package phpDocumentor
\r
30 * @subpackage Parsers
\r
31 * @author Gregory Beaver <cellog@php.net>
\r
32 * @copyright 2002-2007 Gregory Beaver
\r
33 * @license http://www.opensource.org/licenses/lgpl-license.php LGPL
\r
34 * @version CVS: $Id: phpDocumentorTWordParser.inc,v 1.8 2007/11/16 11:53:06 ashnazg Exp $
\r
35 * @link http://www.phpdoc.org
\r
36 * @link http://pear.php.net/PhpDocumentor
\r
38 * @todo CS cleanup - change package to PhpDocumentor
\r
39 * @todo CS cleanup - PHPCS needs to ignore CVS Id length
\r
43 * Like WordParser, but expects an array of tokens from the tokenizer
\r
44 * instead of a string.
\r
46 * @category ToolsAndUtilities
\r
47 * @package phpDocumentor
\r
48 * @subpackage WordParsers
\r
49 * @author Gregory Beaver <cellog@php.net>
\r
50 * @copyright 2002-2007 Gregory Beaver
\r
51 * @license http://www.opensource.org/licenses/lgpl-license.php LGPL
\r
52 * @version Release: 1.4.1
\r
53 * @link http://www.phpdoc.org
\r
54 * @link http://pear.php.net/PhpDocumentor
\r
56 * @todo CS cleanup - change package to PhpDocumentor
\r
57 * @todo CS cleanup - change classname to PhpDocumentor_*
\r
59 class phpDocumentorTWordParser extends WordParser
\r
65 * tokenized array from {@link token_get_all()}
\r
70 * List of tokens that can contain a newline
\r
73 var $_nl_check = array(
\r
75 T_ENCAPSED_AND_WHITESPACE,
\r
76 T_CONSTANT_ENCAPSED_STRING,
\r
85 var $_global_search;
\r
87 * current source line number (relative)
\r
92 * Source of the entire file, parsed into arrays of tokens on each line
\r
95 var $_file_source = array();
\r
97 * Line number the last comment was on
\r
100 var $_docblock_linenum;
\r
104 * Uses {@link token_get_all()} to tokenize the source code.
\r
106 * Also, it divides the source tokens into separate lines for use by
\r
107 * the @filesource tag.
\r
111 * @param string &$input source code
\r
115 function setup(&$input)
\r
117 $input = rtrim(ltrim($input, "\r\n"));
\r
118 $this->data = &$input;
\r
119 // fix php warnings on invalid source code
\r
120 $this->_all = @token_get_all($input);
\r
121 $this->_file_source = array();
\r
122 $this->addFileSource($this->_all);
\r
123 $this->_sourceline = 0;
\r
125 $this->linenum = 0;
\r
129 * loads up next set of source code
\r
131 * @return array source code array
\r
133 function getSource()
\r
135 $source = $this->source;
\r
136 $this->source = array();
\r
137 $this->getsource = false;
\r
142 * gets the source code tokens
\r
144 * @return array source code tokens split up by line number
\r
146 function getFileSource()
\r
148 return $this->_file_source;
\r
152 * Begin retrieving source code
\r
154 * @param string $word word to add to the beginning of source code
\r
158 * @todo CS cleanup - rename to retrieveSource for camelCase rule
\r
160 function retrievesource($word = '')
\r
162 $this->source = array(array($word));
\r
163 $this->_sourceline = 0;
\r
164 $this->getsource = true;
\r
168 * Utility function to determine whether two tokens from the tokenizer are equal
\r
170 * @param mixed $a first token
\r
171 * @param mixed $b second token
\r
173 * @return bool whether or not the tokens are equal
\r
176 function tokenEquals($a, $b)
\r
178 if (is_array($a)) $a = $a[1];
\r
179 if (is_array($b)) $b = $b[1];
\r
184 * Utility function to convert a series of tokens into a string
\r
186 * @param array $a array of tokens
\r
188 * @return string the resulting string
\r
191 function concatTokens($a)
\r
194 foreach ($a as $c) {
\r
195 if (is_array($c)) {
\r
204 * Retrieve a token for the phpDocumentorTParser
\r
206 * This method adds source code to the array for a function to be returned
\r
207 * to a {@}source} tag, and will return the token unless it is T_WHITESPACE
\r
208 * and {@link $returnWhiteSpace} is false.
\r
210 * The global variable search is more complicated than it is in the
\r
211 * WordParser, as we have to compare an array of tokens to each other, and
\r
212 * that is what this code does}}
\r
214 * @return string|array token from tokenizer
\r
218 if (!isset($this->_all[$this->pos])) {
\r
222 $oldlinenum = $this->linenum;
\r
223 $word = $this->_all[$this->pos++];
\r
225 // if we're looking for a global variable declaration, then this section
\r
226 // will search the upcoming tokens to see if they match the tokens
\r
227 // that define the global variable
\r
228 if (isset($this->_global_search)) {
\r
232 if ($this->tokenEquals($word, $this->_global_search[$gpos++])) {
\r
234 for (;$gpos<count($this->_global_search);$gpos++, $pos++) {
\r
235 if (isset($this->_all[$pos]) &&
\r
236 !$this->tokenEquals($this->_global_search[$gpos],
\r
244 $a = $this->concatTokens($this->_global_search);
\r
245 $this->pos += count($this->_global_search) - 1;
\r
246 unset($this->_global_search);
\r
250 if ($this->getsource) {
\r
251 $this->addSource($word);
\r
253 if (is_array($word)) {
\r
254 if (in_array($word[0], $this->_nl_check)) {
\r
255 $this->linenum += substr_count($word[1], "\n");
\r
257 if ($word[0] == T_WHITESPACE && !$this->returnWhiteSpace) {
\r
258 return $this->getWord();
\r
260 // seeing if we can get line numbers out of the beast
\r
262 if (is_array($word) && $word[0] == T_COMMENT) {
\r
263 $this->_docblock_linenum = $oldlinenum;
\r
269 * Wrapper for {@link addSource()} used to retrieve the entire source code
\r
270 * organized by line number in setup()
\r
272 * @param array $word full file source code
\r
276 function addFileSource($word)
\r
278 $this->_sourceline = 0;
\r
279 foreach ($word as $token) {
\r
280 $this->addSource($token, true);
\r
282 // var_dump($this->_file_source);
\r
286 * Generate source token arrays organized by line number
\r
288 * This code will split up tokens that contain "\n" and add them to the
\r
289 * source code as separate tokens on different lines.
\r
291 * @param array|string $word token to add
\r
292 * @param bool $file true if this should be added
\r
293 * to {@link $_file_source}
\r
296 * @uses _set_sars()
\r
298 function addSource($word, $file = false)
\r
300 if (is_array($word)) {
\r
301 $lines = str_replace("\r", '', explode("\n", $word[1]));
\r
302 foreach ($lines as $i => $line) {
\r
303 $this->_set_sars($file, array($word[0], $line));
\r
304 if ($i < count($lines) - 1) {
\r
305 // increment sourceline
\r
306 $this->_sourceline++;
\r
310 $this->_set_sars($file, $word);
\r
315 * Add tokens to source code
\r
319 * @param bool $type true if this is file source,
\r
320 * otherwise it is function source
\r
321 * @param string|array $word token to add
\r
325 * @todo CS cleanup - rename to _setSars for camelCasing rule
\r
327 function _set_sars($type, $word)
\r
330 $this->_file_source[$this->_sourceline][] = $word;
\r
332 $this->source[$this->_sourceline][] = $word;
\r
337 * Tell the phpDocumentorTWordParser to return the entire global variable
\r
340 * @param array $tokens tokens that represent the global variable definition
\r
343 * @uses $_global_search
\r
345 function findGlobal($tokens)
\r
348 unset($this->_global_search);
\r
350 $this->_global_search = $tokens;
\r
355 * backs the parser up to the previous position
\r
357 * @return int|void can return a word: the result of getWord() when the token backed up to is whitespace that is being skipped
\r
359 function backupPos()
\r
362 $word = $this->_all[$this->pos];
\r
363 if ($this->getsource) {
\r
364 unset($this->source[$this->_sourceline]
\r
365 [count($this->source[$this->_sourceline]) - 1]);
\r
366 if (empty($this->source[$this->_sourceline])) {
\r
367 unset($this->source[$this->_sourceline]);
\r
369 $this->source[$this->_sourceline]
\r
370 = array_values($this->source[$this->_sourceline]);
\r
373 if (is_array($word)) {
\r
374 if ($word[0] == T_WHITESPACE && !$this->returnWhiteSpace) {
\r
375 return $this->getWord();
\r
377 // seeing if we can get line numbers out of the beast
\r
378 if (in_array($word[0], $this->_nl_check)) {
\r
379 $this->linenum -= substr_count($word[1], "\n");
\r