mods/phpdoc2/PhpDocumentor/phpDocumentor/WordParser.inc

   1 <?php\r
   2 /**\r
   3  * a generic lexer\r
   4  * \r
   5  * phpDocumentor :: automatic documentation generator\r
   6  * \r
   7  * PHP versions 4 and 5\r
   8  *\r
   9  * Copyright (c) 2000-2007 Joshua Eichorn\r
  10  * \r
  11  * LICENSE:\r
  12  * \r
  13  * This library is free software; you can redistribute it\r
  14  * and/or modify it under the terms of the GNU Lesser General\r
  15  * Public License as published by the Free Software Foundation;\r
  16  * either version 2.1 of the License, or (at your option) any\r
  17  * later version.\r
  18  * \r
  19  * This library is distributed in the hope that it will be useful,\r
  20  * but WITHOUT ANY WARRANTY; without even the implied warranty of\r
  21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
  22  * Lesser General Public License for more details.\r
  23  * \r
  24  * You should have received a copy of the GNU Lesser General Public\r
  25  * License along with this library; if not, write to the Free Software\r
  26  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA\r
  27  *\r
  28  * @category   ToolsAndUtilities\r
  29  * @package    phpDocumentor\r
  30  * @subpackage WordParsers\r
  31  * @author     Joshua Eichorn <jeichorn@phpdoc.org>\r
  32  * @copyright  2000-2007 Joshua Eichorn\r
  33  * @license    http://www.opensource.org/licenses/lgpl-license.php LGPL\r
  34  * @version    CVS: $Id: WordParser.inc,v 1.5 2007/11/14 01:37:03 ashnazg Exp $\r
  35  * @link       http://www.phpdoc.org\r
  36  * @link       http://pear.php.net/PhpDocumentor\r
  37  * @since      0.1\r
  38  * @todo       CS cleanup - change package to PhpDocumentor\r
  39  */\r
  40 \r
  41 /**\r
  42  * Retrieves tokens from source code for use by the Parser\r
  43  *\r
  44  * @category   ToolsAndUtilities\r
  45  * @package    phpDocumentor\r
  46  * @subpackage WordParsers\r
  47  * @author     Joshua Eichorn <jeichorn@phpdoc.org>\r
  48  * @copyright  2000-2007 Joshua Eichorn\r
  49  * @license    http://www.opensource.org/licenses/lgpl-license.php LGPL\r
  50  * @version    Release: 1.4.1\r
  51  * @link       http://www.phpdoc.org\r
  52  * @link       http://pear.php.net/PhpDocumentor\r
  53  * @see        Parser\r
  54  * @todo       CS cleanup - change package to PhpDocumentor\r
  55  */\r
  56 class WordParser\r
  57 {\r
  58     /*\r
  59     New lines around the world\r
  60     Macintosh: \r \r
  61         Unix : \n \r
  62     Windows : \r\n \r
  63      */\r
  64     \r
  65     /**#@+\r
  66      * @access private\r
  67      */\r
  68     /**\r
  69      * List of text that separates tokens, used to retrieve tokens\r
  70      * @var array\r
  71      */\r
  72     var $wordseperators = array();\r
  73     \r
  74     /**\r
  75      * Position within input of the cursor pointing to the next text to be\r
  76      * retrieved as a token\r
  77      * @var integer\r
  78      */\r
  79     var $pos = 0;\r
  80 \r
  81     /**\r
  82      * Size of the input source code\r
  83      * @var integer\r
  84      */\r
  85     var $size;\r
  86 \r
  87     /**\r
  88      * Source code\r
  89      * @var string\r
  90      */\r
  91     var $data;\r
  92 \r
  93     var $cache;\r
  94     /**\r
  95      * Current line number\r
  96      * @var integer\r
  97      */\r
  98     var $linenum = 0;\r
  99     /**\r
 100      * Position the cursor was at the last time line numbers were counted, used\r
 101      * to guarantee that line numbers are incremented\r
 102      * @var integer\r
 103      */\r
 104     var $linenumpos = 0;\r
 105     \r
 106     /**\r
 107      * Used for {@}source} tag, contains currently parsed function source\r
 108      * @var string\r
 109      */\r
 110     var $source = '';\r
 111     /**\r
 112      * flag, determines whether tokens are added to {@link $source}\r
 113      * @var boolean\r
 114      */\r
 115     var $getsource = false;\r
 116 \r
 117     /**\r
 118      * If true, then white space is returned as a part of tokens, otherwise\r
 119      * tokens are trimmed\r
 120      * @var boolean\r
 121      */\r
 122     var $returnWhiteSpace = false;\r
 123     /**#@-*/\r
 124 \r
 125     /**\r
 126      * Initialize the WordParser\r
 127      *\r
 128      * @param string &$input source code\r
 129      * \r
 130      * @return void\r
 131      */\r
 132     function setup(&$input)\r
 133     {\r
 134         $this->size       = strlen($input);\r
 135         $this->data       = & $input;\r
 136         $this->pos        = 0;\r
 137         $this->linenum    = 0;\r
 138         $this->linenumpos = 0;\r
 139         $this->cache      = array();\r
 140         //$this->run      = 0;\r
 141         //$this->word     = WORD_PARSER_RET_WORD;\r
 142     }\r
 143     \r
 144     /**\r
 145      * Retrieve source code for the last function/method\r
 146      *\r
 147      * @return string\r
 148      */\r
 149     function getSource()\r
 150     {\r
 151         $source          = $this->source;\r
 152         $this->source    = '';\r
 153         $this->getsource = false;\r
 154         return $source;\r
 155     }\r
 156     \r
 157     /**\r
 158      * Used to tell the WordParser to start retrieving source code\r
 159      *\r
 160      * @param string $word source code\r
 161      *\r
 162      * @return void\r
 163      * @access private\r
 164      */\r
 165     function retrievesource($word = '')\r
 166     {\r
 167         $this->source    = $word;\r
 168         $this->getsource = true;\r
 169     }\r
 170 \r
 171     /**\r
 172      * Retrieve a token from the token list\r
 173      *\r
 174      * The {@link Parser} class relies upon this method to retrieve the next\r
 175      * token.  The {@link $wordseperators} array is a collection of strings\r
 176      * that delineate tokens for the current parser state.  $wordseperators\r
 177      * is set by the parser with a call to {@link Parser::configWordParser()}\r
 178      * every time a new parser state is reached.\r
 179      *\r
 180      * For example, while parsing the source code for a class, the word\r
 181      * <code>var</code> is a token, and <code>global</code> is not,\r
 182      * but inside a function, the reverse is true.  The parser state\r
 183      * {@link PARSER_STATE_CLASS} has a token list that includes whitespace,\r
 184      * code delimiters like ; and {}, and comment/DocBlock indicators\r
 185      *\r
 186      * If the whitespace option has been turned off using\r
 187      * {@link setWhitespace()}, then no whitespace is returned with tokens\r
 188      *\r
 189      * {@internal\r
 190      * In the first segment of the function, the code attempts to find the next\r
 191      * token.  A cache is used to speed repetitious tasks.  The $tpos variable\r
 192      * is used to hold the position of the next token.  $npos is used to\r
 193      * hold the end of the token, and so $npos - $tpos will give the length\r
 194      * of the token.  This is used to allow tokens that contain whitespace,\r
 195      * should that option be desired.\r
 196      *\r
 197      * {@link $data} is of course the string containing the PHP code to be\r
 198      * parsed, and {@link $pos} is the cursor, or current location within the\r
 199      * parsed data.\r
 200      * }}\r
 201      *\r
 202      * @return string|false the next token, an empty string if there are no\r
 203      *                      token separators in the $wordseperators array,\r
 204      *                      or false if the end of input has been reached\r
 205      */\r
 206     function getWord()\r
 207     {\r
 208         //$st = $this->mtime();\r
 209         if ($this->size == $this->pos) {\r
 210             return false;\r
 211         }\r
 212 \r
 213         // assume, for starting, that the token is from $this->pos to the end\r
 214         $npos = $this->size;\r
 215         if (is_array($this->wordseperators)) {\r
 216             //$this->wordseperators = array();\r
 217             foreach ($this->wordseperators as $sep) {\r
 218                 // cache is set if this separator has been tested\r
 219                 if (isset($this->cache[$sep])) {\r
 220                     $tpos = $this->cache[$sep];\r
 221                 } else {\r
 222                     $tpos = false;\r
 223                 }\r
 224                 if ($tpos < $this->pos || !is_int($tpos)) {\r
 225                     // find the position of the next token separator\r
 226                     $tpos = strpos($this->data, $sep, $this->pos);\r
 227                 }\r
 228 \r
 229                 // was a token separator found \r
 230                 // that is closer to the current location?\r
 231                 if ( ($tpos < $npos) && !($tpos === false)) {\r
 232                     //echo trim($sep) . "=$tpos\n";\r
 233                     // set the length of the token \r
 234                     // to be from $this->pos to\r
 235                     // the next token separator\r
 236                     $npos   = $tpos;\r
 237                     $seplen = strlen($sep);\r
 238                 } else if (!($tpos === false)) {\r
 239                     $this->cache[$sep] = $tpos;\r
 240                 }\r
 241             }\r
 242         } else {\r
 243             // no token separators, tell the parser to choose a new state\r
 244             return "";\r
 245         }\r
 246 \r
 247         $len = $npos - $this->pos;\r
 248         if ($len == 0) {\r
 249             $len = $seplen;\r
 250         }\r
 251 \r
 252         //$st3 = $this->mtime();\r
 253         $word = substr($this->data, $this->pos, $len);\r
 254         \r
 255         // Change random other os newlines to the unix one\r
 256         if ($word == "\r" || $word == "\r\n") {\r
 257             $word = "\n";\r
 258         }\r
 259         \r
 260         if ($this->linenumpos <= $this->pos) {\r
 261             $this->linenumpos = $this->pos + $len;\r
 262             $this->linenum   += count(explode("\n", $word)) - 1;\r
 263         }\r
 264 \r
 265         if ($this->getsource) {\r
 266             $this->source .= $word;\r
 267         }\r
 268         $this->pos = $this->pos + $len;\r
 269         //$this->word = WORD_PARSER_RET_SEP;\r
 270 \r
 271         // Things like // commenats rely on the newline \r
 272         // to find their end so im going to have to return them\r
 273         // never return worthless white space /t ' '\r
 274         if ($this->returnWhiteSpace == false) {\r
 275             if (strlen(trim($word)) == 0 && $word != "\n") {\r
 276                 $word = $this->getWord();\r
 277             }\r
 278         }\r
 279         //$this->time3 = $this->time3 + ($this->mtime() - $st3);\r
 280         //$this->time = $this->time + ($this->mtime() - $st);\r
 281         return $word;\r
 282     }\r
 283     \r
 284 \r
 285     /**\r
 286      * Returns the current pointer position, or 1 character after the end of the word\r
 287      *\r
 288      * @return int the position\r
 289      */\r
 290     function getPos()\r
 291     {\r
 292         return $this->pos;\r
 293     }\r
 294 \r
 295     /**\r
 296      * Unused\r
 297      *\r
 298      * {@source}\r
 299      *\r
 300      * @param integer $start starting position\r
 301      * @param integer $len   length of block to retrieve\r
 302      *\r
 303      * @return string the requested block of characters\r
 304      */\r
 305     function getBlock($start, $len)\r
 306     {\r
 307         return substr($this->data, $start, $len);\r
 308     }\r
 309 \r
 310     /**\r
 311      * Sets the list of possible separator tokens\r
 312      *\r
 313      * @param array &$seps array of strings that separate tokens\r
 314      *\r
 315      * @return void\r
 316      * @uses $wordseperators\r
 317      */\r
 318     function setSeperator(&$seps)\r
 319     {\r
 320         $this->wordseperators = &$seps;\r
 321     }\r
 322 \r
 323     /**\r
 324      * Set the internal cursor within the source code\r
 325      *\r
 326      * @param integer $pos the position\r
 327      *\r
 328      * @return void\r
 329      */\r
 330     function setPos($pos)\r
 331     {\r
 332         $this->pos = $pos;\r
 333     }\r
 334     \r
 335     /**\r
 336      * Backup to the previous token so that it can be retrieved again in a new\r
 337      * context.\r
 338      *\r
 339      * Occasionally, a word will be passed to an event handler that should be\r
 340      * handled by another event handler.  This method allows that to happen.\r
 341      *\r
 342      * @param string $word token to back up to\r
 343      *\r
 344      * @return void\r
 345      */\r
 346     function backupPos($word)\r
 347     {\r
 348         if ($this->getsource) $this->source = \r
 349             substr($this->source, 0, strlen($this->source) - 1);\r
 350         $this->pos = $this->pos - strlen($word);\r
 351     }\r
 352 \r
 353     /**\r
 354      * set parser to return or strip whitespace\r
 355      *\r
 356      * @param boolean $val flag to return or strip whitespace\r
 357      *\r
 358      * @return void\r
 359      */\r
 360     function setWhitespace($val = false)\r
 361     {\r
 362         $this->returnWhiteSpace = $val;\r
 363     }\r
 364 }\r
 365 ?>\r