3 * Provides basic parser functions.
\r
5 * Provides basic parser functions to extract doc comments, analyse tags and variable
\r
8 * @version $Id: PhpdocParserCore.php,v 1.3 2000/12/03 22:37:37 uw Exp $
\r
10 class PhpdocParserCore extends PhpdocParserTags {
\r
13 * Scans code for documented and undocumented phpdoc keywords (classes, functions, class variables, uses, constants).
\r
15 * This method is somewhat the heart of the phpdoc parser. It takes a string of
\r
16 * phpcode and extracts all classes, functions, class variables, uses (include and friends),
\r
17 * and constants (define) from it. Extract does not mean that the whole class or another element
\r
18 * gets extracted. It does not take the code from the class definition and its opening
\r
19 * curly brace to the closing one. PHPDoc just extracts the class definition itself and
\r
20 * if available a trailing doc comment. This has some drawbacks: phpdoc can't handle
\r
21 * files that contain more than one class, because it wouldn't know which method/class variable belongs to
\r
22 * a certain class. It's possible to provide a workaround but phpdoc would slow down dramatically.
\r
23 * As PHPDoc does not have a real parser but does a simple grep using a bunch of regular expressions
\r
24 * there are indeed more limitations. Nevertheless I doubt that you'll have problems with "normal" code.
\r
26 * The search algorithm looks pretty strange but believe me it's fast. I have tried several other ways
\r
27 * (really complex regexps >500 chars, preg_match_all + looking backwards for comments, ...) but none was
\r
28 * faster. This one takes 13s on my machine to scan the current (14/08/2000) code (7130 lines), the
\r
29 * big RegExp way took more than 5 Minutes, the preg_match_all + looking backwards 52s.
\r
31 * @param string PHP code to scan.
\r
32 * @param mixed String of one keyword or array of keywords not to scan for. Known keywords are:
\r
33 * "classes", "functions", "variables", "uses", "consts".
\r
34 * @return array Hash of phpdoc elements found, indexed by "variables", "functions", "classes", "consts", "uses".
\r
35 * @see $PHP_BASE, $PHP_COMPLEX, $C_BASE, $C_COMPLEX, extractPhpdoc(), getModuleDoc()
\r
37 function getPhpdocParagraphs($phpcode, $keywords="none") {
\r
39 // what are we not looking for?
\r
40 if ( !is_array($keywords) ) {
\r
41 if ("none" == $keywords)
\r
42 $keywords = array ();
\r
44 $keywords = array ( $keywords => true );
\r
48 $paragraphs = array(
\r
49 "classes" => array(),
\r
50 "functions" => array(),
\r
51 "variables" => array(),
\r
52 "consts" => array(),
\r
54 "modules" => array()
\r
58 // remember the documented elements to be able to compare with the list of all elements
\r
59 $variables = array();
\r
60 $functions = array();
\r
61 $variables = array();
\r
62 $constants = array();
\r
66 // Module docs are somewhat more difficult to grep. Always
\r
67 // use this function.
\r
69 if (!isset($keywords["modules"]))
\r
70 list($paragraphs["modules"], $phpcode) = $this->getModuleDoc($phpcode);
\r
72 list( , $phpcode) = $this->getModuleDoc($phpcode);
\r
75 // Find documented elements
\r
80 $start = strpos($phpcode, "/**", $start);
\r
81 if (0==(int)$start && "integer" != gettype($start) )
\r
84 $end = strpos($phpcode, "*/", $start);
\r
85 $remaining = trim(substr($phpcode, $end+2));
\r
87 if ( !isset($keywords["classes"]) && preg_match($this->PHP_COMPLEX["class"], $remaining, $regs) || preg_match($this->PHP_COMPLEX["class_extends"], $remaining, $regs)) {
\r
89 $paragraphs["classes"][] = array(
\r
91 "extends" => (isset($regs[2])) ? $regs[2] : "",
\r
92 "doc" => $this->extractPhpdoc(substr($phpcode, $start+3, ($end-$start)-2))
\r
94 $classes[$regs[1]] = true;
\r
96 } else if ( !isset($keywords["functions"]) && preg_match($this->PHP_COMPLEX["function"], $remaining, $regs)) {
\r
98 $head = substr($remaining, strpos($remaining, $regs[0])+strlen($regs[0]));
\r
99 $head = substr( trim($this->getValue($head, array( "{" => true) )), 0, -1);
\r
100 $paragraphs["functions"][] = array(
\r
101 "name" => $regs[1],
\r
102 "doc" => $this->extractPhpdoc( substr($phpcode, $start+3, ($end-$start)-2) ),
\r
105 $functions[$regs[1]] = true;
\r
107 } else if ( !isset($keywords["variables"]) && preg_match($this->PHP_COMPLEX["var"], $remaining, $regs)) {
\r
109 if ("=" == $regs[2])
\r
110 $value = trim($this->getValue( substr($remaining, strpos($remaining, $regs[0])+strlen($regs[0]) ), array( ";" => true)));
\r
114 $paragraphs["variables"][] = array(
\r
115 "name" => $regs[1],
\r
117 "doc" => $this->extractPhpdoc(substr($phpcode, $start+3, ($end-$start)-2))
\r
119 $variables[$regs[1]] = true;
\r
121 } else if ( !isset($keywords["consts"]) && preg_match($this->PHP_COMPLEX["const"], $remaining, $regs) ) {
\r
123 $name = (""!=$regs[2]) ? substr($regs[1], 1, -1) : $regs[1];
\r
125 if (isset($regs[5])) {
\r
127 $case = "case insensitive, userdefined: '$regs[5]'";
\r
129 $case = "case sensitive, userdefined: '$regs[5]'";
\r
131 $case = "default: case sensitive";
\r
134 $paragraphs["consts"][] = array(
\r
136 "value" => (""!=$regs[4]) ? substr($regs[3], 1, -1) : $regs[3],
\r
138 "doc" => $this->extractPhpdoc(substr($phpcode, $start+3, ($end-$start)-2))
\r
140 $constants[$name] = true;
\r
142 } else if ( !isset($keywords["uses"]) && preg_match($this->PHP_COMPLEX["use"], $remaining, $regs)) {
\r
144 $filename = isset($regs[5]) ? $regs[5] : $regs[4];
\r
145 $paragraphs["uses"][] = array(
\r
146 "type" => $regs[1],
\r
147 "file" => $filename,
\r
148 "doc" => $this->extractPhpdoc(substr($phpcode, $start+3, ($end-$start)-2))
\r
150 $uses[$filename] = true;
\r
158 // Find undocumented elements
\r
160 if (!isset($keywords["classes"])) {
\r
162 preg_match_all($this->PHP_COMPLEX["undoc_class"], $phpcode, $regs, PREG_SET_ORDER);
\r
164 while (list($k, $data)=each($regs))
\r
165 if (!isset($classes[$data[1]]))
\r
166 $paragraphs["classes"][] = array(
\r
167 "name" => $data[1],
\r
172 preg_match_all($this->PHP_COMPLEX["undoc_class_extends"], $phpcode, $regs, PREG_SET_ORDER);
\r
174 while (list($k, $data)=each($regs))
\r
175 if (!isset($classes[$data[1]]))
\r
176 $paragraphs["classes"][] = array(
\r
177 "name" => $data[1],
\r
178 "extends" => $data[2],
\r
184 if (!isset($keywords["functions"])) {
\r
186 preg_match_all($this->PHP_COMPLEX["undoc_function"], $phpcode, $regs, PREG_SET_ORDER);
\r
188 while (list($k, $data)=each($regs))
\r
189 if (!isset($functions[$data[1]])) {
\r
191 $head = substr($phpcode, strpos($phpcode, $data[0])+strlen($data[0]));
\r
192 $head = substr( trim( $this->getValue($head, array( "{" => true) )), 0, -1);
\r
193 $paragraphs["functions"][] = array(
\r
194 "name" => $data[1],
\r
203 if (!isset($keywords["variables"])) {
\r
205 preg_match_all($this->PHP_COMPLEX["undoc_var"], $phpcode, $regs, PREG_SET_ORDER);
\r
207 while (list($k, $data)=each($regs))
\r
208 if (!isset($variables[$data[1]])) {
\r
210 if ("=" == $data[2])
\r
211 $value = trim($this->getValue( substr($phpcode, strpos($phpcode, $data[0])+strlen($data[0]) ), array( ";" => true)));
\r
215 $paragraphs["variables"][] = array(
\r
216 "name" => $data[1],
\r
223 if (!isset($keywords["consts"])) {
\r
225 preg_match_all($this->PHP_COMPLEX["undoc_const"], $phpcode, $regs, PREG_SET_ORDER);
\r
227 while (list($k, $data)=each($regs)) {
\r
229 $name = (""!=$data[2]) ? substr($data[1], 1, -1) : $data[1];
\r
230 if (!isset($constants[$name])) {
\r
232 if (isset($data[5])) {
\r
234 $case = "case insensitive, userdefined: '$data[5]'";
\r
236 $case = "case sensitive, userdefined: '$data[5]'";
\r
238 $case = "default: case sensitive";
\r
241 $paragraphs["consts"][] = array(
\r
243 "value" => (""!=$data[4]) ? substr($data[3], 1, -1) : $data[3],
\r
251 if (!isset($keywords["uses"])) {
\r
253 preg_match_all($this->PHP_COMPLEX["undoc_use"], $phpcode, $regs, PREG_SET_ORDER);
\r
256 while (list($k, $data)=each($regs)) {
\r
258 $filename = isset($data[5]) ? $data[5] : $data[4];
\r
259 if (!isset($uses[$filename])) {
\r
261 $paragraphs["uses"][] = array(
\r
262 "type" => $data[1],
\r
263 "file" => $filename,
\r
272 return $paragraphs;
\r
273 } // end func getPhpdocParagraphs
\r
276 * Does a quick prescan to find modules and classes.
\r
277 * @param string Code to scan
\r
278 * @return array Hash of modules and classes found in the given code
\r
280 * @see getPhpdocParagraphs()
\r
282 function getModulesAndClasses($phpcode) {
\r
285 list( $para["modules"], $phpdcode) = $this->getModuleDoc($phpcode);
\r
286 $para["classes"] = $this->getClasses($phpcode);
\r
289 } // end func getModulesAndClasses
\r
292 * Tries to extract a module doc.
\r
294 * The syntax for modules is not final yet. The implementation and meaning of "module"
\r
295 * might change at any time! Please do not ask for implementation details.
\r
297 * @param string PHP Code to scan
\r
298 * @return array $module $module[0] = array with module data,
\r
299 * $module[1] = php code without the leading module doc
\r
301 function getModuleDoc($phpcode) {
\r
305 if (preg_match($this->C_COMPLEX["module_doc"], $phpcode, $regs) ) {
\r
307 $start = strlen($regs[0]);
\r
308 $end = strpos($phpcode, "*/", $start);
\r
309 $remaining = substr($phpcode, $end+2);
\r
310 $doc_comment= substr($phpcode, $start, $end-$start);
\r
312 // Do we have OO Code? If not, continue.
\r
313 if ( !preg_match($this->PHP_COMPLEX["class"], $remaining) && !preg_match($this->PHP_COMPLEX["class_extends"], $remaining) ) {
\r
315 // Is there a module tag?
\r
316 if ( preg_match($this->C_COMPLEX["module_tags"], $doc_comment) ) {
\r
318 $doc_comment = $this->extractPhpDoc($doc_comment);
\r
319 $tags = $this->getTags( $doc_comment);
\r
322 "modulegroup" => true
\r
325 $tags = $this->analyseTags( $tags, array(), array( "module" => true, "modulegroup" => true) );
\r
328 "doc" => $doc_comment,
\r
330 "name" => (isset($tags["module"])) ? $tags["module"] : "",
\r
331 "group" => (isset($tags["modulegroup"])) ? $tags["modulegroup"] : ""
\r
337 // Try the remaining keywords. If one matches it's not a module doc
\r
338 // assume that the module doc is missing. If none matches assume that
\r
339 // it's a module doc which lacks the module tags.
\r
340 if ( preg_match($this->PHP_COMPLEX["function"], $remaining) ||
\r
341 preg_match($this->PHP_COMPLEX["use"], $remaining) ||
\r
342 preg_match($this->PHP_COMPLEX["const"], $remaining) ||
\r
343 preg_match($this->PHP_COMPLEX["var"], $remaining)
\r
348 "status" => "missing",
\r
352 $remaining = $phpcode;
\r
357 "doc" => $doc_comment,
\r
358 "status" => "tags missing",
\r
365 } // end if module_tags
\r
369 $remaining = $phpcode;
\r
375 $remaining = $phpcode;
\r
379 return array($module, $remaining);
\r
380 } // end func getModuleDoc
\r
383 * Returns a list of classes found in the given code.
\r
385 * In early versions PHPdoc parsed all the code at once which resulted in huge
\r
386 * memory intensive hashes. Now it scans for classes, builds a classtree and
\r
387 * does the parsing step by step, writing information to the destination
\r
388 * (renderer, exporter) as soon as possible. This reduces the memory consumption
\r
389 * dramatically. getPhpdocParagraphs() could be used to extract the class definitions
\r
390 * as well but this specialized function is somewhat faster.
\r
392 * @param string PHP code to scan.
\r
393 * @return array $classes Array of classes found in the code. $classes[classname] = extends
\r
395 function getClasses($phpcode) {
\r
397 $classes = array();
\r
399 preg_match_all($this->PHP_COMPLEX["undoc_class"], $phpcode, $regs, PREG_SET_ORDER);
\r
401 while (list($k, $data)=each($regs))
\r
402 $classes[] = array(
\r
403 "name" => $data[1],
\r
407 preg_match_all($this->PHP_COMPLEX["undoc_class_extends"], $phpcode, $regs, PREG_SET_ORDER);
\r
409 while (list($k, $data)=each($regs))
\r
410 $classes[] = array(
\r
411 "name" => $data[1],
\r
412 "extends" => $data[2]
\r
416 } // end func getClasses
\r
419 * Strips "/xx", "x/" and x from doc comments (x means asterisk).
\r
420 * @param string Doc comment to clean up.
\r
421 * @return string $phpdoc
\r
423 function extractPhpdoc($paragraph) {
\r
425 $lines = split( $this->PHP_BASE["break"], $paragraph);
\r
429 while (list($k, $line)=each($lines)) {
\r
431 $line = trim($line);
\r
435 if ("*" == $line[0])
\r
436 $phpdoc.= trim(substr($line, 1))."\n";
\r
438 $phpdoc.= $line."\n";
\r
442 return substr($phpdoc, 0, -1);
\r
443 } // end func extractPhpdoc
\r
446 * Extract the description from a PHPDoc doc comment.
\r
448 * Every PHPDoc doc comment has the same syntax: /xx[break][x]short description
\r
449 * [break][[x]multiple line long description[break]][[x]@list of tags]. This function
\r
450 * returns an array of the short description and long description.
\r
452 * @param string Doc comment to examine.
\r
453 * @return array $description $description[0] = short description (first line),
\r
454 * $description[1] = long description (second line up to the first tag)
\r
456 function getDescription($phpdoc) {
\r
458 // find the position of the first doc tag
\r
459 $positions = $this->getTagPos($phpdoc);
\r
461 if (0 == count($positions))
\r
462 $desc = trim($phpdoc); // no doc tags
\r
464 $desc = trim(substr($phpdoc, 0, $positions[0]["pos"])); // strip tags
\r
466 $lines = split($this->PHP_BASE["break"], $desc);
\r
468 if (1 == count($lines) || "" == $desc) {
\r
470 // only a short description but no long description - or even none of both
\r
471 $description = array ($desc, "");
\r
475 $sdesc = trim($lines[0]);
\r
478 $description = array ( $sdesc, implode("", $lines) );
\r
482 return $description;
\r
483 } // end func getDescription
\r
486 * Scans a code passage for a value.
\r
488 * There are some cases where you can hardly use a regex to grep a value
\r
489 * because the value might contain unescaped characters that end the value.
\r
490 * Value means something like "array ( ";", '\;' );" or "'phpdoc; ';" where
\r
491 * the delimiter would be ";".
\r
493 * @param string The php code to examine.
\r
494 * @param mixed String of one delimiter or array of delimiters.
\r
495 * @return string Value found in the code
\r
496 * @todo Racecondition: comments
\r
498 function getValue($code, $delimiter) {
\r
502 if (!is_array($delimiter))
\r
503 $delimiter = array( $delimiter => true );
\r
505 $code = trim($code);
\r
506 $len = strlen($code);
\r
510 if ( isset($delimiter[$code[0]]) ) {
\r
516 for ($i=0; $i<$len; $i++) {
\r
520 if (('"'==$char || "'"==$char) && ($char == $enclosed_by || ""==$enclosed_by) && (0==$i || ($i>0 && "\\"!=$code[$i-1]))) {
\r
523 $enclosed_by = $char;
\r
527 $enclosed = !$enclosed;
\r
530 if (!$enclosed && isset($delimiter[$char]))
\r
537 return substr($code, 0, $i);
\r
538 } // end func getValue
\r
541 * Analyses a code snippet and returns the type and value of the first variable found.
\r
543 * With version 0.3 PHPDoc tries to analyse variable declarations to find
\r
544 * type and value. This is used to analyse class variable declarations and
\r
545 * optional function arguments.
\r
547 * Note that all regular expressions in this function start with "^". That means
\r
548 * you have to do some preparations to the code snippet you're passing to this
\r
551 * @param string PHP code to analyse
\r
552 * @param boolean Flag indicating the "type" of code to analyse. Optional
\r
553 * function parameters and class variables have a slightly
\r
554 * different syntax for arrays. By default function parameters
\r
556 * @return array $vartype $vartype[0] = type, $vartype[1] = value, $vartype[2] = raw value
\r
558 function getVariableTypeAndValue($code, $flag_args = true) {
\r
561 $value = "unknown";
\r
562 $raw_value = $code;
\r
565 // Do not change the order the function tries to find out the type.
\r
568 if (preg_match( $this->PHP_COMPLEX["type_boolean"], $code, $regs)) {
\r
571 $raw_value = $regs[0];
\r
574 } else if (preg_match( $this->PHP_COMPLEX["type_string_enclosed"], $code, $regs)) {
\r
577 $raw_value = $regs[0];
\r
580 } else if (preg_match( $this->PHP_COMPLEX["type_int_oct"], $code, $regs)) {
\r
582 $type = "integer (octal)";
\r
583 $raw_value = $regs[0];
\r
584 $value = preg_replace("@\s@", "", $regs[0]);
\r
585 if ( (int)$value != $value )
\r
586 $type.= " [warning: out of integer range, possible overflow trouble]";
\r
587 $value = octdec($value)." ($value)";
\r
590 } else if (preg_match( $this->PHP_COMPLEX["type_int_hex"], $code, $regs)) {
\r
592 $type = "integer (hexadecimal)";
\r
593 $raw_value = $regs[0];
\r
594 $value = preg_replace("@\s@", "", $regs[0]);
\r
595 if ( (int)$value != $value )
\r
596 $type.= " [warning: out of integer range, possible overflow trouble]";
\r
597 $value = hexdec($value)." ($value)";
\r
599 } else if (preg_match( $this->PHP_COMPLEX["type_float_exponent"], $code, $regs)) {
\r
602 $raw_value = $regs[0];
\r
603 $value = (string)preg_replace("@\s@", "", $regs[0]);
\r
604 if ( (float)$value != $value )
\r
605 $type.= " [warning: out of float range]";
\r
606 $value = (float)$value;
\r
608 } else if (preg_match( $this->PHP_COMPLEX["type_float"], $code, $regs)) {
\r
611 $raw_value = $regs[0];
\r
612 $value = preg_replace("@\s@", "", $regs[0]);
\r
613 if ( (float)$value != $value )
\r
614 $type.= " [warning: out of float range]";
\r
615 $value = (float)$value;
\r
617 } else if (preg_match( $this->PHP_COMPLEX["type_number"], $code, $regs)) {
\r
619 $value = preg_replace("@\s@", "", $regs[0]);
\r
620 $raw_value = $regs[0];
\r
622 if ( (int)$value == $value ) {
\r
625 $value = (int)$value;
\r
630 if ( (float)$value != $value )
\r
631 $type.=" [warning: out of float range]";
\r
632 $value = (float)$value;
\r
636 } else if ($flag_args && preg_match( $this->PHP_COMPLEX["type_empty_array"], $code, $regs)) {
\r
638 $value = "array()";
\r
639 $raw_value = $regs[0];
\r
642 } else if (!$flag_args && preg_match( $this->PHP_COMPLEX["type_array"], $code, $regs)) {
\r
644 $value = $this->getValue( $code, array(";" => true));
\r
645 // strpos() is twice as fast as substr()
\r
646 if ( 0 == strpos($value, "array"))
\r
648 $raw_value == $value;
\r
650 } else if (preg_match( $this->PHP_COMPLEX["type_string"], $code, $regs)) {
\r
653 $raw_value = $regs[0];
\r
657 return array($type, $value, $raw_value);
\r
658 } // end func getVariableTypeAndValue
\r
660 } // end class PhpdocParserCore
\r