+++ /dev/null
-<?php\r
-/**\r
-* Provides basic parser functions.\r
-*\r
-* Provides basic parser functions to extract doc comments, analyse tags and variable\r
-* declarations.\r
-*\r
-* @version $Id: PhpdocParserCore.php,v 1.3 2000/12/03 22:37:37 uw Exp $\r
-*/\r
-class PhpdocParserCore extends PhpdocParserTags {\r
- \r
- /**\r
- * Scans code for documented and undocumented phpdoc keywords (classes, functions, class variables, uses, constants).\r
- *\r
- * This method is somewhat the heart of the phpdoc parser. It takes a string of \r
- * phpcode and extracts all classes, functions, class variables, uses (include and friends), \r
- * and constants (define) from it. Extract does not mean that the whole class or another element\r
- * gets extracted. It does not take the code from the class definition and its opening \r
- * curly brace to the closing one. PHPDoc just extracts the class definition itself and \r
- * if available a trailing doc comment. This has some drawbacks: phpdoc can't handle \r
- * files that contain more than one class because it wouldn't know which method/class variable belongs to \r
- * a certain class. It's possible to provide a workaround but phpdoc would slow down dramatically.\r
- * As PHPDoc does not have a real parser but does a simple grep using a bunch of regular expressions\r
- * there're indeed more limitations. Nevertheless I doubt that you'll have problems with "normal" code.\r
- *\r
- * The search algorithm looks pretty strange but believe me it's fast. I have tried several other ways\r
- * (really complex regexps >500 chars, preg_match_all + looking backwards for comments, ...) but none was\r
- * faster. This one takes 13s on my machine to scan the current (14/08/2000) code (7130 lines), the \r
- * big RegExp way took more than 5 Minutes, the preg_match_all + looking backwards 52s.\r
- *\r
- * @param string PHP code to scan.\r
- * @param mixed String of one keyword or array of keywords not to scan for. Known keywords are:\r
- * "classes", "functions", "variables", "uses", "consts".\r
- * @return array Hash of phpdoc elements found, indexed by "variables", "functions", "classes", "consts", "uses".\r
- * @see $PHP_BASE, $PHP_COMPLEX, $C_BASE, $C_COMPLEX, extractPhpdoc(), getModuleDoc()\r
- */ \r
- function getPhpdocParagraphs($phpcode, $keywords="none") {\r
-\r
-     // Normalise the exclusion list: the default "none" becomes an empty array,\r
-     // a single keyword becomes array(keyword => true). Arrays are used as given\r
-     // and are looked up by key below, e.g. array("uses" => true).\r
-     if (!is_array($keywords)) {\r
-         if ("none" == $keywords)\r
-             $keywords = array();\r
-         else\r
-             $keywords = array($keywords => true);\r
-     }\r
-\r
-     $start = 0;\r
-     $paragraphs = array(\r
-         "classes" => array(),\r
-         "functions" => array(),\r
-         "variables" => array(),\r
-         "consts" => array(),\r
-         "uses" => array(),\r
-         "modules" => array()\r
-     );\r
-\r
-     // Names of the documented elements found in the first pass. The second pass\r
-     // consults these hashes so that a documented element is not reported again\r
-     // as an undocumented one.\r
-     $classes = array();\r
-     $functions = array();\r
-     $variables = array();\r
-     $constants = array();\r
-     $uses = array();\r
-\r
-     // getModuleDoc() is always called because it also returns the code to go on\r
-     // with; the module data itself is only kept if "modules" is not excluded.\r
-     $module_doc = $this->getModuleDoc($phpcode);\r
-     if (!isset($keywords["modules"]))\r
-         $paragraphs["modules"] = $module_doc[0];\r
-     $phpcode = $module_doc[1];\r
-\r
-     //\r
-     // First pass: find documented elements. Walk from one doc comment opener to\r
-     // the next and test the code that follows the comment against the patterns.\r
-     //\r
-     while (true) {\r
-\r
-         $start = strpos($phpcode, "/**", $start);\r
-         if (false === $start)\r
-             break;\r
-\r
-         // An unterminated doc comment cannot be attached to an element. No later\r
-         // comment can be terminated either, so the scan ends here.\r
-         $end = strpos($phpcode, "*/", $start);\r
-         if (false === $end)\r
-             break;\r
-\r
-         $remaining = trim(substr($phpcode, $end+2));\r
-\r
-         // Text between the opening and the closing marker, both markers excluded.\r
-         $comment = substr($phpcode, $start+3, $end-$start-3);\r
-\r
-         // The parentheses matter: without them "&&" binds tighter than "||" and\r
-         // the class_extends pattern would be tried even if classes are excluded.\r
-         if (!isset($keywords["classes"]) && (preg_match($this->PHP_COMPLEX["class"], $remaining, $regs) || preg_match($this->PHP_COMPLEX["class_extends"], $remaining, $regs))) {\r
-\r
-             $paragraphs["classes"][] = array(\r
-                 "name" => $regs[1],\r
-                 "extends" => (isset($regs[2])) ? $regs[2] : "",\r
-                 "doc" => $this->extractPhpdoc($comment)\r
-             );\r
-             $classes[$regs[1]] = true;\r
-\r
-         } else if (!isset($keywords["functions"]) && preg_match($this->PHP_COMPLEX["function"], $remaining, $regs)) {\r
-\r
-             // The head is the code between the matched definition and the opening\r
-             // curly brace, with its last character cut off.\r
-             $head = substr($remaining, strpos($remaining, $regs[0])+strlen($regs[0]));\r
-             $head = substr(trim($this->getValue($head, array("{" => true))), 0, -1);\r
-             $paragraphs["functions"][] = array(\r
-                 "name" => $regs[1],\r
-                 "doc" => $this->extractPhpdoc($comment),\r
-                 "head" => $head\r
-             );\r
-             $functions[$regs[1]] = true;\r
-\r
-         } else if (!isset($keywords["variables"]) && preg_match($this->PHP_COMPLEX["var"], $remaining, $regs)) {\r
-\r
-             // Only variables with an assignment have a value to extract.\r
-             if ("=" == $regs[2])\r
-                 $value = trim($this->getValue(substr($remaining, strpos($remaining, $regs[0])+strlen($regs[0])), array(";" => true)));\r
-             else\r
-                 $value = "";\r
-\r
-             $paragraphs["variables"][] = array(\r
-                 "name" => $regs[1],\r
-                 "value" => $value,\r
-                 "doc" => $this->extractPhpdoc($comment)\r
-             );\r
-             $variables[$regs[1]] = true;\r
-\r
-         } else if (!isset($keywords["consts"]) && preg_match($this->PHP_COMPLEX["const"], $remaining, $regs)) {\r
-\r
-             // A non-empty group 2 means the first and last character of the name\r
-             // get cut off (presumably the enclosing quotes - verify against the pattern).\r
-             $name = ("" != $regs[2]) ? substr($regs[1], 1, -1) : $regs[1];\r
-\r
-             // NOTE(review): a captured string such as "false" is truthy in PHP,\r
-             // confirm what group 5 of the pattern can contain.\r
-             if (isset($regs[5])) {\r
-                 if ($regs[5])\r
-                     $case = "case insensitive, userdefined: '$regs[5]'";\r
-                 else\r
-                     $case = "case sensitive, userdefined: '$regs[5]'";\r
-             } else {\r
-                 $case = "default: case sensitive";\r
-             }\r
-\r
-             $paragraphs["consts"][] = array(\r
-                 "name" => $name,\r
-                 "value" => ("" != $regs[4]) ? substr($regs[3], 1, -1) : $regs[3],\r
-                 "case" => $case,\r
-                 "doc" => $this->extractPhpdoc($comment)\r
-             );\r
-             $constants[$name] = true;\r
-\r
-         } else if (!isset($keywords["uses"]) && preg_match($this->PHP_COMPLEX["use"], $remaining, $regs)) {\r
-\r
-             $filename = isset($regs[5]) ? $regs[5] : $regs[4];\r
-             $paragraphs["uses"][] = array(\r
-                 "type" => $regs[1],\r
-                 "file" => $filename,\r
-                 "doc" => $this->extractPhpdoc($comment)\r
-             );\r
-             $uses[$filename] = true;\r
-\r
-         }\r
-\r
-         // Continue the search right behind the current comment opener.\r
-         $start++;\r
-     }\r
-\r
-     //\r
-     // Second pass: find undocumented elements. Everything that was already\r
-     // collected in the first pass is skipped.\r
-     //\r
-     if (!isset($keywords["classes"])) {\r
-\r
-         preg_match_all($this->PHP_COMPLEX["undoc_class"], $phpcode, $regs, PREG_SET_ORDER);\r
-         foreach ($regs as $data) {\r
-             if (!isset($classes[$data[1]]))\r
-                 $paragraphs["classes"][] = array(\r
-                     "name" => $data[1],\r
-                     "extends" => "",\r
-                     "doc" => ""\r
-                 );\r
-         }\r
-\r
-         preg_match_all($this->PHP_COMPLEX["undoc_class_extends"], $phpcode, $regs, PREG_SET_ORDER);\r
-         foreach ($regs as $data) {\r
-             if (!isset($classes[$data[1]]))\r
-                 $paragraphs["classes"][] = array(\r
-                     "name" => $data[1],\r
-                     "extends" => $data[2],\r
-                     "doc" => ""\r
-                 );\r
-         }\r
-\r
-     }\r
-\r
-     if (!isset($keywords["functions"])) {\r
-\r
-         preg_match_all($this->PHP_COMPLEX["undoc_function"], $phpcode, $regs, PREG_SET_ORDER);\r
-         foreach ($regs as $data) {\r
-             if (isset($functions[$data[1]]))\r
-                 continue;\r
-\r
-             // The head is looked up behind the first occurrence of the matched text.\r
-             $head = substr($phpcode, strpos($phpcode, $data[0])+strlen($data[0]));\r
-             $head = substr(trim($this->getValue($head, array("{" => true))), 0, -1);\r
-             $paragraphs["functions"][] = array(\r
-                 "name" => $data[1],\r
-                 "doc" => "",\r
-                 "head" => $head\r
-             );\r
-         }\r
-\r
-     }\r
-\r
-     if (!isset($keywords["variables"])) {\r
-\r
-         preg_match_all($this->PHP_COMPLEX["undoc_var"], $phpcode, $regs, PREG_SET_ORDER);\r
-         foreach ($regs as $data) {\r
-             if (isset($variables[$data[1]]))\r
-                 continue;\r
-\r
-             if ("=" == $data[2])\r
-                 $value = trim($this->getValue(substr($phpcode, strpos($phpcode, $data[0])+strlen($data[0])), array(";" => true)));\r
-             else\r
-                 $value = "";\r
-\r
-             $paragraphs["variables"][] = array(\r
-                 "name" => $data[1],\r
-                 "value" => $value,\r
-                 "doc" => ""\r
-             );\r
-         }\r
-\r
-     }\r
-\r
-     if (!isset($keywords["consts"])) {\r
-\r
-         preg_match_all($this->PHP_COMPLEX["undoc_const"], $phpcode, $regs, PREG_SET_ORDER);\r
-         foreach ($regs as $data) {\r
-\r
-             $name = ("" != $data[2]) ? substr($data[1], 1, -1) : $data[1];\r
-             if (isset($constants[$name]))\r
-                 continue;\r
-\r
-             if (isset($data[5])) {\r
-                 if ($data[5])\r
-                     $case = "case insensitive, userdefined: '$data[5]'";\r
-                 else\r
-                     $case = "case sensitive, userdefined: '$data[5]'";\r
-             } else {\r
-                 $case = "default: case sensitive";\r
-             }\r
-\r
-             $paragraphs["consts"][] = array(\r
-                 "name" => $name,\r
-                 "value" => ("" != $data[4]) ? substr($data[3], 1, -1) : $data[3],\r
-                 "case" => $case,\r
-                 "doc" => ""\r
-             );\r
-         }\r
-\r
-     }\r
-\r
-     if (!isset($keywords["uses"])) {\r
-\r
-         preg_match_all($this->PHP_COMPLEX["undoc_use"], $phpcode, $regs, PREG_SET_ORDER);\r
-         foreach ($regs as $data) {\r
-\r
-             $filename = isset($data[5]) ? $data[5] : $data[4];\r
-             if (isset($uses[$filename]))\r
-                 continue;\r
-\r
-             $paragraphs["uses"][] = array(\r
-                 "type" => $data[1],\r
-                 "file" => $filename,\r
-                 "doc" => ""\r
-             );\r
-         }\r
-\r
-     }\r
-\r
-     return $paragraphs;\r
- } // end func getPhpdocParagraphs\r
- \r
- /**\r
- * Does a quick prescan to find modules and classes.\r
- * @param string Code to scan\r
- * @return array Hash of modules and classes found in the given code\r
- * @access public\r
- * @see getPhpdocParagraphs()\r
- */\r
- function getModulesAndClasses($phpcode) {\r
-\r
-     // Only the module data (index 0) of getModuleDoc() is needed here. The code\r
-     // it returns at index 1 is not used: classes are searched in the code as given.\r
-     $module_doc = $this->getModuleDoc($phpcode);\r
-\r
-     return array(\r
-         "modules" => $module_doc[0],\r
-         "classes" => $this->getClasses($phpcode)\r
-     );\r
- } // end func getModulesAndClasses\r
-\r
- /**\r
- * Tries to extract a module doc.\r
- * \r
- * The syntax for modules is not final yet. The implementation and meaning of "module" \r
- * might change at every time! Please do not ask for implementation details.\r
- *\r
- * @param string PHP Code to scan\r
- * @return array $module $module[0] = array with module data, \r
- * $module[1] = php code without the leading module doc\r
- */ \r
- function getModuleDoc($phpcode) {\r
-\r
-     $module = array();\r
-\r
-     // The module_doc pattern did not match: no module data, code stays untouched.\r
-     if (!preg_match($this->C_COMPLEX["module_doc"], $phpcode, $regs))\r
-         return array($module, $phpcode);\r
-\r
-     // The length of the match is used as the offset of the comment text, which\r
-     // assumes that the match starts at the very beginning of the code.\r
-     $start = strlen($regs[0]);\r
-     $end = strpos($phpcode, "*/", $start);\r
-\r
-     // Without a closing marker there is no complete doc comment to work on.\r
-     if (false === $end)\r
-         return array($module, $phpcode);\r
-\r
-     $remaining = substr($phpcode, $end+2);\r
-     $doc_comment = substr($phpcode, $start, $end-$start);\r
-\r
-     // Do we have OO code? If so the comment is not treated as a module doc.\r
-     if (preg_match($this->PHP_COMPLEX["class"], $remaining) || preg_match($this->PHP_COMPLEX["class_extends"], $remaining))\r
-         return array($module, $phpcode);\r
-\r
-     // Is there a module tag?\r
-     if (preg_match($this->C_COMPLEX["module_tags"], $doc_comment)) {\r
-\r
-         $doc_comment = $this->extractPhpdoc($doc_comment);\r
-         $allowed = array(\r
-             "module" => true,\r
-             "modulegroup" => true\r
-         );\r
-         $tags = $this->analyseTags($this->getTags($doc_comment), array(), $allowed);\r
-\r
-         $module = array(\r
-             "doc" => $doc_comment,\r
-             "status" => "ok",\r
-             "name" => (isset($tags["module"])) ? $tags["module"] : "",\r
-             "group" => (isset($tags["modulegroup"])) ? $tags["modulegroup"] : ""\r
-         );\r
-\r
-         return array($module, $remaining);\r
-     }\r
-\r
-     // No module tag.\r
-     // Try the remaining keywords. If one matches, the comment documents that\r
-     // element: the module doc is reported as missing and the code is returned\r
-     // unchanged so that the comment stays available for the element.\r
-     if (preg_match($this->PHP_COMPLEX["function"], $remaining) ||\r
-         preg_match($this->PHP_COMPLEX["use"], $remaining) ||\r
-         preg_match($this->PHP_COMPLEX["const"], $remaining) ||\r
-         preg_match($this->PHP_COMPLEX["var"], $remaining)\r
-     ) {\r
-\r
-         $module = array(\r
-             "doc" => "",\r
-             "status" => "missing",\r
-             "name" => "",\r
-             "group" => ""\r
-         );\r
-\r
-         return array($module, $phpcode);\r
-     }\r
-\r
-     // None matched: assume a module doc which lacks the module tags.\r
-     $module = array(\r
-         "doc" => $doc_comment,\r
-         "status" => "tags missing",\r
-         "name" => "",\r
-         "group" => ""\r
-     );\r
-\r
-     return array($module, $remaining);\r
- } // end func getModuleDoc\r
- \r
- /**\r
- * Returns a list of classes found in the given code.\r
- *\r
- * In early versions PHPdoc parsed all the code at once which resulted in huge\r
- * memory intensive hashes. Now it scans for classes, builds a classtree and \r
- * does the parsing step by step, writing information to the destination \r
- * (renderer, exporter) as soon as possible. This reduces the memory consumption \r
- * dramatically. getPhpdocParagraphs() could be used to extract the class definitions\r
- * as well but this specialized function is somewhat faster.\r
- *\r
- * @param string PHP code to scan.\r
- * @return array $classes List of classes found in the code. Each entry is a hash: "name" => class name, "extends" => parent class name or "".\r
- */\r
- function getClasses($phpcode) {\r
-\r
-     $classes = array();\r
-\r
-     // Classes matched by the undoc_class pattern are listed without a parent.\r
-     preg_match_all($this->PHP_COMPLEX["undoc_class"], $phpcode, $matches, PREG_SET_ORDER);\r
-     foreach ($matches as $data) {\r
-         $classes[] = array(\r
-             "name" => $data[1],\r
-             "extends" => ""\r
-         );\r
-     }\r
-\r
-     // Classes matched by undoc_class_extends: group 2 is stored as the parent class.\r
-     preg_match_all($this->PHP_COMPLEX["undoc_class_extends"], $phpcode, $matches, PREG_SET_ORDER);\r
-     foreach ($matches as $data) {\r
-         $classes[] = array(\r
-             "name" => $data[1],\r
-             "extends" => $data[2]\r
-         );\r
-     }\r
-\r
-     return $classes;\r
- } // end func getClasses\r
- \r
- /**\r
- * Strips "/xx", "x/" and x from doc comments (x means asterisk).\r
- * @param string Doc comment to clean up.\r
- * @return string $phpdoc\r
- */\r
- function extractPhpdoc($paragraph) {\r
-\r
-     // NOTE(review): split() is a POSIX regex function that was removed in PHP 7.0.\r
-     // It is kept because the pattern in PHP_BASE["break"] is defined elsewhere and\r
-     // must be checked before it can be handed over to preg_split().\r
-     $lines = split($this->PHP_BASE["break"], $paragraph);\r
-     $phpdoc = "";\r
-\r
-     foreach ($lines as $line) {\r
-\r
-         $line = trim($line);\r
-         if ("" == $line)\r
-             continue;\r
-\r
-         // Drop one leading asterisk and the whitespace around the rest of the line.\r
-         if ("*" == $line[0])\r
-             $line = trim(substr($line, 1));\r
-\r
-         $phpdoc.= $line."\n";\r
-     }\r
-\r
-     // Nothing collected: return an empty string instead of the result of substr()\r
-     // on an empty string, which is false before PHP 8.\r
-     if ("" == $phpdoc)\r
-         return "";\r
-\r
-     // Cut off the line break appended behind the last line.\r
-     return substr($phpdoc, 0, -1);\r
- } // end func extractPhpdoc\r
- \r
- /**\r
- * Extract the description from a PHPDoc doc comment.\r
- *\r
- * Every PHPDoc doc comment has the same syntax: /xx[break][x]short description\r
- * [break][[x]multiple line long description[break]][[x]@list of tags[. This function\r
- * returns an array of the short description and long description.\r
- *\r
- * @param string Doc comment to examine.\r
- * @return array $description $description[0] = short description (first line),\r
- * $description[1] = long description (second line upto the first tag)\r
- */\r
- function getDescription($phpdoc) {\r
-\r
-     // Everything in front of the first doc tag belongs to the description.\r
-     $tag_positions = $this->getTagPos($phpdoc);\r
-     $desc = (0 == count($tag_positions))\r
-         ? trim($phpdoc)\r
-         : trim(substr($phpdoc, 0, $tag_positions[0]["pos"]));\r
-\r
-     $lines = split($this->PHP_BASE["break"], $desc);\r
-\r
-     if ("" != $desc && 1 != count($lines)) {\r
-\r
-         // The first line is the short description, the remaining lines are\r
-         // glued together without a separator to form the long description.\r
-         $short = trim(array_shift($lines));\r
-\r
-         return array($short, implode("", $lines));\r
-     }\r
-\r
-     // Only a short description but no long description - or none of both.\r
-     return array($desc, "");\r
- } // end func getDescription\r
- \r
- /**\r
- * Scans a code passage for a value.\r
- *\r
- * There are some cases where you can hardly use a regex to grep a value\r
- * because the value might contain unescaped characters that end the value.\r
- * Value means something like "array ( ";", '\;' );" or "'phpdoc; ';" where\r
- * the delimiter would be ";".\r
- *\r
- * @param string The php code to examine.\r
- * @param mixed String of one delimiter or array of delimiters.\r
- * @return string Value found in the code\r
- * @todo Racecondition: comments\r
- */\r
- function getValue($code, $delimiter) {\r
-\r
-     if ("" == $code)\r
-         return "";\r
-\r
-     if (!is_array($delimiter))\r
-         $delimiter = array($delimiter => true);\r
-\r
-     // Code that consists of whitespace only is empty after trim() and has no\r
-     // first character that could be examined.\r
-     $code = trim($code);\r
-     if ("" == $code)\r
-         return "";\r
-\r
-     // A delimiter at the very beginning is returned as the value itself.\r
-     if (isset($delimiter[$code[0]]))\r
-         return $code[0];\r
-\r
-     $len = strlen($code);\r
-\r
-     // Quote character of the string the scan is currently in, "" outside of strings.\r
-     $enclosed_by = "";\r
-\r
-     for ($i = 0; $i < $len; $i++) {\r
-\r
-         $char = $code[$i];\r
-\r
-         if ('"' == $char || "'" == $char) {\r
-\r
-             // A quote is escaped if an odd number of backslashes precedes it. An\r
-             // even number means the backslashes escape each other, e.g. 'a\\'.\r
-             $backslashes = 0;\r
-             for ($j = $i-1; $j >= 0 && "\\" == $code[$j]; $j--)\r
-                 $backslashes++;\r
-\r
-             if (0 == $backslashes % 2) {\r
-                 if ("" == $enclosed_by)\r
-                     $enclosed_by = $char;       // string starts\r
-                 else if ($char == $enclosed_by)\r
-                     $enclosed_by = "";          // string ends\r
-             }\r
-\r
-         }\r
-\r
-         // Delimiters inside of strings do not end the value.\r
-         if ("" == $enclosed_by && isset($delimiter[$char]))\r
-             break;\r
-\r
-     }\r
-\r
-     // The delimiter that ended the scan is not part of the value.\r
-     return substr($code, 0, $i);\r
- } // end func getValue\r
- \r
- /**\r
- * Analyses a code snippet and returns the type and value of the first variable found.\r
- *\r
- * With version 0.3 PHPDoc tries to analyse variable declarations to find \r
- * type and value. This is used to analyse class variable declarations and \r
- * optional function arguments.\r
- * \r
- * Note that all regular expressions in this function start with "^". That means\r
- * you have to do some preparations to the code snippet you're passing to this\r
- * function.\r
- *\r
- * @param string PHP code to analyse\r
- * @param boolean Flag indicating the "type" of code to analyse. Optional \r
- * function parameters and class variables have a slightly \r
- * different syntax for arrays. By default function parameters\r
- * are expected.\r
- * @return array $vartype $vartype[0] = type, $vartype[1] = value, $vartype[2] = raw value\r
- */\r
- function getVariableTypeAndValue($code, $flag_args = true) {\r
-\r
-     // Defaults that are returned if none of the patterns matches.\r
-     $type = "unknown";\r
-     $value = "unknown";\r
-     $raw_value = $code;\r
-\r
-     //\r
-     // Do not change the order the function tries to find out the type.\r
-     //\r
-\r
-     if (preg_match($this->PHP_COMPLEX["type_boolean"], $code, $regs)) {\r
-\r
-         $type = "boolean";\r
-         $raw_value = $regs[0];\r
-         $value = $regs[0];\r
-\r
-     } else if (preg_match($this->PHP_COMPLEX["type_string_enclosed"], $code, $regs)) {\r
-\r
-         $type = "string";\r
-         $raw_value = $regs[0];\r
-         $value = $regs[0];\r
-\r
-     } else if (preg_match($this->PHP_COMPLEX["type_int_oct"], $code, $regs)) {\r
-\r
-         // The value is reported as "decimal (literal)", whitespace removed.\r
-         // NOTE(review): the range check compares an int cast with the literal\r
-         // string, its result depends on the PHP version - verify.\r
-         $type = "integer (octal)";\r
-         $raw_value = $regs[0];\r
-         $value = preg_replace("@\s@", "", $regs[0]);\r
-         if ((int)$value != $value)\r
-             $type.= " [warning: out of integer range, possible overflow trouble]";\r
-         $value = octdec($value)." ($value)";\r
-\r
-     } else if (preg_match($this->PHP_COMPLEX["type_int_hex"], $code, $regs)) {\r
-\r
-         // NOTE(review): same range check as in the octal branch - verify.\r
-         $type = "integer (hexadecimal)";\r
-         $raw_value = $regs[0];\r
-         $value = preg_replace("@\s@", "", $regs[0]);\r
-         if ((int)$value != $value)\r
-             $type.= " [warning: out of integer range, possible overflow trouble]";\r
-         $value = hexdec($value)." ($value)";\r
-\r
-     } else if (preg_match($this->PHP_COMPLEX["type_float_exponent"], $code, $regs)) {\r
-\r
-         $type = "float";\r
-         $raw_value = $regs[0];\r
-         $value = (string)preg_replace("@\s@", "", $regs[0]);\r
-         if ((float)$value != $value)\r
-             $type.= " [warning: out of float range]";\r
-         $value = (float)$value;\r
-\r
-     } else if (preg_match($this->PHP_COMPLEX["type_float"], $code, $regs)) {\r
-\r
-         $type = "float";\r
-         $raw_value = $regs[0];\r
-         $value = preg_replace("@\s@", "", $regs[0]);\r
-         if ((float)$value != $value)\r
-             $type.= " [warning: out of float range]";\r
-         $value = (float)$value;\r
-\r
-     } else if (preg_match($this->PHP_COMPLEX["type_number"], $code, $regs)) {\r
-\r
-         $value = preg_replace("@\s@", "", $regs[0]);\r
-         $raw_value = $regs[0];\r
-\r
-         // A number that survives the int cast unchanged is an integer,\r
-         // everything else is handled as a float.\r
-         if ((int)$value == $value) {\r
-\r
-             $type = "integer";\r
-             $value = (int)$value;\r
-\r
-         } else {\r
-\r
-             $type = "float";\r
-             if ((float)$value != $value)\r
-                 $type.=" [warning: out of float range]";\r
-             $value = (float)$value;\r
-\r
-         }\r
-\r
-     } else if ($flag_args && preg_match($this->PHP_COMPLEX["type_empty_array"], $code, $regs)) {\r
-\r
-         // Function arguments: only the empty array pattern is tested.\r
-         $value = "array()";\r
-         $raw_value = $regs[0];\r
-         $type = "array";\r
-\r
-     } else if (!$flag_args && preg_match($this->PHP_COMPLEX["type_array"], $code, $regs)) {\r
-\r
-         // Class variables: the array definition runs up to the closing ";".\r
-         $value = $this->getValue($code, array(";" => true));\r
-\r
-         // The comparison has to be strict: strpos() returns false if "array"\r
-         // is not found at all and false == 0. The keyword is case insensitive.\r
-         if (0 === strpos(strtolower($value), "array"))\r
-             $type = "array";\r
-\r
-         // Assignment, not comparison: the raw value is the extracted definition.\r
-         $raw_value = $value;\r
-\r
-     } else if (preg_match($this->PHP_COMPLEX["type_string"], $code, $regs)) {\r
-\r
-         $type = "string";\r
-         $raw_value = $regs[0];\r
-         $value = $regs[0];\r
-\r
-     }\r
-\r
-     return array($type, $value, $raw_value);\r
- } // end func getVariableTypeAndValue\r
- \r
-} // end class PhpdocParserCore\r
-?>
\ No newline at end of file