/**
 * Defines all regular expressions.
 *
 * This class defines all regular expressions. To make the
 * configuration and customization of PHPDoc as simple as
 * possible all regular expressions are defined in one class.
 * From a programming point of view there's no need to do so.
 *
 * @version $Id: PhpdocParserRegExp.php,v 1.4 2000/12/03 22:37:37 uw Exp $
 */
class PhpdocParserRegExp extends PhpdocObject {

    /**
     * Array of phpdoc tags, indexed by the tagname.
     *
     * Every value describes the syntax of its tag. Tags are found by
     * grepping, which is really not parsing: buildComplexRegExps() joins
     * the tag names in the order given here into one alternation and the
     * first alternative that matches wins. Don't change the order the
     * tags are listed. If you introduce new tags write the long variant
     * of the tagname (parameter) in front of the shortcut (param).
     *
     * @var array List of all PHPDoc documentation tags.
     */
    var $PHPDOC_TAGS = array(
        "@parameter"    => '@param[eter] (object objectname|type) [$varname] [description]',
        "@param"        => '@param[eter] (object objectname|type) [$varname] [description]',

        "@return"       => '@return (object objectname|type) [$varname] [description]',

        "@access"       => '@access',
        "@abstract"     => '@abstract',
        "@static"       => '@static',
        "@final"        => '@final',

        "@throws"       => '@throws exception [, exception]',

        "@see"          => '@see (function()|$varname|(module|class)(function()|$varname)) [, (funtion()|$varname|(module|class)(function()|$varname))]',
        "@link"         => '@link URL [description]',

        "@var"          => '@var (object objectname|type) [$varname]',
        "@global"       => '@global (object objectname|type) $varname [description]',

        "@constant"     => '@const[ant] label [description]',
        "@const"        => '@const[ant] label [description]',

        "@author"       => '@author Name [<email>] [, Name [<email>]',
        "@copyright"    => '@copyright description',

        "@version"      => '@version label',
        "@since"        => '@since label',

        "@deprecated"   => '@deprec[ated] description',
        "@deprec"       => '@deprec[ated] description',

        "@brother"      => '@(brother|sister) (function()|$varname)',
        "@sister"       => '@(brother|sister) (function()|$varname)',

        "@include"      => '@include description',

        "@exclude"      => '@exclude label',

        "@modulegroup"  => '@modulegroup label',
        "@module"       => '@module label',

        "@package"      => '@package label',

        "@magic"        => '@magic description',
        "@todo"         => '@todo description'
    );

    /**
     * Basic regular expressions used to compose complex expressions to grep doc comments.
     *
     * PHPDoc tries to compose all complex regular expressions
     * from a list of basic ones. This array contains all expressions
     * used to grep complex doc comments and the surrounding keywords.
     *
     * @var array List of basic regular expressions matching parts of doc comments:
     *            module names, module separator, vartypes, accesstypes.
     * @see buildComplexRegExps(), $C_COMPLEX
     */
    var $C_BASE = array(
        #"block" => '/\*\*((?:(?!\*).)*(?:\n(?!\s*\*/)\s*\*(?:(?!\*/).)*)*)\*/',
        "module"            => "[^\s]+",
        "module_separator"  => "::",
        "module_tags"       => "(@modulegroup|@module)",

        "vartype"           => "(string|integer|int|long|real|double|float|boolean|bool|mixed|array|object)",
        "access"            => "(private|public)"
    );

    /**
     * List of regular expressions used to grep complex doc comments.
     *
     * As with $PHP_COMPLEX all complex expressions are built using basic
     * ones in buildComplexRegExps(). The values below are placeholders;
     * buildComplexRegExps() overwrites them and adds further keys such
     * as "see_var".
     *
     * @var array Regular expressions matching see and optional objectnames.
     * @see buildComplexRegExps(), $C_BASE
     */
    var $C_COMPLEX = array(
        "objectname_optional"       => "",

        "see_function"              => "",
        "see_moduleclass"           => "",

        "module_doc"                => "",
        "module_tags"               => "",
        "module_separator"          => "",
        "module_separator_len"      => 0,
        "module_separator_len_neg"  => 0
    );

    /**
     * Basic RegExps used to analyse PHP Code.
     *
     * PHPDoc tries to compose all complex regular expressions
     * from some basic expressions. This array contains
     * all expressions used to build $PHP_COMPLEX.
     * There are some differences to the RegExps in zend-scanner.l,
     * e.g. I decided to write "\s+" instead of "[ \n\r\t]+" which
     * should be identical as long as perl compatible regular
     * expressions are used. Another point is that I did not break
     * down numbers to LNUM/DNUM.
     *
     * @var array List of basic regular expressions matching php code elements:
     *            spaces, optional spaces, linebreaks, labels, use (include and friends),
     *            optional argument assignment, boolean, several variable types.
     * @see $PHP_COMPLEX
     */
    var $PHP_BASE = array (
        // "space" is read all over buildComplexRegExps(); "\s+" is the
        // expression the comment above names for mandatory whitespace.
        "space"                 => "\s+",
        "space_optional"        => "\s*",
        "break"                 => "[\n\r]",

        "php_open_long"         => "<\?php\s", # zend_scanner.l uses {WHITESPACE} (space in our case) either. Might be slightly faster.
        "php_open_short"        => "<\?",
        "php_open_asp"          => "<%",
        "php_open_short_print"  => "<\?=",
        "php_open_asp_print"    => "<%=",

        # Do not change the single quotes to double ones: inside double
        # quotes PHP itself would expand \x7f and \xff before the regular
        # expression engine gets to see them.
        "label"                 => '[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*',
        "use"                   => "(include_once|include|require_once|require)",
        "assignment"            => "\s*([,=])\s*",

        "boolean"               => "(true|false)",

        "string"                => "[^\s]+",
        "string_enclosed"       => "(['\"])(?:\\\\\\1|[^\\1])*?\\1",

        "int_oct"               => "[+-]?\s*0[0-7]+",
        "int_hex"               => "[+-]?\s*0[xX][0-9A-Fa-f]+",

        "float"                 => "[+-]?\s*\d*\.\d+",
        "float_exponent"        => "[+-]?\s*\d*(?:\.\d+)*[eE][+-]?\d+",

        "number"                => "[+-]?\s*\d+",

        "array"                 => "array\s*\(",
        "empty_array"           => "array\s*\(\s*\)\s*"
    );

    /**
     * List of regular expressions used to grep complex php code elements.
     *
     * The RegExp of the variable types is slightly changed to that
     * one in $PHP_BASE, getVariableTypeAndValue() needs this.
     * "undoc_*" is used to grep all keywords, those which have a doc
     * comment in front and those without. See getPhpdocParagraphs()
     * for more details on this.
     *
     * The values below are placeholders; buildComplexRegExps() overwrites
     * them and adds further keys.
     *
     * @var array RegExps to match: variablenames, functionnames, classnames,
     *            class variable declarations, function declarations,
     *            class declarations, defines, uses (include and friends),
     *            function arguments, several variables types.
     * @see buildComplexRegExps(), getVariableTypeAndValue(), getPhpdocParagraphs(), $PHP_BASE
     */
    var $PHP_COMPLEX = array (
        "functionname"          => "",

        "php_open_script"       => "",

        "undoc_function"        => "",

        "undoc_class"           => "",

        "class_extends"         => "",
        "undoc_class_extends"   => "",

        "undoc_const"           => "",

        "type_boolean"          => "",

        "type_string"           => "",
        "type_string_enclosed"  => "",

        "type_int_oct"          => "",
        "type_int_hex"          => "",

        "type_float"            => "",
        "type_float_exponent"   => "",

        "type_number"           => "",

        "type_array"            => "",
        "type_empty_array"      => ""
    );

    /**
     * Array of RegExp matching the syntax of several complex tags.
     *
     * Empty entries are filled by buildComplexRegExps(). "link" is
     * already complete; "author" holds the raw e-mail pattern until
     * buildComplexRegExps() wraps it into delimiters.
     *
     * @var array Used to analyse return, var, param,
     *            global, see and to find tags in general
     * @see PhpdocParserObject(), buildComplexRegExps()
     */
    var $TAGS = array (
        "var"           => "", # @var, @param

        "module"        => "", # @module, @modulegroup

        "const"         => "", # @const, @constant

        "see_var"       => "", # @see
        "see_function"  => "", # @see
        "see_class"     => "", # @see
        "see_module"    => "", # @see

        "link"          => "@([^\s]+)(.*)@is", # @link

        "author"        => "<\s*([a-z]([-a-z0-9_.])*@([-a-z0-9_]*\.)+[a-z]{2,})\s*>", # @author <email> part

        "all"           => "" # list of all known tags
    );

    /**
     * Builds complex regular expressions for the parser.
     *
     * PHPDoc has a small set of basic regular expressions. All complex
     * regular expressions are made out of the basic ones. The composition
     * is done in this method: it reads $PHP_BASE, $C_BASE and $PHPDOC_TAGS
     * and writes $PHP_COMPLEX, $C_COMPLEX and $TAGS. It takes no arguments
     * and returns nothing. Note: every derived class must call this method
     * in its constructor!
     *
     * @see $PHP_BASE, $PHP_COMPLEX, $C_BASE, $C_COMPLEX
     */
    function buildComplexRegExps() {

        // Do not change the order of the variable initializations, there are dependencies.
        // It starts with some php names.

        $this->PHP_COMPLEX["varname"]      = sprintf("[&]?[$]%s", $this->PHP_BASE["label"] );
        $this->PHP_COMPLEX["functionname"] = sprintf("[&]?%s", $this->PHP_BASE["label"] );
        $this->PHP_COMPLEX["classname"]    = $this->PHP_BASE["label"];

        // Now build all regexps used to grep doc comment elements.

        // optional object name
        $this->C_COMPLEX["objectname_optional"] = sprintf("(?:object%s%s)?",
            $this->PHP_BASE["space"],
            $this->PHP_COMPLEX["classname"]
        );

        $this->C_COMPLEX["module_separator"]         = sprintf("(?:%s)", $this->C_BASE["module_separator"]);
        $this->C_COMPLEX["module_separator_len"]     = strlen($this->C_BASE["module_separator"]);
        $this->C_COMPLEX["module_separator_len_neg"] = -1*strlen($this->C_BASE["module_separator"]);

        // References to other elements
        $this->C_COMPLEX["see_var"] = sprintf("(%s%s)?([$][^:]%s)",
            $this->C_BASE["module"],
            $this->C_COMPLEX["module_separator"],
            $this->PHP_BASE["label"]
        );

        $this->C_COMPLEX["see_function"] = sprintf("(%s%s)?([^:]%s\(%s\))",
            $this->C_BASE["module"],
            $this->C_COMPLEX["module_separator"],
            $this->PHP_BASE["label"],
            $this->PHP_BASE["space_optional"]
        );

        $this->C_COMPLEX["see_moduleclass"] = sprintf("(%s)", $this->C_BASE["module"] );

        // RegExps used to grep certain php code elements.
        //
        // Every "undoc_*" expression is derived from its anchored sibling:
        // substr(..., 2, -3) cuts off the opening delimiter together with
        // the "^" anchor and the closing delimiter together with "is", then
        // the remainder is wrapped again without the anchor.

        // class variable declarations
        $this->PHP_COMPLEX["var"] = sprintf("|^%svar%s([$]%s)%s(=?)|is",
            $this->PHP_BASE["space_optional"],
            $this->PHP_BASE["space"],
            $this->PHP_BASE["label"],
            $this->PHP_BASE["space_optional"]
        );
        $this->PHP_COMPLEX["undoc_var"] = sprintf("|%s|isS", substr($this->PHP_COMPLEX["var"], 2, -3) );

        // function statements
        $this->PHP_COMPLEX["function"] = sprintf("|^%sfunction%s(%s)%s\(|is",
            $this->PHP_BASE["space_optional"],
            $this->PHP_BASE["space"],
            $this->PHP_COMPLEX["functionname"],
            $this->PHP_BASE["space_optional"]
        );
        $this->PHP_COMPLEX["undoc_function"] = sprintf("|%s|isS", substr($this->PHP_COMPLEX["function"], 2, -3) );

        // class statements
        $this->PHP_COMPLEX["class"] = sprintf("|^%sclass%s(%s)%s{|is",
            $this->PHP_BASE["space_optional"],
            $this->PHP_BASE["space"],
            $this->PHP_COMPLEX["classname"],
            $this->PHP_BASE["space_optional"]
        );
        $this->PHP_COMPLEX["undoc_class"] = sprintf("|%s|isS", substr($this->PHP_COMPLEX["class"], 2, -3) );

        $this->PHP_COMPLEX["class_extends"] = sprintf("|^%sclass%s(%s)%sextends%s(%s)%s{|is",
            $this->PHP_BASE["space_optional"],
            $this->PHP_BASE["space"],
            $this->PHP_COMPLEX["classname"],
            $this->PHP_BASE["space"],
            $this->PHP_BASE["space"],
            $this->PHP_COMPLEX["classname"],
            $this->PHP_BASE["space_optional"]
        );
        $this->PHP_COMPLEX["undoc_class_extends"] = sprintf("|%s|isS", substr($this->PHP_COMPLEX["class_extends"], 2, -3) );

        // RegExp used to grep define statements.
        // NOTE: the backreferences (\2, \4, \6) depend on the position of the
        // groups inside the whole expression, which does not allow the usage
        // of $this->PHP_BASE here.
        // The hyphen is kept at the very end of each character class
        // ("[\w_-]"): newer PCRE versions reject "\w-_" as an invalid range.
        $this->PHP_COMPLEX["const"] = sprintf("@^%sdefine%s\(%s(%s)%s,%s(%s)%s(?:,%s(%s))?%s\)%s;@is",
            $this->PHP_BASE["space_optional"],
            $this->PHP_BASE["space_optional"],
            $this->PHP_BASE["space_optional"],
            "[$]?\w[\w_-]*|(['\"])(?:\\\\\\2|[^\\2])*?\\2",
            $this->PHP_BASE["space_optional"],
            $this->PHP_BASE["space_optional"],
            "(['\"])(?:\\\\\\4|[^\\4])*?\\4|(?:true|false)|[+-]?\s*0[0-7]+|[+-]?\s*0[xX][0-9A-Fa-f]+|[+-]?\s*\d*(?:\.\d+)*[eE][+-]?\d+|[+-]?\s*\d*\.\d+|[+-]?\s*\d+|&?[$]?\w[\w_-]*",
            $this->PHP_BASE["space_optional"],
            $this->PHP_BASE["space_optional"],
            "(?:true|false)|[+-]?\s*0[0-7]+|[+-]?\s*0[xX][0-9A-Fa-f]+|[+-]?\s*\d*(?:\.\d+)*[eE][+-]?\d+|[+-]?\s*\d*\.\d+|[+-]?\s*\d+|&?[$]?\w[\w_-]*|(['])(?:\\\\\\6|[^\\6])*?\\6",
            $this->PHP_BASE["space_optional"],
            $this->PHP_BASE["space_optional"]
        );
        $this->PHP_COMPLEX["undoc_const"] = sprintf("@%s@isS", substr($this->PHP_COMPLEX["const"], 2, -3) );

        // include, include_once, require, require_once and friends
        $this->PHP_COMPLEX["use"] = sprintf("@^%s%s[\(]%s((['\"])((?:\\\\\\3|[^\\3])*?)\\3|([^\s]+))%s[\)]%s;@is",
            $this->PHP_BASE["use"],
            $this->PHP_BASE["space_optional"],
            $this->PHP_BASE["space_optional"],
            $this->PHP_BASE["space_optional"],
            $this->PHP_BASE["space_optional"]
        );
        $this->PHP_COMPLEX["undoc_use"] = sprintf("@%s@isS", substr($this->PHP_COMPLEX["use"], 2, -3) );

        // Variable name with an optional assignment operator. This one is used
        // to analyse function heads [parameter lists] as well as class variable
        // declarations.
        $this->PHP_COMPLEX["argument"] = sprintf("|(%s)(%s)?|s",
            $this->PHP_COMPLEX["varname"],
            $this->PHP_BASE["assignment"]
        );

        // <script language="php"> syntax
        $this->PHP_COMPLEX["php_open_script"] = sprintf("<script%slanguage%s=%s[\"']php[\"']%s>",
            $this->PHP_BASE["space"],
            $this->PHP_BASE["space_optional"],
            $this->PHP_BASE["space_optional"],
            $this->PHP_BASE["space_optional"]
        );

        // any of the known ways to open a php block
        $this->PHP_COMPLEX["php_open_all"] = sprintf("(?:%s|%s|%s|%s|%s|%s)",
            $this->PHP_BASE["php_open_long"],
            $this->PHP_BASE["php_open_short"],
            $this->PHP_BASE["php_open_asp"],
            $this->PHP_BASE["php_open_short_print"],
            $this->PHP_BASE["php_open_asp_print"],
            $this->PHP_COMPLEX["php_open_script"]
        );

        // a doc comment that directly follows the php open tag
        $this->C_COMPLEX["module_doc"] = sprintf("@^%s%s%s/\*\*@is",
            $this->PHP_BASE["space_optional"],
            $this->PHP_COMPLEX["php_open_all"],
            $this->PHP_BASE["space_optional"]
        );

        $this->C_COMPLEX["module_tags"] = sprintf("/%s/is", $this->C_BASE["module_tags"] );

        // RegExp used to grep variables types: the basic expression
        // anchored at the start of the subject.
        $elements = array(
            "boolean", "string", "string_enclosed",
            "int_oct", "int_hex", "float", "float_exponent",
            "number", "array", "empty_array"
        );
        foreach ($elements as $name) {
            $this->PHP_COMPLEX["type_".$name] = sprintf("@^%s@", $this->PHP_BASE[$name]);
        }

        // Regular expressions used to analyse phpdoc tags.

        $this->TAGS["var"] = sprintf("/%s(?:%s(%s))?(?:%s(%s))?%s(.*)?/is",
            $this->C_BASE["vartype"],
            $this->PHP_BASE["space"],
            $this->PHP_BASE["label"],
            $this->PHP_BASE["space"],
            $this->PHP_COMPLEX["varname"],
            $this->PHP_BASE["space_optional"]
        );
        $this->TAGS["return"] = $this->TAGS["var"];

        $this->TAGS["global"] = sprintf("/%s%s(%s)%s(%s)%s(.*)/is",
            $this->C_BASE["vartype"],
            $this->PHP_BASE["space_optional"],
            $this->C_COMPLEX["objectname_optional"],
            $this->PHP_BASE["space"],
            $this->PHP_COMPLEX["varname"],
            $this->PHP_BASE["space_optional"]
        );

        // "[$]" and not "\$": inside a double quoted string "\$" collapses to
        // a bare "$", which the regular expression engine would treat as an
        // end-of-subject anchor instead of a literal dollar sign.
        $this->TAGS["brother"] = sprintf("/(%s\(\)|[$]%s)/is",
            $this->PHP_BASE["label"],
            $this->PHP_BASE["label"]
        );

        $this->TAGS["const"] = sprintf("/(%s)%s(.*)?/is",
            $this->PHP_BASE["label"],
            $this->PHP_BASE["space_optional"]
        );

        $this->TAGS["access"] = sprintf("/%s/is", $this->C_BASE["access"]);
        $this->TAGS["module"] = sprintf("/%s/is", $this->PHP_BASE["label"]);

        // $TAGS["author"] starts out as the raw pattern ("<..."). Wrap it only
        // once so that a repeated call of this method does not nest delimiters.
        if ("/" != substr($this->TAGS["author"], 0, 1)) {
            $this->TAGS["author"] = sprintf("/%s/is", $this->TAGS["author"]);
        }

        // One alternation of all known tag names (without the leading "@"),
        // in the order given by $PHPDOC_TAGS.
        $tagnames = array();
        foreach ($this->PHPDOC_TAGS as $tag => $usage) {
            $tagnames[] = substr($tag, 1);
        }
        $this->TAGS["all"] = "/@(" . implode("|", $tagnames) . ")/is";

        // The see expressions are the C_COMPLEX ones wrapped into delimiters.
        $elements = array ( "see_function", "see_var", "see_moduleclass" );
        foreach ($elements as $index) {
            $this->TAGS[$index] = sprintf("/%s/is", $this->C_COMPLEX[$index]);
        }

    } // end func buildComplexRegExps

} // end class PhpdocParserRegExp
\r