<?php
/**
* type: cli
* title: class map in .phar
* description: Adds a `map` field to phar meta data, containing identifier=>filename maps
*
* Small tool to augment existing library phars with a classmap usable
* by Canonic_Autoloader::addPhar()
*
* Syntax:
*
* pharmap vnd-pkg.phar
*
*
* (This isn't meant to update the shared.phar-internal autoload.map.php list.)
*
*/
// The target archive is expected as first command line argument; without
// it there is nothing to open, so print the syntax and stop.
if (!isset($_SERVER["argv"][1]) || $_SERVER["argv"][1] === "") {
    fwrite(STDERR, "Syntax: pharmap vnd-pkg.phar\n");
    exit(1);
}
// open Phar
$p = new Phar($_SERVER["argv"][1]);
// An archive without meta data is treated as an empty array. Any other
// scalar payload can't take a `map` entry, so abort instead of failing on
// the offset assignment below.
$meta = $p->getMetadata();
if ($meta === null || $meta === false) {
    $meta = array();
}
elseif (is_scalar($meta)) {
    fwrite(STDERR, "pharmap: existing phar meta data is a scalar value, cannot add a `map` field\n");
    exit(1);
}
// always rebuild the identifier map from scratch
$meta["map"] = array("class"=>array(), "function"=>array(), "const"=>array());
map_phar($p, $meta);
// NOTE: per the PHP manual Phar::setMetadata() needs phar.readonly=0
$p->setMetadata($meta);
unset($p);
/**
 * Walks every file entry of a Phar and records which identifiers it declares.
 *
 * The result is written into $meta["map"], grouped by the types reported from
 * RegexPhpIdentifierDeclarations::identifiers(). Each identifier points at the
 * phar-internal filename that declares it. Class and function keys get
 * lowercased, constant keys are stored as found.
 *
 * @param Phar  $p     Opened archive; iterated recursively.
 * @param array $meta  Phar meta data (by reference), receives the `map` entries.
 */
function map_phar($p, &$meta) {
    $entries = new RecursiveIteratorIterator($p);
    foreach ($entries as $entry) {
        // only *.php files outside of any test/ or tests/ directory are scanned
        if (preg_match("~^(?!.*/tests?/).+\.php$~", $entry)) {
            // cut the phar://…archive.phar/ prefix to get the phar-local path
            $local_path = preg_replace("~^phar://.+?\.phar/*~", "", $entry);
            // go through the phar:// stream wrapper; according to the original author
            // PharFileInfo/FileObject truncates fread() to the compressed size
            $code = file_get_contents($entry);
            // collect declared identifiers per type
            $declared = new RegexPhpIdentifierDeclarations($code);
            foreach ($declared->identifiers() as $type=>$names) {
                foreach ($names as $name) {
                    // constants keep their spelling, everything else is keyed lowercase
                    if ($type == "const") {
                        $key = $name;
                    }
                    else {
                        $key = strtolower($name);
                    }
                    $meta["map"][$type][$key] = $local_path;
                }
            }
        }
    }
}
/**
 * Shallow regex-lexing to uncover namespace/class/function/constant identifiers.
 *
 * By relying on keyword context and a bit of block-level skipping, this still
 * uncovers correctly nested and deferred declaration constructs. Plain function
 * injections within methods however are overlooked. Dynamic declarations within
 * strings are ignored due to non-code being stripped beforehand.
 *
 * This approach doesn't assert any nesting/syntax correctness; implementing it
 * per recursive subroutines wouldn't provide anything like a parse tree via
 * PCREs interface, and would invert the speed advantage here.
 *
 * Usage: construct with the PHP source text, then call identifiers().
 *
 */
class RegexPhpIdentifierDeclarations {

    /**
     * Raw preg_match_all() result in PREG_SET_ORDER; one entry per declaration.
     * Capture groups per entry:
     *  - Namespaces in [1]
     *  - Classes/interfaces/traits in [2]
     *  - The skipped {...} class body in [3]
     *  - Function names in [4]
     *  - Constants in [5]
     *
     */
    public $matches = array();

    /**
     * Regex all the things.
     *
     * @param string $source  PHP source code to scan
     */
    public function __construct($source) {

        /**
         * Convert define("NAME", ...) calls into `const NAME =` literals first,
         * because the string holding the name would be stripped in the next step.
         *
         * NOTE(review): such names later receive the current namespace prefix in
         * identifiers(), just like real `const` declarations - confirm that is
         * wanted for define().
         */
        $source = preg_replace(
            "~\b define \s*\(\s* ([\"\']) ([\\w\\x7F-\\xFF]+) \\1 \s*, ~ix",
            "const $2 =", $source
        );

        /**
         * Remove non-code sections (inline HTML, comments and strings).
         *
         * Heredoc and nowdoc share one alternative: group 1 holds the optional
         * label quote, group 2 the label itself. The closing marker must refer
         * to the label group of its own branch; a back reference to a group
         * that didn't take part never matches, which previously left nowdoc
         * bodies in the source. The closing label may be indented and has to
         * end at a word break, so a body line merely starting with the label
         * text doesn't terminate the block.
         */
        $source = preg_replace("~
            (?: \A | \?\>) .*? \<\?(?:php|=)+?             # Open+closing PHP token
          | /\* .*? \*/                                    # Multiline /* comments */
          | // \V*                                         # Single line // comment
          | \# \V*                                         # Hash comment
          | \" (?:[^\"\\\\] | \\\\.)* \"                   # Double quoted string
          | \' (?:[^\'\\\\] | \\\\.)* \'                   # Single quoted string
          | <<<\s* ([\"\']?) (\w+) \\1 .+? ^\h* \\2 \b     # Heredoc and nowdoc string
            ~smix",
            "", $source
        );

        /**
         * Match identifiers and skip class block {} structures. (While one could recurse
         * into methods or namespace{} blocks individually, practically only the outermost
         * interface is relevant for the autoloader.)
         *
         * Note that the namespace branch requires a name, so a bare `namespace {`
         * block is not recorded.
         */
        preg_match_all("~
            (?: (?<![\\x7F-\\xFF]) \b )                    # Only match constructs at word breaks
            (?:
                namespace \s+
                ([\\w\\x7F-\\xFF\\\\]+) \s* [{;]           # Namespace identifier
              | (?is:class|interface|trait) \s+
                ([\\w\\x7F-\\xFF]+) [^\{\}]*               # Class declaration
                ((?>\{ (?: [^\{\}]* | (?-1) )*\}))         # Recursive {...} block skipping
              | function \s+
                ([\\w\\x7F-\\xFF]+) \s* \(                 # Plain functions
              | (?is: const\s+| define\s*\( )
                ([\\w\\x7F-\\xFF]+) \s* [=,]               # Constants (const/define)
            )~ix",
            $source, $this->matches, PREG_SET_ORDER
        );
    }

    /**
     * Join matched namespace and construct strings into named identifier groups.
     *
     * @return array  Lists of namespace-prefixed names under the keys
     *                "class", "function" and "const", in order of appearance.
     */
    public function identifiers() {

        // Result list, and current $ns namespace prefix (with trailing backslash)
        $r = array(
            "class" => array(),
            "function" => array(),
            "const" => array(),
        );
        $ns = "";

        /**
         * Check match groups for entries, lowest group first, so a later group
         * is only looked at once all earlier ones turned out empty. Identifiers
         * can't be "0", which makes the plain truthy test sufficient.
         */
        foreach ($this->matches as $name) {
            if ($name[1]) {
                // namespace statement: applies to all following declarations
                $ns = $name[1] . "\\";
            }
            elseif ($name[2]) {
                $r["class"][] = $ns . $name[2];
            }
            elseif ($name[4]) {
                $r["function"][] = $ns . $name[4];
            }
            elseif ($name[5]) {
                $r["const"][] = $ns . $name[5];
            }
        }
        return $r;
    }
}