<?php
/**
* api: php
* description: Classmap builder which reads through directories and phar collections.
* title: Canonic Classmap
* version: 0.7.7
* category: library
* priority: hide
* type: library
* classes: RecursivePharDirIterator, PhpAndPharDirIterator, ExtractPhpIdentifierDeclarations, Canonic_Classmap
* license: Public Domain
*
*
* Classmap generation
*
* → Read through directories and *.phar collections,
* starting from containing __DIR__ as base.
*
* → Tokenize each *.php script.
*
* → Search class/interface, function, const declarations.
*
* → Names are lowercased for later case-insensitive
* comparison, which is PHP's actual behaviour.
*
* → Create an identifier -> php script pathname list.
* Path and phar:// filenames are kept relative.
*
* → Store into 'autoload.map.php'
* (Update the phar or save in current directory).
*
*
* Tokenizer features
*
* → Multiple class declarations, and subnamespaces per
* include are allowed (like PHP, unlike PSR-x).
*
* → Const XY declarations are kept case-sensitive.
*
* → Traits and Interfaces are equivalent to classes.
*
*
*/
/**
 * Traverse directories and iterate over .phar collections alike.
 *
 * Plain directories are descended into by the parent RecursiveDirectoryIterator;
 * files whose name ends in ".phar" are opened as Phar objects and then walked
 * like subdirectories.
 *
 */
class RecursivePharDirIterator extends RecursiveDirectoryIterator {

    /**
     * Tell whether the current entry is a .phar archive file.
     *
     * Only regular files qualify. A directory that merely carries a ".phar"
     * suffix must be traversed as directory instead of being handed to Phar.
     * The path is taken from getPathname(), so the result does not depend on
     * which CURRENT_AS_* flag the iterator was created with.
     *
     * @return boolean
     */
    protected function isPharFile() {
        $path = $this->getPathname();
        return substr($path, -5) === ".phar" && is_file($path);
    }

    /**
     * Wrap .phar files in a Phar object for traversal,
     * or let the parent iterator handle normal directories.
     *
     * The attribute below silences the PHP 8.1+ return type deprecation;
     * older PHP versions read that line as a plain "#" comment.
     *
     * @return RecursiveDirectoryIterator  Iterator over the children of the current dir/phar.
     */
    #[\ReturnTypeWillChange]
    public function getChildren() {
        if ($this->isPharFile()) {
            // Phar builds on RecursiveDirectoryIterator, so it accepts the same flags
            return new Phar($this->getPathname(), $this->getFlags());
        }
        // Assume regular directory
        return parent::getChildren();
    }

    /**
     * Have the iterator descend on ->getChildren() when
     * encountering .phar files too.
     *
     * @param  boolean $allow_links  Follow symlinks.
     * @return boolean               Current element is a subdirectory or a phar archive.
     */
    #[\ReturnTypeWillChange]
    public function hasChildren($allow_links=false) {
        return parent::hasChildren($allow_links) || $this->isPharFile();
    }
}
/**
 * Flat iterator over all *.php scripts below a directory,
 * including those found within .phar packages.
 *
 */
class PhpAndPharDirIterator extends RegexIterator {

    /**
     * Chains a recursive dir/phar walker, a flattening leaves-only iterator
     * and this class' regex filter for the ".php" suffix.
     *
     * @param string $dir  Directory to scan.
     */
    public function __construct($dir) {

        // The flag values are distinct bits, so OR-ing them equals their sum
        $mode = FilesystemIterator::KEY_AS_PATHNAME
              | FilesystemIterator::SKIP_DOTS
              | FilesystemIterator::UNIX_PATHS
              | FilesystemIterator::FOLLOW_SYMLINKS;

        // nested RecursiveDirectory and Phar iterators
        $tree = new RecursivePharDirIterator($dir, $mode);

        // only files are reported; children that fail to open are skipped
        $leaves = new RecursiveIteratorIterator(
            $tree,
            RecursiveIteratorIterator::LEAVES_ONLY,
            RecursiveIteratorIterator::CATCH_GET_CHILD
        );

        // filtering courtesy of RegexIterator
        parent::__construct($leaves, "/\.php$/");
    }
}
/**
* Surface-parsing to find namespace/class/function declarations.
*
*/
// Provide placeholder ids for tokenizer constants which the running PHP
// version does not define itself. The negative values are just stand-ins.
if (!defined("T_TRAIT")) {
    define("T_TRAIT", -16383);
}
if (!defined("T_NAMESPACE")) {
    define("T_NAMESPACE", -16382);
}
if (!defined("T_NS_SEPARATOR")) {
    define("T_NS_SEPARATOR", -16381);
}
/**
 * Token-level scanner which collects the namespace, class-like, function and
 * constant names that a PHP source declares.
 *
 * The source is tokenized once in the constructor. The tokens are kept as the
 * ArrayObject contents, the collected names end up in $tnames, and
 * identifiers() returns them in named groups.
 *
 */
class ExtractPhpIdentifierDeclarations extends ArrayObject {

    /**
     * Block-nesting {..} level for differentiation between functions and methods.
     *
     * @var int
     */
    public $level = 0;

    /**
     * Nesting level of the body of the class-like construct currently being
     * traversed; -2 while outside of any.
     *
     * @var int
     */
    public $classlevel = -2;

    /**
     * Identifier strings, grouped by token types.
     *
     * @var array
     */
    public $tnames = array(
        T_NAMESPACE => array(),
        T_CLASS => array(),
        T_TRAIT => array(),
        T_INTERFACE => array(),
        T_FUNCTION => array(),
        T_CONST => array(),
    );

    /**
     * Index of the token read last (-1 before the first read).
     *
     * An explicit cursor is used because next()/prev() on an ArrayObject no
     * longer walk the wrapped array since PHP 7.4.
     *
     * @var int
     */
    protected $pos = -1;

    /**
     * Keep source tokens in $this[] array for looping through.
     *
     * @param  string $source  PHP file source to tokenize.
     * @throws RangeException  If { } blocks in the source do not balance.
     */
    public function __construct($source) {
        // tokenize
        parent::__construct(token_get_all((string) $source));
        // extract identifiers
        $this->traverse();
    }

    /**
     * Advance the cursor and return the token it then points to.
     *
     * @return array|string|null  Token as produced by token_get_all(), NULL at the end.
     */
    protected function next_token() {
        $next = $this->pos + 1;
        if (!$this->offsetExists($next)) {
            return NULL;
        }
        $this->pos = $next;
        return $this->offsetGet($next);
    }

    /**
     * Concatenate following string parts from $this[] token list,
     * jump over whitespacey things.
     * All other token types are considered end markers; the end marker itself
     * is left unread for the caller.
     *
     * @param  array|null $want  Tokens to accumulate data for. Defaults to T_STRING and
     *                           T_NS_SEPARATOR, plus T_NAME_QUALIFIED where PHP defines it
     *                           (PHP 8 delivers "Foo\Bar" as one such token).
     * @param  array      $skip  Whitespacey token types.
     * @return string            Collected and merged string from token data.
     */
    protected function stringdata($want=NULL, $skip=array(T_WHITESPACE, T_COMMENT, T_DOC_COMMENT, T_INLINE_HTML/*Inline HTML is actually not allowed within most declaration blocks, just here to be safe*/)) {
        if ($want === NULL) {
            $want = array(T_STRING, T_NS_SEPARATOR);
            if (defined("T_NAME_QUALIFIED")) {
                $want[] = constant("T_NAME_QUALIFIED");
            }
        }
        $value = "";
        while (($t = $this->next_token()) !== NULL) {
            $id = is_array($t) ? $t[0] : $t;
            // append data
            if (in_array($id, $want, TRUE)) {
                $value .= is_array($t) ? $t[1] : $t;
            }
            // stop if non-whitespace token encountered, and hand it back
            elseif (!in_array($id, $skip, TRUE)) {
                --$this->pos;
                break;
            }
        }
        return $value;
    }

    /**
     * Look for T_NAMESPACE, T_CLASS, T_FUNCTION, T_CONST start token,
     * and collect following string parts into identifier lists.
     * Also count code { block } nesting to see if outside of class declaration.
     *
     * Fills $this->tnames; has no return value.
     *
     * @throws RangeException  If the { } nesting level is not back at 0 at the end.
     */
    protected function traverse() {

        // namespace prefix
        $ns = "";
        // id of the previous non-whitespace token
        $prev = NULL;
        // enums only exist as token type in newer PHP versions
        $enum = defined("T_ENUM") ? constant("T_ENUM") : NULL;
        $blank = array(T_WHITESPACE, T_COMMENT, T_DOC_COMMENT);
        // After these a keyword token is merely a member name (Foo::class, function list(), const FUNCTION)
        $name_context = array(T_DOUBLE_COLON, T_FUNCTION, T_CONST);

        // Iterate over each source token
        while (($t = $this->next_token()) !== NULL) {

            $id = is_array($t) ? $t[0] : $t;
            if (in_array($id, $blank, TRUE)) {
                continue;
            }
            $is_name = in_array($prev, $name_context, TRUE);

            // Namespaces may occur as prefix declarations, { } block syntax, and multiple of either.
            if ($id === T_NAMESPACE) {
                if (!$is_name) {
                    $name = $this->stringdata();
                    // A leading backslash means the `namespace\foo()` operator, not a declaration
                    if ($name === "" || $name[0] !== "\\") {
                        $this->tnames[T_NAMESPACE][] = $name;
                        $ns = ($name === "") ? "" : trim($name, "\\") . "\\";
                    }
                }
            }

            // No need to differentiate between class-likes, they share one lookup list.
            elseif ($id === T_CLASS || $id === T_TRAIT || $id === T_INTERFACE || $id === $enum) {
                if (!$is_name) {
                    $this->classlevel = $this->level + 1;
                    // Anonymous classes have no name to register, but their body still counts as class block
                    $name = $this->stringdata();
                    if ($name !== "") {
                        $this->tnames[$id === $enum ? T_CLASS : $id][] = $ns . $name;
                    }
                }
            }

            // Skip only exact class block level, inner/deferred function declarations will still be discovered.
            elseif ($id === T_FUNCTION || $id === T_CONST) {
                // `use function x;` and `use const X;` are imports, not declarations
                if (!$is_name && $prev !== T_USE && $this->level != $this->classlevel) {
                    // skip empty strings (anonymous functions)
                    $name = $this->stringdata();
                    if ($name !== "") {
                        $this->tnames[$id][] = $ns . $name;
                    }
                }
            }

            // Also need to handle ${ and {$ in double quoted string context, because } closing curlies are also seen later.
            elseif ($id === "{" || $id === T_CURLY_OPEN || $id === T_DOLLAR_OPEN_CURLY_BRACES) {
                ++$this->level;
            }

            elseif ($id === "}") {
                --$this->level;
                // Reset classlevel as soon as we step above it. - Nested/deferred class declarations are rare.
                if ($this->level < $this->classlevel) {
                    $this->classlevel = -2;
                }
            }

            $prev = $id;
        }

        // check for {} block nesting mismatch
        if ($this->level != 0) {
            throw new RangeException("Code nesting level {} is off", $this->level);
        }
    }

    /**
     * Return grouped identifiers, after merging with namespace prefix.
     * Does not normalize strings yet (upper/lowercase left as-is).
     *
     * @return array  Grouped lists of identifiers, under the keys "class", "function" and "const".
     */
    public function identifiers() {
        // From token ids into named groups
        return array(
            "class" => array_unique(array_merge(
                $this->tnames[T_CLASS],
                $this->tnames[T_TRAIT],
                $this->tnames[T_INTERFACE]
            )),
            // Filter empty strings (though anonymous functions should already be absent here).
            "function" => array_filter($this->tnames[T_FUNCTION]),
            // define() calls are not collected
            "const" => $this->tnames[T_CONST],
        );
    }
}
/**
 * Classmap generation and storage.
 *
 * Plain utility class: scans the given directories (and phars therein) for
 * *.php scripts, maps the declared class/function/const names onto the script
 * that declares them, and writes the result either into a phar or into a
 * separate map file.
 *
 */
class Canonic_Classmap {

    /**
     * Accumulated identifier map.
     * Holds the groups "class", "function" and "const" (identifier => script path),
     * and optionally the fingerprint string under "fp".
     *
     * @var array
     */
    public $idmap = array();

    /**
     * @var string $path  Value to populate all entries of current array list with.
     */
    public $path = "";

    /**
     * Read subdirectories, build classmap, save it.
     *
     * @param  string       $store_fn     Location of autoload.map.php to write to.
     * @param  array|string $dirs         Directories to read from for map generation (a single path
     *                                    may be passed as string). Only the first one will have
     *                                    pathnames relativized.
     * @param  string       $fingerprint  Previous classmap may contain a fingerprint in ["fp"],
     *                                    to avoid rescanning.
     * @return array|null                 The collected classmap; NULL if the fingerprint matched
     *                                    and nothing was rescanned or written.
     */
    public static function update($store_fn, $dirs=array(), $fingerprint=NULL) {
        $cm = new self();

        // Accept a lone directory string, and make sure the first entry has index 0
        $dirs = array_values((array) $dirs);

        // Single directory can be fingerprinted
        if (count($dirs) == 1) {
            $cm->idmap["fp"] = $cm->fingerprint(new PhpAndPharDirIterator($dirs[0]));

            // Do not reprocess if fingerprint matches.
            // Strict comparison: with == two different "0e123.." hashes would compare as equal numbers.
            if (is_string($fingerprint) && $fingerprint === $cm->idmap["fp"]) {
                return;
            }
        }

        // Read through dirs
        foreach ($dirs as $i => $dir) {
            $cm->process_dir($dir, $i === 0); /*only first dir is treated as $relative*/
        }

        // Save classmap
        $cm->write($store_fn);

        // Map may be updated automatically on autoloader misses, to retry; so return it.
        return $cm->idmap;
    }

    /**
     * Nothing to initialize.
     *
     */
    public function __construct() {
    }

    /**
     * Augment identifier map from reading directory/phars.
     * Could be run on multiple locations. But files not below the autoloader basedir should probably retain absolute paths.
     *
     * @param string  $dir       Directory to scan.
     * @param boolean $relative  Store script paths relative to $dir.
     */
    public function process_dir($dir, $relative=TRUE) {

        // List of *.php files
        $files = new PhpAndPharDirIterator($dir);

        // Tokenize and map class/func names onto filenames.
        $merged = array_merge_recursive($this->idmap, $this->map($files, $dir, $relative));

        // Identifiers found in more than one scanned location come out of the
        // merge as path lists; reduce them to the first path again.
        foreach ($merged as $group => $entries) {
            if (is_array($entries)) {
                $merged[$group] = $this->remove_duplicates($entries);
            }
        }
        $this->idmap = $merged;
    }

    /**
     * Condense modification time and size of all listed files into a hash.
     * The map file "autoload.map.php" itself is left out.
     *
     * @param  Iterator $files        File info objects; getMTime() and getSize() are called on each.
     * @param  string   $fingerprint  Initial string the file data gets appended to.
     * @return string                 MD5 hash.
     */
    public function fingerprint($files, $fingerprint="") {
        foreach ($files as $f) {
            if (basename("$f") !== "autoload.map.php") {
                $fingerprint .= $f->getMTime() . "=" . $f->getSize() . ",";
            }
        }
        return md5($fingerprint);
    }

    /**
     * Write to phar:// or separate autoload.map.php
     *
     * @param string $store_fn  Target filename; with "phar://" prefix the map is stored
     *                          as entry in that phar.
     */
    public function write($store_fn) {

        $src = "<?php\n/**\n * description: autoloader classmap/funcmap\n * last-modified: ".gmdate("c")."\n *\n */\n\n return "
             . var_export($this->idmap, TRUE) . ";\n\n?>";

        // Open phar basename
        if (!strncmp($store_fn, "phar://", 7)) {

            // open without phar:// prefix; the directory part of the target is taken as phar filename
            $phar = new Phar(dirname(substr($store_fn, 7)));

            // Update contained map
            if ($phar->isWritable()) {
                $phar[basename($store_fn)] = $src;
                $phar->compressFiles(Phar::GZ);
            }
            else {
                trigger_error("Canonic_Classmap::update() Cannot write to phar '$store_fn'. Adapt phar.readonly=0 in your php.ini", E_USER_WARNING);
            }
        }

        // Store in auxiliary file
        else {
            file_put_contents($store_fn, $src);
        }
    }

    /**
     * Extract identifiers from files, make class/func/const grouped id->file lists.
     *
     * @param  Iterator $files     List of *.php file paths to analyze.
     * @param  string   $basedir   Base directory, to normalize paths.
     * @param  boolean  $relative  Have paths/phars be relative to base directory.
     * @return array               Collected class/func->filename map. All three groups
     *                             are present, even if no file contributed to them.
     */
    public function map($files, $basedir, $relative=TRUE) {

        // Predefine groups, so an empty file list yields empty groups instead of missing keys
        $map = array("class" => array(), "function" => array(), "const" => array());

        // loop
        foreach ($files as $path) {

            // unreadable files cannot contribute anything
            $source = file_get_contents("$path");
            if ($source === FALSE) {
                continue;
            }

            // extract identifiers from source; scripts the scanner rejects are left out
            try {
                $add = new ExtractPhpIdentifierDeclarations($source);
            }
            catch (Exception $e) {
                continue;
            }

            // path to record for all identifiers of this script
            $this->path = $relative ? $this->relative_path("$path", $basedir) : "$path";

            // convert into identifier->pathname list
            $id_to_fn = array_map(array($this, "arrayflip_fillpath"), $add->identifiers());

            // Lowercase class and function identifiers prior merging. PHP looks them up
            // case-insensitively, so "Foo" and "foo" from two scripts are duplicates too.
            $id_to_fn["class"] = array_change_key_case($id_to_fn["class"]);
            $id_to_fn["function"] = array_change_key_case($id_to_fn["function"]);

            // add to groups (class,function,const)
            $map = array_merge_recursive($map, $id_to_fn);
        }

        // find duplicate definitions
        return array_map(array($this, "remove_duplicates"), $map);
    }

    /**
     * Keep only first filename string if declarations were found in multiple scripts.
     * Raises an E_USER_NOTICE for each such identifier.
     *
     * @param  array $array  Array of strings or subarrays - from which only to retain first string then.
     * @return array         Compacted array of strings.
     */
    public function remove_duplicates($array) {
        foreach (array_filter($array, "is_array") as $id=>$fn) {
            trigger_error("Canonic_Classmap: Duplicate declaration for '$id' found (in " . implode(", ", $fn) . ")", E_USER_NOTICE);
            $array[$id] = reset($fn);
        }
        return $array;
    }

    /**
     * Flips array of (id1,id2,id3) lists into (id1=>path, id2=>path, id3=>path)
     * as taken from the $path property.
     *
     * This is a workaround for easier array_map() usage, due to class/const/function subarrays.
     *
     * @param  array $list  List of identifiers to flip into keys.
     * @return array        Map.
     */
    public function arrayflip_fillpath($list) {
        return array_fill_keys($list, $this->path);
    }

    /**
     * Remove basedir from absolute directory/phar path.
     * Paths which do not start with the base directory are returned as they came.
     *
     * @param  string $path     Script path, possibly with phar:// prefix.
     * @param  string $basedir  Directory prefix to cut off.
     * @return string           Relative path; a phar:// prefix is retained.
     */
    public function relative_path($path, $basedir) {

        // remove phar:// prefix
        if ($is_phar = !strncmp($path, "phar://", 7)) {
            $path = substr($path, 7);
        }

        // add trailing slash to basedir
        $basedir = rtrim($basedir, "/") . "/";

        // remove basedir
        if (!strncmp($path, $basedir, strlen($basedir))) {
            $path = substr($path, strlen($basedir));
        }

        // readd phar:// prefix
        return $is_phar ? "phar://" . $path : $path;
    }
}
?>