 * api: php
 * description: Classmap builder which reads through directories and phar collections.
 * title: Canonic Classmap
 * version: 0.7.7
 * category: library
 * priority: hide
 * type: library
 * classes: RecursivePharDirIterator, PhpAndPharDirIterator, ExtractPhpIdentifierDeclarations, Canonic_Classmap
 * license: Public Domain
 * Classmap generation
 *   → Read through directories and *.phar collections,
 *     starting from containing __DIR__ as base.
 *   → Tokenize each *.php script.
 *   → Search class/interface, function, const declarations.
 *   → Names are lowercased for later case-insensitive
 *     comparison, which is PHP's actual behaviour.
 *   → Create an identifier -> php script pathname list.
 *     Path and phar:// filenames are kept relative.
 *   → Store the resulting map into the classmap file
 *     (Update the phar or save in current directory).
 * Tokenizer features
 *   → Multiple class declarations, and subnamespaces per
 *     include are allowed (like PHP, unlike PSR-x).
 *   → Const XY declarations are kept case-sensitive.
 *   → Traits and Interfaces are equivalent to classes.

/**
 * Traverse directories and iterate over .phar collections alike.
 */
class RecursivePharDirIterator extends RecursiveDirectoryIterator {

    /**
     * Wrap .phar files in a Phar object for traversal,
     * or let Filesystem/DirectoryIterator handle normal directories.
     *
     * @return DirectoryIterator       Returns a new iterator for traversable dirs/phars.
     */
    #[\ReturnTypeWillChange]
    public function getChildren() {

        // Files ending in .phar will use the Phar-wrapper (which builds on DirectoryIterator already)
        if ($this->currentIsPhar()) {
            return new Phar((string) $this->current(), $this->getFlags());
        }

        // Assume regular directory
        return parent::getChildren();
    }

    /**
     * Have the iterator descend on ->getChildren() when
     * encountering .phar files too.
     *
     * @param  boolean $allow_links    Follow symlinks.
     * @return boolean                 Current() element is a subdirectory or virtual/phar dir.
     */
    #[\ReturnTypeWillChange]
    public function hasChildren($allow_links=false) {
        // The result has to be returned; a bare comparison statement is
        // discarded and leaves the method returning NULL (never descending).
        return parent::hasChildren($allow_links) || $this->currentIsPhar();
    }

    /**
     * Tell whether the current entry's pathname ends in ".phar".
     *
     * @return boolean
     */
    private function currentIsPhar() {
        // current() may be an SplFileInfo object; cast explicitly so the
        // suffix check works on its pathname string.
        return substr((string) $this->current(), -5) == ".phar";
    }
}

/**
 * Find all .php files in a given directory, including within .phar packages.
 */
class PhpAndPharDirIterator extends RegexIterator {

    /**
     * Builds a flattened iterator, recursively scanning dirs/phars for *.php files.
     *
     * @param  string  $dir            Directory (or phar) to start scanning from.
     */
    public function __construct($dir) {

        // NOTE(review): the flag list is not visible in the reviewed source and
        // was reconstructed from how the entries are consumed elsewhere in this
        // file: ->getMTime()/->getSize() need FileInfo objects, string casts need
        // pathnames, and the path relativizing works on "/" separators. Confirm
        // whether FOLLOW_SYMLINKS was part of the original set.
        $flags = FilesystemIterator::KEY_AS_PATHNAME
               | FilesystemIterator::CURRENT_AS_FILEINFO
               | FilesystemIterator::SKIP_DOTS
               | FilesystemIterator::UNIX_PATHS;

        // The RegexIterator parent must be initialized with the inner iterator
        // and the pattern, otherwise the object is unusable.
        parent::__construct(
            new RecursiveIteratorIterator(
                 // get only leaves, from nested RecursiveDirectory and PharDirIterators
                 new RecursivePharDirIterator($dir, $flags),
                 RecursiveIteratorIterator::LEAVES_ONLY, RecursiveIteratorIterator::CATCH_GET_CHILD
            ),
            "/\.php$/"  // filtering courtesy of RegexIterator
        );
    }
}

/**
 * Surface-parsing to find namespace/class/function declarations.
 */
// Polyfill token ids that the running PHP version may not define. The negative
// placeholder values never match a real token, so the cases below simply stay inert.
defined("T_TRAIT") or define("T_TRAIT", -16383);
defined("T_NAMESPACE") or define("T_NAMESPACE", -16382);
defined("T_NS_SEPARATOR") or define("T_NS_SEPARATOR", -16381);
// PHP 8 emits "Foo\Bar" as one T_NAME_QUALIFIED token instead of T_STRING/T_NS_SEPARATOR parts.
defined("T_NAME_QUALIFIED") or define("T_NAME_QUALIFIED", -16380);

class ExtractPhpIdentifierDeclarations extends ArrayObject {

    /**
     * Block-nesting {..} level for differentiation between functions and methods.
     * $classlevel holds the nesting level of the current class body, or -2 when
     * outside of any class.
     * @var int
     */
    public $level = 0;
    public $classlevel = -2;

    /**
     * Identifier strings, grouped by token types.
     * @var array
     */
    public $tnames = array(
        T_NAMESPACE => array(),
        T_CLASS => array(),
        T_TRAIT => array(),
        T_INTERFACE => array(),
        T_FUNCTION => array(),
        T_CONST => array(),
    );

    /**
     * Index of the token last read from $this[].
     * An explicit cursor is used because next()/current() on an ArrayObject
     * no longer operate on the wrapped array as of PHP 7.4.
     * @var int
     */
    protected $pos = 0;

    /**
     * Keep source tokens in $this[] array for looping through.
     *
     * @param  string  $source         PHP file source to tokenize.
     * @throws RangeException          If { } nesting does not balance.
     */
    public function __construct($source) {
        // tokenize
        parent::__construct(token_get_all($source));
        // extract identifiers
        $this->traverse();
    }

    /**
     * Advance the cursor and return the token there.
     * The very first token (index 0) is never returned, matching the
     * next()-based loop this replaces.
     *
     * @return array|string|false      Token, or FALSE once the list is exhausted.
     */
    protected function nextToken() {
        ++$this->pos;
        return $this->offsetExists($this->pos) ? $this->offsetGet($this->pos) : false;
    }

    /**
     * Concatenate following string parts from $this[] token list,
     * jump over whitespacey things.
     * All other token types are considered end markers.
     *
     * @param  array   $want           Tokens to accumulate data for.
     * @param  array   $skip           Whitespacey token types.
     * @return string                  Collected and merged string from token data.
     */
    protected function stringdata($want=array(T_STRING, T_NS_SEPARATOR, T_NAME_QUALIFIED), $skip=array(T_WHITESPACE, T_COMMENT, T_DOC_COMMENT, T_INLINE_HTML/*Inline HTML is actually not allowed within most declaration blocks, just here to be safe*/)) {

        $value = "";
        while (($t = $this->nextToken()) !== false) {
            $id = is_array($t) ? $t[0] : $t;

            // append data
            if (in_array($id, $want, true)) {
                $value .= is_array($t) ? $t[1] : $t;
            }
            // stop if non-whitespace token encountered
            elseif (!in_array($id, $skip, true)) {
                // NOTE(review): the reviewed source does not show what happens at
                // the end marker. Stepping back is needed so that traverse() still
                // sees it - otherwise the "{" of "class Foo {" would never be counted.
                --$this->pos;
                break;
            }
        }
        return $value;
    }

    /**
     * Look for T_NAMESPACE, T_CLASS, T_FUNCTION, T_CONST start token,
     * and collect following string parts into identifier lists.
     * Also count code { block } nesting to see if outside of class declaration.
     *
     * @throws RangeException          If { } nesting does not balance at the end.
     */
    protected function traverse() {
        // namespace prefix
        $ns = "";
        // previous significant token, to tell declarations from Foo::class and `use function`
        $prev = NULL;

        // Iterate over each source token
        while (($t = $this->nextToken()) !== false) {
            $id = is_array($t) ? $t[0] : $t;

            if ($id === T_WHITESPACE || $id === T_COMMENT || $id === T_DOC_COMMENT) {
                continue;
            }

            switch (true) {

                // Namespaces may occur as prefix declarations, { } block syntax, and multiple of either.
                case $id === T_NAMESPACE:
                    $name = $this->stringdata();
                    $this->tnames[T_NAMESPACE][] = $name;
                    // An empty name (global "namespace { }" block) resets the prefix.
                    $ns = ($name !== "") ? trim($name, "\\") . "\\" : "";
                    break;

                // No need to differentiate between the three, as SPL-autoloading assumes a single namespace.
                case $id === T_INTERFACE:
                case $id === T_TRAIT:
                case $id === T_CLASS:
                    // Foo::class is a constant lookup, not a declaration.
                    if ($prev === T_DOUBLE_COLON) {
                        break;
                    }
                    $this->classlevel = $this->level + 1;
                    // Anonymous classes yield no name; still track their body level.
                    $name = $this->stringdata();
                    if ($name !== "") {
                        $this->tnames[$id][] = $ns . $name;
                    }
                    break;

                // Skip only exact class block level, inner/deferred function declarations
                // will still be discovered. Anonymous functions are discarded (empty name).
                case $id === T_FUNCTION:
                case $id === T_CONST:
                    // `use function x;` / `use const X;` are imports, not declarations.
                    if ($prev === T_USE) {
                        break;
                    }
                    if ($this->level != $this->classlevel) {
                        // skip empty strings
                        $name = $this->stringdata();
                        if ($name !== "") {
                            $this->tnames[$id][] = $ns . $name;
                        }
                    }
                    break;

                // Also need to handle ${ and {$ in double quoted string context,
                // because } closing curlies are also seen later.
                case $id === T_DOLLAR_OPEN_CURLY_BRACES:
                case $id === T_CURLY_OPEN:
                case $id === "{":
                    ++$this->level;
                    break;

                case $id === "}":
                    --$this->level;
                    // Reset classlevel as soon as we step above it. - Nested/deferred class declarations are rare.
                    if ($this->level < $this->classlevel) {
                        $this->classlevel = -2;
                    }
                    break;
            }

            $prev = $id;
        }

        // check for {} block nesting mismatch
        if ($this->level != 0) {
            throw new RangeException("Code nesting level {} is off", $this->level);
        }
    }

    /**
     * Return grouped identifiers, after merging with namespace prefix.
     * Does not normalize strings yet (upper/lowercase left as-is).
     *
     * @return array                   Grouped lists of identifiers ("class", "function", "const").
     */
    public function identifiers() {
        // From token ids into named groups
        return array(
            // NOTE(review): merge order of the three lists is not visible in the reviewed source.
            "class" => array_unique(array_merge(
                $this->tnames[T_CLASS],
                $this->tnames[T_INTERFACE],
                $this->tnames[T_TRAIT]
            )),
            // Filter empty strings (though anonymous functions should already be absent here).
            "function" => array_filter($this->tnames[T_FUNCTION]),
            "const" => $this->tnames[T_CONST]
                    // $this->tnames[T_DEFINE]  not yet handled
        );
    }
}


/**
 * Classmap generation and storage.
 * This is a plain utility class, with built-in defaults for where to write the classmap
 * to. It expects to reside in a phar:, but alternatively falls back to a neighbouring
 * output file.
 * The containing directory is scanned by default.
 */
class Canonic_Classmap {

    /**
     * Accumulated identifier map.
     * @var array
     */
    public $idmap = array();

    /**
     * @var    string  $path           Value to populate all entries of current array list with.
     */
    public $path = "";

    /**
     * Read subdirectories, build classmap, save it.
     *
     * @param  string  $store_fn       Location to write the classmap to.
     * @param  array   $dirs           Directories to read from for map generation. Only the first one will have pathname relativized.
     * @param  string  $fingerprint    Previous classmap may contain a fingerprint in ["fp"] from filenames/timestamps, to avoid rescanning.
     * @return array|null              The collected classmap; see the note on the fingerprint-match case.
     */
    public static function update($store_fn, $dirs=array(), $fingerprint=NULL) {
        $cm = new self();
        // The doc historically types $dirs as string; accept a single path too.
        // NOTE(review): the header promises the containing directory as default,
        // but no such fallback is visible here - an empty list scans nothing.
        $dirs = (array) $dirs;

        // Single directory can be fingerprinted
        if (count($dirs) == 1) {
            $cm->idmap["fp"] = $cm->fingerprint(new PhpAndPharDirIterator(reset($dirs)));
            // Do not reprocess if filename/timestamps fp matches.
            // Strict comparison: loose == treats two different "0e..." md5
            // strings as equal numbers.
            if ($fingerprint === $cm->idmap["fp"]) {
                // NOTE(review): the return value for the unchanged case is not
                // visible in the reviewed source; NULL signals "nothing rebuilt".
                return NULL;
            }
        }

        // Read through dirs
        $first = TRUE;
        foreach ($dirs as $dir) {
            $cm->process_dir($dir, $first); /*only first dir is treated as $relative*/
            $first = FALSE;
        }

        // Save classmap
        $cm->write($store_fn);

        // Map may be updated automatically on autoloader misses, to retry; so return it.
        return $cm->idmap;
    }

    /**
     * Nothing to initialize.
     */
    public function __construct() {
    }

    /**
     * Augment identifier map from reading directory/phars.
     * Could be run on multiple locations. But files not below the autoloader basedir should probably retain absolute paths.
     *
     * @param  string  $dir            Directory to scan.
     * @param  boolean $relative       Store paths relative to $dir.
     */
    public function process_dir($dir, $relative=TRUE) {
        // List of *.php files
        $files = new PhpAndPharDirIterator($dir);

        // Tokenize and map class/func names onto filenames.
        $this->idmap = array_merge_recursive($this->idmap, $this->map($files, $dir, $relative));

        // Merging several directories turns an identifier found in more than one
        // of them into a list of paths; collapse those like map() does per directory.
        foreach ($this->idmap as $group => $ids) {
            if (is_array($ids)) {
                $this->idmap[$group] = $this->remove_duplicates($ids);
            }
        }
    }

    /**
     * Condense modification times and sizes of all files into one hash.
     *
     * @param  Iterator $files         File entries providing getMTime()/getSize().
     * @param  string   $fingerprint   Seed string the per-file data is appended to.
     * @return string                  md5 hex digest.
     */
    public function fingerprint($files, $fingerprint="") {
        foreach ($files as $f) {
            // NOTE(review): comparing against "" lets every file through; presumably a
            // specific filename was meant to be excluded here - confirm.
            if (basename($f) != "") {
                $fingerprint .= $f->getMTime() . "=" . $f->getSize() . ",";
            }
        }
        return md5($fingerprint);
    }

    /**
     * Write the map as PHP `return array(...)` script, either into a phar://
     * location or into a separate plain file.
     *
     * @param  string  $store_fn       Target filename, optionally with phar:// prefix.
     */
    public function write($store_fn) {

        $src = "<?php\n/**\n * description: autoloader classmap/funcmap\n * last-modified: ".gmdate("c")."\n *\n */\n\n return "
             . var_export($this->idmap, TRUE) . ";\n\n?>";

        // Open phar basename
        if (!strncmp($store_fn, "phar://", 7)) {
                          // open without phar:// prefix
            $phar = new Phar(dirname(substr($store_fn, 7)));
            // Update contained map
            if ($phar->isWritable()) {
               $phar[basename($store_fn)] = $src;
            }
            else {
                trigger_error("Canonic_Classmap::update() Cannot write to phar '$store_fn'. Adapt phar.readonly=0 in your php.ini", E_USER_WARNING);
            }
        }
        // Store in auxiliary file
        else {
            file_put_contents($store_fn, $src);
        }
    }

    /**
     * Extract identifiers from files, make class/func/const grouped id->file lists.
     *
     * @param Iterator $files          List of *.php file paths to analyze.
     * @param  string  $basedir        Base directory, to normalize paths.
     * @param  boolean $relative       Have paths/phars be relative to base directory.
     * @return array                   Collected class/func->filename map.
     */
    public function map($files, $basedir, $relative=TRUE) {
        // Pre-seed the groups so an empty file list still yields a complete map
        // instead of failing on $map["class"] below.
        $map = array("class" => array(), "function" => array(), "const" => array());

        // loop
        foreach ($files as $path) {

            // extract identifiers from source
            try {
                $add = new ExtractPhpIdentifierDeclarations(file_get_contents("$path"));
            }
            catch (Exception $e) {
                // NOTE(review): handler body is not visible in the reviewed source;
                // an unparseable script is skipped so that $add is never stale/undefined.
                continue;
            }

            //@todo: make relative path
            $this->path = $relative ? $this->relative_path("$path", $basedir) : "$path";

            // convert into identifier->pathname list
            $id_to_fn = array_map(array($this, "arrayflip_fillpath"), $add->identifiers());

            // add to groups (class,function,const)
            $map = array_merge_recursive($map, $id_to_fn);
        }

        // find duplicate definitions
        $map = array_map(array($this, "remove_duplicates"), $map);

        // lowercase identifiers
        $map["class"] = array_change_key_case($map["class"]);
        $map["function"] = array_change_key_case($map["function"]);
        return $map;
    }

    /**
     * Keep only first filename string if declarations were found in multiple scripts.
     *
     * @param  array   $array          Array of strings or subarrays - from which only to retain first string then.
     * @return array                   Compacted array of strings.
     */
    public function remove_duplicates($array) {
        foreach (array_filter($array, "is_array") as $id=>$fn) {
            trigger_error("Canonic_Classmap: Duplicate declaration for '$id' found (in " . implode(", ", $fn) . ")", E_USER_NOTICE);
            $array[$id] = reset($fn);
        }
        return $array;
    }

    /**
     * Flips array of (id1,id2,id3) lists into (id1=>path, id2=>path, id3=>path)
     * as taken from temporary variable.
     * This is a workaround for easier array_map() usage, due to class/const/function subarrays.
     *
     * @param  array   $list           List of identifiers to flip into keys.
     * @return array                   Map.
     */
    public function arrayflip_fillpath($list) {
        return array_fill_keys($list, $this->path);
    }

    /**
     * Remove basedir from absolute directory/phar path.
     *
     * @param  string  $path           Absolute file path, optionally with phar:// prefix.
     * @param  string  $basedir        Directory prefix to strip.
     * @return string                  Path relative to $basedir; unchanged if not below it.
     */
    public function relative_path($path, $basedir) {

        // remove phar:// prefix
        if ($is_phar = !strncmp($path, "phar://", 7)) {
            $path = substr($path, 7);
        }

        // add trailing slash to basedir
        $basedir = rtrim($basedir, "/") . "/";
        // remove basedir
        if (!strncmp($path, $basedir, strlen($basedir))) {
            $path = substr($path, strlen($basedir));
        }

        // readd phar:// prefix
        return $is_phar ? "phar://" . $path : $path;
    }
}
