Map-based autoloader across php and phar resources

⌈⌋ ⎇ branch:  Canonic Autoloader


Artifact [68d8a95492]

Artifact 68d8a954921d1e66a1799e12fe772801bab80cae:

  • File pharmap.php — part of check-in [369216d430] at 2015-01-22 19:36:24 on branch trunk — Tool to regenerate .phar-internal class/func `map`, which is compatible to the shared.phar/autoload.map.php array structure, but stored as Phar meta data array. (Compatibility to xpm-generated composer deb/rpm/phar bundles.) (user: mario size: 5742)

     1
     2
     3
     4
     5
     6
     7
     8
     9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    98
    99
   100
   101
   102
   103
   104
   105
   106
   107
   108
   109
   110
   111
   112
   113
   114
   115
   116
   117
   118
   119
   120
   121
   122
   123
   124
   125
   126
   127
   128
   129
   130
   131
   132
   133
   134
   135
   136
   137
   138
   139
   140
   141
   142
   143
   144
   145
   146
   147
   148
   149
   150
   151
   152
   153
   154
   155
   156
   157
   158
   159
   160
   161
   162
   163
   164
   165
   166
   167
   168
   169
   170
   171
   172
   173
   174
   175
   176
   177
   178
   179
   180
   181
<?php
/**
 * type: cli
 * title: class map in .phar
 * description: Adds a `map` field to phar meta data, containing identifier→fn maps
 * 
 * Small tool to augment existing library phars with a classmap usable
 * by Canonic_Autoloader::addPhar()
 *
 * Syntax:
 *
 *   pharmap vnd-pkg.phar
 *
 *
 * (This isn't meant to update the shared.phar-internal autoload.map.php list.)
 *
 */


// Command-line entry point: validate the invocation before touching the archive,
// so a missing argument yields the usage line instead of an obscure Phar error.
if (!isset($_SERVER["argv"][1]) || $_SERVER["argv"][1] === "") {
    file_put_contents("php://stderr", "Syntax: pharmap vnd-pkg.phar\n");
    exit(1);
}

// Storing metadata requires write support (phar.readonly=0); fail early rather
// than after the whole archive has been scanned.
if (!Phar::canWrite()) {
    file_put_contents("php://stderr", "pharmap: phar.readonly is enabled; rerun with `php -d phar.readonly=0`\n");
    exit(1);
}

// open Phar
$p = new Phar($_SERVER["argv"][1]);
$meta = $p->getMetadata();
if ($meta === null) {
    // archive carries no metadata yet
    $meta = array();
}
elseif (!is_array($meta) && !($meta instanceof ArrayAccess)) {
    // foreign scalar/object metadata can't hold a `map` entry; don't overwrite it silently
    file_put_contents("php://stderr", "pharmap: existing phar metadata is not an array, refusing to replace it\n");
    exit(1);
}

// start from an empty map so stale entries from a previous run are dropped
$meta["map"] = array("class"=>array(), "function"=>array(), "const"=>array());
map_phar($p, $meta);
$p->setMetadata($meta);
unset($p);



/**
 * Walk all entries of an opened Phar and record every declared identifier
 * in the `map` section of the given meta data array, in the form
 *   $meta["map"][type][identifier] = phar-internal-filename.php
 *
 * Only *.php entries are considered, and anything below a /test/ or /tests/
 * directory is left out. Class and function names are stored lowercased,
 * constant names keep their original spelling.
 *
 * @param  Phar   $p     Phar archive, iterated recursively
 * @param  array  $meta  Meta data array (by reference); its `map` lists get filled
 */
function map_phar($p, &$meta) {

    $entries = new RecursiveIteratorIterator($p);

    foreach ($entries as $entry) {

        // iterator items are file info objects; their string form is the phar:// URL
        $url = (string) $entry;

        // only library scripts are of interest
        $is_library_script = preg_match("~^(?!.*/tests?/).+\.php$~", $url);
        if ($is_library_script) {

            // strip the phar://…archive.phar/ prefix to get the archive-local path
            $local_fn = preg_replace("~^phar://.+?\.phar/*~", "", $url);

            // fetch via the phar:// stream wrapper, because PharFileInfo/FileObject truncates fread() to the compressed size
            $scan = new RegexPhpIdentifierDeclarations(file_get_contents($url));

            foreach ($scan->identifiers() as $type => $names) {
                // everything but constants is indexed case-insensitively
                $fold_case = ($type != "const");
                foreach ($names as $name) {
                    $key = $fold_case ? strtolower($name) : $name;
                    $meta["map"][$type][$key] = $local_fn;
                }
            }
        }
    }
}





/**
 * Shallow regex-lexing to uncover namespace/class/function identifiers.
 *
 * By relying on keyword context and a bit of block-level skipping, this still
 * uncovers correctly nested and deferred declaration constructs. Plain function
 * injections within methods however are overlooked. Dynamic declarations within
 * strings are ignored due to non-code being stripped beforehand.
 *
 * This approach doesn't assert any nesting/syntax correctness; implementing
 * it via recursive subroutines wouldn't provide anything like a parse tree
 * through the PCRE interface, and would undo the speed advantage here.
 *
 * Usage:
 *
 *   $def = new RegexPhpIdentifierDeclarations($php_source);
 *   $ids = $def->identifiers();   // ["class"=>[..], "function"=>[..], "const"=>[..]]
 *
 */
class RegexPhpIdentifierDeclarations {

    /**
     * Match sets of the declaration scan (PREG_SET_ORDER), one per construct:
     *  → Namespaces in [1]
     *  → Classes (class/interface/trait/enum) in [2], their skipped {} body in [3]
     *  → Function names in [4]
     *  → Constants in [5]
     *
     */
    public $matches = array();


    /**
     * Regex all the things.
     *
     * @param  string  $source  Complete PHP script text
     */
    public function __construct($source) {

        // callers may hand over `false` from a failed read; treat as empty script
        $source = (string) $source;

        /**
         * Remove non-code sections (comments and strings actually),
         * but convert define() string into constant literal before.
         *
         */
        $source = (string) preg_replace(
            "~\b define \s*\(\s* ([\"\']) ([\\w\\x7F-\\xFF]+) \\1 \s*, ~ix",
            "const $2 =", $source
        );

        /**
         * Heredoc and nowdoc share one alternative: group 1 holds the optional
         * label quote, group 2 the label itself. The closing label may be
         * indented (PHP 7.3+) and must not continue as a longer identifier.
         *
         */
        $source = (string) preg_replace("~
                (?: \A | \?\>) .*? \<\?(?:php|=)+?     # Open+closing PHP token
              | /\* .*? \*/                            # Multiline /* comments */
              | // \V*                                 # Single line // comment
              | \# \V*                                 # Hash comment
              |  \" (?:[^\"\\\\] | \\\\.)* \"          # Double quoted string
              |  \' (?:[^\'\\\\] | \\\\.)* \'          # Single quoted string
              | <<<\s* ([\"\']?) (\w+) \\1             # Heredoc or nowdoc opener
                   .+? ^\h* \\2 (?![\\w\\x7F-\\xFF])   #   body up to the closing label
            ~smix",
            "", $source
        );

        /** 
         * Match identifiers and skip class block {} structures. (While one could recurse
         * into methods or namespace{} blocks individually, practically only the outermost
         * interface is relevant for the autoloader.)
         *
         * The lookbehind also rejects keywords that are really a variable, member or
         * ::class reference, which otherwise could pose as declarations.
         *
         */
        preg_match_all("~
           (?: (?<![\\x7F-\\xFF\$>:]) \b )             # Word breaks only, no variable/member names
           (?:
              namespace \s+
                  ([\\w\\x7F-\\xFF\\\\]+) \s* [{;]     # Namespace identifier
            | (?is:class|interface|trait|enum) \s+
                  ([\\w\\x7F-\\xFF]+)  [^\{\}]*        # Class declaration
                  ((?>\{ (?: [^\{\}]* | (?-1) )*\}))   # Recursive {...} block skipping
            | function \s+
                  ([\\w\\x7F-\\xFF]+) \s* \(           # Plain functions
            | (?is: const\s+| define\s*\( )
                  ([\\w\\x7F-\\xFF]+) \s* [=,]         # Constants (const/define)
           )~ix",
           $source, $this->matches, PREG_SET_ORDER
        );

        // keep identifiers() iterable even if the scan itself failed
        if (!is_array($this->matches)) {
            $this->matches = array();
        }
    }


    /**
     * Join matched namespace and construct strings into our beloved named identifier groups.
     *
     * @return array  Lists of namespace-prefixed names under "class", "function" and "const"
     */
    public function identifiers() {

        // Result list, and current $ns namespace
        $r = array(
            "class" => array(),
            "function" => array(),
            "const" => array(),
        );
        $ns = "";

        /**
         * Check match group for entries.
         * Probe in group order; trailing unmatched groups are absent from a set, so
         * each test must tolerate a missing index. And since an identifier can't be
         * "0", the plain truthy test is preferable to strlen.
         *
         */
        foreach ($this->matches as $name) {
            if (!empty($name[1])) {
                // namespace declaration: prefix for everything that follows
                $ns = $name[1] . "\\";
            }
            elseif (!empty($name[2])) {
                $r["class"][] = $ns . $name[2];
            }
            elseif (!empty($name[4])) {
                $r["function"][] = $ns . $name[4];
            }
            elseif (!empty($name[5])) {
                $r["const"][] = $ns . $name[5];
            }
        }
        return $r;
    }
}