Map-based autoloader across php and phar resources

⌈⌋ branch:  Canonic Autoloader


Check-in [d97a9cc3f3]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix block {} recursion in regex extractor, capture define() statements, default to RegexPhpIdentifierDeclarations for updating.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:d97a9cc3f34a9a53c26688c14e6031f4f2ce8194
User & Date: mario 2014-09-01 17:09:06
Context
2014-09-01
17:50
disable automatic updates per default again check-in: f54a3bc594 user: mario tags: trunk
17:09
Fix block {} recursion in regex extractor, capture define() statements, default to RegexPhpIdentifierDeclarations for updating. check-in: d97a9cc3f3 user: mario tags: trunk
05:10
Introduce regex matcher for identifier extraction (tokenizer stream traversing is pretty slow). check-in: ea71217287 user: mario tags: trunk
Changes

Changes to update.php.

1
2
3
4
5
6
7
8
9
10
11
12
13
...
328
329
330
331
332
333
334
335

336
337




338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357



358
359

360

361
362


363
364
365
366
367
368
369
370
...
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
...
630
631
632
633
634
635
636
637
638
639
640
<?php
/**
 * api: php
 * description: Classmap builder which reads through directories and phar collections.
 * title: Canonic Classmap
 * version: 0.8.0
 * category: library
 * priority: hide
 * type: library
 * classes: RecursivePharDirIterator, PhpAndPharDirIterator, ExtractPhpIdentifierDeclarations, RegexPhpIdentifierDeclarations, Canonic_Classmap
 * license: Public Domain
 *
 *
................................................................................
    /**
     * Regex all the things.
     *
     */
    public function __construct($source) {

        // Scans raw PHP source text with two regex passes and stores the found
        // declarations in $this->matches (PREG_SET_ORDER). Capture groups per match:
        //   1 = namespace name, 2 = class/interface/trait name,
        //   3 = skipped class body block, 4 = function name, 5 = const name.

        /**
         * Remove non-code sections (inline HTML, comments and strings), so that
         * keywords appearing inside them are not mistaken for declarations.
         *
         * The heredoc label is capture group 1 and the nowdoc label is capture
         * group 2; each alternative must back-reference its own group for the
         * closing label. (Referencing group 1 from the nowdoc branch never
         * matches, because that group is unset there.)
         */
        $source = preg_replace("~
                (?: \A | \?\>) .*? \<\?(?:php|=)+?     # Open+closing PHP token
              | /\* .*? \*/                            # Multiline /* comments */
              | // \V*                                 # Single line // comment
              | \# \V*                                 # Hash comment
              | \" (?:[^\"\\\\] | \\\\.)* \"           # Double quoted string
              | \' (?:[^\'\\\\] | \\\\.)* \'           # Single quoted string
              | <<<\s* (\w+) .+? ^\\1                  # Heredoc string
              | <<<\s* '(\V+)' .+? ^\\2                # Nowdoc string (label is group 2)
            ~smix",
            "", $source
        );

        /** 
         * Match identifiers and skip class block {} structures. (While one could recurse
         * into methods or namespace{} blocks individually, practically only the outermost
         * interface is relevant for the autoloader.)
         *
         * The block skipper loops over "run of non-brace characters" or "nested block",
         * so any number of sibling blocks (methods) per nesting level is consumed. The
         * possessive ++ keeps unbalanced input from backtracking excessively.
         */
        preg_match_all("~
              namespace \s+ ([\\w\\x7F-\\xFF\\\\]+) \s* [{;]       # Namespace identifier
            | (?is:class|interface|trait) \s+ ([\\w\\x7F-\\xFF]+)  # Class declaration
              [^\{]*  ((?>\{ (?: [^\{\}]++ | (?-1) )* \}))+        # Recursive {...} block skipping
            | function \s+ ([\\w\\x7F-\\xFF]+) \s* \(              # Plain functions
            | const \s+ ([\\w\\x7F-\\xFF]+) \s* =                  # Constants (const= keyword)
           ~ix",
           $source, $this->matches, PREG_SET_ORDER
        );
    }

    
    /**
     * Nested array of identifier strings
................................................................................
        $map = array();

        // loop
        foreach ($files as $path) {

            // extract identifiers from source
            try {
                $add = new ExtractPhpIdentifierDeclarations(file_get_contents("$path"));
            }
            catch (Exception $e) {      //@todo complain about probable syntax errors?
                continue;
            }
            
            // make path relative
            $this->path = $relative ? $this->relative_path("$path", $basedir) : "$path";
................................................................................
        }
        
        // readd phar:// prefix
        return $is_phar ? "phar://" . $path : $path;
    }
    
}



?>





|







 







|
>


>
>
>
>





|
|













>
>
>
|
|
>
|
>
|
<
>
>
|







 







|







 







<



1
2
3
4
5
6
7
8
9
10
11
12
13
...
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371

372
373
374
375
376
377
378
379
380
381
...
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
...
641
642
643
644
645
646
647

648
649
650
<?php
/**
 * api: php
 * description: Classmap builder which reads through directories and phar collections.
 * title: Canonic Classmap
 * version: 0.8.1
 * category: library
 * priority: hide
 * type: library
 * classes: RecursivePharDirIterator, PhpAndPharDirIterator, ExtractPhpIdentifierDeclarations, RegexPhpIdentifierDeclarations, Canonic_Classmap
 * license: Public Domain
 *
 *
................................................................................
    /**
     * Regex all the things.
     *
     */
    public function __construct($source) {

        // Scans raw PHP source text with regex passes and stores the found
        // declarations in $this->matches (PREG_SET_ORDER). Capture groups per match:
        //   1 = namespace name, 2 = class/interface/trait name,
        //   3 = skipped class body block, 4 = function name, 5 = const/define name.

        /**
         * Remove non-code sections (comments and strings actually),
         * but convert define() string into constant literal before.
         * (Otherwise the string removal below would erase the quoted constant name.)
         */
        $source = preg_replace(
            "~\b define \s*\(\s* ([\"\']) ([\\w\\x7F-\\xFF]+) \\1 \s*, ~ix",
            "const $2 =", $source
        );

        /**
         * The heredoc label is capture group 1 and the nowdoc label is capture
         * group 2; each alternative must back-reference its own group for the
         * closing label. (Referencing group 1 from the nowdoc branch never
         * matches, because that group is unset there.)
         */
        $source = preg_replace("~
                (?: \A | \?\>) .*? \<\?(?:php|=)+?     # Open+closing PHP token
              | /\* .*? \*/                            # Multiline /* comments */
              | // \V*                                 # Single line // comment
              | \# \V*                                 # Hash comment
              |  \" (?:[^\"\\\\] | \\\\.)* \"          # Double quoted string
              |  \' (?:[^\'\\\\] | \\\\.)* \'          # Single quoted string
              | <<<\s* (\w+) .+? ^\\1                  # Heredoc string
              | <<<\s* '(\V+)' .+? ^\\2                # Nowdoc string (label is group 2)
            ~smix",
            "", $source
        );

        /** 
         * Match identifiers and skip class block {} structures. (While one could recurse
         * into methods or namespace{} blocks individually, practically only the outermost
         * interface is relevant for the autoloader.)
         *
         * The non-brace run inside the block skipper is possessive (++): it accepts the
         * same input as a plain * would, but cannot be re-split on failure, which avoids
         * runaway backtracking when braces are unbalanced.
         */
        preg_match_all("~
           (?: (?<![\\x7F-\\xFF]) \b )                 # Only match constructs at word breaks
           (?:
              namespace \s+
                  ([\\w\\x7F-\\xFF\\\\]+) \s* [{;]     # Namespace identifier
            | (?is:class|interface|trait) \s+
                  ([\\w\\x7F-\\xFF]+)  [^\{\}]*        # Class declaration
                  ((?>\{ (?: [^\{\}]++ | (?-1) )*\}))  # Recursive {...} block skipping
            | function \s+
                  ([\\w\\x7F-\\xFF]+) \s* \(           # Plain functions
            | (?is: const\s+| define\s*\( )
                  ([\\w\\x7F-\\xFF]+) \s* [=,]         # Constants (const/define)
           )~ix",
           $source, $this->matches, PREG_SET_ORDER
        );
    }

    
    /**
     * Nested array of identifier strings
................................................................................
        $map = array();

        // loop
        foreach ($files as $path) {

            // extract identifiers from source
            try {
                $add = new RegexPhpIdentifierDeclarations(file_get_contents("$path"));
            }
            catch (Exception $e) {      //@todo complain about probable syntax errors?
                continue;
            }
            
            // make path relative
            $this->path = $relative ? $this->relative_path("$path", $basedir) : "$path";
................................................................................
        }
        
        // readd phar:// prefix
        return $is_phar ? "phar://" . $path : $path;
    }
    
}



?>