PHP utility collection with hybrid and fluent APIs.

⌈⌋ branch:  hybrid7 libraries


Check-in [acded28fdd]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix p_stderr callback and RX_DIRECTIVES alternative list. Change to weird variable name $¶ instead of $state (maybe rename flags to lvl,act,end later).
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:acded28fddc4c3f05fc5c5ec795cf96f5d52aba9
User & Date: mario 2015-02-17 14:32:41
Context
2015-02-17
19:05
Externalize some common macros into `defaults.ph`. (Bundle into separate repository..) Add #pragma(interpolate=regex) substitution names ("erb" and "phpp") and optional delimiters via #pragma(interpolate=regex ##( )##). Start tracking parsed line numbers. Safeguard #include against most commented-out PHP include(); statements. Shortcut find_include for absolute paths. Join tokenizer macro searching and replacement for basic+complex macros, inject [T_FUNC_C, $repl, 0] instead of literal into token stream. Add MacroExpression shortcuts for "0" and "1" literals. Fix expr->run operand order for GT/LT/E comparisons. check-in: 3a12ba3743 user: mario tags: trunk
14:32
Fix p_stderr callback and RX_DIRECTIVES alternative list. Change to weird variable name $¶ instead of $state (maybe rename flags to lvl,act,end later). check-in: acded28fdd user: mario tags: trunk
07:09
Implement tokenizer and regex constant/macro replacement (incomplete). Fix condition block nesting level tracking. Rename main func to `on()`. Implement #include searching and omitting .h output. Change collected macro definitions to contain pre-parsed param names. check-in: bcf53d2741 user: mario tags: trunk
Changes

Changes to macro.php.

1
2
3
4
5

6

7
8
9
10
11
12
13
14
15
16
..
36
37
38
39
40
41
42


43
44
45
46
47
48
49
...
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
...
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
...
172
173
174
175
176
177
178
179
180
181
182
183
184
185



186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
...
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
...
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
...
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
...
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
<?php
/**
 * api: php
 * title: PHP Macro Preprocessor
 * description: Context-sensitive C-style #define/macro directive processing.

 * version: 0.1.0

 * type: rewrite
 * category: macro
 * pack: phrep.php, macro.php
 * 
 * Preprocessor to apply C-style #define, #ifdef, #include statements. Intended
 * as build helper and complex macro injector. It's language-specific to PHP,
 * but can consume C header #defines and most preprocess.py comment directives.
 *
 * Preprocessing works in two phases:
 *
................................................................................
use \stdClass, \ArrayAccess, \SplStack,
    \Exception, \OutOfBoundsException;
#endif


#define MAX(a,b) ( (a>b)?(a):(b) )
$test = MAX(1+2, 2+3);




/**
 * 
 *
 */
class MacroProcessor {
................................................................................
       "IFSET@" => [["expr"], '{ return "(isset($expr) ? $expr : NULL)"; }'],
    ];
    

    // Directive splitting.
    // Pattern handed to preg_split() in block() to cut the source into
    // directive name (capture 1), argument text (capture 2) and the content
    // in between. Compiled with /m (so ^ anchors at every line start) and /x
    // (whitespace ignored, unescaped # starts a comment).
    // NOTE(review): because of /x the unescaped "#stderr" below is read as a
    // regex comment, so "stderr" is not actually one of the alternatives here.
    // NOTE(review): "/*" in the prefix group is a quantified slash, not a
    // literal "/*" -- confirm whether "/\*" was intended.
    const RX_DIRECTIVES = "{
      ^ (?: \h? (?:\#|//|/*) \h{1,2} )?                 # preprocess.py prefixes
      \# (include|define|undef|pragma|#stderr           # directives
                ifn?def|if|elif|else|endif|pragma)
        ( \V*  (\\\\ \R \V+)*  (?<!\\\\)\R )            # multi-line expressions
    }mx";


    /**
     * Initialize further pragmas, runtime #defines, and #include search paths.
     *
................................................................................


    /**
     * Output PHP warnings during preprocessing
     *
     */
    function stderr(/*int*/$errno, /*string*/$msg, $file=NULL, $line=NULL, $context=[]) {
        // Writes only the message text plus a newline to the STDERR stream.
        // $errno, $file, $line and $context are accepted but unused in this body.
        // NOTE(review): the parameter list mirrors a PHP error-handler callback;
        // presumably registered via set_error_handler() elsewhere -- not visible here.
        fwrite(STDERR, "$msg\n");
    }
    
    
    /**
     * Main invocation method. Reads and transform source file,
     * returns processed output. Does not write to target itself.
     *
................................................................................
     * Regex-splits the source code, traverses it block-wise.
     * Handles #define, #if, #ifdef, #ifelse, #endif statements,
     * traverses into #include files, and honors #pragma overrides.
     *
     */
    function block(/*string*/ $src, $out="") {
        // $src: raw source text to process.
        // $out: text the processed output is appended to; returned at the end.

        // State/nesting tracking
        // (ConditionState is defined outside this excerpt; this method reads and
        //  writes its ->level, ->true and ->done members and iterates over it.)
        $state = new ConditionState();    // counts nesting level, ->true and ->done states
    
        // Split source into blocks of ["#directive", "args expr", "content"]
        // The synthetic leading "pragma", "(start=1)" pair makes the text that
        // precedes the first real directive the content of the first 3-tuple.
        $src = preg_split(self::RX_DIRECTIVES, $src, -1, PREG_SPLIT_DELIM_CAPTURE);
        $src = array_merge(["pragma", "(start=1)"], $src);
        $src = array_chunk($src, 3);



        
        // Run over each #directive + content block
        while ($block = array_shift($src)) {
            $directive = $block[0];
            $args = trim($block[1]);

            // #directives
#print "#$directive $args / t=$state->true s=$state->done\n";
            switch ($directive) {

                // Opening conditionals: only evaluated while the current state is active.
                case "if": 
                case "ifdef": 
                case "ifndef":
                    if ($state->true) {
                        if ($directive == "if") {
                            $args = MacroExpression::strval($args);
                        }
                        else {
                            // XOR flips the isset() result for #ifndef
                            $args = isset($this->defines[$args]) ^ ($directive == "ifndef");
                        }
                        // NOTE(review): this assigns the literal +1 to ->level; presumably
                        // ConditionState treats that as "enter a nesting level" -- verify.
                        $state->level = +1;
                        $state->done = $state->true = intval($args);
                    }
                    else {
                        $state->level = +1;
                    }
                    break;

                // #else is handled as an #elif whose expression is the constant 1
                case "else":
                    $args = 1;
                case "elif":
                    if ($state->done) {
                        // an earlier branch already matched: deactivate this one
                        $state->true = 0;
                    }
                    elseif ($state->true = MacroExpression::intval($args)) {
                        $state->done = 1;
                    }
                    break;
                
                case "endif":
                    // NOTE(review): literal -1 assigned; presumably "leave a nesting level" -- verify.
                    $state->level = -1;
                    break;

                // "macro" is rewritten to "define" and falls through to the handlers below
                case "macro": $directive = "define";
                case "define":
                case "undef":
                case "include":
                case "stderr":
                case "pragma":
                    if ($state->true) {
                        // dispatch to the matching d_<directive>() method; its return
                        // value is appended to the output
                        $out .= $this->{"d_$directive"}($args);
                    }
                    break;
            }
            
            // NOTE(review): prints the state on every block to standard output;
            // looks like leftover debugging -- confirm before relying on stdout.
            echo "#state ", json_encode(array_reverse(iterator_to_array($state))), "\n";

            // add to output?
            // (content is kept only while the state is active and pragma "output" is truthy)
            if ($state->true && $this->pragma["output"]) {
                $out .= $block[2];
            }
        }
        return $out;
    }

    /**
................................................................................
    }

    /**
     * Just prints out a message
     *
     */
    function d_stderr($name) {
        // $name: argument text of the #stderr directive, as passed in by block().
        // NOTE(review): "[" looks like a typo for "(" -- as written this line does
        // not parse; the intent appears to be a call to $this->stderr(0, $name).
        $this->stderr[0, $name);
    }

    /**
     * Sets processing options.
     *   → #pragma(output=1)
     *   → #pragma(target=bld/output_fn.php)
     *   → #pragma(interpolate=erb)
................................................................................
                    $arg = [];
                    continue;
                }
            }
            if ($level >= 1) {
               $arg[] = $t;
            }
#print "search(,) #$i $t[0]\n";
        }
        if ($arg) {   // remaining tokens
            $params[] = $arg;
        }
        return [$params, array_map([$this, "token_join"], $params), $len];
    }

................................................................................
        // complex
        if (is_int(strpos($name, "@"))) {
        }
        
        // basic
        else {
            foreach ($keys as $i=>$k) {
#print "MACROKEY $i -> $k\n";
                if (is_int(strpos($k, ".."))) {
                    $k = trim($k, ".");
                    $body = preg_replace("/\b\Q$k\E\b/", var_export(array_slice($args, $i), TRUE), $body);
                }
                else {
                    $body = preg_replace("/\b\Q$k\E\b/", $args[$i], $body);
                }
................................................................................


    /**
     * Tokenize expression,  into a list of [type, value] pairs.
     *
     */
    function expr($src) {
#print "EXPR_SRC := \"$src\"\n";
        
        // Regex returns a list of optional [0]=>, [type]=>, [1]=>
        preg_match_all(self::RX_EXPR, $src, $tokens, PREG_SET_ORDER);
        $tokens = array_map(function($t) {
            return array_slice($t, -2, 1); // turn into [type=>value]* list
        }, $tokens);
        





>
|
>


|







 







>
>







 







|
|







 







|







 







<
<
<




>
>
>







|





|






|
|


|






|
|

|
|




|








|





|


|







 







|







 







|







 







|







 







|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
..
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
...
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
...
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
...
176
177
178
179
180
181
182



183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
...
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
...
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
...
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
...
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
<?php
/**
 * api: php
 * title: PHP Macro Preprocessor
 * description: Context-sensitive C-style #define/macro directive processing.
 * license: MITL
 * version: 0.1.3
 * depends: php (>= 5.4.0)
 * type: rewrite
 * category: macro
 * pack: phrep.php=.phar/stub.php, macro.php, *.ph, README*
 * 
 * Preprocessor to apply C-style #define, #ifdef, #include statements. Intended
 * as build helper and complex macro injector. It's language-specific to PHP,
 * but can consume C header #defines and most preprocess.py comment directives.
 *
 * Preprocessing works in two phases:
 *
................................................................................
use \stdClass, \ArrayAccess, \SplStack,
    \Exception, \OutOfBoundsException;
#endif


#define MAX(a,b) ( (a>b)?(a):(b) )
$test = MAX(1+2, 2+3);

#stderr Manual warning message


/**
 * 
 *
 */
class MacroProcessor {
................................................................................
       "IFSET@" => [["expr"], '{ return "(isset($expr) ? $expr : NULL)"; }'],
    ];
    

    // Directive splitting.
    // Pattern handed to preg_split() in block(). Each match is one directive
    // line, optionally behind a comment prefix ("#", "//" or "/*"), including
    // backslash-continued follow-up lines.
    //   capture 1 = directive name
    //   capture 2 = argument text up to and including the final line break
    // The continuation group is deliberately non-capturing: with
    // PREG_SPLIT_DELIM_CAPTURE every participating capture group adds a field,
    // and block() chunks the split result in fixed groups of three.
    // Flags: /m lets ^ anchor at each line start, /x allows the layout and the
    // inline "# ..." comments (a literal hash therefore has to be written \#).
    const RX_DIRECTIVES = "{
      ^ (?: \h? (?:\#|//|/\*) \h{1,2} )?                # preprocess.py prefixes
      \# (include|define|undef|pragma|stderr            # directives
         |ifn?def|if|elif|else|endif)
        ( \V*  (?:\\\\ \R \V+)*  (?<!\\\\)\R )          # multi-line expressions
    }mx";


    /**
     * Initialize further pragmas, runtime #defines, and #include search paths.
     *
................................................................................


    /**
     * Output PHP warnings during preprocessing
     *
     */
    function stderr(/*int*/$errno, /*string*/$msg, $file=NULL, $line=NULL, $context=[]) {
        // Writes one diagnostic line to the STDERR stream:
        //   [errno] message (processing <pragma file>:<pragma line>) in <file>:<line>
        // $context is accepted for signature compatibility but not used.
        // NOTE(review): assumes $this->pragma has "file" and "line" entries set
        // by the time a message is emitted -- confirm against the pragma setup.
        $source = "{$this->pragma['file']}:{$this->pragma['line']}";

        // The previous message ended in a stray ")" with no matching opener.
        fwrite(STDERR, "[$errno] $msg (processing $source) in $file:$line\n");
    }
    
    
    /**
     * Main invocation method. Reads and transform source file,
     * returns processed output. Does not write to target itself.
     *
................................................................................
     * Regex-splits the source code, traverses it block-wise.
     * Handles #define, #if, #ifdef, #ifelse, #endif statements,
     * traverses into #include files, and honors #pragma overrides.
     *
     */
    function block(/*string*/ $src, $out="") {
        // $src: raw source text to process.
        // $out: text the processed output is appended to; returned at the end.

        // Split source into blocks of ["#directive", "args expr", "content"].
        // The synthetic leading "pragma", "(start=1)" pair makes the text that
        // precedes the first real directive the content of the first 3-tuple.
        $src = preg_split(self::RX_DIRECTIVES, $src, -1, PREG_SPLIT_DELIM_CAPTURE);
        $src = array_merge(["pragma", "(start=1)"], $src);
        $src = array_chunk($src, 3);

        // Condition block nesting levels, and active/done state tracking.
        // (ConditionState is defined outside this excerpt; this method reads and
        //  writes its ->level, ->true and ->done members and iterates over it.)
        $¶ = new ConditionState();

        // Run over each #directive + content block
        while ($block = array_shift($src)) {
            $directive = $block[0];
            $args = trim($block[1]);

            // #directives
#           print "#$directive $args / t=$¶->true s=$¶->done\n";
            switch ($directive) {

                // Opening conditionals: only evaluated while the current state is active.
                case "if":
                case "ifdef":
                case "ifndef":
                    if ($¶->true) {
                        if ($directive == "if") {
                            $args = MacroExpression::strval($args);
                        }
                        else {
                            // XOR flips the isset() result for #ifndef
                            $args = isset($this->defines[$args]) ^ ($directive == "ifndef");
                        }
                        // NOTE(review): assigns the literal +1 to ->level; presumably
                        // ConditionState treats that as "enter a nesting level" -- verify.
                        $¶->level = +1;
                        $¶->done = $¶->true = intval($args);
                    }
                    else {
                        $¶->level = +1;
                    }
                    break;

                // #else is handled as an #elif whose expression is the constant 1
                case "else":
                    $args = 1;
                case "elif":
                    if ($¶->done) {
                        // an earlier branch already matched: deactivate this one
                        $¶->true = 0;
                    }
                    elseif ($¶->true = MacroExpression::intval($args)) {
                        $¶->done = 1;
                    }
                    break;

                case "endif":
                    // NOTE(review): literal -1 assigned; presumably "leave a nesting level" -- verify.
                    $¶->level = -1;
                    break;

                // "macro" is rewritten to "define" and falls through to the handlers below
                case "macro": $directive = "define";
                case "define":
                case "undef":
                case "include":
                case "stderr":
                case "pragma":
                    if ($¶->true) {
                        // dispatch to the matching d_<directive>() method; its return
                        // value is appended to the output
                        $out .= $this->{"d_$directive"}($args);
                    }
                    break;
            }

            // NOTE(review): prints the state on every block to standard output;
            // looks like leftover debugging -- kept to preserve current behavior.
            echo "#state ", json_encode(array_reverse(iterator_to_array($¶))), "\n";

            // add to output?
            // (content is kept only while the state is active and pragma "output" is truthy)
            if ($¶->true && $this->pragma["output"]) {
                $out .= $block[2];
            }
        }
        return $out;
    }

    /**
................................................................................
    }

    /**
     * Just prints out a message
     *
     */
    function d_stderr($name) {
        // Handler for the #stderr directive: forwards the directive's argument
        // text to stderr() as the message, using 0 as the error number.
        // Nothing is returned, so block() appends nothing to the output for it.
        $errno = 0;
        $message = $name;
        $this->stderr($errno, $message);
    }

    /**
     * Sets processing options.
     *   → #pragma(output=1)
     *   → #pragma(target=bld/output_fn.php)
     *   → #pragma(interpolate=erb)
................................................................................
                    $arg = [];
                    continue;
                }
            }
            if ($level >= 1) {
               $arg[] = $t;
            }
#           print "search(,) #$i $t[0]\n";
        }
        if ($arg) {   // remaining tokens
            $params[] = $arg;
        }
        return [$params, array_map([$this, "token_join"], $params), $len];
    }

................................................................................
        // complex
        if (is_int(strpos($name, "@"))) {
        }
        
        // basic
        else {
            foreach ($keys as $i=>$k) {
#               print "MACROKEY $i -> $k\n";
                if (is_int(strpos($k, ".."))) {
                    $k = trim($k, ".");
                    $body = preg_replace("/\b\Q$k\E\b/", var_export(array_slice($args, $i), TRUE), $body);
                }
                else {
                    $body = preg_replace("/\b\Q$k\E\b/", $args[$i], $body);
                }
................................................................................


    /**
     * Tokenize expression,  into a list of [type, value] pairs.
     *
     */
    function expr($src) {
#       print "EXPR_SRC := \"$src\"\n";
        
        // Regex returns a list of optional [0]=>, [type]=>, [1]=>
        preg_match_all(self::RX_EXPR, $src, $tokens, PREG_SET_ORDER);
        $tokens = array_map(function($t) {
            return array_slice($t, -2, 1); // turn into [type=>value]* list
        }, $tokens);