𝌔 Fossil Hub
Login | Fossil manual |
phptags tag tidier

Check-in [c23de5734a]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add UTF-16/32 detection support, colorization, make regex <?php tag searches case-insensitive.
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:c23de5734afb5ad1cd3befa07d9afdc5f5919ca8
User & Date: mario 2014-11-13 19:43:14
Context
2014-11-13
19:43
remove comment check-in: 9f65e1db7d user: mario tags: trunk
19:43
Add UTF-16/32 detection support, colorization, make regex <?php tag searches case-insensitive. check-in: c23de5734a user: mario tags: trunk
19:42
Now using `fpm` (with exe.rb/makesfx.exe) instead of `epm` for packaging. check-in: 2129130770 user: mario tags: trunk
Changes

Changes to phptags.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
...
136
137
138
139
140
141
142









143
144
145
146
147
148
149
...
150
151
152
153
154
155
156
157

158

159
160
161
162
163
164
165
...
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
...
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
...
462
463
464
465
466
467
468


469

470
471
472
473
474
475
476
...
477
478
479
480
481
482
483
484
485
486
487


488
489
490
491
492
493
494
#!/usr/bin/php -qCdshort_open_tag=1 
<?php
/**
 * api: cli
 * type: application
 * title: PHP tag tidier
 * description: Rewrites PHP scripts' short/long open tags, close tags, apply whitespace fixes
 * version: 1.1
 * license: Public Domain
 * author: mario <mario#include-once:org>
 * category: utilities
 * config: <file type="array" value="$HOME/.config/php/phptags.php" title="configuration defaults file" description="an ordinary return(array(...)); script to set internal options like regex=>1 or verbose=>1" />
 * url: http://freshcode.club/projects/phptags
 * 
 * Simple command-line tool to rewrite PHP <?php open tags into
................................................................................
// were any files specified?
elseif (!$action->files) {
    print "No files or directories specified.\n";
}

// do something, do something!
elseif ($action->long || $action->short || $action->shortall || $action->close || $action->unclosed || $action->white || $action->warn || $action->dry) {









    foreach (files($action->files, $action->recursive) as $I=>$fn) {

        // read in file
        $src = file_get_contents($fn);
        $chksum = md5($src);
        $action->debug and print("$fn: reading [$chksum]\n");

................................................................................

        // whitespace warnings
        if ($action->warn) {
            preg_test("/\?\>([\s\pZ\\0]+)\z/", $src, "TRAILING whitespace");

            # /\\xEF\\xBB\\xBF/ == /\x{FEFF}/u  - But the dependency on a valid UTF-8 encoding can damage binaryish files
            preg_test("/^\\xEF\\xBB\\xBF[\s\pZ]*<\?(php|\W)/i", $src, "UTF-8 BOM before <?php")
            or preg_test("/^\\xEF\\xBB\\xBF/", $src, "UTF-8 BOM alone");



            preg_test("/^[\s\\0]+<\?(php|\W)/i", $src, "Whitespace BEFORE <?php")
            or preg_test("/^[\pZ\s\\0]+<\?(php|\W)/i", $src, "Unicode whitespace BEFORE <?php");
 
            // Consecutive PHP tags which *currently* do not output whitespace
            if ($action->verbose and preg_match("/^\<\?/", $src))
            preg_test("/\?\>(?!\\n<)\s+<\?/", $src, "Consecutive open+close tags with spacing (template?)")
            or preg_test("/\?\>\\r?\\n\<\?/", $src, "Consecutive open+close tags with harmless linebreak");
................................................................................
            // just probe for opening PHP <script> tag
            preg_test("/\<(?:script|server) \s+ language \s*=\s* ([\"\']?) PHP (\\1) \s*\>/ix", $src, "Long <script language=PHP> tags detected");
        }


        // Tokenize source for easier processing.
        if ($action->token && !$action->regex) {
            // only usable (for this purpose) if php-cli.ini hasn't short open tags disabled
            ini_get("short_open_tag") or exit("TokenizerCannotBeUsedAsShortTagsAreStillDisabled");;
            defined("T_OPEN_TAG_WITH_ECHO") or exit("OhGodNoTokenizerIsNotAvailable");;
            /*
               The long T_OPEN_TAG always includes some space "<?php\s",
               but consecutive/others are split into a separate token.
               While the short versions are followed by a distinct
               T_WHITESPACE or maybe completely absent.
               Therefore multiple alternatives are required for each test.
            */
................................................................................
                    "Convert <?␣ into long <?php␣ tag"
                );
            }


            // convert long into short tags
            if ($action->short || $action->shortall) {
                preg_modify( //retain leading newline          // use trailing linebreak            // no linebreak, no output spacing
                     array("/\<\?php(\R\s*)(echo|print)(\s)/", "/\<\?php(\s+)(echo|print)(\R\s*)/", "/\<\?php(\s+)(echo|print)(\s)/"),
                     array("\<\?=$1",                          "\<\?=$3",                           "\<\?="),
                     $src,
                     "Long <?php␣echo to short <?="
                );
                preg_modify( // look for single-line <?php ... ? > occurences, those should always be shortened
                    "/ (?<!^)       # not at the file begin
                       \<\?php      # opening \<\?php
                       (\s+.+\?\>)  # space, filler, closing \?\>
                    /x",
                    "\<\?$1",
                    $src,
                    "Single line <?php...?> into short tag"
                );
            }

            // even the initial open tag and longer sections
            if ($action->shortall) {
                preg_modify( // look for mixed syntax '<? echo', strip any initial spacing type
                    "/\<\?(\s*)(echo|print)\b/", "\<\?=",
                    $src,
                    "Mixed <?␣echo to short <?="
                );
                preg_modify(
                    // keep space character after <?php, do not allow any non-space separate alternative expressions (e.g. <?php( or <?php/ would actually be short tag plus php() function call or an expression const/division).
                    "/\<\?php(\R|\s)/", "\<\?$1",
                    $src,
                    "Any <?php␣ into short tag"
                );
            }
        }


................................................................................

/**
 * Make control and non-printable bytes visible.
 *
 * Every match of the pattern below is passed to nonprint_visualize_rx_cb(),
 * whose return value replaces the matched text. Matched are:
 *   - the three-byte UTF-8 BOM (EF BB BF), as one unit
 *   - any other single character that is neither a word character nor one
 *     of the whitelisted punctuation characters in the negated class
 *
 * @param  string $str  text to visualize
 * @return string|null  whatever preg_replace_callback() returns (NULL on a PCRE error)
 */
function nonprint_visualize($str) {
    return preg_replace_callback(
        // The literal hyphen is placed LAST in the class. Written as `\w-.` it is
        // rejected by PCRE2 (PHP 7.3+) with "invalid range in character class";
        // as the final character it is an unambiguous literal on every PCRE version.
        "~\\xEF\\xBB\\xBF|[^\w.,;:#*+´`\'\"!§$%&/()={}?<>|-]~",
        "nonprint_visualize_rx_cb",
        $str
    );
}
// match callback
function nonprint_visualize_rx_cb($m) {
    global $action;
................................................................................
    // map
    static $map = array();
    if ($action->color) {
        $map = array(
            "\r" => "\x1b[32m\\r\x1b[39m",
            "\n" => "\x1b[31m\\n\x1b[39m",
            "\t" => "\x1b[36m\\t\x1b[39m",
            " "  => "\x1b[1;30m␣\x1b[0;39m",
            "\\0" => "\x1b[41;1;33m\\0\x1b[0;39m",
            "\\"  => "\x1b[35m\\\\\x1b[39m",
            "\xEF\xBB\xBF" => "\x1b[4;1;36m\\xEF\\xBB\\xBF\x1b[0;39m",


        );
    }
    // substitute if in colorization $map
    return isset($map[$m[0]])
         ? $map[$m[0]]  // color replacement for common escapes & chars
         : '\\x' . strtoupper(current(unpack('H*', $m[0])));  // binary
}







|







 







>
>
>
>
>
>
>
>
>







 







|
>

>







 







<
<
<







 







|
|
|







|









|





|







 







>
>
|
>







 







|
|


>
>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
...
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
...
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
...
224
225
226
227
228
229
230



231
232
233
234
235
236
237
...
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
...
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
...
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
#!/usr/bin/php -qCdshort_open_tag=1 
<?php
/**
 * api: cli
 * type: application
 * title: PHP tag tidier
 * description: Rewrites PHP scripts' short/long open tags, close tags, apply whitespace fixes
 * version: 1.2
 * license: Public Domain
 * author: mario <mario#include-once:org>
 * category: utilities
 * config: <file type="array" value="$HOME/.config/php/phptags.php" title="configuration defaults file" description="an ordinary return(array(...)); script to set internal options like regex=>1 or verbose=>1" />
 * url: http://freshcode.club/projects/phptags
 * 
 * Simple command-line tool to rewrite PHP <?php open tags into
................................................................................
// were any files specified?
elseif (!$action->files) {
    print "No files or directories specified.\n";
}

// do something, do something!
elseif ($action->long || $action->short || $action->shortall || $action->close || $action->unclosed || $action->white || $action->warn || $action->dry) {

    // check options for availability
    // tokenizer is only usable if php-cli.ini hasn't short open tags disabled
    if ($action->token) {
        ini_get("short_open_tag") or exit("TokenizerCannotBeUsedAsShortTagsAreStillDisabled");;
        defined("T_OPEN_TAG_WITH_ECHO") or exit("OhGodNoTokenizerIsNotAvailable");;
    }

    // traverse files
    foreach (files($action->files, $action->recursive) as $I=>$fn) {

        // read in file
        $src = file_get_contents($fn);
        $chksum = md5($src);
        $action->debug and print("$fn: reading [$chksum]\n");

................................................................................

        // whitespace warnings
        if ($action->warn) {
            preg_test("/\?\>([\s\pZ\\0]+)\z/", $src, "TRAILING whitespace");

            # /\\xEF\\xBB\\xBF/ == /\x{FEFF}/u  - But the dependency on a valid UTF-8 encoding can damage binaryish files
            preg_test("/^\\xEF\\xBB\\xBF[\s\pZ]*<\?(php|\W)/i", $src, "UTF-8 BOM before <?php")
            or preg_test("/^\\xEF\\xBB\\xBF/", $src, "UTF-8 BOM alone")
            or preg_test("/^\\x00{0,2}[\\xFE\\xFF]{2}\\x00{0,2}/", $src, "UTF-16/32 BOM");

            // any other preceding whitespace
            preg_test("/^[\s\\0]+<\?(php|\W)/i", $src, "Whitespace BEFORE <?php")
            or preg_test("/^[\pZ\s\\0]+<\?(php|\W)/i", $src, "Unicode whitespace BEFORE <?php");
 
            // Consecutive PHP tags which *currently* do not output whitespace
            if ($action->verbose and preg_match("/^\<\?/", $src))
            preg_test("/\?\>(?!\\n<)\s+<\?/", $src, "Consecutive open+close tags with spacing (template?)")
            or preg_test("/\?\>\\r?\\n\<\?/", $src, "Consecutive open+close tags with harmless linebreak");
................................................................................
            // just probe for opening PHP <script> tag
            preg_test("/\<(?:script|server) \s+ language \s*=\s* ([\"\']?) PHP (\\1) \s*\>/ix", $src, "Long <script language=PHP> tags detected");
        }


        // Tokenize source for easier processing.
        if ($action->token && !$action->regex) {



            /*
               The long T_OPEN_TAG always includes some space "<?php\s",
               but consecutive/others are split into a separate token.
               While the short versions are followed by a distinct
               T_WHITESPACE or maybe completely absent.
               Therefore multiple alternatives are required for each test.
            */
................................................................................
                    "Convert <?␣ into long <?php␣ tag"
                );
            }


            // convert long into short tags
            if ($action->short || $action->shortall) {
                preg_modify( //retain leading newline            // use trailing linebreak            // no linebreak, no output spacing
                     array("/\<\?php(\R\s*)(echo|print)(\s)/i", "/\<\?php(\s+)(echo|print)(\R\s*)/i", "/\<\?php(\s+)(echo|print)(\s)/i"),
                     array("\<\?=$1",                           "\<\?=$3",                            "\<\?="),
                     $src,
                     "Long <?php␣echo to short <?="
                );
                preg_modify( // look for single-line <?php ... ? > occurences, those should always be shortened
                    "/ (?<!^)       # not at the file begin
                       \<\?php      # opening \<\?php
                       (\s+.+\?\>)  # space, filler, closing \?\>
                    /ix",
                    "\<\?$1",
                    $src,
                    "Single line <?php...?> into short tag"
                );
            }

            // even the initial open tag and longer sections
            if ($action->shortall) {
                preg_modify( // look for mixed syntax '<? echo', strip any initial spacing type
                    "/\<\?(\s*)(echo|print)\b/i", "\<\?=",
                    $src,
                    "Mixed <?␣echo to short <?="
                );
                preg_modify(
                    // keep space character after <?php, do not allow any non-space separate alternative expressions (e.g. <?php( or <?php/ would actually be short tag plus php() function call or an expression const/division).
                    "/\<\?php(\R|\s)/i", "\<\?$1",
                    $src,
                    "Any <?php␣ into short tag"
                );
            }
        }


................................................................................

/**
 * Make control and non-printable bytes visible.
 *
 * Every match of the pattern below is passed to nonprint_visualize_rx_cb(),
 * whose return value replaces the matched text. Matched are:
 *   - the three-byte UTF-8 BOM (EF BB BF), as one unit
 *   - the two-byte sequences FE FF and FF FE (UTF-16 / UTF-32 BOM bytes)
 *   - any other single character that is neither a word character nor one
 *     of the whitelisted punctuation characters in the negated class
 *
 * @param  string $str  text to visualize
 * @return string|null  whatever preg_replace_callback() returns (NULL on a PCRE error)
 */
function nonprint_visualize($str) {
    // The literal hyphen is placed LAST in the negated class. Written as `\w-.`
    // it is rejected by PCRE2 (PHP 7.3+) with "invalid range in character class";
    // as the final character it is an unambiguous literal on every PCRE version.
    return preg_replace_callback(
        "~  \\xEF\\xBB\\xBF             |        # UTF-8
            \\xFE\\xFF | \\xFF\\xFE     |        # UTF-16 / UTF-32
            [^\w.,;:#*+´`\'\"!§$%&/()={}?<>|-]   # non-ASCII/printable
        ~x",
        "nonprint_visualize_rx_cb",
        $str
    );
}
// match callback
function nonprint_visualize_rx_cb($m) {
    global $action;
................................................................................
    // map
    static $map = array();
    if ($action->color) {
        $map = array(
            "\r" => "\x1b[32m\\r\x1b[39m",
            "\n" => "\x1b[31m\\n\x1b[39m",
            "\t" => "\x1b[36m\\t\x1b[39m",
            " "  => "\x1b[1;34m␣\x1b[0;39m",
            "\0" => "\x1b[41;1;33m\\0\x1b[0;39m",
            "\\"  => "\x1b[35m\\\\\x1b[39m",
            "\xEF\xBB\xBF" => "\x1b[4;1;36m\\xEF\\xBB\\xBF\x1b[0;39m",
            "\xFF\xFE" => "\x1b[4;1;36m\\xFF\\xFE\x1b[0;39m",
            "\xFE\xFF" => "\x1b[4;1;36m\\xFE\\xFF\x1b[0;39m",
        );
    }
    // substitute if in colorization $map
    return isset($map[$m[0]])
         ? $map[$m[0]]  // color replacement for common escapes & chars
         : '\\x' . strtoupper(current(unpack('H*', $m[0])));  // binary
}