Overview
Comment: add new comment extraction rx
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 3ad4b252ac653966fec058479c231b01256b4477422d726a779f0a4ab8e73078
User & Date: mario on 2022-11-01 16:57:44
Other Links: manifest | tags
Context
2022-11-01
18:50
move name_to_fn and get_readme into MetaUtils check-in: 63fdedee18 user: mario tags: trunk
16:57
add new comment extraction rx check-in: 3ad4b252ac user: mario tags: trunk
2022-10-31
18:56
add package discovery, and additional comment styles (different languages) check-in: f03780244f user: mario tags: trunk
Changes

Modified pluginconf/__init__.py from [97ed648af8] to [7148596eca].

364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
    src = src.replace("\r", "")
    if not literal:
        src = rx.header.sub("", src)
        src = rx.comment.search(src)
        if not src:
            log.warning("Couldn't read source meta information: %s", filename)
            return meta
        src = src.group(0)
        src = rx.hash.sub("", src).strip()

    # Split comment block
    if src.find("\n\n") > 0:
        src, meta["doc"] = src.split("\n\n", 1)

    # Turn key:value lines into dictionary
    for field in rx.keyval.findall(src):







|
|







364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
    src = src.replace("\r", "")
    if not literal:
        src = rx.header.sub("", src)
        src = rx.comment.search(src)
        if not src:
            log.warning("Couldn't read source meta information: %s", filename)
            return meta
        src = src[1] or src[2] or src[3] or src[4]
        src = rx.hash(src).sub("", src).strip()

    # Split comment block
    if src.find("\n\n") > 0:
        src, meta["doc"] = src.split("\n\n", 1)

    # Turn key:value lines into dictionary
    for field in rx.keyval.findall(src):
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490


491
492
493
494
495
496
497
498
499
500
501
502
503
504


505


506














507
508
509
510
511
512
513
    Pretty crude comment splitting approach. But works
    well enough already. Technically a YAML parser would
    do better; but is likely overkill.
    """

    # Interpreter lines at the very start of a file; removed via .sub("", src)
    # before the comment block is searched.
    # NOTE(review): `\s+` demands whitespace between "#!" and the path, so a
    # plain "#!/usr/bin/..." shebang does not match -- confirm this is intended.
    header = re.compile(r"""
        (\A (
            \#! \s+ /.+ |                      # shebang
            <\?php .*
        ) $)+
    """, re.M | re.X)
    # First comment block of a source file; callers take the whole match
    # (group 0), comment markers included.
    comment = re.compile(r"""
        (^ [ ]{0,4} \# .*\n)+ |                # general
        (^ [ ]{0,4} // .*\n)+ |                # C++-style
        /\* [\s\S]+? \*/ |                     # C-multiline
        <\# [\s\S]+? \#> | \{\# [\s\S]+? \#\}  # PS/Perl
    """, re.M | re.X)
    # Per-line comment prefix (up to 4 spaces, 1-2 marker chars, up to 3
    # spaces); substituted with "" to expose the bare `key: value` text.
    hash = re.compile(r"""
        (^ [ ]{0,4} [#*/]{1,2} [ ]{0,3})
    """, re.M | re.X)
    # (key, value) pairs; the value swallows following non-empty lines as long
    # as they do not start with another `key:`.
    keyval = re.compile(r"""
        ^([\w-]+):(.*$(?:\n(?![\w-]+:).+$)*)   # plain key:value lines


    """, re.M | re.X)
    # One option declaration: group 1 for the {...} form, group 2 for <...>.
    config = re.compile(r"""
        \{ ((?: [^\{\}]+ | \{[^\}]*\} )+) \}   # JSOL/YAML scheme {...} dicts
        | \< (.+?) \>                          # old <input> HTML style
    """, re.X)
    # key/value pairs within one declaration: group 1 is the key, groups 2-4
    # hold the double-quoted, single-quoted or unquoted value respectively.
    options = re.compile(r"""
        ["':$]?   (\w*)  ["']?                 # key or ":key" or '$key'
        \s* [:=] \s*                           # "=" or ":"
     (?:  "  ([^"]*)  "
       |  '  ([^']*)  '                        #  "quoted" or 'singl' values
       |     ([^,]*)                           #  or unquoted literals
     )
    """, re.X)
    # `key=title` style pairs (separators: "=", ":" or ">")
    select_dict = re.compile(r"(\w+)\s*[=:>]+\s*([^=,|:]+)")


    # plain alternatives separated by "," "|" or ";"
    select_list = re.compile(r"\s*([^,|;]+)\s*")



















# ArgumentParser options conversion
# ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾
def argparse_map(opt):
    """
    As variation of in-application config: options, this method converts







|




|
|
|
|

|
|


|
>
>


|
|


|
|

|
|


|
>
>
|
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>







469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
    Pretty crude comment splitting approach. But works
    well enough already. Technically a YAML parser would
    do better; but is likely overkill.
    """

    # Interpreter lines at the very start of a file; removed via .sub("", src)
    # before the comment block is searched. `\s*` (rather than `\s+`) lets the
    # usual "#!/usr/bin/env ..." form match as well as "#! /usr/bin/...".
    header = re.compile(r"""
        (\A (
            \#! \s* /.+ |                        # shebang
            <\?php .*
        ) $)+
    """, re.M | re.X)
    # First comment block of a source file. Exactly one capture group per
    # alternative (1=line comments, 2=C, 3=PS, 4=Haskell), so callers can pick
    # the content with `m[1] or m[2] or m[3] or m[4]`. The marker alternation
    # is deliberately non-capturing; a capturing group there would shift the
    # enclosure groups and hide the Haskell content in a fifth group.
    comment = re.compile(r"""
        ((?:^ [ ]{0,4} (?:\#|//) .*\n)+) |       # general
        /\*+ ([\s\S]+?) \*/ |                    # C-multiline
        <\# ([\s\S]+?) \#>  |                    # PS
        \{- ([\s\S]+?) -\}                       # Haskell
    """, re.M | re.X)
    # Layout of the first `key:` line: group 1 indent, group 2 comment marker
    # characters, group 3 padding before the key.
    hash_det = re.compile(r"""
        ^ ([ \t]*) ([#*/]*) ([ ]*) [\w-]*:       # determine indent, craft strip regex
    """, re.M | re.X)
    # (key, value) pairs; the value swallows following non-empty lines as long
    # as they do not start with another `key:`.
    keyval = re.compile(r"""
        ^ ([\w-]+) : ( .*$                       # plain key:value lines
            (?: \n(?![\w-]+:) .+$ )*             # continuation lines sans ^xyz:
        )
    """, re.M | re.X)
    # One option declaration: group 1 for the {...} form, group 2 for <...>.
    config = re.compile(r"""
        \{ ((?: [^\{\}]+ | \{[^\}]*\} )+) \}     # JSOL/YAML scheme {...} dicts
        | \< (.+?) \>                            # old <input> HTML style
    """, re.X)
    # key/value pairs within one declaration: group 1 is the key, groups 2-4
    # hold the double-quoted, single-quoted or unquoted value respectively.
    options = re.compile(r"""
        ["':$]?   (\w*)  ["']?                   # key or ":key" or '$key'
        \s* [:=] \s*                             # "=" or ":"
     (?:  "  ([^"]*)  "
       |  '  ([^']*)  '                          #  "quoted" or 'singl' values
       |     ([^,]*)                             #  or unquoted literals
     )
    """, re.X)
    select_dict = re.compile(r"""
        (\w+) \s* [=:>]+ \s* ([^=,|:]+)          # key=title | k2=t2
    """, re.X)
    select_list = re.compile(r"""
        \s*([^,|;]+)\s*                          # alt | lists
    """, re.X)

    @staticmethod
    def hash(src):
        """
        Build the regex that strips comment prefixes from a comment block.

        The first `key:` line located by `rx.hash_det` decides the layout
        (indent, comment marker characters, padding). The returned pattern
        allows small deviations from that layout on the remaining lines.

        Parameters:
            src (str): text of the extracted comment block

        Returns:
            re.Pattern: multiline pattern anchored at line starts, intended
            for `rx.hash(src).sub("", src)`
        """
        layout = rx.hash_det.search(src)
        if not layout:  # or not layout[2]:
            # Fallback when no `key:` line exists: up to two spaces, up to two
            # marker characters, one optional space. (This pattern used to end
            # in a stray "}", which made it require a literal brace and thus
            # never strip an ordinary comment prefix.)
            return re.compile("^ ? ?[#*/]{0,2} ?", re.M)

        indent, marker, padding = layout.groups()
        pieces = ["^"]
        # The `{0,2}` quantifier binds to the last character only, so an
        # indent/padding of N characters accepts N-1 up to N+1 of them.
        if indent:
            pieces.append(indent + "{0,2}")
        if marker:
            # marker characters as a class, allowing one more than was seen
            pieces.append("[%s]{1,%d}" % (marker, len(marker) + 1))
        if padding:
            pieces.append(padding + "{0,2}")
        return re.compile("".join(pieces), re.M)


# ArgumentParser options conversion
# ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾
def argparse_map(opt):
    """
    As variation of in-application config: options, this method converts

Modified test/config_altsyntax.py from [28af7a5552] to [4cae691114].

31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46










47










48
49
50
51
52
53
54
55
56
57
58
59
    assert _parse(c_style).doc == "Do we get a comment?"

def multiline_ps1():
    """
    Parse a PowerShell-style `<# ... #>` block whose `version:` line lacks
    the ` # ` prefix the other lines carry, and expect version "2.1".
    """
    ps1_style= """
    <#
     # api: cpp
     # title: second
    version: 2.1
     # category: nonpython
     #
     # Won't work without hashes
     #>
    """
    print(_parse(ps1_style))
    assert _parse(ps1_style).version == "2.1"
    # Requires adapting the continuation line detection (including spaced points),










    # and detecting multiline markers and stripping them (they end up in doc otherwise).











def indent_cpp():
    """An indented block of `//` line comments must yield version "3.3"."""
    cpp_style = """
    // api: cpp
    // title: third
    // version: 3.3
    // category: doubleprefix
    //
    // Basically just // instead of #
    """
    parsed = _parse(cpp_style)
    assert parsed.version == "3.3"








|


|




|
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>












31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
    assert _parse(c_style).doc == "Do we get a comment?"

def multiline_ps1():
    """
    Parse two PowerShell-style `<# ... #>` blocks: one with ` # ` prefixed
    lines, one with plain indented lines that include a two-line `config:`.
    """
    ps1_style= """
    <#
     # api: cpp
     # title: second
     # version: 2.1
     # category: nonpython
     #
     # Didn't work without hashes
     #>
    """
    print(_parse(ps1_style))
    assert _parse(ps1_style).version == "2.1"
    # Required adapting the continuation line detection (including spaced points).
    # Multiline enclosure contents are now captured, thus the trailing #> or */ is stripped.
    ps1_style= """
    <#
       api: cpp
       title: second
       version: 2.2
       category: nonpython
       config: {name:x}
       {name:y}
       priority: bad
     
       Didn't work without hashes
     #>
    """
    # Notably this will only work with up to 3 spaces. Acceptable format constraint,
    # but it makes continuation lines less readable.
    print(_parse(ps1_style))
    assert _parse(ps1_style).version == "2.2"
    assert len(_parse(ps1_style).config) == 2
    assert _parse(ps1_style).priority == "bad"
    # should still migrate to hash() detection and regex generation

def indent_cpp():
    """An indented block of `//` line comments must yield version "3.3"."""
    cpp_style = """
    // api: cpp
    // title: third
    // version: 3.3
    // category: doubleprefix
    //
    // Basically just // instead of #
    """
    parsed = _parse(cpp_style)
    assert parsed.version == "3.3"

Added test/config_hashdet.py version [a32557b6af].



























































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# title: rx.hash
# description: format consistency checks
# version: 0.1
#
# new rx.hash() should still allow some minor variances

import pytest
from pluginconf import rx



@pytest.mark.parametrize("_in,_out", [
    ("  var: y",       "^  {0,2}"),
    ("   var: y",      "^   {0,2}"),
    ("        var: y", "^        {0,2}"),
])
def spaces(_in, _out):
    """Indent-only key lines: the generated pattern is the indent plus `{0,2}`."""
    generated = rx.hash(_in)
    assert generated.pattern == _out

@pytest.mark.parametrize("_in,_out", [
    (" # var: y",      r"^ {0,2}[#]{1,2} {0,2}"),
    (" ## var: y",     r"^ {0,2}[##]{1,3} {0,2}"),
    (" // var: y",     r"^ {0,2}[//]{1,3} {0,2}"),
    ("   **   var: y", r"^   {0,2}[**]{1,3}   {0,2}"),
    ("  /*  var: y",   r"^  {0,2}[/*]{1,3}  {0,2}"),
])
def hashvary(_in, _out):
    """Key lines with comment markers: indent, marker class and padding all show up."""
    generated = rx.hash(_in)
    assert generated.pattern == _out