Overview
| Comment: | add new comment extraction rx |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | trunk |
| Files: | files | file ages | folders |
| SHA3-256: |
3ad4b252ac653966fec058479c231b01 |
| User & Date: | mario on 2022-11-01 16:57:44 |
| Other Links: | manifest | tags |
Context
|
2022-11-01
| ||
| 18:50 | move name_to_fn and get_readme into MetaUtils check-in: 63fdedee18 user: mario tags: trunk | |
| 16:57 | add new comment extraction rx check-in: 3ad4b252ac user: mario tags: trunk | |
|
2022-10-31
| ||
| 18:56 | add package discovery, and additional comment styles (different languages) check-in: f03780244f user: mario tags: trunk | |
Changes
Modified pluginconf/__init__.py from [97ed648af8] to [7148596eca].
| ︙ | ︙ | |||
364 365 366 367 368 369 370 |
src = src.replace("\r", "")
if not literal:
src = rx.header.sub("", src)
src = rx.comment.search(src)
if not src:
log.warning("Couldn't read source meta information: %s", filename)
return meta
| | | | 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 |
src = src.replace("\r", "")
if not literal:
src = rx.header.sub("", src)
src = rx.comment.search(src)
if not src:
log.warning("Couldn't read source meta information: %s", filename)
return meta
src = src[1] or src[2] or src[3] or src[4]
src = rx.hash(src).sub("", src).strip()
# Split comment block
if src.find("\n\n") > 0:
src, meta["doc"] = src.split("\n\n", 1)
# Turn key:value lines into dictionary
for field in rx.keyval.findall(src):
|
| ︙ | ︙ | |||
469 470 471 472 473 474 475 |
Pretty crude comment splitting approach. But works
well enough already. Technically a YAML parser would
do better; but is likely overkill.
"""
header = re.compile(r"""
(\A (
| | | | | | | | | > > | | | | | | | > > | > > > > > > > > > > > > > > > > | 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 |
Pretty crude comment splitting approach. But works
well enough already. Technically a YAML parser would
do better; but is likely overkill.
"""
header = re.compile(r"""
(\A (
\#! \s+ /.+ | # shebang
<\?php .*
) $)+
""", re.M | re.X)
comment = re.compile(r"""
((?:^ [ ]{0,4} (\#|//) .*\n)+) | # general
/\*+ ([\s\S]+?) \*/ | # C-multiline
<\# ([\s\S]+?) \#> | # PS
\{- ([\s\S]+?) -\} # Haskell
""", re.M | re.X)
hash_det = re.compile(r"""
^ ([ \t]*) ([#*/]*) ([ ]*) [\w-]*: # determine indent, craft strip regex
""", re.M | re.X)
keyval = re.compile(r"""
^ ([\w-]+) : ( .*$ # plain key:value lines
(?: \n(?![\w-]+:) .+$ )* # continuation lines sans ^xyz:
)
""", re.M | re.X)
config = re.compile(r"""
\{ ((?: [^\{\}]+ | \{[^\}]*\} )+) \} # JSOL/YAML scheme {...} dicts
| \< (.+?) \> # old <input> HTML style
""", re.X)
options = re.compile(r"""
["':$]? (\w*) ["']? # key or ":key" or '$key'
\s* [:=] \s* # "=" or ":"
(?: " ([^"]*) "
| ' ([^']*) ' # "quoted" or 'singl' values
| ([^,]*) # or unquoted literals
)
""", re.X)
select_dict = re.compile(r"""
(\w+) \s* [=:>]+ \s* ([^=,|:]+) # key=title | k2=t2
""", re.X)
select_list = re.compile(r"""
\s*([^,|;]+)\s* # alt | lists
""", re.X)
@staticmethod
def hash(src):
    """
    Build the regex that strips comment prefixes from the lines of `src`.

    The first `key:` line located by `rx.hash_det` supplies the indent,
    the comment marker characters (any of `#`, `*`, `/`) and the spaces
    following the marker. From those a multiline pattern is assembled, so
    that subsequent lines get stripped consistently.

    Returns a compiled pattern (re.M), intended for `.sub("", src)`.
    """
    m = rx.hash_det.search(src)
    if not m:
        # Fallback when no key line was found: generic short prefix.
        # The pattern used to end in a stray "}", which `re` treats as a
        # literal brace, so it never matched ordinary comment lines.
        return re.compile("^ ? ?[#*/]{0,2} ?", re.M)
    hash_rx = "^"
    if m[1]:
        # indent: the quantifier binds to the last char only (+- 1 in length)
        hash_rx += m[1] + "{0,2}"
    if m[2]:
        # marker chars become a character class, one extra repetition allowed
        hash_rx += "[" + m[2] + "]{1,%s}" % (len(m[2]) + 1)
    if m[3]:
        # spaces after the marker, same +- 1 tolerance as the indent
        hash_rx += m[3] + "{0,2}"
    return re.compile(hash_rx, re.M)
# ArgumentParser options conversion
# ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾
def argparse_map(opt):
"""
As variation of in-application config: options, this method converts
|
| ︙ | ︙ |
Modified test/config_altsyntax.py from [28af7a5552] to [4cae691114].
| ︙ | ︙ | |||
31 32 33 34 35 36 37 |
assert _parse(c_style).doc == "Do we get a comment?"
def multiline_ps1():
ps1_style= """
<#
# api: cpp
# title: second
| | | | > > > > > > > > > > | > > > > > > > > > > | 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
assert _parse(c_style).doc == "Do we get a comment?"
def multiline_ps1():
# Parses a <# ... #> enclosed block twice: first with "#"-prefixed lines,
# then with bare key: value lines, and checks the extracted fields.
ps1_style= """
<#
# api: cpp
# title: second
# version: 2.1
# category: nonpython
#
# Didn't work without hashes
#>
"""
print(_parse(ps1_style))
assert _parse(ps1_style).version == "2.1"
# Required adapting the continuation line detection (including spaced points).
# Multiline enclosure contents are now captured, thus a trailing #> or */ gets stripped.
ps1_style= """
<#
api: cpp
title: second
version: 2.2
category: nonpython
config: {name:x}
{name:y}
priority: bad
Didn't work without hashes
#>
"""
# Notably will only work with up to 3 spaces. Acceptable format constraint,
# but makes continuation less readable
# NOTE(review): the leading whitespace of the fixture lines above is not
# visible here; presumably the {name:y} continuation was indented - confirm.
print(_parse(ps1_style))
assert _parse(ps1_style).version == "2.2"
assert len(_parse(ps1_style).config) == 2
assert _parse(ps1_style).priority == "bad"
# should still migrate to hash() detection and regex generation
def indent_cpp():
# Comment block using "//" line prefixes; only the version field is checked.
# NOTE(review): going by the test name the fixture lines were presumably
# indented; that whitespace is not visible here - confirm.
cpp_style= """
// api: cpp
// title: third
// version: 3.3
// category: doubleprefix
//
// Basically just // instead of #
"""
assert _parse(cpp_style).version == "3.3"
|
Added test/config_hashdet.py version [a32557b6af].
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
# title: rx.hash
# description: format consistency checks
# version: 0.1
#
# new rx.hash() should still allow some minor variances
import pytest
from pluginconf import rx
# Key lines without a comment marker: rx.hash() is expected to yield a
# pattern that consists of the indent part only.
# NOTE(review): the three inputs read identically here; presumably they
# differed in the amount of leading whitespace - confirm against the file.
@pytest.mark.parametrize("_in,_out", [
[" var: y", "^ {0,2}"],
[" var: y", "^ {0,2}"],
[" var: y", "^ {0,2}"],
])
def spaces(_in, _out):
assert rx.hash(_in).pattern == _out
# Key lines behind different comment markers (#, ##, //, **, /*): the
# generated pattern should carry the marker characters as a character class
# with a repetition range of 1 up to marker length + 1.
@pytest.mark.parametrize("_in,_out", [
[" # var: y", r"^ {0,2}[#]{1,2} {0,2}"],
[" ## var: y", r"^ {0,2}[##]{1,3} {0,2}"],
[" // var: y", r"^ {0,2}[//]{1,3} {0,2}"],
[" ** var: y", r"^ {0,2}[**]{1,3} {0,2}"],
[" /* var: y", r"^ {0,2}[/*]{1,3} {0,2}"],
])
def hashvary(_in, _out):
assert rx.hash(_in).pattern == _out
|