Overview
Comment: | add new comment extraction rx |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: |
3ad4b252ac653966fec058479c231b01 |
User & Date: | mario on 2022-11-01 16:57:44 |
Other Links: | manifest | tags |
Context
2022-11-01
| ||
18:50 | move name_to_fn and get_readme into MetaUtils check-in: 63fdedee18 user: mario tags: trunk | |
16:57 | add new comment extraction rx check-in: 3ad4b252ac user: mario tags: trunk | |
2022-10-31
| ||
18:56 | add package discovery, and additional comment styles (different languages) check-in: f03780244f user: mario tags: trunk | |
Changes
Modified pluginconf/__init__.py from [97ed648af8] to [7148596eca].
︙ | ︙ | |||
364 365 366 367 368 369 370 | src = src.replace("\r", "") if not literal: src = rx.header.sub("", src) src = rx.comment.search(src) if not src: log.warning("Couldn't read source meta information: %s", filename) return meta | | | | 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 | src = src.replace("\r", "") if not literal: src = rx.header.sub("", src) src = rx.comment.search(src) if not src: log.warning("Couldn't read source meta information: %s", filename) return meta src = src[1] or src[2] or src[3] or src[4] src = rx.hash(src).sub("", src).strip() # Split comment block if src.find("\n\n") > 0: src, meta["doc"] = src.split("\n\n", 1) # Turn key:value lines into dictionary for field in rx.keyval.findall(src): |
︙ | ︙ | |||
469 470 471 472 473 474 475 | Pretty crude comment splitting approach. But works well enough already. Technically a YAML parser would do better; but is likely overkill. """ header = re.compile(r""" (\A ( | | | | | | | | | > > | | | | | | | > > | > > > > > > > > > > > > > > > > | 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 | Pretty crude comment splitting approach. But works well enough already. Technically a YAML parser would do better; but is likely overkill. """ header = re.compile(r""" (\A ( \#! \s+ /.+ | # shebang <\?php .* ) $)+ """, re.M | re.X) comment = re.compile(r""" ((?:^ [ ]{0,4} (\#|//) .*\n)+) | # general /\*+ ([\s\S]+?) \*/ | # C-multiline <\# ([\s\S]+?) \#> | # PS \{- ([\s\S]+?) -\} # Haskell """, re.M | re.X) hash_det = re.compile(r""" ^ ([ \t]*) ([#*/]*) ([ ]*) [\w-]*: # determine indent, craft strip regex """, re.M | re.X) keyval = re.compile(r""" ^ ([\w-]+) : ( .*$ # plain key:value lines (?: \n(?![\w-]+:) .+$ )* # continuation lines sans ^xyz: ) """, re.M | re.X) config = re.compile(r""" \{ ((?: [^\{\}]+ | \{[^\}]*\} )+) \} # JSOL/YAML scheme {...} dicts | \< (.+?) \> # old <input> HTML style """, re.X) options = re.compile(r""" ["':$]? (\w*) ["']? # key or ":key" or '$key' \s* [:=] \s* # "=" or ":" (?: " ([^"]*) " | ' ([^']*) ' # "quoted" or 'singl' values | ([^,]*) # or unquoted literals ) """, re.X) select_dict = re.compile(r""" (\w+) \s* [=:>]+ \s* ([^=,|:]+) # key=title | k2=t2 """, re.X) select_list = re.compile(r""" \s*([^,|;]+)\s* # alt | lists """, re.X) @staticmethod def hash(src): """ find first comment to generate consistent strip regex for following lines """ m = rx.hash_det.search(src) if not m:# or not m[2]: return re.compile("^ ? 
?[#*/]{0,2} ?}", re.M) # fallback hash_rx = "^" if m[1]: # indent hash_rx += m[1] + "{0,2}" # +- 1 in length? if m[2]: # hash hash_rx += "[" + m[2] + "]{1,%s}" % (len(m[2]) + 1) if m[3]: # space hash_rx += m[3] + "{0,2}" return re.compile(hash_rx, re.M) # ArgumentParser options conversion # ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ def argparse_map(opt): """ As variation of in-application config: options, this method converts |
︙ | ︙ |
Modified test/config_altsyntax.py from [28af7a5552] to [4cae691114].
︙ | ︙ | |||
31 32 33 34 35 36 37 | assert _parse(c_style).doc == "Do we get a comment?" def multiline_ps1(): ps1_style= """ <# # api: cpp # title: second | | | | > > > > > > > > > > | > > > > > > > > > > | 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 | assert _parse(c_style).doc == "Do we get a comment?" def multiline_ps1(): ps1_style= """ <# # api: cpp # title: second # version: 2.1 # category: nonpython # # Didn't work without hashes #> """ print(_parse(ps1_style)) assert _parse(ps1_style).version == "2.1" # Required adapting the continuation line detection (including spaced points). # Multiline enclosures contents are now captured, thus trailign #> or */ stripped. ps1_style= """ <# api: cpp title: second version: 2.2 category: nonpython config: {name:x} {name:y} priority: bad Didn't work without hashes #> """ # Notably will only work with up to 3 spaces. Acceptable format constraint, # but makes continuation less readable print(_parse(ps1_style)) assert _parse(ps1_style).version == "2.2" assert len(_parse(ps1_style).config) == 2 assert _parse(ps1_style).priority == "bad" # should still migrate to hash() detection and regex generation def indent_cpp(): cpp_style= """ // api: cpp // title: third // version: 3.3 // category: doubleprefix // // Basically just // instead of # """ assert _parse(cpp_style).version == "3.3" |
Added test/config_hashdet.py version [a32557b6af].
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 | # title: rx.hash # description: format consistency checks # version: 0.1 # # new rx.hash() should still allow some minor variances import pytest from pluginconf import rx @pytest.mark.parametrize("_in,_out", [ [" var: y", "^ {0,2}"], [" var: y", "^ {0,2}"], [" var: y", "^ {0,2}"], ]) def spaces(_in, _out): assert rx.hash(_in).pattern == _out @pytest.mark.parametrize("_in,_out", [ [" # var: y", r"^ {0,2}[#]{1,2} {0,2}"], [" ## var: y", r"^ {0,2}[##]{1,3} {0,2}"], [" // var: y", r"^ {0,2}[//]{1,3} {0,2}"], [" ** var: y", r"^ {0,2}[**]{1,3} {0,2}"], [" /* var: y", r"^ {0,2}[/*]{1,3} {0,2}"], ]) def hashvary(_in, _out): assert rx.hash(_in).pattern == _out |