Index: pluginconf/__init__.py ================================================================== --- pluginconf/__init__.py +++ pluginconf/__init__.py @@ -366,12 +366,12 @@ src = rx.header.sub("", src) src = rx.comment.search(src) if not src: log.warning("Couldn't read source meta information: %s", filename) return meta - src = src.group(0) - src = rx.hash.sub("", src).strip() + src = src[1] or src[3] or src[4] or src[5] # group 2 is only the #|// marker; 5 = Haskell + src = rx.hash(src).sub("", src).strip() # Split comment block if src.find("\n\n") > 0: src, meta["doc"] = src.split("\n\n", 1) @@ -471,41 +471,61 @@ do better; but is likely overkill. """ header = re.compile(r""" (\A ( - \#! \s+ /.+ | # shebang + \#! \s+ /.+ | # shebang <\?php .* ) $)+ """, re.M | re.X) comment = re.compile(r""" - (^ [ ]{0,4} \# .*\n)+ | # general - (^ [ ]{0,4} // .*\n)+ | # C++-style - /\* [\s\S]+? \*/ | # C-multiline - <\# [\s\S]+? \#> | \{\# [\s\S]+? \#\} # PS/Perl + ((?:^ [ ]{0,4} (\#|//) .*\n)+) | # general + /\*+ ([\s\S]+?) \*/ | # C-multiline + <\# ([\s\S]+?) \#> | # PS + \{- ([\s\S]+?) -\} # Haskell """, re.M | re.X) - hash = re.compile(r""" - (^ [ ]{0,4} [#*/]{1,2} [ ]{0,3}) + hash_det = re.compile(r""" + ^ ([ \t]*) ([#*/]*) ([ ]*) [\w-]*: # determine indent, craft strip regex """, re.M | re.X) keyval = re.compile(r""" - ^([\w-]+):(.*$(?:\n(?![\w-]+:).+$)*) # plain key:value lines + ^ ([\w-]+) : ( .*$ # plain key:value lines + (?: \n(?![\w-]+:) .+$ )* # continuation lines sans ^xyz: + ) """, re.M | re.X) config = re.compile(r""" - \{ ((?: [^\{\}]+ | \{[^\}]*\} )+) \} # JSOL/YAML scheme {...} dicts - | \< (.+?) \> # old HTML style + \{ ((?: [^\{\}]+ | \{[^\}]*\} )+) \} # JSOL/YAML scheme {...} dicts + | \< (.+?) \> # old HTML style """, re.X) options = re.compile(r""" - ["':$]? (\w*) ["']? # key or ":key" or '$key' - \s* [:=] \s* # "=" or ":" + ["':$]? (\w*) ["']? 
# key or ":key" or '$key' + \s* [:=] \s* # "=" or ":" (?: " ([^"]*) " - | ' ([^']*) ' # "quoted" or 'singl' values - | ([^,]*) # or unquoted literals + | ' ([^']*) ' # "quoted" or 'singl' values + | ([^,]*) # or unquoted literals ) """, re.X) - select_dict = re.compile(r"(\w+)\s*[=:>]+\s*([^=,|:]+)") - select_list = re.compile(r"\s*([^,|;]+)\s*") + select_dict = re.compile(r""" + (\w+) \s* [=:>]+ \s* ([^=,|:]+) # key=title | k2=t2 + """, re.X) + select_list = re.compile(r""" + \s*([^,|;]+)\s* # alt | lists + """, re.X) + @staticmethod + def hash(src): + """ find first comment to generate consistent strip regex for following lines """ + m = rx.hash_det.search(src) + if not m:# or not m[2]: + return re.compile("^ ? ?[#*/]{0,2} ?", re.M) # fallback + hash_rx = "^" + if m[1]: # indent + hash_rx += m[1] + "{0,2}" # +- 1 in length? + if m[2]: # hash + hash_rx += "[" + m[2] + "]{1,%s}" % (len(m[2]) + 1) + if m[3]: # space + hash_rx += m[3] + "{0,2}" + return re.compile(hash_rx, re.M) # ArgumentParser options conversion # ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ def argparse_map(opt): Index: test/config_altsyntax.py ================================================================== --- test/config_altsyntax.py +++ test/config_altsyntax.py @@ -33,20 +33,40 @@ def multiline_ps1(): ps1_style= """ <# # api: cpp # title: second - version: 2.1 + # version: 2.1 # category: nonpython # - # Won't work without hashes + # Didn't work without hashes #> """ print(_parse(ps1_style)) assert _parse(ps1_style).version == "2.1" - # requires adapting the continuation line detection (including spaced points) - # and detecting multiline markers, and stripping them (end up in doc else) + # Required adapting the continuation line detection (including spaced points). + # Multiline enclosures contents are now captured, thus trailing #> or */ stripped. 
+ ps1_style= """ + <# + api: cpp + title: second + version: 2.2 + category: nonpython + config: {name:x} + {name:y} + priority: bad + + Didn't work without hashes + #> + """ + # Notably will only work with up to 3 spaces. Acceptable format constraint, + # but makes continuation less readable + print(_parse(ps1_style)) + assert _parse(ps1_style).version == "2.2" + assert len(_parse(ps1_style).config) == 2 + assert _parse(ps1_style).priority == "bad" + # should still migrate to hash() detection and regex generation def indent_cpp(): cpp_style= """ // api: cpp // title: third ADDED test/config_hashdet.py Index: test/config_hashdet.py ================================================================== --- test/config_hashdet.py +++ test/config_hashdet.py @@ -0,0 +1,29 @@ +# title: rx.hash +# description: format consistency checks +# version: 0.1 +# +# new rx.hash() should still allow some minor variances + +import pytest +from pluginconf import rx + + + +@pytest.mark.parametrize("_in,_out", [ + [" var: y", "^ {0,2}"], + [" var: y", "^ {0,2}"], + [" var: y", "^ {0,2}"], +]) +def spaces(_in, _out): + assert rx.hash(_in).pattern == _out + +@pytest.mark.parametrize("_in,_out", [ + [" # var: y", r"^ {0,2}[#]{1,2} {0,2}"], + [" ## var: y", r"^ {0,2}[##]{1,3} {0,2}"], + [" // var: y", r"^ {0,2}[//]{1,3} {0,2}"], + [" ** var: y", r"^ {0,2}[**]{1,3} {0,2}"], + [" /* var: y", r"^ {0,2}[/*]{1,3} {0,2}"], +]) +def hashvary(_in, _out): + assert rx.hash(_in).pattern == _out +