Check-in [270a3a0f72]
Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Add "type": classifiers for some logfmt fields. Support $1$2$3 for expand id= and value= fields. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: |
270a3a0f727062f7201731745929d8fc |
User & Date: | mario 2020-12-18 10:47:48 |
Context
2020-12-18
| ||
10:48 | Fix dependencies in pluginconf.setup (versioned install_requires, mainly though limiting space between # comment and field: identifier in pmd block) check-in: 72e4f07a35 user: mario tags: trunk | |
10:47 | Add "type": classifiers for some logfmt fields. Support $1$2$3 for expand id= and value= fields. check-in: 270a3a0f72 user: mario tags: trunk | |
2020-12-17
| ||
17:07 | fix var names: path/name and .format check-in: 21efc9c8a6 user: mario tags: trunk | |
Changes
Changes to logfmt1/logfmt1.py.
︙ | ︙ | |||
75 76 77 78 79 80 81 | "(?<!\\\\)([\[\]\|\(\)])": r"\\$1", # escape any regex meta chars in format string "%%": "%", }, "placeholder": "%[<>]?(?:\w*\{[^\}]+\})?\^?\w+", # placeholder definitions to build regex: from "fields": { | | | | | | | | | | | | | | | | | | 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 | "(?<!\\\\)([\[\]\|\(\)])": r"\\$1", # escape any regex meta chars in format string "%%": "%", }, "placeholder": "%[<>]?(?:\w*\{[^\}]+\})?\^?\w+", # placeholder definitions to build regex: from "fields": { "%a": { "id": "remote_addr", "rx": "[\d.:a-f]+", "type": "ip" }, "%{c}a": { "id": "remote_addr", "rx": "[\d.:a-f]+", "type": "ip" }, "%h": { "id": "remote_host", "rx": "[\w\-.:]+" }, "%{c}h": { "id": "remote_host", "rx": "[\w\-.:]+" }, "%A": { "id": "local_address", "rx": "[\d.:a-f]+", "type": "ip" }, "%u": { "id": "remote_user", "rx": "[\-\w@.]+" }, "%l": { "id": "remote_logname", "rx": "[\w\-.:]+" }, # %alias `loglevel` (errlog) "%t": { "id": "request_time", "rx": "\[?(\d[\d:\w\s:./\-+,;]+)\]?", "type": "datetime" }, # might be "local" formatting, e.g. 
[01/Mnt/2020:11:22:33 +0100], %alias `ctime` "%{u}t": { "id": "request_time", "rx": "u|\d+/\w+/\d+:\d+:\d+:\d+\.\d+\s\+\d+" }, # 01/Mnt/2020:11:22:33.12345 +0100 no implicit brackets "%{cu}t": { "id": "request_time", "rx": "ut|\d+-\w+-\d+\s\d+:\d+:\d+\.\d+" }, # error.log-only, 2020-01-31 11:22:33.901234, compact ISO 8601 format, no implicit brackets "%{msec_frac}t": { "id": "msec_frac", "rx": "[\d.]+" }, "%{usec_frac}t": { "id": "usec_frac", "rx": "[\d.]+" }, "%f": { "id": "request_file", "rx": "[^\s\"]+" }, "%b": { "id": "bytes_sent", "rx": "\d+|-" }, "%B": { "id": "bytes_sent", "rx": "\d+|-" }, "%O": { "id": "bytes_out", "rx": "\d+", "type": "int" }, "%I": { "id": "bytes_in", "rx": "\d+", "type": "int" }, "%S": { "id": "bytes_combined", "rx": "\d+", "type": "int" }, "%E": { "id": "apr_status", "rx": "\w+" }, # "AH01071" "%M": { "id": "message", "rx": ".+" }, # error.log-only, not really defined anywhere, ??? "%L": { "id": "log_id", "rx": "[\w\-\.]+" }, "%{c}L": { "id": "log_id", "rx": "[\w\-\.]+" }, "%{C}L": { "id": "log_id", "rx": "[\w\-\.]*" }, "%V": { "id": "server_name", "rx": "[\w\-\.]+" }, "%v": { "id": "virtual_host", "rx": "[\w\-\.]+" }, "%p": { "id": "server_port", "rx": "\d+", "type": "ip" }, "%{local}p": { "id": "server_port", "rx": "\d+", "type": "int" }, "%{canonical}p": { "id": "canonical_port", "rx": "[\w.]+" }, "%{remote}p": { "id": "remote_port", "rx": "\d+" }, "%P": { "id": "pid", "rx": "\d+", "type": "int" }, "%{g}T": { "id": "tid", "rx": "\d+" }, "%{tid}P": { "id": "tid", "rx": "\d+" }, "%{pid}P": { "id": "pid", "rx": "\d+" }, "%{hextid}P": { "id": "tid", "rx": "\w+" }, "%{hexpid}P": { "id": "pid", "rx": "\w+" }, "%H": { "id": "request_protocol", "rx": "[\w/\d.]+" }, "%m": { "id": "request_method", "rx": "[\w.]+" }, # %alias `module_name` (errlog) "%q": { "id": "request_query", "rx": "\??\S*" }, "%F": { "id": "file_line", "rx": "[/\w\-.:(\d)]+" }, # %alias `request_flushed` "%X": { "id": "connection_status", "rx": "[Xx+\-.\d]+" }, "%k": { 
"id": "keepalives", "rx": "\d+" }, # %alias `requests_on_connection` "%r": { "id": "request_line", "rx": "(?<request_method>\w+) (?<request_path>\S+) (?<request_protocol>[\w/\d.]+)" }, "%D": { "id": "request_duration_microseconds", "rx": "\d+", "type": "int" }, "%T": { "id": "request_duration_scaled", "rx": "[\d.]+", "type": "float" }, "%{s}T": { "id": "request_duration_seconds", "rx": "[\d.]+", "type": "float" }, "%{us}T": { "id": "request_duration_microseconds", "rx": "\d+", "type": "int" }, "%{ms}T": { "id": "request_duration_milliseconds", "rx": "\d+", "type": "int" }, "%U": { "id": "request_uri", "rx": "\S+(?<!\")" }, "%s": { "id": "status", "rx": "\d+", "type": "int" }, "%>s": { "id": "status", "rx": "-|\d\d\d" }, "%R": { "id": "handler", "rx": "[\w:.\-]+" }, "%^FU": { "id": "ttfu", "rx": "-|\d+" }, "%^FB": { "id": "ttfb", "rx": "-|\d+" }, # Apache 2.5, flat key:value structure presumably "%^ĴS": { "id": "json", "rx": '\{(?:[\w:,\s\[\]]+|"(?:[^\\\\"]+|\\\\.)*")\}' }, # common compound placeholders |
︙ | ︙ | |||
161 162 163 164 165 166 167 | # convert variant placeholders into fields beforehand, # possibly looking up other definitions (strftime) for marshalled placeholders "expand": { "%\{([^{}]+)\}t": { "id": "request_time", "class": "strftime", # different placeholders within \{...\} | | > | 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 | # convert variant placeholders into fields beforehand, # possibly looking up other definitions (strftime) for marshalled placeholders "expand": { "%\{([^{}]+)\}t": { "id": "request_time", "class": "strftime", # different placeholders within \{...\} "record": "$1", "type": "datetime" }, "%[<>]?\{([\w\-]+)\}[Conexic]": { "id": "$1", "rx": "\S+" }, "%\{([\w\-]+)\}\^t[io]": { "id": "$1", |
︙ | ︙ | |||
256 257 258 259 260 261 262 | }, "#doc": "http://nginx.org/en/docs/http/ngx_http_core_module.html#var_args", "fields": { "$request": { "id": "request", "rx": "(?<request_method>\w+) (?<request_path>\S+) (?<request_protocol>[\w/\d.]+)" }, "$remote_addr": { "id": "remote_addr", "rx": "[\da-f.:]+" }, "$remote_user": { "id": "remote_user", "rx": "[\w\-@.:]+" }, "$time_local": { "id": "time_local", "rx": "[\d/\w:.+\-]+" }, | | | | | 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 | }, "#doc": "http://nginx.org/en/docs/http/ngx_http_core_module.html#var_args", "fields": { "$request": { "id": "request", "rx": "(?<request_method>\w+) (?<request_path>\S+) (?<request_protocol>[\w/\d.]+)" }, "$remote_addr": { "id": "remote_addr", "rx": "[\da-f.:]+" }, "$remote_user": { "id": "remote_user", "rx": "[\w\-@.:]+" }, "$time_local": { "id": "time_local", "rx": "[\d/\w:.+\-]+" }, "$status": { "id": "status", "rx": "\d+", "type": "int" }, "$request_length": { "id": "request_length", "rx": "\d+", "type": "int" }, "$request_time": { "id": "request_time", "rx": "[\d.]+" }, "$msec": { "id": "msec", "rx": "[\d.]+" }, "$scheme": { "id": "scheme", "rx": "\w+" }, "$args": { "id": "args", "rx": "\S*" }, "$is_args": { "id": "is_args", "rx": "\??" }, "$body_bytes_sent": { "id": "body_bytes_sent", "rx": "\d+", "type": "int" }, "$http_referer": { "id": "http_referer", "rx": "\S*" }, "$http_user_agent": { "id": "http_user_agent", "rx": "\S*" }, "$pipe": { "id": "pipe", "rx": "[p.]" }, "$ssl_protocol": { "id": "ssl_protocol", "rx": "[\w.]*" }, "$ssl_cipher": { "id": "ssl_cipher", "rx": "[\w\-.]*" }, }, "expand": { |
︙ | ︙ | |||
369 370 371 372 373 374 375 | for field in ["rewrite", "fields", "expand", "alias", "container"]: if not field in fmt: fmt[field] = {} # pre-cleanup (for irrelevant format string `%!200,500<s` control prefixes) if "rewrite" in rules: for rx, repl in rules["rewrite"].items(): | | | 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 | for field in ["rewrite", "fields", "expand", "alias", "container"]: if not field in fmt: fmt[field] = {} # pre-cleanup (for irrelevant format string `%!200,500<s` control prefixes) if "rewrite" in rules: for rx, repl in rules["rewrite"].items(): record = rx_sub(rx, repl, record) # create fields from variant placeholders if "expand" in rules: for rx, expand in rules["expand"].items(): for is_quoted, match, *uu in re.findall(f"(\"?)({rx})", record): if match in fields: continue |
︙ | ︙ | |||
442 443 444 445 446 447 448 449 450 451 452 453 454 455 | # allow for $1, $2, $3 in re.sub() def rx_sub(pattern, replacement, source, flags=0): if replacement.find('$') >= 0: replacement = re.sub(r'[\\\\](?=[0-9])', '$', replacement) return re.sub(pattern, replacement, source, flags) # file-style wrapper that yields parsed dictionaries instead of string lines class parsy_parse: def __init__(self, logfn="", fmt=None, debug=False, fail=False, duplicate=True): """ Open log file and .fmt specifier, to iterate log lines as dictionary. | > > > > | 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 | # allow for $1, $2, $3 in re.sub() def rx_sub(pattern, replacement, source, flags=0): if replacement.find('$') >= 0: replacement = re.sub(r'[\\\\](?=[0-9])', '$', replacement) return re.sub(pattern, replacement, source, flags) # replace $0 $1 $2 in string with entries from list def repl_sub_dict(s, row): return re.sub("\$(\d+)", lambda m: row.get(int(m.group(1))), s) # file-style wrapper that yields parsed dictionaries instead of string lines class parsy_parse: def __init__(self, logfn="", fmt=None, debug=False, fail=False, duplicate=True): """ Open log file and .fmt specifier, to iterate log lines as dictionary. |
︙ | ︙ | |||
507 508 509 510 511 512 513 | else: pass # just try next line # pass .close() and similar to file object def __getattr__(self, name): return getattr(self.f, name) | | | > > | > > > | | | | | | | 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 | else: pass # just try next line # pass .close() and similar to file object def __getattr__(self, name): return getattr(self.f, name) # unpack [key "value"] fields def container_expand(self, d): for k,opt in self.container.items(): if not k in d: continue # find `(key)……(val+)` pairs according to regex for row in re.findall(opt["rx"], d[k]): id = repl_sub_dict(opt.get("id", "$1"), row) val = repl_sub_dict(opt.get("val", "$2"), row) # pack into list, if key is duplicated if not id in d: d[id] = val elif not isinstance(d[id], list): d[id] = [d[id], val] else: d[id].append(val) # get column names (from regex, in order of appearance) def names(self): return re.findall("\(\?P?<(\w+)>", self.rx.pattern) # ANSI output for debugging regex/fmt string def debug_rx(self, line): |
︙ | ︙ |
Changes to logfmt1/share/update/nginx.
1 2 3 4 5 6 7 8 9 10 11 12 13 | #!/usr/bin/env python3 # description: extract nginx log_* options to create .log.fmt files # # nginx -T is even simpler than apache2ctl -t -D DUMP_INCLUDES # (for this use case) # import os, re, sys, random import subprocess import traceback import json from pprint import pprint import logfmt1 | > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 | #!/usr/bin/env python3 # description: extract nginx log_* options to create .log.fmt files # # nginx -T is even simpler than apache2ctl -t -D DUMP_INCLUDES # (for this use case) # # the error log format is completely undocumented # [%V] "%P#" "*%uA " # https://github.com/phusion/nginx/blob/master/src/core/ngx_log.c # https://forum.nginx.org/read.php?2,239483,239484#msg-239484 # import os, re, sys, random import subprocess import traceback import json from pprint import pprint import logfmt1 |
︙ | ︙ | |||
57 58 59 60 61 62 63 | class vhost: # split *.conf directives, dispatch onto assignment/extract methods def __init__(self, fn, src, cfg_only=False): for dir,name,form in rx.format.findall(src): self.logformat(name, form) for dir,path,name in rx.log.findall(src): self.log(dir, path, name) | < > > | 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 | class vhost: # split *.conf directives, dispatch onto assignment/extract methods def __init__(self, fn, src, cfg_only=False): for dir,name,form in rx.format.findall(src): self.logformat(name, form) for dir,path,name in rx.log.findall(src): self.log(dir, path, name) def logformat(self, name, form): form = re.sub("'\s+'", "", form).strip("'") tmp.log_formats[name] = form.replace('\\"', '"') def log(self, dir, path, name): if re.match("^off$|^syslog:|^memory:|^\|", path): return if dir == "error_log": name = "error" #f"error {name}" if name else "error" if not name: name = dir.replace("_log", "") tmp.log_map[path] = name # iterate over all Apache config files, visit relevant ones (vhosts/mod_security configs) def scan_all(): |
︙ | ︙ | |||
93 94 95 96 97 98 99 | def mk_fmt(): for fn,ty in tmp.log_map.items(): fn_fmt = f"{fn}.fmt" fmt_record = tmp.log_formats.get(ty) if not fmt_record: | | | 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 | def mk_fmt(): for fn,ty in tmp.log_map.items(): fn_fmt = f"{fn}.fmt" fmt_record = tmp.log_formats.get(ty) if not fmt_record: fmt_record = "nginx" j = {} if os.path.exists(fn_fmt): try: j = json.loads(open(fn_fmt, "r", encoding="utf-8").read()) except Exception as e: j = {} |
︙ | ︙ |
Changes to logfmt1/update_logfmt.py.
1 2 3 | #!/usr/bin/env python3 # encoding: utf-8 # title: update-logfmt | | > | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 | #!/usr/bin/env python3 # encoding: utf-8 # title: update-logfmt # description: invoke …/logfmt/share/update/* scripts # type: virtual # # global *.log.fmt update run-parts import os, re, sys def main(): pass for dir in ["/usr/share/logfmt/update", re.sub("[.\w]+$", "share/update", __file__)]: if os.path.exists(dir): argv = " ".join(sys.argv[1:]) os.system(f"run-parts {argv} {dir}") break |