Check-in [36388dbafb]
Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | logfmt1: Add update/nginx support (untested), fmt2md, #doc and #src comments in .fmt/json files, add logopen.names() to list named groups in regex, fix single backslash in rx_sub() |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: |
36388dbafb27af9c09dc129854f4536b |
User & Date: | mario 2020-12-17 16:37:01 |
Context
2020-12-17
| ||
16:53 | Use proper dict format for fields definition check-in: 53635fbeba user: mario tags: trunk | |
16:37 | logfmt1: Add update/nginx support (untested), fmt2md, #doc and #src comments in .fmt/json files, add logopen.names() to list named groups in regex, fix single backslash in rx_sub() check-in: 36388dbafb user: mario tags: trunk | |
16:34 | @inject __getattr__ for simpler tk.Widget lookups check-in: 45a8f2658a user: mario tags: trunk | |
Changes
Changes to Makefile.
︙ | ︙ | |||
13 14 15 16 17 18 19 | t: pytest -v -v -v -v logfmt1: deb whl deb: cd logfmt1 ; xpm -f -s src -t deb -n python3-logfmt1 logfmt1.py | | | | 13 14 15 16 17 18 19 20 21 22 23 24 25 | t: pytest -v -v -v -v logfmt1: deb whl deb: cd logfmt1 ; xpm -f -s src -t deb -n python3-logfmt1 logfmt1.py mv logfmt1/python3-logfmt1_*.deb . dpkg-deb -c python3-logfmt1_*.deb whl: pandoc logfmt1/README.md -o logfmt1/README.rst cd logfmt1 && ./setup.py bdist_wheel |
Added logfmt1/fmt2md.
#!/usr/bin/env python3
# description: show patterns in markdown
#
# Usage: fmt2md <file.fmt>
#
# Reads a .fmt definition (JSON) and prints its "fields" and "expand"
# sections as a Markdown table on stdout.
#
import re, json, os, sys


# doc urls: link template per format class, "{}" receives the field id
search = {
    "strftime": "[strftime(3)](https://www.man7.org/linux/man-pages/man3/strftime.3.html#:~:text={})",
    "grok": "[grok formats](https://duckduckgo.com/?q=grok+format+{})",
    "apache generic": "[mod_log_config.c/log_io.c](https://github.com/apache/httpd/search?q={})",
    "else": "[???](https://duckduckgo.com/?q={})",
}

# table head
TABLE_HEAD = "| placeholder \t | id \t | regex \t \t | grok/fmt-recursion \t | description/reference \t |"
# A Markdown table needs one delimiter cell per header cell; a plain run
# of dashes is not recognized as a table delimiter row.
TABLE_RULE = "| --- | --- | --- | --- | --- |"

# expand patterns longer than this get truncated in the table
MAX_RX_LEN = 50


def _as_options(opt):
    """Normalize a fields/expand entry to a dict.

    Some definitions use a bare regex string instead of an option dict;
    such a string is treated as the entry's "rx".
    """
    if isinstance(opt, str):
        return {"rx": opt}
    return opt


def _row(name, opt, url, rx):
    """Format one table row; `rx` is the already selected regex (or None)."""
    ident = opt.get("id")
    if rx:
        # a literal pipe would terminate the table cell
        rx = rx.replace("|", "¦")
    grok = opt.get("grok") or opt.get("class") or "-"
    # without an id, fall back to the placeholder itself as search term
    desc = opt.get("desc") or url.format(ident or name)
    return f"| {name} \t | {ident or '-'} \t | `{rx}` \t | {grok} \t | {desc} |"


def render(fmt):
    """Return the Markdown output for a parsed .fmt dict as a list of lines.

    `fmt` must contain "class"; "fields" and "expand" are optional.
    Raises KeyError if "class" is missing.
    """
    url = search.get(fmt["class"]) or search["else"]
    lines = [f"\n\n## {fmt['class']}\n", TABLE_HEAD, TABLE_RULE]

    # fields
    for name, opt in fmt.get("fields", {}).items():
        opt = _as_options(opt)
        lines.append(_row(name, opt, url, opt.get("rx")))

    # expand: may carry a "record" instead of a regex, shown truncated
    for name, opt in fmt.get("expand", {}).items():
        opt = _as_options(opt)
        rx = opt.get("rx") or opt.get("record")
        if rx and len(rx) >= MAX_RX_LEN:
            rx = rx[0:MAX_RX_LEN] + "…"
        lines.append(_row(name, opt, url, rx))
    return lines


def main(argv=None):
    """Print the Markdown table for the .fmt file named in argv[0]."""
    args = sys.argv[1:] if argv is None else argv
    if not args:
        print("usage: fmt2md <file.fmt>", file=sys.stderr)
        return 2
    with open(args[0], "r", encoding="utf-8") as f:
        fmt = json.load(f)
    for line in render(fmt):
        print(line)
    return 0


if __name__ == "__main__":
    sys.exit(main())
Changes to logfmt1/logfmt1.py.
1 2 3 4 5 6 | # encoding: utf-8 # api: python # title: python3-logfmt1 # description: handle *.log.fmt specifiers and regex conversion # type: transform # category: io | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 | # encoding: utf-8 # api: python # title: python3-logfmt1 # description: handle *.log.fmt specifiers and regex conversion # type: transform # category: io # version: 0.5 # license: Apache-2.0 # pack: # logfmt1.py=/usr/lib/python3/dist-packages/ # update_logfmt.py=/usr/bin/update-logfmt # ./logex.py=/usr/bin/logex # share=/usr/share/logfmt # architecture: all |
︙ | ︙ | |||
53 54 55 56 57 58 59 | import re, json, os, sys from copy import copy class rulesdb: # Known format strings and field identifiers. | | > | < < | | | | | 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 | import re, json, os, sys from copy import copy class rulesdb: # Known format strings and field identifiers. # This mixes both accesslog and errorlog format strings, should # be split out perhaps, as `%{cu}t` won't work in access.logs. # # - "[client %s:%d]" : "[remote %s:%d]" only in errorlogdefault? # apache = { "class": "apache generic", #"record": "%h %l %u %t \"%r\" %>s %b", #"regex": "(?<remote_host>\S+) …", "separator": " ", "rewrite": { "%[\d!,+\-]+": "%", # strip Apache placehoder conditions "(?<!\\\\)([\[\]\|\(\)])": r"\\$1", # escape any regex meta chars in format string "%%": "%", }, "placeholder": "%[<>]?(?:\w*\{[^\}]+\})?\^?\w+", # placeholder definitions to build regex: from "fields": { "%a": { "id": "remote_addr", "rx": "[\d.:a-f]+" }, "%{c}a": { "id": "remote_addr", "rx": "[\d.:a-f]+" }, "%h": { "id": "remote_host", "rx": "[\w\-.:]+" }, "%{c}h": { "id": "remote_host", "rx": "[\w\-.:]+" }, "%A": { "id": "local_address", "rx": "[\d.:a-f]+" }, "%u": { "id": "remote_user", "rx": "[\-\w@.]+" }, "%l": { "id": "remote_logname", "rx": "[\w\-.:]+" }, # %alias `loglevel` (errlog) "%t": { "id": "request_time", "rx": "\[?(\d[\d:\w\s:./\-+,;]+)\]?" }, # might be "local" formatting, e.g. 
[01/Mnt/2020:11:22:33 +0100], %alias `ctime` "%{u}t": { "id": "request_time", "rx": "u|\d+/\w+/\d+:\d+:\d+:\d+\.\d+\s\+\d+" }, # 01/Mnt/2020:11:22:33.12345 +0100 no implicit brackets "%{cu}t": { "id": "request_time", "rx": "ut|\d+-\w+-\d+\s\d+:\d+:\d+\.\d+" }, # error.log-only, 2020-01-31 11:22:33.901234, compact ISO 8601 format, no implicit brackets "%{msec_frac}t": { "id": "msec_frac", "rx": "[\d.]+" }, "%{usec_frac}t": { "id": "usec_frac", "rx": "[\d.]+" }, "%f": { "id": "request_file", "rx": "[^\s\"]+" }, "%b": { "id": "bytes_sent", "rx": "\d+|-" }, "%B": { "id": "bytes_sent", "rx": "\d+|-" }, "%O": { "id": "bytes_out", "rx": "\d+" }, "%I": { "id": "bytes_in", "rx": "\d+" }, |
︙ | ︙ | |||
135 136 137 138 139 140 141 142 143 144 145 146 147 148 | "%^FB": { "id": "ttfb", "rx": "-|\d+" }, # Apache 2.5, flat key:value structure presumably "%^Ä´S": { "id": "json", "rx": '\{(?:[\w:,\s\[\]]+|"(?:[^\\\\"]+|\\\\.)*")\}' }, # common compound placeholders "%{Referer}i": { "id": "referer", "rx": "[^\"]*" }, "%{User-Agent}i": { "id": "user_agent", "rx": r'(?:[^"]+|\\")*' }, }, # used by log extraction "alias": { "remote_address": "remote_addr", "ip": "remote_addr", "user": "remote_user", "file": "request_file", | > > | 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 | "%^FB": { "id": "ttfb", "rx": "-|\d+" }, # Apache 2.5, flat key:value structure presumably "%^Ä´S": { "id": "json", "rx": '\{(?:[\w:,\s\[\]]+|"(?:[^\\\\"]+|\\\\.)*")\}' }, # common compound placeholders "%{Referer}i": { "id": "referer", "rx": "[^\"]*" }, "%{User-Agent}i": { "id": "user_agent", "rx": r'(?:[^"]+|\\")*' }, }, "#doc": "https://httpd.apache.org/docs/2.4/mod/mod_log_config.html#formats", "#src": "https://github.com/apache/httpd/blob/trunk/modules/loggers/mod_log_config.c", # used by log extraction "alias": { "remote_address": "remote_addr", "ip": "remote_addr", "user": "remote_user", "file": "request_file", |
︙ | ︙ | |||
192 193 194 195 196 197 198 199 200 201 202 203 204 205 | # date/time strings strftime = { "class": "strftime", "placeholder": "%\w", "rewrite": { "%[EO_^0#\-]+(\w)": "%$1" # %E, %O alternative formats, glibc prefix extensions }, "fields": { "%a": { "id": "tm_wday", "rx": "\w+" }, "%A": { "id": "tm_wday", "rx": "\w+" }, "%b": { "id": "tm_mon", "rx": "\w+" }, "%B": { "id": "tm_mon", "rx": "\w+" }, "%c": { "id": "tm_dt", "rx": "[-:/.\w\d]+" }, "%C": { "id": "tm_cent", "rx": "\d\d" }, | > | 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 | # date/time strings strftime = { "class": "strftime", "placeholder": "%\w", "rewrite": { "%[EO_^0#\-]+(\w)": "%$1" # %E, %O alternative formats, glibc prefix extensions }, "#doc": "https://www.man7.org/linux/man-pages/man3/strftime.3.html", "fields": { "%a": { "id": "tm_wday", "rx": "\w+" }, "%A": { "id": "tm_wday", "rx": "\w+" }, "%b": { "id": "tm_mon", "rx": "\w+" }, "%B": { "id": "tm_mon", "rx": "\w+" }, "%c": { "id": "tm_dt", "rx": "[-:/.\w\d]+" }, "%C": { "id": "tm_cent", "rx": "\d\d" }, |
︙ | ︙ | |||
240 241 242 243 244 245 246 247 248 249 250 251 252 253 | "%+": { "id": "tm_date", "rx": "[-/:. \w\d]+" }, "%%": { "id": "percent", "rx": "%" }, }, "expand": { "%(\w)": "[\w\d.]+" } } # return builtin definitions or from /usr/share/logfmt/*.*.fmt @staticmethod def get(cls): rules = {} cls = cls.split(" ") while cls: | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 | "%+": { "id": "tm_date", "rx": "[-/:. \w\d]+" }, "%%": { "id": "percent", "rx": "%" }, }, "expand": { "%(\w)": "[\w\d.]+" } } nginx = { "class": "nginx", "separator": " ", "placeholder": "[$](\w+)", "rewrite": { "(?<!\\\\)([\[\]\|\(\)])": r"\\$1", # escape any regex meta chars in format string }, "#doc": "http://nginx.org/en/docs/http/ngx_http_core_module.html#var_args", "fields": { "$request": "(?<request_method>\w+) (?<request_path>\S+) (?<request_protocol>[\w/\d.]+)", "$remote_addr": "[\da-f.:]+", "$remote_user": "[\w\-@.:]+", "$time_local": "[\d/\w:.+\-]+", "$status": "\d+", "$request_length": "\d+", "$request_time": "[\d.]+", "$msec": "[\d.]+", "$scheme": "\w+", "$args": "\S*", "$is_args": "\??", "$body_bytes_sent": "\d+", "$http_referer": "\S*", "$http_user_agent": "\S*", "$pipe": "[p.]", "$ssl_protocol": "[\w.]*", "$ssl_cipher": "[\w\-.]*", }, "expand": { "[$](\w+)": { "id": "$1", "rx": "\S*", "grok": "QS" } }, } inilog = { "#note": "this is an alias for the »logfmt« key=value serialization (as rediscovered by Go/Heroku peeps)", "class": "inilog", "record": "*", "fields": { "*": { "id": "*", "rx": ".+" } }, "container": { "*": { "rx": "(\w+)=(?:(\S+)|\"(.*?)\")", "id": "$1", "value": "$2$3", "class": "inilog" } }, } # return builtin definitions or from /usr/share/logfmt/*.*.fmt @staticmethod def 
get(cls): rules = {} cls = cls.split(" ") while cls: |
︙ | ︙ | |||
266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 | #elif *.lnav: get readymade "regex:" else: add = rulesdb.__dict__.get(lookup, {}) or rulesdb.__dict__.get(lookup_, {}) rulesdb.merge(rules, add) cls.pop() return rules @staticmethod def merge(rules, add): for k,v in add.items(): if isinstance(v, dict): if not k in rules: rules[k] = {} rulesdb.merge(rules[k], v) elif not k in rules: rules[k] = v return rules | > | | | | | | | 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 | #elif *.lnav: get readymade "regex:" else: add = rulesdb.__dict__.get(lookup, {}) or rulesdb.__dict__.get(lookup_, {}) rulesdb.merge(rules, add) cls.pop() return rules # extend set of rules (recursive dictionary merging, without overwriting previous values) @staticmethod def merge(rules, add): for k,v in add.items(): if isinstance(v, dict): if not k in rules: rules[k] = {} rulesdb.merge(rules[k], v) elif not k in rules: rules[k] = v return rules # development: create share/*.fmt dumps from builtin definitions def extract_all(self): for key,val in rulesdb.__dict__.items(): if isinstance(val, dict): open(f"share/{key}.fmt", "w").write(json.dumps(val, indent=4)) #rulesdb().extract_all() # should be the other way round: regex() is meant to be a subset of update() def update(fmt): fmt["regex"] = regex(fmt, update=True) # assemble regex for format string |
︙ | ︙ | |||
387 388 389 390 391 392 393 | # (?<name>) to (?P<name>) def rx2re(rx): return re.sub("\(\?<(?=\w+>)", "(?P<", rx) # allow for $1, $2, $3 in re.sub() def rx_sub(pattern, replacement, source, flags=0): if replacement.find('$') >= 0: | | | 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 | # (?<name>) to (?P<name>) def rx2re(rx): return re.sub("\(\?<(?=\w+>)", "(?P<", rx) # allow for $1, $2, $3 in re.sub() def rx_sub(pattern, replacement, source, flags=0): if replacement.find('$') >= 0: replacement = re.sub(r'[\\\\](?=[0-9])', '$', replacement) return re.sub(pattern, replacement, source, flags) # file-style wrapper that yields parsed dictionaries instead of string lines class parsy_parse: def __init__(self, logfn="", fmt=None, debug=False, fail=False, duplicate=True): |
︙ | ︙ | |||
467 468 469 470 471 472 473 474 475 476 477 478 479 480 | if not id in d: d[id] = val elif not isinstance(d[id], list): d[id] = [d[id], val] else: d[id].append(val) # ANSI output for debugging regex/fmt string def debug_rx(self, line): rx = self.rx.pattern line = line.rstrip() #rx_cut = re.compile("[^)]* \(\?P<\w+> ( [^()]+ | \([^()]+\) )+ \) [^()]* \Z", re.X) # iteratively strip (?...) capture groups while len(rx) and rx.find("(?P<") >= 0: | > > > > | 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 | if not id in d: d[id] = val elif not isinstance(d[id], list): d[id] = [d[id], val] else: d[id].append(val) # get column names (from regex, in order of appearance) def names(self): return re.findall("\(\?P?<(\w+)>", self.rx.pattern) # ANSI output for debugging regex/fmt string def debug_rx(self, line): rx = self.rx.pattern line = line.rstrip() #rx_cut = re.compile("[^)]* \(\?P<\w+> ( [^()]+ | \([^()]+\) )+ \) [^()]* \Z", re.X) # iteratively strip (?...) capture groups while len(rx) and rx.find("(?P<") >= 0: |
︙ | ︙ | |||
501 502 503 504 505 506 507 | matched = "" print("\033[36m" + "failed regex section: \033[1;33;41m" + fail + "\033[40;0m") print("\033[42m" + matched + "\033[41m" + line[len(matched):] + "\033[40;0m") # alias logopen = parsy_parse | < < < < < < < < < < < < | 557 558 559 560 561 562 563 | matched = "" print("\033[36m" + "failed regex section: \033[1;33;41m" + fail + "\033[40;0m") print("\033[42m" + matched + "\033[41m" + line[len(matched):] + "\033[40;0m") # alias logopen = parsy_parse |
Added logfmt1/share/apache.clf.fmt.
> > > > > > > | 1 2 3 4 5 6 7 | { "class": "apache clf", "record": "%h %l %u %t \"%r\" %>s %b", "glob": [ "*.access.log" ] } |
Added logfmt1/share/apache.error.fmt.
> > > > > > > > | 1 2 3 4 5 6 7 8 | { "class": "apache combined", "record": "[%t] [%l] [pid %P] %F: %E: [client %a] %M", "alt": "[%{u}t] [%-m:%l] [pid %P:tid %T] %7F: %E: [client %a] %M", "glob": [ "*.access.log" ] } |
Changes to logfmt1/share/apache.fmt.
1 2 3 4 5 | { "class": "apache generic", "separator": " ", "rewrite": { "%[\\d!,+\\-]+": "%", | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 | { "class": "apache generic", "separator": " ", "rewrite": { "%[\\d!,+\\-]+": "%", "(?<!\\\\)([\\[\\]\\|\\(\\)])": "\\\\$1", "%%": "%" }, "placeholder": "%[<>]?(?:\\w*\\{[^\\}]+\\})?\\^?\\w+", "fields": { "%a": { "id": "remote_addr", "rx": "[\\d.:a-f]+" |
︙ | ︙ | |||
38 39 40 41 42 43 44 | }, "%t": { "id": "request_time", "rx": "\\[?(\\d[\\d:\\w\\s:./\\-+,;]+)\\]?" }, "%{u}t": { "id": "request_time", | | | | 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 | }, "%t": { "id": "request_time", "rx": "\\[?(\\d[\\d:\\w\\s:./\\-+,;]+)\\]?" }, "%{u}t": { "id": "request_time", "rx": "u|\\d+/\\w+/\\d+:\\d+:\\d+:\\d+\\.\\d+\\s\\+\\d+" }, "%{cu}t": { "id": "request_time", "rx": "ut|\\d+-\\w+-\\d+\\s\\d+:\\d+:\\d+\\.\\d+" }, "%{msec_frac}t": { "id": "msec_frac", "rx": "[\\d.]+" }, "%{usec_frac}t": { "id": "usec_frac", |
︙ | ︙ | |||
229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 | "rx": "[^\"]*" }, "%{User-Agent}i": { "id": "user_agent", "rx": "(?:[^\"]+|\\\\\")*" } }, "alias": { "remote_address": "remote_addr", "ip": "remote_addr", "user": "remote_user", "file": "request_file", "size": "bytes_sent", "datetime": "request_time", "ctime": "request_time", "date": "request_time", "loglevel": "remote_logname", "module_name": "request_method", "request_flushed": "file_line", "requests_on_connection": "keepalives", "error": "apr_status" }, "expand": { "%\\{([^{}]+)\\}t": { "id": "request_time", | > > | > | | 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 | "rx": "[^\"]*" }, "%{User-Agent}i": { "id": "user_agent", "rx": "(?:[^\"]+|\\\\\")*" } }, "#doc": "https://httpd.apache.org/docs/2.4/mod/mod_log_config.html#formats", "#src": "https://github.com/apache/httpd/blob/trunk/modules/loggers/mod_log_config.c", "alias": { "remote_address": "remote_addr", "ip": "remote_addr", "user": "remote_user", "file": "request_file", "size": "bytes_sent", "datetime": "request_time", "ctime": "request_time", "date": "request_time", "loglevel": "remote_logname", "module_name": "request_method", "request_flushed": "file_line", "requests_on_connection": "keepalives", "error": "apr_status" }, "expand": { "%\\{([^{}]+)\\}t": { "id": "request_time", "class": "strftime", "record": "$1" }, "%[<>]?\\{([\\w\\-]+)\\}[Conexic]": { "id": "$1", "rx": "\\S+" }, "%\\{([\\w\\-]+)\\}\\^t[io]": { "id": "$1", "rx": "\\S+" } }, "container": { "message": { "id": "$1", "value": "$2", "rx": "\\[(\\w+) \"(.*?)\"\\]", "class": "apache mod_security" } }, "glob": [ "*access.log", "/var/log/apache*/*acc*.log" ] } |
Changes to logfmt1/share/grok.fmt.
1 | { | | | | 1 2 3 4 5 6 7 8 9 10 | { "#license": "Apache-2.0", "#origin": "https://github.com/elastic/logstash/blob/v1.4.2/patterns/", "class": "grok", "separator": " ", "placeholder": "%\\{\\w+:([\\w.-]+)\\}", "rewrite": {}, "alias": {}, "fields": { "%{SYSLOGBASE}": { |
︙ | ︙ |
Added logfmt1/share/inilog.fmt.
> > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 | { "#note": "this is an alias for the \u00bblogfmt\u00ab key=value serialization (as rediscovered by Go/Heroku peeps)", "class": "inilog", "record": "*", "fields": { "*": { "id": "*", "rx": ".+" } }, "container": { "*": { "rx": "(\\w+)=(?:(\\S+)|\"(.*?)\")", "id": "$1", "value": "$2$3", "class": "inilog" } } } |
Added logfmt1/share/nginx.fmt.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 | { "class": "nginx", "separator": " ", "placeholder": "[$](\\w+)", "rewrite": { "(?<!\\\\)([\\[\\]\\|\\(\\)])": "\\\\$1" }, "#doc": "http://nginx.org/en/docs/http/ngx_http_core_module.html#var_args", "fields": { "$request": "(?<request_method>\\w+) (?<request_path>\\S+) (?<request_protocol>[\\w/\\d.]+)", "$remote_addr": "[\\da-f.:]+", "$remote_user": "[\\w\\-@.:]+", "$time_local": "[\\d/\\w:.+\\-]+", "$status": "\\d+", "$request_length": "\\d+", "$request_time": "[\\d.]+", "$msec": "[\\d.]+", "$scheme": "\\w+", "$args": "\\S*", "$is_args": "\\??", "$body_bytes_sent": "\\d+", "$http_referer": "\\S*", "$http_user_agent": "\\S*", "$pipe": "[p.]", "$ssl_protocol": "[\\w.]*", "$ssl_cipher": "[\\w\\-.]*" }, "expand": { "[$](\\w+)": { "id": "$1", "rx": "\\S*", "grok": "QS" } } } |
Changes to logfmt1/share/strftime.fmt.
1 2 3 4 5 6 7 8 9 10 11 12 13 | { "class": "strftime", "placeholder": "%\\w", "rewrite": { "%[EO_^0#\\-]+(\\w)": "%$1" }, "fields": { "%a": { "id": "tm_wday", "rx": "\\w+" }, "%A": { "id": "tm_wday", | > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 | { "class": "strftime", "placeholder": "%\\w", "rewrite": { "%[EO_^0#\\-]+(\\w)": "%$1" }, "#doc": "https://www.man7.org/linux/man-pages/man3/strftime.3.html", "fields": { "%a": { "id": "tm_wday", "rx": "\\w+" }, "%A": { "id": "tm_wday", |
︙ | ︙ |
Added logfmt1/share/update/nginx.
#!/usr/bin/env python3
# description: extract nginx log_* options to create .log.fmt files
#
# nginx -T is even simpler than apache2ctl -t -D DUMP_INCLUDES
# (for this use case)
#

import os, re, sys, random
import subprocess
import traceback
import json
from pprint import pprint


# extraction patterns
class rx:
    #
    # log_format custom '$remote_addr - $remote_user [$time_local] "$request" '
    #                   '$status $body_bytes_sent "$http_referer" '
    #                   '"$http_user_agent" "$http_x_forwarded_for"';
    #
    # groups: (directive, format name, quoted format segments)
    format = re.compile(r"""
        ^\s* (log_format) \s+ (\w+) \s+
        ( (?: '.+?' \s* )+ )
        \s*\;
    """, re.M|re.X)
    #
    # error_log /var/log/nginx/domain.error.log warn;
    # access_log /var/log/nginx/access.log custom;
    # access_log /var/log/nginx/access.log custom [if=$cond]+;
    #
    # groups: (directive, absolute path, optional format name / level)
    log = re.compile(r"""
        ^\s* (access_log|error_log) \s+ (/\S+) (?: \s (\w+) )?
        .*\;
    """, re.M|re.X)


# temporary state variables
class tmp:
    # format name → format string; "combined" is preset here
    log_formats = {
        "combined": '$remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent"'
    }
    # log file path → format name
    log_map = {
        #"../fn.log": "combined"
    }


# encapsulate properties of config file
class vhost:

    # split config directives, dispatch onto assignment/extract methods
    def __init__(self, fn, src, cfg_only=False):
        # `fn` and `cfg_only` are accepted for interface compatibility but unused
        for dir, name, form in rx.format.findall(src):
            self.logformat(name, form)
        for dir, path, name in rx.log.findall(src):
            self.log(dir, path, name)

    # join the quoted segments of a log_format into one format string
    def logformat(self, name, form):
        # the captured group may end in whitespace, which would
        # keep the closing quote from being stripped
        form = re.sub(r"'\s+'", "", form.strip()).strip("'")
        tmp.log_formats[name] = form.replace('\\"', '"')

    # remember which format name a log file uses
    def log(self, dir, path, name):
        if re.match(r"^off$|^syslog:|^memory:|^\|", path):
            return
        if not name:
            # NOTE(review): yields "access"/"error", for which no format is
            # registered, so such logs are skipped in mk_fmt() — confirm
            # whether access_log should default to "combined" instead
            name = dir.replace("_log", "")
        tmp.log_map[path] = name


# fetch the combined nginx configuration and extract log directives from it
def scan_all():
    src = ng_dump_config()
    vhost("*.conf", src)


# nginx -T prints the combined source of all config files
def ng_dump_config():
    cmd = ["nginx", "-T"]
    # run() waits for the process and closes the pipe
    proc = subprocess.run(cmd, stdout=subprocess.PIPE)
    return proc.stdout.decode("utf-8")


# traverse log files, create .fmt descriptor with current format string
def mk_fmt():
    # imported here so the config parsing above works without the package
    import logfmt1

    for fn, ty in tmp.log_map.items():
        fn_fmt = f"{fn}.fmt"
        fmt_record = tmp.log_formats.get(ty)
        if not fmt_record:
            continue

        # start from an existing descriptor, if it is readable JSON
        j = {}
        if os.path.exists(fn_fmt):
            try:
                with open(fn_fmt, "r", encoding="utf-8") as f:
                    j = json.load(f)
            except (OSError, ValueError) as e:
                j = {}
                print(f"WARN: {fn_fmt} contained invalid json: {str(e)}")
            if not isinstance(j, dict):
                j = {}

        if "class" not in j:
            j["class"] = f"nginx {ty}"
        if j.get("record") != fmt_record:
            j["record"] = fmt_record
        # add descriptors for known placeholders (always regenerated)
        j["regex"] = logfmt1.regex(j)

        print(f"→ {fn_fmt}")
        try:
            with open(fn_fmt, "w", encoding="utf-8") as f:
                f.write(json.dumps(j, indent=4))
        except (OSError, TypeError, ValueError) as e:
            print("ERR: " + str(e))


if __name__ == "__main__":
    scan_all()
    mk_fmt()
Changes to logfmt1/update_logfmt.py.
1 2 3 4 5 6 7 8 | #!/usr/bin/env python3 # encoding: utf-8 # title: update-logfmt # description: invoke ./share/update/* scripts # type: virtual # # Stub that reimplements run-parts | | > | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 | #!/usr/bin/env python3 # encoding: utf-8 # title: update-logfmt # description: invoke ./share/update/* scripts # type: virtual # # Stub that reimplements run-parts import os, re, sys def main(): pass for dir in ["/usr/share/logfmt/update", re.sub("[.\w]+$", "share/update", __file__)]: if os.path.exists(dir): argv = " ".join(sys.argv[1:]) os.system(f"run-parts {argv} {dir}") break |
Added test/logfmt.py.
# t:t
# d: logfmt1.regex() conversion

import pytest, util
import re, json
import logfmt1


def _logfmt(i):
    """Build a test that converts test/logfmt_<i>.inp and compares the
    resulting regex against test/logfmt_<i>.out."""
    def run():
        source_fn = f"test/logfmt_{i}.inp"
        expect_fn = f"test/logfmt_{i}.out"
        spec = json.loads(util.inp_read(source_fn))
        util.out_test(logfmt1.regex(spec), expect_fn)
    return run


logfmt_1 = _logfmt("apache")
logfmt_2 = _logfmt("apache_ext")
Added test/logfmt_apache.inp.
> > > > | 1 2 3 4 | { "class": "apache combined", "record": "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"" } |
Added test/logfmt_apache.out.
> | 1 | "(?<remote_host>[\\w\\-.:]+) (?<remote_logname>[\\w\\-.:]+) (?<remote_user>[\\-\\w@.]+) \\[?(?<request_time>\\d[\\d:\\w\\s:./\\-+,;]+)\\]? \"(?<request_line>(?<request_method>\\w+) (?<request_path>\\S+) (?<request_protocol>[\\w/\\d.]+))\" (?<status>-|\\d\\d\\d) (?<bytes_sent>\\d+|-) \"(?<referer>[^\"]*)\" \"(?<1>(?:[^\"]*|\\\\\")+)\"" |
Added test/logfmt_apache_ext.inp.
> > > > | 1 2 3 4 | { "class" : "apache ext", "record" : "%h %{GEOIP_COUNTRY_CODE}e %u [%{%Y-%m-%d %H:%M:%S}t.%{usec_frac}t] \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" \"%{Content-Type}i\" %{remote}p %v %A %p %R %{BALANCER_WORKER_ROUTE}e %X \"%{cookie}n\" %{UNIQUE_ID}e %{SSL_PROTOCOL}x %{SSL_CIPHER}x %I %O %{ratio}n%% %D %{ModSecTimeIn}e %{ApplicationTime}e %{ModSecTimeOut}e %{ModSecAnomalyScoreInPLs}e %{ModSecAnomalyScoreOutPLs}e %{ModSecAnomalyScoreIn}e %{ModSecAnomalyScoreOut}e" } |
Added test/logfmt_apache_ext.out.
> | 1 | "(?<remote_host>[\\w\\-.:]+) (?<1>\\S+) (?<remote_user>[\\-\\w@.]+) \\[(?<request_time>$1).(?<usec_frac>[\\d.]+)\\] \"(?<request_line>(?<request_method>\\w+) (?<request_path>\\S+) (?<request_protocol>[\\w/\\d.]+))\" (?<status>-|\\d\\d\\d) (?<bytes_sent>\\d+|-) \"(?<referer>[^\"]*)\" \"(?<user_agent>(?:[^\"]+|\\\\\")*)\" \"(?<12>(?:[^\"]*|\\\\\")+)\" (?<remote_port>\\d+) (?<virtual_host>[\\w\\-\\.]+) (?<local_address>[\\d.:a-f]+) (?<server_port>\\d+) (?<handler>[\\w:.\\-]+) (?<13>\\S+) (?<connection_status>[Xx+\\-.\\d]+) \"(?<14>(?:[^\"]*|\\\\\")+)\" (?<15>\\S+) (?<16>\\S+) (?<17>\\S+) (?<bytes_in>\\d+) (?<bytes_out>\\d+) (?<18>\\S+)% (?<request_duration_microseconds>\\d+) (?<19>\\S+) (?<110>\\S+) (?<111>\\S+) (?<112>\\S+) (?<113>\\S+) (?<114>\\S+) (?<115>\\S+)" |