GUI editor to tame mod_security rules

Artifact [f6a343ab31]

Artifact f6a343ab31918c92b2e2f2f146e32d1600f6e36e47585855a4f85f4e32584448:

  • Executable file logfmt1/logex.py — part of check-in [81e5866c7a] at 2021-02-02 12:32:12 on branch trunk — Use dateutil.parser fuzzy=True (user: mario size: 4145)

#!/usr/bin/env python3
# encoding: utf-8
# title: logex
# description: extract fields from log (with .fmt)
# version: 0.3
# type: cli
# category: extract
#
#
# The first parameter should be the log file, and a .log.fmt must exist
# alongside it (generate with `update-logfmt`).
#
# Syntax:
#   logex.py /var/log/apache2/access.log  request_path  request_time  @host
#
# Other args:
#   --json / --tab / --csv
#   --iso8601 / --debug / --rx
#
# Where any @'s are decoration, and fields can be supplied as individual
# arguments (output space-separated unless --tab/--csv is given). Field
# names are application-type specific (internal) names. (E.g.
# @request_method, @host or @tm_wday for Apache logs. With some predefined
# aliases, e.g. the W3C extended log field names.)
#
# Field name prefixes are irrelevant for normal log entries, but control
# how list entries from container fields are joined:
#    @name   will just show the first entry
#    %name   space-separated list
#    *name   comma-separated list
#    +name   plus-joined list
#    #name   as json array
#     name   whatever str() makes of it
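#
#    For example, a hypothetical container field tags = ["a", "b", "c"]
#    comes out as "a" for @tags, "a b c" for %tags, "a,b,c" for *tags,
#    "a+b+c" for +tags, and ["a", "b", "c"] for #tags.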
#
# Fields can be given as individual arguments, or as part of string
# output groups:
#    logex fn.log --tab  @individual "@combined,@with,@comma" @tabagain
#    logex fn.log --csv  "@lone" "*multi" "#json"
# Though you usually don't want to overcomplicate the log format again.
#


import sys, errno, re, json
import traceback, dateutil.parser
import logfmt1


def main():
    pass  # stub entry point; the actual script body below runs at module level

#-- args
argv = sys.argv
space = " "
if "--tab" in argv:
    space = "\t"
if "--csv" in argv:
    space = "," 
iso8601 = any(a in argv for a in ("--iso", "--iso8601", "--date", "--fixdates", "--die-apachedateformat-die"))
as_json = any(a in argv for a in ("--json", "--asjson", "--as-json"))
dodebug = any(a in argv for a in ("--debug", "-D"))
only_rx = any(a in argv for a in ("--regex", "--rx"))
# remove --params
argv = [a for a in argv if not re.match(r"^--[\w-]+$|^-\w$", a)]  # also match hyphenated flags like --as-json
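# e.g. ["logex.py", "fn.log", "--tab", "@host"] → ["logex.py", "fn.log", "@host"]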
# filename and field list
try:
    log_fn = argv[1]
except IndexError:
    sys.stderr.write("logex: no filename given\n")
    sys.exit(errno.ENOENT)
output_fields = space.join(argv[2:])
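# (the remaining args form the output template, e.g. "@host @request_path")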


#-- open log file
try:
    reader = logfmt1.logopen(log_fn, debug=dodebug, duplicate=False)
    #if dodebug:
    #    sys.stdout.write(json.dumps(reader.__dict__, indent=2, default=lambda x:str(x))+"\n")
    if only_rx:
        sys.stdout.write(reader.rx.pattern)
        sys.exit(0)
except Exception as e:
    sys.stderr.write(traceback.format_exc()+"\n")
    sys.stderr.write("Use `update-logfmt-apache` or modseccfg→File→Install→update_logfmt to generate a .fmt descriptor\n")
    sys.exit(errno.ENODATA)


# extra aliases (for apache/httpd)
alias = {
    "time": "request_time",
    "bytes": "bytes_sent",
    "ip": "remote_addr",
    "c-ip": "remote_addr",
    "dns": "server_name",
    "status": "status",
    "method": "request_method",
    "uri": "request_path",
    "url": "request_path",
    "path": "request_path",
    "uri-stem": "request_path",
    "uri-query": "request_query",
}
alias.update(reader.alias)
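# (aliases supplied by the logfmt1 reader override the built-in defaults above)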


# substitute occurrences
def get_field(m, row):
    """re.sub callback: resolve one prefixed field name against the current row."""
    pfx, name = m.groups()
    val = row.get(name) or row.get(alias.get(name)) or "-"
    if isinstance(val, list):  # how to handle lists (for unpacked [key "value"] fields)
        if pfx == "@":
            val = val[0]
        elif pfx == "+":
            val = "+".join(val)
        elif pfx == "%":
            val = " ".join(val)
        elif pfx == "*":
            val = ",".join(val)
        elif pfx == "#":
            val = json.dumps(val)
        else:
            val = str(val)
    return val


# loop over lines, and output selection
for row in reader:

    if not row:
        row = {}
    if iso8601:
        for key in ["date", "request_time", "datetime"]:
            if key in row:
                # the […] wrapping should already be gone at this point
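                # e.g. Apache's "02/Feb/2021:12:32:12 +0100" → "2021-02-02T12:32:12+01:00"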
                row[key] = dateutil.parser.parse(row[key].strip("[]"), fuzzy=True).isoformat()
    if as_json:
        print(
            json.dumps(row)
        )
    else:
        print(
            re.sub(r"([@+*#%]?)([\w\-]+)", lambda m: get_field(m, row), output_fields)
        )
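

# Example (field names depend on the log's .fmt descriptor; these are the
# typical Apache access-log names):
#   logex.py /var/log/apache2/access.log --tab @remote_addr @request_path @status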