cookiedough: Artifact [09138f0d0e]

Artifact 09138f0d0e1b07d67abeb61746096319e014f672cc225aca347ba0a9b3604a17:

Executable file dev/gh_conv.py — part of check-in [ae7f877aba] at 2021-04-06 09:04:06 on branch trunk — Updated API query tools, use dict for collection now, don't skip over publication in december anymore, less sleep(), less json overwriting. Still takes 3 hours. (user: mario size: 5713)
#!/usr/bin/env python3
# description: transform repos to UI json
#
# Compact github.json to UI data set


import re, requests, json, time, pprint
from traceback import format_exc

def write(results):
    """
        Serialize `results` as pretty-printed JSON (4-space indent, non-ASCII
        characters kept as-is) into uidata.json in the current directory,
        replacing any previous file.

        The JSON text is built completely before the file is opened, so a
        value json.dumps can't serialize raises without truncating an
        already existing uidata.json.
    """
    text = json.dumps(results, indent=4, ensure_ascii=False)
    with open("uidata.json", "w", encoding="utf-8") as f:
        f.write(text)

def read():
    """
        Load github.json from the current directory (UTF-8) and return the
        decoded JSON value.
    """
    with open("github.json", mode="r", encoding="utf-8") as handle:
        raw = handle.read()
    return json.loads(raw)
        
def tree2dir(tree, filter):
    """
        convert dir/file/struct into ├── └── lists

        tree:   list of dicts; only each entry's "path" string is read,
                with "/" as level separator
        filter: optional subdirectory name. When set, entries containing a
                "/" are kept only if their path contains the name (plain
                substring test, surrounding "/" stripped); entries without
                a "/" are always kept.
        returns the chart as one newline-joined string ("" for no entries)

        {{cookiecutter.name}} placeholders in paths are shortened to {{$name}}.
        The input list itself is not modified.
        @todo: retain path names in ordered dict even?
    """
    blank, tee, last, pipe = "    ", "├── ", "└── ", "│   "
    # raw string: "\{" and "\s" are invalid escapes in a plain literal
    placeholder = re.compile(r"\{\{\s*cookiecutter\.(\w+)\s*\}\}")
    paths = [placeholder.sub("{{$\\1}}", entry["path"]) for entry in tree]
    wanted = filter.strip("/") if filter else ""

    lines = []
    # one prefix cell per depth level; starts six cells deep, so a first
    # entry shallower than that goes through the "traversing upwards" branch
    prefix = [blank] * 6
    # walk bottom-up: the first entry met on a level is the last one drawn
    for path in reversed(paths):
        # if dir filter, only skip other subdirs/, but not root files
        if filter and "/" in path and wanted not in path:
            continue
        parts = path.split("/")
        depth, name = len(parts), parts[-1]
        if depth == len(prefix):
            # same level entry
            prefix[-1] = tee
        elif depth < len(prefix):
            # traversing upwards
            prefix = prefix[:depth]
            # a still-blank cell means first ever uplevel traversal in list
            prefix[-1] = last if prefix[-1] == blank else tee
        else:
            # one level deeper into tree
            prefix[-1] = pipe
            prefix.append(last)
        lines.append("".join(prefix) + name)
        # further entries met on this level sit above the one just drawn
        prefix[-1] = tee
    return "\n".join(reversed(lines))

def readme2cfgdesc(readme):
    """
        Extract option descriptions from markdown table rows like
            | `project_slug` | Description of flag... |

        readme: README text to scan
        returns a dict {option_name: description}; a name listed more than
        once keeps its last description

        Every row whose first cell is a single word (backticks optional)
        and whose second cell starts with a word character is picked up.
        With more than two columns the description holds the remaining
        cells including their "|" separators.
    """
    row = re.compile(
        r"""
        ^\s* \| \s* `?(\w+)`? \s*    # first cell, the option name
        \| \s* (\w.+) \s* \|$        # rest of the row up to the closing pipe
        """,
        re.M | re.X,
    )
    # the greedy (\w.+) also swallows the padding in front of the closing
    # pipe, so trim it off the description
    return {name: text.rstrip() for name, text in row.findall(readme)}

def ccjson2cfg(kv, desc):
    """
        transform key:value dict into pluginconf options structure,
        so we can differentiate types and add descriptions

        kv:   mapping {option_name: default_value}
        desc: dict {option_name: description}; names not listed get ""
        returns a list with one option dict per key, in kv order

        "class" is "private" for __names, "control" for _names and
        "cookiecutter" otherwise. A list default becomes a "select" option
        preset to its first choice ("" for an empty list). Otherwise the
        type is "dict" or "int" for such values and "str" for everything
        else; None is stored as "".
    """
    # invalid ex: souravsingh/cookiecutter-bear
    options = []
    for name, value in kv.items():
        # class
        if name.startswith("__"):
            _class = "private"
        elif name.startswith("_"):
            _class = "control"
        else:
            _class = "cookiecutter"

        # types (keys are inserted in the order the UI data has always used)
        option = {"name": name}
        if isinstance(value, list):
            option["type"] = "select"
            option["select"] = value
            option["value"] = value[0] if value else ""
        else:
            if value is None:
                value = ""
            if isinstance(value, dict):
                _type = "dict"
            elif isinstance(value, int):
                # NOTE(review): bool is a subclass of int, so JSON true/false
                # gets labelled "int" as well - confirm that is intended
                _type = "int"
            else:
                _type = "str"
            option["type"] = _type
            option["value"] = value
        option["class"] = _class
        option["description"] = desc.get(name, "")
        options.append(option)
    return options

def lang2api(lang, name, ccjson_text):
    """
        Derive the "api" category for a template.

        lang:        language label, may be empty/None
        name:        package name - accepted, but not inspected at present
        ccjson_text: raw cookiecutter.json text, searched for an "_api" entry
        returns the lowercased language with whitespace runs turned into "-"
        ("other" without a language), narrowed to a framework keyword if the
        language contains one, or replaced by an explicit "_api" value
    """
    api = re.sub(r"\s+", "-", (lang or "other").lower())
    # NOTE(review): `name` is never consulted; only the language-derived
    # string is searched for framework keywords - confirm whether the
    # package name was meant to be matched here
    if m := re.search(r"(django|flask|node|wordpress|mediawiki)", api):
        api = m[1]
    # an explicit "_api": "..." entry in cookiecutter.json overrides the guess
    if m := re.search(r'"_api":\s*"([\w\-]+)"', ccjson_text):
        api = m[1]
    return api

def short(name):
    """
        Remove every "cookiecutter" / "cookiecutters" marker from a name,
        together with one adjoining "_", "." or "-" on either side.
        Falls back to the unchanged name if nothing would be left over.
    """
    trimmed = re.sub("[_.-]?cookiecutters?[_.-]?", "", name)
    if not trimmed:
        return name
    return trimmed



# 
# Build the UI data set: one entry per cookiecutter.json of every repo
# flagged as template in github.json, keyed by "owner/repo" (plus "?d=subdir"
# when the entry carries a directory name), then store it via write().
results = {}
for full_name, d in read().items():
    if not d.get("is_template"):
        continue
    print(d["full_name"])

    # best effort: a record that fails to convert is logged and skipped;
    # entries already added for that repo stay in the result
    try:
        # README-derived descriptions only depend on the repo, not on the
        # individual template dir, so parse them once
        descriptions = readme2cfgdesc(d.get("readme", ""))

        # `subdir` is presumably "" for a template at the repo root - then
        # neither the ":dir" name suffix nor the "?d=dir" marker is added
        for subdir, cc in d["cc"].items():

            dir_sfx = f":{subdir}" if subdir else ""
            dir_url = f"?d={subdir}" if subdir else ""

            # convert config list
            ccjson = json.loads(cc["json"])
            config = ccjson2cfg(kv=ccjson, desc=descriptions)
            keywords = ccjson.get("_keywords", "") or ccjson.get("_features", "") or ccjson.get("_tags", "")

            # add
            results[d["full_name"] + dir_url] = {
                "name": d["full_name"] + dir_url,
                "short": short(d["name"]) + dir_sfx,
                "description": d["description"],
                "url": d["html_url"],
                "repo": d["clone_url"] + dir_url,
                "homepage": d["homepage"],
                "created_at": d["created_at"],
                "updated_at": d["updated_at"],
                "size": d["size"],
                # NOTE(review): GitHub's REST API documents watchers_count as
                # mirroring stargazers_count, so this sum may simply double
                # the star count - confirm that is intended
                "stars": d["stargazers_count"] + d["watchers_count"],
                "api": lang2api(d["language"], d["name"], cc["json"]),
                "has_wiki": d["has_wiki"],
                "forks": d["forks_count"],
                # "./." stands in when no license dict is present
                "license": d["license"]["spdx_id"] if isinstance(d["license"], dict) else "./.",
                "tickets": d["open_issues"],
                "default_branch": d["default_branch"],
                "_disabled": d["disabled"] or d["archived"],
                "keywords": keywords,
                "dir": tree2dir(d["tree"], subdir),
                "readme": d.get("readme", ""),
                "cookiecutterjson_url": cc["url"],
                "config": config,
            }

    except Exception:
        print(format_exc())

write(results)