#!/usr/bin/env python3
# description: fetch file list and readme for repos
#
# Augment existing github.json
#
# · query file contents via
# https://api.github.com/repos/VND/PKG/git/trees/master?recursive=true
#
# · add raw file link
# https://raw.githubusercontent.com/VND/PKG/master/cookiecutter.json
#
# TODO:
# - also find \w+/\w+/cookiecutter.json for directoried templates
# - maybe add a map {
# "/" => main cookiecutterjson
# "/subbir/" => other one
#
#
# · find README
# https://raw.githubusercontent.com/VND/PKG/master/README.md
import re, requests, json, time, pprint, os, dotenv
from random import randint
dotenv.load_dotenv()
# current file list from default_branch
# current file list from default_branch
def tree(vndpkg="vnd/pkg", branch="master"):
    """Fetch the recursive git tree listing of a repo via the GitHub API.

    Returns the list of tree entries (dicts with "path", "type", ...),
    or [] when the repo/branch is missing or the response lacks a usable
    JSON "tree" key.  Raises on rate-limit / server errors (403/429/500)
    so the crawl can be restarted later instead of silently skipping.
    """
    headers = {
        "User-Agent": "cookiedough/0.3.0 (Python; amd64; requests)",
        "Accept": "application/vnd.github.preview",
        # https://docs.github.com/en/developers/apps/authorizing-oauth-apps#non-web-application-flow
        # https://docs.github.com/en/rest/overview/other-authentication-methods#basic-authentication
        # https://github.com/settings/tokens
        # .getenv default "" avoids a TypeError when the token is unset
        "Authorization": "token " + os.getenv("GITHUB_API_TOKEN", ""),
    }
    #for sha in branch, "master", "main", "stable":
    url = f"https://api.github.com/repos/{vndpkg}/git/trees/{branch}?recursive=true"
    print(url)
    r = requests.get(url, headers=headers)
    if r.status_code != 200:
        # hard-stop on throttling / server errors, skip everything else (404 etc.)
        if r.status_code in (403, 429, 500):
            raise Exception(pprint.pformat(vars(r)))
        print("SKIPPING", r.status_code, r.reason)
        return []
    # .get with a sane default: the header is not guaranteed to be present
    remaining = int(r.headers.get("X-RateLimit-Remaining", "60"))
    print(remaining)
    if remaining < 10:
        print(r.headers)
        print("- EXTRA SLEEP")
        # back off harder the closer we get to the limit;
        # max(..., 1) avoids ZeroDivisionError when remaining == 0
        time.sleep(90 / max(remaining, 1))
    try:
        print(r)
        return r.json()["tree"]
    except (ValueError, KeyError):
        # ValueError: body is not JSON; KeyError: no "tree" (empty repo etc.)
        return []
# search tree list
def find_cookiecutter(tree):
    """Yield the directory prefix ("" for root, "sub/dir/" otherwise) of
    every cookiecutter.json found in a git tree listing."""
    for p in tree:
        # raw string: \w / \. / \- in a plain literal are invalid escape
        # sequences and raise warnings on modern Python
        if m := re.match(r"^(([\w.\-]+/)*)cookiecutter\.json$", p["path"]):
            yield m[1]
def has_cookiecutter(tree):
    """Return True if the tree contains any cookiecutter.json, else False.

    The original fell off the end of the loop and returned None for the
    "not found" case; callers only truth-test the result, so returning an
    explicit False is backward-compatible and clearer.
    """
    return next(find_cookiecutter(tree), None) is not None
# find cookiecutter.json, add raw retrieval url, and contents
def cc_json(d, vndpkg, tree):
    """Populate d["cc"] with one entry per cookiecutter.json in *tree*.

    Each entry maps the directory prefix to its display dir, raw
    retrieval URL, and the fetched JSON text.

    Fix: iterate the `tree` parameter instead of reaching back into
    d["tree"] — the only caller passes d["tree"], so behavior is
    unchanged, but the parameter is no longer dead.
    """
    d["cc"] = cc = {}
    for dir in find_cookiecutter(tree):
        url = f"https://raw.githubusercontent.com/{vndpkg}/{d['default_branch']}/{dir}cookiecutter.json"
        cc[dir] = {
            "dir": dir or "/",
            "url": url,
            "json": requests.get(url).text,
        }
# check for any README, and add raw text
def readme(d, vndpkg, tree):
    """Find a top-level README (bare or .md/.txt/.rst/.wiki) in *tree* and
    store its raw text in d["readme"].

    If several variants exist, the last match in tree order wins
    (unchanged from the original behavior).
    """
    for p in tree:
        # raw string for the regex — \. in a plain literal is an invalid escape
        if re.match(r"^README(\.(md|txt|rst|wiki))?$", p["path"]):
            d["readme"] = requests.get(f"https://raw.githubusercontent.com/{vndpkg}/{d['default_branch']}/{p['path']}").text
def write(results):
    """Serialize *results* to github.json, pretty-printed, UTF-8."""
    with open("github.json", "w", encoding="utf-8") as fh:
        json.dump(results, fh, indent=4)
def read():
    """Load github.json and return its contents; {} when the file is
    missing or unparseable.

    Narrowed from a bare `except:` (which also swallowed SystemExit and
    KeyboardInterrupt) to the two failures that can actually occur here.
    """
    try:
        with open("github.json", "r", encoding="utf-8") as f:
            return json.load(f)
    except (OSError, ValueError):
        # OSError: file missing/unreadable; ValueError: invalid JSON
        # (json.JSONDecodeError is a ValueError subclass)
        return {}
# loop over existing repos, add file lists + readme + cc.json + status flag
results = read()
for d in results.values():
    vndpkg = d["full_name"]
    # skip repos already processed in a previous run (either flag suffices)
    if "is_template" in d:
        continue
    if "tree" in d:
        continue
    # vendors explicitly excluded from crawling
    if re.search("hadenlabs/|moorinl/", vndpkg):
        continue
    d["tree"] = tree(vndpkg, branch=d["default_branch"])
    if not d["tree"]:
        print(f"- no tree for {vndpkg}")
    elif has_cookiecutter(d["tree"]):
        d["is_template"] = True
        cc_json(d, vndpkg, d["tree"])
        readme(d, vndpkg, d["tree"])
    else:
        d["is_template"] = False
    # checkpoint roughly every 21 repos so a crash loses little work
    if not randint(0, 20):
        write(results)
    time.sleep(0.25)
# fix: always persist the final state — the in-loop write is probabilistic,
# so without this everything since the last random checkpoint was lost
write(results)