#!/usr/bin/env python3
# description: fetch file list and readme for repos
#
# Augment existing github.json
#
# · query file contents via
# https://api.github.com/repos/VND/PKG/git/trees/master?recursive=true
#
# · add raw file link
# https://raw.githubusercontent.com/VND/PKG/master/cookiecutter.json
#
# TODO:
# - also find \w+/\w+/cookiecutter.json for directoried templates
# - maybe add a map {
# "/" => main cookiecutterjson
# "/subbir/" => other one
#
#
# · find README
# https://raw.githubusercontent.com/VND/PKG/master/README.md
import re, requests, json, time, pprint, os, dotenv
from random import randint
dotenv.load_dotenv()
# current file list from default_branch
# current file list from default_branch
def tree(vndpkg="vnd/pkg", branch="master"):
    """Fetch the recursive git tree listing of a repo via the GitHub API.

    Returns the list of tree entries (dicts with "path", "type", ...),
    or [] when the repo/branch is missing or the response lacks a usable
    JSON "tree" key.  Raises on rate-limit / server errors (403/429/500)
    so the crawl can be restarted later instead of silently skipping.
    """
    headers = {
        "User-Agent": "cookiedough/0.3.0 (Python; amd64; requests)",
        "Accept": "application/vnd.github.preview",
        # https://docs.github.com/en/developers/apps/authorizing-oauth-apps#non-web-application-flow
        # https://docs.github.com/en/rest/overview/other-authentication-methods#basic-authentication
        # https://github.com/settings/tokens
        # .getenv default "" avoids a TypeError when the token is unset
        "Authorization": "token " + os.getenv("GITHUB_API_TOKEN", ""),
    }
    #for sha in branch, "master", "main", "stable":
    url = f"https://api.github.com/repos/{vndpkg}/git/trees/{branch}?recursive=true"
    print(url)
    r = requests.get(url, headers=headers)
    if r.status_code != 200:
        # hard-stop on throttling / server errors, skip everything else (404 etc.)
        if r.status_code in (403, 429, 500):
            raise Exception(pprint.pformat(vars(r)))
        print("SKIPPING", r.status_code, r.reason)
        return []
    # .get with a sane default: the header is not guaranteed to be present
    remaining = int(r.headers.get("X-RateLimit-Remaining", "60"))
    print(remaining)
    if remaining < 10:
        print(r.headers)
        print("- EXTRA SLEEP")
        # back off harder the closer we get to the limit;
        # max(..., 1) avoids ZeroDivisionError when remaining == 0
        time.sleep(90 / max(remaining, 1))
    try:
        print(r)
        return r.json()["tree"]
    except (ValueError, KeyError):
        # ValueError: body is not JSON; KeyError: no "tree" (empty repo etc.)
        return []
# search tree list
def find_cookiecutter(tree):
    """Yield the directory prefix ("" for root, "sub/dir/" otherwise) of
    every cookiecutter.json found in a git tree listing."""
    for p in tree:
        # raw string: \w / \. / \- in a plain literal are invalid escape
        # sequences and raise warnings on modern Python
        if m := re.match(r"^(([\w.\-]+/)*)cookiecutter\.json$", p["path"]):
            yield m[1]
def has_cookiecutter(tree):
    """Return True if the tree contains any cookiecutter.json, else False.

    The original fell off the end of the loop and returned None for the
    "not found" case; callers only truth-test the result, so returning an
    explicit False is backward-compatible and clearer.
    """
    return next(find_cookiecutter(tree), None) is not None
# find cookiecutter.json, add raw retrieval url, and contents
def cc_json(d, vndpkg, tree):
    """Populate d["cc"] with one entry per cookiecutter.json in *tree*.

    Each entry maps the directory prefix to its display dir, raw
    retrieval URL, and the fetched JSON text.

    Fix: iterate the `tree` parameter instead of reaching back into
    d["tree"] — the only caller passes d["tree"], so behavior is
    unchanged, but the parameter is no longer dead.
    """
    d["cc"] = cc = {}
    for dir in find_cookiecutter(tree):
        url = f"https://raw.githubusercontent.com/{vndpkg}/{d['default_branch']}/{dir}cookiecutter.json"
        cc[dir] = {
            "dir": dir or "/",
            "url": url,
            "json": requests.get(url).text,
        }
# check for any README, and add raw text
def readme(d, vndpkg, tree):
    """Find a top-level README (bare or .md/.txt/.rst/.wiki) in *tree* and
    store its raw text in d["readme"].

    If several variants exist, the last match in tree order wins
    (unchanged from the original behavior).
    """
    for p in tree:
        # raw string for the regex — \. in a plain literal is an invalid escape
        if re.match(r"^README(\.(md|txt|rst|wiki))?$", p["path"]):
            d["readme"] = requests.get(f"https://raw.githubusercontent.com/{vndpkg}/{d['default_branch']}/{p['path']}").text
def write(results):
    """Serialize *results* to github.json, pretty-printed, UTF-8."""
    with open("github.json", "w", encoding="utf-8") as fh:
        json.dump(results, fh, indent=4)
def read():
    """Load github.json and return its contents; {} when the file is
    missing or unparseable.

    Narrowed from a bare `except:` (which also swallowed SystemExit and
    KeyboardInterrupt) to the two failures that can actually occur here.
    """
    try:
        with open("github.json", "r", encoding="utf-8") as f:
            return json.load(f)
    except (OSError, ValueError):
        # OSError: file missing/unreadable; ValueError: invalid JSON
        # (json.JSONDecodeError is a ValueError subclass)
        return {}
# loop over existing repos, add file lists + readme + cc.json + status flag
results = read()
for d in results.values():
    vndpkg = d["full_name"]
    # skip repos already processed in a previous run (either flag suffices)
    if "is_template" in d:
        continue
    if "tree" in d:
        continue
    # vendors explicitly excluded from crawling
    if re.search("hadenlabs/|moorinl/", vndpkg):
        continue
    d["tree"] = tree(vndpkg, branch=d["default_branch"])
    if not d["tree"]:
        print(f"- no tree for {vndpkg}")
    elif has_cookiecutter(d["tree"]):
        d["is_template"] = True
        cc_json(d, vndpkg, d["tree"])
        readme(d, vndpkg, d["tree"])
    else:
        d["is_template"] = False
    # checkpoint roughly every 21 repos so a crash loses little work
    if not randint(0, 20):
        write(results)
    time.sleep(0.25)
# fix: always persist the final state — the in-loop write is probabilistic,
# so without this everything since the last random checkpoint was lost
write(results)