Check-in [ae7f877aba]
Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
| Comment: | Updated API query tools, use dict for collection now, don't skip over publication in december anymore, less sleep(), less json overwriting. Still takes 3 hours. |
|---|---|
| Downloads: | Tarball | ZIP archive | SQL archive |
| Timelines: | family | ancestors | descendants | both | trunk |
| Files: | files | file ages | folders |
| SHA3-256: |
ae7f877abae7137df47bf43ca16ee5f6 |
| User & Date: | mario 2021-04-06 09:04:06 |
Context
|
2021-04-06
| ||
| 09:05 | Updated database, now contains 4153 entries. check-in: b565b7c8d0 user: mario tags: trunk | |
| 09:04 | Updated API query tools, use dict for collection now, don't skip over publication in december anymore, less sleep(), less json overwriting. Still takes 3 hours. check-in: ae7f877aba user: mario tags: trunk | |
|
2021-03-30
| ||
| 14:18 | minor typos check-in: ffb88d9322 user: mario tags: trunk | |
Changes
Changes to dev/gh_conv.py.
| ︙ | ︙ | |||
117 118 119 120 121 122 123 |
#name = name.lower()
return re.sub("[_.-]?cookiecutters?[_.-]?", "", name) or name
#
results = {}
| | | 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
#name = name.lower()
return re.sub("[_.-]?cookiecutters?[_.-]?", "", name) or name
#
results = {}
for full_name,d in read().items():
if not "is_template" in d or not d["is_template"]:
continue
print(d["full_name"])
try:
d["_disabled"] = False
config = []
|
| ︙ | ︙ |
Changes to dev/gh_find.py.
1 2 3 | #!/usr/bin/env python3 # description: search all cookiecutter repos on GH # | | | > | | < < < < < < | | | < | < > > > > | < | | > > | > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
#!/usr/bin/env python3
# description: search all cookiecutter repos on GH
#
# Search all repos. But use sliding date window, to get all.
# So tedious. Yay for proprietary baskets!
#
# to recreate db:
# · gh_find
# · gh_tree
# · gh_conv
#
import os, re, requests, json, time, dotenv
from random import randint
dotenv.load_dotenv()
# get list of repos for search term and sliding `created` window, optionally page_no
def fetch(page=0, created="2021-03", per_page=50):
    """Query the GitHub repository search API for cookiecutter repos.

    Args:
        page: result page number (NOTE(review): GitHub pages are 1-indexed;
              page=0 and page=1 return the same first page — results are
              deduplicated by full_name downstream, so this is harmless).
        created: value for the `created:` search qualifier, e.g. "2021-03".
        per_page: requested page size (GitHub caps this at 100).

    Returns:
        Decoded JSON response dict — either a search result with "items",
        or an error payload carrying "message" (e.g. rate limiting).
    """
    url = "https://api.github.com/search/repositories"
    params = dict(
        q = f"cookiecutter created:{created}",
        sort = "updated",
        order = "desc",
        per_page = per_page,
        page = page
    )
    headers = {
        "User-Agent": "cookiedough/0.3.0 (Python; amd64; requests)",
        "Accept": "application/vnd.github.preview",
        "Authorization": "token " + os.getenv("GITHUB_API_TOKEN")
    }
    print("SEARCH_Q=%r" % params)
    # pass params by keyword and bound the request: without a timeout a
    # single stalled connection can hang the multi-hour crawl forever
    r = requests.get(url, params=params, headers=headers, timeout=30)
    return r.json()
def write(results):
    """Persist the collected repo metadata to github.json (pretty-printed)."""
    with open("github.json", "w", encoding="utf-8") as fh:
        json.dump(results, fh, indent=4)
def read():
    """Load previously collected results from github.json.

    Returns:
        The parsed dict, or {} when the file is missing or unparsable
        (a fresh crawl starts from an empty collection).
    """
    try:
        with open("github.json", "r", encoding="utf-8") as f:
            return json.loads(f.read())
    except (OSError, ValueError):
        # was a bare `except:`, which would also swallow KeyboardInterrupt/
        # SystemExit; only treat a missing/corrupt cache as "start empty"
        # (json.JSONDecodeError is a ValueError subclass)
        return {}
# iterate per year+month, and optionally pages if more than 50 results
def year_month_page(ys, ms, pg, per_page=100):
    """Yield search result pages per sliding year+month window.

    The search API caps total results per query, so the crawl slices by
    `created:` month; pages advance only while a window keeps returning
    full pages, so sparse months cost a single request.

    Args:
        ys: iterable of years.
        ms: iterable of months (1-12).
        pg: iterable of page numbers to try within each window.
        per_page: page size requested from the API.

    Yields:
        Decoded search-result dicts containing an "items" list.
    """
    for year in ys:
        for month in ms:
            for page in pg:
                d = fetch(created=f"{year:04}-{month:02}", page=page, per_page=per_page)
                # error payloads carry "message" instead of "items"
                # (rate limit, bad token); give up on this window
                if "message" in d or "items" not in d:
                    print("**NO_RESULTS**=%r" % d)
                    break
                print("len_items=%s" % len(d["items"]))
                time.sleep(1)  # stay under the search API rate limit
                yield d
                if len(d["items"]) < per_page:
                    break  # short page => no further pages this month
# add more repo items
results = read()
print(len(results))
for d in year_month_page(range(2012, 2022), range(1, 13), range(0, 19)):
    if d["items"]:  # idiomatic truthiness instead of len() check
        for repo in d["items"]:
            # key by full_name so re-runs update entries in place
            # instead of appending duplicates
            results[repo["full_name"]] = repo
        # checkpoint roughly every 21st batch to limit json rewrite churn
        if not randint(0, 20):
            write(results)
        print("len_results=%s" % len(results))
# final flush so the last batches are never lost
write(results)
|
Changes to dev/gh_tree.py.
| ︙ | ︙ | |||
17 18 19 20 21 22 23 24 25 26 27 28 29 |
#
#
# · find README
# https://raw.githubusercontent.com/VND/PKG/master/README.md
import re, requests, json, time, pprint, os, dotenv
dotenv.load_dotenv()
# current file list from default_branch
def tree(vndpkg="vnd/pkg", branch="master"):
headers = {
| > | | 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
#
#
# · find README
# https://raw.githubusercontent.com/VND/PKG/master/README.md
import re, requests, json, time, pprint, os, dotenv
from random import randint
dotenv.load_dotenv()
# current file list from default_branch
def tree(vndpkg="vnd/pkg", branch="master"):
headers = {
"User-Agent": "cookiedough/0.3.0 (Python; amd64; requests)",
"Accept": "application/vnd.github.preview",
# https://docs.github.com/en/developers/apps/authorizing-oauth-apps#non-web-application-flow
# https://docs.github.com/en/rest/overview/other-authentication-methods#basic-authentication
# https://github.com/settings/tokens
"Authorization": "token " + os.getenv("GITHUB_API_TOKEN")
}
#for sha in branch, "master", "main", "stable":
|
| ︙ | ︙ | |||
88 89 90 91 92 93 94 |
f.write(json.dumps(results, indent=4))
def read():
try:
with open("github.json", "r", encoding="utf-8") as f:
return json.loads(f.read())
except:
| | | | | > > > | | | 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
f.write(json.dumps(results, indent=4))
def read():
    """Load the crawl database from github.json; {} when missing/corrupt."""
    try:
        with open("github.json", "r", encoding="utf-8") as f:
            return json.loads(f.read())
    except (OSError, ValueError):
        # narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; json.JSONDecodeError is a ValueError subclass
        return {}
# loop over existing repos, add file lists + readme + cc.json + status flag
results = read()
# dict keys duplicate d["full_name"], so iterate values only (the old
# `for name,d in results.items()` never used `name`)
for d in results.values():
    vndpkg = d["full_name"]
    if "is_template" in d:
        continue  # already classified on a previous run
    if "tree" in d:
        continue  # file list already fetched
    if re.search("hadenlabs/|moorinl/", vndpkg):
        continue  # skip these vendors — presumably known noise; confirm
    d["tree"] = tree(vndpkg, branch=d["default_branch"])
    if not d["tree"]:  # truthiness instead of len() check
        print(f"- no tree for {vndpkg}")
    elif has_cookiecutter(d["tree"]):
        d["is_template"] = True
        cc_json(d, vndpkg, d["tree"])
        readme(d, vndpkg, d["tree"])
    else:
        d["is_template"] = False
    # checkpoint roughly every 21st repo so a crash loses little work
    if not randint(0, 20):
        write(results)
    time.sleep(0.25)  # throttle GitHub requests between repos
|