Check-in [ae7f877aba]
Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Updated API query tools, use dict for collection now, don't skip over publication in december anymore, less sleep(), less json overwriting. Still takes 3 hours. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: |
ae7f877abae7137df47bf43ca16ee5f6 |
User & Date: | mario 2021-04-06 09:04:06 |
Context
2021-04-06
| ||
09:05 | Updated database, now contains 4153 entries. check-in: b565b7c8d0 user: mario tags: trunk | |
09:04 | Updated API query tools, use dict for collection now, don't skip over publication in december anymore, less sleep(), less json overwriting. Still takes 3 hours. check-in: ae7f877aba user: mario tags: trunk | |
2021-03-30
| ||
14:18 | minor typos check-in: ffb88d9322 user: mario tags: trunk | |
Changes
Changes to dev/gh_conv.py.
︙ | ︙ | |||
117 118 119 120 121 122 123 | #name = name.lower() return re.sub("[_.-]?cookiecutters?[_.-]?", "", name) or name # results = {} | | | 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 | #name = name.lower() return re.sub("[_.-]?cookiecutters?[_.-]?", "", name) or name # results = {} for full_name,d in read().items(): if not "is_template" in d or not d["is_template"]: continue print(d["full_name"]) try: d["_disabled"] = False config = [] |
︙ | ︙ |
Changes to dev/gh_find.py.
#!/usr/bin/env python3
# description: search all cookiecutter repos on GH
#
# Search all repos. But use sliding date window, to get all.
# So tedious. Yay for proprietary baskets!
#
# to recreate db:
#  · gh_find
#  · gh_tree
#  · gh_conv
#
import os, re, requests, json, time, dotenv
from random import randint
dotenv.load_dotenv()


# get list of repos for search term and sliding `created` window, optionally page_no
def fetch(page=0, created="2021-03", per_page=50):
    """Run one GitHub repository search and return the decoded JSON reply.

    page     -- result page number passed through to the API
    created  -- value for the `created:` search qualifier (e.g. "2021-03")
    per_page -- number of results requested per page

    Raises RuntimeError when GITHUB_API_TOKEN is not set, instead of the
    opaque TypeError that `"token " + None` would produce.
    """
    token = os.getenv("GITHUB_API_TOKEN")
    if not token:
        raise RuntimeError("GITHUB_API_TOKEN is not set (environment or .env file)")
    url = "https://api.github.com/search/repositories"
    params = dict(
        q = f"cookiecutter created:{created}",
        sort = "updated",
        order = "desc",
        per_page = per_page,
        page = page
    )
    headers = {
        "User-Agent": "cookiedough/0.3.0 (Python; amd64; requests)",
        "Accept": "application/vnd.github.preview",
        "Authorization": "token " + token
    }
    print("SEARCH_Q=%r" % params)
    # explicit timeout: without one a stalled connection blocks the run forever
    r = requests.get(url, params, headers=headers, timeout=60)
    return r.json()


def write(results):
    """Serialize `results` to github.json.

    The data goes to a temporary file first and is then moved into place, so
    an interrupted run cannot leave a truncated github.json behind (which
    read() would otherwise turn into an empty collection).
    """
    tmp = "github.json.tmp"
    with open(tmp, "w", encoding="utf-8") as f:
        f.write(json.dumps(results, indent=4))
    os.replace(tmp, "github.json")


def read():
    """Return the collection stored in github.json, or {} if unavailable.

    Still best-effort: a missing, unreadable or undecodable file yields an
    empty dict. Only I/O and decoding errors are caught, so Ctrl-C and
    programming errors are no longer swallowed.
    """
    try:
        with open("github.json", "r", encoding="utf-8") as f:
            return json.loads(f.read())
    except (OSError, ValueError):  # json.JSONDecodeError is a ValueError
        return {}


# iterate per year+month, and optionally pages if more than 50 results
def year_month_page(ys, ms, pg, per_page=100):
    """Yield one search reply per (year, month, page) combination.

    ys, ms, pg -- iterables of years, months and page numbers
    per_page   -- page size handed to fetch()

    Paging for a month stops at the first reply that carries a "message"
    key or lacks "items", or after a page that came back shorter than
    `per_page`.
    """
    # NOTE(review): GitHub documents `page` as defaulting to 1; a caller that
    # starts at 0 presumably fetches the first page twice -- confirm.
    for year in ys:
        for month in ms:
            for page in pg:
                d = fetch(created=f"{year:04}-{month:02}", page=page, per_page=per_page)
                if "message" in d or "items" not in d:
                    print("**NO_RESULTS**=%r" % d)
                    break
                print("len_items=%s" % len(d["items"]))
                time.sleep(1)
                yield d
                if len(d["items"]) < per_page:
                    break


# add more repo items
results = read()
print(len(results))
try:
    for d in year_month_page(range(2012, 2022), range(1, 13), range(0, 19)):
        # keyed by full_name, so a repo seen again just replaces its old entry
        for repo in d["items"]:
            results[repo["full_name"]] = repo
        # only rewrite the json file on roughly 1 in 21 iterations
        if not randint(0, 20):
            write(results)
        print("len_results=%s" % len(results))
finally:
    # persist whatever was collected, even if a request failed or the run was interrupted
    write(results)
Changes to dev/gh_tree.py.
︙ | ︙ | |||
17 18 19 20 21 22 23 24 25 26 27 28 29 | # # # · find README # https://raw.githubusercontent.com/VND/PKG/master/README.md import re, requests, json, time, pprint, os, dotenv dotenv.load_dotenv() # current file list from default_branch def tree(vndpkg="vnd/pkg", branch="master"): headers = { | > | | 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 | # # # · find README # https://raw.githubusercontent.com/VND/PKG/master/README.md import re, requests, json, time, pprint, os, dotenv from random import randint dotenv.load_dotenv() # current file list from default_branch def tree(vndpkg="vnd/pkg", branch="master"): headers = { "User-Agent": "cookiedough/0.3.0 (Python; amd64; requests)", "Accept": "application/vnd.github.preview", # https://docs.github.com/en/developers/apps/authorizing-oauth-apps#non-web-application-flow # https://docs.github.com/en/rest/overview/other-authentication-methods#basic-authentication # https://github.com/settings/tokens "Authorization": "token " + os.getenv("GITHUB_API_TOKEN") } #for sha in branch, "master", "main", "stable": |
︙ | ︙ | |||
88 89 90 91 92 93 94 | f.write(json.dumps(results, indent=4)) def read(): try: with open("github.json", "r", encoding="utf-8") as f: return json.loads(f.read()) except: | | | | | > > > | | | 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 | f.write(json.dumps(results, indent=4)) def read(): try: with open("github.json", "r", encoding="utf-8") as f: return json.loads(f.read()) except: return {} # loop over existing repos, add file lists + readme + cc.json + status flag results = read() for name,d in results.items(): vndpkg = d["full_name"] if "is_template" in d: continue if "tree" in d: continue if re.search("hadenlabs/|moorinl/", vndpkg): continue d["tree"] = tree(vndpkg, branch=d["default_branch"]) if not len(d["tree"]): print(f"- no tree for {vndpkg}") elif has_cookiecutter(d["tree"]): d["is_template"] = True cc_json(d, vndpkg, d["tree"]) readme(d, vndpkg, d["tree"]) else: d["is_template"] = False if not randint(0,20): write(results) time.sleep(0.25) |