Browser and installer GUI for cookiecutter templates

cookiedough


Check-in [ae7f877aba]


Overview
Comment: Updated API query tools. The collection is a dict now, repos created in December are no longer skipped, there are fewer sleep() calls and less JSON overwriting. Still takes 3 hours.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: ae7f877abae7137df47bf43ca16ee5f6b0c49b617591290b2f5d0f52ca6f2a7b
User & Date: mario 2021-04-06 09:04:06
Context
2021-04-06
09:05
Updated database, now contains 4153 entries. check-in: b565b7c8d0 user: mario tags: trunk
09:04
Updated API query tools. The collection is a dict now, repos created in December are no longer skipped, there are fewer sleep() calls and less JSON overwriting. Still takes 3 hours. check-in: ae7f877aba user: mario tags: trunk
2021-03-30
14:18
Minor typos. check-in: ffb88d9322 user: mario tags: trunk
Changes

Changes to dev/gh_conv.py.

Before (lines 117–131):
    #name = name.lower()
    return re.sub("[_.-]?cookiecutters?[_.-]?", "", name) or name



# 
results = {}
for d in read():
    if not "is_template" in d or not d["is_template"]:
        continue
    print(d["full_name"])

    try:
        d["_disabled"] = False
        config = []

After (lines 117–131):
    #name = name.lower()
    return re.sub("[_.-]?cookiecutters?[_.-]?", "", name) or name



# 
results = {}
for full_name,d in read().items():
    if not "is_template" in d or not d["is_template"]:
        continue
    print(d["full_name"])

    try:
        d["_disabled"] = False
        config = []
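
This hunk is the "collection is a dict now" part of the check-in comment: read() now returns a dict keyed by full_name instead of a list, so the loop switches to .items(). A minimal sketch of why the dict shape helps, using made-up repo entries (not data from the actual github.json):

# sketch with made-up entries: refetching a repo replaces its dict entry
# instead of appending a duplicate to a list
results = {}
for repo in [{"id": 1, "full_name": "vnd/pkg", "is_template": True},
             {"id": 1, "full_name": "vnd/pkg", "is_template": True}]:  # same repo fetched twice
    results[repo["full_name"]] = repo
assert len(results) == 1  # a list would hold two copies and need a separate dedup pass

The old gh_find.py relied on a separate unique() helper to deduplicate by id before every write; keying by full_name makes that pass unnecessary.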

Changes to dev/gh_find.py.

Before (lines 1–76):
#!/usr/bin/env python3
# description: search all cookiecutter repos on GH
#
# Search all repos. But sse sliding date window, to get all.
# So tedious. Yay for proprietary baskets!
#
# to recreate db:
#   · gh_find
#   · gh_tree
#   · gh_conv
#


import re, requests, json, time, dotenv

dotenv.load_dotenv()


# get list of repos for search term and sliding `created` window, optionally page_no
def fetch(page=0, created="2021-03", per_page=50):
    url = "https://api.github.com/search/repositories"
    params = dict(
        q = f"cookiecutter created:{created}",
        sort = "updated",
        order = "desc",
        per_page = per_page,
        page = page
    )
    headers = {
        "User-Agent": "cookiedough/0.1.0 (Python; amd64; requests)",
        "Accept": "application/vnd.github.preview",
        "Authorization": "token " + os.getenv("GITHUB_API_TOKEN")
    }
    print(params)
    r = requests.get(url, params, headers=headers)
    print(r)
    time.sleep(1)
    return r.json()


def unique(results):
    return list({item["id"]: item for item in results}.values())
    # https://stackoverflow.com/a/11092611/345031
    
def write(results):
    with open("github.json", "w", encoding="utf-8") as f:
        f.write(json.dumps(unique(results), indent=4))

def read():
    try:
        with open("github.json", "r", encoding="utf-8") as f:
            return json.loads(f.read())
    except:
        return []

# iterate per year+month, and optionally pages if more than 50 results
def year_month_page(ys, ms, pg, per_page=50):
    for year in ys:
        for month in ms:
            for page in pg:
                print(year, month, page)
                d = fetch(created=f"{year:04}-{month:02}", page=page, per_page=50)
                print(d)
                if "message" in d or not "items" in d:

                    break



                elif len(d["items"]) < per_page:
                    break
                yield d

# add more repo items
results = read()
print(len(results))
for d in year_month_page(range(2013, 2022), range(1,12), range(0,19)):
    if len(d["items"]):
        results = results + d["items"]
        write(results)

After (lines 1–76):
#!/usr/bin/env python3
# description: search all cookiecutter repos on GH
#
# Search all repos. But use sliding date window, to get all.
# So tedious. Yay for proprietary baskets!
#
# to recreate db:
#   · gh_find
#   · gh_tree
#   · gh_conv
#


import os, re, requests, json, time, dotenv
from random import randint
dotenv.load_dotenv()


# get list of repos for search term and sliding `created` window, optionally page_no
def fetch(page=0, created="2021-03", per_page=50):
    url = "https://api.github.com/search/repositories"
    params = dict(
        q = f"cookiecutter created:{created}",
        sort = "updated",
        order = "desc",
        per_page = per_page,
        page = page
    )
    headers = {
        "User-Agent": "cookiedough/0.3.0 (Python; amd64; requests)",
        "Accept": "application/vnd.github.preview",
        "Authorization": "token " + os.getenv("GITHUB_API_TOKEN")
    }
    print("SEARCH_Q=%r" % params)
    r = requests.get(url, params, headers=headers)
    return r.json()


def write(results):
    with open("github.json", "w", encoding="utf-8") as f:
        f.write(json.dumps(results, indent=4))

def read():
    try:
        with open("github.json", "r", encoding="utf-8") as f:
            return json.loads(f.read())
    except:
        return {}

# iterate per year+month, and optionally pages if more than 50 results
def year_month_page(ys, ms, pg, per_page=100):
    for year in ys:
        for month in ms:
            for page in pg:
                d = fetch(created=f"{year:04}-{month:02}", page=page, per_page=per_page)
                if "message" in d or not "items" in d:
                    print("**NO_RESULTS**=%r" % d)
                    break
                print("len_items=%s" % len(d["items"]))
                time.sleep(1)
                yield d
                if len(d["items"]) < per_page:
                    break


# add more repo items
results = read()
print(len(results))
for d in year_month_page(range(2012, 2022), range(1,13), range(0,19)):
    if len(d["items"]):
        for repo in d["items"]:
            results[repo["full_name"]] = repo
        if not randint(0, 20):  # true ~1 in 21 times: checkpoint occasionally instead of rewriting the json every batch
            write(results)
    print("len_results=%s" % len(results))

write(results)
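
The sliding created: window mentioned at the top of the file works around GitHub's hard cap of 1,000 results per search query: slicing the query by creation month keeps each slice under the cap, at the cost of many requests (hence the three hours). A standalone sketch of the idea, assuming the same GITHUB_API_TOKEN environment variable; search_month is a hypothetical helper, not part of the script:

import os, requests

def search_month(created, page=1):
    # hypothetical helper for illustration; the real fetch() adds sorting and a user-agent
    r = requests.get(
        "https://api.github.com/search/repositories",
        params={"q": f"cookiecutter created:{created}", "per_page": 100, "page": page},
        headers={"Authorization": "token " + os.getenv("GITHUB_API_TOKEN")},
    )
    return r.json().get("items", [])

# one month-sized slice per query stays below GitHub's 1,000-result search cap
for year in range(2012, 2022):
    for month in range(1, 13):  # upper bound 13, not 12: the December fix in this check-in
        items = search_month(f"{year:04}-{month:02}")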

Changes to dev/gh_tree.py.

Before (lines 17–37):
#
# 
# · find README
#   https://raw.githubusercontent.com/VND/PKG/master/README.md


import re, requests, json, time, pprint, os, dotenv

dotenv.load_dotenv()


# current file list from default_branch
def tree(vndpkg="vnd/pkg", branch="master"):
    headers = {
        "User-Agent": "cookiedough/0.2.0 (Python; amd64; requests)",
        "Accept": "application/vnd.github.preview",
        # https://docs.github.com/en/developers/apps/authorizing-oauth-apps#non-web-application-flow
        # https://docs.github.com/en/rest/overview/other-authentication-methods#basic-authentication
        # https://github.com/settings/tokens
        "Authorization": "token " + os.getenv("GITHUB_API_TOKEN")
    }
    #for sha in branch, "master", "main", "stable":

After (lines 17–38):
#
# 
# · find README
#   https://raw.githubusercontent.com/VND/PKG/master/README.md


import re, requests, json, time, pprint, os, dotenv
from random import randint
dotenv.load_dotenv()


# current file list from default_branch
def tree(vndpkg="vnd/pkg", branch="master"):
    headers = {
        "User-Agent": "cookiedough/0.3.0 (Python; amd64; requests)",
        "Accept": "application/vnd.github.preview",
        # https://docs.github.com/en/developers/apps/authorizing-oauth-apps#non-web-application-flow
        # https://docs.github.com/en/rest/overview/other-authentication-methods#basic-authentication
        # https://github.com/settings/tokens
        "Authorization": "token " + os.getenv("GITHUB_API_TOKEN")
    }
    #for sha in branch, "master", "main", "stable":
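
tree()'s body is not part of this hunk, but judging from the commented-out loop above it tries the named branch and then common fallbacks. For orientation, a minimal sketch of the underlying GitHub endpoint (GET /repos/{owner}/{repo}/git/trees/{sha} with recursive=1); this illustrates the API, not the function's actual body:

import os, requests

def tree_sketch(vndpkg="vnd/pkg", branch="master"):
    # hypothetical illustration; recursive=1 returns every path in the repository in one response
    r = requests.get(
        f"https://api.github.com/repos/{vndpkg}/git/trees/{branch}",
        params={"recursive": 1},
        headers={"Authorization": "token " + os.getenv("GITHUB_API_TOKEN")},
    )
    return [entry["path"] for entry in r.json().get("tree", [])]
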
Before (lines 88–116):
        f.write(json.dumps(results, indent=4))

def read():
    try:
        with open("github.json", "r", encoding="utf-8") as f:
            return json.loads(f.read())
    except:
        return []

# loop over existing repos, add file lists + readme + cc.json + status flag
results = read()
for i,d in enumerate(results):
    vndpkg = d["full_name"]
    #if "is_template" in d:
    #    continue


    if re.search("hadenlabs/|moorinl/", vndpkg):
        continue
    d["tree"] = tree(vndpkg, branch=d["default_branch"])
    if not len(d["tree"]):
        print(f"- no tree for {vndpkg}")
    elif has_cookiecutter(d["tree"]):
        d["is_template"] = True
        cc_json(d, vndpkg, d["tree"])
        readme(d, vndpkg, d["tree"])
    else:
        d["is_template"] = False

    write(results)
    time.sleep(1)

After (lines 89–120):
        f.write(json.dumps(results, indent=4))

def read():
    try:
        with open("github.json", "r", encoding="utf-8") as f:
            return json.loads(f.read())
    except:
        return {}

# loop over existing repos, add file lists + readme + cc.json + status flag
results = read()
for name,d in results.items():
    vndpkg = d["full_name"]
    if "is_template" in d:
        continue
    if "tree" in d:
        continue
    if re.search("hadenlabs/|moorinl/", vndpkg):
        continue
    d["tree"] = tree(vndpkg, branch=d["default_branch"])
    if not len(d["tree"]):
        print(f"- no tree for {vndpkg}")
    elif has_cookiecutter(d["tree"]):
        d["is_template"] = True
        cc_json(d, vndpkg, d["tree"])
        readme(d, vndpkg, d["tree"])
    else:
        d["is_template"] = False
    if not randint(0,20):  # checkpoint occasionally (~1 in 21 repos) instead of rewriting the json every pass
        write(results)
    time.sleep(0.25)
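
With results now a dict and the formerly commented-out is_template check re-enabled (plus a new tree check), an interrupted run resumes where it stopped instead of refetching everything. A minimal sketch of that skip logic, with made-up entries:

# made-up entries: one repo already processed, one still fresh
results = {
    "vnd/done":  {"full_name": "vnd/done", "tree": ["cookiecutter.json"], "is_template": True},
    "vnd/fresh": {"full_name": "vnd/fresh", "default_branch": "main"},
}
for name, d in results.items():
    if "is_template" in d or "tree" in d:
        continue                        # handled on a previous run, skip the API calls
    print("would fetch tree for", name)  # only vnd/fresh reaches this point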