|
| 1 | + |
| 2 | +import os |
| 3 | +from github import Github |
| 4 | + |
# Timestamp layout used for the ``starred_at`` column: date, "T", then
# hour:minute:second. (The minute field was previously missing.)
TIME_FORMAT = "%Y-%m-%dT%H:%M:%S"


class StarTrace:
    """Record languages and stargazers of GitHub repositories matching a query.

    External identifiers (repository IDs, user IDs, language names) are mapped
    to internal integer IDs that are handed out in order of first appearance,
    starting at 0. Each kind of identifier has its own independent numbering.
    """

    def __init__(self, path=None, token=None):
        """Set up an empty tracer.

        :param path: directory the output files are written to. When ``None``
            the current working directory at the time of the call is used.
        :param token: value handed to ``Github(...)`` when searching; ``None``
            is passed through unchanged.
        """
        # Resolve the default here rather than in the signature: a default of
        # ``os.getcwd()`` would be evaluated once, when the class is defined.
        self.path = os.getcwd() if path is None else path
        self.token = token
        # external ID to project ID (internal)
        self.e2p = {}
        # external ID to stargazer ID (internal)
        self.e2s = {}
        # language to language ID (internal)
        self.e2l = {}

    def search(self, query):
        """Run a repository search and write the results below ``self.path``.

        Files written:

        * ``project-language.csv`` -- one ``language_id,project_id`` row per
          language reported for each repository.
        * ``project-stargazer.csv`` -- one
          ``stargazer_id,project_id,starred_at`` row per stargazer, with
          ``starred_at`` formatted using ``TIME_FORMAT``.
        * ``languages.txt`` -- language names, one per line, ordered by their
          internal ID. Only written when the search loop finished without
          raising.

        :param query: search string passed to ``Github.search_repositories``
            (results are requested sorted by stars).
        """
        # One ``with`` owns both files, so neither handle can leak if opening
        # the second one fails or the loop raises part-way through.
        with open(self.get_path("project-stargazer.csv"), "w") as stargazer_frame, \
                open(self.get_path("project-language.csv"), "w") as language_frame:
            gh = Github(self.token)
            results = gh.search_repositories(query, sort='stars')
            for i, repo in enumerate(results):
                # Progress output: running index and the external repository ID.
                print(i, repo.id)
                project_id = self.add_or_get_project(repo.id)
                for lang in repo.get_languages():
                    language_frame.write("{lang_id},{project_id}\n".format(
                        lang_id=self.add_or_get_language(lang),
                        project_id=project_id
                    ))
                for stargazer in repo.get_stargazers_with_dates():
                    stargazer_frame.write("{stargazer_id},{project_id},{starred_at}\n".format(
                        stargazer_id=self.add_or_get_stargazer(stargazer.user.id),
                        project_id=project_id,
                        starred_at=stargazer.starred_at.strftime(TIME_FORMAT)
                    ))

        # Line N of languages.txt is the name of the language with internal ID N.
        with open(self.get_path("languages.txt"), "w") as f:
            f.write('\n'.join(sorted(self.e2l, key=self.e2l.get)))

    def add_or_get_project(self, repo_id):
        """Return the internal project ID for *repo_id*, assigning one if new."""
        return self._add_or_get(repo_id, self.e2p)

    def add_or_get_stargazer(self, user_id):
        """Return the internal stargazer ID for *user_id*, assigning one if new."""
        return self._add_or_get(user_id, self.e2s)

    def add_or_get_language(self, language):
        """Return the internal language ID for *language*, assigning one if new."""
        return self._add_or_get(language, self.e2l)

    def get_path(self, filename):
        """Return *filename* joined onto the output directory ``self.path``."""
        return os.path.join(self.path, filename)

    @classmethod
    def _add_or_get(cls, external_id, store):
        """Look up *external_id* in *store*, registering it when absent.

        :param external_id: hashable key to look up.
        :param store: dict mapping external IDs to internal IDs; mutated in
            place when the key is new.
        :return: the internal ID. New keys receive ``len(store)`` as measured
            before insertion, so IDs are consecutive integers from 0.
        """
        internal_id = store.get(external_id)
        if internal_id is None:
            internal_id = len(store)
            store[external_id] = internal_id
        return internal_id
| 67 | + |
def main():
    """Search for "Austin" repositories, using a token from disk when available.

    If a file named ``token`` exists in the current working directory, its
    contents (with surrounding whitespace stripped) are passed to
    ``StarTrace`` as the token; otherwise ``None`` is passed.
    """
    token = None
    if os.path.exists("token"):
        # The context manager closes the file as soon as it has been read,
        # instead of leaving the handle open until garbage collection.
        with open("token") as token_file:
            token = token_file.read().strip()
    st = StarTrace(token=token)
    st.search("Austin")
| 75 | + |
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
0 commit comments