Home > Article > Backend Development > How to retrieve Github Repository Data using Python
Does your organization have (way) too many GitHub repositories, and do you need an easy way to summarize and keep a record of what each one is for, whether for reporting, dashboards, or auditing? Here's a quick script that does exactly that using the GitHub API.
get_repo_info(owner, repo):
get_collaborators(collaborators_url):
get_languages(languages_url):
get_open_issues(owner, repo):
get_repo_data(repo_url):
import json
import os

import requests
from pymongo import MongoClient

# MongoDB setup (replace with your actual connection details)
client = MongoClient("mongodb://localhost:27017/")
db = client["github_repos"]  # Database name
collection = db["repos"]  # Collection name

GITHUB_API = "https://api.github.com"
# requests has no default timeout; without one a stalled connection would
# block the script forever.
REQUEST_TIMEOUT = 10  # seconds


def _github_get(url):
    """Send a GET request to the GitHub API and return the raw response.

    If the ``GITHUB_TOKEN`` environment variable is set, it is sent as a
    bearer token. Some endpoints used below (notably the collaborators list)
    reject unauthenticated requests, and authenticated calls get a higher
    rate limit.

    Raises:
        requests.RequestException: on connection errors or timeouts.
    """
    headers = {"Accept": "application/vnd.github+json"}
    token = os.environ.get("GITHUB_TOKEN")
    if token:
        headers["Authorization"] = f"Bearer {token}"
    return requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)


def _split_repo_url(repo_url):
    """Return ``(owner, repo)`` taken from the last two path segments of a URL.

    A trailing slash and a trailing ``.git`` are ignored so that
    ``https://github.com/o/r/`` and ``https://github.com/o/r.git`` both
    resolve to ``("o", "r")``.

    Raises:
        ValueError: if the URL has fewer than two ``/``-separated segments.
    """
    parts = repo_url.rstrip("/").split("/")
    if len(parts) < 2:
        raise ValueError(f"Cannot extract owner/repo from: {repo_url!r}")
    owner, repo = parts[-2:]
    if repo.endswith(".git"):
        repo = repo[: -len(".git")]
    return owner, repo


def get_repo_info(owner, repo):
    """Fetch the repository metadata document for ``owner/repo``.

    Returns:
        The decoded JSON dict on HTTP 200; otherwise prints the status code
        and returns ``None``.
    """
    response = _github_get(f"{GITHUB_API}/repos/{owner}/{repo}")
    if response.status_code == 200:
        return response.json()
    print(f"Error: {response.status_code}")
    return None


def get_collaborators(collaborators_url):
    """Return the login names listed at ``collaborators_url``.

    Returns an empty list on any non-200 response. GitHub requires an
    authenticated caller with sufficient access for this endpoint, so expect
    ``[]`` unless ``GITHUB_TOKEN`` is set.
    """
    response = _github_get(collaborators_url)
    if response.status_code == 200:
        return [collaborator["login"] for collaborator in response.json()]
    return []


def get_languages(languages_url):
    """Return the language names reported at ``languages_url``.

    The endpoint returns a mapping of language name to a number; only the
    names are kept. Returns an empty list on any non-200 response.
    """
    response = _github_get(languages_url)
    if response.status_code == 200:
        return list(response.json())
    return []


def get_open_issues(owner, repo):
    """Return the open issues of ``owner/repo`` as a list of JSON dicts.

    The issues endpoint also returns pull requests (marked with a
    ``pull_request`` key); those are dropped so the result contains issues
    only. Only the first page (up to 100 items) is fetched; further pages
    are not followed.

    Returns an empty list (after printing the status code) on a non-200
    response.
    """
    url = f"{GITHUB_API}/repos/{owner}/{repo}/issues?state=open&per_page=100"
    response = _github_get(url)
    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return []
    return [issue for issue in response.json() if "pull_request" not in issue]


def get_latest_release(owner, repo):
    """Return the tag name of the latest published release, or ``None``.

    ``None`` is returned for any non-200 response, which includes the 404
    GitHub sends when the repository has no releases.
    """
    response = _github_get(f"{GITHUB_API}/repos/{owner}/{repo}/releases/latest")
    if response.status_code == 200:
        return response.json().get("tag_name")
    return None


def get_repo_data(repo_url):
    """Collect a summary of a GitHub repository and store it in MongoDB.

    Args:
        repo_url: Repository URL whose last two path segments are the owner
            and the repository name.

    Returns:
        The summary dict (JSON-serializable), or ``None`` when the
        repository metadata could not be fetched.
    """
    owner, repo = _split_repo_url(repo_url)
    repo_info = get_repo_info(owner, repo)
    if not repo_info:
        return None

    data = {
        "Github URL": repo_url,
        "Project name": repo_info["name"],
        "Project owner": repo_info["owner"]["login"],
        # collaborators_url is a URI template ("...{/collaborator}");
        # keep only the part before the template expression.
        "List users with access": get_collaborators(
            repo_info["collaborators_url"].split("{")[0]
        ),
        "Programming languages used": get_languages(repo_info["languages_url"]),
        "Security/visibility level": repo_info["visibility"],
        "Summary": repo_info["description"],
        "Last maintained": repo_info["pushed_at"],
        # Previously this stored the default branch name, which is not a
        # release; query the releases endpoint instead.
        "Last release": get_latest_release(owner, repo),
        "Open issues": get_open_issues(owner, repo),
    }

    # insert_one() adds an "_id" ObjectId to the dict it receives. Insert a
    # copy so the returned dict stays JSON-serializable for callers.
    collection.insert_one(dict(data))
    print("Data inserted into MongoDB successfully.")
    return data


if __name__ == "__main__":
    # Example usage
    repo_url = "https://github.com/URL"
    repo_data = get_repo_data(repo_url)
    if repo_data:
        print(json.dumps(repo_data, indent=4))
The above is the detailed content of How to retrieve Github Repository Data using Python. For more information, please follow other related articles on the PHP Chinese website!