From b606384dc183a4bfb5690de2e76899490b751f5a Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Tue, 10 Sep 2024 13:21:20 -0700 Subject: [PATCH 1/8] Init readme gen --- .github/workflows/readme.yaml | 40 ++++++++++++++++++++++++++++++ .github/workflows/update_readme.py | 21 ++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 .github/workflows/readme.yaml create mode 100644 .github/workflows/update_readme.py diff --git a/.github/workflows/readme.yaml b/.github/workflows/readme.yaml new file mode 100644 index 0000000000..103f900e25 --- /dev/null +++ b/.github/workflows/readme.yaml @@ -0,0 +1,40 @@ +name: Update README Files + +on: + pull_request: + types: [opened, reopened, synchronize] + +jobs: + update_readme: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.ref }} + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + + - name: Install dependencies (if needed) + run: pip install -r requirements.txt + + - name: Create or Update README Files + run: python update_readme.py + + - name: Commit changes + run: | + git config --global user.email "action@github.com" + git config --global user.name "GitHub Action" + git add . + git commit + + - name: Push changes + uses: ad-m/github-push-action@master + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + branch: ${{ github.event.pull_request.head.ref }} diff --git a/.github/workflows/update_readme.py b/.github/workflows/update_readme.py new file mode 100644 index 0000000000..4a735f4463 --- /dev/null +++ b/.github/workflows/update_readme.py @@ -0,0 +1,21 @@ +import os + +# Get all top-level directories +top_level_dirs = [ + d for d in os.listdir(".") if os.path.isdir(d) and not d.startswith(".") +] + +for directory in top_level_dirs: + readme_path = os.path.join(directory, "README.md") + mode = "w" if not os.path.exists(readme_path) else "a+" + + with open(readme_path, mode, encoding="utf-8") as f: + f.seek(0) # Move to the beginning of the file for 'a+' mode + content = f.read() + + # Check if content exists, otherwise add default content + if not content: + f.write(f"# {directory}\n\nA brief description of this directory.") + else: + # You could add logic here to update existing READMEs if needed + pass From 7def2a2440f6f3bcdc52424bc74c87751dcb70e9 Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Tue, 10 Sep 2024 13:24:23 -0700 Subject: [PATCH 2/8] Update permissions --- .github/workflows/readme.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/readme.yaml b/.github/workflows/readme.yaml index 103f900e25..b422e51d4e 100644 --- a/.github/workflows/readme.yaml +++ b/.github/workflows/readme.yaml @@ -8,6 +8,10 @@ jobs: update_readme: runs-on: ubuntu-latest + permissions: + contents: write + pull-requests: write + steps: - name: Checkout repository uses: actions/checkout@v4 From ba6a0a9a306ff7834472d9d9870980a5c4912c42 Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Tue, 10 Sep 2024 13:28:15 -0700 Subject: [PATCH 3/8] Update checkout --- .github/workflows/readme.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/readme.yaml b/.github/workflows/readme.yaml index b422e51d4e..daa6406f48 100644 --- a/.github/workflows/readme.yaml +++ b/.github/workflows/readme.yaml @@ -16,7 +16,6 @@ jobs: - name: Checkout repository uses: actions/checkout@v4 with: - ref: ${{ github.event.pull_request.head.ref }} fetch-depth: 0 - name: Set up Python From 63210afcd6e31f5835b04b70b55b34b880a57285 Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Tue, 10 Sep 2024 13:35:50 -0700 Subject: [PATCH 4/8] Rename file --- .github/workflows/{readme.yaml => update-readme.yaml} | 4 ++-- .github/workflows/update_readme/requirements.txt | 1 + .github/workflows/{ => update_readme}/update_readme.py | 0 3 files changed, 3 insertions(+), 2 deletions(-) rename .github/workflows/{readme.yaml => update-readme.yaml} (88%) create mode 100644 .github/workflows/update_readme/requirements.txt rename .github/workflows/{ => update_readme}/update_readme.py (100%) diff --git a/.github/workflows/readme.yaml b/.github/workflows/update-readme.yaml similarity index 88% rename from .github/workflows/readme.yaml rename to .github/workflows/update-readme.yaml index daa6406f48..629d5045e7 100644 --- a/.github/workflows/readme.yaml +++ b/.github/workflows/update-readme.yaml @@ -23,8 +23,8 @@ jobs: with: python-version: '3.x' - - name: Install dependencies (if needed) - run: pip install -r requirements.txt + - name: Install requirements + run: python3 -m pip install -U -r .github/workflows/update_readme/requirements.txt - name: Create or Update README Files run: python update_readme.py diff --git a/.github/workflows/update_readme/requirements.txt b/.github/workflows/update_readme/requirements.txt new file mode 100644 index 0000000000..d09caa45e0 --- /dev/null +++ b/.github/workflows/update_readme/requirements.txt @@ -0,0 +1 @@ +google-cloud-aiplatform diff --git a/.github/workflows/update_readme.py b/.github/workflows/update_readme/update_readme.py similarity index 100% rename from .github/workflows/update_readme.py rename to .github/workflows/update_readme/update_readme.py From acf751dbfe18bc35af5aec9db48e1d92f2b82b81 Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Tue, 10 Sep 2024 13:36:09 -0700 Subject: [PATCH 5/8] Change path --- .github/workflows/update-readme.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/update-readme.yaml b/.github/workflows/update-readme.yaml index 629d5045e7..d3f2ad5a6d 100644 --- a/.github/workflows/update-readme.yaml +++ b/.github/workflows/update-readme.yaml @@ -27,7 +27,7 @@ jobs: run: python3 -m pip install -U -r .github/workflows/update_readme/requirements.txt - name: Create or Update README Files - run: python update_readme.py + run: python .github/workflows/update_readme/update_readme.py - name: Commit changes run: | From 1b1b3a99103b2862cb73a86b4a30d7d515a09c1f Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Tue, 10 Sep 2024 13:38:14 -0700 Subject: [PATCH 6/8] Update write --- .github/workflows/update_readme/update_readme.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/update_readme/update_readme.py b/.github/workflows/update_readme/update_readme.py index 4a735f4463..c230882956 100644 --- a/.github/workflows/update_readme/update_readme.py +++ b/.github/workflows/update_readme/update_readme.py @@ -7,7 +7,7 @@ for directory in top_level_dirs: readme_path = os.path.join(directory, "README.md") - mode = "w" if not os.path.exists(readme_path) else "a+" + mode = "w+" if not os.path.exists(readme_path) else "a+" with open(readme_path, mode, encoding="utf-8") as f: f.seek(0) # Move to the beginning of the file for 'a+' mode From a67a094dcd1a4f4d5442244aa8e0ab6677cf3f72 Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Tue, 10 Sep 2024 13:48:07 -0700 Subject: [PATCH 7/8] Add Gemini to README generation --- .github/workflows/update-readme.yaml | 2 +- .../workflows/update_readme/requirements.txt | 1 + .../workflows/update_readme/update_readme.py | 118 +++++++++++++++--- 3 files changed, 101 insertions(+), 20 deletions(-) diff --git a/.github/workflows/update-readme.yaml b/.github/workflows/update-readme.yaml index d3f2ad5a6d..f5f9283364 100644 --- a/.github/workflows/update-readme.yaml +++ b/.github/workflows/update-readme.yaml @@ -34,7 +34,7 @@ jobs: git config --global user.email "action@github.com" git config --global user.name "GitHub Action" git add . - git commit + git commit -m "Updated README" - name: Push changes uses: ad-m/github-push-action@master diff --git a/.github/workflows/update_readme/requirements.txt b/.github/workflows/update_readme/requirements.txt index d09caa45e0..23ec10f6a6 100644 --- a/.github/workflows/update_readme/requirements.txt +++ b/.github/workflows/update_readme/requirements.txt @@ -1 +1,2 @@ google-cloud-aiplatform +magika diff --git a/.github/workflows/update_readme/update_readme.py b/.github/workflows/update_readme/update_readme.py index c230882956..f6493848b5 100644 --- a/.github/workflows/update_readme/update_readme.py +++ b/.github/workflows/update_readme/update_readme.py @@ -1,21 +1,101 @@ import os +from typing import List, Tuple +from pathlib import Path +import magika +import datetime +from vertexai.preview.generative_models import GenerativeModel +from vertexai.preview import caching -# Get all top-level directories -top_level_dirs = [ - d for d in os.listdir(".") if os.path.isdir(d) and not d.startswith(".") -] - -for directory in top_level_dirs: - readme_path = os.path.join(directory, "README.md") - mode = "w+" if not os.path.exists(readme_path) else "a+" - - with open(readme_path, mode, encoding="utf-8") as f: - f.seek(0) # Move to the beginning of the file for 'a+' mode - content = f.read() - - # Check if content exists, otherwise add default content - if not content: - f.write(f"# {directory}\n\nA brief description of this directory.") - else: - # You could add logic here to update existing READMEs if needed - pass +PROJECT_ID = "document-ai-test-337818" # @param {type:"string"} +LOCATION = "us-central1" # @param {type:"string"} + +import vertexai + +vertexai.init(project=PROJECT_ID, location=LOCATION) + + +def extract_code(repo_dir: str) -> Tuple[List, str]: + """Create an index, extract content of code/text files.""" + + m = magika.Magika() + + code_index = [] + code_text = [] + + for root, _, files in os.walk(repo_dir): + for file in files: + file_path = os.path.join(root, file) + relative_path = os.path.relpath(file_path, repo_dir) + file_type = m.identify_path(Path(file_path)) + if file_type.output.group in ("text", "code"): + try: + with open(file_path, "r", errors="replace", encoding="utf-8") as f: + code_text.append(f"----- File: {relative_path} -----\n") + code_text.append("```") + code_text.append(f.read()) + code_text.append("```") + code_text.append("\n-------------------------\n") + code_index.append(relative_path) + except Exception: + pass + + return code_index, "".join(code_text) + + +def gemini(code_index: List[str], code_text: str) -> str: + MODEL_ID = "gemini-1.5-pro-001" # @param {type:"string"} + + contents = f""" + Context: + - The entire codebase is provided below. + - Here is an index of all of the files in the codebase: + \n\n{code_index}\n\n. + - Then each of the files are concatenated together. You will find all of the code you need: + \n\n{code_text}\n\n + """ + + cached_content = caching.CachedContent.create( + model_name=MODEL_ID, + system_instruction="You are an expert software engineer, proficient in GitHub, Generative AI and Google Cloud.", + contents=contents, + ttl=datetime.timedelta(minutes=60), + display_name="example-cache", + ) + + prompt = "Write a GitHub README.md file for the directory in the context." + + model = GenerativeModel.from_cached_content(cached_content=cached_content) + + response = model.generate_content(prompt) + + return response.text + + +def update_readme() -> None: + + # Get all top-level directories + top_level_dirs = [ + d for d in os.listdir(".") if os.path.isdir(d) and not d.startswith(".") + ] + + for directory in top_level_dirs: + readme_path = os.path.join(directory, "README.md") + mode = "w+" if not os.path.exists(readme_path) else "a+" + + code_index, code_text = extract_code(directory) + readme_content = gemini(code_index, code_text) + + with open(readme_path, mode, encoding="utf-8") as f: + f.seek(0) # Move to the beginning of the file for 'a+' mode + content = f.read() + + # Check if content exists, otherwise add default content + if not content: + f.write(readme_content) + else: + # You could add logic here to update existing READMEs if needed + pass + + +if __name__ == "__main__": + update_readme() From 3905ad8aff4987a96288bf36b2feb9ae150fdacc Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Tue, 10 Sep 2024 14:14:10 -0700 Subject: [PATCH 8/8] Update prompt --- .../workflows/update_readme/update_readme.py | 48 ++++++++++++------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/.github/workflows/update_readme/update_readme.py b/.github/workflows/update_readme/update_readme.py index f6493848b5..16750d65c0 100644 --- a/.github/workflows/update_readme/update_readme.py +++ b/.github/workflows/update_readme/update_readme.py @@ -5,6 +5,7 @@ import datetime from vertexai.preview.generative_models import GenerativeModel from vertexai.preview import caching +import re PROJECT_ID = "document-ai-test-337818" # @param {type:"string"} LOCATION = "us-central1" # @param {type:"string"} @@ -42,30 +43,38 @@ def extract_code(repo_dir: str) -> Tuple[List, str]: return code_index, "".join(code_text) -def gemini(code_index: List[str], code_text: str) -> str: +def gemini(code_index: List[str], code_text: str, directory: str) -> str: MODEL_ID = "gemini-1.5-pro-001" # @param {type:"string"} contents = f""" Context: - - The entire codebase is provided below. - - Here is an index of all of the files in the codebase: + - Here is an index of all of the files in the directory {directory}: \n\n{code_index}\n\n. - - Then each of the files are concatenated together. You will find all of the code you need: - \n\n{code_text}\n\n """ - - cached_content = caching.CachedContent.create( + print("Code Index", code_index) + # cached_content = caching.CachedContent.create( + # model_name=MODEL_ID, + # system_instruction="You are an expert software engineer, proficient in GitHub, Generative AI and Google Cloud.", + # contents=contents, + # ttl=datetime.timedelta(minutes=60), + # display_name="example-cache", + # ) + + prompt = f"""Context: + {contents} + + Write a GitHub README.md file for the directory in the context. + Give an outline of the files in the directory with links and descriptions of files. + + Just output the markdown, don't include the ``` code fences.""" + + # model = GenerativeModel.from_cached_content(cached_content=cached_content) + + model = GenerativeModel( model_name=MODEL_ID, system_instruction="You are an expert software engineer, proficient in GitHub, Generative AI and Google Cloud.", - contents=contents, - ttl=datetime.timedelta(minutes=60), - display_name="example-cache", ) - prompt = "Write a GitHub README.md file for the directory in the context." - - model = GenerativeModel.from_cached_content(cached_content=cached_content) - response = model.generate_content(prompt) return response.text @@ -77,13 +86,20 @@ def update_readme() -> None: top_level_dirs = [ d for d in os.listdir(".") if os.path.isdir(d) and not d.startswith(".") ] - for directory in top_level_dirs: readme_path = os.path.join(directory, "README.md") mode = "w+" if not os.path.exists(readme_path) else "a+" code_index, code_text = extract_code(directory) - readme_content = gemini(code_index, code_text) + readme_content = gemini(code_index, code_text, directory) + + # # Regex pattern to match the format provided + # pattern = r"```(.*?)```" + + # matches = re.findall(pattern, readme_content, re.DOTALL)[0] + + # file_content = matches[0].strip() + # print(file_content) with open(readme_path, mode, encoding="utf-8") as f: f.seek(0) # Move to the beginning of the file for 'a+' mode