GraphRAG performance enhancements #2737
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Runs on pull requests that target main and fails when Dockerfile paths,
# image-build yaml entries, or Markdown links look inconsistent.
name: File Change Warning

on:
  pull_request:
    branches: [main]
    types: [opened, reopened, ready_for_review, synchronize]

# If there is a new commit, the previous jobs will be canceled
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
# Three Dockerfile path checks inside this repository:
#   1. deleted/renamed Dockerfiles that are still mentioned in a Markdown file,
#   2. Dockerfiles under comps/ that are missing from the image-build yaml,
#   3. image-build yaml entries that point at a Dockerfile that does not exist.
# Each check step uses `if: always()` so it runs even when an earlier step failed.
Dockerfile-path-change-detection-in-GenAIComps:
  runs-on: ubuntu-latest
  steps:
    - name: Clean Up Working Directory
      run: sudo rm -rf ${{github.workspace}}/*

    - name: Checkout Code
      uses: actions/checkout@v4
      with:
        # Fetch full history; the diff below compares against the PR base commit.
        fetch-depth: 0

    # NOTE(review): no later step in this job reads ../GenAIExamples;
    # confirm whether this clone is still required here.
    - name: Clone GenAIExamples
      run: |
        cd ..
        git clone https://github.com/opea-project/GenAIExamples

    - name: Check Dockerfile Paths in Readme
      if: always()
      run: |
        set -e
        shopt -s globstar
        cd ${{github.workspace}}
        is_use="FALSE"
        used_files=""
        merged_commit=$(git log -1 --format='%H')
        # Deleted (D) or renamed (R) Dockerfiles; column 2 is the old path.
        changed_files="$(git diff --name-status --diff-filter=DR ${{ github.event.pull_request.base.sha }} ${merged_commit} -- '**/Dockerfile**' | cut -f2)"
        if [ -n "$changed_files" ]; then
          for file in $changed_files; do
            # -F matches the path literally instead of as a regular expression.
            if grep -qF -- "$file" ./**/*.md; then
              is_use="TRUE"
              used_files+="$file "
            fi
          done
        fi
        if [[ "$is_use" == "TRUE" ]]; then
          echo "Warning: Changed Dockerfile paths:"
          echo "$used_files"
          echo "Please modify the corresponding README in GenAIComps."
          exit 1
        fi

    - name: Check Dockerfile path included in image build yaml
      if: always()
      run: |
        set -e
        shopt -s globstar
        no_add="FALSE"
        cd ${{github.workspace}}
        Dockerfiles=$(find ./comps -name '*Dockerfile*'|sed 's/^\.\///')
        if [ -n "$Dockerfiles" ]; then
          for Dockerfile in $Dockerfiles; do
            # Second path component, i.e. the directory directly under comps/.
            service=$(echo "$Dockerfile" | awk -F '/' '{print $2}')
            yaml_file=${{github.workspace}}/.github/workflows/docker/compose/"$service"-compose
            # A missing yaml file also lands in this branch (grep exits non-zero).
            if ! grep -qF -- "$Dockerfile" "$yaml_file"*yaml; then
              echo "AR: Update $Dockerfile to .github/workflows/docker/compose/${service}-compose.yaml. The yaml is used for release images build."
              no_add="TRUE"
            fi
          done
        fi
        if [[ "$no_add" == "TRUE" ]]; then
          exit 1
        fi

    - name: Check Dockerfile inside image build yaml exist in code
      if: always()
      run: |
        shopt -s globstar
        no_exist="FALSE"
        cd ${{github.workspace}}
        yamls=$(find .github/workflows/docker/compose/ -name '*.yaml')
        if [ -n "$yamls" ]; then
          for yaml in $yamls; do
            dockerfiles=$(grep 'dockerfile:' "$yaml"|sed 's/dockerfile: //')
            for dockerfile in $dockerfiles; do
              # Only paths that contain comps/ are checked for existence.
              if [[ "$dockerfile" == *"comps/"* ]]; then
                if ! [ -e "$dockerfile" ]; then
                  # cat "$yaml"
                  echo "AR: The dockerfile path $dockerfile in $yaml does not exist, remove or update it."
                  no_exist="TRUE"
                fi
              fi
            done
          done
        fi
        if [[ "$no_exist" == "TRUE" ]]; then
          exit 1
        fi
# Fails when a Dockerfile deleted or renamed by this pull request is still
# mentioned in a Markdown file of a fresh clone of the GenAIExamples repository.
Dockerfile-path-change-detection-in-GenAIExamples:
  runs-on: ubuntu-latest
  steps:
    - name: Clean Up Working Directory
      run: sudo rm -rf ${{github.workspace}}/*

    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Fetch full history; the diff below compares against the PR base commit.
        fetch-depth: 0

    # Cloned next to the workspace; the check step reads it as ../GenAIExamples.
    - name: Clone repo GenAIExamples
      run: |
        cd ..
        git clone https://github.com/opea-project/GenAIExamples

    - name: Check for changed Dockerfile paths
      run: |
        set -e
        shopt -s globstar
        cd ${{github.workspace}}
        is_use="FALSE"
        used_files=""
        merged_commit=$(git log -1 --format='%H')
        # Deleted (D) or renamed (R) Dockerfiles; column 2 is the old path.
        changed_files="$(git diff --name-status --diff-filter=DR ${{ github.event.pull_request.base.sha }} ${merged_commit} -- '**/Dockerfile**' | cut -f2)"
        if [ -n "$changed_files" ]; then
          for file in $changed_files; do
            # -F matches the path literally instead of as a regular expression.
            # "|| true" keeps "set -e" from aborting when nothing matches.
            matching_files=$(grep -rlF -- "$file" ../GenAIExamples/**/*.md) || true
            if [ -n "$matching_files" ]; then
              is_use="TRUE"
              used_files+="$file "
              echo "Modified Dockerfile '$file' is referenced in:"
              echo "$matching_files"
            else
              echo "Modified Dockerfile '$file' is not referenced"
            fi
          done
        fi
        if [[ "$is_use" == "TRUE" ]]; then
          echo "Warning: Changed Dockerfile paths:"
          echo "$used_files"
          echo "Please modify the corresponding README in GenAIExamples repo and ask [email protected] for final confirmation."
          exit 1
        fi
# For every Markdown file added, renamed or modified by the pull request,
# requests each absolute http(s) link and fails if one does not answer 200
# after one retry. Links containing GenAIComps/blob/main are skipped.
check-the-validity-of-hyperlinks-in-README:
  runs-on: ubuntu-latest
  steps:
    - name: Clean Up Working Directory
      run: sudo rm -rf ${{github.workspace}}/*

    - name: Checkout Repo GenAIComps
      uses: actions/checkout@v4
      with:
        # Fetch full history; the diff below compares against the PR base commit.
        fetch-depth: 0

    - name: Check the Validity of Hyperlinks
      # ignore_links=("https://platform.openai.com/docs/api-reference/fine-tuning"
      #               "https://platform.openai.com/docs/api-reference/"
      #               "https://openai.com/index/whisper/"
      #               "https://platform.openai.com/docs/api-reference/chat/create")
      run: |
        cd ${{github.workspace}}
        fail="FALSE"
        merged_commit=$(git log -1 --format='%H')
        # Added (A), renamed (R) or modified (M) files ending in .md; the last
        # column is the current path.
        changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')"
        if [ -n "$changed_files" ]; then
          for changed_file in $changed_files; do
            echo "$changed_file"
            url_lines=$(grep -H -Eo '\]\(http[s]?://[^)]+\)' "$changed_file" | grep -Ev 'GenAIComps/blob/main') || true
            if [ -n "$url_lines" ]; then
              for url_line in $url_lines; do
                # Text between "(" and ")", with a trailing ".git" removed.
                url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//')
                # Report the file being scanned; cutting the grep prefix at the
                # first "/" dropped the leading directory of the path.
                path=$changed_file
                if [[ "$url" == "https://platform.openai.com/docs/api-reference/"* || "https://www.docker.com/get-started" == "$url" || "https://openai.com/index/whisper/" == "$url" ]]; then
                  echo "Link $url from ${{github.workspace}}/$path need to be verified by a real person."
                else
                  # This step runs under "bash -e"; without "|| true" a curl
                  # transport error would abort the script before the link is
                  # reported.
                  response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url") || true
                  if [ "$response" != "200" ]; then
                    echo "**********Validation failed, try again**********"
                    response_retry=$(curl -L -s -o /dev/null -w "%{http_code}" "$url") || true
                    if [ "$response_retry" = "200" ]; then
                      echo "*****Retry successfully*****"
                    else
                      echo "Invalid link from ${{github.workspace}}/$path: $url"
                      fail="TRUE"
                    fi
                  fi
                fi
              done
            fi
          done
        else
          echo "No changed .md file."
        fi
        if [[ "$fail" == "TRUE" ]]; then
          exit 1
        else
          echo "All hyperlinks are valid."
        fi
      shell: bash
# Checks every non-http Markdown link target in the repository: the referenced
# file must exist, and for links with a "#" fragment inside files changed by
# the pull request, the matching URL on the head branch must answer 200.
check-the-validity-of-relative-path:
  runs-on: ubuntu-latest
  steps:
    - name: Clean up Working Directory
      run: sudo rm -rf ${{github.workspace}}/*

    - name: Checkout Repo GenAIComps
      uses: actions/checkout@v4
      with:
        # Fetch full history; the diff below compares against the PR base commit.
        fetch-depth: 0

    - name: Checking Relative Path Validity
      env:
        # Handed over as environment variables rather than expanded into the
        # script text: the head branch name is chosen by the PR author and
        # could otherwise inject shell commands.
        HEAD_REPO: ${{ github.event.pull_request.head.repo.full_name }}
        HEAD_REF: ${{ github.event.pull_request.head.ref }}
      run: |
        cd ${{github.workspace}}
        fail="FALSE"
        # Owner part of "owner/repo".
        owner=${HEAD_REPO%%/*}
        if [ "$owner" != "opea-project" ]; then
          branch="https://github.com/$owner/GenAIComps/tree/$HEAD_REF"
        else
          branch="https://github.com/opea-project/GenAIComps/blob/$HEAD_REF"
        fi
        merged_commit=$(git log -1 --format='%H')
        changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')"
        # "|| true": this step runs under "bash -eo pipefail", so an empty
        # result would otherwise abort the script.
        png_lines=$(grep -Eo '\]\([^)]+\)' --include='*.md' -r .|grep -Ev 'http') || true
        if [ -n "$png_lines" ]; then
          for png_line in $png_lines; do
            # grep prints "./path/file.md:](target)"; drop the leading "./".
            refer_path=$(echo "$png_line"|cut -d':' -f1 | cut -d'/' -f2-)
            png_path=$(echo "$png_line"|cut -d '(' -f2 | cut -d ')' -f1)
            if [[ "${png_path:0:1}" == "/" ]]; then
              check_path=$png_path
            elif [[ "$png_path" == *#* ]]; then
              relative_path=$(echo "$png_path" | cut -d '#' -f1)
              if [ -n "$relative_path" ]; then
                # "file#fragment": check the file, keep only "#fragment".
                check_path=$(dirname "$refer_path")/$relative_path
                png_path=$(echo "$png_path" | awk -F'#' '{print "#" $2}')
              else
                # "#fragment" alone refers to the file that contains the link.
                check_path=$refer_path
              fi
            else
              check_path=$(dirname "$refer_path")/$png_path
            fi
            if [ -e "$check_path" ]; then
              real_path=$(realpath "$check_path")
              if [[ "$png_line" == *#* ]]; then
                # Only links inside files changed by this PR are requested
                # remotely; the path is matched as a whole literal line.
                if [ -n "$changed_files" ] && grep -qxF -- "$refer_path" <<< "$changed_files"; then
                  # Strip everything through the last "/GenAIComps" of the
                  # absolute path and append the rest to the branch URL.
                  url_dev=$branch$(echo "$real_path" | sed 's|.*/GenAIComps||')$png_path
                  # "|| true": a curl transport error must not abort the script
                  # before the failure is reported.
                  response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url_dev") || true
                  if [ "$response" != "200" ]; then
                    echo "**********Validation failed, try again**********"
                    # Retry with a GET instead of a HEAD request, still
                    # following redirects.
                    response_retry=$(curl -L -s -o /dev/null -w "%{http_code}" "$url_dev") || true
                    if [ "$response_retry" = "200" ]; then
                      echo "*****Retry successfully*****"
                    else
                      echo "Invalid path from ${{github.workspace}}/$refer_path: $png_path, link: $url_dev"
                      fail="TRUE"
                    fi
                  else
                    echo "Validation succeed $png_line"
                  fi
                fi
              fi
            else
              echo "$check_path does not exist"
              fail="TRUE"
            fi
          done
        fi
        if [[ "$fail" == "TRUE" ]]; then
          exit 1
        else
          echo "All hyperlinks are valid."
        fi
      shell: bash