From 0cc097e8bb56d4cf15bb70c999c857cf2e1c0ae4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lina=C2=B2?= <20880695+hobgoblina@users.noreply.github.com> Date: Wed, 10 Apr 2024 22:52:06 -0400 Subject: [PATCH 1/3] robots.txt updating action --- .github/workflows/update-robots-txt.yml | 88 +++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 .github/workflows/update-robots-txt.yml diff --git a/.github/workflows/update-robots-txt.yml b/.github/workflows/update-robots-txt.yml new file mode 100644 index 00000000000000..e29f0a6bcf435c --- /dev/null +++ b/.github/workflows/update-robots-txt.yml @@ -0,0 +1,88 @@ +name: Update robots.txt + +permissions: write-all + +on: + schedule: + - cron: '0 0 * * 0' + workflow_dispatch: + +jobs: + sync-upstream: + runs-on: ubuntu-latest + steps: + - name: Create Branch + env: + GITHUB_TOKEN: ${{ github.token }} + uses: peterjgrainger/action-create-branch@v2.2.0 + with: + branch: robots.txt-update + - name: Checkout + uses: actions/checkout@v3 + with: + ref: robots.txt-update + fetch-depth: 0 + - name: Update robot.txt + id: update + env: + API_KEY: ${{ secrets.ROBOTS }} + run: | + '# _---~~(~~-_. + # _{ ) ) + # , ) -~~- ( ,-' )_ + # ( `-,_..`., )-- '_,) + # ( ` _) ( -~( -_ `, } + # (_- _ ~_-~~~~`, ,' ) + # `~ -^( __;-,((())) + # ~~~~ {_ -_(()) + # `\ } + # { } + # BRAAAAAAAAIIIINNSSSSSSS + + ' >> public/robots.txt + curl --location 'https://api.darkvisitors.com/robots-txts' \ + --header 'Content-Type: application/json' \ + --header 'Authorization: Bearer $API_KEY' \ + --data '{ "agent_types": [ "AI Data Scraper", "AI Assistant", "AI Search Crawler" ], "disallow": "/" }' > public/robots.txt + git add public/robots.txt + changes=$(git push origin 2>&1) + if [ "$changes" = "Everything up-to-date" ]; then + echo "skip=true" >> "$GITHUB_OUTPUT" + fi + - name: Check if PR exists + env: + GH_TOKEN: ${{ github.token }} + id: check + run: | + prs=$(gh pr list \ + --repo "$GITHUB_REPOSITORY" \ + --json baseRefName,headRefName \ + --jq ' + map(select(.baseRefName == "dev" and .headRefName == "robots.txt-update")) + | length + ') + if ((prs > 0)); then + echo "skip=true" >> "$GITHUB_OUTPUT" + fi + - name: Create Pull Request + if: | + !steps.check.outputs.skip && + !steps.update.outputs.skip + uses: actions/github-script@v6 + with: + script: | + const { repo, owner } = context.repo; + const result = await github.rest.pulls.create({ + title: 'Update robots.txt', + owner, + repo, + head: 'robots.txt-update', + base: 'dev', + body: 'This PR was *auto-generated* by the `Update robots.txt` action and contains updates to our robots.txt file, pulled from [Dark Visitors](https://darkvisitors.com/).' + }); + github.rest.issues.addLabels({ + owner, + repo, + issue_number: result.data.number, + labels: ['automated pr'] + }); \ No newline at end of file From 091371b97aed08638cbcaf04a9848d23261eb3e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lina=C2=B2?= <20880695+hobgoblina@users.noreply.github.com> Date: Wed, 10 Apr 2024 23:02:10 -0400 Subject: [PATCH 2/3] job name --- .github/workflows/update-robots-txt.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/update-robots-txt.yml b/.github/workflows/update-robots-txt.yml index e29f0a6bcf435c..125781d8eaeafe 100644 --- a/.github/workflows/update-robots-txt.yml +++ b/.github/workflows/update-robots-txt.yml @@ -8,7 +8,7 @@ on: workflow_dispatch: jobs: - sync-upstream: + update-robots: runs-on: ubuntu-latest steps: - name: Create Branch @@ -85,4 +85,4 @@ jobs: repo, issue_number: result.data.number, labels: ['automated pr'] - }); \ No newline at end of file + }); From c5aba355218c911d56348bc1563e406e902f991e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lina=C2=B2?= <20880695+hobgoblina@users.noreply.github.com> Date: Thu, 11 Apr 2024 01:04:34 -0400 Subject: [PATCH 3/3] way better with the PR action --- .github/workflows/update-robots-txt.yml | 80 ++++--------------------- robots-base.txt | 22 +++++++ 2 files changed, 35 insertions(+), 67 deletions(-) create mode 100644 robots-base.txt diff --git a/.github/workflows/update-robots-txt.yml b/.github/workflows/update-robots-txt.yml index 125781d8eaeafe..b2a7928c22466b 100644 --- a/.github/workflows/update-robots-txt.yml +++ b/.github/workflows/update-robots-txt.yml @@ -1,7 +1,5 @@ name: Update robots.txt -permissions: write-all - on: schedule: - cron: '0 0 * * 0' @@ -11,78 +9,26 @@ jobs: update-robots: runs-on: ubuntu-latest steps: - - name: Create Branch - env: - GITHUB_TOKEN: ${{ github.token }} - uses: peterjgrainger/action-create-branch@v2.2.0 - with: - branch: robots.txt-update - name: Checkout uses: actions/checkout@v3 - with: - ref: robots.txt-update - fetch-depth: 0 - name: Update robot.txt id: update env: API_KEY: ${{ secrets.ROBOTS }} run: | - '# _---~~(~~-_. - # _{ ) ) - # , ) -~~- ( ,-' )_ - # ( `-,_..`., )-- '_,) - # ( ` _) ( -~( -_ `, } - # (_- _ ~_-~~~~`, ,' ) - # `~ -^( __;-,((())) - # ~~~~ {_ -_(()) - # `\ } - # { } - # BRAAAAAAAAIIIINNSSSSSSS - - ' >> public/robots.txt + cp robots-base.txt public/robots.txt curl --location 'https://api.darkvisitors.com/robots-txts' \ --header 'Content-Type: application/json' \ - --header 'Authorization: Bearer $API_KEY' \ - --data '{ "agent_types": [ "AI Data Scraper", "AI Assistant", "AI Search Crawler" ], "disallow": "/" }' > public/robots.txt - git add public/robots.txt - changes=$(git push origin 2>&1) - if [ "$changes" = "Everything up-to-date" ]; then - echo "skip=true" >> "$GITHUB_OUTPUT" - fi - - name: Check if PR exists - env: - GH_TOKEN: ${{ github.token }} - id: check - run: | - prs=$(gh pr list \ - --repo "$GITHUB_REPOSITORY" \ - --json baseRefName,headRefName \ - --jq ' - map(select(.baseRefName == "dev" and .headRefName == "robots.txt-update")) - | length - ') - if ((prs > 0)); then - echo "skip=true" >> "$GITHUB_OUTPUT" - fi - - name: Create Pull Request - if: | - !steps.check.outputs.skip && - !steps.update.outputs.skip - uses: actions/github-script@v6 + --header "Authorization: Bearer $API_KEY" \ + --data '{ "agent_types": [ "AI Data Scraper", "AI Assistant", "AI Search Crawler" ], "disallow": "/" }' >> public/robots.txt + - name: Create pull request + uses: peter-evans/create-pull-request@v6 with: - script: | - const { repo, owner } = context.repo; - const result = await github.rest.pulls.create({ - title: 'Update robots.txt', - owner, - repo, - head: 'robots.txt-update', - base: 'dev', - body: 'This PR was *auto-generated* by the `Update robots.txt` action and contains updates to our robots.txt file, pulled from [Dark Visitors](https://darkvisitors.com/).' - }); - github.rest.issues.addLabels({ - owner, - repo, - issue_number: result.data.number, - labels: ['automated pr'] - }); + token: ${{ secrets.GITHUB_TOKEN }} + branch: robots.txt-update + title: "Update robots.txt" + commit-message: "Update robots.txt" + labels: 'robots.txt' + add-paths: public/robots.txt + reviewers: hobgoblina,mannazsci,sneakers-the-rat + body: This PR was generated by the `Update robots.txt` action and contains updates to our robots.txt file, pulled from [Dark Visitors](https://darkvisitors.com/). diff --git a/robots-base.txt b/robots-base.txt new file mode 100644 index 00000000000000..53a70c15660140 --- /dev/null +++ b/robots-base.txt @@ -0,0 +1,22 @@ +# .__---~~~(~~-_. +# _-' ) -~~- ) _-" )_ +# ( ( `-,_..`.,_--_ '_,)_ +# ( -_) ( -_-~ -_ `, ) +# (_ -_ _-~-__-~`, ,' )__-'))--___--~~~--__--~~--___--__.. +# _ ~`_-'( (____;--==,,_))))--___--~~~--__--~~--__----~~~'`=__-~+_-_. +# (@) (@) ````` `-_(())_-~ +# +# ,---. .=-.-..-._ ,-,--. +# _..---. .-.,.---. .--.' \ /==/_ /==/ \ .-._ ,-.'- _\ +# .' .'.-. \ /==/ ` \ \==\-/\ \ |==|, ||==|, \/ /, /==/_ ,_.' +# /==/- '=' /|==|-, .=., |/==/-|_\ | |==| ||==|- \| |\==\ \ +# |==|-, ' |==| '=' /\==\, - \ |==|- ||==| , | -| \==\ -\ +# |==| .=. \|==|- , .' /==/ - ,| |==| ,||==| - _ | _\==\ ,\ +# /==/- '=' ,|==|_ . ,'./==/- /\ - \|==|- ||==| /\ , |/==/\/ _ | +# |==| - //==/ /\ , )==\ _.\=\.-'/==/. //==/, | |- |\==\ - , / +# `-._`.___,' `--`-`--`--' `--` `--`-` `--`./ `--` `--`---' + +User-agent: * +Disallow: /media_proxy/ +Disallow: /interact/ +