Skip to content

Commit

Permalink
MNTSUP-165 - speed up content_url query
Browse files Browse the repository at this point in the history
  • Loading branch information
wimfabri committed Oct 18, 2023
1 parent 28bacb6 commit 1d59b01
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 4 deletions.
8 changes: 7 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,14 @@ jobs:
registry: private.docker.xenit.eu
username: ${{ secrets.CLOUDSMITH_USER }}
password: ${{ secrets.CLOUDSMITH_APIKEY }}
- name: Build docker image
uses: gradle/[email protected]
env:
BRANCH_NAME: ${{ github.ref_name }}
with:
arguments: buildDockerImage
- name: Publish docker image
if: ${{ startsWith(github.ref, 'refs/heads/master') }}
#if: ${{ startsWith(github.ref, 'refs/heads/master') }}
uses: gradle/[email protected]
env:
BRANCH_NAME: ${{ github.ref_name }}
Expand Down
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ plugins {
ext {
base_img = 'open-source.docker.xenit.eu/oracle-python'
base_img_version = 'main-2.1.1'
img_version = '2.0.0'
img_version = '2.1.0'
}

createDockerFile {
Expand Down
15 changes: 13 additions & 2 deletions src/main/docker/docker_root/swarmclean.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import records
from dataclasses import dataclass, asdict
import humanfriendly
import binascii

sys.path.insert(0,sys.path[0]+'/castorsdk')
import scspHeaders
Expand Down Expand Up @@ -351,7 +352,7 @@ def list_bucket_contents_filtered(self, filter_function, max_batch_size):
objects = response.json()

if not objects:
return object_list
return { 'list': object_list, 'size': batch_size }

for object in objects:
swarm_object = SwarmObject(**object)
Expand Down Expand Up @@ -456,7 +457,17 @@ def __init__(self, args):

def isDeletionCandidate(self, swarm_object):
if args.filter_method == 'alfresco_db':
result = self.alfresco_db.query_single_value("select count(*) from alf_content_url where content_url like :object_name", {'object_name': f"%/{swarm_object.name}"}) == 0
content_url_short = swarm_object.name[-12:]
#content_url_crc = binascii.crc32(bytes(f"swarm://{self.args.swarm_bucket}/{swarm_object.name}", 'ascii'))
content_url_crc = binascii.crc32(bytes(f"s3://{swarm_object.name}", 'ascii'))
# table has an index on content_url_short + content_url_crc
result = self.alfresco_db.query_single_value(
"select count(*) from alf_content_url where content_url_short = :content_url_short and content_url_crc = :content_url_crc",
{
'content_url_short': content_url_short,
'content_url_crc': content_url_crc
}
) == 0
elif args.filter_method == 'regex':
result = self.filterRegex.match(swarm_object.name)
logging.trace(f"filter { swarm_object.name }: { bool(result) } - size { humanfriendly.format_size(swarm_object.bytes, binary=True) }")
Expand Down

0 comments on commit 1d59b01

Please sign in to comment.