Skip to content

Commit

Permalink
release: 0.17.1
Browse files Browse the repository at this point in the history
  • Loading branch information
ppodolsky committed Jun 14, 2023
1 parent c78b3c4 commit c52d19b
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 7 deletions.
2 changes: 1 addition & 1 deletion summa-embed-py/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "summa-embed-py"
version = "0.17.0"
version = "0.17.1"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
Expand Down
2 changes: 2 additions & 0 deletions summa-embed-py/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
aiohttp
aiokit
izihawa_utils
57 changes: 51 additions & 6 deletions summa-embed-py/summa_embed/__init__.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,65 @@
from typing import Dict, Optional
import re
from typing import Dict
from urllib.parse import urlparse

from aiokit import AioThing
from izihawa_utils.pb_to_json import ParseDict

from .proto import index_service_pb2, query_pb2, search_service_pb2
from .summa_embed_bin import IndexRegistry as IndexRegistryBin


class IndexRegistry:
async def detect_host_header(url, aiohttp=None):
    """Return the ``*.localhost`` host that *url* redirects to, if any.

    A single GET is sent to *url* without following redirects. If the answer
    is a 3xx whose ``Location`` mentions ``localhost``, the network location
    of that target is matched against ``<name>.localhost...`` and the matched
    text is returned (the caller in this file uses it as a ``host`` header).

    Args:
        url: Address to probe.
        aiohttp: Object exposing ``ClientSession`` -- the ``aiohttp`` module or
            a compatible stand-in. When omitted, the ``aiohttp`` package is
            imported on demand.

    Returns:
        The matched host string, or ``None`` when the response is not a
        redirect, carries no ``Location``, or does not point at a
        ``<name>.localhost`` host.
    """
    if aiohttp is None:
        # The default used to be dereferenced as-is, which raised
        # AttributeError for every caller that did not inject a module.
        import aiohttp

    async with aiohttp.ClientSession() as session:
        async with session.get(url, allow_redirects=False) as resp:
            if not 300 <= resp.status < 400:
                return None
            # Some 3xx answers (e.g. 304) legitimately have no Location.
            redirection_url = resp.headers.get('Location', '')

    if 'localhost' not in redirection_url:
        return None
    # A bare "localhost" target (no subdomain) does not match the pattern;
    # report "nothing detected" instead of failing on a missing match.
    match = re.search(r'(.*)\.localhost.*', urlparse(redirection_url).netloc)
    return match.group(0) if match else None


def canonoize_endpoint(endpoint):
    """Normalise *endpoint* into an HTTP(S) URL without trailing slashes.

    Trailing ``/`` characters are removed. If the result does not already
    begin with an ``http://`` or ``https://`` scheme (compared
    case-insensitively), ``http://`` is prepended.

    Args:
        endpoint: Host, ``host:port`` or full URL as a string.

    Returns:
        The normalised endpoint string.
    """
    endpoint = endpoint.rstrip('/')
    # Test for the full scheme prefix: a bare "http" prefix also matches
    # scheme-less hosts such as "httpbin.org", which then stayed unqualified.
    if not endpoint.lower().startswith(('http://', 'https://')):
        endpoint = 'http://' + endpoint
    return endpoint


class SummaEmbedClient(AioThing):
# Create the client: initialise the AioThing base, then the native registry.
def __init__(self):
# Run the parent class initialiser before adding our own state.
super().__init__()
# IndexRegistryBin is summa_embed_bin.IndexRegistry (see the import above);
# the `add` method awaits `self.index_registry.add(...)` to register indices.
self.index_registry = IndexRegistryBin()

async def add(self, index_config, index_name: str) -> index_service_pb2.IndexAttributes:
parsed_index_config = index_service_pb2.IndexEngineConfig()
ParseDict(index_config, parsed_index_config)
async def add_remote_index(self, index_name, full_path, cache_size, query_parser_config=None):
    """Register an index that is read over HTTP GET requests.

    Builds a ``remote`` engine config for *full_path* and hands it to
    ``self.add``. Before that, ``detect_host_header`` is awaited on
    *full_path*; a truthy result is added to the request headers as ``host``.

    Args:
        index_name: Name passed through to ``self.add``.
        full_path: URL prefix; the ``{file_name}`` placeholder is appended
            directly to it (presumably it should end with ``/`` -- confirm
            against callers).
        cache_size: Stored under ``cache_config.cache_size``.
        query_parser_config: Optional value stored under
            ``query_parser_config``.

    Returns:
        Whatever ``self.add`` returns.
    """
    request_headers = {'range': 'bytes={start}-{end}'}
    detected_host = await detect_host_header(full_path)
    if detected_host:
        request_headers['host'] = detected_host

    remote_section = {
        'method': 'GET',
        'url_template': f'{full_path}{{file_name}}',
        'headers_template': request_headers,
        'cache_config': {'cache_size': cache_size},
    }
    engine_config = {
        'remote': remote_section,
        'query_parser_config': query_parser_config,
    }
    return await self.add(engine_config, index_name=index_name)

async def add_local_index(self, index_name, full_path, query_parser_config=None):
    """Register an index stored at a local path.

    Wraps *full_path* in a ``file`` engine config and forwards it to
    ``self.add``.

    Args:
        index_name: Name passed through to ``self.add``.
        full_path: Stored under ``file.path``.
        query_parser_config: Optional value stored under
            ``query_parser_config``.

    Returns:
        Whatever ``self.add`` returns.
    """
    file_section = {'path': full_path}
    return await self.add(
        {'file': file_section, 'query_parser_config': query_parser_config},
        index_name=index_name,
    )

async def add(self, index_engine_config, index_name: str) -> index_service_pb2.IndexAttributes:
parsed_index_engine_config = index_service_pb2.IndexEngineConfig()
ParseDict(index_engine_config, parsed_index_engine_config)
index_attributes_bytes = await self.index_registry.add(
parsed_index_config.SerializeToString(),
parsed_index_engine_config.SerializeToString(),
index_name=index_name,
)
index_attributes = index_service_pb2.IndexAttributes()
Expand Down

0 comments on commit c52d19b

Please sign in to comment.