Spaces:
Running
Running
from typing import List, Optional | |
from dataclasses import dataclass | |
from enum import Enum | |
import torch | |
from ask_candid.base.lambda_base import LambdaInvokeBase | |
@dataclass
class Encoding:
    """Result container for a text-encoding request.

    Declared as a dataclass so it can be built with keyword arguments
    (``Encoding(inputs=..., vectors=...)``); without the decorator the
    annotated names below would not produce an ``__init__`` and keyword
    construction would raise ``TypeError``.
    """

    # Texts associated with the vectors, as reported back by the service.
    inputs: List[str]
    # Embedding values as a tensor.
    # NOTE(review): presumably one row per entry in `inputs` -- confirm
    # against the service's response contract.
    vectors: torch.Tensor
class CandidSLM(LambdaInvokeBase):
    """Wrapper around Candid's custom small language model.

    For more details see
    https://dev.azure.com/guidestar/DataScience/_git/graph-ai?path=/releases/language.

    The service exposes the following tasks:
        * text encoding
        * document summarization
        * entity salience estimation

    Parameters
    ----------
    access_key : Optional[str], optional
        AWS access key, by default None
    secret_key : Optional[str], optional
        AWS secret key, by default None
    """

    class Tasks(Enum):
        """Request paths for the tasks offered by the service."""

        ENCODE = "/encode"
        DOCUMENT_SUMMARIZE = "/document/summarize"
        DOCUMENT_NER_SALIENCE = "/document/entitySalience"

    def __init__(
        self,
        access_key: Optional[str] = None,
        secret_key: Optional[str] = None,
    ) -> None:
        # Credentials are forwarded unchanged to the base class; this
        # wrapper only pins the function name.
        super().__init__(
            function_name="small-lm",
            access_key=access_key,
            secret_key=secret_key,
        )

    def encode(self, text: List[str]) -> Encoding:
        """Request encodings for a list of texts.

        Parameters
        ----------
        text : List[str]
            Texts to send to the ``/encode`` task.

        Returns
        -------
        Encoding
            The ``inputs`` and ``vectors`` entries of the response. A missing
            or falsy entry is replaced by an empty list, so ``vectors`` then
            becomes an empty float32 tensor.
        """
        # `_submit_request` is not defined in this class; presumably it is
        # inherited from LambdaInvokeBase and returns a dict-like object.
        payload = {"text": text, "path": self.Tasks.ENCODE.value}
        reply = self._submit_request(payload)

        echoed_inputs = reply.get("inputs") or []
        raw_vectors = reply.get("vectors") or []

        return Encoding(
            inputs=echoed_inputs,
            vectors=torch.tensor(raw_vectors, dtype=torch.float32),
        )