from typing import List, Optional
from dataclasses import dataclass
from enum import Enum

import torch

from ask_candid.base.lambda_base import LambdaInvokeBase


@dataclass(slots=True)
class Encoding:
    """Container pairing encoded texts with their vectors.

    Attributes
    ----------
    inputs : List[str]
        The texts as reported back under the response's ``"inputs"`` key.
    vectors : torch.Tensor
        Tensor built from the response's ``"vectors"`` key.
    """

    inputs: List[str]
    vectors: torch.Tensor


class CandidSLM(LambdaInvokeBase):
    """Wrapper around Candid's custom small language model.

    For more details see
    https://dev.azure.com/guidestar/DataScience/_git/graph-ai?path=/releases/language.

    This service includes:
    * text encoding
    * document summarization
    * entity salience estimation

    Parameters
    ----------
    access_key : Optional[str], optional
        AWS access key, by default None
    secret_key : Optional[str], optional
        AWS secret key, by default None
    """

    class Tasks(Enum):  # pylint: disable=missing-class-docstring
        # Request paths sent as the "path" field of a request payload.
        ENCODE = "/encode"
        DOCUMENT_SUMMARIZE = "/document/summarize"
        DOCUMENT_NER_SALIENCE = "/document/entitySalience"

    def __init__(
        self,
        access_key: Optional[str] = None,
        secret_key: Optional[str] = None
    ) -> None:
        # The function name is fixed; only the credentials are caller-supplied.
        super().__init__(
            function_name="small-lm",
            access_key=access_key,
            secret_key=secret_key
        )

    def encode(self, text: List[str]) -> Encoding:
        """Submit texts to the ``/encode`` task and wrap the response.

        Parameters
        ----------
        text : List[str]
            Texts to encode; sent under the ``"text"`` key of the request.

        Returns
        -------
        Encoding
            ``inputs`` is the response's ``"inputs"`` value and ``vectors`` is a
            ``float32`` tensor built from its ``"vectors"`` value. A missing or
            falsy value for either key is replaced by an empty list before use.
        """
        request = {"text": text, "path": self.Tasks.ENCODE.value}
        # NOTE(review): relies on the inherited `_submit_request` returning a
        # mapping-like object with `.get` -- confirm against LambdaInvokeBase.
        payload = self._submit_request(request)

        # `or []` also covers keys that are present but None/empty.
        echoed_inputs = payload.get("inputs") or []
        raw_vectors = payload.get("vectors") or []

        return Encoding(
            inputs=echoed_inputs,
            vectors=torch.tensor(raw_vectors, dtype=torch.float32),
        )