# NOTE(review): removed web-viewer scrape residue ("Spaces:", "Running",
# blame hashes, and a line-number gutter) that was not part of the source
# and made the module non-parseable.
from typing import List, Optional
from dataclasses import dataclass
from enum import Enum
import torch
from ask_candid.base.lambda_base import LambdaInvokeBase
@dataclass(slots=True)
class Encoding:
inputs: List[str]
vectors: torch.Tensor
class CandidSLM(LambdaInvokeBase):
    """Client for Candid's custom small language model.

    Details: https://dev.azure.com/guidestar/DataScience/_git/graph-ai?path=/releases/language.

    The service exposes:
        * text encoding
        * document summarization
        * entity salience estimation

    Parameters
    ----------
    access_key : Optional[str], optional
        AWS access key, by default None
    secret_key : Optional[str], optional
        AWS secret key, by default None
    """

    class Tasks(Enum):  # pylint: disable=missing-class-docstring
        # Enum values are the service's URL paths — do not change.
        ENCODE = "/encode"
        DOCUMENT_SUMMARIZE = "/document/summarize"
        DOCUMENT_NER_SALIENCE = "/document/entitySalience"

    def __init__(
        self, access_key: Optional[str] = None, secret_key: Optional[str] = None
    ) -> None:
        # Delegate AWS client setup to the Lambda-invocation base class.
        super().__init__(
            function_name="small-lm",
            access_key=access_key,
            secret_key=secret_key
        )

    def encode(self, text: List[str]) -> Encoding:
        """Encode a batch of texts into embedding vectors.

        Parameters
        ----------
        text : List[str]
            Texts to encode.

        Returns
        -------
        Encoding
            The echoed inputs and a float32 tensor of their vectors.
        """
        payload = {"text": text, "path": self.Tasks.ENCODE.value}
        response = self._submit_request(payload)
        # Fall back to empty collections when the service omits a field.
        echoed_inputs = response.get("inputs") or []
        raw_vectors = response.get("vectors") or []
        return Encoding(
            inputs=echoed_inputs,
            vectors=torch.tensor(raw_vectors, dtype=torch.float32),
        )