from typing import List, Optional
from dataclasses import dataclass
from enum import Enum

import torch

from ask_candid.base.lambda_base import LambdaInvokeBase


@dataclass(slots=True)
class Encoding:
    inputs: List[str]
    vectors: torch.Tensor


class CandidSLM(LambdaInvokeBase):
    """Wrapper around Candid's custom small language model.
    For more details see https://dev.azure.com/guidestar/DataScience/_git/graph-ai?path=/releases/language.
    This service includes:
        * text encoding
        * document summarization
        * entity salience estimation

    Parameters
    ----------
    access_key : Optional[str], optional
        AWS access key, by default None
    secret_key : Optional[str], optional
        AWS secret key, by default None
    """

    class Tasks(Enum):  # pylint: disable=missing-class-docstring
        ENCODE = "/encode"
        DOCUMENT_SUMMARIZE = "/document/summarize"
        DOCUMENT_NER_SALIENCE = "/document/entitySalience"

    def __init__(
        self, access_key: Optional[str] = None, secret_key: Optional[str] = None
    ) -> None:
        super().__init__(
            function_name="small-lm",
            access_key=access_key,
            secret_key=secret_key
        )

    def encode(self, text: List[str]) -> Encoding:
        """Encode a batch of texts into dense vectors via the ``/encode`` task.

        Returns an :class:`Encoding` holding the echoed inputs and a float32
        tensor of embedding vectors.
        """
        response = self._submit_request({"text": text, "path": self.Tasks.ENCODE.value})

        output = Encoding(
            inputs=(response.get("inputs") or []),
            vectors=torch.tensor((response.get("vectors") or []), dtype=torch.float32)
        )
        return output
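

# Illustrative usage sketch (not part of the original module): it assumes valid AWS
# credentials are either passed explicitly or resolved from the environment by
# LambdaInvokeBase, and that the "small-lm" Lambda function is deployed and
# returns "inputs" and "vectors" fields as consumed by encode() above.
if __name__ == "__main__":
    slm = CandidSLM()  # or CandidSLM(access_key="...", secret_key="...")
    encoding = slm.encode(["Candid connects nonprofits with funders."])
    # vectors has shape (batch_size, embedding_dim); the exact dimension depends on the model
    print(encoding.vectors.shape)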