import json
import os

import spacy
from spacy.language import Language


class PipelineWrapper:
    """
    Pipeline wrapper for the project 'Frauenerwerbstätigkeit' with the
    Bertelsmann Foundation team.

    Loads pre-defined patterns from a JSON file and adds them to a span
    ruler in a spaCy pipeline.
    """

    def __init__(self, path: str) -> None:
        """
        Build the pipeline: load the German model, read the pattern file
        from ``<path>/data/`` and register the span ruler.

        Parameters
        ----------
        path:
            Project root directory containing the ``data`` folder.
        """
        self.patterns: list = []
        self.nlp: Language = spacy.load("de_core_news_sm")
        self.load_patterns(os.path.join(path, "data", "2024-08-16_patterns_1192.json"))
        self.add_span_ruler()

    def load_patterns(self, path: str) -> None:
        """
        Load patterns from a JSON file in spaCy pattern format.

        Parameters
        ----------
        path:
            Path to the pattern JSON file.
        """
        # Explicit utf-8: the pattern file contains German text
        # (umlauts) and must not depend on the platform default encoding.
        with open(file=path, mode="r", encoding="utf-8") as fp:
            self.patterns = json.load(fp=fp)

    def add_span_ruler(self) -> None:
        """
        Add a span ruler with the loaded patterns to the nlp pipeline.

        NOTE: the ruler deliberately uses the default configuration, so
        matched spans are stored under ``doc.spans["ruler"]`` — the key
        that ``bulk_predict`` reads. (A previous version built a config
        with ``spans_key=None`` but never applied it; applying it would
        redirect matches to ``doc.ents`` and break ``bulk_predict``.)
        """
        ruler = self.nlp.add_pipe("span_ruler")
        ruler.add_patterns(self.patterns)

    def __call__(self, queries: list) -> list:
        """Call method for the pipeline; delegates to ``bulk_predict``."""
        return self.bulk_predict(queries=queries)

    def bulk_predict(self, queries: list) -> list:
        """
        Bulk-predict the concepts found in each query text.

        Parameters
        ----------
        queries:
            List of dictionaries with this structure:
            ``{"posting_id": uuid, "text": str}``

        Returns
        -------
        list
            One dictionary per found concept, or a single entry with
            ``"concept": None`` when nothing matched:
            ``[{"posting_id": ..., "concept": ...}, ...]``
        """
        extractions = []
        # Only the span ruler is needed; disable every other component
        # of de_core_news_sm for speed.
        docs = self.nlp.pipe(
            (q["text"].lower() for q in queries),
            disable=["ner", "tok2vec", "tagger", "parser", "attribute_ruler", "lemmatizer"],
        )
        for entry, doc in zip(queries, docs):
            labels = [span.label_ for span in doc.spans["ruler"]]
            if not labels:
                # No concept found: emit a single None entry for this posting.
                extractions.append({"posting_id": entry["posting_id"], "concept": None})
            else:
                # One output entry for each found concept.
                for label in labels:
                    extractions.append({"posting_id": entry["posting_id"], "concept": label})
        return extractions