Tigran Tokmajyan committed
Commit · 2e668a6
Parent(s): 093705f
Various changes
Files changed: handler.py (+11 -2)
handler.py CHANGED
@@ -11,11 +11,14 @@ formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
 handler.setFormatter(formatter)
 logger.addHandler(handler)
 
+# This is used https://github.com/NielsRogge/Transformers-Tutorials/blob/master/MarkupLM/Inference_with_MarkupLM_for_question_answering_on_web_pages.ipynb
 # https://www.naveedafzal.com/posts/scraping-websites-by-asking-questions-with-markuplm/
 class EndpointHandler:
     def __init__(self, path=""):
         # Load model, tokenizer, and feature extractor
-        logger.debug("Loading model from: " + path)
+        # logger.debug("Loading model from: " + path)
+
+        # WE ARE CURRENTLY NOT USING OUR REPO'S MODEL
         self.processor = MarkupLMProcessor.from_pretrained("microsoft/markuplm-large-finetuned-websrc")
         self.model = MarkupLMForQuestionAnswering.from_pretrained("microsoft/markuplm-large-finetuned-websrc")
 
@@ -47,6 +50,12 @@
         predict_answer_tokens = encoding.input_ids[0, answer_start_index : answer_end_index + 1]
         answer = self.processor.decode(predict_answer_tokens, skip_special_tokens=True)
 
+        # Get the score
+        start_score = outputs.start_logits[0, answer_start_index].item()
+        end_score = outputs.end_logits[0, answer_end_index].item()
+        score = (start_score + end_score) / 2
+
         print(f"Answer: {answer}")
+        print(f"Score: {score}")
 
-        return {"answer": answer}
+        return {"answer": answer, "score": score}
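
For context, the score added in the second hunk is simply the mean of the raw start and end logits at the predicted span boundaries. A minimal, self-contained sketch of that inference path follows; the HTML string and question are illustrative placeholders, not taken from the handler, while the model name comes from the diff above.

# Sketch of the MarkupLM question-answering flow this commit extends.
import torch
from transformers import MarkupLMProcessor, MarkupLMForQuestionAnswering

processor = MarkupLMProcessor.from_pretrained("microsoft/markuplm-large-finetuned-websrc")
model = MarkupLMForQuestionAnswering.from_pretrained("microsoft/markuplm-large-finetuned-websrc")

# Placeholder inputs for illustration only.
html_string = "<html><head><title>Example Store</title></head><body><p>Price: $19.99</p></body></html>"
question = "What is the price?"

encoding = processor(html_string, questions=question, return_tensors="pt")

with torch.no_grad():
    outputs = model(**encoding)

# Most likely start/end token positions for the answer span.
answer_start_index = outputs.start_logits.argmax()
answer_end_index = outputs.end_logits.argmax()

predict_answer_tokens = encoding.input_ids[0, answer_start_index : answer_end_index + 1]
answer = processor.decode(predict_answer_tokens, skip_special_tokens=True)

# Score as computed in the commit: the mean of the raw logits at the span boundaries.
start_score = outputs.start_logits[0, answer_start_index].item()
end_score = outputs.end_logits[0, answer_end_index].item()
score = (start_score + end_score) / 2

print({"answer": answer, "score": score})

Since this score averages unnormalized logits, it is useful for ranking candidate answers against each other but is not a calibrated probability; applying a softmax over the logits first would yield values in [0, 1].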