# -*- coding: utf-8 -*-
"""
Created on Mon Aug 09 2021
@author: Rishabbh sahu
"""
import os
import pickle
import numpy as np
import json
from readers.reader import Reader
from text_preprocessing.vectorizer import BERT_PREPROCESSING_FAST
from model import JOINT_TEXT_MODEL
from flask import Flask, jsonify, request  # Flask objects used to create and expose endpoints

# Create the app
app = Flask(__name__)

# Load the model, label encoders, and tokenizer once at startup
def initialize():
    # Declare global variables to make them accessible throughout the app
    global model_tokenizer
    # Initialize the model tokenizer used for creating sub-tokens
    model_tokenizer = BERT_PREPROCESSING_FAST(max_seq_length=config['MAX_SEQ_LEN'])
    # Load models and necessary artifacts
    print('Loading models and artifacts...')
    if not os.path.exists(load_folder_path):
        raise FileNotFoundError('Folder `%s` does not exist' % load_folder_path)
    global slot_encoder
    # Slot label encoder fitted before model training
    with open(os.path.join(load_folder_path, 'slot_label_encoder.pkl'), 'rb') as handle:
        slot_encoder = pickle.load(handle)
    global sequence_label_encoder
    # Sequence (intent) label encoder fitted before model training
    with open(os.path.join(load_folder_path, 'sequence_label_encoder.pkl'), 'rb') as handle:
        sequence_label_encoder = pickle.load(handle)
    global model
    # Read the model parameters saved while training the model
    with open(os.path.join(load_folder_path, 'model_params.json'), 'r') as json_file:
        model_params = json.load(json_file)
    # Instantiate the model object with the same architecture and load the trained weights
    model = JOINT_TEXT_MODEL(slots_num=model_params['num_slot_classes'],
                             intents_num=model_params['num_sequence_classes'],
                             model_path=model_params['model_path'],
                             learning_rate=model_params['learning_rate'])
    model.load(load_folder_path)

# Default landing page with caption
@app.route('/', methods=['GET', 'POST'])
def hello():
    return 'Hello from NLU inference routine'

@app.route('/predict', methods=['GET', 'POST'])
def predict():
    input_json = request.json
    utterance = input_json["utterance"]
    # Encode the input text using the Huggingface fast tokenizer
    encodings = model_tokenizer.fastTokenizer([utterance.split()], is_split_into_words=True)
    input_txt = model_tokenizer.create_model_input(encodings)
    slots, intent = model.predict(input_txt)
    predicted_intent = sequence_label_encoder.inverse_transform([np.argmax(intent)])
    slots = np.argmax(slots, axis=-1)
    # Drop padding positions (label id 0) from the slot predictions
    list_without_pad = [item for sublist in slots for item in sublist if item > 0]
    # Remove the CLS and SEP tokens from the prediction
    pred_tags = slot_encoder.inverse_transform(list_without_pad[1:-1])
    # Pair each sub-token with its predicted slot tag
    annotations = [{word: tag} for word, tag in zip(model_tokenizer.fastTokenizer.tokenize(utterance), pred_tags)]
    # Create the response as a dictionary, returned as JSON
    response = {
        "intent": {
            "name": str(predicted_intent[0]),
        },
        "algo": "Joint text model",
        "annotations": annotations
    }
    return jsonify(response)  # use Postman or curl to get the model response
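
# Example request against /predict (hypothetical utterance; requires the server below to be running):
#   curl -X POST http://localhost:8888/predict \
#        -H 'Content-Type: application/json' \
#        -d '{"utterance": "book a flight to boston"}'
# The response follows the dictionary built above; actual intent and slot names depend on the
# trained model, e.g.:
#   {"algo": "Joint text model", "annotations": [{"book": "O"}, ...], "intent": {"name": "BookFlight"}}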

if __name__ == '__main__':
    # Read the configuration we used while training the model
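    # A minimal sketch of the keys config.yaml is expected to provide, based on the lookups
    # below and in initialize(); the values shown are illustrative assumptions:
    #   MAX_SEQ_LEN: 64
    #   saved_model_dir_path: saved_models
    #   model_name: joint_text_model
    #   model_version: '1'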
    configuration_file_path = 'config.yaml'
    config = {}
    config.update(Reader.read_yaml_from_file(configuration_file_path))
    load_folder_path = os.path.join(config['saved_model_dir_path'], config['model_name'], config['model_version'])
    print('Starting the Server')
    initialize()
    # Run the app
    app.run(host='0.0.0.0', port=8888, debug=False, use_reloader=False)