diff --git a/chat.py b/chat.py
new file mode 100644
index 0000000..ce285ec
--- /dev/null
+++ b/chat.py
@@ -0,0 +1,106 @@
+"""
+Inference
+"""
+import os
+
+import numpy as np
+import tensorflow as tf
+import yaml
+import maximal
+from tqdm import tqdm
+
+from config import config
+
+
+def generate_text(
+        gpt: tf.keras.models.Model,
+        prompt: str,
+        char2idx: dict,
+        idx2char: dict,
+        n: int = config.N_GENERATION,
+        temperature: float = config.TEMPERATURE,
+        k: int = config.TOP_K_SAMPLE
+        ) -> str:
+    """
+    Inference time for the GPT.
+
+    Args:
+        gpt (tf.keras.models.Model): trained GPT loaded from /saved_models
+        prompt (str): input text
+        char2idx (dict): char -> idx mapping
+        idx2char (dict): idx -> char mapping (inverse of original char2idx)
+        n (int): number of tokens to be generated
+        temperature (float): noise in the output probability
+            (>1. = noisy sampling; <1. = conservative sampling.)
+        k (int): restricts sampling to the top-k most likely tokens
+
+    Returns:
+        generated_text (str): GPT completion
+    """
+    # If prompt is shorter than INPUT_LENGTH raise error (no padding in this simple tutorial)
+    assert len(prompt) >= config.INPUT_LENGTH, f"Prompt must be at least {config.INPUT_LENGTH} characters long"
+
+    # If prompt is longer than INPUT_LENGTH crop it to its last piece
+    if len(prompt) > config.INPUT_LENGTH:
+        prompt = prompt[-config.INPUT_LENGTH:]
+
+    generated_text = []
+
+    for i in tqdm(range(n)):
+        # vectorize prompt and adjust np.array shape
+        vectorized_text = [char2idx[c] for c in prompt]
+        vectorized_text = np.array(vectorized_text).reshape((1, len(vectorized_text)))
+
+        # next token prediction (keep only the logits of the last position)
+        pred = gpt.predict(vectorized_text, verbose=0)
+        pred = np.squeeze(pred[:, -1, :])
+
+        # temperature scaling
+        pred /= temperature
+
+        # restrict sampling to top k tokens
+        probs, indices = tf.math.top_k(pred, k, sorted=True)
+
+        # sample token id
+        probs = tf.nn.softmax(probs).numpy()
+        pred_id = np.random.choice(indices.numpy(), p=probs)
+
+        # update prompt: drop the first char, append the sampled one
+        next_char = idx2char[pred_id]
+        prompt = prompt[1:] + next_char
+        generated_text.append(next_char)
+
+    generated_text = ''.join(generated_text)
+
+    return generated_text
+
+
+def nlg():
+    """
+    Natural Language Generation.
+    Starts an infinite loop that can be broken only via Ctrl+C or by
+    typing "exit" as prompt.
+    """
+    # Load model
+    print(f"Loading model: {config.MODEL_NAME}.h5")
+    gpt = tf.keras.models.load_model(os.path.join(os.getcwd(), "saved_models", f"{config.MODEL_NAME}.h5"))
+    print("Completed.")
+
+    # Load the char -> idx mapping saved at training time and build its inverse
+    with open(os.path.join(os.getcwd(), "saved_models", f"{config.MODEL_NAME}_char_idx_map.yaml"), "r") as f:
+        char2idx = yaml.safe_load(f)
+    idx2char = {v: k for k, v in char2idx.items()}
+
+    print(config.MSG_GREETINGS)
+
+    # Start infinite loop
+    while True:
+        prompt = input("\nUser:\n")
+
+        if prompt == "exit":
+            print(config.MSG_FAREWELL)
+            quit()
+
+        # If prompt is too short send a Shakespearean message
+        elif len(prompt) < config.INPUT_LENGTH:
+            print(config.MSG_INPUT_TOO_SHORT.format(config.INPUT_LENGTH))
+            continue
+
+        generated_text = generate_text(gpt=gpt, prompt=prompt, char2idx=char2idx, idx2char=idx2char)
+        print(f"\nShakespeare-GPT:\n{generated_text}\n")
+
+
+if __name__ == "__main__":
+    nlg()
\ No newline at end of file
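Aside, not part of the diff: the sampling step inside generate_text() is temperature scaling followed by a top-k softmax. A minimal sketch on a made-up logits vector (the numbers below are arbitrary, only the mechanics match the code above):

import numpy as np
import tensorflow as tf

logits = np.array([2.0, 1.0, 0.5, -1.0, -3.0])   # fake next-char logits

for temperature in (0.5, 1.0, 2.0):
    # keep only the k most likely tokens, then renormalize with a softmax
    values, indices = tf.math.top_k(logits / temperature, k=3, sorted=True)
    probs = tf.nn.softmax(values).numpy()
    print(temperature, dict(zip(indices.numpy().tolist(), probs.round(3).tolist())))

# one sampled token id, as in generate_text()
pred_id = np.random.choice(indices.numpy(), p=probs)

Temperatures below 1.0 concentrate probability mass on the top token (conservative sampling), while values above 1.0 flatten the distribution before the top-k softmax (noisier sampling).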
+ """ + # Load model + print(f"Loading model: {config.MODEL_NAME}.h5") + gpt = tf.keras.models.load_model(os.path.join(os.getcwd(), "saved_models", config.MODEL_NAME)) + print("Completed.") + + print(config.MSG_GREETINGS) + + # Start infinite loop + while true: + prompt = input("\nUser:\n") + + if prompt < config.INPUT_LENGTH: + print(f"Please provide a prompt of {config.INPUT_LENGTH}") + + # If prompt too short send a shakespearean message + print(config.MSG_INPUT_TOO_SHORT.format(config.INPUT_LENGTH)) + continue + elif prompt == "exit": + print(config.MSG_FAREWELL) + quit() + + generated_text = generate_text(prompt=prompt) + print(f"\nShakespeare-GPT:\n{generated_text}\n") + + +if __name__ == "__main__": + nlg() \ No newline at end of file diff --git a/config.py b/config.py index 64ce1f9..6e98536 100644 --- a/config.py +++ b/config.py @@ -3,8 +3,16 @@ """ from utils import StrMessages + class config(StrMessages): MODEL_NAME = "gpt_maximal_00" + + # NLG + N_GENERATION = 1000 + TEMPERATURE = 1.0 + TOP_K_SAMPLE = 10 + + # Model architecture INPUT_LENGTH = 128 DEPTH = 512 HEADS = 4 diff --git a/model.py b/model.py index 55d76fd..8847426 100644 --- a/model.py +++ b/model.py @@ -13,17 +13,20 @@ from config import config -def build_model(): +def build_model() -> tf.keras.models.Model: """ Builds a GPT using Maximal and TensorFlow. Args: / (just needs config params) Returns: GPT model (tf.keras.models.Model) """ # Define nodes of the graph - input_batch = Input(shape=(INPUT_LENGTH,), dtype=tf.int32) - embedding = PositionalEmbedding(INPUT_LENGTH, VOCAB_SIZE, DEPTH) - gpt_layers = [GPTLayer(depth=DEPTH, heads=HEADS, ff_nodes=FF_NODES) for _ in range(N_LAYERS)] - classification_layer = Dense(VOCAB_SIZE) + input_batch = Input(shape=(config.INPUT_LENGTH,), dtype=tf.int32) + + embedding = PositionalEmbedding(config.INPUT_LENGTH, config.VOCAB_SIZE, config.DEPTH) + + gpt_layers = [GPTLayer(depth=config.DEPTH, heads=config.HEADS, ff_nodes=config.FF_NODES) for _ in range(config.N_LAYERS)] + + classification_layer = Dense(config.VOCAB_SIZE) # Build the computational graph x = embedding(input_batch) @@ -39,18 +42,25 @@ def build_model(): ) -def load_model(): +def load_or_build_model(verbose: bool =False) -> tf.keras.models.Model: """ - If a model with a given name already exists - :return: - """ - return gpt - + Checks if a model with name MODEL_NAME is already stored in /saved_models + folder. If present, loads the existing one (to train it further). If not, it + builds a new one. 
diff --git a/requirements.txt b/requirements.txt
index 6075ec6..3c9dd7f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,6 @@
 numpy
 tensorflow>2.1
 maximal>=1.0
+matplotlib
+pyyaml
+tqdm
\ No newline at end of file
diff --git a/train.py b/train.py
index d4f6d2b..0330825 100644
--- a/train.py
+++ b/train.py
@@ -1,7 +1,9 @@
 """
 Training
 """
+import os
 import requests
+import yaml
 
 import numpy as np
 import tensorflow as tf
@@ -12,14 +14,21 @@
 
 # globals
-gpt = load_or_build_model()
+gpt = load_or_build_model(verbose=True)
 optimizer = tf.keras.optimizers.Adam(learning_rate=config.LEARNING_RATE)
 
 
-def numerical_encoding(text, char_dict):
+def numerical_encoding(text: str, char_dict: dict) -> np.ndarray:
     """
     First breaks text into a list of chars, then converts each to
     its numerical idx (np.array)
+
+    Args:
+        text (str): corpus to be vectorized
+        char_dict (dict): dictionary to map chars to indexes
+
+    Returns:
+        chars_list (np.array): vectorized corpus
     """
     chars_list = [ char for char in text ]
     chars_list = [ char_dict[char] for char in chars_list ]
@@ -27,10 +36,14 @@
     return chars_list
 
 
-def get_text_matrix(sequence, len_input):
+def get_text_matrix(sequence: np.ndarray, len_input: int) -> np.ndarray:
     """
-    This generates a matrix containing all the sequences
+    Generates a matrix containing all sequences of length len_input
     to be fed into the Network
+
+    Args:
+        sequence (np.array): array to be processed
+        len_input (int): length of model input
     """
     # create empty matrix
     X = np.empty((len(sequence)-len_input, len_input))
@@ -42,7 +55,17 @@
     return X
 
 
-def process_corpus():
+def process_corpus() -> (np.ndarray, dict):
+    """
+    Text preprocessing steps: 1. downloads the corpus; 2. extracts the set of
+    unique chars; 3. maps every char to an int; 4. vectorizes the text;
+    5. processes the vectorized text into a 2D array for model training
+    (a sliding window over the text is produced)
+
+    Returns:
+        X (np.array): 2D array for model training
+        char2idx (dict): dictionary to preserve the char-index mapping
+    """
     page = requests.get(config.CORPUS_URL)
     text = page.text
@@ -53,14 +76,13 @@
     # Map every letter in our alphabet to an int
     char2idx = {char[1]: char[0] for char in enumerate(unique_chars)}
 
-    # Produce a reverse dictionary to go back from int to str later
-    idx2char = {v: k for k, v in char2idx.items()}
-
+    # vectorize text
    encoded_text = numerical_encoding(text, char2idx)
 
+    # Sequence of vectorized chars to 2D array
     X = get_text_matrix(encoded_text, INPUT_LENGTH + 1)
 
-    return X
+    return X, char2idx
 
 
 @tf.function
@@ -79,7 +101,7 @@ def train_on_batch(x, y):
 
 
 def main():
-    X = process_corpus()
+    X, char2idx = process_corpus()
 
     loss_history = []
@@ -119,10 +141,15 @@ def main():
     plt.show()
 
     # Save model
-    gpt.save(os.path.join(os.getcwd(), "saved_models", config.MODEL_NAME))
+    gpt.save(os.path.join(os.getcwd(), "saved_models", config.MODEL_NAME + ".h5"))
 
-    return None
+    # Save char2idx mapping as yaml (chat.py reloads it at inference time)
+    with open(os.path.join(os.getcwd(), "saved_models", f"{config.MODEL_NAME}_char_idx_map.yaml"), "w") as f:
+        yaml.dump(char2idx, f)
+
+    return None
 
 
 if __name__ == "__main__":
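Aside, not part of the diff: the "sliding window" that get_text_matrix() builds (and that process_corpus() returns) can be sketched on a toy sequence; the loop body below is an illustration of the behavior described in the docstring, not the repo's code:

import numpy as np

seq = np.arange(6)            # stand-in for the encoded corpus
len_input = 4
X = np.empty((len(seq) - len_input, len_input))
for i in range(X.shape[0]):
    X[i] = seq[i:i + len_input]
print(X)
# [[0. 1. 2. 3.]
#  [1. 2. 3. 4.]]

Each row is the previous row shifted by one character; since train.py calls get_text_matrix(encoded_text, INPUT_LENGTH + 1), each row presumably holds an INPUT_LENGTH-long model input together with its one-step-shifted targets.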
diff --git a/utils.py b/utils.py
index 597c1b6..735503d 100644
--- a/utils.py
+++ b/utils.py
@@ -12,11 +12,6 @@ class StrMessages:
     With words of lofty prose, I shall adorn
     Thy journey through this CLI program's feed.
 
-    Thou art most welcome to this humble stage,
-    Where bytes and lines doth dance in harmony.
-    Methinks thou seeketh knowledge of this age,
-    And for thy query, I shall thee gladly see.
-
     Inscribe the word "exit," a concise decree,
     Or wield the key combination, Ctrl-C, with glee.
     By this act, thou shalt gracefully conclude thy stay,
@@ -24,9 +19,23 @@ class StrMessages:
     """
 
     MSG_INPUT_TOO_SHORT = """
+    ** InputError: Input length too short **
+
     Pray, kind user, if it be not too much to ask,
     I beseech thee, extend thy prompt, a greater task.
     Yet, one more thing I must humbly request,
     A length of {} characters, at its behest.
     """
+
+    MSG_FAREWELL = """
+    Fair user, thou hast spoken the chosen word,
+    "Exit" resounds, like a song of a departing bird.
+    With brevity, I bid thee a swift farewell,
+    May fortune attend thee, as thou venturest, as well.
+
+    Fare thee well, dear user, with gratitude and grace,
+    Till we meet again, in another time and place.
+    Shakespeare-GPT awaits, shouldst thou return anew,
+    With words of wisdom and verses, old and true.
+    """
+