{"cells":[{"cell_type":"markdown","metadata":{"id":"kDn_lVxg3Z2G"},"source":["# ***Keerio's AI Chat Bot***\n"]},{"cell_type":"code","source":["#importing nesscessery Libraries\n","import gradio as gd\n","import os\n","import nltk\n","nltk.download('omw-1.4')\n","nltk.download('punkt')\n","nltk.download('wordnet')\n","nltk.download('stopwords')\n","from nltk.corpus import stopwords\n","from nltk.stem import WordNetLemmatizer\n","import numpy as np\n","import tensorflow as tf\n","from tensorflow.keras.layers import Dense, Dropout\n","from tensorflow.keras.models import Sequential\n","from tensorflow.keras.optimizers import SGD\n","import json\n","import pickle\n","import random"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"collapsed":true,"id":"4Bt6ouEL8le3","executionInfo":{"status":"ok","timestamp":1731828437704,"user_tz":-300,"elapsed":6284,"user":{"displayName":"G.M Keerio","userId":"17671004916695510792"}},"outputId":"41bb839f-1544-458e-832f-c16e100baf4e"},"execution_count":12,"outputs":[{"output_type":"stream","name":"stderr","text":["[nltk_data] Downloading package omw-1.4 to /root/nltk_data...\n","[nltk_data] Package omw-1.4 is already up-to-date!\n","[nltk_data] Downloading package punkt to /root/nltk_data...\n","[nltk_data] Package punkt is already up-to-date!\n","[nltk_data] Downloading package wordnet to /root/nltk_data...\n","[nltk_data] Package wordnet is already up-to-date!\n","[nltk_data] Downloading package stopwords to /root/nltk_data...\n","[nltk_data] Package stopwords is already up-to-date!\n"]}]},{"cell_type":"code","source":["\n","lemmatizer = WordNetLemmatizer()\n","\n","words = []\n","classes = []\n","documents = []\n","ignore_words = [\"?\", \"!\"]\n","intents = json.loads(open(\"/content/dev-v1.1.json\").read())\n","#looping and lemitizaing the words\n","for intent in intents[\"intents\"]:\n"," for pattern in intent['patterns']:\n"," w = nltk.word_tokenize(pattern)\n"," w = [lemmatizer.lemmatize(word.lower()) for word in w if word not in ignore_words]\n"," words.extend(w)\n"," documents.append((w, intent['tag']))\n"," if intent['tag'] not in classes:\n"," classes.append(intent['tag'])\n","\n","# Deduplication and sorting\n","words = sorted(list(set(words)))\n","classes = sorted(list(set(classes)))\n","\n","# Saving words and classes with pickle\n","pickle.dump(words, open('words.pkl', 'wb'))\n","pickle.dump(classes, open('classes.pkl', 'wb'))\n","\n","print(len(documents), \"documents\")\n","print(len(classes), \"classes\", classes)\n","print(len(words), \"unique lemmatized words\", words)\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":211},"id":"PW0FdYCz85J-","executionInfo":{"status":"error","timestamp":1731827869517,"user_tz":-300,"elapsed":474,"user":{"displayName":"G.M Keerio","userId":"17671004916695510792"}},"outputId":"56076031-6dbf-460f-d50a-d8a73dbc7eb4"},"execution_count":4,"outputs":[{"output_type":"error","ename":"KeyError","evalue":"'intents'","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)","\u001b[0;32m<ipython-input-4-918b7424b340>\u001b[0m in \u001b[0;36m<cell line: 9>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0mintents\u001b[0m \u001b[0;34m=\u001b[0m 
The next cell converts each document into a bag-of-words vector, one-hot encodes its tag, and trains a small feed-forward network on the result.

```python
training = []
output_empty = [0] * len(classes)

for document in documents:
    # Bag-of-words vector: 1 if the vocabulary word occurs in the pattern, else 0
    bag = []
    pattern_words = document[0]
    pattern_words = [lemmatizer.lemmatize(word.lower()) for word in pattern_words]
    for word in words:
        bag.append(1 if word in pattern_words else 0)

    # One-hot encoding of the intent tag
    output_row = list(output_empty)
    output_row[classes.index(document[1])] = 1

    training.append([bag, output_row])

random.shuffle(training)
training = np.array(training, dtype=object)

# Splitting the data into train_x and train_y
train_x = np.array(list(training[:, 0]))
train_y = np.array(list(training[:, 1]))

# Model definition
model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

# Compile model
sgd = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

# Training
model.fit(train_x, train_y, epochs=200, batch_size=5, verbose=1)

# Save the model
model.save("chatbot_model.h5")
print("Model created and saved successfully!")
```
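If inference is run in a fresh session, the saved artifacts can be loaded back before using the helper functions below. This is a minimal sketch, assuming `chatbot_model.h5`, `words.pkl`, and `classes.pkl` were produced by the cells above.

```python
# Reload the trained model and vocabulary (assumes the files saved above exist)
import pickle

from tensorflow.keras.models import load_model

model = load_model("chatbot_model.h5")
words = pickle.load(open("words.pkl", "rb"))
classes = pickle.load(open("classes.pkl", "rb"))
```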
The remaining helpers tokenize an incoming message, build its bag-of-words vector, keep the predicted intents above a confidence threshold, and pick a random response for the top intent. `get_response` falls back to a default reply when no intent matches, so it can no longer return an undefined result.

```python
def clean_up_sentence(sentence):
    # Tokenize and lemmatize the user's message
    sentence_words = nltk.word_tokenize(sentence)
    sentence_words = [lemmatizer.lemmatize(word.lower()) for word in sentence_words]
    return sentence_words

def bow(sentence, words, show_details=True):
    # Build the same bag-of-words representation used during training
    sentence_words = clean_up_sentence(sentence)
    bag = [0] * len(words)
    for s in sentence_words:
        for i, w in enumerate(words):
            if w == s:
                bag[i] = 1
                if show_details:
                    print("found in bag: %s" % w)
    return np.array(bag)

def predict_class(sentence, model):
    # Keep only intents predicted above the confidence threshold, highest first
    p = bow(sentence, words, show_details=False)
    res = model.predict(np.array([p]))[0]
    ERROR_THRESHOLD = 0.25
    results = [[i, r] for i, r in enumerate(res) if r > ERROR_THRESHOLD]
    results.sort(key=lambda x: x[1], reverse=True)
    return [{"intent": classes[r[0]], "probability": str(r[1])} for r in results]

def get_response(ints, intents_json):
    # Fall back to a generic reply if no intent clears the threshold or matches a tag
    result = "Sorry, I didn't understand that."
    if ints:
        tag = ints[0]['intent']
        for i in intents_json['intents']:
            if i['tag'] == tag:
                result = random.choice(i['responses'])
                break
    return result

# Simple console chat loop; type "quit" to stop
while True:
    message = input("You: ")
    if message.lower() == "quit":
        break
    ints = predict_class(message, model)
    res = get_response(ints, intents)
    print("Bot:", res)
```
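Gradio is imported at the top of the notebook but never used. Below is a minimal sketch of how the same prediction pipeline could be exposed through a Gradio text interface, assuming `model`, `words`, `classes`, and `intents` from the cells above are in scope; the function name `chatbot_reply` is an illustrative choice, not part of the original notebook.

```python
import gradio as gr

def chatbot_reply(message):
    # Reuse the notebook's prediction helpers to answer a single message
    ints = predict_class(message, model)
    return get_response(ints, intents)

# Plain text-in / text-out interface around the chatbot
demo = gr.Interface(fn=chatbot_reply, inputs="text", outputs="text",
                    title="Keerio's AI Chat Bot")
demo.launch()
```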