- pip3 install Tensorflow
- pip3 install Keras
- pip3 install nltk
- pip3 install pandas
- import nltk
- import ssl
- from nltk.stem.lancaster import LancasterStemmer
- stemmer = LancasterStemmer()
- import numpy as np
- from keras.models import Sequential
- from keras.layers import Dense, Activation, Dropout
- from keras.optimizers import SGD
- import pandas as pd
- import pickle
- import random
Resource punkt not found
- import nltk
- nltk.download('punkt')
- https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt.zip
- C:\Users\liuyue\nltk_data\tokenizers\punkt
# Training corpus: each intent has a tag (the class label), example
# user utterances ("patterns") and candidate replies ("responses").
intents = {"intents": [
    {"tag": "打招呼",
     "patterns": ["你好", "您好", "请问", "有人吗", "师傅", "不好意思", "美女", "帅哥", "靓妹", "hi"],
     "responses": ["您好", "又是您啊", "吃了么您内", "您有事吗"],
     "context": [""]
     },
    {"tag": "告别",
     "patterns": ["再见", "拜拜", "88", "回见", "回头见"],
     "responses": ["再见", "一路顺风", "下次见", "拜拜了您内"],
     "context": [""]
     },
]
}
# Accumulators filled by the corpus scan below. They were used without
# ever being initialised, which raises NameError on the first iteration.
words = []
classes = []
documents = []
# Tokens dropped from the vocabulary.
# NOTE(review): the original value is not shown anywhere in this file;
# a lone question mark is an assumed default - confirm or extend.
ignore_words = ['?']

for intent in intents['intents']:
    for pattern in intent['patterns']:
        # tokenize each word in the sentence
        w = nltk.word_tokenize(pattern)
        # add to our words list
        words.extend(w)
        # add to documents in our corpus
        documents.append((w, intent['tag']))
        # add to our classes list
        if intent['tag'] not in classes:
            classes.append(intent['tag'])

# Stem and lower-case every token, then de-duplicate and sort so that the
# vocabulary (and therefore every bag-of-words vector) has a stable order.
words = [stemmer.stem(w.lower()) for w in words if w not in ignore_words]
words = sorted(set(words))
classes = sorted(set(classes))
print(len(classes), "语境", classes)
print(len(words), "词数", words)
- 2 语境 ['告别', '打招呼']
- 14 词数 ['88', '不好意思', '你好', '再见', '回头见', '回见', '帅哥', '师傅', '您好', '拜拜', '有人吗', '美女', '请问', '靓妹']
# create our training data: one [bag_of_words, one_hot_label] pair per document
training = []
# template for the one-hot output row, one slot per class
output_empty = [0] * len(classes)
for doc in documents:
    # doc is (tokens, tag); stem the tokens the same way the vocabulary was built
    pattern_words = {stemmer.stem(word.lower()) for word in doc[0]}
    # 1 where the vocabulary word occurs in this pattern, 0 otherwise
    bag = [1 if w in pattern_words else 0 for w in words]
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1
    training.append([bag, output_row])

random.shuffle(training)
# Split features and labels straight from the list. Each pair holds two
# lists of different lengths, so a plain np.array(training) is ragged and
# is rejected by current NumPy releases.
train_x = [bag for bag, _ in training]
train_y = [output_row for _, output_row in training]
# Keep `training` available as an array; dtype=object is required for
# ragged nested sequences.
training = np.array(training, dtype=object)
# Feed-forward classifier: bag-of-words vector in, one softmax score per class out.
model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

# NOTE(review): newer Keras releases spell `lr` as `learning_rate` and may
# reject `decay` - confirm against the installed Keras version.
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1)
- 14/14 [==============================] - 0s 32ms/step - loss: 0.7305 - acc: 0.5000
- Epoch 2/200
- 14/14 [==============================] - 0s 391us/step - loss: 0.7458 - acc: 0.4286
- Epoch 3/200
- 14/14 [==============================] - 0s 390us/step - loss: 0.7086 - acc: 0.3571
- Epoch 4/200
- 14/14 [==============================] - 0s 395us/step - loss: 0.6941 - acc: 0.6429
- Epoch 5/200
- 14/14 [==============================] - 0s 426us/step - loss: 0.6358 - acc: 0.7143
- Epoch 6/200
- 14/14 [==============================] - 0s 356us/step - loss: 0.6287 - acc: 0.5714
- Epoch 7/200
- 14/14 [==============================] - 0s 366us/step - loss: 0.6457 - acc: 0.6429
- Epoch 8/200
- 14/14 [==============================] - 0s 899us/step - loss: 0.6336 - acc: 0.6429
- Epoch 9/200
- 14/14 [==============================] - 0s 464us/step - loss: 0.5815 - acc: 0.6429
- Epoch 10/200
- 14/14 [==============================] - 0s 408us/step - loss: 0.5895 - acc: 0.6429
- Epoch 11/200
- 14/14 [==============================] - 0s 548us/step - loss: 0.6050 - acc: 0.6429
- Epoch 12/200
- 14/14 [==============================] - 0s 468us/step - loss: 0.6254 - acc: 0.6429
- Epoch 13/200
- 14/14 [==============================] - 0s 388us/step - loss: 0.4990 - acc: 0.7857
- Epoch 14/200
- 14/14 [==============================] - 0s 392us/step - loss: 0.5880 - acc: 0.7143
- Epoch 15/200
- 14/14 [==============================] - 0s 370us/step - loss: 0.5118 - acc: 0.8571
- Epoch 16/200
- 14/14 [==============================] - 0s 457us/step - loss: 0.5579 - acc: 0.7143
- Epoch 17/200
- 14/14 [==============================] - 0s 432us/step - loss: 0.4535 - acc: 0.7857
- Epoch 18/200
- 14/14 [==============================] - 0s 357us/step - loss: 0.4367 - acc: 0.7857
- Epoch 19/200
- 14/14 [==============================] - 0s 384us/step - loss: 0.4751 - acc: 0.7857
- Epoch 20/200
- 14/14 [==============================] - 0s 346us/step - loss: 0.4404 - acc: 0.9286
- Epoch 21/200
- 14/14 [==============================] - 0s 500us/step - loss: 0.4325 - acc: 0.8571
- Epoch 22/200
- 14/14 [==============================] - 0s 400us/step - loss: 0.4104 - acc: 0.9286
- Epoch 23/200
- 14/14 [==============================] - 0s 738us/step - loss: 0.4296 - acc: 0.7857
- Epoch 24/200
- 14/14 [==============================] - 0s 387us/step - loss: 0.3706 - acc: 0.9286
- Epoch 25/200
- 14/14 [==============================] - 0s 430us/step - loss: 0.4213 - acc: 0.8571
- Epoch 26/200
- 14/14 [==============================] - 0s 351us/step - loss: 0.2867 - acc: 1.0000
- Epoch 27/200
- 14/14 [==============================] - 0s 3ms/step - loss: 0.2903 - acc: 1.0000
- Epoch 28/200
- 14/14 [==============================] - 0s 366us/step - loss: 0.3010 - acc: 0.9286
- Epoch 29/200
- 14/14 [==============================] - 0s 404us/step - loss: 0.2466 - acc: 0.9286
- Epoch 30/200
- 14/14 [==============================] - 0s 428us/step - loss: 0.3035 - acc: 0.7857
- Epoch 31/200
- 14/14 [==============================] - 0s 407us/step - loss: 0.2075 - acc: 1.0000
- Epoch 32/200
- 14/14 [==============================] - 0s 457us/step - loss: 0.2167 - acc: 0.9286
- Epoch 33/200
- 14/14 [==============================] - 0s 613us/step - loss: 0.1266 - acc: 1.0000
- Epoch 34/200
- 14/14 [==============================] - 0s 534us/step - loss: 0.2906 - acc: 0.9286
- Epoch 35/200
- 14/14 [==============================] - 0s 463us/step - loss: 0.2560 - acc: 0.9286
- Epoch 36/200
- 14/14 [==============================] - 0s 500us/step - loss: 0.1686 - acc: 1.0000
- Epoch 37/200
- 14/14 [==============================] - 0s 387us/step - loss: 0.0922 - acc: 1.0000
- Epoch 38/200
- 14/14 [==============================] - 0s 430us/step - loss: 0.1620 - acc: 1.0000
- Epoch 39/200
- 14/14 [==============================] - 0s 371us/step - loss: 0.1104 - acc: 1.0000
- Epoch 40/200
- 14/14 [==============================] - 0s 488us/step - loss: 0.1330 - acc: 1.0000
- Epoch 41/200
- 14/14 [==============================] - 0s 381us/step - loss: 0.1322 - acc: 1.0000
- Epoch 42/200
- 14/14 [==============================] - 0s 462us/step - loss: 0.0575 - acc: 1.0000
- Epoch 43/200
- 14/14 [==============================] - 0s 1ms/step - loss: 0.1137 - acc: 1.0000
- Epoch 44/200
- 14/14 [==============================] - 0s 450us/step - loss: 0.0245 - acc: 1.0000
- Epoch 45/200
- 14/14 [==============================] - 0s 470us/step - loss: 0.1824 - acc: 1.0000
- Epoch 46/200
- 14/14 [==============================] - 0s 444us/step - loss: 0.0822 - acc: 1.0000
- Epoch 47/200
- 14/14 [==============================] - 0s 436us/step - loss: 0.0939 - acc: 1.0000
- Epoch 48/200
- 14/14 [==============================] - 0s 396us/step - loss: 0.0288 - acc: 1.0000
- Epoch 49/200
- 14/14 [==============================] - 0s 580us/step - loss: 0.1367 - acc: 0.9286
- Epoch 50/200
- 14/14 [==============================] - 0s 351us/step - loss: 0.0363 - acc: 1.0000
- Epoch 51/200
- 14/14 [==============================] - 0s 379us/step - loss: 0.0272 - acc: 1.0000
- Epoch 52/200
- 14/14 [==============================] - 0s 358us/step - loss: 0.0712 - acc: 1.0000
- Epoch 53/200
- 14/14 [==============================] - 0s 4ms/step - loss: 0.0426 - acc: 1.0000
- Epoch 54/200
- 14/14 [==============================] - 0s 370us/step - loss: 0.0430 - acc: 1.0000
- Epoch 55/200
- 14/14 [==============================] - 0s 368us/step - loss: 0.0292 - acc: 1.0000
- Epoch 56/200
- 14/14 [==============================] - 0s 494us/step - loss: 0.0777 - acc: 1.0000
- Epoch 57/200
- 14/14 [==============================] - 0s 356us/step - loss: 0.0496 - acc: 1.0000
- Epoch 58/200
- 14/14 [==============================] - 0s 427us/step - loss: 0.1485 - acc: 1.0000
- Epoch 59/200
- 14/14 [==============================] - 0s 381us/step - loss: 0.1006 - acc: 1.0000
- Epoch 60/200
- 14/14 [==============================] - 0s 421us/step - loss: 0.0183 - acc: 1.0000
- Epoch 61/200
- 14/14 [==============================] - 0s 344us/step - loss: 0.0788 - acc: 0.9286
- Epoch 62/200
- 14/14 [==============================] - 0s 529us/step - loss: 0.0176 - acc: 1.0000
def clean_up_sentence(sentence):
    """Tokenize *sentence* and return its tokens lower-cased and stemmed.

    Uses ``nltk.word_tokenize`` for splitting and the module-level
    ``stemmer`` for stemming, matching how the vocabulary is prepared.
    """
    return [stemmer.stem(word.lower()) for word in nltk.word_tokenize(sentence)]
def bow(sentence, words, show_details=True):
    """Return the bag-of-words vector of *sentence* over the vocabulary *words*.

    Args:
        sentence: Raw input text; it is tokenized and stemmed by
            ``clean_up_sentence``.
        words: Vocabulary list; position ``i`` of the result corresponds
            to ``words[i]``.
        show_details: When true, print every vocabulary word that matched.

    Returns:
        A NumPy array of 0/1 flags with ``len(words)`` entries.
    """
    sentence_words = clean_up_sentence(sentence)
    bag = [0] * len(words)
    for s in sentence_words:
        for i, w in enumerate(words):
            if w == s:
                # mark the vocabulary position of the matched word
                bag[i] = 1
                if show_details:
                    print("found in bag: %s" % w)
    return np.array(bag)
- p = bow("你好", words)
- print (p)
- found in bag: 你好
- [0 0 1 0 0 0 0 0 0 0 0 0 0 0]
# Minimum probability an intent needs in order to be reported.
# NOTE(review): the constant was referenced but never defined in this file;
# 0.25 is an assumed default - tune as needed.
ERROR_THRESHOLD = 0.25


def classify_local(sentence):
    """Classify *sentence* and return the matching intents, best first.

    Returns:
        A list of ``(tag, probability)`` tuples sorted by descending
        probability, where the probability is rendered as a string.
        Intents scoring at or below ``ERROR_THRESHOLD`` are omitted, so
        the list may be empty.
    """
    # One-row float matrix: the model expects a batch, not a single vector.
    input_data = np.array([bow(sentence, words)], dtype=float)
    results = model.predict(input_data)[0]
    # keep (class index, probability) pairs above the threshold
    results = [[i, r] for i, r in enumerate(results) if r > ERROR_THRESHOLD]
    results.sort(key=lambda x: x[1], reverse=True)
    return [(classes[i], str(r)) for i, r in results]
- print(classify_local('您好'))
- found in bag: 您好
- [('打招呼', '0.999913')]
- liuyue:mytornado liuyue$
- print(classify_local('88'))
- found in bag: 88
- [('告别', '0.9995449')]
- model.save("./v3u.h5")
- import random
- import uvicorn
- from fastapi import FastAPI
- app = FastAPI()
# Minimum probability an intent needs in order to be reported.
# NOTE(review): the constant was referenced but never defined in this file;
# 0.25 is an assumed default - tune as needed.
ERROR_THRESHOLD = 0.25


def classify_local(sentence):
    """Classify *sentence* and return the matching intents, best first.

    Returns:
        A list of ``(tag, probability)`` tuples sorted by descending
        probability, where the probability is rendered as a string.
        Intents scoring at or below ``ERROR_THRESHOLD`` are omitted, so
        the list may be empty.
    """
    # One-row float matrix: the model expects a batch, not a single vector.
    input_data = np.array([bow(sentence, words)], dtype=float)
    results = model.predict(input_data)[0]
    # keep (class index, probability) pairs above the threshold
    results = [[i, r] for i, r in enumerate(results) if r > ERROR_THRESHOLD]
    results.sort(key=lambda x: x[1], reverse=True)
    return [(classes[i], str(r)) for i, r in results]
@app.get('/')
async def root(word: str = None):
    """Reply to *word* with a random response of its best-matching intent.

    Returns ``{'message': ''}`` when no word is supplied, when no intent
    clears the classification threshold, or when the predicted tag has no
    entry in ``intents``.
    """
    # classify_local reads the module-level `model`, so the loaded model
    # must be bound globally; a local binding would never be used.
    global model

    if not word:
        return {'message': ''}

    # Load the saved model once, on first use, instead of on every request.
    if 'model' not in globals():
        from keras.models import load_model
        model = load_model("./v3u.h5")

    wordlist = classify_local(word)
    if not wordlist:
        # nothing scored above the threshold
        return {'message': ''}

    best_tag = wordlist[0][0]
    a = ""
    for intent in intents['intents']:
        if intent['tag'] == best_tag:
            a = random.choice(intent['responses'])
    return {'message': a}
if __name__ == "__main__":
    # NOTE(review): the host is an empty string - confirm the intended bind address.
    uvicorn.run(app, host="", port=8000)
- from keras.models import model_from_json,load_model
- model = load_model("./v3u.h5")
- uvicorn main:app --reload