In [1]:
!wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.0.0-linux-x86_64.tar.gz
!tar -xzf elasticsearch-7.0.0-linux-x86_64.tar.gz
!chown -R daemon:daemon elasticsearch-7.0.0
--2020-11-16 07:51:33--  https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.0.0-linux-x86_64.tar.gz
Resolving artifacts.elastic.co (artifacts.elastic.co)... 151.101.2.222, 151.101.66.222, 151.101.130.222, ...
Connecting to artifacts.elastic.co (artifacts.elastic.co)|151.101.2.222|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 346760090 (331M) [application/x-gzip]
Saving to: ‘elasticsearch-7.0.0-linux-x86_64.tar.gz’

elasticsearch-7.0.0 100%[===================>] 330.70M   160MB/s    in 2.1s    

2020-11-16 07:51:35 (160 MB/s) - ‘elasticsearch-7.0.0-linux-x86_64.tar.gz’ saved [346760090/346760090]

In [2]:
!./elasticsearch-7.0.0/bin/elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.0.0/elasticsearch-analysis-ik-7.0.0.zip
!./elasticsearch-7.0.0/bin/elasticsearch-plugin list
-> Downloading https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.0.0/elasticsearch-analysis-ik-7.0.0.zip
[=================================================] 100%   
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@     WARNING: plugin requires additional permissions     @
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
* java.net.SocketPermission * connect,resolve
See http://docs.oracle.com/javase/8/docs/technotes/guides/security/permissions.html
for descriptions of what these permissions allow and the associated risks.

Continue with installation? [y/N]y
-> Installed analysis-ik
analysis-ik
In [3]:
!pip install elasticsearch -q
     |████████████████████████████████| 327kB 4.3MB/s 
In [4]:
from subprocess import Popen, PIPE, STDOUT
from elasticsearch import Elasticsearch
from elasticsearch import helpers

import os
import time
import pprint
In [5]:
# Launch the Elasticsearch node as a background child process.
# Its combined stdout/stderr stream is appended to a log file rather than sent
# to a PIPE: nothing in this notebook reads the pipe, so the server would stall
# once the OS pipe buffer filled up with log output. The parent's handle can be
# closed right after the spawn because the child keeps its own inherited copy.
with open('elasticsearch.log', 'ab') as es_log:
  es_server = Popen(
    ['elasticsearch-7.0.0/bin/elasticsearch'],
    stdout = es_log, stderr = STDOUT,
    # Switch the child to uid 1 before exec so the server does not run as root.
    # NOTE(review): uid 1 is presumably the `daemon` account the install tree
    # was chown'ed to - confirm on the target image.
    preexec_fn = lambda: os.setuid(1))
In [6]:
!curl -X GET "localhost:9200/"
{
  "name" : "4a45748a87d6",
  "cluster_name" : "elasticsearch",
  "cluster_uuid" : "UCXbtORgQ1mHcrvWHRMJjg",
  "version" : {
    "number" : "7.0.0",
    "build_flavor" : "default",
    "build_type" : "tar",
    "build_hash" : "b7e28a7",
    "build_date" : "2019-04-05T22:55:32.697037Z",
    "build_snapshot" : false,
    "lucene_version" : "8.0.0",
    "minimum_wire_compatibility_version" : "6.7.0",
    "minimum_index_compatibility_version" : "6.0.0-beta1"
  },
  "tagline" : "You Know, for Search"
}
In [7]:
def gen_data(path='/content/gdrive/My Drive/finch/es/free_chat/data/basic.txt'):
  """Yield one bulk-index action per question/answer line of a text file.

  Each non-blank line must have the form ``question<SEP>answer``. Trailing
  whitespace is stripped and blank lines are skipped.

  Args:
    path: Text file to read. Defaults to the notebook's original data file.

  Yields:
    dict with keys ``_index`` (always ``'chatbot'``), ``question`` and
    ``answer``.

  Raises:
    ValueError: if a non-blank line does not contain exactly one ``<SEP>``.
  """
  # Decode explicitly as UTF-8 so the Chinese text does not depend on the
  # platform's default locale encoding.
  with open(path, encoding='utf-8') as f:
    for line_no, line in enumerate(f, start=1):
      line = line.rstrip()
      if not line:
        # A blank line (e.g. a trailing newline at EOF) carries no Q/A pair.
        continue
      parts = line.split('<SEP>')
      if len(parts) != 2:
        raise ValueError(
          '%s:%d: expected exactly one <SEP>, got %r' % (path, line_no, line))
      q, a = parts
      yield {
        '_index': 'chatbot',
        'question': q,
        'answer': a,}
In [8]:
# Mount Google Drive at /content/gdrive (Colab-specific module); gen_data()
# reads its input file from a path under this mount point, so this cell has to
# run before gen_data() is consumed.
from google.colab import drive
drive.mount('/content/gdrive')
Mounted at /content/gdrive
In [9]:
# Connect with the client defaults and wait until the node answers: the server
# is started in the background earlier in the notebook and may still be booting
# when this cell runs.
es = Elasticsearch()
for _ in range(60):
  if es.ping():
    break
  time.sleep(1)
else:
  raise RuntimeError('Elasticsearch is not reachable; is the server running?')
print(es.ping())

# Start from an empty index so that re-running this cell neither fails because
# the index already exists nor indexes every Q/A pair a second time.
if es.indices.exists(index='chatbot'):
  es.indices.delete(index='chatbot')
es.indices.create(index='chatbot')

# Analyse `question` with the IK `ik_max_word` analyzer at both index and
# search time (the analyzer comes from the analysis-ik plugin installed above).
mapping = {
  'properties': {
    'question': {
      'type': 'text',
      'analyzer': 'ik_max_word',
      'search_analyzer': 'ik_max_word',}}}
es.indices.put_mapping(body=mapping, index='chatbot')

# Bulk-index every Q/A pair; the helper's return value is the cell output.
helpers.bulk(es, gen_data())
True
Out[9]:
(366, [])
In [10]:
# Interactive evaluation: read a query, show the best-matching Q/A pair, ask
# the user whether the answer was right, and keep a running tally until the
# user types `|quit`.
correct, total = 0., 0.
while True:
  text_inp = input('Input:')
  if text_inp == '|quit':
    break
  t0 = time.time()
  dsl = {
    # Only the top hit is used below, so do not fetch more than one.
    'size': 1,
    'query': {
      'match': {
        'question': text_inp,}}}
  hits = es.search(index='chatbot', body=dsl)['hits']['hits']
  elapsed = time.time() - t0
  if hits:
    print('Match:', hits[0]['_source'])
    print('%.2f sec' % elapsed)
    # Separate name for the verdict so the query text is not overwritten.
    verdict = input('Is the answer correct?')
    if verdict == 'yes':
      correct += 1.
  else:
    # No document matched: report it and count the query as a miss instead of
    # crashing on hits[0].
    print('Match: None')
    print('%.2f sec' % elapsed)
  total += 1
  print()
# Quitting before the first query leaves total at 0; report 0 accuracy rather
# than dividing by zero.
accuracy = correct / total if total else 0.
print('Correct: {} | Total: {} | Accuracy: {:.3f}'.format(correct, total, accuracy))
Input:早安
Match: {'question': '早安', 'answer': '早安'}
0.09 sec
Is the answer correct?yes

Input:天气
Match: {'question': '天气', 'answer': '天气还行 要查具体的吗'}
0.03 sec
Is the answer correct?yes

Input:几点了
Match: {'question': '现在几点了', 'answer': '要查一下现在的具体时间吗'}
0.03 sec
Is the answer correct?yes

Input:冷
Match: {'question': '冷', 'answer': '多穿衣服哈'}
0.01 sec
Is the answer correct?yes

Input:热死了
Match: {'question': '热', 'answer': '对啊, 热死人了'}
0.01 sec
Is the answer correct?yes

Input:激动死了
Match: {'question': '太激动了', 'answer': '发生什么好事了 说来听听'}
0.01 sec
Is the answer correct?yes

Input:我伤心
Match: {'question': '我好伤心', 'answer': '怎么了 说来听听'}
0.01 sec
Is the answer correct?yes

Input:厉害了
Match: {'question': '你好厉害啊', 'answer': '这么夸我我会骄傲的'}
0.01 sec
Is the answer correct?yes

Input:我爱你
Match: {'question': '我爱你', 'answer': '我也爱你'}
0.01 sec
Is the answer correct?yes

Input:我喜欢你
Match: {'question': '我喜欢看书', 'answer': '书是人类进步的阶梯'}
0.01 sec
Is the answer correct?no

Input:|quit
Correct: 9.0 | Total: 10.0 | Accuracy: 0.900