In [1]:
!pip install jieba
!pip install refo
!pip install rdflib
Requirement already satisfied: jieba in /usr/local/lib/python3.6/dist-packages (0.42.1)
Requirement already satisfied: refo in /usr/local/lib/python3.6/dist-packages (0.13)
Requirement already satisfied: rdflib in /usr/local/lib/python3.6/dist-packages (5.0.0)
Requirement already satisfied: isodate in /usr/local/lib/python3.6/dist-packages (from rdflib) (0.6.0)
Requirement already satisfied: pyparsing in /usr/local/lib/python3.6/dist-packages (from rdflib) (2.4.7)
Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from rdflib) (1.15.0)
In [2]:
from refo import finditer, Predicate, Star, Any

import re
import rdflib
import jieba
import jieba.posseg as pseg

Knowledge

In [3]:
URI_PREFIX = 'http://kgdemo.com/'

# Toy knowledge base: (subject, predicate, object) facts about car brands.
triples = [
  ('宝马', 'is_what', '宝马(BMW)是德国豪华汽车品牌'),
  ('宝马', 'is_how', '德系大品牌值得信赖,各方面口碑都很好'),
  ('宝马', 'is_compared', '各有千秋,但是人生苦短,我选宝马'),
  ('捷豹', 'is_what', '捷豹(Jaguar)英国豪华汽车品牌,英国皇室御用品牌,1935年诞生'),
  ('宾利', 'is_how', '举世闻名的豪华汽车制造品牌,非常昂贵哦'),
  ('帕加尼', 'is_what', '帕加尼(Pagani)是一家位于意大利摩德纳的超级跑车制造商,该车厂坚持手工打造车辆,其汽车产量非常少,价格也十分昂贵'),
  ('广汽本田', 'is_what', '广汽本田汽车有限公司(原广州本田汽车有限公司;简称广汽本田)于1998年7月1日成立,它是由广州汽车集团公司与日本本田技研工业株式会社共同出资组建的合资公司,双方各占50%股份,合作年限为30年'),
  ('北京奔驰', 'is_how', '大品牌值得信赖,我经常在宝马的后视镜里看到它'),
]

# Every distinct string appearing in any triple position is mapped to a URI
# under URI_PREFIX; the reverse map turns query results back into text.
resources = {term for fact in triples for term in fact}
resource2uri = {}
uri2resource = {}
for term in resources:
  uri = URI_PREFIX + term
  resource2uri[term] = uri
  uri2resource[uri] = term

# Load the facts into an in-memory RDF graph, all three positions as URIRefs.
graph = rdflib.Graph()
for fact in triples:
  graph.add(tuple(rdflib.URIRef(resource2uri[term]) for term in fact))

# Register the multi-character brand names in jieba's dictionary.
for brand in ('广汽本田', '北京奔驰'):
  jieba.add_word(brand)
Building prefix dict from the default dictionary ...
Loading model from cache /tmp/jieba.cache
Loading model cost 0.792 seconds.
Prefix dict has been built successfully.

Rule

In [4]:
class W(Predicate):
  """refo predicate matching a word by its token text and its POS tag.

  Args:
    token: regular expression the word's `token` must match in full.
    pos: regular expression the word's `pos` must match in full.

  Both default to '.*', i.e. "anything".
  """

  def __init__(self, token='.*', pos='.*'):
    # The patterns are compiled as given and anchored with fullmatch() in
    # match(). Appending '$' to the raw pattern would bind only to the last
    # alternative of an 'a|b' pattern and would also accept a trailing newline.
    self.token = re.compile(token)
    self.pos = re.compile(pos)
    super(W, self).__init__(self.match)

  def match(self, word):
    """Return True when both `word.token` and `word.pos` match completely."""
    token_ok = self.token.fullmatch(word.token) is not None
    pos_ok = self.pos.fullmatch(word.pos) is not None
    return token_ok and pos_ok


class Rule(object):
  """Couples a refo pattern (`condition`) with a callback (`action`)."""

  def __init__(self, condition=None, action=None):
    self.condition, self.action = condition, action

  def apply(self, sentence):
    """Feed the words covered by all pattern matches to the action.

    The slices of `sentence` spanned by each match from `finditer` are
    concatenated in order; the resulting (possibly empty) list is handed to
    `self.action`, whose return value is returned unchanged.
    """
    spans = (found.span() for found in finditer(self.condition, sentence))
    covered = [word for start, stop in spans for word in sentence[start:stop]]
    return self.action(covered)


class Word(object):
  """A segmented token together with its part-of-speech tag."""

  def __init__(self, token, pos):
    self.token, self.pos = token, pos
In [5]:
def what_is_xxx(matches):
  """Build the SPARQL query answering "what is <entity>?".

  Args:
    matches: words (objects with `.token` and `.pos`) covered by a rule match.

  Returns:
    A SPARQL SELECT string on the `is_what` predicate for the first usable
    entity word, or None when `matches` is empty or holds no such word.
  """
  if not matches:
    return None
  print("Query:", "  ".join([word.token+'|'+word.pos for word in matches]))

  # Spelled out with explicit newlines so the exact whitespace is visible.
  template = (
    "\n"
    "        PREFIX : <%s>\n"
    "        \n"
    "        SELECT DISTINCT ?o WHERE {\n"
    "            :%s :is_what ?o .\n"
    "        }\n"
    "        "
  )
  for word in matches:
    if word.pos not in ('nr', 'nz', 'ns', 'x'):
      continue
    # 'x' is a catch-all tag that can also cover punctuation. The token is
    # interpolated as a SPARQL prefixed name, so anything that is not a plain
    # word would produce a malformed query; skip it and try the next word.
    if re.fullmatch(r'\w[\w\-]*', word.token) is None:
      continue
    return template % (URI_PREFIX, word.token)
  return None


def how_is_xxx(matches):
  """Build the SPARQL query answering "how is <entity>?".

  Args:
    matches: words (objects with `.token` and `.pos`) covered by a rule match.

  Returns:
    A SPARQL SELECT string on the `is_how` predicate for the first usable
    entity word, or None when `matches` is empty or holds no such word.
  """
  if not matches:
    return None
  print("Query:", "  ".join([word.token+'|'+word.pos for word in matches]))

  # Spelled out with explicit newlines so the exact whitespace is visible.
  template = (
    "\n"
    "        PREFIX : <%s>\n"
    "        \n"
    "        SELECT DISTINCT ?o WHERE {\n"
    "            :%s :is_how ?o .\n"
    "        }\n"
    "        "
  )
  for word in matches:
    if word.pos not in ('nr', 'nz', 'ns', 'x'):
      continue
    # 'x' is a catch-all tag that can also cover punctuation. The token is
    # interpolated as a SPARQL prefixed name, so anything that is not a plain
    # word would produce a malformed query; skip it and try the next word.
    if re.fullmatch(r'\w[\w\-]*', word.token) is None:
      continue
    return template % (URI_PREFIX, word.token)
  return None


def xxx_compared_to(matches):
  """Build the SPARQL query answering "how does <entity> compare?".

  Args:
    matches: words (objects with `.token` and `.pos`) covered by a rule match.

  Returns:
    A SPARQL SELECT string on the `is_compared` predicate for the first
    usable entity word, or None when `matches` is empty or holds no such word.
  """
  if not matches:
    return None
  print("Query:", "  ".join([word.token+'|'+word.pos for word in matches]))

  # Spelled out with explicit newlines so the exact whitespace is visible.
  template = (
    "\n"
    "        PREFIX : <%s>\n"
    "        \n"
    "        SELECT DISTINCT ?o WHERE {\n"
    "            :%s :is_compared ?o .\n"
    "        }\n"
    "        "
  )
  for word in matches:
    # Accept every tag the comparison rule admits as an operand (nr/nz/ns/v).
    # Checking only 'nr' and 'v' skipped a leading brand tagged 'nz' or 'ns'
    # and built the query for the second brand instead.
    if word.pos not in ('nr', 'nz', 'ns', 'v'):
      continue
    # The token is interpolated as a SPARQL prefixed name; skip anything that
    # is not a plain word so the query stays well-formed.
    if re.fullmatch(r'\w[\w\-]*', word.token) is None:
      continue
    return template % (URI_PREFIX, word.token)
  return None
In [6]:
# POS tags under which brand names show up in this notebook's segmentation.
noun = W(pos='nr') | W(pos='nz') | W(pos='ns')

# Operand of a comparison: a noun, or a word tagged as a verb.
comparable = noun | W(pos='v')
# Subject of a "what is" / "how is" question: a noun or an 'x'-tagged word.
entity = noun | W(pos='x')

compare_ending = W('怎么样') | W('怎样') | (W('哪个') + (W('好') | W('更好')))
how_ending = W('怎么样') | W('怎样') | W('如何') | (W('好用') + W('吗'))
intro_verb = W('讲解') | W('介绍') | W('了解')

# Rules are tried in list order by the driver loop; the first one whose action
# returns a query wins.
rules = [
  # "<A> 和 <B> [比...] 怎么样 / 怎样 / 哪个(更)好"
  Rule(condition=comparable + W('和') + comparable + Star(W('比')) + compare_ending,
       action=xxx_compared_to),

  # "<entity> ... 怎么样 / 怎样 / 如何 / 好用吗"
  Rule(condition=entity + Star(Any(), greedy=False) + how_ending,
       action=how_is_xxx),

  # "<pronoun> 是 <entity>", "<entity> 是 <pronoun>", or "讲解/介绍/了解 ... <entity>"
  Rule(condition=(
         (W(pos='r') + W('是') + entity)
         | (entity + W('是') + W(pos='r'))
         | (intro_verb + Star(Any(), greedy=False) + entity)
       ),
       action=what_is_xxx),
]
In [7]:
# Run every sample utterance through the rules and print the answer of the
# first rule that produces a query.
for utt in ['宝马是什么',
            '我想了解一下宝马',
            '给我介绍一下宝马',
            '给我讲解一下捷豹这个汽车品牌',
            '给我介绍一下帕加尼',
            '我想了解一下广汽本田',
            '宝马这个牌子的汽车怎么样',
            '宾利这个牌子的汽车怎么样',
            '北京奔驰怎么样',
            '宝马如何呢',
            '宝马汽车好用吗',
            '宝马和奔驰比怎么样',
            '宝马和奔驰比哪个好',
            '宝马和奔驰比哪个更好',]:
  # Segment and tag once per utterance; the result does not depend on the
  # rule, so re-running jieba inside the rule loop was wasted work.
  tagged = [(word, tag) for word, tag in pseg.cut(utt)]
  sentence = [Word(word, tag) for word, tag in tagged]

  is_matched = False
  for rule in rules:
    db_query = rule.apply(sentence)
    if db_query:
      for row in graph.query(db_query):
        print(db_query)
        # Objects are stored as URIs; map them back to the original text.
        print('Output:', uri2resource[row.o.toPython()])
        print()
        print()
      is_matched = True
      break
  if not is_matched:
    print('Not Matched:', tagged)
Query: 宝马|nr  是|v  什么|r

        PREFIX : <http://kgdemo.com/>
        
        SELECT DISTINCT ?o WHERE {
            :宝马 :is_what ?o .
        }
        
Output: 宝马(BMW)是德国豪华汽车品牌


Query: 了解|v  一下|m  宝马|nr

        PREFIX : <http://kgdemo.com/>
        
        SELECT DISTINCT ?o WHERE {
            :宝马 :is_what ?o .
        }
        
Output: 宝马(BMW)是德国豪华汽车品牌


Query: 介绍|v  一下|m  宝马|nr

        PREFIX : <http://kgdemo.com/>
        
        SELECT DISTINCT ?o WHERE {
            :宝马 :is_what ?o .
        }
        
Output: 宝马(BMW)是德国豪华汽车品牌


Query: 讲解|v  一下|m  捷豹|nz

        PREFIX : <http://kgdemo.com/>
        
        SELECT DISTINCT ?o WHERE {
            :捷豹 :is_what ?o .
        }
        
Output: 捷豹(Jaguar)英国豪华汽车品牌,英国皇室御用品牌,1935年诞生


Query: 介绍|v  一下|m  帕加尼|nr

        PREFIX : <http://kgdemo.com/>
        
        SELECT DISTINCT ?o WHERE {
            :帕加尼 :is_what ?o .
        }
        
Output: 帕加尼(Pagani)是一家位于意大利摩德纳的超级跑车制造商,该车厂坚持手工打造车辆,其汽车产量非常少,价格也十分昂贵


Query: 了解|v  一下|m  广汽本田|x

        PREFIX : <http://kgdemo.com/>
        
        SELECT DISTINCT ?o WHERE {
            :广汽本田 :is_what ?o .
        }
        
Output: 广汽本田汽车有限公司(原广州本田汽车有限公司;简称广汽本田)于1998年7月1日成立,它是由广州汽车集团公司与日本本田技研工业株式会社共同出资组建的合资公司,双方各占50%股份,合作年限为30年


Query: 宝马|nr  这个|r  牌子|n  的|uj  汽车|n  怎么样|r

        PREFIX : <http://kgdemo.com/>
        
        SELECT DISTINCT ?o WHERE {
            :宝马 :is_how ?o .
        }
        
Output: 德系大品牌值得信赖,各方面口碑都很好


Query: 宾利|ns  这个|r  牌子|n  的|uj  汽车|n  怎么样|r

        PREFIX : <http://kgdemo.com/>
        
        SELECT DISTINCT ?o WHERE {
            :宾利 :is_how ?o .
        }
        
Output: 举世闻名的豪华汽车制造品牌,非常昂贵哦


Query: 北京奔驰|x  怎么样|r

        PREFIX : <http://kgdemo.com/>
        
        SELECT DISTINCT ?o WHERE {
            :北京奔驰 :is_how ?o .
        }
        
Output: 大品牌值得信赖,我经常在宝马的后视镜里看到它


Query: 宝马|nr  如何|r

        PREFIX : <http://kgdemo.com/>
        
        SELECT DISTINCT ?o WHERE {
            :宝马 :is_how ?o .
        }
        
Output: 德系大品牌值得信赖,各方面口碑都很好


Query: 宝马|nr  汽车|n  好用|v  吗|y

        PREFIX : <http://kgdemo.com/>
        
        SELECT DISTINCT ?o WHERE {
            :宝马 :is_how ?o .
        }
        
Output: 德系大品牌值得信赖,各方面口碑都很好


Query: 宝马|nr  和|c  奔驰|v  比|p  怎么样|r

        PREFIX : <http://kgdemo.com/>
        
        SELECT DISTINCT ?o WHERE {
            :宝马 :is_compared ?o .
        }
        
Output: 各有千秋,但是人生苦短,我选宝马


Query: 宝马|nr  和|c  奔驰|v  比|p  哪个|r  好|a

        PREFIX : <http://kgdemo.com/>
        
        SELECT DISTINCT ?o WHERE {
            :宝马 :is_compared ?o .
        }
        
Output: 各有千秋,但是人生苦短,我选宝马


Query: 宝马|nr  和|c  奔驰|v  比|p  哪个|r  更好|d

        PREFIX : <http://kgdemo.com/>
        
        SELECT DISTINCT ?o WHERE {
            :宝马 :is_compared ?o .
        }
        
Output: 各有千秋,但是人生苦短,我选宝马