You need Java 8, Spark NLP and PySpark installed in your environment.
# Environment setup: install Java 8, then the nlu and pyspark packages.
# The lines starting with `!` are IPython shell escapes, so this only runs
# inside a notebook/IPython session, not as a plain Python script.
import os
# Refresh the apt package index quietly; stdout is discarded.
! apt-get update -qq > /dev/null
# Install java
! apt-get install -y openjdk-8-jdk-headless -qq > /dev/null
# Export JAVA_HOME and put its bin/ directory first on PATH.
# NOTE(review): presumably this is where the openjdk-8 package above lands
# on amd64 images -- confirm for other architectures.
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
os.environ["PATH"] = os.environ["JAVA_HOME"] + "/bin:" + os.environ["PATH"]
# Install nlu together with pyspark pinned to 2.4; pip's stdout is discarded.
! pip install nlu pyspark==2.4 > /dev/null
import nlu
# NOTE(review): duplicate of the import above; harmless but redundant.
import nlu
# Load the 'sentiment' reference and predict on a single sentence; the
# recorded result below shows the sentiment label and its confidence.
nlu.load('sentiment').predict('I love NLU and rainy days!')
analyze_sentiment download started this may take some time. Approx size to download 4.9 MB [OK!]
sentence | sentiment_confidence | sentiment | checked | |
---|---|---|---|---|
origin_index | ||||
0 | I love NLU and rainy days! | 0.688000 | positive | [I, love, NLU, and, rainy, days, !] |
# Part-of-speech tagging: the recorded result below has one row per token
# with its POS tag.
nlu.load('pos').predict('POS assigns each token in a sentence a grammatical label')
pos_anc download started this may take some time. Approximate size to download 4.3 MB [OK!]
pos | token | |
---|---|---|
origin_index | ||
0 | NNP | POS |
0 | NNS | assigns |
0 | DT | each |
0 | NN | token |
0 | IN | in |
0 | DT | a |
0 | NN | sentence |
0 | DT | a |
0 | JJ | grammatical |
0 | NN | label |
# Named-entity recognition with output_level='chunk': the recorded result
# below has one row per extracted entity chunk.
nlu.load('ner').predict("John Snow Labs congratulates the Amarican John Biden to winning the American election!", output_level='chunk')
onto_recognize_entities_sm download started this may take some time. Approx size to download 159 MB [OK!]
entities_confidence | embeddings | entities | |
---|---|---|---|
origin_index | |||
0 | PERSON | [[-0.2747400104999542, 0.48680999875068665, -0... | John Snow Labs |
0 | PERSON | [[-0.2747400104999542, 0.48680999875068665, -0... | the Amarican |
0 | PERSON | [[-0.2747400104999542, 0.48680999875068665, -0... | John Biden |
0 | NORP | [[-0.2747400104999542, 0.48680999875068665, -0... | American |
# Same 'ner' reference with output_level='document': the recorded result
# below collects the entities into lists on a single row for the document.
nlu.load('ner').predict("John Snow Labs congratiulates John Biden to winning the American election!", output_level = 'document')
onto_recognize_entities_sm download started this may take some time. Approx size to download 159 MB [OK!]
entities_confidence | embeddings | entities | document | |
---|---|---|---|---|
origin_index | ||||
0 | [PERSON, PERSON, NORP] | [[-0.2747400104999542, 0.48680999875068665, -0... | [John Snow Labs, John Biden, American] | John Snow Labs congratiulates John Biden to wi... |
# Print the nlu.load() references available for the 'ner' action; the
# recorded output below lists them grouped by language.
nlu.print_components(action='ner')
For language <nl> NLU provides the following Models : nlu.load('nl.ner') returns Spark NLP model wikiner_6B_100 nlu.load('nl.ner.wikiner') returns Spark NLP model wikiner_6B_100 nlu.load('nl.ner.wikiner.glove.6B_100') returns Spark NLP model wikiner_6B_100 nlu.load('nl.ner.wikiner.glove.6B_300') returns Spark NLP model wikiner_6B_300 nlu.load('nl.ner.wikiner.glove.840B_300') returns Spark NLP model wikiner_840B_300 For language <en> NLU provides the following Models : nlu.load('en.ner') returns Spark NLP model ner_dl nlu.load('en.ner.dl') returns Spark NLP model ner_dl nlu.load('en.ner.dl.glove.6B_100d') returns Spark NLP model ner_dl nlu.load('en.ner.dl.bert') returns Spark NLP model ner_dl_bert nlu.load('en.ner.onto') returns Spark NLP model onto_100 nlu.load('en.ner.onto.glove.6B_100d') returns Spark NLP model onto_100 nlu.load('en.ner.onto.glove.840B_300d') returns Spark NLP model onto_300 nlu.load('en.ner.onto.bert.cased_base') returns Spark NLP model onto_bert_base_cased nlu.load('en.ner.onto.bert.cased_large') returns Spark NLP model onto_bert_large_cased nlu.load('en.ner.onto.electra.uncased_large') returns Spark NLP model onto_electra_large_uncased nlu.load('en.ner.onto.bert.small_l2_128') returns Spark NLP model onto_small_bert_L2_128 nlu.load('en.ner.onto.bert.small_l4_256') returns Spark NLP model onto_small_bert_L4_256 nlu.load('en.ner.onto.bert.small_l4_512') returns Spark NLP model onto_small_bert_L4_512 nlu.load('en.ner.onto.bert.small_l8_512') returns Spark NLP model onto_small_bert_L8_512 nlu.load('en.ner.onto.electra.uncased_small') returns Spark NLP model onto_electra_small_uncased nlu.load('en.ner.onto.electra.uncased_base') returns Spark NLP model onto_electra_base_uncased nlu.load('en.ner.bert_base_cased') returns Spark NLP model ner_dl_bert_base_cased nlu.load('en.ner.ade') returns Spark NLP model ade_ner_100d nlu.load('en.ner.aspect_sentiment') returns Spark NLP model ner_aspect_based_sentiment nlu.load('en.ner.glove.100d') returns Spark 
NLP model ner_dl_sentence nlu.load('en.ner.atis') returns Spark NLP model nerdl_atis_840b_300d nlu.load('en.ner.airline') returns Spark NLP model nerdl_atis_840b_300d nlu.load('en.ner.aspect.airline') returns Spark NLP model nerdl_atis_840b_300d nlu.load('en.ner.aspect.atis') returns Spark NLP model nerdl_atis_840b_300d For language <fr> NLU provides the following Models : nlu.load('fr.ner') returns Spark NLP model wikiner_840B_300 nlu.load('fr.ner.wikiner') returns Spark NLP model wikiner_840B_300 nlu.load('fr.ner.wikiner.glove.840B_300') returns Spark NLP model wikiner_840B_300 nlu.load('fr.ner.wikiner.glove.6B_300') returns Spark NLP model wikiner_6B_300 For language <de> NLU provides the following Models : nlu.load('de.ner') returns Spark NLP model wikiner_840B_300 nlu.load('de.ner.wikiner') returns Spark NLP model wikiner_840B_300 nlu.load('de.ner.wikiner.glove.840B_300') returns Spark NLP model wikiner_840B_300 nlu.load('de.ner.wikiner.glove.6B_300') returns Spark NLP model wikiner_6B_300 For language <it> NLU provides the following Models : nlu.load('it.ner') returns Spark NLP model wikiner_840B_300 nlu.load('it.ner.wikiner.glove.6B_300') returns Spark NLP model wikiner_6B_300 For language <no> NLU provides the following Models : nlu.load('no.ner') returns Spark NLP model norne_6B_100 nlu.load('no.ner.norne') returns Spark NLP model norne_6B_100 nlu.load('no.ner.norne.glove.6B_100') returns Spark NLP model norne_6B_100 nlu.load('no.ner.norne.glove.6B_300') returns Spark NLP model norne_6B_300 nlu.load('no.ner.norne.glove.840B_300') returns Spark NLP model norne_840B_300 For language <pl> NLU provides the following Models : nlu.load('pl.ner') returns Spark NLP model wikiner_6B_100 nlu.load('pl.ner.wikiner') returns Spark NLP model wikiner_6B_100 nlu.load('pl.ner.wikiner.glove.6B_100') returns Spark NLP model wikiner_6B_100 nlu.load('pl.ner.wikiner.glove.6B_300') returns Spark NLP model wikiner_6B_300 nlu.load('pl.ner.wikiner.glove.840B_300') returns Spark NLP 
model wikiner_840B_300 For language <pt> NLU provides the following Models : nlu.load('pt.ner') returns Spark NLP model wikiner_6B_100 nlu.load('pt.ner.wikiner.glove.6B_100') returns Spark NLP model wikiner_6B_100 nlu.load('pt.ner.wikiner.glove.6B_300') returns Spark NLP model wikiner_6B_300 nlu.load('pt.ner.wikiner.glove.840B_300') returns Spark NLP model wikiner_840B_300 For language <ru> NLU provides the following Models : nlu.load('ru.ner') returns Spark NLP model wikiner_6B_100 nlu.load('ru.ner.wikiner') returns Spark NLP model wikiner_6B_100 nlu.load('ru.ner.wikiner.glove.6B_100') returns Spark NLP model wikiner_6B_100 nlu.load('ru.ner.wikiner.glove.6B_300') returns Spark NLP model wikiner_6B_300 nlu.load('ru.ner.wikiner.glove.840B_300') returns Spark NLP model wikiner_840B_300 For language <es> NLU provides the following Models : nlu.load('es.ner') returns Spark NLP model wikiner_6B_100 nlu.load('es.ner.wikiner') returns Spark NLP model wikiner_6B_100 nlu.load('es.ner.wikiner.glove.6B_100') returns Spark NLP model wikiner_6B_100 nlu.load('es.ner.wikiner.glove.6B_300') returns Spark NLP model wikiner_6B_300 nlu.load('es.ner.wikiner.glove.840B_300') returns Spark NLP model wikiner_840B_300 For language <ar> NLU provides the following Models : nlu.load('ar.ner') returns Spark NLP model aner_cc_300d nlu.load('ar.ner.aner') returns Spark NLP model aner_cc_300d For language <fi> NLU provides the following Models : nlu.load('fi.ner') returns Spark NLP model wikiner_6B_100 nlu.load('fi.ner.6B_100') returns Spark NLP model wikiner_6B_100 nlu.load('fi.ner.6B_300') returns Spark NLP model wikiner_6B_300 nlu.load('fi.ner.840B_300') returns Spark NLP model wikiner_840B_300 nlu.load('fi.ner.6B_100d') returns Spark NLP model finnish_ner_6B_100 nlu.load('fi.ner.6B_300d') returns Spark NLP model finnish_ner_6B_300 nlu.load('fi.ner.840B_300d') returns Spark NLP model finnish_ner_840B_300 For language <he> NLU provides the following Models : nlu.load('he.ner') returns Spark 
NLP model hebrewner_cc_300d nlu.load('he.ner.cc_300d') returns Spark NLP model hebrewner_cc_300d For language <da> NLU provides the following Models : nlu.load('da.ner') returns Spark NLP model dane_ner_6B_100 nlu.load('da.ner.6B_100D') returns Spark NLP model dane_ner_6B_100 nlu.load('da.ner.6B_300D') returns Spark NLP model dane_ner_6B_300 nlu.load('da.ner.840B_300D') returns Spark NLP model dane_ner_840B_300 For language <ja> NLU provides the following Models : nlu.load('ja.ner') returns Spark NLP model ner_ud_gsd_glove_840B_300d nlu.load('ja.ner.ud_gsd') returns Spark NLP model ner_ud_gsd_glove_840B_300d nlu.load('ja.ner.ud_gsd.glove_840B_300D') returns Spark NLP model ner_ud_gsd_glove_840B_300d For language <fa> NLU provides the following Models : nlu.load('fa.ner') returns Spark NLP model personer_cc_300d nlu.load('fa.ner.person') returns Spark NLP model personer_cc_300d nlu.load('fa.ner.person.cc_300d') returns Spark NLP model personer_cc_300d For language <sv> NLU provides the following Models : nlu.load('sv.ner') returns Spark NLP model swedish_ner_6B_100 nlu.load('sv.ner.6B_100') returns Spark NLP model swedish_ner_6B_100 nlu.load('sv.ner.6B_300') returns Spark NLP model swedish_ner_6B_300 nlu.load('sv.ner.840B_300') returns Spark NLP model swedish_ner_840B_300 For language <th> NLU provides the following Models : nlu.load('th.ner.lst20.glove_840B_300D') returns Spark NLP model ner_lst20_glove_840B_300d For language <tr> NLU provides the following Models : nlu.load('tr.ner') returns Spark NLP model turkish_ner_840B_300 nlu.load('tr.ner.bert') returns Spark NLP model turkish_ner_bert For language <zh> NLU provides the following Models : nlu.load('zh.ner') returns Spark NLP model ner_msra_bert_768d nlu.load('zh.ner.bert') returns Spark NLP model ner_msra_bert_768d nlu.load('zh.ner.msra.bert_768D') returns Spark NLP model ner_msra_bert_768d nlu.load('zh.ner.weibo.bert_768d') returns Spark NLP model ner_weibo_bert_768d For language <ur> NLU provides the 
following Models : nlu.load('ur.ner') returns Spark NLP model uner_mk_140M_300d nlu.load('ur.ner.mk_140M_300d') returns Spark NLP model uner_mk_140M_300d For language <ko> NLU provides the following Models : nlu.load('ko.ner') returns Spark NLP model ner_kmou_glove_840B_300d nlu.load('ko.ner.kmou') returns Spark NLP model ner_kmou_glove_840B_300d nlu.load('ko.ner.kmou.glove_840B_300d') returns Spark NLP model ner_kmou_glove_840B_300d
# Token-level BERT embeddings: the recorded result below has one embedding
# vector per token.
nlu.load('bert').predict("Albert and Elmo are pretty good freidns")
small_bert_L2_128 download started this may take some time. Approximate size to download 16.1 MB [OK!]
bert_embeddings | token | |
---|---|---|
origin_index | ||
0 | [-1.2644212245941162, 1.0388842821121216, 0.42... | Albert |
0 | [-1.0341346263885498, 0.35990777611732483, 0.2... | and |
0 | [-1.5926620960235596, -0.32061171531677246, -0... | Elmo |
0 | [-0.3129887580871582, 0.2978755831718445, 0.10... | are |
0 | [0.5073671936988831, -0.35482677817344666, 0.0... | pretty |
0 | [-0.6654903888702393, 0.050630949437618256, -0... | good |
0 | [-2.3138480186462402, 0.690037727355957, -0.05... | freidns |
# Token-level ELMo embeddings for the same sentence: the recorded result
# below has one embedding vector per token.
nlu.load('elmo').predict("Albert and Elmo are pretty good freidns")
elmo download started this may take some time. Approximate size to download 334.1 MB [OK!]
elmo_embeddings | token | |
---|---|---|
origin_index | ||
0 | [-0.9555240273475647, -1.0100127458572388, 0.7... | Albert |
0 | [-0.02477884292602539, -0.20155462622642517, -... | and |
0 | [0.6083736419677734, 0.20088991522789001, 0.42... | Elmo |
0 | [-0.031240105628967285, 0.08035830408334732, -... | are |
0 | [0.3517477512359619, -0.24238181114196777, -0.... | pretty |
0 | [0.5430472493171692, -0.19053488969802856, -0.... | good |
0 | [-0.6736612319946289, -0.15871864557266235, 0.... | freidns |
# Sentence-level BERT embeddings: the recorded result below has a single
# embedding for the whole input document rather than one per token.
nlu.load('embed_sentence.bert').predict("get me sum embeddings for these tokens")
sent_small_bert_L2_128 download started this may take some time. Approximate size to download 16.1 MB [OK!]
document | embed_sentence_bert_embeddings | |
---|---|---|
origin_index | ||
0 | get me sum embeddings for these tokens | [-0.8406468629837036, 0.3447624742984772, -0.0... |
# Print the nlu.load() references available for the 'embed' action; the
# recorded output below lists them grouped by language.
nlu.print_components(action='embed')
For language <en> NLU provides the following Models : nlu.load('en.embed') returns Spark NLP model glove_100d nlu.load('en.embed.glove') returns Spark NLP model glove_100d nlu.load('en.embed.glove.100d') returns Spark NLP model glove_100d nlu.load('en.embed.bert') returns Spark NLP model bert_base_uncased nlu.load('en.embed.bert.base_uncased') returns Spark NLP model bert_base_uncased nlu.load('en.embed.bert.base_cased') returns Spark NLP model bert_base_cased nlu.load('en.embed.bert.large_uncased') returns Spark NLP model bert_large_uncased nlu.load('en.embed.bert.large_cased') returns Spark NLP model bert_large_cased nlu.load('en.embed.biobert') returns Spark NLP model biobert_pubmed_base_cased nlu.load('en.embed.biobert.pubmed_base_cased') returns Spark NLP model biobert_pubmed_base_cased nlu.load('en.embed.biobert.pubmed_large_cased') returns Spark NLP model biobert_pubmed_large_cased nlu.load('en.embed.biobert.pmc_base_cased') returns Spark NLP model biobert_pmc_base_cased nlu.load('en.embed.biobert.pubmed_pmc_base_cased') returns Spark NLP model biobert_pubmed_pmc_base_cased nlu.load('en.embed.biobert.clinical_base_cased') returns Spark NLP model biobert_clinical_base_cased nlu.load('en.embed.biobert.discharge_base_cased') returns Spark NLP model biobert_discharge_base_cased nlu.load('en.embed.elmo') returns Spark NLP model elmo nlu.load('en.embed.use') returns Spark NLP model tfhub_use nlu.load('en.embed.albert') returns Spark NLP model albert_base_uncased nlu.load('en.embed.albert.base_uncased') returns Spark NLP model albert_base_uncased nlu.load('en.embed.albert.large_uncased') returns Spark NLP model albert_large_uncased nlu.load('en.embed.albert.xlarge_uncased') returns Spark NLP model albert_xlarge_uncased nlu.load('en.embed.albert.xxlarge_uncased') returns Spark NLP model albert_xxlarge_uncased nlu.load('en.embed.xlnet') returns Spark NLP model xlnet_base_cased nlu.load('en.embed.xlnet_base_cased') returns Spark NLP model xlnet_base_cased 
nlu.load('en.embed.xlnet_large_cased') returns Spark NLP model xlnet_large_cased nlu.load('en.embed.electra') returns Spark NLP model electra_small_uncased nlu.load('en.embed.electra.small_uncased') returns Spark NLP model electra_small_uncased nlu.load('en.embed.electra.base_uncased') returns Spark NLP model electra_base_uncased nlu.load('en.embed.electra.large_uncased') returns Spark NLP model electra_large_uncased nlu.load('en.embed.covidbert') returns Spark NLP model covidbert_large_uncased nlu.load('en.embed.covidbert.large_uncased') returns Spark NLP model covidbert_large_uncased nlu.load('en.embed.bert.small_L2_128') returns Spark NLP model small_bert_L2_128 nlu.load('en.embed.bert.small_L4_128') returns Spark NLP model small_bert_L4_128 nlu.load('en.embed.bert.small_L6_128') returns Spark NLP model small_bert_L6_128 nlu.load('en.embed.bert.small_L8_128') returns Spark NLP model small_bert_L8_128 nlu.load('en.embed.bert.small_L10_128') returns Spark NLP model small_bert_L10_128 nlu.load('en.embed.bert.small_L12_128') returns Spark NLP model small_bert_L12_128 nlu.load('en.embed.bert.small_L2_256') returns Spark NLP model small_bert_L2_256 nlu.load('en.embed.bert.small_L4_256') returns Spark NLP model small_bert_L4_256 nlu.load('en.embed.bert.small_L6_256') returns Spark NLP model small_bert_L6_256 nlu.load('en.embed.bert.small_L8_256') returns Spark NLP model small_bert_L8_256 nlu.load('en.embed.bert.small_L10_256') returns Spark NLP model small_bert_L10_256 nlu.load('en.embed.bert.small_L12_256') returns Spark NLP model small_bert_L12_256 nlu.load('en.embed.bert.small_L2_512') returns Spark NLP model small_bert_L2_512 nlu.load('en.embed.bert.small_L4_512') returns Spark NLP model small_bert_L4_512 nlu.load('en.embed.bert.small_L6_512') returns Spark NLP model small_bert_L6_512 nlu.load('en.embed.bert.small_L8_512') returns Spark NLP model small_bert_L8_512 nlu.load('en.embed.bert.small_L10_512') returns Spark NLP model small_bert_L10_512 
nlu.load('en.embed.bert.small_L12_512') returns Spark NLP model small_bert_L12_512 nlu.load('en.embed.bert.small_L2_768') returns Spark NLP model small_bert_L2_768 nlu.load('en.embed.bert.small_L4_768') returns Spark NLP model small_bert_L4_768 nlu.load('en.embed.bert.small_L6_768') returns Spark NLP model small_bert_L6_768 nlu.load('en.embed.bert.small_L8_768') returns Spark NLP model small_bert_L8_768 nlu.load('en.embed.bert.small_L10_768') returns Spark NLP model small_bert_L10_768 nlu.load('en.embed.bert.small_L12_768') returns Spark NLP model small_bert_L12_768 For language <ar> NLU provides the following Models : nlu.load('ar.embed') returns Spark NLP model arabic_w2v_cc_300d nlu.load('ar.embed.cbow') returns Spark NLP model arabic_w2v_cc_300d nlu.load('ar.embed.cbow.300d') returns Spark NLP model arabic_w2v_cc_300d nlu.load('ar.embed.aner') returns Spark NLP model arabic_w2v_cc_300d nlu.load('ar.embed.aner.300d') returns Spark NLP model arabic_w2v_cc_300d nlu.load('ar.embed.glove') returns Spark NLP model arabic_w2v_cc_300d For language <fi> NLU provides the following Models : nlu.load('fi.embed.bert.') returns Spark NLP model bert_finnish_cased nlu.load('fi.embed.bert.cased.') returns Spark NLP model bert_finnish_cased nlu.load('fi.embed.bert.uncased.') returns Spark NLP model bert_finnish_uncased For language <he> NLU provides the following Models : nlu.load('he.embed') returns Spark NLP model hebrew_cc_300d nlu.load('he.embed.glove') returns Spark NLP model hebrew_cc_300d nlu.load('he.embed.cbow_300d') returns Spark NLP model hebrew_cc_300d For language <fa> NLU provides the following Models : nlu.load('fa.embed') returns Spark NLP model persian_w2v_cc_300d nlu.load('fa.embed.word2vec') returns Spark NLP model persian_w2v_cc_300d nlu.load('fa.embed.word2vec.300d') returns Spark NLP model persian_w2v_cc_300d For language <zh> NLU provides the following Models : nlu.load('zh.embed') returns Spark NLP model bert_base_chinese nlu.load('zh.embed.bert') returns 
Spark NLP model bert_base_chinese For language <ur> NLU provides the following Models : nlu.load('ur.embed') returns Spark NLP model urduvec_140M_300d nlu.load('ur.embed.urdu_vec_140M_300d') returns Spark NLP model urduvec_140M_300d For language <xx> NLU provides the following Models : nlu.load('xx.embed') returns Spark NLP model glove_840B_300 nlu.load('xx.embed.glove.840B_300') returns Spark NLP model glove_840B_300 nlu.load('xx.embed.glove.6B_300') returns Spark NLP model glove_6B_300 nlu.load('xx.embed.bert_multi_cased') returns Spark NLP model bert_multi_cased nlu.load('xx.embed.bert') returns Spark NLP model bert_multi_cased