# Read the text file holding the Atlassian, Cloudera, New Relic, Okta and Zuora earnings calls.
# The with-block closes the handle even if read() raises. `file` stays bound
# afterwards (as a closed handle), so later references to it remain valid.
filename = 'Saas_companies.txt'
with open(filename, 'rt') as file:
    text = file.read()
# import NLTK (Natural Language Toolkit)
# use tokenize package to split at punctuation other than periods and white space; import stopwords
import nltk
from nltk import word_tokenize
from nltk.corpus import stopwords
# Split the transcript into tokens; `tokens` itself is left unfiltered.
tokens = word_tokenize(text)
# NLTK's English stopword list, held as a set for fast membership tests.
stopwords_eng = set(nltk.corpus.stopwords.words('english'))
# One pass over the tokens:
#   * skip tokens of 2 characters or fewer (punctuation and other short fragments)
#   * skip purely numeric tokens
#   * lowercase what is left, because the stopword list is lowercase
#   * skip stopwords
words = []
for token in tokens:
    if len(token) <= 2 or token.isnumeric():
        continue
    lowered = token.lower()
    if lowered in stopwords_eng:
        continue
    words.append(lowered)
# Names of call participants.
# The with-block closes *this* handle (file_two) once the text has been read,
# rather than the transcript handle opened earlier.
filename_names = 'saasCallNames.txt'
with open(filename_names, 'rt') as file_two:
    text_two = file_two.read()
# Split the participant-name text into tokens.
tokens_names = word_tokenize(text_two)
# Keep tokens longer than 2 characters that are not purely numeric, lowercased
# so they compare equal to the (already lowercased) transcript words.
words_names = [
    name.lower()
    for name in tokens_names
    if len(name) > 2 and not name.isnumeric()
]
# Other terms (names etc.) that show up in a transcript but aren't useful for analysis.
terms = words_names
# Membership is tested once per transcript word, so look it up in a set
# instead of scanning the `terms` list every time.
terms_lookup = set(terms)
# Remove those terms from the transcript words.
words = [word for word in words if word not in terms_lookup]
# Check top words so far: build a frequency distribution of the cleaned words.
fdist = nltk.FreqDist(words)
# Print the 10 most common words, one "word;count" pair per line.
for word, frequency in fdist.most_common(10):
    print(u'{};{}'.format(word, frequency))
customers;251 're;236 think;205 quarter;165 cloud;151 revenue;144 year;142 million;135 growth;132 like;129
#import wordcloud and matplotlib for chart
from wordcloud import WordCloud, STOPWORDS
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
# Default figure size for the charts in this notebook.
matplotlib.rcParams['figure.figsize'] = (18.0, 15.0)
# WordCloud takes a single string, so join the word list with spaces.
words_string = " ".join(map(str, words))
# WordCloud's own stopword list, to catch anything the earlier filters missed.
other_stopwords = set(STOPWORDS)
# Settings for the WordCloud object.
cloud_settings = {
    "background_color": "white",
    "stopwords": other_stopwords,
    "width": 1600,
    "height": 900,
    "colormap": matplotlib.cm.inferno,
}
wc = WordCloud(**cloud_settings)
# Generate the cloud from the joined words.
wc.generate(words_string)
# Show the cloud without axes.
plt.figure()
plt.imshow(wc, interpolation="bilinear")
plt.axis("off")
(-0.5, 1599.5, 899.5, -0.5)
# Draw the same cloud on a larger canvas and save it as a JPEG file.
large_view = (20, 10)
plt.figure(figsize=large_view)
plt.imshow(wc, interpolation="bilinear")
plt.axis("off")
plt.savefig('Saas.jpg', format='jpg', dpi=600)
# Combine some similar terms by folding plurals into their singular form.
# The lookup is by exact token: a substring replace would also rewrite longer
# tokens that merely contain a plural (e.g. 'headquarters' -> 'headquarter').
singular_forms = {
    'customers': 'customer',
    'companies': 'company',
    'businesses': 'business',
    'quarters': 'quarter',
}
words = [singular_forms.get(word, word) for word in words]
# Remove other terms that are unhelpful to industry evaluation and are common
# to earnings calls ("call speak"), including the five company names.
# NOTE(review): 'year ' and ' year' contain a space and presumably never match
# a token; 'thing' and 'question' are listed twice. Confirm before pruning.
more_terms = ["'re",'think','quarter','year','million','billion','like',"'ve",'today','well','also','last',
              'operator','really','would','line','much','fiscal','want',"n't","'ll",'continue','one','thing',
              'time','way','around','lot','something','some','business','next','question','said','obviously',
              'give','given','going','thank','things','thing','get','seeing','thanks','could','look',
              'many','third','second','know','yes','guys','maybe','talk','use','bit','let','ago','earlier',
              'may','ask','result','see','come','say','pretty','question','company','great','first','right',
              'kind','results','good','afternoon','okay','two','new','relic','cloudera','zuora','atlassian',
              'okta','done','whether','comes','become','forward','looking','number','side','statement',
              'income','month','highlight','growth','market','help','actually','able','past','put','different',
              'believe','point','got','net','across','year ',' year','little','take','years','still','big',
              'year-over-year','sort','seen','strong','excited','expect','make']
# Membership is tested once per word, so look it up in a set instead of
# scanning the list each time; `more_terms` itself stays a list.
more_terms_lookup = set(more_terms)
words_edit = [word for word in words if word not in more_terms_lookup]
# Check top words after the extra filtering: frequency distribution of words_edit.
fdist = nltk.FreqDist(words_edit)
# Print the 10 most common words, one "word;count" pair per line.
for word, frequency in fdist.most_common(10):
    print(u'{};{}'.format(word, frequency))
customer;373 cloud;151 revenue;144 platform;88 data;73 software;68 identity;65 product;55 enterprise;54 operating;50
# WordCloud takes a single string, so join the filtered word list with spaces.
words_string_edit = " ".join(map(str, words_edit))
# WordCloud's own stopword list, to catch anything the earlier filters missed.
other_stopwords = set(STOPWORDS)
# Settings for the WordCloud object.
cloud_settings = {
    "background_color": "white",
    "stopwords": other_stopwords,
    "width": 1600,
    "height": 900,
    "colormap": matplotlib.cm.inferno,
}
wc = WordCloud(**cloud_settings)
# Generate the cloud from the joined words.
wc.generate(words_string_edit)
# Show the cloud without axes.
plt.figure()
plt.imshow(wc, interpolation="bilinear")
plt.axis("off")
(-0.5, 1599.5, 899.5, -0.5)
# Draw the filtered cloud on a larger canvas and save it as a JPEG file.
large_view = (20, 10)
plt.figure(figsize=large_view)
plt.imshow(wc, interpolation="bilinear")
plt.axis("off")
plt.savefig('SaaS2.jpg', format='jpg', dpi=600)
# Use concordance to show each occurrence of a given word together with some
# surrounding context.
# First create a text object. It is built from the raw `tokens`, not the
# filtered word lists, so the context keeps stopwords and punctuation.
text_obj = nltk.Text(tokens)
# Try some terms to see how they are used in context, starting with 'product'.
text_obj.concordance('product')
Displaying 25 of 55 matches: ery significant improvements in the product . We 're investing heavily in our p t . We 're investing heavily in our product every month , and this comes throug e , it 's pretty high value-to-cost product . So how do you capture more of the we believe in providing incredible product in the most affordable price . That t in the context of the data center product family , which we 've talked about ave kind of visibility into how the product 's being used and we have kind of d nnecting to the customer kind of in product to open up different paths and aven hat help joint customers across our product portfolios to have the best experie mers themselves can grow inside the product that they 're already in . So if th , is that the way we distribute our product , it 's really chosen by the end us the choice of a customer to run the product where it best fits them but provide move faster or there 's an existing product out there . We also have a very str igher price point to SSO or the MFA product to get access to it . But what can e we see the data , we can make the product more valuable for individual custom gacy competitor , it 's all about a product set that 's not being updated or no competitive side , we do have some product overlap with Duo and we continue to king at how we 're investing in the product , it 's consistent with that . Fred doing larger deal sizes of a single product than maybe what you were seeing bef find more and more ways to use the product , so they could start , for example do you feel about the growth of the product set over the next 12 months versus and we 're trying to help . I think product innovation is critical to us and we ut what 's ahead for us in terms of product innovation in releasing new capabil d our Workforce Identity Management product . Now why is that ? Well , Workforc enthusiasm has been around our new product roadmap . Customers love the idea o rger benefits . 
The engineering and product teams have already agreed on our jo
# Show how 'platform' is used in context.
text_obj.concordance('platform')
Displaying 25 of 88 matches: a whole , to our work on our Cloud platform , which you can think of really as much more that part of the overall platform that we offer that was driving the ns than the -- behind the firewall platform offerings that we have because , o assian builds . And we provide the platform for that to connect those vendors o to the -- try to solve more of a platform solution , kind of emphasizing the e -- JIRA , we got this incredible platform in JIRA that handles all the workf rkflows , I 'd say , on top of the platform that we 've already built . Operat ties through our independent cloud platform . As a reminder , we address two m Deloitte to find a single identity platform that could manage and secure both inue to innovate and expand on our platform . There are a couple of overarchin tely independent and neutral cloud platform for identity . Because our busines rs think of Okta as an independent platform that helps them futureproof their because we offer a single identity platform for every type of user in an organ in a consistent way from a single platform . The last thing I want to call ou e customers are successful on your platform that they 're able to successfully owever , since Okta is an enabling platform for people and technology , it has twork and our ability to offer one platform for every use case and our custome high , as we continue to scale our platform . Turning now to operating expense significantly in the Okta identity platform and our Okta integration network . tiatives and innovation across our platform capabilities . We remain focused o al expansion and innovation in our platform and network . In particular this q ccess we 've seen with our leading platform integration network and customer-f being this independent and neutral platform . So it is -- I would say , it is the benefits of being on the cloud platform . Threat insights is going very we worried about lock in to a certain platform , particularly Microsoft , so ther
# Show how 'acquisition' is used in context.
text_obj.concordance('acquisition')
Displaying 14 of 14 matches: flow . In October , we closed the acquisition of OpsGenie , a leader in inciden arquhar Yes , I think you have -- acquisition from Microsoft , we have n't seen ld say , judiciously exercise our acquisition model over time as another way fo ur installed base on the OpsGenie acquisition ? And I guess , given the price r sGenie purely on the basis of our acquisition and knowing that the increased in ove to acquire something . On the acquisition side , we 've got a very strong t at may be derived from our recent acquisition . Forward-looking statements invo Last quarter , I talked about our acquisition of ScaleFT as an important step i ve landscape relative to Cisco 's acquisition of Duo and to what extent you 're ng to see any time there 's a big acquisition like that , you 're going to see be tip note of IBM ’ s announced acquisition of Red Hat . Of course , the move d be to you given IBM ’ s pending acquisition of Red Hat ? Lew Cirne Sure . So . We think Red Hat , the Red Hat acquisition , the interesting thing about tha solutions . That 's a closer deal acquisition actually about a quarter of our n
# Show how 'arr' is used in context; the recorded output shows the lowercase
# query matching the uppercase 'ARR' tokens.
# NOTE(review): presumably ARR = annual recurring revenue; confirm.
text_obj.concordance('arr')
Displaying 11 of 11 matches: t hundred new deals over $ 100,000 in ARR , partners notice that as well and th customers with more than $ 100,000 of ARR at the conclusion of Q3 . Reflecting customers with more than $ 1 million ARR , representing 52 % of software reven at high -- greater than $ 100,000 of ARR or expand more than 100,000 of ARR . of ARR or expand more than 100,000 of ARR . That 's really our measure of a tru teady sequential growth in enterprise ARR and accounts paying more than $ 100,0 , which totaled more than 40 % of new ARR . Once again , New Relic Insights and . When you factor in the significant ARR growth of our installed base during t the quarter-end totals for enterprise ARR now at 56 % of our business , as well se business was approximately 56 % of ARR , up around 51 % as of the same perio ion deal with existing million-dollar ARR paid business account , which include
# Show how 'ACV' is used in context.
# NOTE(review): presumably ACV = annual contract value; confirm.
text_obj.concordance('ACV')
Displaying 2 of 2 matches: g metrics in Q3 . Customers over 100K ACV maintained a solid growth rate of 30 s to represent over 80 % of our total ACV . In addition , we are continuing to
# Show how 'solution' is used in context.
text_obj.concordance('solution')
Displaying 21 of 21 matches: -- try to solve more of a platform solution , kind of emphasizing the synergie ing now is as we provide more of a solution , we 're becoming more of a truste e . And where there 's an existing solution in the marketplace , where we beli nized that its outsourced identity solution lacked a full view of its customer enterprise wide workforce identity solution because of our credibility across oblems with a simple and intuitive solution and the ability to keep pace with ions driving more interest in your solution or is it even having an adverse ef r maybe it 's just more of a niche solution not a broader platform . So I thin , reliable , scalable performance solution for the government agencies for a engine , by delivering as a cloud solution . So , we 're getting better , but oming to North America , that 's a solution that 's been out there for quite s he only player to offer a complete solution from the edge to AI , increasingly , we will provide a comprehensive solution set for customers from the edge to ur customers provide that complete solution to see exactly what they need to s volve from being a tool to being a solution . And so , we wanted to – we begun mize New Relic to become that full solution rather than have to reinvent the e ands today , we ’ re a value-based solution , even though we ’ ve got attracti esearch , RevPro is the top ranked solution for revenue recognition software . d this to be a short-term band-aid solution . These companies are realizing th e back to putting in an automation solution , and they selected RevPro this qu partners are wrapping around your solution are getting larger , and you 're a
Some interesting terms jumped out of this exercise: customer, cloud, product and platform. They give us a sense of the areas of focus of management teams, investors and analysts.