When trying to figure out what went into a program, look at
Try to find the main function, and start delving from there.
Libraries contain functions and data types. They are used to organize code and to package large pieces of functionality into reusable units.
# Python
import re # regular expressions
import requests # web requests
import pandas as pd # data science computation
import numpy as np # numerical computation
import matplotlib.pyplot as plt # plotting
# R
library(ggplot2) # plotting
library(tidyverse) # data wrangling
library(cluster) # data clustering
library(slam) # numerical computation
library(tm) # text mining
library(SnowballC) # word stemming
library(wordcloud) # word clouds
Loading required package: NLP Attaching package: ‘NLP’ The following object is masked from ‘package:ggplot2’: annotate Loading required package: RColorBrewer
// Scala
import $ivy.`com.github.tototoshi::scala-csv:1.3.5` // this one is just for the notebook, and not usual Scala
import com.github.tototoshi.csv._
import scala.io.Source
import scala.collection.JavaConverters._
import $ivy.$ // this one is just for the notebook, and not usual Scala import com.github.tototoshi.csv._ import scala.io.Source import scala.collection.JavaConverters._
Functions allow you to package code into reusable units, and are used to organize a codebase. A function has zero or one output, and as many input parameters as you like.
# Python
import re
def standardize(text):
    """Normalize a piece of text.

    Replaces '.', ',', '?' and '!' with spaces, removes apostrophes,
    lower-cases the result and collapses every run of whitespace into a
    single space. Leading/trailing whitespace is collapsed, not stripped.
    """
    text = text.replace("."," ").replace(","," ").replace("?"," ").replace("!"," ").replace("'","").lower()
    # Raw string: "\s" in a plain string literal is an invalid escape sequence.
    return re.sub(r"\s+", " ", text)
print(standardize("Where are we? I don't know!"))
def sum(values):
    """Return the total of the numbers in values (0 when values is empty).

    Note: defining this at module level shadows Python's built-in sum().
    """
    # Use a distinct name for the accumulator so it does not shadow the function itself.
    total = 0
    for value in values:
        total += value
    return total
print(sum([1,2,3]))
where are we i dont know 6
# R
# Normalize a string: remove apostrophes, turn ! ? , . into spaces,
# collapse each run of whitespace into one space, and lower-case the result.
standardize <- function(text) {
  cleaned <- gsub("'", "", text)
  for (pattern in c("!", "\\?", ",", "\\.")) {
    cleaned <- gsub(pattern, " ", cleaned)
  }
  cleaned <- gsub("\\s+", " ", cleaned)
  tolower(cleaned)
}
print(standardize("Where are we? I don't know!"))
# Add up the elements of `values` with an explicit loop; yields 0 for empty input.
# Note: this definition masks base R's sum() in the environment where it is defined.
sum <- function(values) {
  total <- 0
  for (value in values) total <- total + value
  total
}
print(sum(c(1,2,3)))
[1] "where are we i dont know " [1] 6
// Scala
/** Normalizes text: replaces `.`, `,`, `?` and `!` with spaces, removes apostrophes,
  * lower-cases the result and collapses each run of whitespace into a single space.
  */
def standardize(text: String) = {
  val withoutPunctuation =
    Seq(".", ",", "?", "!").foldLeft(text)((acc, mark) => acc.replace(mark, " "))
  val withoutApostrophes = withoutPunctuation.replace("'", "")
  withoutApostrophes.toLowerCase().replaceAll("\\s+", " ")
}
println(standardize("Where are we? I don't know!"))
/** Imperative sum: adds every element of `values` to a mutable running total.
  * Returns 0 for an empty sequence.
  */
def sum1(values: Seq[Int]) = {
  var total = 0
  values.foreach(value => total += value)
  total
}
// here's a functional variant for sum
// foldLeft starts from 0, so an empty Seq yields 0 (just like sum1) instead of
// the UnsupportedOperationException that reduce throws on empty input
def sum2(values: Seq[Int]) = values.foldLeft(0)(_ + _)
println(sum1(Seq(1,2,3)))
println(sum2(Seq(1,2,3)))
where are we i dont know 6 6
defined function standardize defined function sum1 defined function sum2
In many programming languages, methods are functions associated with data types, with a different syntax for specifying the key parameter:
# Python
# Method calls chain left to right: each replace() works on the result of the previous call.
# Every "a" becomes "b" first, then every "b" becomes "X".
print("abab".replace("a","b").replace("b","X"))
# Every "b" becomes "X" first, then the remaining "a"s become "b".
print("abab".replace("b","X").replace("a","b"))
XXXX bXbX
// Scala
// Method calls chain left to right: each replace works on the result of the previous call.
// Every "a" becomes "b" first, then every "b" becomes "X".
println("abab".replace("a","b").replace("b","X"))
// Every "b" becomes "X" first, then the remaining "a"s become "b".
println("abab".replace("b","X").replace("a","b"))
XXXX bXbX
R doesn't really believe in methods.
Operators are yet another, easier syntax for core functions. In Python and Scala, they really are syntactic sugar for methods, but in R they're a separate language construct.
# Python
# The + operator on ints is shorthand for calling the __add__ method,
# so the next two lines compute the same value.
print((5).__add__(3).__add__(7))
print(5+3+7)
values = [1,2]
# extend() and += both add the items of the right-hand list to values in place.
values.extend([3])
values += [4]
print(values)
15 15 [1, 2, 3, 4]
// Scala
import scala.collection.mutable.ArrayBuffer
// + is an ordinary method on Int, so it can also be called with dot-and-parentheses
// syntax; the next two lines compute the same value.
println(5.+(3).+(7))
println(5+3+7)
val values = ArrayBuffer(1,2)
// += is likewise a method on ArrayBuffer: both forms append one element in place.
values.+=(3)
values += 4
print(values)
15 15 ArrayBuffer(1, 2, 3, 4)
import scala.collection.mutable.ArrayBuffer values: ArrayBuffer[Int] = ArrayBuffer(1, 2, 3, 4) res3_4: ArrayBuffer[Int] = ArrayBuffer(1, 2, 3, 4) res3_5: ArrayBuffer[Int] = ArrayBuffer(1, 2, 3, 4)
Variables allow you to store data and refer to it using self-defined symbols in your code.
# Python
# Store two values under names of our own choosing ...
name = "Eetu"
age = 18
# ... and refer to them by those names later on.
# The branch bodies must be indented: Python uses indentation to delimit blocks.
if age>=18:
    print(name + " is an adult")
else:
    print(name + " is a child")
Eetu is an adult
# R
# Bind two values to names, then use the names in the condition and in the message.
name <- "Eetu"
age <- 18
# paste0() concatenates its arguments without any separator.
if (age >= 18) {
  print(paste0(name, " is an adult"))
} else {
  print(paste0(name, " is a child"))
}
[1] "Eetu is an adult"
// Scala
// Bind two values to names, then use the names in the condition and in the message.
val name = "Eetu"
val age = 18
if (age >= 18) {
  println(s"$name is an adult")
} else {
  println(s"$name is a child")
}
Eetu is an adult
name: String = "Eetu" age: Int = 18
The if statement is a program flow control statement that allows you to choose between alternative courses of action based on data.
# Python
name = "Eetu"
age = 18
# Conditions are tried top to bottom and only the first true branch runs, so the
# narrower age>100 test must come before age>65; otherwise it can never be reached.
if age<18:
    print(name + " is a child")
elif age>100:
    print(name + " is ancient")
elif age>65:
    print(name + " is old")
else:
    print(name + " is an adult")
Eetu is an adult
# R
name <- "Eetu"
age <- 18
# Conditions are tried top to bottom and only the first true branch runs, so the
# narrower age>100 test must come before age>65; otherwise it can never be reached.
if (age<18) {
  print(paste(name, "is a child"))
} else if (age>100) {
  print(paste(name, "is ancient"))
} else if (age>65) {
  print(paste(name, "is old"))
} else {
  print(paste(name,"is an adult"))
}
[1] "Eetu is an adult"
// Scala
val name = "Eetu"
val age = 18
// Conditions are tried top to bottom and only the first true branch runs, so the
// narrower age>100 test must come before age>65; otherwise it can never be reached.
if (age<18)
println(name + " is a child")
else if (age>100)
println(name + " is ancient")
else if (age>65)
println(name + " is old")
else
println(name + " is an adult")
Eetu is an adult
name: String = "Eetu" age: Int = 18
Some languages, such as Scala and R, have constructs that make certain if/else chains a bit easier to write:
// Scala
val name = "Batman"
// match is an expression: pick the greeting first, then print it once.
// The last case binds any other value to `other` and acts as the default.
println(name match {
  case "John"        => "Hello Johnny"
  case "Bruce Wayne" => "Hello Batman"
  case other         => s"Hello $other"
})
Hello Batman
name: String = "Batman"
# R
name <- "Batman"
# switch() returns the value of the matching alternative; the final unnamed
# argument is the default used when no name matches. Print the chosen greeting once.
greeting <- switch(name,
  "John" = "Hello Johnny",
  "Bruce Wayne" = "Hello Batman",
  paste("Hello", name)
)
print(greeting)
[1] "Hello Batman"
The while loop is a general flow control structure for doing something as long as a condition holds.
# Python
ages = [ 15, 17, 19, 20, 55, 90 ]

# Linear search: advance i while ages[i] is still below 18.
# The i<len(ages) guard ends the loop at the end of the list instead of
# raising IndexError when no such age exists.
i = 0
while i<len(ages) and ages[i]<18:
    i+=1
if i<len(ages):
    print("First age over 18 (age nr. "+str(i+1)+"): "+str(ages[i]))
else:
    print("No age over 18 found")

# Add up all ages by index, then divide by the count to get the average.
i = 0
agesum = 0
while i<len(ages):
    agesum += ages[i]
    i+=1
print("Average age: "+str(agesum/len(ages)))
First age over 18 (age nr. 3): 19 Average age: 36.0
# R
ages <- c(15, 17, 19, 20, 55, 90)

# Linear search: advance i while ages[i] is still below 18.
# The i <= length(ages) guard matters: indexing past the end yields NA, and
# a while() condition that evaluates to NA is an error.
i <- 1
while (i <= length(ages) && ages[i] < 18) i <- i + 1
if (i <= length(ages)) {
  print(paste("First age over 18 (age nr. ",i,"): ",ages[i],sep=""))
} else {
  print("No age over 18 found")
}

# Add up all ages by index, then divide by the count to get the average.
i <- 1
agesum <- 0
while (i <= length(ages)) {
  agesum <- agesum + ages[i]
  i <- i + 1
}
# paste() already puts a space between its arguments, so the label has no trailing space.
print(paste("Average age:", agesum/length(ages)))
[1] "First age over 18 (age nr. 3): 19" [1] "Average age: 36"
// Scala
val ages = Seq(15, 17, 19, 20, 55, 90)

// Linear search: advance i while ages(i) is still below 18.
// The i < ages.length guard ends the loop at the end of the sequence instead of
// throwing IndexOutOfBoundsException when no such age exists.
var i = 0
while (i < ages.length && ages(i) < 18) i += 1
if (i < ages.length) {
  println("First age over 18 (age nr. "+(i+1)+"): "+ages(i))
} else {
  println("No age over 18 found")
}

// Add up all ages by index, then divide by the count to get the average.
i = 0
var agesum = 0
while (i<ages.length) {
  agesum += ages(i)
  i += 1
}
// NOTE: agesum and ages.length are both Int, so / is integer division here and
// any fractional part of the average is dropped.
println("Average age: "+agesum/ages.length)
First age over 18 (age nr. 3): 19 Average age: 36
ages: Seq[Int] = List(15, 17, 19, 20, 55, 90) i: Int = 6 agesum: Int = 216
The for loop is a specific structure, available in most languages, for repeatedly doing something to each of a set of values.
# Python
# Visit every age once and accumulate the total, then print the average.
ages = [ 15, 17, 19, 20, 55, 90 ]
agesum = 0
for age in ages:
    agesum += age
print("Average age: "+str(agesum/len(ages)))

# Build a new list holding one computed value (1944 minus the birth year) per input.
birth_years = [1918, 1910, 1915]
ages = []
for birth_year in birth_years:
    ages.append(1944 - birth_year)
print(ages)
Average age: 36.0 [26, 34, 29]
# R
# Visit every age once and accumulate the total, then print the average.
ages <- c(15, 17, 19, 20, 55, 90)
agesum <- 0
for (age in ages) {
  agesum <- agesum + age
}
print(paste("Average age:", agesum / length(ages)))

# Build a new vector holding one computed value (1944 minus the birth year) per input.
birth_years <- c(1918, 1910, 1915)
ages <- c()
for (birth_year in birth_years) {
  ages <- append(ages, 1944 - birth_year)
}
print(ages)
[1] "Average age: 36" [1] 26 34 29
// Scala
import scala.collection.mutable.ArrayBuffer
// Visit every age once and accumulate the total, then print the (integer) average.
val ages = Seq(15, 17, 19, 20, 55, 90)
var agesum = 0
for (age <- ages) {
  agesum = agesum + age
}
println(s"Average age: ${agesum / ages.length}")

// Fill a mutable buffer with one computed value (1944 minus the birth year) per input.
val birth_years = Seq(1918, 1910, 1915)
val ages2 = ArrayBuffer[Int]()
for (birth_year <- birth_years) {
  ages2.append(1944 - birth_year)
}
println(ages2)
Average age: 36 ArrayBuffer(26, 34, 29)
import scala.collection.mutable.ArrayBuffer ages: Seq[Int] = List(15, 17, 19, 20, 55, 90) agesum: Int = 216 birth_years: Seq[Int] = List(1918, 1910, 1915) ages2: ArrayBuffer[Int] = ArrayBuffer(26, 34, 29)
Lists are data structures for holding multiple values.
# Python
sentences = [ "Where are we? I don't know!", "This, programming... is... terrifying!" ]
# Here we're printing each string in the sentences list, one per loop iteration
for sentence in sentences:
    print(sentence)
# You can also explicitly refer to a particular slot in a list using square brackets:
print(sentences[0])
# In the above, note that the first entry in the list is at index 0, not 1. That's a conventional relic that permeates most programming languages, and comes originally from the way computers handle memory.
Where are we? I don't know! This, programming... is... terrifying! Where are we? I don't know!
# R
sentences <- c("Where are we? I don't know!", "This, programming... is... terrifying!")
# Print every string held in the sentences vector, one per loop iteration
for (sentence in sentences) {
  print(sentence)
}
# Square brackets select a single element by its position:
print(sentences[1])
# Note that R counts positions from 1, in contrast to many other languages.
[1] "Where are we? I don't know!" [1] "This, programming... is... terrifying!" [1] "Where are we? I don't know!"
// Scala
val sentences = Seq("Where are we? I don't know!", "This, programming... is... terrifying!")
// Print every string held in the sentences list, one after another
sentences.foreach(sentence => println(sentence))
// A particular slot is selected by passing its index in parentheses
// (Scala does not use square brackets for indexing):
println(sentences(0))
// Note that the first entry in the list is at index 0, not 1. That's a convention shared by
// most programming languages, and comes originally from the way computers handle memory.
Where are we? I don't know! This, programming... is... terrifying! Where are we? I don't know!
sentences: Seq[String] = List( "Where are we? I don't know!", "This, programming... is... terrifying!" )
Dictionaries are useful data structures for mapping values to other values, or for creating simple structured data. Python and Scala have them. R has named vectors, but those are a bit more complicated.
# Python
replacements = {
    ".": " ",
    ",": " ",
    "!": " ",
    "?": " ",
    "'": "",
    "&": "and"
}
# Here we're going over all the keys in the replacement dictionary and acting on them
text = "Where are we? & I don't know!"
for key in replacements:
    # Replace every occurrence of the key with the value stored under it
    text = text.replace(key, replacements[key])
print(text)
# You can also explicitly refer to a particular slot in a list or a key in a dictionary using square brackets:
print(replacements["&"])
Where are we and I dont know and
// Scala
val replacements = Map(
  "." -> " ",
  "," -> " ",
  "!" -> " ",
  "?" -> " ",
  "'" -> "",
  "&" -> "and"
)
// Walk over every (key, value) pair of the dictionary and apply it to the text
var text = "Where are we? & I don't know!"
replacements.foreach { case (key, replacement) =>
  text = text.replace(key, replacement)
}
println(text)
// A single entry is looked up by passing its key in parentheses
// (Scala does not use square brackets for this):
println(replacements("&"))
Where are we and I dont know and
replacements: Map[String, String] = Map( "." -> " ", "&" -> "and", "!" -> " ", "," -> " ", "'" -> "", "?" -> " " ) text: String = "Where are we and I dont know "
# Python
# Note that a dictionary can only contain one value for each key
# (when a key is repeated in a literal, the last value given for it wins)
replacements = {
"." : "?",
"." : "!"
}
# Prints "!", the value from the last "." entry
print(replacements["."])
# Therefore, if you need multiple values, you have to combine dictionaries with lists:
replacements = {
"." : ["?","!"]
}
# Prints the whole list stored under "."
print(replacements["."])
! ['?', '!']
// Scala
// Note that a dictionary can only contain one value for each key
// (when the same key is listed twice, the later entry replaces the earlier one)
val replacements = Map(
"." -> "?",
"." -> "!"
)
// Prints "!", the value from the last "." entry
println(replacements("."))
// Therefore, if you need multiple values, you have to combine dictionaries with lists:
val replacements2 = Map(
"." -> Seq("?","!")
)
// Prints the whole Seq stored under "."
println(replacements2("."))
! List(?, !)
replacements: Map[String, String] = Map("." -> "!") replacements2: Map[String, Seq[String]] = Map("." -> List("?", "!"))
# Python
# Here's some structured data stored in a combination of lists and dictionaries:
# a list of people, each a dictionary whose "jobs" entry is itself a list.
people = [
    {
        "name": "Eetu",
        "age": 18,
        "jobs": [ "Researcher", "Lecturer"]
    },
    {
        "name": "Bruce Wayne",
        "age": 65,
        "jobs": [ "Batman", "Philanthropist"]
    }
]
# Print the jobs of every person whose name is "Bruce Wayne"
for person in people:
    if person["name"] == "Bruce Wayne":
        print(person["jobs"])
['Batman', 'Philanthropist']
// Scala
// Structured data built from a list (Seq) of dictionaries (Map); each person's
// "jobs" entry is itself a list. The values have mixed types (String, Int, Seq).
val people = Seq(
  Map("name" -> "Eetu", "age" -> 18, "jobs" -> Seq("Researcher", "Lecturer")),
  Map("name" -> "Bruce Wayne", "age" -> 65, "jobs" -> Seq("Batman", "Philanthropist"))
)
// Print the jobs of every person whose name is "Bruce Wayne"
for (person <- people if person("name") == "Bruce Wayne") {
  println(person("jobs"))
}
List(Batman, Philanthropist)
people: Seq[Map[String, Any]] = List( Map("name" -> "Eetu", "age" -> 18, "jobs" -> List("Researcher", "Lecturer")), Map( "name" -> "Bruce Wayne", "age" -> 65, "jobs" -> List("Batman", "Philanthropist") ) )