#!/usr/bin/env python # coding: utf-8 # # Writing A Lexer In Nim # # - 2018/11/28 # # # Table of Contents # # 1. Overview # 2. Why Nim? # 3. What is an Interpreter? # 4. What is a Lexer? # 5. What I have learned # 6. Impression # # Overview # # ![](https://www.oreilly.co.jp/books/images/picture_large978-4-87311-822-2.jpeg) # # ## Writing An Interpreter In Go # # - Make a Programming Language called "Monkey" # - Make a REPL # - Lexer # - Parser # - Evaluator(?) # # # Why Nim ? # # - Not using # - Go (same as this book) # - Rust (very difficult!) # - C, C++ (don't like!) # - Very Fast # - [benchmarking](https://mrsekut.site/?p=1591) # - Low Learning Cost # - Interesting # # # What is an Interpreter ? # # # ## Language Processing System # # - Compiler # > translate source code from a high-level programming language to a lower level language # - Interpreter # > 1. parse the source code and perform its behavior directly; # > 2. translate source code into some efficient intermediate representation and immediately execute this; # > 3. explicitly execute stored precompiled code made by a compiler which is part of the interpreter system. # # - ex. 1: Lisp, Brainfuck, .. # - ex. 2: Python, Ruby, .. # - ex. 3: Java, Basic, .. # # - [Compiler - Wikipedia](https://en.wikipedia.org/wiki/Compiler) # - [Interpreter (computing) - Wikipedia](https://en.wikipedia.org/wiki/Interpreter_(computing)) # ## Interpreter # # - make an AST and run it # # # ![](https://1.bp.blogspot.com/-BmT3MCbTUfw/V2f0Zu9VsyI/AAAAAAAAFuI/FoBiF336ZmwHL6mZYqWB0j8dpkH_jYaVACLcB/s640/6c4385fbe3d8471982c9b2a030106d38.png) # - by [Positive Technologies - learn and secure : Theory and Practice of Source Code Parsing with ANTLR and Roslyn](http://blog.ptsecurity.com/2016/06/theory-and-practice-of-source-code.html) # ## REPL? # # - Read, Eval, Print, Loop # - Python, JavaScript, Haskell, etc. # # ### ex # # - `> python`: REPL # - `> python hoge.py`: no REPL # # What is a Lexer? 
# # - Create Tokens from Input # - A token is the smallest unit of a program # ## Example # # #### Input # # ``` # let five = 5 # let ten = 10 # ``` # #### Output # # ``` # [ # LET, # IDENT("five"), # ASSIGN, # INT(5), # LET, # IDENT("ten"), # ASSIGN, # INT(10), # ] # ``` # # # ## Examples of Other Tokens # # - PAREN: `(`,`)` # - BRACE: `{`,`}` # - LT, GT: `<`,`>` # - PROC: `proc` # - IF: `if` # # ## use Case..of # # ``` # proc nextToken*(self: Lexer): token.Token = # var tok: token.Token # self.skipWhiteSpace() # # case self.ch # of ':': # tok = newToken(COLON, self.ch) # of '(': # tok = newToken(LPAREN, self.ch) # of ')': # tok = newToken(RPAREN, self.ch) # of ',': # tok = newToken(COMMA, self.ch) # of '+': # tok = newToken(PLUS, self.ch) # else: # if isLetter(self.ch): # let l = self.readIdentifier() # let t = LookUpIdent(l) # return Token(Type: t, Literal: l) # elif isDigit(self.ch): # let t = token.INT # let l = self.readNumber() # return Token(Type: t, Literal: l) # else: # tok = newToken(token.ILLEGAL, self.ch) # # self.readNextChar() # tok # ``` # # What I have learned # # - two-character tokens need to peek at the next character # - `=` and `==` # - `!` and `!=` # # ### use IF in Case..of # # ``` # case self.ch # of '=': # if self.peekChar() == '=': # let ch = self.ch # self.readNextChar() # let l = $ch & $self.ch # make `==` # tok = Token(Type: EQ, Literal: l) # else: # tok = newToken(ASSIGN, self.ch) # `=` # ``` # # Impression # # ### Test # # - test first # # ### Types # # - Which is more difficult: dynamic or static typing? # # ### Grammar # # - does not use `{`,`}` # - nesting by indentation, like Python or Nim # # # Next Step... # # - make a Parser!! # - need to learn Nim's ptr or interface