|
|
@@ -6,7 +6,7 @@ function GetTextStream(input){ |
|
|
|
var col = 0; |
|
|
|
|
|
|
|
/**
 * Return the character at the current read position without moving it.
 *
 * Relies on String.prototype.charAt, so a position outside `input` yields ""
 * (the same value eof() compares against).
 *
 * @returns {string} The character at `pos`, or "" when `pos` is out of range.
 */
function peek(){
  return input.charAt(pos);
}
|
|
|
|
|
|
|
function next(){ |
|
|
@@ -23,17 +23,24 @@ function GetTextStream(input){ |
|
|
|
return (input.charAt(pos) === ""); |
|
|
|
} |
|
|
|
|
|
|
|
/**
 * Accessor for the stream's `line` counter.
 * @returns {*} The current value of `line`.
 */
function getLine(){
  return line;
}
|
|
|
/**
 * Accessor for the stream's `col` counter.
 * @returns {*} The current value of `col`.
 */
function getCol(){
  return col;
}
|
|
|
|
|
|
|
/**
 * Abort processing by throwing an Error that carries the current position.
 *
 * @param {string} msg - Description of the problem; used as the message prefix.
 * @throws {Error} Always. The message is `msg` followed by
 *   " Line: <line>, Col: <col>" using the current `line` and `col` values.
 */
function die(msg){
  throw new Error(msg + " Line: " + line + ", Col: " + col);
}
|
|
|
|
|
|
|
return { |
|
|
|
peek: peek, |
|
|
|
get: next, |
|
|
|
eof: eof |
|
|
|
next: next, |
|
|
|
eof: eof, |
|
|
|
line: getLine, |
|
|
|
col: getCol, |
|
|
|
die: die |
|
|
|
}; |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// ---------------------------------------------- |
|
|
|
// VALIDATORS |
|
|
|
// ---------------------------------------------- |
|
|
@@ -63,7 +70,7 @@ function isBinary(c){ |
|
|
|
} |
|
|
|
|
|
|
|
/**
 * Test whether `c` can begin a label: an ASCII letter or an underscore.
 *
 * The pattern is unanchored, so `c` is expected to be a single character;
 * a longer string passes if any of its characters matches.
 *
 * @param {string} c - Character to classify.
 * @returns {boolean} true when `c` contains a letter (a-z, A-Z) or "_".
 */
function isLabelStart(c){
  return /[a-zA-Z_]/i.test(c);
}
|
|
|
|
|
|
|
function isLabel(c){ |
|
|
@@ -75,118 +82,142 @@ function isPunctuation(c){ |
|
|
|
} |
|
|
|
|
|
|
|
/**
 * Test whether `c` is one of the operator characters: = + - / * #
 *
 * Implemented as a substring search, so `c` is expected to be exactly one
 * character (an empty string is found at index 0 and would also pass).
 *
 * @param {string} c - Character to classify.
 * @returns {boolean} true when `c` occurs in "=+-/*#".
 */
function isOp(c){
  return ("=+-/*#".indexOf(c) >= 0);
}
|
|
|
|
|
|
|
|
|
|
|
// ----------------------------------------------------------- |
|
|
|
// TOKENIZER CLASS |
|
|
|
// ----------------------------------------------------------- |
|
|
|
|
|
|
|
/**
 * Pull-style tokenizer.
 *
 * Wraps the stream object returned by GetTextStream(input) and produces one
 * token per nextToken() call. Every token is a plain object
 * `{type, val, line, col}` where `type` is one of 'number', 'string',
 * 'directive', 'label', 'punc' or 'op'.
 *
 * The stream is expected to provide peek(), next() (returning the character
 * it consumed), eof(), line(), col() and die(msg).
 */
class Tokenizer{

  /**
   * @param {*} input - Source text; passed unchanged to GetTextStream.
   */
  constructor(input){
    this.__stream = GetTextStream(input);
  }

  /**
   * Build a token object stamped with the stream's current position.
   *
   * The position is sampled when this method runs. Every reader in this
   * class calls it after the token's text has been consumed, so `line`/`col`
   * describe where the token ends, not where it starts.
   *
   * @param {string} type - Token category.
   * @param {*} val - Token value.
   * @returns {{type: string, val: *, line: *, col: *}}
   */
  genTokenObject(type, val){
    return {
      type: type,
      val: val,
      line: this.__stream.line(),
      col: this.__stream.col()
    };
  }

  // ----------------------------------------------
  // Read Operations
  // ----------------------------------------------

  /**
   * Discard everything up to and including the next "\n".
   */
  skipComment(){
    this.readWhile((c) => c != "\n");
    // Consume the line break itself (readWhile stopped in front of it).
    this.__stream.next();
  }

  /**
   * Read a "$"-prefixed hexadecimal number.
   * @returns {Object} 'number' token; `val` is NaN when no hex digit follows
   *   the prefix (parseInt of an empty string).
   */
  readHex(){
    this.__stream.next(); // drop the "$" prefix
    const digits = this.readWhile(isHex);
    return this.genTokenObject('number', parseInt(digits, 16));
  }

  /**
   * Read a "%"-prefixed binary number.
   * @returns {Object} 'number' token; `val` is NaN when no binary digit
   *   follows the prefix.
   */
  readBinary(){
    this.__stream.next(); // drop the "%" prefix
    const digits = this.readWhile(isBinary);
    return this.genTokenObject('number', parseInt(digits, 2));
  }

  /**
   * Read a numeric literal: "$" selects hex, "%" selects binary, anything
   * else is read as a run of decimal digits.
   * @returns {Object} 'number' token.
   */
  readNumber(){
    const c = this.__stream.peek();
    if (c === "$"){
      return this.readHex();
    }
    if (c === "%"){
      return this.readBinary();
    }

    const digits = this.readWhile(isDigit);
    // Explicit radix: the literal is decimal regardless of leading zeros.
    return this.genTokenObject('number', parseInt(digits, 10));
  }

  /**
   * Read a quoted string.
   *
   * A backslash makes the following character literal (the backslash itself
   * is dropped; no escape sequences are translated). Reading stops at the
   * first unescaped `end` character, or at end of input, in which case the
   * text collected so far is returned without raising an error.
   *
   * @param {string} end - The quote character that closes the string.
   * @returns {Object} 'string' token holding the text between the quotes.
   */
  readString(end){
    let str = "";
    let escaped = false;
    this.__stream.next(); // drop the opening quote
    while (!this.__stream.eof()){
      const c = this.__stream.next();
      if (escaped){
        str += c;
        escaped = false;
      } else if (c === "\\"){
        escaped = true;
      } else if (c === end){
        break;
      } else {
        str += c;
      }
    }
    return this.genTokenObject('string', str);
  }

  /**
   * Read a directive: the current character (nextToken dispatches here on
   * ".") followed by a run of label characters.
   * @returns {Object} 'directive' token; `val` includes the leading character.
   */
  readDirective(){
    const str = this.__stream.next() + this.readWhile(isLabel);
    return this.genTokenObject('directive', str);
  }

  /**
   * Read a run of label characters.
   * @returns {Object} 'label' token.
   */
  readLabel(){
    const str = this.readWhile(isLabel);
    return this.genTokenObject('label', str);
  }

  /**
   * Consume characters for as long as `validator` accepts the next one.
   *
   * @param {function(string): boolean} validator - Called with the peeked
   *   character; a falsy result stops the read without consuming it.
   * @returns {string} The consumed characters ("" if none matched).
   */
  readWhile(validator){
    let str = "";
    while (!this.__stream.eof() && validator(this.__stream.peek())){
      str += this.__stream.next();
    }
    return str;
  }

  /**
   * Produce the next token, skipping whitespace and ";" comments.
   *
   * @returns {?Object} The next token, or null once the input is exhausted.
   * @throws {Error} Via the stream's die() when the next character does not
   *   start any known token.
   */
  nextToken(){
    let c;
    // Loop (rather than recurse) over comments so a long run of comment
    // lines cannot exhaust the call stack.
    for (;;){
      this.readWhile(isWhiteSpace);
      if (this.__stream.eof()){return null;}
      c = this.__stream.peek();
      if (c !== ";"){break;}
      this.skipComment();
    }

    // Order matters: number prefixes and label starts are tested before the
    // "." directive check and before punctuation/operators.
    if (isStringStart(c)){
      return this.readString(c);
    } else if (isNumType(c)){
      return this.readNumber();
    } else if (isLabelStart(c)){
      return this.readLabel();
    } else if (c === "."){
      return this.readDirective();
    } else if (isPunctuation(c)){
      return this.genTokenObject('punc', this.__stream.next());
    } else if (isOp(c)){
      return this.genTokenObject('op', this.__stream.next());
    }
    this.__stream.die("Unable to process character '" + c + "'.");
  }

}
|
|
|
|
|
|
|
/**
 * Tokenize `input` completely.
 *
 * Creates a Tokenizer over `input` and collects tokens until nextToken()
 * returns null.
 *
 * @param {*} input - Source text; passed unchanged to the Tokenizer constructor.
 * @returns {Array<Object>} The tokens in the order they were produced.
 */
function tokenize(input){
  const tokenizer = new Tokenizer(input);
  const tokens = [];
  for (let t = tokenizer.nextToken(); t !== null; t = tokenizer.nextToken()){
    tokens.push(t);
  }
  return tokens;
}
|
|
|
|
|
|
|
|
|
|
|
module.exports = tokenize; |
|
|
|
|
|
|
|
module.exports = Object.freeze({ |
|
|
|
Tokenizer:Tokenizer, |
|
|
|
tokenize:tokenize |
|
|
|
}); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|