Browse Source

Tokenizer finished.

master
Bryan Miller 5 years ago
parent
commit
9a3cb5162f
2 changed files with 122 additions and 88 deletions
  1. +3
    -0
      .gitignore
  2. +119
    -88
      src/MOS/6502/assembler/tokenizer.js

+ 3
- 0
.gitignore View File

@@ -1,3 +1,6 @@

node_modules/*
package-lock.json
dummy.js
test.asm


+ 119
- 88
src/MOS/6502/assembler/tokenizer.js View File

@@ -6,7 +6,7 @@ function GetTextStream(input){
var col = 0;

/**
 * Look at the character at the current read position without consuming it.
 *
 * Uses String.prototype.charAt, which yields "" when the position is past
 * the end of the text (the same sentinel eof() tests for).
 * NOTE(review): assumes `input` is a plain string -- confirm against callers.
 *
 * @returns {string} The current character, or "" at end of input.
 */
function peek(){
  return input.charAt(pos);
}

function next(){
@@ -23,17 +23,24 @@ function GetTextStream(input){
return (input.charAt(pos) === "");
}

// Accessor exposing the stream's current `line` counter.
function getLine(){
  return line;
}
// Accessor exposing the stream's current `col` counter.
function getCol(){
  return col;
}

/**
 * Abort processing by throwing an Error whose message is `msg` followed by
 * the stream's current line and column.
 *
 * @param {string} msg - Description of the problem.
 * @throws {Error} Always.
 */
function die(msg){
  const where = " Line: " + line + ", Col: " + col;
  throw new Error(msg + where);
}

return {
peek: peek,
get: next,
eof: eof
next: next,
eof: eof,
line: getLine,
col: getCol,
die: die
};
}


// ----------------------------------------------
// VALIDATORS
// ----------------------------------------------
@@ -63,7 +70,7 @@ function isBinary(c){
}

/**
 * Test whether a character may begin a label: any ASCII letter or "_".
 *
 * @param {string} c - The character to test.
 * @returns {boolean} True when `c` contains a letter or underscore.
 */
function isLabelStart(c){
  // The character class already lists both cases, so no `i` flag is needed.
  return /[a-zA-Z_]/.test(c);
}

function isLabel(c){
@@ -75,118 +82,142 @@ function isPunctuation(c){
}

/**
 * Test whether a character is one of the operator characters
 * "=", "+", "-", "/", "*" or "#".
 *
 * @param {string} c - The character to test.
 * @returns {boolean} True when `c` occurs in the operator set.
 */
function isOp(c){
  return "=+-/*#".includes(c);
}


// -----------------------------------------------------------
// TOKENIZER CLASS
// -----------------------------------------------------------

/**
 * Pull-style tokenizer over a text stream created by GetTextStream.
 *
 * Each call to nextToken() yields one token object of the form
 * {type, val, line, col}, or null once the input is exhausted. Token types
 * produced here are 'number', 'string', 'label', 'directive', 'punc' and 'op'.
 */
class Tokenizer{

  /**
   * @param {string} input - Source text to tokenize; handed to GetTextStream.
   */
  constructor(input){
    this.__stream = GetTextStream(input);
  }

  /**
   * Build a token object stamped with the stream's line/col as reported at
   * the moment of the call. Callers read the token's characters first, so
   * the position is taken after the token has been consumed.
   *
   * @param {string} type - Token type name.
   * @param {*} val - Token value.
   * @returns {{type: string, val: *, line: *, col: *}}
   */
  genTokenObject(type, val){
    return {
      type: type,
      val: val,
      line: this.__stream.line(),
      col: this.__stream.col()
    };
  }

  // ----------------------------------------------
  // Read Operations
  // ----------------------------------------------

  /** Consume characters up to the next "\n", then consume one more character. */
  skipComment(){
    this.readWhile((c) => c != "\n");
    this.__stream.next();
  }

  /** Consume the one-character prefix, then read isHex characters as base 16. */
  readHex(){
    this.__stream.next();
    const str = this.readWhile(isHex);
    return this.genTokenObject('number', parseInt(str, 16));
  }

  /** Consume the one-character prefix, then read isBinary characters as base 2. */
  readBinary(){
    this.__stream.next();
    const str = this.readWhile(isBinary);
    return this.genTokenObject('number', parseInt(str, 2));
  }

  /**
   * Read a numeric literal: "$" selects hex, "%" selects binary, anything
   * else is read as a run of isDigit characters in base 10.
   */
  readNumber(){
    const c = this.__stream.peek();
    if (c === "$")
      return this.readHex();
    if (c === "%")
      return this.readBinary();

    const str = this.readWhile(isDigit);
    // Explicit radix so the digits are always interpreted as decimal.
    return this.genTokenObject('number', parseInt(str, 10));
  }

  /**
   * Read a quoted string. The opening quote is consumed first; a backslash
   * causes the following character to be appended verbatim (no escape
   * translation). Reading stops at the `end` character or at end of input.
   *
   * @param {string} end - The closing quote character.
   */
  readString(end){
    let str = "";
    let escaped = false;
    this.__stream.next();
    while (!this.__stream.eof()){
      const c = this.__stream.next();
      if (escaped){
        str += c;
        escaped = false;
      } else if (c === "\\"){
        escaped = true;
      } else if (c === end){
        break;
      } else {
        str += c;
      }
    }
    return this.genTokenObject('string', str);
  }

  /** Read a directive: the current character followed by a run of isLabel characters. */
  readDirective(){
    const str = this.__stream.next() + this.readWhile(isLabel);
    return this.genTokenObject('directive', str);
  }

  /** Read a label: a run of isLabel characters. */
  readLabel(){
    const str = this.readWhile(isLabel);
    return this.genTokenObject('label', str);
  }

  /**
   * Consume and return characters for as long as `validator` accepts the
   * next one and the stream is not at end of input.
   *
   * @param {function(string): boolean} validator
   * @returns {string} The consumed characters (possibly empty).
   */
  readWhile(validator){
    let str = "";
    while (!this.__stream.eof() && validator(this.__stream.peek()))
      str += this.__stream.next();
    return str;
  }

  /**
   * Produce the next token, skipping whitespace and ";" comments.
   *
   * @returns {?Object} The next token, or null at end of input.
   * @throws {Error} Via the stream's die() when the character is not recognized.
   */
  nextToken(){
    this.readWhile(isWhiteSpace);
    if (this.__stream.eof()){return null;}
    const c = this.__stream.peek();
    if (c === ";"){
      this.skipComment();
      return this.nextToken();
    } else if (isStringStart(c)){
      return this.readString(c);
    } else if (isNumType(c)){
      return this.readNumber();
    } else if (isLabelStart(c)){
      return this.readLabel();
    } else if (c === "."){
      return this.readDirective();
    } else if (isPunctuation(c)){
      return this.genTokenObject('punc', this.__stream.next());
    } else if (isOp(c)){
      return this.genTokenObject('op', this.__stream.next());
    }
    this.__stream.die("Unable to process character '" + c + "'.");
  }

}

/**
 * Tokenize an entire input text in one call.
 *
 * Creates a Tokenizer over `input` and collects every token it yields until
 * nextToken() returns null.
 *
 * @param {string} input - Source text to tokenize.
 * @returns {Array} All tokens, in the order they were produced.
 */
function tokenize(input){
  const tokenizer = new Tokenizer(input);
  const tokens = [];
  for (let t = tokenizer.nextToken(); t !== null; t = tokenizer.nextToken()){
    tokens.push(t);
  }
  return tokens;
}


module.exports = tokenize;

module.exports = Object.freeze({
Tokenizer:Tokenizer,
tokenize:tokenize
});





Loading…
Cancel
Save