Browse Source

Parser functional and turned into a class. Tokenizer now has a tokenize() method. Both Parser and Tokenizer have been moved from MOS/6502/assembler into the compiler folder. Both Parser and Tokenizer have op code identification generalized out... which should allow Parser and Tokenizer to be used for more than one CPU type.

master
Bryan Miller 5 years ago
parent
commit
54d32839f0
4 changed files with 376 additions and 418 deletions
  1. +0
    -96
      src/MOS/6502/assembler/op.js
  2. +0
    -304
      src/MOS/6502/assembler/parser.js
  3. +338
    -0
      src/compiler/parser.js
  4. +38
    -18
      src/compiler/tokenizer.js

+ 0
- 96
src/MOS/6502/assembler/op.js View File



// Each array orders op codes as...
// [<immediate>, <zero page>, <zero page X/Y>, <absolute>, <absolute X>, <absolute Y>, <Indirect X>, <Indirect Y>, <Indirect>, <accumulator>]
// If an opcode does not support a particular format, null will be in that space.
// NOTE: Opcode that do not have arguments will store their opcode value directly (no array).
const CODES = {
"ADC":[0x69, 0x65, 0x75, 0x6D, 0x7D, 0x79, 0x61, 0x71, null, null],
"AND":[0x29, 0x25, 0x35, 0x2D, 0x3D, 0x39, 0x21, 0x31, null, null],
"ASL":[null, 0x06, 0x16, 0x0E, 0x1E, null, null, null, null, 0x0A],
"BCC":0x90,
"BCS":0xB0,
"BEQ":0xF0,
"BIT":[null, 0x24, null, 0x2C, null, null, null, null, null, null],
"BMI":0x30,
"BNE":0xD0,
"BPL":0x10,
"BRK":0x00,
"BVC":0x50,
"BVS":0x70,
"CLC":0x18,
"CLD":0xD8,
"CLI":0x58,
"CLV":0xB8,
"CMP":[0xC9, 0xC5, 0xD5, 0xCD, 0xDD, 0xD9, 0xC1, 0xD1, null, null],
"CPX":[0xE0, 0xE4, null, 0xEC, null, null, null, null, null, null],
"CPY":[0xC0, 0xC4, null, 0xCC, null, null, null, null, null, null],
"DEC":[null, 0xC6, 0xD6, 0xCE, 0xDE, null, null, null, null, null],
"DEX":0xCA,
"DEY":0x88,
"EOR":[0x49, 0x45, 0x55, 0x4D, 0x5D, 0x59, 0x41, 0x51, null, null],
"INC":[null, 0xE6, 0xF6, 0xEE, 0xFE, null, null, null, null, null],
"INX":0xE8,
"INY":0xC8,
"JMP":[null, null, null, 0x4C, null, null, null, null, 0x6C, null],
"JSR":[null, null, null, 0x20, null, null, null, null, null, null],
"LDA":[0xA9, 0xA5, 0xB5, 0xAD, 0xBD, 0xB9, 0xA1, 0xB1, null, null],
"LDX":[0xA2, 0xA6, 0xB6, 0xAE, 0xBE, null, null, null, null, null],
"LDY":[0xA0, 0xA4, 0xB4, 0xAC, 0xBC, null, null, null, null, null],
"LSR":[null, 0x46, 0x56, 0x4E, 0x5E, null, null, null, null, 0x4A],
"NOP":0xEA,
"ORA":[0x09, 0x05, 0x15, 0x0D, 0x1D, 0x19, 0x01, 0x11, null, null],
"PHA":0x48,
"PHP":0x08,
"PLA":0x68,
"PLP":0x28,
"ROL":[null, 0x26, 0x36, 0x2E, 0x3E, null, null, null, null, 0x2A],
"ROR":[null, 0x66, 0x76, 0x6E, 0x7E, null, null, null, null, 0x6A],
"RTI":0x40,
"RTS":0x60,
"SBC":[0xE9, 0xE5, 0xF5, 0xED, 0xFD, 0xF9, 0xE1, 0xF1, null, null],
"SEC":0x38,
"SED":0xF8,
"SEI":0x78,
"STA":[null, 0x85, 0x95, 0x8D, 0x9D, 0x99, 0x81, 0x91, null, null],
"STX":[null, 0x86, 0x96, 0x8E, null, null, null, null, null, null],
"STY":[null, 0x84, 0x94, 0x8C, null, null, null, null, null, null],
"TAX":0xAA,
"TAY":0xA8,
"TSX":0xBA,
"TXA":0x8A,
"TXS":0x9A,
"TYA":0x98
};
var NAMES = Object.keys(CODES);



module.exports = Object.freeze({
MODES:{
IMMEDIATE: 0,
ZEROPAGE: 1,
ZEROPAGEXY: 2,
ABSOLUTE: 3,
ABSOLUTEX: 4,
ABSOLUTEY: 5,
INDIRECTX: 6,
INDIRECTY: 7,
INDIRECT: 8,
ACCUMULATOR: 9
},
CODES: CODES,
NAMES: NAMES,
isCode:function(op){return (NAMES.indexOf(op) >= 0);},
getCode:function(op, mode){
if (NAMES.indexOf(op) >= 0){
if (typeof(CODES[op]) === 'number')
return CODES[op];
return (!(mode >= 0 && mode <= 9)) ? CODES[op][mode] : null;
}
return null;
}
});




+ 0
- 304
src/MOS/6502/assembler/parser.js View File

const OP = require('./op.js');

// Operator -> binding strength used by maybeBinary(); an operator is only
// folded into the current expression when its value is greater than the
// precedence already in effect.
const PRECEDENCE = {
  "#": 0, // Precedence 0 should be ignored!
  "=": 1,
  "<": 7,
  "<=": 7,
  ">": 7,
  ">=": 7,
  "==": 7,
  "!=": 7,
  "+": 10,
  "-": 10,
  "*": 20,
  "/": 20
};

/**
 * Wraps an array of tokens in a forward-only cursor.
 * Tokens are read as objects carrying `line` and `col` properties.
 * @param {Array<Object>} input - Tokens to walk over.
 * @returns {Object} Cursor exposing peek, next, line, col, eol, eof and die.
 */
function TokenStream(input){
  var pos = 0;

  // Current token without consuming it; null at end of input.
  function peek(){
    return (pos < input.length) ? input[pos] : null;
  }

  // Current token, advancing the cursor; null at end of input.
  function next(){
    if (pos < input.length){
      let v = input[pos];
      pos += 1;
      return v;
    }
    return null;
  }

  // Line of the current token, or -1 at end of input.
  function line(){
    return (pos < input.length) ? input[pos].line : -1;
  }

  // Column of the current token, or -1 at end of input.
  function col(){
    return (pos < input.length) ? input[pos].col : -1;
  }

  // True when the current token starts a new line relative to the previous
  // one. Both the start and the end of the input count as a line boundary;
  // without the upper bound check input[pos] is undefined at end of input.
  function eol(){
    if (pos <= 0 || pos >= input.length)
      return true;
    return (input[pos-1].line !== input[pos].line);
  }

  // True once every token has been consumed.
  function eof(){
    return (pos >= input.length);
  }

  // Throws an Error carrying msg plus the current position. Uses line()/col()
  // so that the position is reported as -1 at end of input instead of
  // failing on an undefined token.
  function die(msg){
    throw new Error(msg + " Line: " + line() + ", Col: " + col());
  }

  return {
    peek: peek,
    next: next,
    line: line,
    col: col,
    eol: eol,
    eof: eof,
    die: die
  };
}
// Token stream currently being parsed; parse() assigns it before parsing and
// clears it afterwards.
var stream = null;

/**
 * Checks the upcoming token against a type and, optionally, a value.
 * @param {string} type - Required token type.
 * @param {*} [val] - Required token value; any falsy value means "any value".
 * @returns {Object|boolean|null} The token on a match, false on a mismatch,
 *   or null when the stream has no more tokens.
 */
function isTokenType(type, val){
  const tok = stream.peek();
  if (!tok)
    return tok;
  if (tok.type !== type)
    return false;
  if (val && tok.val !== val)
    return false;
  return tok;
}

/**
 * Tests whether the upcoming token is punctuation.
 * @param {string} [ch] - Specific punctuation to require; omit for any.
 * @returns {*} Whatever isTokenType("punc", ch) yields.
 */
function isPunc(ch){
  const match = isTokenType("punc", ch);
  return match;
}

/**
 * Tests whether the upcoming token is an op code.
 * @param {string} [ch] - Specific op code to require; omit for any.
 * @returns {*} Whatever isTokenType("opcode", ch) yields.
 */
function isOpCode(ch){
  const match = isTokenType("opcode", ch);
  return match;
}

/**
 * Tests whether the upcoming token is a label.
 * @param {string} [ch] - Specific label text to require; omit for any.
 * @returns {*} Whatever isTokenType("label", ch) yields.
 */
function isLabel(ch){
  const match = isTokenType("label", ch);
  return match;
}

/**
 * Tests whether the upcoming token is a directive.
 * @param {string} [ch] - Specific directive to require; omit for any.
 * @returns {*} Whatever isTokenType("directive", ch) yields.
 */
function isDirective(ch){
  const match = isTokenType("directive", ch);
  return match;
}

/**
 * Tests whether the upcoming token is an operator.
 * @param {string} [ch] - Specific operator to require; omit for any.
 * @returns {*} Whatever isTokenType("op", ch) yields.
 */
function isOp(ch){
  const match = isTokenType("op", ch);
  return match;
}


/**
 * Consumes the upcoming token, which must be the given punctuation.
 * Reports through stream.die() when the upcoming token is something else.
 * @param {string} ch - Punctuation that has to come next.
 */
function skipPunc(ch){
  if (isPunc(ch)){
    stream.next();
    return;
  }
  stream.die("Unexpected punctuation '" + ch + "'.");
  stream.next();
}


/**
 * Consumes the upcoming token, which must be the given directive.
 * Reports through stream.die() when the upcoming token is something else.
 * @param {string} ch - Directive that has to come next.
 */
function skipDirective(ch){
  if (isDirective(ch)){
    stream.next();
    return;
  }
  stream.die("Unexpected directive '" + ch + "'.");
  stream.next();
}


/**
 * Parses a separator-delimited list of items.
 *
 * With both s and e given, the list is enclosed: s is consumed first, items
 * are read until e is the upcoming token, then e is consumed. When either is
 * null the list instead runs to the end of the current line and nothing is
 * consumed around it.
 *
 * @param {?string} s - Opening punctuation, or null for "to end of line".
 * @param {?string} e - Closing punctuation, or null for "to end of line".
 * @param {string} d - Separator punctuation required between items.
 * @param {function} parser - Called once per item; its result is collected.
 * @returns {Array} The collected items, possibly empty.
 */
function parseDelimited(s,e,d,parser){
  let toEOL = (s === null || e === null);
  let a = [];
  let first = true;
  if (!toEOL){skipPunc(s);}
  while (!stream.eof()){
    // Stop at the closing punctuation (enclosed list) or at the line end.
    // The enclosed case used to test isPunc(e) un-negated, so it never
    // collected anything.
    let done = toEOL ? stream.eol() : isPunc(e);
    if (done)
      break;
    if (first){
      first = false;
    } else {skipPunc(d);}
    a.push(parser());
  }
  if (!toEOL){skipPunc(e);}
  return a;
}

/**
 * Parses a byte-list directive: consumes the directive token and then a
 * comma-separated list of expressions running to the end of the line.
 * NOTE(review): the node is tagged ".byte" while parseAtom() dispatches on
 * ".bytes" — confirm which spelling is intended.
 * @returns {Object} Node {type:"directive", op:".byte", args, line, col},
 *   where line/col are the position of the directive token.
 */
function parseByteDirective(){
  const startLine = stream.line();
  const startCol = stream.col();
  stream.next(); // drop the directive token itself
  const values = parseDelimited(null, null, ",", parseExpression);
  return {
    type: "directive",
    op: ".byte",
    args: values,
    line: startLine,
    col: startCol
  };
}


/**
 * Parses one `.elif <cond> .then <block>` branch, plus any `.elif` / `.else`
 * branches chained after it. The closing `.endif` is left in the stream for
 * the caller to consume.
 * @returns {Object} Node {type:"directive", op:"if", cond, then[, else]}.
 */
function parseElseIfDirective(){
  // Callers dispatch here on ".elif", so that is the token to consume
  // (".elseif" could never match and always aborted the parse).
  skipDirective(".elif");
  let cond = parseExpression();
  skipDirective(".then");
  let then = parseBlock([".else", ".elif", ".endif"]);
  let f = {
    type: "directive",
    op: "if",
    cond: cond,
    then: then
  };
  if (isDirective(".elif")){
    f["else"] = parseElseIfDirective();
  } else if (isDirective(".else")){
    // Consume the ".else" token itself; otherwise the block parser would try
    // to read it as an expression.
    skipDirective(".else");
    f["else"] = parseBlock([".endif"]);
  } else if(!isDirective(".endif")){
    stream.die("Expected '.endif' Directive.");
  }
  return f;
}


/**
 * Parses `.if <cond> .then <expr> [.elif ... | .else <block>] .endif`,
 * consuming everything up to and including `.endif`.
 * @returns {Object} Node {type:"directive", op:"if", cond, then[, else]}.
 */
function parseIfDirective(){
  skipDirective(".if");
  let cond = parseExpression();
  skipDirective(".then");
  // NOTE(review): only a single expression is read for the `.then` body,
  // whereas parseElseIfDirective() reads a whole block — confirm intent.
  let then = parseExpression();
  let f = {
    type: "directive",
    op: "if",
    cond: cond,
    then: then
  };
  if (isDirective(".elif")){
    f["else"] = parseElseIfDirective();
  } else if (isDirective(".else")){
    // Consume the ".else" token itself; otherwise the block parser would try
    // to read it as an expression.
    skipDirective(".else");
    f["else"] = parseBlock([".endif"]);
  }
  skipDirective(".endif");
  return f;
}


/**
 * Parses an op code statement: the op code token followed by its arguments,
 * which run to the end of the line.
 *
 * The `mode` field is a rough classification taken from the token after the
 * op code: 1 when it is the "#" operator (which is consumed), 2 when it is
 * an opening parenthesis (left in place), otherwise 0.
 *
 * @returns {Object} Node {type:"opcode", op, args, mode, line, col}, where
 *   `op` is the op code token itself.
 */
function parseOpCode(){
  const startLine = stream.line();
  const startCol = stream.col();
  const opToken = stream.next();

  let mode = 0; // Guess between absolute and zero page.
  const immediate = isOp("#");
  if (immediate){
    stream.next();
    mode = 1; // Immediate
  } else if (isPunc("(")){
    mode = 2; // Indirect
  }

  const operands = parseDelimited(null, null, ",", parseExpression);
  return {
    type: "opcode",
    op: opToken,
    args: operands,
    mode: mode,
    line: startLine,
    col: startCol
  };
}

/**
 * Parses the smallest unit of an expression: a parenthesised expression, an
 * op code statement, an `.if` or `.bytes` directive, or a single number,
 * string or label token.
 * @returns {Object} The parsed node or token.
 * @throws {Error} On a missing ")", on an unexpected token, or when the
 *   input ends where an atom is required.
 */
function parseAtom(){
  if (isPunc("(")){
    stream.next();
    let exp = parseExpression();
    // A missing ")" used to fall through to the generic token branch, which
    // discarded the expression that had just been parsed.
    if (!isPunc(")")){
      if (stream.eof())
        throw new Error("Expected ')' but reached end of input.");
      stream.die("Expected ')'.");
    }
    stream.next();
    return exp;
  }
  if (isOpCode())
    return parseOpCode();
  if (isDirective(".if"))
    return parseIfDirective();
  if (isDirective(".bytes"))
    return parseByteDirective();

  // Look before consuming so that an error points at the offending token and
  // end of input is reported instead of dereferencing null.
  let tok = stream.peek();
  if (tok === null)
    throw new Error("Unexpected end of input.");
  if (tok.type === "number" || tok.type === "string" || tok.type === "label"){
    stream.next();
    return tok;
  }
  stream.die("Unexpected token {type:" + tok.type + ", val:'" + tok.val + "'}.");
}


/**
 * Parses one full expression: an atom, any binary operators chained onto it,
 * and then an optional call applied to the result.
 * @returns {Object} The parsed node.
 */
function parseExpression(){
  const parseOperand = () => maybeBinary(parseAtom(), 0);
  return maybeCall(parseOperand);
}

/**
 * Parses the parenthesised, comma-separated argument list of a call.
 * @param {Object} func - Already-parsed node being called.
 * @returns {Object} Node {type:"call", func, args}.
 */
function parseCall(func){
  const callArgs = parseDelimited("(", ")", ",", parseExpression);
  return {
    type: "call",
    func: func,
    args: callArgs
  };
}

/**
 * Runs the given parse function and, if an operator token "(" follows,
 * wraps the result in a call node.
 * NOTE(review): "(" is matched as punctuation everywhere else in this file —
 * confirm that the tokenizer ever emits it as an operator.
 * @param {function} expr - Zero-argument function producing the callee node.
 * @returns {Object} The produced node, or a call node wrapping it.
 */
function maybeCall(expr){
  const callee = expr();
  if (isOp("(")){
    return parseCall(callee);
  }
  return callee;
}

/**
 * Extends `left` with any following binary operators that bind tighter than
 * `pres`, building a left-leaning tree of "binary" / "assign" nodes.
 * @param {Object} left - Already-parsed left operand.
 * @param {number} pres - Precedence currently in effect; only operators with
 *   a strictly greater PRECEDENCE value are consumed.
 * @returns {Object} `left` unchanged, or the node built around it.
 */
function maybeBinary(left, pres){
  let node = left;
  for (;;){
    const tok = isOp();
    if (!tok)
      return node;
    const cpres = PRECEDENCE[tok.val];
    // Written as a negated ">" so operators missing from the table also stop here.
    if (!(cpres > pres))
      return node;
    stream.next();
    const rhs = maybeBinary(parseAtom(), cpres);
    node = {
      type: (tok.val === "=") ? "assign" : "binary",
      op: tok.val,
      left: node,
      right: rhs,
      line: tok.line,
      col: tok.col
    };
  }
}


/**
 * Parses a sequence of expressions, one per line, until the input ends or
 * one of the given terminating directives is the upcoming token (the
 * terminator itself is not consumed).
 *
 * An expression that is just a label token is rewritten into an assignment
 * of "*" (the current program counter marker) to that label.
 *
 * @param {Array<string>} [bed] - Directive names that end the block; when
 *   omitted the block runs to the end of the input.
 * @returns {Array<Object>} The parsed expressions in source order.
 */
function parseBlock(bed){
  let exp = [];
  let isBlockEnd = (t) => {
    return (bed && t.type === 'directive' && bed.indexOf(t.val) >= 0);
  }
  while (!stream.eof()){
    if (isBlockEnd(stream.peek()))
      break;
    let e = parseExpression();
    if (e.type === "label"){
      e = {
        type: "assign",
        op: "=",
        left: e,
        right: "*", // To designate "current program counter".
        line: e.line,
        col: e.col
      }
    }
    exp.push(e);
    // Running out of input also ends the line; checking eof() first keeps
    // eol() from inspecting a token that does not exist.
    if (!stream.eof() && !stream.eol())
      stream.die("Expected End of Line.");
  }
  return exp;
}

/**
 * Parses the whole token stream into the root node.
 * @param {*} [ed] - Not used.
 * @returns {Object} Node {type:"prog", expressions, line, col}, where
 *   line/col are the stream position before anything was parsed.
 */
function parseProg(ed){
  const startLine = stream.line();
  const startCol = stream.col();
  const body = parseBlock();
  return {
    type: "prog",
    expressions: body,
    line: startLine,
    col: startCol
  };
}

/**
 * Parses an array of tokens into a "prog" syntax tree.
 * @param {Array<Object>} tokens - Tokens to parse.
 * @returns {Object} The root node produced by parseProg().
 * @throws {Error} Whatever the parser raises on malformed input.
 */
function parse(tokens){
  stream = TokenStream(tokens);
  try {
    return parseProg();
  } finally {
    // Always release the shared stream, even when parsing throws, so a
    // failed parse does not leave stale state behind.
    stream = null;
  }
}


// Public API of this module: a frozen object exposing only parse().
module.exports = Object.freeze({ parse });




+ 338
- 0
src/compiler/parser.js View File

//const OP = require('./op.js');

// Operator -> binding strength used by Parser.maybeBinary(); an operator is
// only folded into the current expression when its value is greater than the
// precedence already in effect.
const PRECEDENCE = {
  "#": 0, // Precedence 0 should be ignored!
  "=": 1,
  "<": 7,
  "<=": 7,
  ">": 7,
  ">=": 7,
  "==": 7,
  "!=": 7,
  "+": 10,
  "-": 10,
  "*": 20,
  "/": 20
};

/**
 * Wraps an array of tokens in a forward-only cursor.
 * Tokens are read as objects carrying `line` and `col` properties. The
 * function returns a plain object, so it works both with and without `new`.
 * @param {Array<Object>} input - Tokens to walk over.
 * @returns {Object} Cursor exposing peek, next, line, col, pos, eol, eof
 *   and die.
 */
function TokenStream(input){
  var pos = 0;

  // Current token without consuming it; null at end of input.
  function peek(){
    return (pos < input.length) ? input[pos] : null;
  }

  // Current token, advancing the cursor; null at end of input.
  function next(){
    if (pos < input.length){
      let v = input[pos];
      pos += 1;
      return v;
    }
    return null;
  }

  // Line of the current token, or -1 at end of input.
  function line(){
    return (pos < input.length) ? input[pos].line : -1;
  }

  // Column of the current token, or -1 at end of input.
  function col(){
    return (pos < input.length) ? input[pos].col : -1;
  }

  // True when the current token starts a new line relative to the previous
  // one; the start and the end of the input both count as a line boundary.
  function eol(){
    return (pos > 0 && pos < input.length) ? (input[pos-1].line !== input[pos].line) : true;
  }

  // True once every token has been consumed.
  function eof(){
    return (pos >= input.length);
  }

  // Throws an Error carrying msg plus the current position. Uses line()/col()
  // so that the position is reported as -1 at end of input instead of
  // failing on an undefined token.
  function die(msg){
    throw new Error(msg + " Line: " + line() + ", Col: " + col());
  }

  // Index of the current token (number of tokens consumed so far).
  function getPos(){return pos;}

  return {
    peek: peek,
    next: next,
    line: line,
    col: col,
    pos: getPos,
    eol: eol,
    eof: eof,
    die: die
  };
}
// var stream = null;
// var SKIPEOL = false;


/**
 * Checks the upcoming token of a stream against a type and, optionally, a
 * value.
 * @param {Object} stream - Token stream providing peek().
 * @param {string} type - Required token type.
 * @param {*} [val] - Required token value; any falsy value means "any value".
 * @returns {Object|boolean|null} The token on a match, false on a mismatch,
 *   or null when the stream has no more tokens.
 */
function isTokenType(stream, type, val){
  const tok = stream.peek();
  if (!tok)
    return tok;
  if (tok.type !== type)
    return false;
  if (val && tok.val !== val)
    return false;
  return tok;
}


/**
 * Recursive-descent parser that turns an array of tokens into a syntax tree.
 *
 * Op code parsing is supplied by the caller: the constructor receives a
 * factory which is called once with the parser instance and must return the
 * function to use as `parseOpCode`.
 */
class Parser{
  /**
   * @param {function} pof - Factory called as pof(parser); its return value
   *   is stored as `this.parseOpCode` and invoked by parseAtom() whenever the
   *   upcoming token has type "opcode".
   * @throws {TypeError} When pof is not a function.
   */
  constructor(pof){
    if (typeof(pof) !== 'function')
      throw new TypeError("Expected op code parsing function.");
    this.__stream = null;
    this.__SKIPEOL = false;
    this.__output = null;
    this.parseOpCode = pof(this);
  }

  /** Replaces the token input and discards any previous parse result. */
  set tokens(t){
    this.__stream = new TokenStream(t);
    this.__output = null;
    this.__SKIPEOL = false;
  }

  /** The current token stream, or null before any tokens were assigned. */
  get stream(){return this.__stream;}

  /** Upcoming token if it is punctuation (optionally exactly `ch`), else falsy. */
  isPunc(ch){
    return isTokenType(this.stream, "punc", ch);
  }

  /** Upcoming token if it is an op code (optionally exactly `ch`), else falsy. */
  isOpCode(ch){
    return isTokenType(this.stream, "opcode", ch);
  }

  /** Upcoming token if it is a label (optionally exactly `ch`), else falsy. */
  isLabel(ch){
    return isTokenType(this.stream, "label", ch);
  }

  /** Upcoming token if it is a directive (optionally exactly `ch`), else falsy. */
  isDirective(ch){
    return isTokenType(this.stream, "directive", ch);
  }

  /** Upcoming token if it is an operator (optionally exactly `ch`), else falsy. */
  isOp(ch){
    return isTokenType(this.stream, "op", ch);
  }

  /** Consumes the punctuation `ch`; reports via stream.die() if it is not next. */
  skipPunc(ch){
    if (!this.isPunc(ch))
      this.stream.die("Unexpected punctuation '" + ch + "'.");
    this.stream.next();
  }

  /** Consumes the directive `ch`; reports via stream.die() if it is not next. */
  skipDirective(ch){
    if (!this.isDirective(ch))
      this.stream.die("Unexpected directive '" + ch + "'.");
    this.stream.next();
  }

  /**
   * Parses a separator-delimited list of items.
   *
   * With both s and e given, the list is enclosed: s is consumed first,
   * items are read until e is the upcoming token, then e is consumed. When
   * either is null the list instead runs to the end of the current line.
   *
   * @param {?string} s - Opening punctuation, or null for "to end of line".
   * @param {?string} e - Closing punctuation, or null for "to end of line".
   * @param {string} d - Separator punctuation required between items.
   * @param {function} parser - Called once per item; its result is collected.
   * @returns {Array} The collected items, possibly empty.
   */
  parseDelimited(s,e,d,parser){
    let toEOL = (s === null || e === null);
    let a = [];
    let first = true;
    if (!toEOL){this.skipPunc(s);}
    while (!this.stream.eof()){
      // Stop at the closing punctuation (enclosed list) or at the line end.
      // The enclosed case used to test isPunc(e) un-negated, so it never
      // collected anything.
      let done = toEOL ? this.stream.eol() : this.isPunc(e);
      if (done)
        break;
      if (first){
        first = false;
      } else {this.skipPunc(d);}
      a.push(parser());
    }
    // Must go through `this`; there is no free function named skipPunc here.
    if (!toEOL){this.skipPunc(e);}
    return a;
  }

  /**
   * Parses a byte-list directive: consumes the directive token and then a
   * comma-separated list of expressions running to the end of the line.
   * NOTE(review): the node is tagged ".byte" while parseAtom() dispatches on
   * ".bytes" — confirm which spelling is intended.
   * @returns {Object} Node {type:"directive", op:".byte", args, line, col}.
   */
  parseByteDirective(){
    let line = this.stream.line();
    let col = this.stream.col();
    this.stream.next();
    return {
      type: "directive",
      op: ".byte",
      args: this.parseDelimited(null, null,
        ",",
        this.parseExpression.bind(this)
      ),
      line: line,
      col: col
    }
  }

  /**
   * Parses one `.elif <cond> .then <block>` branch plus any `.elif` /
   * `.else` branches chained after it. The closing `.endif` is left in the
   * stream for the caller.
   * @returns {Object} Node {type:"directive", op:"if", cond, then[, else]}.
   */
  parseElseIfDirective(){
    this.skipDirective(".elif");
    let cond = this.parseExpression();
    this.skipDirective(".then");
    let then = this.parseBlock([".else", ".elif", ".endif"]);
    let f = {
      type: "directive",
      op: "if",
      cond: cond,
      then: then
    }
    if (this.isDirective(".elif")){
      f["else"] = this.parseElseIfDirective();
    } else if (this.isDirective(".else")){
      this.skipDirective(".else");
      f["else"] = this.parseBlock([".endif"]);
    } else if(!this.isDirective(".endif")){
      this.stream.die("Expected '.endif' Directive.");
    }
    return f;
  }

  /**
   * Parses `.if <cond> .then <block> [.elif ... | .else <block>] .endif`,
   * consuming everything up to and including `.endif`.
   * @returns {Object} Node {type:"directive", op:"if", cond, then[, else]}.
   */
  parseIfDirective(){
    this.skipDirective(".if");
    let cond = this.parseExpression();
    this.skipDirective(".then");
    let then = this.parseBlock([".elif", ".else", ".endif"]);
    let f = {
      type: "directive",
      op: "if",
      cond: cond,
      then: then
    };
    if (this.isDirective(".elif")){
      f["else"] = this.parseElseIfDirective();
    } else if (this.isDirective(".else")){
      this.skipDirective(".else");
      f["else"] = this.parseBlock([".endif"]);
    }
    this.skipDirective(".endif");
    return f;
  }

  /**
   * Post-processes a label token that has just been consumed. When the
   * upcoming token is an op code, the label is turned into an assignment of
   * "*" to it and the end-of-line check for the current expression is
   * waived; otherwise the token is returned untouched.
   * @param {Object} t - The label token.
   * @returns {Object} The token itself or an "assign" node wrapping it.
   */
  parseLabel(t){
    if (!this.stream.eof()){
      if (this.stream.peek().type === "opcode"){
        this.__SKIPEOL = true;
        return {
          type: "assign",
          op: "=",
          left: t,
          right: "*", // To designate "current program counter".
          line: t.line,
          col: t.col
        }
      }
    }
    return t;
  }

  /**
   * Parses the smallest unit of an expression: a parenthesised expression,
   * an op code statement, an `.if` or `.bytes` directive, or a single
   * number, string or label token.
   * @returns {Object} The parsed node or token.
   * @throws {Error} On a missing ")", on an unexpected token, or when the
   *   input ends where an atom is required.
   */
  parseAtom(){
    if (this.isPunc("(")){
      this.stream.next();
      let exp = this.parseExpression();
      // A missing ")" used to fall through to the generic token branch,
      // which discarded the expression that had just been parsed.
      if (!this.isPunc(")")){
        if (this.stream.eof())
          throw new Error("Expected ')' but reached end of input.");
        this.stream.die("Expected ')'.");
      }
      this.stream.next();
      return exp;
    } else if (this.isOpCode()){
      return this.parseOpCode();
    } else if (this.isDirective(".if")){
      return this.parseIfDirective();
    } else if (this.isDirective(".bytes")){
      return this.parseByteDirective();
    }

    // Look before consuming so that an error points at the offending token
    // and end of input is reported instead of dereferencing null.
    let tok = this.stream.peek();
    if (tok === null)
      throw new Error("Unexpected end of input.");
    if (tok.type === "number" || tok.type === "string" || tok.type === "label"){
      this.stream.next();
      if (tok.type === "label")
        return this.parseLabel(tok);
      return tok;
    }
    this.stream.die("Unexpected token {type:" + tok.type + ", val:'" + tok.val + "'}.");
  }

  /**
   * Parses one full expression: an atom, any binary operators chained onto
   * it, and then an optional call applied to the result.
   * @returns {Object} The parsed node.
   */
  parseExpression(){
    return this.maybeCall((function(){
      return this.maybeBinary(this.parseAtom(), 0);
    }).bind(this));
  }

  /**
   * Parses the parenthesised, comma-separated argument list of a call.
   * @param {Object} func - Already-parsed node being called.
   * @returns {Object} Node {type:"call", func, args}.
   */
  parseCall(func){
    return {
      type: "call",
      func: func,
      args: this.parseDelimited(
        "(", ")",
        ",",
        this.parseExpression.bind(this)
      )
    };
  }

  /**
   * Runs the given parse function and, if an operator token "(" follows,
   * wraps the result in a call node.
   * NOTE(review): "(" is matched as punctuation everywhere else in this
   * class — confirm that the tokenizer ever emits it as an operator.
   * @param {function} expr - Zero-argument function producing the callee.
   * @returns {Object} The produced node, or a call node wrapping it.
   */
  maybeCall(expr){
    expr = expr();
    return (this.isOp("(")) ? this.parseCall(expr) : expr;
  }

  /**
   * Extends `left` with any following binary operators that bind tighter
   * than `pres`, producing "binary" / "assign" nodes.
   * @param {Object} left - Already-parsed left operand.
   * @param {number} pres - Precedence currently in effect.
   * @returns {Object} `left` unchanged, or the node built around it.
   */
  maybeBinary(left, pres){
    let tok = this.isOp();
    if (tok){
      let cpres = PRECEDENCE[tok.val];
      if (cpres > pres){
        this.stream.next();
        return this.maybeBinary({
          type: (tok.val === "=") ? "assign" : "binary",
          op: tok.val,
          left: left,
          right: this.maybeBinary(this.parseAtom(), cpres),
          line: tok.line,
          col: tok.col
        }, pres);
      }
    }
    return left;
  }

  /**
   * Parses a sequence of expressions, one per line, until the input ends or
   * one of the given terminating directives is the upcoming token (the
   * terminator itself is not consumed).
   * @param {Array<string>} [bed] - Directive names that end the block; when
   *   omitted the block runs to the end of the input.
   * @returns {Object} Node {type:"block", expressions, line, col}.
   */
  parseBlock(bed){
    let line = this.stream.line();
    let col = this.stream.col();
    let exp = [];
    let isBlockEnd = (t) => {
      return (bed && t.type === 'directive' && bed.indexOf(t.val) >= 0);
    }
    while (!this.stream.eof()){
      if (isBlockEnd(this.stream.peek()))
        break;
      exp.push(this.parseExpression());
      // The waiver only applies to the expression just parsed; clear it
      // unconditionally so a stale flag cannot hide a later missing line
      // break.
      let skipEOL = this.__SKIPEOL;
      this.__SKIPEOL = false;
      if (!this.stream.eol() && !skipEOL)
        this.stream.die("Expected End of Line.");
    }
    return {
      type: "block",
      expressions: exp,
      line: line,
      col: col
    };
  }

  /**
   * Parses the whole token stream into the root node.
   * @returns {Object} Node {type:"prog", block, line, col}.
   */
  parseProg(){
    let line = this.stream.line();
    let col = this.stream.col();
    return {
      type: "prog",
      block: this.parseBlock(),
      line: line,
      col: col
    };
  }

  /**
   * Parses the current tokens, or the given ones when supplied, and caches
   * the result so that repeated calls return the same tree.
   * @param {Array<Object>} [tokens] - New token input; when omitted the
   *   tokens assigned earlier are used.
   * @returns {?Object} The "prog" node, or null when no tokens have ever
   *   been supplied (or the stream was advanced without a stored result).
   */
  parse(tokens){
    if (tokens)
      this.tokens = tokens;

    if (this.__stream !== null){
      if (this.__output === null && this.__stream.pos() === 0)
        this.__output = this.parseProg();
      return this.__output;
    }
    return null;
  }
}


// The Parser class is this module's sole export; consumers construct it with
// an op code parsing factory (see the constructor).
module.exports = Parser;




src/MOS/6502/assembler/tokenizer.js → src/compiler/tokenizer.js View File

const OP = require('./op.js');


function GetTextStream(input){ function GetTextStream(input){
var pos = 0; var pos = 0;
return (input.charAt(pos) === ""); return (input.charAt(pos) === "");
} }


function getPos(){return pos;}
function getLength(){return input.length;}

function getLine(){return line;} function getLine(){return line;}
function getCol(){return col;} function getCol(){return col;}


eof: eof, eof: eof,
line: getLine, line: getLine,
col: getCol, col: getCol,
pos: getPos,
length: getLength,
die: die die: die
}; };
} }


class Tokenizer{ class Tokenizer{


constructor(input){
this.__stream = GetTextStream(input);
constructor(op, input){
if (!(op.hasOwnProperty("isCode") && op.hasOwnProperty("getCode")))
throw new Error("OP Code definition object missing required method(s).");
this.__stream = (input) ? GetTextStream(input) : GetTextStream("");
this.__output = null;
this.__op = op;
}

set input(i){
if (typeof(i) === 'string'){
this.__stream = GetTextStream(i);
this.__output = null;
}
} }
get stream(){return this.__stream;}



genTokenObject(type, val, line, col){ genTokenObject(type, val, line, col){
return { return {


readLabel(){ readLabel(){
var str = this.readWhile(isLabel); var str = this.readWhile(isLabel);
if (OP.isCode(str.toUpperCase()))
if (this.__op.isCode(str.toUpperCase()))
return this.genTokenObject('opcode', str.toUpperCase()); return this.genTokenObject('opcode', str.toUpperCase());
return this.genTokenObject('label', str); return this.genTokenObject('label', str);
} }
this.__stream.die("Unable to process character '" + c + "'."); this.__stream.die("Unable to process character '" + c + "'.");
} }


}
tokenize(input){
if (input)
this.input = input;


function tokenize(input){
var tokenizer = new Tokenizer(input);
var tokens = [];
var t = tokenizer.nextToken();
while (t !== null){
tokens.push(t);
t = tokenizer.nextToken();
if (this.__stream.length() > 0){
if (this.__output === null && this.__stream.pos() === 0){
this.__output = [];
let t = this.nextToken();
while (t !== null){
this.__output.push(t);
t = this.nextToken();
}
}
return this.__output;
}
return [];
} }
return tokens;
} }





module.exports = Object.freeze({
Tokenizer:Tokenizer,
tokenize:tokenize
});
module.exports = Tokenizer;







Loading…
Cancel
Save