用JS实现一个字节码运行器

2023-06-10 15:43:57 浏览数 (1)

JS运行字节码、JS实现一个bytecode运行系统、js虚拟机。

用JS实现一个简单的虚拟机、一个字节码运行系统。运行的代码原理类似下面的形式:

代码语言:javascript复制
 var program = [11,0,10,42,6,255,30,0,11,0,0,11,1,1,11,3,1,60,1,10,2,0,20,2,1,60,2,10,0,1,10,1,2,11,2,1,20,3,2,31,2,30,2,41,3,2,19,31,0,50];

run(program);

function run(program)
{
...
}

以下给出例程,该例程实现斐波那契(Fibonacci)算法。

斐波那契数列以如下递归方式定义:F(0)=0,F(1)=1,F(n)=F(n-1)+F(n-2)(n≥2,n∈N*)

示例代码:

代码语言:javascript复制
//示例代码:
/**
 * Reference implementation: n-th Fibonacci number by naive recursion.
 * F(0) = 0, F(1) = 1, F(n) = F(n - 1) + F(n - 2) for n >= 2.
 *
 * @param {number} n - Non-negative integer index. Negative or fractional
 *   values never hit a base case, so the recursion does not terminate.
 * @returns {number} The n-th Fibonacci number.
 */
const Fn = (n) => {
    if(n === 0) { return 0; }
    if(n === 1) { return 1; }
    // The two recursive results are summed (the "+" was lost in the listing).
    return Fn(n - 1) + Fn(n - 2);
};

用ASM汇编风格的JS代码实现:

代码语言:javascript复制
// Assembly source of the demo program, one instruction per line.
// Blank lines and lines starting with "//" are dropped by getTokens().
// CALL and JL targets (6, 19) are byte offsets into the assembled
// program, not source line numbers.
var code = `
// Loads value 10 in R0 
// and calls Fibonacci routine

MOVV R0, 10
CALL 6
HALT

// This is the Fibonacci routing
// Expects number of Fibonacci 
// numbers in register R0

PUSH R0
MOVV R0, 0
MOVV R1, 1
MOVV R3, 1
PRINT R1
MOVR R2, R0
ADD R2, R1
PRINT R2
MOVR R0, R1
MOVR R1, R2
MOVV R2, 1
ADD R3, R2
POP R2
PUSH R2
JL R3, R2, 19
POP R0
RET
`;

接下来是将ASM转为字节码:

代码语言:javascript复制
/**
 * Assemble ASM source and return the bytecode as text.
 *
 * @param {string} code - Assembly source text.
 * @returns {string} The assembled bytes joined with commas.
 */
function assemble(code)
{
    // Joining with "," yields the same text as Array#toString.
    return asm_assemble(code).join(",");
}
// Print the assembled program as comma-separated bytecode text.
// NOTE(review): if the snippets are concatenated in article order, these
// statements run before `instructions` / `registers` are initialized (they
// are `var`s assigned further down) — confirm they are placed after those
// definitions.
console.log(assemble(code));
// Keep the bytecode text so it can be handed to run().
var byte_code = assemble(code);

/**
 * Run a bytecode program given as text.
 *
 * @param {string} txtBytes - Bytecode text, decoded by getBytes().
 * @param {object} [txtOutput] - Optional output sink forwarded to bytecode_init().
 */
function run(txtBytes, txtOutput){
    // Decode and load in one step, then execute until the VM halts.
    bytecode_init(getBytes(txtBytes), txtOutput);
    bytecode_run();
}
// Execute the assembled bytecode; no output sink is passed here.
run(byte_code)

/**
 * Decode bytecode into an array of integers.
 *
 * @param {string|Array<number|string>} txt - Comma-separated bytecode text
 *   (as produced by assemble()), or an array of byte values.
 * @returns {number[]} A new array with one integer per entry; entries that
 *   are not numeric become NaN.
 */
function getBytes(txt){
    // Accept an array as well, so run([11,0,10,...]) works like run("11,0,10,...").
    const parts = Array.isArray(txt) ? txt : txt.split(",");

    // No radix is passed on purpose: parseInt then still accepts "0x.."
    // entries, as the original call did. The arrow wrapper keeps map's
    // index argument from being taken as a radix.
    return parts.map((part) => parseInt(part));
}

ByteCode字节码运行相关部分:

代码语言:javascript复制
// Register file; indices 0..3 are r0, r1, r2, r3
var regs = [0, 0, 0, 0];

// VM stack: written by push and call, read by pop and ret
var stack = [];

// Bytecode currently loaded (flat array of opcodes and operands)
var program = [];

// Program counter: index into `program`
var pc = 0;

// Set to true to stop execution (halt, bad opcode, or end of program)
var halted = false;

// Optional output sink; println() writes to its `.text` when it is set
var txtOutput = null;

/**
 * Load a program into the VM and reset all execution state.
 *
 * @param {number[]} prg - Bytecode to execute (opcodes and operands).
 * @param {object} [txtOut] - Optional output sink; println() writes to its
 *   `.text` property when one is supplied.
 */
function bytecode_init(prg, txtOut)
{
  program = prg;
  txtOutput = txtOut;

  pc = 0;
  halted = false;
  stack = [];
  // Clear the registers as well; otherwise a second run starts with the
  // values left behind by the previous program.
  regs = [0, 0, 0, 0];
}

/**
 * Execute the loaded program one instruction at a time until `halted`
 * becomes truthy. Returns immediately if the VM is already halted.
 */
function bytecode_run()
{
  for (;;)
  {
      // The flag is checked before every step, including the first.
      if (halted)
          break;

      runone();
  }
}

/**
 * Execute the single instruction at `program[pc]`.
 *
 * Each instruction is an opcode followed by its operands; the case for an
 * opcode steps over the opcode and then consumes each operand with `pc++`,
 * so pc ends up on the next instruction unless a jump/call/ret overrides it.
 *
 * Opcodes: 10 movr, 11 movv, 20 add, 21 sub, 30 push, 31 pop, 40 jp,
 * 41 jl, 42 call, 50 ret, 60 print, 255 halt.
 *
 * Sets `halted` on halt, on an unknown opcode (after printing
 * "Error in bytecode"), or when pc runs past the end of the program.
 * Does nothing if the VM is already halted.
 */
function runone()
{
  if (halted)
      return;

  const instr = program[pc];

  switch(instr)
  {
      // movr rdst, rsrc
      case 10: {
          pc++;
          const rdst = program[pc++];
          const rsrc = program[pc++];
          regs[rdst] = regs[rsrc];
          break;
      }

      // movv rdst, val
      case 11: {
          pc++;
          const rdst = program[pc++];
          const val = program[pc++];
          regs[rdst] = val;
          break;
      }

      // add rdst, rsrc
      case 20: {
          pc++;
          const rdst = program[pc++];
          const rsrc = program[pc++];
          regs[rdst] += regs[rsrc];
          break;
      }

      // sub rdst, rsrc
      case 21: {
          pc++;
          const rdst = program[pc++];
          const rsrc = program[pc++];
          regs[rdst] -= regs[rsrc];
          break;
      }

      // push rsrc
      case 30: {
          pc++;
          const rsrc = program[pc++];
          stack.push(regs[rsrc]);
          break;
      }

      // pop rdst
      case 31: {
          pc++;
          const rdst = program[pc++];
          regs[rdst] = stack.pop();
          break;
      }

      // jp addr
      case 40: {
          pc++;
          const addr = program[pc++];
          pc = addr;
          break;
      }

      // jl r1, r2, addr  (jump when regs[r1] < regs[r2])
      case 41: {
          pc++;
          const r1 = program[pc++];
          const r2 = program[pc++];
          const addr = program[pc++];
          if (regs[r1] < regs[r2])
              pc = addr;
          break;
      }

      // call addr
      case 42: {
          pc++;
          const addr = program[pc++];
          // pc now points just past the call: that is the return address.
          stack.push(pc);
          pc = addr;
          break;
      }

      // ret
      case 50: {
          pc++;
          const addr = stack.pop();
          pc = addr;
          break;
      }

      // print reg
      case 60: {
          pc++;
          const reg = program[pc++];
          println(regs[reg]);
          break;
      }

      // halt
      case 255:
          pc++;
          halted = true;
          break;

      default:
          println("Error in bytecode");
          halted = true;
          break;
  }

  // Falling off the end of the program also stops the VM.
  if (pc >= program.length)
  {
      halted = true;
  }

}

/**
 * Print one line of VM output.
 *
 * Always logs to the console; when an output sink is set (`txtOutput`),
 * the value plus a newline is also appended to its `.text` property.
 *
 * @param {*} txt - Value to print.
 */
function println(txt)
{
  console.log(txt);
  if (txtOutput)
      txtOutput.text += txt + "\n";
}

汇编语法风格处理部分:

代码语言:javascript复制
// ASM syntax handling
// Mnemonic -> opcode table used by the assembler. The values are the same
// numbers runone() switches on.
var instructions = {
    MOVR : 10,
    MOVV : 11,
    ADD  : 20,
    SUB  : 21,
    PUSH : 30,
    POP  : 31,
    JP   : 40,
    JL   : 41,
    CALL : 42,
    RET  : 50,
    PRINT: 60,
    HALT : 255
}

// Register name -> register index, used by the assembler when encoding
// operands that start with "R".
var registers = {
    R0 : 0,
    R1 : 1,
    R2 : 2,
    R3 : 3
}

/**
 * Assemble ASM source into bytecode: tokenize, then encode.
 *
 * @param {string} code - Assembly source text.
 * @returns {Array} Whatever getBytecode() produces for the token lines.
 */
function asm_assemble(code)
{
    return getBytecode(getTokens(code));
}

/**
 * Encode tokenized assembly lines into a flat bytecode array.
 *
 * The first token of each line is looked up in `instructions`; an unknown
 * mnemonic is encoded as -1. Every following token is an operand: one that
 * starts with "R" is looked up in `registers`, anything else is parsed as an
 * integer. Tokens are trimmed and upper-cased before lookup.
 *
 * @param {string[][]} tokens - One array of tokens per source line.
 * @returns {number[]} Opcodes and operands in program order. An operand that
 *   cannot be resolved (unknown register, non-numeric text) becomes NaN.
 */
function getBytecode(tokens)
{
    const bytes = [];

    for (const line of tokens)
    {
        for (let i = 0; i < line.length; i++)
        {
            const token = line[i].trim().toUpperCase();

            // First token in a line is assumed to be an instruction
            if (i === 0)
            {
                // Own-property check so names such as "CONSTRUCTOR" do not
                // resolve to members inherited from Object.prototype.
                const known = Object.prototype.hasOwnProperty.call(instructions, token);
                bytes.push(known ? instructions[token] : -1);
            }
            else if (token.startsWith("R"))
            {
                // If operand starts with R it is assumed to be a register
                bytes.push(parseInt(registers[token]));
            }
            else
            {
                // No radix on purpose: keeps "0x.." operands working.
                bytes.push(parseInt(token));
            }
        }
    }

    return bytes;
}

/**
 * Split assembly source into lines of tokens.
 *
 * Lines are separated on LF or CRLF and trimmed. Blank lines and lines that
 * start with "//" are dropped. Each remaining line is split on runs of
 * whitespace and/or commas.
 *
 * @param {string} code - Assembly source text.
 * @returns {string[][]} One array of tokens per instruction line.
 */
function getTokens(code){
    return code
        .split(/\r?\n/)
        .map((line) => line.trim())
        // Remove comments and empty lines
        .filter((line) => line !== "" && !line.startsWith("//"))
        // Split each line by whitespace or ","; a leading or trailing comma
        // would leave an empty token, which is discarded.
        .map((line) => line.split(/[\s,]+/).filter((tok) => tok !== ""));
}

将以上代码合并到一起,即可运行。运行时,先转换ASM代码为字节码,然后再解释执行字节码。

JS虚拟机、JS字节码(ByteCode)运行器,可用于JavaScript混淆加密,保护JS代码。但字节码或虚拟机不便处理与外部环境的交互(如前端的DOM、Node.js中的环境变量等),所以通常只适合用于处理算法。如要对产品或项目中的JS代码混淆加密,建议使用JShaman这样专业的JS代码保护工具。

0 人点赞