js引擎v8源码解析之token(基于0.1.5)

2019-07-30 18:32:22 浏览数 (1)

token.h

代码语言:C++

#ifndef V8_TOKEN_H_
#define V8_TOKEN_H_

namespace v8 { namespace internal {

// TOKEN_LIST takes a list of 3 macros M, all of which satisfy the
// same signature M(name, string, precedence), where name is the
// symbolic token name, string is the corresponding syntactic symbol
// (or NULL, for literals), and precedence is the precedence (or 0).
// The parameters are invoked for token categories as follows:
//
//   T: Non-keyword tokens
//   K: Keyword tokens
//   F: Future (reserved) keyword tokens

// IGNORE_TOKEN is a convenience macro that can be supplied as
// an argument (at any position) for a TOKEN_LIST call. It does
// nothing with tokens belonging to the respective category.
// Its expansion is empty, so e.g. TOKEN_LIST(T, T, IGNORE_TOKEN)
// emits nothing at all for the F(...) entries.

#define IGNORE_TOKEN(name, string, precedence)

// Master token table (X-macro). Every entry is M(name, string, precedence)
// where M is one of the three macro parameters T, K or F.
//
// The whole table is a single macro body, so every line except the last
// must end in a line-continuation backslash, and comments inside the body
// have to use the /* ... */ form (a // comment would swallow the
// continuation). The relative order of the entries is significant: the
// Is*Op() predicates of class Token test enum ranges.
#define TOKEN_LIST(T, K, F)                                             \
  /* End of source indicator. */                                        \
  T(EOS, "EOS", 0)                                                      \
                                                                        \
  /* Punctuators (ECMA-262, section 7.7, page 15). */                   \
  T(LPAREN, "(", 0)                                                     \
  T(RPAREN, ")", 0)                                                     \
  T(LBRACK, "[", 0)                                                     \
  T(RBRACK, "]", 0)                                                     \
  T(LBRACE, "{", 0)                                                     \
  T(RBRACE, "}", 0)                                                     \
  T(COLON, ":", 0)                                                      \
  T(SEMICOLON, ";", 0)                                                  \
  T(PERIOD, ".", 0)                                                     \
  T(CONDITIONAL, "?", 3)                                                \
  T(INC, "++", 0)                                                       \
  T(DEC, "--", 0)                                                       \
                                                                        \
  /* Assignment operators. */                                           \
  /* IsAssignmentOp() relies on this block of enum values */            \
  /* being contiguous and sorted in the same order! */                  \
  T(INIT_VAR, "=init_var", 2)  /* AST-use only. */                      \
  T(INIT_CONST, "=init_const", 2)  /* AST-use only. */                  \
  T(ASSIGN, "=", 2)                                                     \
  T(ASSIGN_BIT_OR, "|=", 2)                                             \
  T(ASSIGN_BIT_XOR, "^=", 2)                                            \
  T(ASSIGN_BIT_AND, "&=", 2)                                            \
  T(ASSIGN_SHL, "<<=", 2)                                               \
  T(ASSIGN_SAR, ">>=", 2)                                               \
  T(ASSIGN_SHR, ">>>=", 2)                                              \
  T(ASSIGN_ADD, "+=", 2)                                                \
  T(ASSIGN_SUB, "-=", 2)                                                \
  T(ASSIGN_MUL, "*=", 2)                                                \
  T(ASSIGN_DIV, "/=", 2)                                                \
  T(ASSIGN_MOD, "%=", 2)                                                \
                                                                        \
  /* Binary operators sorted by precedence. */                          \
  /* IsBinaryOp() relies on this block of enum values */                \
  /* being contiguous and sorted in the same order! */                  \
  T(COMMA, ",", 1)                                                      \
  T(OR, "||", 4)                                                        \
  T(AND, "&&", 5)                                                       \
  T(BIT_OR, "|", 6)                                                     \
  T(BIT_XOR, "^", 7)                                                    \
  T(BIT_AND, "&", 8)                                                    \
  T(SHL, "<<", 11)                                                      \
  T(SAR, ">>", 11)                                                      \
  T(SHR, ">>>", 11)                                                     \
  T(ADD, "+", 12)                                                       \
  T(SUB, "-", 12)                                                       \
  T(MUL, "*", 13)                                                       \
  T(DIV, "/", 13)                                                       \
  T(MOD, "%", 13)                                                       \
                                                                        \
  /* Compare operators sorted by precedence. */                         \
  /* IsCompareOp() relies on this block of enum values */               \
  /* being contiguous and sorted in the same order! */                  \
  T(EQ, "==", 9)                                                        \
  T(NE, "!=", 9)                                                        \
  T(EQ_STRICT, "===", 9)                                                \
  T(NE_STRICT, "!==", 9)                                                \
  T(LT, "<", 10)                                                        \
  T(GT, ">", 10)                                                        \
  T(LTE, "<=", 10)                                                      \
  T(GTE, ">=", 10)                                                      \
  K(INSTANCEOF, "instanceof", 10)                                       \
  K(IN, "in", 10)                                                       \
                                                                        \
  /* Unary operators. */                                                \
  /* IsUnaryOp() relies on this block of enum values */                 \
  /* being contiguous and sorted in the same order! */                  \
  T(NOT, "!", 0)                                                        \
  T(BIT_NOT, "~", 0)                                                    \
  K(DELETE, "delete", 0)                                                \
  K(TYPEOF, "typeof", 0)                                                \
  K(VOID, "void", 0)                                                    \
                                                                        \
  /* Keywords (ECMA-262, section 7.5.2, page 13). */                    \
  K(BREAK, "break", 0)                                                  \
  K(CASE, "case", 0)                                                    \
  K(CATCH, "catch", 0)                                                  \
  K(CONTINUE, "continue", 0)                                            \
  K(DEBUGGER, "debugger", 0)                                            \
  K(DEFAULT, "default", 0)                                              \
  /* DELETE */                                                          \
  K(DO, "do", 0)                                                        \
  K(ELSE, "else", 0)                                                    \
  K(FINALLY, "finally", 0)                                              \
  K(FOR, "for", 0)                                                      \
  K(FUNCTION, "function", 0)                                            \
  K(IF, "if", 0)                                                        \
  /* IN */                                                              \
  /* INSTANCEOF */                                                      \
  K(NEW, "new", 0)                                                      \
  K(RETURN, "return", 0)                                                \
  K(SWITCH, "switch", 0)                                                \
  K(THIS, "this", 0)                                                    \
  K(THROW, "throw", 0)                                                  \
  K(TRY, "try", 0)                                                      \
  /* TYPEOF */                                                          \
  K(VAR, "var", 0)                                                      \
  /* VOID */                                                            \
  K(WHILE, "while", 0)                                                  \
  K(WITH, "with", 0)                                                    \
                                                                        \
  /* Future reserved words (ECMA-262, section 7.5.3, page 14). */       \
  F(ABSTRACT, "abstract", 0)                                            \
  F(BOOLEAN, "boolean", 0)                                              \
  F(BYTE, "byte", 0)                                                    \
  F(CHAR, "char", 0)                                                    \
  F(CLASS, "class", 0)                                                  \
  K(CONST, "const", 0)                                                  \
  F(DOUBLE, "double", 0)                                                \
  F(ENUM, "enum", 0)                                                    \
  F(EXPORT, "export", 0)                                                \
  F(EXTENDS, "extends", 0)                                              \
  F(FINAL, "final", 0)                                                  \
  F(FLOAT, "float", 0)                                                  \
  F(GOTO, "goto", 0)                                                    \
  F(IMPLEMENTS, "implements", 0)                                        \
  F(IMPORT, "import", 0)                                                \
  F(INT, "int", 0)                                                      \
  F(INTERFACE, "interface", 0)                                          \
  F(LONG, "long", 0)                                                    \
  K(NATIVE, "native", 0)                                                \
  F(PACKAGE, "package", 0)                                              \
  F(PRIVATE, "private", 0)                                              \
  F(PROTECTED, "protected", 0)                                          \
  F(PUBLIC, "public", 0)                                                \
  F(SHORT, "short", 0)                                                  \
  F(STATIC, "static", 0)                                                \
  F(SUPER, "super", 0)                                                  \
  F(SYNCHRONIZED, "synchronized", 0)                                    \
  F(THROWS, "throws", 0)                                                \
  F(TRANSIENT, "transient", 0)                                          \
  F(VOLATILE, "volatile", 0)                                            \
                                                                        \
  /* Literals (ECMA-262, section 7.8, page 16). */                      \
  K(NULL_LITERAL, "null", 0)                                            \
  K(TRUE_LITERAL, "true", 0)                                            \
  K(FALSE_LITERAL, "false", 0)                                          \
  T(NUMBER, NULL, 0)                                                    \
  T(STRING, NULL, 0)                                                    \
                                                                        \
  /* Identifiers (not keywords or future reserved words). */            \
  T(IDENTIFIER, NULL, 0)                                                \
                                                                        \
  /* Illegal token - not able to scan. */                               \
  T(ILLEGAL, "ILLEGAL", 0)                                              \
                                                                        \
  /* Scanner-internal use only. */                                      \
  T(COMMENT, NULL, 0)


// Static-only utility class describing the scanner's tokens: the token
// enumeration, classification predicates, and per-token string and
// precedence tables.
class Token {
 public:
  // Enumerates every token value. The local macro T turns each T(...) and
  // K(...) entry of TOKEN_LIST into its bare enumerator name (EOS, LPAREN,
  // ...). F(...) entries are handed to IGNORE_TOKEN, so future reserved
  // words get no enumerator. NUM_TOKENS follows the last listed token.
#define T(name, string, precedence) name,
  enum Value {
    TOKEN_LIST(T, T, IGNORE_TOKEN)
    NUM_TOKENS
  };
#undef T

#ifdef DEBUG
  // Returns a string corresponding to the C++ token name
  // (e.g. "LT" for the token LT). Debug builds only.
  static const char* Name(Value tok) {
    ASSERT(tok >= 0 && tok < NUM_TOKENS);
    return name_[tok];
  }
#endif

  // Predicates classifying a token. The range checks depend on the order
  // of the corresponding groups in TOKEN_LIST.

  // True for INIT_VAR through ASSIGN_MOD.
  static bool IsAssignmentOp(Value tok) {
    return tok >= INIT_VAR && tok <= ASSIGN_MOD;
  }

  // True for COMMA through MOD.
  static bool IsBinaryOp(Value op) {
    return op >= COMMA && op <= MOD;
  }

  // True for EQ through IN.
  static bool IsCompareOp(Value op) {
    return op >= EQ && op <= IN;
  }

  // True for BIT_NOT and for BIT_OR through SHR.
  static bool IsBitOp(Value op) {
    if (op == BIT_NOT) return true;
    return op >= BIT_OR && op <= SHR;
  }

  // True for NOT through VOID, and also for ADD and SUB, which double as
  // unary plus and minus.
  static bool IsUnaryOp(Value op) {
    if (op == ADD || op == SUB) return true;
    return op >= NOT && op <= VOID;
  }

  // True for the increment and decrement tokens.
  static bool IsCountOp(Value op) {
    switch (op) {
      case INC:
      case DEC:
        return true;
      default:
        return false;
    }
  }

  // Returns a string corresponding to the JS token string
  // (e.g. "<" for the token LT) or NULL if the token doesn't
  // have a (unique) string (e.g. an IDENTIFIER).
  // The backing table string_ is defined in token.cc.
  static const char* String(Value tok) {
    ASSERT(tok >= 0 && tok < NUM_TOKENS);
    return string_[tok];
  }

  // Returns the precedence > 0 for binary and compare
  // operators; returns 0 otherwise.
  static int Precedence(Value tok) {
    ASSERT(tok >= 0 && tok < NUM_TOKENS);
    return precedence_[tok];
  }

  // Returns the keyword value if str is a keyword;
  // returns IDENTIFIER otherwise. The class must
  // have been initialized.
  static Value Lookup(const char* str);

  // Must be called once to initialize the class.
  // Multiple calls are ignored.
  static void Initialize();

 private:
  // Per-token tables, each indexed by Value.
#ifdef DEBUG
  static const char* name_[NUM_TOKENS];
#endif
  static const char* string_[NUM_TOKENS];
  static int8_t precedence_[NUM_TOKENS];
};

} }  // namespace v8::internal

#endif  // V8_TOKEN_H_

token.cc

代码语言:C++

#include "v8.h"

#include "token.h"

namespace v8 { namespace internal {

// Debug-only table of enumerator names. The local macro T stringifies the
// first argument of every T(...) and K(...) entry of TOKEN_LIST (#name),
// so the array holds "EOS", "LPAREN", ... in list order. F(...) entries
// go to IGNORE_TOKEN and add no element.
#ifdef DEBUG
#define T(name, string, precedence) #name,
const char* Token::name_[NUM_TOKENS] = {
  TOKEN_LIST(T, T, IGNORE_TOKEN)
};
#undef T
#endif


// Table of token strings. The local macro T keeps only the second
// argument of each TOKEN_LIST entry.
#define T(name, string, precedence) string,
const char* Token::string_[NUM_TOKENS] = {
  /*
    TOKEN_LIST first expands to the entry list
      T(EOS, "EOS", 0)
      ...
    and each T(...) then expands to its string argument followed by a
    comma:
      "EOS",
      ...
    F(...) entries disappear because F is bound to IGNORE_TOKEN.
  */
  TOKEN_LIST(T, T, IGNORE_TOKEN)
};
#undef T


// Table of token precedences. The local macro T keeps only the third
// argument of each TOKEN_LIST entry.
#define T(name, string, precedence) precedence,
int8_t Token::precedence_[NUM_TOKENS] = {
  /*
    TOKEN_LIST first expands to the entry list
      T(EOS, "EOS", 0)
      ...
    and each T(...) then expands to its precedence argument followed by
    a comma:
      0,
      ...
    F(...) entries disappear because F is bound to IGNORE_TOKEN.
  */
  TOKEN_LIST(T, T, IGNORE_TOKEN)
};
#undef T


// A perfect (0 collision) hash table of keyword token values.

// Number of hash buckets. A larger N will reduce the number of collisions;
// it is a power of 2 so that the final '% N' in Hash() is fast.
const unsigned int N = 128;
// One byte per bucket: make this small since we have <= 256 tokens.
static uint8_t Hashtable[N];
// Set to true at the end of table construction in Token::Initialize().
static bool IsInitialized = false;

// Hashes the NUL-terminated string s to a bucket index in [0, N).
// Only the first L characters of s take part in the hash, so two strings
// sharing their first L characters always land in the same bucket.
static unsigned int Hash(const char* s) {
  // The following constants have been found using trial-and-error. If the
  // keyword set changes, they may have to be recomputed (make them flags
  // and play with the flag values). Increasing N is the simplest way to
  // reduce the number of collisions.

  // we must use at least 4 or more chars ('const' and 'continue' share
  // 'con')
  const unsigned int L = 5;
  // smaller S tend to reduce the number of collisions
  const unsigned int S = 4;
  // make this a prime, or at least an odd number
  const unsigned int M = 3;

  unsigned int h = 0;
  // Stop at the terminating NUL or after L characters, whichever is first.
  for (unsigned int i = 0; s[i] != '\0' && i < L; i++) {
    h += (h << S) + s[i];
  }
  // unsigned int % by a power of 2 (otherwise this will not be a bit mask)
  return h * M % N;
}

// Maps str to its keyword token, or to IDENTIFIER when str is not a
// keyword. Token::Initialize() must have run beforehand.
Token::Value Token::Lookup(const char* str) {
  ASSERT(IsInitialized);
  // The bucket selected by the hash stores the only token that str could
  // possibly be (IDENTIFIER when the bucket was never claimed).
  const unsigned int bucket = Hash(str);
  Value candidate = static_cast<Value>(Hashtable[bucket]);
  // Spelling of that candidate; only IDENTIFIER may lack one.
  const char* spelling = string_[candidate];
  ASSERT(spelling != NULL || candidate == IDENTIFIER);
  // Different strings can share a bucket, so the candidate is accepted
  // only if its spelling really equals str.
  if (spelling != NULL && strcmp(spelling, str) != 0) {
    return IDENTIFIER;
  }
  return candidate;
}


#ifdef DEBUG
// We need this function because C++ doesn't allow the expression
// NULL == NULL, which is a result of macro expansion below. What
// the hell?
// Returns true if s is the null pointer.
static bool IsNull(const char* s) {
  return s == NULL;
}
#endif

// Builds the keyword hash table consulted by Lookup(). The first call
// does the work; later calls return immediately. Aborts through FATAL if
// two keywords hash to the same bucket.
void Token::Initialize() {
  if (IsInitialized) return;

  // A list of all keywords, terminated by ILLEGAL.
  // TOKEN_LIST is expanded with the T and F categories bound to
  // IGNORE_TOKEN, so only the K(...) entries remain; the local macro T
  // reduces each of them to its enumerator name.
#define T(name, string, precedence) name,
  static Value keyword[] = {
    TOKEN_LIST(IGNORE_TOKEN, T, IGNORE_TOKEN)
    ILLEGAL
  };
#undef T

  // Assert that the keyword array contains the 25 keywords, 3 future
  // reserved words (const, debugger, and native), and the 3 named literals
  // defined by ECMA-262 standard.
  ASSERT(ARRAY_SIZE(keyword) == 25 + 3 + 3 + 1);  // +1 for ILLEGAL sentinel

  // Initialize Hashtable. Every bucket starts out as IDENTIFIER, which
  // also serves as the "bucket not yet claimed" marker below.
  ASSERT(NUM_TOKENS <= 256);  // Hashtable contains uint8_t elements
  for (unsigned int i = 0; i < N; i++) {
    Hashtable[i] = IDENTIFIER;
  }

  // Insert all keywords into Hashtable.
  int collisions = 0;

  for (int i = 0; keyword[i] != ILLEGAL; i++) {
    Value k = keyword[i];
    // string_ is indexed by Value and covers both T and K tokens; the
    // keyword array holds the subset of indices that are keywords, so
    // string_[k] is the spelling of keyword k.
    unsigned int h = Hash(string_[k]);
    // A bucket that no longer holds IDENTIFIER was already claimed by an
    // earlier keyword: that is a collision.
    if (Hashtable[h] != IDENTIFIER) collisions++;
    // Record the mapping from hash value to keyword token.
    Hashtable[h] = k;
  }

  if (collisions > 0) {
    PrintF("%d collisions in keyword hashtable\n", collisions);
    FATAL("Fix keyword lookup!");
  }

  IsInitialized = true;

  // Verify hash table: every non-keyword token string must look up as
  // IDENTIFIER and every keyword string must look up as its own token.
#define T(name, string, precedence) \
  ASSERT(IsNull(string) || Lookup(string) == IDENTIFIER);

#define K(name, string, precedence) \
  ASSERT(Lookup(string) == name);

  TOKEN_LIST(T, K, IGNORE_TOKEN)

#undef K
#undef T
}

} }  // namespace v8::internal

0 人点赞