Program Listing for File tokenStreamMapping.h

↰ Return to documentation for file (src/frontend/SageIII/astTokenStream/tokenStreamMapping.h)
#ifndef TOKEN_STREAM_SEQUENCE_MAPPING_HEADER
#define TOKEN_STREAM_SEQUENCE_MAPPING_HEADER

class TokenStreamSequenceToNodeMapping_key
   {
  // Key type used to decide when a TokenStreamSequenceToNodeMapping object can be
  // shared by several IR nodes of the AST.  IR nodes whose token sequences are the
  // same (ignoring the leading and trailing token subsequences) are meant to share
  // a single TokenStreamSequenceToNodeMapping object.
  //
  // This type is the key of TokenStreamSequenceToNodeMapping::tokenSequencePool
  // (a std::map with the default comparator), which is why operator< is declared.

     public:
       // IR node associated with this key.
          SgNode* node;

       // Bounds of the interval identified by this key.
       // NOTE(review): presumably indices into the token stream -- confirm in the implementation.
          int lower_bound;
          int upper_bound;

       // DQ (4/21/2021): The SgSourceFile is part of the key so that header files can be supported.
          SgSourceFile* sourceFile;

       // Build a key from a source file, an IR node, and the interval bounds.
       // (The earlier form taking only the node and the two bounds is no longer declared.)
          TokenStreamSequenceToNodeMapping_key(SgSourceFile* sourceFile, SgNode* n, int input_lower_bound, int input_upper_bound);

       // Copy constructor.
          TokenStreamSequenceToNodeMapping_key(const TokenStreamSequenceToNodeMapping_key & X);

       // Equality and ordering of keys; both are defined outside this header.
          bool operator==(const TokenStreamSequenceToNodeMapping_key & X) const;
          bool operator<(const TokenStreamSequenceToNodeMapping_key & X) const;
   };


class TokenStreamSequenceToNodeMapping
   {
  // Principal data structure of the token mapping.
  //
  // An object of this class records which token subsequence belongs to an IR node
  // (or to several IR nodes when the mapping is shared).  Objects of this class are
  // used as list elements to report every subsequence-to-IR-node mapping in the AST.

     public:
       // Pointer to the AST IR node.  Because a token subsequence may be shared by
       // several nodes, the associated nodes are also kept (in order) in nodeVector below.
          SgNode* node;

       // Leading whitespace (start, end).
          int leading_whitespace_start;
          int leading_whitespace_end;

       // Token subsequence associated with the specified node(s) (start, end).
          int token_subsequence_start;
          int token_subsequence_end;

       // Trailing whitespace (start, end).
          int trailing_whitespace_start;
          int trailing_whitespace_end;

       // DQ (12/31/2014): Special case for SgIfStmt: the middle subsequence of tokens (start, end).
          int else_whitespace_start;
          int else_whitespace_end;

       // Some normalized parts of the ROSE AST can currently share one
       // TokenStreamSequenceToNodeMapping object.  The best example is a variable
       // declaration naming several variables (e.g. "int a,b,c;"): it is currently
       // normalized into three separate variable declarations (this is expected to
       // be fixed in the future).  Each of those declarations has the same source
       // position in the generated AST, so the same token sequence maps to all of
       // them and the mapping object can be shared between them.  Now that a
       // container of SgNodes exists, this flag should be true iff that container
       // holds more than one node.
          bool shared;

       // NOTE(review): undocumented in this header; presumably records that the object was
       // built while evaluating a synthesized attribute -- confirm in the implementation.
          bool constructedInEvaluationOfSynthesizedAttribute;

       // IR nodes associated with this token sequence when it is shared (a vector, to preserve order).
          std::vector<SgNode*> nodeVector;

       // Static data for generating unique keys into the tokenSequencePool.
          static size_t tokenStreamSize;

       // Pool of unique subsequences (intervals), looked up by TokenStreamSequenceToNodeMapping_key.
       // Earlier variants of this declaration were keyed by size_t; a Boost interval map
       // might be an alternative.
          static std::map<TokenStreamSequenceToNodeMapping_key,TokenStreamSequenceToNodeMapping*> tokenSequencePool;

       // Constructor taking the node and the (start, end) pair of each subsequence.
          TokenStreamSequenceToNodeMapping(SgNode* n,
               int input_leading_whitespace_start,
               int input_leading_whitespace_end,
               int input_token_subsequence_start,
               int input_token_subsequence_end,
               int input_trailing_whitespace_start,
               int input_trailing_whitespace_end,
               int input_else_whitespace_start,
               int input_else_whitespace_end);

       // Copy constructor.
          TokenStreamSequenceToNodeMapping( const TokenStreamSequenceToNodeMapping & X );

       // Factory interval generator for new intervals (token sequences).
       // DQ (4/21/2021): This depends on the SgSourceFile so that multiple files
       // (e.g. header files) can be supported; the earlier form without the
       // sourceFile parameter is no longer declared.
          static TokenStreamSequenceToNodeMapping* createTokenInterval (SgSourceFile* sourceFile, SgNode* n,
               int input_leading_whitespace_start,
               int input_leading_whitespace_end,
               int input_token_subsequence_start,
               int input_token_subsequence_end,
               int input_trailing_whitespace_start,
               int input_trailing_whitespace_end,
               int input_else_whitespace_start,
               int input_else_whitespace_end);

       // Display this mapping; the label is supplied by the caller.
       // NOTE(review): output destination and format are not visible in this header.
          void display(std::string label) const;
   };


class Graph_TokenMappingTraversal : public AstSimpleProcessing
   {
     public:
       // File for output for generated graph.
          static std::ofstream file;

       // The map is stored so that we can lookup the token subsequence information using the SgNode pointer as a key.
          std::map<SgNode*,TokenStreamSequenceToNodeMapping*> & tokenStreamSequenceMap;

       // The vector is stored so that we can build the list of nodes with edges (edges
       // are missing the the token information, which might be better to support there).
          std::vector<stream_element*> & tokenList;

          Graph_TokenMappingTraversal(std::vector<stream_element*> & input_tokenList, std::map<SgNode*,TokenStreamSequenceToNodeMapping*> & tokenMap);

          void visit(SgNode* n);

       // static void graph_ast_and_token_stream(SgSourceFile* file, vector<stream_element*> & tokenList);
          static void graph_ast_and_token_stream(SgSourceFile* file, std::vector<stream_element*> & tokenList, std::map<SgNode*,TokenStreamSequenceToNodeMapping*> & tokenStreamSequenceMap);

          static void graph_ast_and_token_stream(SgSourceFile* file);
          static void graph_ast_and_token_stream(std::string filename);

       // Map the toke_id to a string.
          static std::string getTokenIdString (int i);

          static int* first_leading_whitespace_start;
   };


#include "frontierDetection.h"

// DQ (12/4/2014): Added alternative form of detection where to switch
// between unparsing from the AST and unparsing from the token stream.
#include "simpleFrontierDetection.h"

// DQ (11/8/2015): We need a separate traversal to recognise, from the
// token stream mapping, which subtrees are part of macro expansions
// that are transformations.  These macro expansions must be unparsed
// as a single unit (we can't just unparse parts of them from the token
// stream and parts from the AST, because their representation in the
// token stream is only as the unexpanded macro).
#include "detectMacroOrIncludeFileExpansions.h"
#include "detectMacroExpansionsToBeUnparsedAsAstTransformations.h"


// DQ (1/7/2021): Adding function to header so that I can call it elsewhere for testing.
// Returns the token stream for the given source file as a vector of stream_element pointers.
// NOTE(review): ownership of the returned stream_element objects is not visible from this
// declaration -- confirm in the definition before deleting or caching them.
std::vector<stream_element*> getTokenStream( SgSourceFile* file );

#endif