AstContainer.h

Go to the documentation of this file.
00001 /*------------------------------------------------------------------------------
00002     This file is part of PHP-AST Project by Romain Gaucher (http://rgaucher.info).
00003 
00004     PHP-AST is free software: you can redistribute it and/or modify
00005     it under the terms of the GNU General Public License as published by
00006     the Free Software Foundation, either version 3 of the License, or
00007     (at your option) any later version.
00008 
00009     PHP-AST/ORACLE is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012     GNU General Public License for more details.
00013 
00014     You should have received a copy of the GNU General Public License
00015     along with PHP-AST.  If not, see <http://www.gnu.org/licenses/>.
00016 ------------------------------------------------------------------------------*/
00017 
00018 #ifndef __AST_CONTAINER_H
00019 #define __AST_CONTAINER_H
00020 
00021 #include "tree.h"
00022 #include "tree_util.h"
00023 #include "AstNode.h"
00024 #include "AstPath.h"
00025 #include "AstRange.h"
00026 #include <libxml/parser.h>
00027 #include <libxml/tree.h>
00028 #include <iostream>
00029 #include <string>
00030 #include <map>
00031 #include <vector>
00032 #include <list>
00033 
00034 //! Simple Variable declaration using the position in the tree
00035 class VarBase {
00036   public:
00037     std::string name;
00038     tree<AstNode>::iterator position;
00039     std::string index;
00040   public:
00041     VarBase() {}
00042     VarBase(const std::string& _name, const tree<AstNode>::iterator& _pos, const std::string& _index = "")
00043      : name(_name), position(_pos), index(_index) {}
00044     VarBase(const VarBase& v) : name(v.name), position(v.position), index(v.index) {}   
00045     VarBase& operator=(const VarBase& v) {
00046         if (this != &v) {
00047             name = v.name; position = v.position; index = v.index;
00048         }
00049         return *this;   
00050     }
00051     ~VarBase() {}
00052     bool operator==(const VarBase& v) const {
00053         if (v.name != name || v.index != index || v.position != position)
00054             return false;
00055         return true;
00056     }
00057 };
00058 
00059 struct VarBaseCmp {
00060     bool operator()(const VarBase& v1, const VarBase& v2) const {
00061         if (v1.name != v2.name)
00062             return v1.name <= v2.name;
00063         else
00064             return v1.index > v2.index;
00065         return false;
00066     }
00067 };
00068 
00069 
00070 
00071 class MetricResult;  // Forward declaration: return type of Ast::compute
00072 class Metric;        // Forward declaration: passed by pointer to Ast::compute
00073 class Obfuscator;    // Forward declaration: passed by pointer to Ast::apply
00074 class Translation;   // Forward declaration: passed by pointer to Ast::convert
00075 class StringBuffer;  // Forward declaration: return type of Ast::convert
00076 
00077 typedef std::list<VarBase> AstVarBaseList; //!< List of variables
00078 typedef std::list<tree<AstNode>::iterator > AstNodeIteratorList; //!< List of positions (iterators) in the tree
00079 typedef std::multimap<VarBase, VarBase, VarBaseCmp> MapAssignments; //!< Variable to variable relations, keys ordered by VarBaseCmp (several entries per key allowed)
00080 typedef std::map<VarBase, AstVarBaseList, VarBaseCmp> MapVarEquivalent; //!< Variable to the list of variables recorded as equivalent to it, keys ordered by VarBaseCmp
00081 typedef std::map<std::string, AstVarBaseList>  MapVariables; //!< Variable name to the list of VarBase recorded under that name
00082 typedef std::map<std::string, tree<AstNode>::iterator > MapFunctions; //!< Function name to a tree iterator
00083 typedef std::map<std::string, tree<AstNode>::iterator > MapClasses; //!< Class name to a tree iterator
00084 typedef std::map<std::string, std::list<std::string> > FunctionMapping; //!< Function name to the names of the functions used in it (one to many)
00085 
00086 /**
00087     BoxedFunction modelize a function as a series of input and output:
00088      - input : the function called inside the functions are inputs to something (variable, etc.)
00089                   ex -- foo = sql(query) in a function, 'sql' is an input
00090     - output : the function is actually returning as output something (transformation of variables)
00091 */
00092 struct BoxedFunction {
00093     //! input: variable = function(...)
00094     MapFunctions input;
00095     //! output: function(variable)
00096     MapFunctions output;
00097     // Class Prototype
00098     BoxedFunction() {}
00099     BoxedFunction(const BoxedFunction& b) : input(b.input), output(b.output) {}
00100     BoxedFunction& operator=(const BoxedFunction& b) {
00101         input = b.input;
00102         output = b.output;
00103         return *this;
00104     }
00105     ~BoxedFunction() {}
00106 
00107     inline bool isInput(const std::string& __str) {
00108         return input.find(__str) != input.end();
00109     }
00110     inline bool isOutput(const std::string& __str) {
00111         return output.find(__str) != output.end();
00112     }
00113 };
00114 
00115 
00116 /**
00117     The Ast class is the main container of the AST.
00118     It contains all possible information about the source code: variable names (context), function, etc.
00119 */
00120 class Ast
00121 {
00122   protected:
00123     //! Store the tree
00124     tree<AstNode> tr;
00125     //! Iterator to the root of the tree
00126     tree<AstNode>::iterator root;
00127     //! Function name to tree iterator
00128     MapFunctions functions;
00129     //! Variable name to list of variable (VarBase) which depends on the context of the variable
00130     MapVariables variables;
00131     //! Class name to tree iterator
00132     MapClasses   classes;
00133     //! Relations within variables (contexted)
00134     MapAssignments assign;
00135     //! For the assignments, I need an equivalence between some variables $a = foo($b,$c,$d) with foo as a black box
00136     //! then, the variables b,c,d are equivalent given that $a variable
00137     MapVarEquivalent equivalences;
00138     //! For one function, list of what function is used as input and what is used as output
00139     std::map<std::string, BoxedFunction> boxedFunction;
00140     //! Function to list of function used in
00141     FunctionMapping fctmap;
00142     //! Reference to another tree, used for in-skeleton transformation
00143     Ast *ref;
00144     //! Do a boxing of the current source code, this is helpful for non function based code (php,asp, etc.)
00145     BoxedFunction boxedSource;
00146 
00147   private:
00148     //! Walk into the XML tree in order to build the AST in-memory
00149     void walk(xmlNode *a_node, tree<AstNode>::iterator parent);
00150 
00151   protected:
00152     //! Check whether the current node is a "skeleton node" or not
00153     //! A skeleton tree is a simplified one, derived from the AST
00154     bool is_skeleton_node(const tree<AstNode>::iterator& ) const;
00155 
00156     //! Scan the tree to fill the information about variables, functions etc.
00157     void fillAstInformation();
00158     
00159     //! Clear all information
00160     void clearInfo();
00161     
00162     //! Going down one node, return the name of the value (may be function, class, variable, etc.)
00163     std::string getChildValue(tree<AstNode>::iterator parent, const std::string& nodeName = "T_STRING") const;
00164     
00165     //! Return the number of nested variables: the number of variable in the sub-tree
00166     unsigned nbNestedVariables(const tree<AstNode>::iterator& iter) const;
00167 
00168     //! Get all the function names used in a function or method
00169     void functionMapping(const tree<AstNode>::iterator& it, const std::string& fctName);
00170     
00171     //! Box one function
00172     void functionBoxing(const tree<AstNode>::iterator& it, const std::string& fctName);
00173 
00174     //! Source boxing
00175     void sourceBoxing(const tree<AstNode>::iterator& it);
00176 
00177   protected:
00178     //! Detect if there is an assignment in the subtree
00179     bool detectAssignment(const tree<AstNode>::iterator& it) const;
00180 
00181     //! Detect if there is a function declaration in the subtree
00182     bool detectFunction(const tree<AstNode>::iterator& i) const;
00183 
00184     //! Must not be 'const' since it can write to the MapAssignements...
00185     std::list<VarBase> getRightVariables(const tree<AstNode>::iterator& it, const VarBase& left);
00186 
00187     //! Get the variable in the left of an assignment
00188     VarBase getLeftVariable(const tree<AstNode>::iterator& it);
00189 
00190     //! Simply get the one variable with is on the subtree
00191     VarBase getSimpleVariable(const tree<AstNode>::iterator& iter);
00192 
00193   public:
00194     //! Get the list of all subvariable
00195     std::list<VarBase> getSubVariables(const tree<AstNode>::iterator& it) const;
00196 
00197   public:
00198     // Full copelien class
00199     Ast() : root(tr.begin()) { ref = 0;}
00200     Ast(const Ast& ast) {*this = ast;}
00201     Ast(const std::string& xmlFile);
00202     Ast& operator=(const Ast& ast) {
00203         if (this != &ast) {
00204             tr = ast.tr;
00205             root = ast.root;
00206             functions = ast.functions;
00207             variables = ast.variables;
00208             classes   = ast.classes;
00209             equivalences = ast.equivalences;
00210             assign = ast.assign;
00211             ref = ast.ref;
00212         }
00213         return *this;   
00214     }
00215     ~Ast() {}
00216     // Access
00217     tree<AstNode>* getTreePtr() { return (&tr); }
00218     const tree<AstNode>* getTreeConstPtr() const { return (const tree<AstNode>*)(&tr); }
00219     
00220     inline std::map<std::string, BoxedFunction> getBoxedFunctions() const {
00221         return boxedFunction;
00222     }
00223     inline FunctionMapping getMappedFunctions() const {
00224         return fctmap;
00225     }
00226     //! Get names informations
00227     inline std::list<std::string> getVariableNames() const {
00228         // extract the keys of the map 'variables'
00229         std::list<std::string> ret;
00230         for(MapVariables::const_iterator iter = variables.begin(); iter != variables.end(); ++iter)
00231             ret.push_back(iter->first);
00232         return ret;
00233     }
00234     inline std::list<std::string> getFunctionNames() const {
00235         std::list<std::string> ret;
00236         for(MapFunctions::const_iterator iter = functions.begin(); iter != functions.end(); ++iter)
00237             ret.push_back(iter->first);
00238         return ret;
00239     }
00240     inline std::list<std::string> getClassNames() const {
00241         std::list<std::string> ret;
00242         for(MapClasses::const_iterator iter = classes.begin(); iter != classes.end(); ++iter)
00243             ret.push_back(iter->first);
00244         return ret;
00245     }
00246 
00247     // Algorithms
00248     Ast skeleton() const;
00249     void trace(const std::string &varName) const;
00250 
00251     // Apply algorithms
00252     void apply  (Obfuscator* obs);
00253     StringBuffer convert(Translation* obs);
00254 
00255     MetricResult compute(Metric *metric) const;
00256     // Some functions...
00257     void printInfo(std::ostream& stream) const;
00258     // External functions
00259     friend std::ostream& operator<<(std::ostream& stream, const Ast& ast);
00260     friend void exportXML(const Ast& ast, const std::string& xmlName);
00261 };
00262 
00263 
00264 std::ostream& operator<<(std::ostream& stream, const tree<AstNode>& ast); // stream insertion for a raw tree; declared only, definition not in this header
00265 std::ostream& operator<<(std::ostream& stream, const Ast& ast); // stream insertion for an Ast; also declared as a friend of Ast
00266 std::ostream& operator<<(std::ostream& stream, const VarBase& v); // stream insertion for a single variable
00267 std::ostream& operator<<(std::ostream& stream, const tree<AstNode>::iterator& iter); // stream insertion for a tree iterator
00268 void exportXML(const Ast& ast, const std::string& xmlName); // friend of Ast taking an Ast and a name string; definition not in this header
00269 
00270 #endif

Generated on Wed Feb 27 20:31:06 2008 for php.ast.svn.src. by  doxygen 1.5.3