AstContainer.cpp

Go to the documentation of this file.
00001 /*------------------------------------------------------------------------------
00002     This file is part of PHP-AST Project by Romain Gaucher (http://rgaucher.info).
00003 
00004     PHP-AST is free software: you can redistribute it and/or modify
00005     it under the terms of the GNU General Public License as published by
00006     the Free Software Foundation, either version 3 of the License, or
00007     (at your option) any later version.
00008 
00009     PHP-AST/ORACLE is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012     GNU General Public License for more details.
00013 
00014     You should have received a copy of the GNU General Public License
00015     along with PHP-AST.  If not, see <http://www.gnu.org/licenses/>.
00016 ------------------------------------------------------------------------------*/
00017 
00018 #include "AstContainer.h"
00019 #include "Utils.h"
00020 #include "Obfuscator.h"
00021 #include "Translation.h"
00022 #include "Metric.h"
00023 #include <string>
00024 #include <iostream>
00025 #include <fstream>
00026 #include <vector>
00027 #include <stdexcept>
00028 #include <iomanip>
00029 #include <stack>
00030 using namespace std;
00031 
00032 string getContent(xmlNode *a_node) {
00033     xmlChar *c = xmlNodeGetContent(a_node);
00034     string ret((char *)c);
00035     utils::replace(ret, "\n","");
00036     utils::replace(ret, "\r","");
00037     utils::replace(ret, "\b","");   
00038     xmlFree(c);
00039     return ret;
00040 }
00041 
00042 ostream& operator<<(ostream& stream, const Ast& ast) {
00043     kptree::print_tree_bracketed<AstNode>(ast.tr, stream);
00044     return stream;
00045 }
00046 
00047 ostream& operator<<(ostream& stream, const tree<AstNode>& ast){
00048     kptree::print_tree_bracketed<AstNode>(ast, stream);
00049     return stream;  
00050 }
00051 
00052 ostream& operator<<(ostream& stream, const tree<AstNode>::iterator& iter){
00053     stream << iter.node;
00054     return stream;
00055 }
00056 
00057 ostream& operator<<(ostream& stream, const VarBase& v) {
00058     if (v.index.length() < 1)
00059         stream << v.name;
00060     else
00061         stream << v.name << '[' << v.index << ']';
00062     return stream;
00063 }
00064 
00065 
00066 void Ast::apply(Obfuscator* obs) {
00067     obs->operator()(tr, &classes, &variables, &functions);
00068 }
00069 
00070 StringBuffer Ast::convert(Translation* obs) {
00071     clearInfo();
00072     fillAstInformation();
00073     return obs->operator()(tr, &classes, &variables, &functions, &assign, &equivalences);
00074 }
00075 
00076 
00077 MetricResult Ast::compute(Metric *metric) const
00078 {
00079     return metric->operator()(tr, &classes, &variables, &functions, &assign, &equivalences);
00080 }
00081 
00082 
00083 /**
00084     Get the value of the child node wich parent type is 'nodeName'
00085 */
00086 string Ast::getChildValue(tree<AstNode>::iterator parent, const string& nodeName) const
00087 {
00088     tree<AstNode>::sibling_iterator children, textable;
00089     for (children = tr.begin(parent); children != tr.end(parent); ++children){
00090         if (children->getType() == nodeName) {
00091             if (tr.number_of_siblings(children) < 1)
00092                 continue;
00093             textable = tr.begin(children);
00094             if (textable->getType() == "text")
00095                 return textable->getValue();
00096         }
00097     }
00098     return string();
00099 }
00100 
00101 
00102 /**
00103     Calculate the number of nested variables
00104 */
00105 unsigned Ast::nbNestedVariables(const tree<AstNode>::iterator& iter) const
00106 {
00107     unsigned nbVars = 0;
00108     tree<AstNode>::iterator lter = tr.begin(iter);
00109     for (;lter!=tr.end(iter);++lter) {
00110         if (lter->getType() == "T_VARIABLE")
00111             ++nbVars;
00112     }
00113     return nbVars;
00114 }
00115 
00116 void print_current_list(const list<VarBase>& plop) {
00117     for (list<VarBase>::const_iterator iter = plop.begin(); iter != plop.end(); ++iter)
00118         cerr << *iter << " | ";
00119     cerr << endl;
00120 }
00121 
00122 
00123 /**
00124     Detect an assignment in the subtree
00125 */
00126 bool Ast::detectAssignment(const tree<AstNode>::iterator& iter) const
00127 {
00128     if (tr.number_of_children(iter) == 3 && tr.child(iter, 1)->getType() == "CHAR61")
00129         return true;
00130     return false;
00131 }
00132 
00133 
00134 
00135 /**
00136     Boxing a source code. Get inputs/outputs in term of functions.
00137 */
00138 void Ast::sourceBoxing(const tree<AstNode>::iterator& it)
00139 {
00140     // Get the input
00141     for (tree<AstNode>::iterator iter = tr.begin(it); iter != tr.end(it); ++iter)
00142     {
00143         // skip the functions
00144         if (iter->getType() == "unticked_function_declaration_statement") {
00145             iter = tr.end(iter);
00146         }
00147         else if (detectAssignment(iter))
00148         {
00149             // check if the right side has a function, if so, this is an input
00150             for (tree<AstNode>::iterator jter = tr.begin(iter); jter != tr.end(iter); ++jter) {
00151                 string t = jter->getType();
00152                 if (t == "function_call") {
00153                     tree<AstNode>::iterator son = tr.begin(jter);
00154                     if (son->getType() == "T_STRING") {
00155                         tree<AstNode>::iterator  textable = tr.begin(son);
00156                         if (textable->getType() == "text") {
00157                             if (boxedSource.input.find(textable->getValue()) != boxedSource.input.end())
00158                                 continue;
00159                             //cout << "Add in the boxed input ( "<< fctName << " ): " <<  textable->getValue() << endl;
00160                             boxedSource.input.insert(make_pair(textable->getValue(), jter));
00161                         }
00162                     }
00163                 }
00164                 else if (t == "unticked_statement") {
00165                     tree<AstNode>::iterator son = tr.begin(jter);
00166                     if (utils::start_with(son->getType(), "T_")) {
00167                         tree<AstNode>::iterator  textable = tr.begin(son);
00168                         if (textable->getType() == "text") {
00169                             if (boxedSource.input.find(textable->getValue()) != boxedSource.input.end())
00170                                 continue;
00171                             //cout << "Add in the boxed input ( "<< fctName << " ): " <<  textable->getValue() << endl;
00172                             boxedSource.input.insert(make_pair(textable->getValue(), jter));
00173                         }
00174                     }
00175                 }
00176             }
00177         }
00178     }
00179 
00180     // Second pass to get the isolated calls, getting the output
00181     for (tree<AstNode>::iterator iter = tr.begin(it); iter != tr.end(it); ++iter) 
00182     {
00183         string t = iter->getType();
00184         if (t == "unticked_function_declaration_statement") {
00185             iter = tr.end(iter);
00186         }
00187         else if (t == "function_call") {
00188             tree<AstNode>::iterator son = tr.begin(iter);
00189             if (son->getType() == "T_STRING") {
00190                 tree<AstNode>::iterator  textable = tr.begin(son);
00191                 if (textable->getType() == "text") {
00192                     if (boxedSource.output.find(textable->getValue()) != boxedSource.output.end())
00193                         continue;
00194                     //cout << "Add in the boxed output ( "<< fctName << " ): " <<  textable->getValue() << endl;
00195                     boxedSource.output.insert(make_pair(textable->getValue(), iter));
00196                 }
00197             }
00198         }
00199         else if (t == "unticked_statement") {
00200             tree<AstNode>::iterator son = tr.begin(iter);
00201             if (utils::start_with(son->getType(), "T_")) {
00202                 tree<AstNode>::iterator  textable = tr.begin(son);
00203                 if (textable->getType() == "text") {
00204                     if (boxedSource.output.find(textable->getValue()) != boxedSource.output.end())
00205                         continue;
00206                     //cout << "Add in the boxed output ( "<< fctName << " ): " <<  textable->getValue() << endl;
00207                     boxedSource.output.insert(make_pair(textable->getValue(), iter));
00208                 }
00209             }
00210         }
00211     }
00212 }
00213 
00214 /**
00215     Boxing a function. Get inputs/outputs in term of functions.
00216 */
00217 void Ast::functionBoxing(const tree<AstNode>::iterator& it, const std::string& fctName)
00218 {
00219     if (boxedFunction.find(fctName) != boxedFunction.end())
00220         return;
00221     else
00222     {
00223         BoxedFunction bFct;
00224         // Get the input
00225         for (tree<AstNode>::iterator iter = tr.begin(it); iter != tr.end(it); ++iter)
00226         {
00227             if (detectAssignment(iter))
00228             {
00229                 // check if the right side has a function, if so, this is an input
00230                 for (tree<AstNode>::iterator jter = tr.begin(iter); jter != tr.end(iter); ++jter) {
00231                     string t = jter->getType();
00232                     if (t == "function_call") {
00233                         tree<AstNode>::iterator son = tr.begin(jter);
00234                         if (son->getType() == "T_STRING") {
00235                             tree<AstNode>::iterator  textable = tr.begin(son);
00236                             if (textable->getType() == "text") {
00237                                 if (bFct.input.find(textable->getValue()) != bFct.input.end())
00238                                     continue;
00239                                 //cout << "Add in the boxed input ( "<< fctName << " ): " <<  textable->getValue() << endl;
00240                                 bFct.input.insert(make_pair(textable->getValue(), jter));
00241                             }
00242                         }
00243                     }
00244                     else if (t == "unticked_statement") {
00245                         tree<AstNode>::iterator son = tr.begin(jter);
00246                         if (utils::start_with(son->getType(), "T_")) {
00247                             tree<AstNode>::iterator  textable = tr.begin(son);
00248                             if (textable->getType() == "text") {
00249                                 if (bFct.input.find(textable->getValue()) != bFct.input.end())
00250                                     continue;
00251                                 //cout << "Add in the boxed input ( "<< fctName << " ): " <<  textable->getValue() << endl;
00252                                 bFct.input.insert(make_pair(textable->getValue(), jter));
00253                             }
00254                         }
00255                     }
00256                 }
00257             }
00258         }
00259         // Second pass to get the isolated calls, getting the output
00260         for (tree<AstNode>::iterator iter = tr.begin(it); iter != tr.end(it); ++iter) 
00261         {
00262             string t = iter->getType();
00263             if (t == "function_call") {
00264                 tree<AstNode>::iterator son = tr.begin(iter);
00265                 if (son->getType() == "T_STRING") {
00266                     tree<AstNode>::iterator  textable = tr.begin(son);
00267                     if (textable->getType() == "text") {
00268                         if (bFct.output.find(textable->getValue()) != bFct.output.end())
00269                             continue;
00270                         //cout << "Add in the boxed output ( "<< fctName << " ): " <<  textable->getValue() << endl;
00271                         bFct.output.insert(make_pair(textable->getValue(), iter));
00272                     }
00273                 }
00274             }
00275             else if (t == "unticked_statement") {
00276                 tree<AstNode>::iterator son = tr.begin(iter);
00277                 if (utils::start_with(son->getType(), "T_")) {
00278                     tree<AstNode>::iterator  textable = tr.begin(son);
00279                     if (textable->getType() == "text") {
00280                         if (bFct.output.find(textable->getValue()) != bFct.output.end())
00281                             continue;
00282                         //cout << "Add in the boxed output ( "<< fctName << " ): " <<  textable->getValue() << endl;
00283                         bFct.output.insert(make_pair(textable->getValue(), iter));
00284                     }
00285                 }
00286             }
00287         }
00288         boxedFunction.insert(make_pair(fctName, bFct));
00289         //cout << "box size = " << boxedFunction.size() << endl;
00290     }
00291 }
00292 
00293 /**
00294     Mapping one function to others. The other functions are all the function call in
00295     the function.
00296 */
00297 void Ast::functionMapping(const tree<AstNode>::iterator& it, const std::string& fctName)
00298 {
00299     if (fctmap.find(fctName) != fctmap.end())
00300         return;
00301     else
00302     {
00303         list<string> related;
00304         for (tree<AstNode>::iterator iter = tr.begin(it); iter != tr.end(it); ++iter) {
00305             string t = iter->getType();
00306             if (t == "function_call") {
00307                 tree<AstNode>::iterator son = tr.begin(iter);
00308                 if (son->getType() == "T_STRING") {
00309                     tree<AstNode>::iterator  textable = tr.begin(son);
00310                     if (textable->getType() == "text") {
00311                         related.push_back(textable->getValue());
00312                     }
00313                 }
00314             }
00315             else if (t == "unticked_statement") {
00316                 tree<AstNode>::iterator son = tr.begin(iter);
00317                 if (utils::start_with(son->getType(), "T_")) {
00318                     tree<AstNode>::iterator  textable = tr.begin(son);
00319                     if (textable->getType() == "text") {
00320                         related.push_back(textable->getValue());
00321                     }
00322                 }
00323             }
00324         }
00325         // Insert into the fctmap
00326         fctmap.insert(make_pair(fctName, related));
00327     }
00328 }
00329 
00330 
00331 /**
00332     Get in post-order all the variables
    Returns the VarBase entries built from the nodes visited under 'it'.
    The list is empty when nbNestedVariables(it) is below 1.
      - "reference_variable" with one "compound_variable" child: the next
        "text" node gives the name;
      - "reference_variable" with four children: children 0 and 2 lead to
        the array name and the index;
      - "expr_without_variable" / "encaps_var": every child is processed
        recursively and entries not yet in the list are appended;
      - "T_VARIABLE": the next "text" node gives the name.
    Every VarBase is built with 'it' (not the visited node) as iterator.
    NOTE(review): the "text" scans dereference their iterator before
    comparing it with tr.end(it), and the value is read after the scan even
    if the end was reached - confirm the input always holds a "text" node.
    NOTE(review): after the recursive branch the loop still descends into
    the same children, whose variables then appear to be pushed again
    without the duplicate check - confirm this is intended.
00333 */
00334 list<VarBase> Ast::getSubVariables(const tree<AstNode>::iterator& it) const
00335 {
00336     list<VarBase> varBaseList;
00337     if (nbNestedVariables(it) < 1)
00338         return varBaseList;
00339     //cerr << "start:- getSubVars" << endl;
00340     string type;
00341     for (tree<AstNode>::iterator iter=tr.begin(it);iter!=tr.end(it) && iter!=tr.end();iter++)
00342     {
00343         type = iter->getType();
00344         //cout << "\t\t\t => type = " << type << endl;
00345         if (type == "reference_variable")
00346         {
00347             //cerr << "\tref_var ";
00348             unsigned nbchilds = tr.number_of_children(iter);
00349             //cerr << nbchilds << endl;
00350             if (nbchilds == 1) {
00351                 //cout << '[' << iter->getType() << " -> " << tr.child(iter,0)->getType()  << ']' << endl;
00352 
00353                 if (tr.child(iter, 0)->getType() == "compound_variable") {
00354                     // single variables
00355                     tree<AstNode>::iterator var = iter;
                    // advance to the next "text" node
00356                     for(;var->getType() != "text" && var != tr.end(it); ++var)
00357                         ;
00358                     //cerr << setw(15) << "ref :- [" << var->getValue() << ',' << it;
00359                     varBaseList.push_back(VarBase(var->getValue(), it));
00360                     //cerr << " ]" << endl;
                    // resume the outer loop after the node just consumed
00361                     iter = var;
00362                 }
00363             }
00364             else if (nbchilds == 4) {
00365                 // array ?
00366                 tree<AstNode>::iterator array = tr.child(iter, 0);
00367                 tree<AstNode>::iterator index = tr.child(iter, 2);
00368                 for(;array->getType() != "text" && array != tr.end(it); ++array)
00369                     ;
00370                 for(;index->getType() != "text" && index != tr.end(it); ++index)
00371                     ;
00372                 //cerr << setw(15) << "array:- [" << array->getValue() << ',' << it << ',' << index->getValue();
00373                 varBaseList.push_back(VarBase(array->getValue(), it, index->getValue()));
00374                 //cerr << " ]" << endl;
                // resume the outer loop after the index text node
00375                 iter = index;
00376             }
00377         }
00378         else if (type == "expr_without_variable" || type == "encaps_var")
00379         {
00380             //cerr << "\texpr_wo_var ";
00381             unsigned nbchilds = tr.number_of_children(iter);
00382             //cerr << nbchilds << endl;
00383             for (unsigned c=0;c<nbchilds;++c)
00384             {
                // recurse into each child and merge what it found
00385                 list<VarBase> varBaseListTemp = getSubVariables(tr.child(iter, c));
00386                 if (varBaseListTemp.size() > 0) {
00387                     // merge the list
00388                     for (list<VarBase>::const_iterator jter=varBaseListTemp.begin();jter!=varBaseListTemp.end();++jter) {
00389                         if (find(varBaseList.begin(),varBaseList.end(),*jter) == varBaseList.end()) {
00390                             //cout << *jter << endl;
00391                             varBaseList.push_back(*jter);
00392                         }
00393                     }
00394                 }
00395             }
00396             /*
00397             // go to the T_VARIABLE
00398             tree<AstNode>::iterator var = iter;
00399             for(;var->getType() != "T_VARIABLE" && var != tr.end(it) && var != tr.end(); ++var)
00400                 ;
00401             var = tr.child(var, 0);
00402             cerr << setw(15) << "expr:- [" << var->getValue() << ',' << it;
00403             varBaseList.push_back(VarBase(var->getValue(), it));
00404             cerr << " ]" << endl;
00405             //iter = var;
00406             */
00407         }
00408         else if (type == "T_VARIABLE")
00409         {
00410             //cout << "TVariable" << endl;
00411             tree<AstNode>::iterator var = iter;
00412             for(;var->getType() != "text" && var != tr.end(it); ++var)
00413                 ;
00414             //cerr << setw(15) << "ref :- [" << var->getValue() << ',' << it;
00415             varBaseList.push_back(VarBase(var->getValue(), it));
00416             //cerr << " ]" << endl;
00417             iter = var;
00418         }
00419     }
00420     //cerr << "end:- getSubVars" << endl;
00421     return varBaseList;
00422 }
00423 
00424 /**
00425     Get in post-order the right variables assignments
    Depending on nbNestedVariables(it):
      - 0: an empty list is returned;
      - 1 and 'it' is not an assignment: the list holds the result of
        getSimpleVariable(it);
      - otherwise the last child of 'it' is examined. If it is itself an
        assignment, its left variable (when its name is not empty) and the
        result of the recursive call on its right side are returned. If not,
        the variables found are gathered in a local list which is inserted
        in 'equivalences' under 'left'; the returned list stays empty in
        that case.
    NOTE(review): inside the "expr_without_variable" branch the child count
    and the recursion use 'iter' rather than the loop variable 'jter', and
    the merge loop declares a second 'jter' that shadows the outer one -
    confirm this is intended.
    NOTE(review): the first "text" scan has no end-of-tree bound; the array
    and index scans dereference before comparing with tr.end().
00426 */
00427 list<VarBase> Ast::getRightVariables(const tree<AstNode>::iterator& it, const VarBase& left)
00428 {
00429 
00430     list<VarBase> varBaseList;
00431     unsigned nestedVars = nbNestedVariables(it);
00432     if (nestedVars == 0)
00433         return varBaseList;
00434     else if (nestedVars == 1 && !detectAssignment(it)) {
00435         VarBase simple = getSimpleVariable(it);
00436         varBaseList.push_back(simple);
00437         return varBaseList;
00438     }
00439     else
00440     {
00441         // Go to the last right 'expr_without_variable'
00442         unsigned nbchilds = tr.number_of_children(it);
00443         tree<AstNode>::iterator iter = tr.child(it, nbchilds - 1);
00444         //
00445         if (detectAssignment(iter)) {
00446             // Assignment => Let's move to the right, and grab the left variable as a 'left variable'
00447             VarBase leftVar = getLeftVariable(tr.child(iter, 0));
00448             if (leftVar.name != "")
00449                 varBaseList.push_back(leftVar);
00450             list<VarBase> rightVars = getRightVariables(tr.child(iter, 2), left);
00451             // merge the result
00452             for (list<VarBase>::const_iterator jt=rightVars.begin(); jt!=rightVars.end();++jt)
00453                 varBaseList.push_back(*jt);
00454         }
00455         else {
00456             AstVarBaseList locVarList;
00457             // get all variables, put that in a equivalences
00458             for (tree<AstNode>::iterator jter = iter;jter != tr.end(iter); ++jter) {
00459                 string type(jter->getType());
00460                 if (type == "reference_variable")
00461                 {
00462                     unsigned nbchilds = tr.number_of_children(jter);
00463                     if (nbchilds == 1) {
00464                         if (tr.child(jter, 0)->getType() == "compound_variable") {
00465                             // single variables
00466                             tree<AstNode>::iterator var = jter;
                            // advance to the next "text" node (unbounded scan)
00467                             for(;var->getType() != "text"; ++var)
00468                                 ;
00469                             //cerr << "I got this? " << var->getValue() << endl;
00470                             VarBase right(var->getValue(), it);
                            // append only when not already in the local list
00471                             if (!(find(locVarList.begin(), locVarList.end(),right) != locVarList.end()))
00472                                 locVarList.push_back(right);
00473                             jter = var;
00474                         }
00475                     }
00476                     else if (nbchilds == 4) {
00477                         // array ?
00478                         tree<AstNode>::iterator array = tr.child(jter, 0);
00479                         tree<AstNode>::iterator index = tr.child(jter, 2);
00480                         for(;array->getType() != "text" && array != tr.end(); ++array)
00481                             ;
00482                         for(;index->getType() != "text" && index != tr.end(); ++index)
00483                             ;
00484                         VarBase right(array->getValue(), it, index->getValue());
00485                         if (!(find(locVarList.begin(), locVarList.end(),right) != locVarList.end()))
00486                             locVarList.push_back(right);
00487                         jter = index;                               
00488                     }
00489                 }
00490                 else if (type == "expr_without_variable")
00491                 {
00492                     //cerr << "expr_wo_var ";
                    // NOTE(review): 'iter', not 'jter', is used here and below
00493                     unsigned nbchilds = tr.number_of_children(iter);
00494                     //cerr << nbchilds << endl;
00495                     for (unsigned c=0;c<nbchilds;++c)
00496                     {
00497                         list<VarBase> varBaseListTemp = getSubVariables(tr.child(iter, c));
00498                         if (varBaseListTemp.size() > 0) {
00499                             // merge the list
00500                             for (list<VarBase>::const_iterator jter=varBaseListTemp.begin();jter!=varBaseListTemp.end();++jter) {
00501                                 if (find(locVarList.begin(),locVarList.end(),*jter) == locVarList.end())
00502                                     locVarList.push_back(*jter);
00503                             }
00504                         }
00505                     }
00506 
00507                 /*
00508                     // go to the T_VARIABLE
00509                     tree<AstNode>::iterator var = jter;
00510                     for(;var->getType() != "T_VARIABLE"; ++var)
00511                         ;
00512                     var = tr.child(var, 0);
00513                     //cerr << "I got this? " << var->getValue() << endl;
00514                     VarBase right(var->getValue(), it);
00515                     if (!(find(locVarList.begin(), locVarList.end(),right) != locVarList.end()))
00516                         locVarList.push_back(right);
00517                 */
00518                 }
00519             }
00520             // local equivalences
00521             equivalences.insert(make_pair(left, locVarList));                   
00522         }
00523         return varBaseList;
00524     }
00525 }
00526 
00527 /**
00528     Get a simple variable name; we know that there is one expression with one variable inside.
00529     We need to get this!
00530 */
00531 VarBase Ast::getSimpleVariable(const tree<AstNode>::iterator& it)
00532 {
00533     tree<AstNode>::iterator iter = it;
00534     // go the the variable
00535     for (;iter != tr.end(it); ++iter) {
00536         if (iter->getType() == "reference_variable")
00537         {
00538             unsigned nbchilds = tr.number_of_children(iter);
00539             if (nbchilds == 1) {
00540                 if (tr.child(iter, 0)->getType() == "compound_variable") {
00541                     // single variables
00542                     tree<AstNode>::iterator var = iter;
00543                     for(;var->getType() != "text"; ++var)
00544                         ;
00545                     return VarBase(var->getValue(), it);
00546                 }
00547             }
00548             else if (nbchilds == 4) {
00549                 // array ?
00550                 tree<AstNode>::iterator array = tr.child(iter, 0);
00551                 tree<AstNode>::iterator index = tr.child(iter, 2);
00552                 for(;array->getType() != "text" && array != tr.end(); ++array)
00553                     ;
00554                 for(;index->getType() != "text" && index != tr.end(); ++index)
00555                     ;
00556                 return VarBase(array->getValue(), it, index->getValue());
00557             }
00558         }
00559         else if (iter->getType() == "expr_without_variable")
00560         {
00561             // go to the T_VARIABLE
00562             tree<AstNode>::iterator var = iter;
00563             for(;var->getType() != "T_VARIABLE"; ++var)
00564                 ;
00565             var = tr.child(var, 0);
00566             return VarBase (var->getValue(), it);
00567         }
00568     }
00569     // backtrack for: strings, functions, class
00570     return VarBase();
00571 }
00572 
00573 /**
00574     Get left variable name
00575 */
00576 VarBase Ast::getLeftVariable(const tree<AstNode>::iterator& iter)
00577 {
00578     tree<AstNode>::iterator it = iter;
00579     for (;it->getType() != "reference_variable"; ++it)
00580         ;
00581     unsigned nbchilds = tr.number_of_children(it);
00582     if (nbchilds == 1) {
00583         tree<AstNode>::iterator var = it;
00584         for(;var->getType() != "text"; ++var)
00585             ;
00586         return VarBase(var->getValue(), it);
00587     }
00588     else if (nbchilds == 4) {
00589         // $foo['var'] pattern detected!
00590         tree<AstNode>::iterator array = tr.child(it, 0);
00591         tree<AstNode>::iterator index = tr.child(it, 2);
00592         for(;array->getType() != "text" && array != tr.end(); ++array)
00593             ;
00594         for(;index->getType() != "text" && index != tr.end(); ++index)
00595             ;
00596         return VarBase(array->getValue(), it, index->getValue());
00597     }
00598     else {
00599         // Not supported
00600         cerr << "getLeftVariable, unsupported elemnts nb = " << *it << endl;
00601         return VarBase();
00602     }
00603     return VarBase();
00604 }
00605 
/**
    Fill the information tables from the tree.
    sourceBoxing() is first run on tr.begin(). A first loop over the whole
    tree then records:
      - class declarations in 'classes' (first occurrence of a name only);
      - function declarations in 'functions' (first occurrence only), each
        one also going through functionMapping() and functionBoxing();
      - array-style "reference_variable" nodes and "T_VARIABLE" nodes in
        'variables', grouped by variable name.
    A second loop handles the nodes accepted by detectAssignment(): the left
    variable and the list returned by getRightVariables() are linked in
    'assign'.
    NOTE(review): the array branch reads children 0 and 2 without checking
    the child count and its "text" scans are unbounded - confirm the input
    guarantees this shape.
    NOTE(review): in the second loop 'iter = ++fter' is followed by the
    loop's own '++iter', and nothing checks the fixed-depth iterator before
    it is used again - confirm.
*/
00606 void Ast::fillAstInformation()
00607 {
00608     tree<AstNode>::iterator iter = tr.begin();
00609     sourceBoxing(iter);
00610     tree<AstNode>::sibling_iterator textable;
    // this 'fter' is not read in the function; the second loop declares its own
00611     tree<AstNode>::fixed_depth_iterator fter = iter;
00612     for (;iter != tr.end(); ++iter)
00613     {
00614         string type = iter->getType();
00615         if (type == "unticked_class_declaration_statement")
00616         {   
00617             string className = getChildValue(iter, "T_STRING");
            // keep the first declaration seen for a name
00618             if (classes.find(className) != classes.end())
00619                 continue;
00620             classes.insert(make_pair(className, iter));
00621         }
00622         else if (type == "unticked_function_declaration_statement")
00623         {
00624             string funcName = getChildValue(iter, "T_STRING");
00625             if (functions.find(funcName) != functions.end())
00626                 continue;
00627             functions.insert(make_pair(funcName, iter));
00628             // Fill the function mapping win this function
00629             functionMapping(iter, funcName);
00630             // Get the boxed functions
00631             functionBoxing(iter, funcName);
00632         }
00633         // Variables Names detection
00634         else if (type == "reference_variable")
00635         {
            // look at the node that follows in iteration order
00636             tree<AstNode>::iterator next = iter; ++next;
00637             if (next->getType() != "compound_variable") {
00638                 // variable with array: $_GET['foo'] => $_GET, [, 'foo', ]
00639                 tree<AstNode>::iterator array = tr.child(iter, 0);
00640                 tree<AstNode>::iterator index = tr.child(iter, 2);
00641                 for(;array->getType() != "text"; ++array)
00642                     ;
00643                 for(;index->getType() != "text"; ++index)
00644                     ;
00645                 VarBase var(array->getValue(), iter, index->getValue());
                // create the per-name list on first sight, then append
00646                 if (!(variables.find(var.name) != variables.end())) {
00647                     AstVarBaseList astFooIterList;
00648                     variables.insert(make_pair(var.name, astFooIterList));
00649                 }
00650                 variables.find(var.name)->second.push_back(var);
00651             }
00652         }
00653         else if (type == "T_VARIABLE")
00654         {
00655             textable = tr.begin(iter);
00656             if (textable->getType() == "text") {
00657                 VarBase var(textable->getValue(), iter);
00658                 if (!(variables.find(var.name) != variables.end())) {
00659                     AstVarBaseList astFooIterList;
00660                     variables.insert(make_pair(var.name, astFooIterList));
00661                 }
00662                 variables.find(var.name)->second.push_back(var);
00663             }
00664         }
00665     }
00666     for (iter = tr.begin();iter != tr.end(); ++iter) {
00667         // Assignments, variables relation
00668         if (detectAssignment(iter))
00669         {
00670             VarBase leftVariable = getLeftVariable(tr.child(iter, 0));
00671             unsigned nbchilds = tr.number_of_children(iter);
00672             list<VarBase> rightVariables = getRightVariables(tr.child(iter, nbchilds - 1), leftVariable);
00673             // Store the information
00674             // for r in rights | reverse:
00675             //      add ( left <- r )
00676             //      add (    r <- r + 1 )
00677             list<VarBase>::reverse_iterator prev = rightVariables.rbegin();
00678             for (list<VarBase>::reverse_iterator i=rightVariables.rbegin(); i!=rightVariables.rend();++i) {
00679                 // add the link with the left variables
00680                 assign.insert(make_pair(leftVariable, *i));
00681                 if (!(i == rightVariables.rbegin()))
00682                     assign.insert(make_pair(*i, *prev));
00683                 prev = i;
00684             }
            // the loop below has no effect while the print stays commented out
00685             MapVarEquivalent::iterator rIter;
00686             for (rIter = equivalences.begin(); rIter != equivalences.end(); ++rIter)
00687                 if (rIter->first == leftVariable) {
00688                     //print_current_list(rIter->second);
00689                 }
00690             // get the next same_depth iterator since I explored all the children
00691             tree<AstNode>::fixed_depth_iterator fter = iter;
00692             iter = ++fter;
00693         }
00694     }
00695 
00696 }
00697 
00698 void Ast::walk(xmlNode *a_node, tree<AstNode>::iterator parent)
00699 {
00700     xmlNode *cur_node = 0;
00701     tree<AstNode>::iterator iter = parent;
00702     for (cur_node = a_node; cur_node; cur_node = cur_node->next) 
00703     {
00704         if (cur_node->type == XML_ELEMENT_NODE) {
00705             string name((char *)cur_node->name);
00706             iter = tr.append_child(parent,AstNode(name));
00707         }
00708         else if (cur_node->type == XML_TEXT_NODE) {
00709             string content = getContent(cur_node);
00710             string name((char *)cur_node->name);
00711             if (content.length() > 0) {
00712                 iter = tr.append_child(parent, AstNode(name, content));
00713             }
00714         }
00715         walk(cur_node->children, iter);
00716     }
00717 }
00718 
00719 Ast::Ast(const string& xmlName)
00720 {
00721     xmlDoc *doc = 0;
00722     xmlNode *root_element = 0;
00723 #ifdef xmlReadFile
00724     doc = xmlReadFile(xmlName.c_str(), 0, 0);
00725 #else
00726     doc = xmlParseFile(xmlName.c_str());
00727 #endif
00728     if (doc == 0) {
00729         cerr << "error: could not parse file :" << xmlName << endl;
00730     }
00731     root_element = xmlDocGetRootElement(doc);
00732     root = tr.begin();
00733     root = tr.insert(root, AstNode("root"));
00734     walk(root_element, root);
00735     xmlFreeDoc(doc);
00736     xmlCleanupParser();
00737     fillAstInformation();
00738 }
00739 
00740 bool Ast::is_skeleton_node(const tree<AstNode>::iterator& it) const 
00741 {
00742     string t = it->getType();
00743     if (t == "T_INLINE_HTML")
00744         return false;
00745     else if (t == "text") {
00746         // look at the parent, if it's T_VARIABLE or T_STRING, keep it
00747         tree<AstNode>::iterator parent = tr.parent(it);
00748         string pt = parent->getType();
00749         if (pt == "T_VARIABLE" || pt == "T_STRING" || pt == "T_CONSTANT_ENCAPSED_STRING")
00750             return true;
00751         return false;
00752     }
00753     else if ((t == "root" || t == "start" || t == "CHAR61" || t == "expr" || t == "top_statement_list"
00754     || t == "internal_functions_in_yacc" || t == "function_call" || t == "statement" || t == "unticked_statement"
00755     || utils::start_with(t, "T_")))
00756         return true;
00757     return false;
00758 }
00759 
00760 bool Ast::detectFunction(const tree<AstNode>::iterator& i) const {
00761     if (i->getType() == "function_call")
00762         return true;
00763     return false;
00764 }
00765 
00766 
00767 void Ast::trace(const std::string &varName) const
00768 {
00769     // Find that variables
00770     AstVarBaseList obsVars, totalVars;
00771     MapVariables::const_iterator pos=variables.find(varName);
00772     if (pos != variables.end())
00773         obsVars = pos->second;
00774     // check with the assignments to see whether we should look for other variables
00775     totalVars = obsVars;
00776     for (MapAssignments::const_iterator iter=assign.begin();iter!=assign.end();++iter) 
00777     {
00778         for (AstVarBaseList::iterator jter=totalVars.begin();jter!=totalVars.end();++jter)
00779         {
00780             //cerr << "What about?" << iter->second << " , " << *jter << endl;
00781             if (iter->second.name == jter->name) {
00782                 if (find(totalVars.begin(), totalVars.end(), iter->first) == totalVars.end())
00783                     totalVars.push_back(iter->first);               
00784             }
00785         }
00786     }
00787     print_current_list(totalVars);
00788     // For each variable in the domain, look for the functions, calls etc.
00789     for (AstVarBaseList::iterator iter=totalVars.begin(); iter!=totalVars.end();++iter) {
00790         cerr << *iter << endl;
00791         // take this variable and rewind it!
00792         tree<AstNode>::iterator nter = iter->position;
00793         do {
00794             nter = tr.parent(nter);
00795             // Check for functions
00796             if (detectFunction(nter)) {
00797                 cerr << nter << endl;
00798             }
00799         } while (nter != tr.begin());           
00800     }
00801 }
00802 
00803 Ast Ast::skeleton() const
00804 {
00805     Ast temp(*this);
00806     temp.ref = const_cast<Ast *>(this);
00807     for (tree<AstNode>::iterator iter=temp.tr.begin(); iter!=temp.tr.end();++iter)
00808     {
00809         if (!temp.is_skeleton_node(iter)) {
00810             // just delete it with reparent
00811             temp.tr.reparent(temp.tr.parent(iter), iter);
00812         }
00813     }
00814     return temp;    
00815 }
00816 
00817 
00818 void Ast::printInfo(ostream& stream) const
00819 {
00820     stream << "-- Ast Information --" << endl;
00821     stream << "Functions:" << endl;
00822     for (MapFunctions::const_iterator iter = functions.begin(); iter != functions.end(); ++iter) {
00823         stream << '\t' << iter->first << '|' << iter->second->getType() << endl;
00824     }
00825     stream << "Classes:" << endl;
00826     for (MapClasses::const_iterator iter = classes.begin(); iter != classes.end(); ++iter) {
00827         stream << '\t' << iter->first << '|' << iter->second->getType() << endl;
00828     }
00829     stream << "Variables:" << endl;
00830     for (MapVariables::const_iterator iter = variables.begin(); iter != variables.end(); ++iter) {
00831         stream << setw(10) << iter->first << endl;
00832     }
00833     stream << "Assignments:" << endl;
00834     for (MapAssignments::const_iterator iter = assign.begin(); iter != assign.end(); ++iter) {
00835         stream << '\t' << iter->first << " <-- " << iter->second << endl;
00836     }
00837     
00838     stream << "Equivalences:" << endl;
00839     for (MapVarEquivalent::const_iterator iter = equivalences.begin(); iter != equivalences.end(); ++iter)
00840     {
00841         stream << setw(10) << iter->first << ": \t";
00842         for (AstVarBaseList::const_iterator jter=iter->second.begin(); jter!=iter->second.end();++jter)
00843             stream << *jter << ", ";
00844         stream << endl;
00845     }
00846 }
00847 
00848 void Ast::clearInfo()
00849 {
00850     classes = MapClasses();
00851     variables = MapVariables();
00852     functions = MapFunctions();
00853 }
00854 
// Escape the characters that are special in XML/HTML text.
//
//   str : text to escape.
//
// Returns a copy of `str` where '&', '<', '>' and '"' are replaced by
// "&amp;", "&lt;", "&gt;" and "&quot;". Every other character, the single
// quote included, is copied unchanged.
std::string htmlentities(const std::string& str)
{
    std::string escaped;
    escaped.reserve(str.size());
    for (std::string::size_type i = 0; i < str.size(); ++i) {
        const char c = str[i];
        if (c == '&')
            escaped += "&amp;";
        else if (c == '<')
            escaped += "&lt;";
        else if (c == '>')
            escaped += "&gt;";
        else if (c == '"')
            escaped += "&quot;";
        else
            escaped += c;
    }
    return escaped;
}
00869 
00870 void writeSiblingsXML(const tree<AstNode>& t, const tree<AstNode>::iterator iRoot, ostream& stream)
00871 {
00872     if(t.empty()) 
00873         return;
00874     if (iRoot->getType() == "root") {
00875         tree<AstNode>::sibling_iterator iChildren = t.begin(iRoot);
00876         stream << "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>" << endl;
00877         writeSiblingsXML(t,iChildren,stream);
00878     }
00879     else if (t.number_of_children(iRoot) == 0) {
00880         string type = iRoot->getType();
00881         stream << "<php:" << type << '>';
00882         if (iRoot->getValue().length() > 0)
00883             stream << htmlentities(iRoot->getValue());
00884         stream << "</php:" << type << '>' << endl;
00885     }
00886     else {
00887         string type = iRoot->getType();
00888         string xmlns="";
00889         if (type == "start")
00890             xmlns = " xmlns:php=\"http://php.net/csl\"";
00891         stream << "<php:" << type << xmlns << '>' << endl;
00892         int siblingNum;
00893         tree<AstNode>::sibling_iterator iChildren;
00894         for (iChildren = t.begin(iRoot), siblingNum = 0; iChildren != t.end(iRoot); ++iChildren) 
00895         {
00896             writeSiblingsXML(t,iChildren,stream);
00897         }
00898         stream << "</php:" << type << '>' << endl;
00899     }
00900 }
00901 
00902 void exportXML(const Ast& ast, const string& xmlName)
00903 {
00904     if (ast.tr.empty()) {
00905         throw length_error("AstContainer::exportXML - The AST is empty");
00906         return;
00907     }
00908     ofstream stream(xmlName.c_str());
00909     if (!stream.is_open())
00910         return;
00911     // skip <root>
00912     for(tree<AstNode>::sibling_iterator iRoots = ast.tr.begin(); iRoots != ast.tr.end(); iRoots++) {
00913         writeSiblingsXML(ast.tr,iRoots,stream);
00914     }
00915     stream.close();
00916 }

Generated on Wed Feb 27 20:31:06 2008 for php.ast.svn.src. by  doxygen 1.5.3