xmlparser.cpp

Go to the documentation of this file.
00001 /***************************************************************************
00002   file : $URL: https://frepple.svn.sourceforge.net/svnroot/frepple/trunk/src/utils/xmlparser.cpp $
00003   version : $LastChangedRevision: 1337 $  $LastChangedBy: jdetaeye $
00004   date : $LastChangedDate: 2010-08-17 10:34:28 +0200 (Tue, 17 Aug 2010) $
00005  ***************************************************************************/
00006 
00007 /***************************************************************************
00008  *                                                                         *
00009  * Copyright (C) 2007-2010 by Johan De Taeye                               *
00010  *                                                                         *
00011  * This library is free software; you can redistribute it and/or modify it *
00012  * under the terms of the GNU Lesser General Public License as published   *
00013  * by the Free Software Foundation; either version 2.1 of the License, or  *
00014  * (at your option) any later version.                                     *
00015  *                                                                         *
00016  * This library is distributed in the hope that it will be useful,         *
00017  * but WITHOUT ANY WARRANTY; without even the implied warranty of          *
00018  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser *
00019  * General Public License for more details.                                *
00020  *                                                                         *
00021  * You should have received a copy of the GNU Lesser General Public        *
00022  * License along with this library; if not, write to the Free Software     *
00023  * Foundation Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 *
00024  * USA                                                                     *
00025  *                                                                         *
00026  ***************************************************************************/
00027 
00028 #define FREPPLE_CORE
00029 #include "frepple/utils.h"
00030 #include <sys/stat.h>
00031 
00032 /* Uncomment the next line to create a lot of debugging messages during
00033  * the parsing of XML-data. */
00034 //#define PARSE_DEBUG
00035 
00036 // With VC++ we use the Win32 functions to browse a directory
00037 #ifdef _MSC_VER
00038 #define WIN32_LEAN_AND_MEAN
00039 #include <windows.h>
00040 #else
00041 // With Unix-like systems we use a check suggested by the autoconf tools
00042 #if HAVE_DIRENT_H
00043 # include <dirent.h>
00044 # define NAMLEN(dirent) strlen((dirent)->d_name)
00045 #else
00046 # define dirent direct
00047 # define NAMLEN(dirent) (dirent)->d_namlen
00048 # if HAVE_SYS_NDIR_H
00049 #  include <sys/ndir.h>
00050 # endif
00051 # if HAVE_SYS_DIR_H
00052 #  include <sys/dir.h>
00053 # endif
00054 # if HAVE_NDIR_H
00055 #  include <ndir.h>
00056 # endif
00057 #endif
00058 #endif
00059 
00060 
00061 namespace frepple
00062 {
00063 namespace utils
00064 {
00065 
00066 DECLARE_EXPORT const XMLOutput::content_type XMLOutput::STANDARD = 1;
00067 DECLARE_EXPORT const XMLOutput::content_type XMLOutput::PLAN = 2;
00068 DECLARE_EXPORT const XMLOutput::content_type XMLOutput::PLANDETAIL = 4;
00069 
00070 
00071 void  XMLInput::processingInstruction
00072 (const XMLCh *const target, const XMLCh *const data)
00073 {
00074   char* type = xercesc::XMLString::transcode(target);
00075   char* value = xercesc::XMLString::transcode(data);
00076   try
00077   {
00078     // Look up the class
00079     const MetaClass* j = Command::metadataInstruction->findClass(type);
00080     if (!j || !j->processingInstruction)
00081     {
00082       string msg = string("Unknown processing instruction ") + type;
00083       xercesc::XMLString::release(&type);
00084       xercesc::XMLString::release(&value);
00085       throw LogicException(msg);
00086     }
00087     try
00088     {
00089       // Execute the processing instruction
00090       j->processingInstruction(value);
00091     }
00092     catch (DataException e)
00093     {
00094       if (abortOnDataException)
00095       {
00096         xercesc::XMLString::release(&type);
00097         xercesc::XMLString::release(&value);
00098         throw;
00099       }
00100       else logger << "Continuing after data error: " << e.what() << endl;
00101     }
00102     xercesc::XMLString::release(&type);
00103     xercesc::XMLString::release(&value);
00104   }
00105   catch (...)
00106   {
00107     xercesc::XMLString::release(&type);
00108     xercesc::XMLString::release(&value);
00109     throw;
00110   }
00111 }
00112 
00113 
00114 void XMLInput::startElement(const XMLCh* const uri, const XMLCh* const n,
00115     const XMLCh* const qname, const xercesc::Attributes& atts)
00116 {
00117   // Validate the state
00118   assert(!states.empty());
00119 
00120   // Check for excessive number of open objects
00121   if (numElements >= maxdepth)
00122     throw DataException("XML-document with elements nested excessively deep");
00123 
00124   // Push the element on the stack
00125   datapair *pElement = &m_EStack[numElements+1];
00126   pElement->first.reset(n);
00127   pElement->second.reset();
00128 
00129   // Store a pointer to the attributes
00130   attributes = &atts;
00131 
00132   switch (states.top())
00133   {
00134     case SHUTDOWN:
00135       // STATE: Parser is shutting down, and we can ignore all input that
00136       // is still coming
00137       return;
00138 
00139     case IGNOREINPUT:
00140       // STATE: Parser is ignoring a part of the input
00141       if (pElement->first.getHash() == endingHashes.top())
00142         // Increase the count of occurences before the ignore section ends
00143         ++ignore;
00144       ++numElements;
00145       return;
00146 
00147     case INIT:
00148       // STATE: The only time the parser comes in this state is when we read
00149       // opening tag of the ROOT tag.
00150 #ifdef PARSE_DEBUG
00151       if (!m_EHStack.empty())
00152         logger << "Initialize root tag for reading object "
00153         << getCurrentObject() << " ("
00154         << typeid(*getCurrentObject()).name() << ")" << endl;
00155       else
00156         logger << "Initialize root tag for reading object NULL" << endl;
00157 #endif
00158       states.top() = READOBJECT;
00159       endingHashes.push(pElement->first.getHash());
00160       // Note that there is no break or return here. We also execute the
00161       // statements of the following switch-case.
00162 
00163     case READOBJECT:
00164       // STATE: Parser is reading data elements of an object
00165       // Debug
00166 #ifdef PARSE_DEBUG
00167       logger << "   Start element " << pElement->first.getName()
00168       << " - object " << getCurrentObject() << endl;
00169 #endif
00170 
00171       // Call the handler of the object
00172       assert(!m_EHStack.empty());
00173       try {getCurrentObject()->beginElement(*this, pElement->first);}
00174       catch (DataException e)
00175       {
00176         if (abortOnDataException) throw;
00177         else logger << "Continuing after data error: " << e.what() << endl;
00178       }
00179 
00180       // Now process all attributes. For attributes we only call the
00181       // endElement() member and skip the beginElement() method.
00182       numElements += 1;
00183       if (states.top() != IGNOREINPUT)
00184         for (unsigned int i=0, cnt=atts.getLength(); i<cnt; i++)
00185         {
00186           char* val = xercesc::XMLString::transcode(atts.getValue(i));
00187           m_EStack[numElements+1].first.reset(atts.getLocalName(i));
00188           m_EStack[numElements+1].second.setData(val);
00189           #ifdef PARSE_DEBUG
00190           char* attname = xercesc::XMLString::transcode(atts.getQName(i));
00191           logger << "   Processing attribute " << attname
00192           << " - object " << getCurrentObject() << endl;
00193           xercesc::XMLString::release(&attname);
00194           #endif
00195           try {getCurrentObject()->endElement(*this, m_EStack[numElements+1].first, m_EStack[numElements+1].second);}
00196           catch (DataException e)
00197           {
00198             if (abortOnDataException) throw;
00199             else logger << "Continuing after data error: " << e.what() << endl;
00200           }
00201           xercesc::XMLString::release(&val);
00202           // Stop processing attributes if we are now in the ignore mode
00203           if (states.top() == IGNOREINPUT) break;
00204         }
00205   }  // End of switch statement
00206 
00207   // Outside of this handler, no attributes are available
00208   attributes = NULL;
00209 }
00210 
00211 
00212 void XMLInput::endElement(const XMLCh* const uri,
00213     const XMLCh* const s,
00214     const XMLCh* const qname)
00215 {
00216   // Validate the state
00217   assert(numElements >= 0);
00218   assert(!states.empty());
00219   assert(numElements < maxdepth);
00220 
00221   // Remove an element from the stack
00222   datapair *pElement = &(m_EStack[numElements--]);
00223 
00224   switch (states.top())
00225   {
00226     case INIT:
00227       // This should never happen!
00228       throw LogicException("Unreachable code reached");
00229 
00230     case SHUTDOWN:
00231       // STATE: Parser is shutting down, and we can ignore all input that is
00232       // still coming
00233       return;
00234 
00235     case IGNOREINPUT:
00236       // STATE: Parser is ignoring a part of the input
00237 #ifdef PARSE_DEBUG
00238       logger << "   End element " << pElement->first.getName()
00239       << " - IGNOREINPUT state" << endl;
00240 #endif
00241       // Continue if we aren't dealing with the tag being ignored
00242       if (pElement->first.getHash() != endingHashes.top()) return;
00243       if (ignore == 0)
00244       {
00245         // Finished ignoring now
00246         states.pop();
00247         endingHashes.pop();
00248 #ifdef PARSE_DEBUG
00249         logger << "Finish IGNOREINPUT state" << endl;
00250 #endif
00251       }
00252       else
00253         --ignore;
00254       break;
00255 
00256     case READOBJECT:
00257       // STATE: Parser is reading data elements of an object
00258 #ifdef PARSE_DEBUG
00259       logger << "   End element " << pElement->first.getName()
00260       << " - object " << getCurrentObject() << endl;
00261 #endif
00262 
00263       // Check if we finished with the current handler
00264       assert(!m_EHStack.empty());
00265       if (pElement->first.getHash() == endingHashes.top())
00266       {
00267         // Call the ending handler of the Object, with a special
00268         // flag to specify that this object is now ended
00269         objectEnded = true;
00270         try {getCurrentObject()->endElement(*this, pElement->first, pElement->second);}
00271         catch (DataException e)
00272         {
00273           if (abortOnDataException) throw;
00274           else logger << "Continuing after data error: " << e.what() << endl;
00275         }
00276         objectEnded = false;
00277 #ifdef PARSE_DEBUG
00278         logger << "Finish reading object " << getCurrentObject() << endl;
00279 #endif
00280         // Pop from the handler object stack
00281         prev = getCurrentObject();
00282         m_EHStack.pop_back();
00283         endingHashes.pop();
00284 
00285         // Pop from the state stack
00286         states.pop();
00287         if (m_EHStack.empty())
00288           shutdown();
00289         else
00290         {
00291           // Call also the endElement function on the owning object
00292           try {getCurrentObject()->endElement(*this, pElement->first, pElement->second);}
00293           catch (DataException e)
00294           {
00295             if (abortOnDataException) throw;
00296             else logger << "Continuing after data error: " << e.what() << endl;
00297           }
00298 #ifdef PARSE_DEBUG
00299           logger << "   End element " << pElement->first.getName()
00300           << " - object " << getCurrentObject() << endl;
00301 #endif
00302         }
00303       }
00304       else
00305         // This tag is not the ending tag of an object
00306         // Call the function of the Object
00307         try {getCurrentObject()->endElement(*this, pElement->first, pElement->second);}
00308         catch (DataException e)
00309         {
00310           if (abortOnDataException) throw;
00311           else logger << "Continuing after data error: " << e.what() << endl;
00312         }
00313   }
00314 }
00315 
00316 
00317 // Unfortunately the prototype for this handler function differs between 
00318 // Xerces-c 2.x and 3.x
00319 #if XERCES_VERSION_MAJOR==2
00320 void XMLInput::characters(const XMLCh *const c, const unsigned int n)
00321 #else
00322 void XMLInput::characters(const XMLCh *const c, const XMLSize_t n)
00323 #endif
00324 {
00325   // No data capture during the ignore state
00326   if (states.top()==IGNOREINPUT) return;
00327 
00328   // Process the data
00329   char* name = xercesc::XMLString::transcode(c);
00330   m_EStack[numElements].second.addData(name, strlen(name));
00331   xercesc::XMLString::release(&name);
00332 }
00333 
00334 
00335 void XMLInput::warning(const xercesc::SAXParseException& exception)
00336 {
00337   char* message = xercesc::XMLString::transcode(exception.getMessage());
00338   logger << "Warning: " << message
00339   << " at line: " << exception.getLineNumber() << endl;
00340   xercesc::XMLString::release(&message);
00341 }
00342 
00343 
00344 DECLARE_EXPORT void XMLInput::readto(Object * pPI)
00345 {
00346   // Keep track of the tag where this object will end
00347   assert(numElements >= -1);
00348   endingHashes.push(m_EStack[numElements+1].first.getHash());
00349   if (pPI)
00350   {
00351     // Push a new object on the handler stack
00352 #ifdef PARSE_DEBUG
00353     logger << "Start reading object " << pPI
00354     << " (" << typeid(*pPI).name() << ")" << endl;
00355 #endif
00356     prev = getCurrentObject();
00357     m_EHStack.push_back(make_pair(pPI,static_cast<void*>(NULL)));
00358     states.push(READOBJECT);
00359   }
00360   else
00361   {
00362     // Ignore the complete content of this element
00363 #ifdef PARSE_DEBUG
00364     logger << "Start ignoring input" << endl;
00365 #endif
00366     states.push(IGNOREINPUT);
00367   }
00368 }
00369 
00370 
00371 void XMLInput::shutdown()
00372 {
00373   // Already shutting down...
00374   if (states.empty() || states.top() == SHUTDOWN) return;
00375 
00376   // Message
00377 #ifdef PARSE_DEBUG
00378   logger << "   Forcing a shutdown - SHUTDOWN state" << endl;
00379 #endif
00380 
00381   // Change the state
00382   states.push(SHUTDOWN);
00383 
00384   // Done if we have no elements on the stack, i.e. a normal end.
00385   if (numElements<0) return;
00386 
00387   // Call the ending handling of all objects on the stack
00388   // This allows them to finish off in a valid state, and delete any temporary
00389   // objects they may have allocated.
00390   objectEnded = true;
00391   m_EStack[numElements].first.reset("Not a real tag");
00392   m_EStack[numElements].second.reset();
00393   while (!m_EHStack.empty())
00394   {
00395     try {getCurrentObject()->endElement(*this, m_EStack[numElements].first, m_EStack[numElements].second);}
00396     catch (DataException e)
00397     {
00398       if (abortOnDataException) throw;
00399       else logger << "Continuing after data error: " << e.what() << endl;
00400     }
00401     m_EHStack.pop_back();
00402   }
00403 }
00404 
00405 
00406 void XMLInput::reset()
00407 {
00408   // Delete the xerces parser object
00409   delete parser;
00410   parser = NULL;
00411 
00412   // Call the ending handling of all objects on the stack
00413   // This allows them to finish off in a valid state, and delete any temporary
00414   // objects they may have allocated.
00415   if (!m_EHStack.empty())
00416   {
00417     // The next line is to avoid calling the endElement handler twice for the
00418     // last object. E.g. endElement handler causes and exception, and as part
00419     // of the exception handling we call the reset method.
00420     if (objectEnded) m_EHStack.pop_back();
00421     objectEnded = true;
00422     m_EStack[++numElements].first.reset("Not a real tag");
00423     m_EStack[++numElements].second.reset();
00424     while (!m_EHStack.empty())
00425     {
00426       try {getCurrentObject()->endElement(*this, m_EStack[numElements].first, m_EStack[numElements].second);}
00427       catch (DataException e)
00428       {
00429         if (abortOnDataException) throw;
00430         else logger << "Continuing after data error: " << e.what() << endl;
00431       }
00432       m_EHStack.pop_back();
00433     }
00434   }
00435 
00436   // Cleanup of stacks
00437   while (!states.empty()) states.pop();
00438   while (!endingHashes.empty()) endingHashes.pop();
00439 
00440   // Set all variables back to their starting values
00441   numElements = -1;
00442   ignore = 0;
00443   objectEnded = false;
00444   attributes = NULL;
00445 }
00446 
00447 
00448 void XMLInput::parse(xercesc::InputSource &in, Object *pRoot, bool validate)
00449 {
00450   try
00451   {
00452     // Create a Xerces parser
00453     parser = xercesc::XMLReaderFactory::createXMLReader();
00454 
00455     // Set the features of the parser. A bunch of the options are dependent
00456     // on whether we want to validate the input or not.
00457     parser->setProperty(xercesc::XMLUni::fgXercesScannerName, const_cast<XMLCh*>
00458         (validate ? xercesc::XMLUni::fgSGXMLScanner : xercesc::XMLUni::fgWFXMLScanner));
00459     parser->setFeature(xercesc::XMLUni::fgSAX2CoreValidation, validate);
00460     parser->setFeature(xercesc::XMLUni::fgSAX2CoreNameSpacePrefixes, false);
00461     parser->setFeature(xercesc::XMLUni::fgXercesIdentityConstraintChecking, false);
00462     parser->setFeature(xercesc::XMLUni::fgXercesDynamic, false);
00463     parser->setFeature(xercesc::XMLUni::fgXercesSchema, validate);
00464     parser->setFeature(xercesc::XMLUni::fgXercesSchemaFullChecking, false);
00465     parser->setFeature(xercesc::XMLUni::fgXercesValidationErrorAsFatal,true);
00466     parser->setFeature(xercesc::XMLUni::fgXercesIgnoreAnnotations,true);
00467 
00468     if (validate)
00469     {
00470       // Specify the no-namespace schema file
00471       string schema = Environment::searchFile("frepple.xsd");
00472       if (schema.empty())
00473         throw RuntimeException("Can't find XML schema file 'frepple.xsd'");
00474       XMLCh *c = xercesc::XMLString::transcode(schema.c_str());
00475       parser->setProperty(
00476         xercesc::XMLUni::fgXercesSchemaExternalNoNameSpaceSchemaLocation, c
00477       );
00478       xercesc::XMLString::release(&c);
00479     }
00480 
00481     // If we are reading into a NULL object, there is no need to use a
00482     // content handler or a handler stack.
00483     if (pRoot)
00484     {
00485       // Set the event handler. If we are reading into a NULL object, there is
00486       // no need to use a content handler.
00487       parser->setContentHandler(this);
00488 
00489       // Get the parser to read data into the object pRoot.
00490       m_EHStack.push_back(make_pair(pRoot,static_cast<void*>(NULL)));
00491       states.push(INIT);
00492     }
00493 
00494     // Set the error handler
00495     parser->setErrorHandler(this);
00496 
00497     // Parse the input
00498     parser->parse(in);
00499   }
00500   // Note: the reset() method needs to be called in all circumstances. The
00501   // reset method allows all objects to finish in a valid state and clean up
00502   // any memory they may have allocated.
00503   catch (const xercesc::XMLException& toCatch)
00504   {
00505     char* message = xercesc::XMLString::transcode(toCatch.getMessage());
00506     string msg(message);
00507     xercesc::XMLString::release(&message);
00508     reset();
00509     throw RuntimeException("Parsing error: " + msg);
00510   }
00511   catch (const xercesc::SAXParseException& toCatch)
00512   {
00513     char* message = xercesc::XMLString::transcode(toCatch.getMessage());
00514     ostringstream msg;
00515     if (toCatch.getLineNumber() > 0)
00516       msg << "Parsing error: " << message << " at line " << toCatch.getLineNumber();
00517     else
00518       msg << "Parsing error: " << message;
00519     xercesc::XMLString::release(&message);
00520     reset();
00521     throw RuntimeException(msg.str());
00522   }
00523   catch (const exception& toCatch)
00524   {
00525     reset();
00526     ostringstream msg;
00527     msg << "Error during XML parsing: " << toCatch.what();
00528     throw RuntimeException(msg.str());
00529   }
00530   catch (...)
00531   {
00532     reset();
00533     throw RuntimeException(
00534       "Parsing error: Unexpected exception during XML parsing");
00535   }
00536   reset();
00537 
00538   // Execute the commands defined in the input stream.
00539   // The commands are executed only after a successful parsing.
00540   executeCommands();
00541 }
00542 
00543 
00544 DECLARE_EXPORT ostream& operator << (ostream& os, const XMLEscape& x)
00545 {
00546   for (const char* p = x.data; *p; ++p)
00547   {
00548     switch (*p)
00549     {
00550       case '&': os << "&amp;"; break;
00551       case '<': os << "&lt;"; break;
00552       case '>': os << "&gt;"; break;
00553       case '"': os << "&quot;"; break;
00554       case '\'': os << "&apos;"; break;
00555       default: os << *p;
00556     }
00557   }
00558   return os;
00559 }
00560 
00561 
00562 DECLARE_EXPORT void XMLOutput::incIndent()
00563 {
00564   indentstring[m_nIndent++] = '\t';
00565   if (m_nIndent > 40) m_nIndent = 40;
00566   indentstring[m_nIndent] = '\0';
00567 }
00568 
00569 
00570 DECLARE_EXPORT void XMLOutput::decIndent()
00571 {
00572   if (--m_nIndent < 0) m_nIndent = 0;
00573   indentstring[m_nIndent] = '\0';
00574 }
00575 
00576 
00577 DECLARE_EXPORT void XMLOutput::writeElement
00578 (const Keyword& tag, const Object* object, mode m)
00579 {
00580   // Avoid NULL pointers and skip hidden objects
00581   if (!object || object->getHidden()) return;
00582 
00583   // Adjust current and parent object pointer
00584   const Object *previousParent = parentObject;
00585   parentObject = currentObject;
00586   currentObject = object;
00587   ++numObjects;
00588   ++numParents;
00589 
00590   // Call the write method on the object
00591   if (m != DEFAULT)
00592     // Mode is overwritten
00593     object->writeElement(this, tag, m);
00594   else
00595     // Choose wether to save a reference of the object.
00596     // The root object can't be saved as a reference.
00597     object->writeElement(this, tag, numParents>2 ? REFERENCE : DEFAULT);
00598 
00599   // Adjust current and parent object pointer
00600   --numParents;
00601   currentObject = parentObject;
00602   parentObject = previousParent;
00603 }
00604 
00605 
00606 DECLARE_EXPORT void XMLOutput::writeElementWithHeader(const Keyword& tag, const Object* object)
00607 {
00608   // Root object can't be null...
00609   if (!object)
00610     throw RuntimeException("Can't accept a NULL object as XML root");
00611 
00612   // There should not be any saved objects yet
00613   if (numObjects > 0)
00614     throw LogicException("Can't have multiple headers in a document");
00615   assert(!parentObject);
00616   assert(!currentObject);
00617 
00618   // Write the first line for the xml document
00619   writeString(getHeaderStart());
00620 
00621   // Adjust current object pointer
00622   currentObject = object;
00623 
00624   // Write the object
00625   ++numObjects;
00626   ++numParents;
00627   BeginObject(tag, getHeaderAtts());
00628   object->writeElement(this, tag, NOHEADER);
00629 
00630   // Adjust current and parent object pointer
00631   currentObject = NULL;
00632   parentObject = NULL;
00633 }
00634 
00635 
00636 DECLARE_EXPORT void XMLOutput::writeHeader(const Keyword& tag)
00637 {
00638   // There should not be any saved objects yet
00639   if (numObjects > 0 || !parentObject || !currentObject)
00640     throw LogicException("Writing invalid header to XML document");
00641 
00642   // Write the first line and the opening tag
00643   writeString(getHeaderStart());
00644   BeginObject(tag, getHeaderAtts());
00645 
00646   // Fake a dummy parent
00647   numParents += 2;
00648 }
00649 
00650 
00651 DECLARE_EXPORT bool XMLElement::getBool() const
00652 {
00653   switch (getData()[0])
00654   {
00655     case 'T':
00656     case 't':
00657     case '1':
00658       return true;
00659     case 'F':
00660     case 'f':
00661     case '0':
00662       return false;
00663   }
00664   throw DataException("Invalid boolean value: " + string(getData()));
00665 }
00666 
00667 
00668 DECLARE_EXPORT const char* Attribute::getName() const
00669 {
00670   if (ch) return ch;
00671   Keyword::tagtable::const_iterator i = Keyword::getTags().find(hash);
00672   if (i == Keyword::getTags().end())
00673     throw LogicException("Undefined element keyword");
00674   return i->second->getName().c_str();
00675 }
00676 
00677 
00678 DECLARE_EXPORT Keyword::Keyword(const string& name) : strName(name)
00679 {
00680   // Error condition: name is empty
00681   if (name.empty()) throw LogicException("Creating keyword without name");
00682 
00683   // Create a number of variations of the tag name
00684   strStartElement = string("<") + name;
00685   strEndElement = string("</") + name + ">\n";
00686   strElement = string("<") + name + ">";
00687   strAttribute = string(" ") + name + "=\"";
00688 
00689   // Compute the hash value
00690   dw = hash(name.c_str());
00691 
00692   // Create a properly encoded Xerces string
00693   xercesc::XMLPlatformUtils::Initialize();
00694   xmlname = xercesc::XMLString::transcode(name.c_str());
00695 
00696   // Verify that the hash is "perfect".
00697   check();
00698 }
00699 
00700 
00701 DECLARE_EXPORT Keyword::Keyword(const string& name, const string& nspace)
00702   : strName(name)
00703 {
00704   // Error condition: name is empty
00705   if (name.empty())
00706     throw LogicException("Creating keyword without name");
00707   if (nspace.empty())
00708     throw LogicException("Creating keyword with empty namespace");
00709 
00710   // Create a number of variations of the tag name
00711   strStartElement = string("<") + nspace + ":" + name;
00712   strEndElement = string("</") + nspace + ":" + name + ">\n";
00713   strElement = string("<") + nspace + ":" + name + ">";
00714   strAttribute = string(" ") + nspace + ":" + name + "=\"";
00715 
00716   // Compute the hash value
00717   dw = hash(name);
00718 
00719   // Create a properly encoded Xerces string
00720   xercesc::XMLPlatformUtils::Initialize();
00721   xmlname = xercesc::XMLString::transcode(string(nspace + ":" + name).c_str());
00722 
00723   // Verify that the hash is "perfect".
00724   check();
00725 }
00726 
00727 
00728 void Keyword::check()
00729 {
00730   // To be thread-safe we make sure only a single thread at a time
00731   // can execute this check.
00732   static Mutex dd;
00733   {
00734     ScopeMutexLock l(dd);
00735     tagtable::const_iterator i = getTags().find(dw);
00736     if (i!=getTags().end() && i->second->getName()!=strName)
00737       throw LogicException("Tag XML-tag hash function clashes for "
00738           + i->second->getName() + " and " + strName);
00739     getTags().insert(make_pair(dw,this));
00740   }
00741 }
00742 
00743 
00744 DECLARE_EXPORT Keyword::~Keyword()
00745 {
00746   // Remove from the tag list
00747   tagtable::iterator i = getTags().find(dw);
00748   if (i!=getTags().end()) getTags().erase(i);
00749 
00750   // Destroy the xerces string
00751   xercesc::XMLString::release(&xmlname);
00752   xercesc::XMLPlatformUtils::Terminate();
00753 }
00754 
00755 
00756 DECLARE_EXPORT const Keyword& Keyword::find(const char* name)
00757 {
00758   tagtable::const_iterator i = getTags().find(hash(name));
00759   return *(i!=getTags().end() ? i->second : new Keyword(name));
00760 }
00761 
00762 
00763 DECLARE_EXPORT Keyword::tagtable& Keyword::getTags()
00764 {
00765   static tagtable alltags;
00766   return alltags;
00767 }
00768 
00769 
00770 DECLARE_EXPORT hashtype Keyword::hash(const char* c)
00771 {
00772   if (c == 0 || *c == 0) return 0;
00773 
00774   // Compute hash
00775   const char* curCh = c;
00776   hashtype hashVal = *curCh++;
00777   while (*curCh)
00778     hashVal = (hashVal * 38) + (hashVal >> 24) + *curCh++;
00779 
00780   // Divide by modulus
00781   return hashVal % 954991; 
00782 }
00783 
00784 
00785 DECLARE_EXPORT hashtype Keyword::hash(const XMLCh* t)
00786 {
00787   char* c = xercesc::XMLString::transcode(t);
00788   if (c == 0 || *c == 0)
00789   {
00790     xercesc::XMLString::release(&c);
00791     return 0;
00792   }
00793   
00794   // Compute hash
00795   const char* curCh = c;
00796   hashtype hashVal = *curCh++;
00797   while (*curCh)
00798     hashVal = (hashVal * 38) + (hashVal >> 24) + *curCh++;
00799 
00800   // Divide by modulus
00801   xercesc::XMLString::release(&c);
00802   return hashVal % 954991;
00803 }
00804 
00805 
00806 DECLARE_EXPORT void Keyword::printTags()
00807 {
00808   for (tagtable::iterator i = getTags().begin(); i != getTags().end(); ++i)
00809     logger << i->second->getName() << "   " << i->second->dw << endl;
00810 }
00811 
00812 
00813 DECLARE_EXPORT void XMLInput::executeCommands()
00814 {
00815   try {cmds.execute();}
00816   catch (...)
00817   {
00818     try {throw;}
00819     catch (exception& e)
00820     {logger << "Error executing commands: " << e.what() << endl;}
00821     catch (...)
00822     {logger << "Error executing commands: Unknown exception type" << endl;}
00823     throw;
00824   }
00825 }
00826 
00827 
00828 void XMLInputFile::parse(Object *pRoot, bool validate)
00829 {
00830   // Check if string has been set
00831   if (filename.empty())
00832     throw DataException("Missing input file or directory");
00833 
00834   // Check if the parameter is the name of a directory
00835   struct stat stat_p;
00836   if (stat(filename.c_str(), &stat_p))
00837     // Can't verify the status
00838     throw RuntimeException("Couldn't open input file '" + filename + "'");
00839   else if (stat_p.st_mode & S_IFDIR)
00840   {
00841     // Data is a directory: loop through all *.xml files now. No recursion in
00842     // subdirectories is done.
00843     // The code is unfortunately different for Windows & Linux. Sigh...
00844 #ifdef _MSC_VER
00845     string f = filename + "\\*.xml";
00846     WIN32_FIND_DATA dir_entry_p;
00847     HANDLE h = FindFirstFile(f.c_str(), &dir_entry_p);
00848     if (h == INVALID_HANDLE_VALUE)
00849       throw RuntimeException("Couldn't open input file '" + f + "'");
00850     do
00851     {
00852       f = filename + '/' + dir_entry_p.cFileName;
00853       XMLInputFile(f.c_str()).parse(pRoot);
00854     }
00855     while (FindNextFile(h, &dir_entry_p));
00856     FindClose(h);
00857 #elif HAVE_DIRENT_H
00858     struct dirent *dir_entry_p;
00859     DIR *dir_p = opendir(filename.c_str());
00860     while (NULL != (dir_entry_p = readdir(dir_p)))
00861     {
00862       int n = NAMLEN(dir_entry_p);
00863       if (n > 4 && !strcmp(".xml", dir_entry_p->d_name + n - 4))
00864       {
00865         string f = filename + '/' + dir_entry_p->d_name;
00866         XMLInputFile(f.c_str()).parse(pRoot, validate);
00867       }
00868     }
00869     closedir(dir_p);
00870 #else
00871     throw RuntimeException("Can't process a directory on your platform");
00872 #endif
00873   }
00874   else
00875   {
00876     // Normal file
00877     // Parse the file
00878     XMLCh *f = xercesc::XMLString::transcode(filename.c_str());
00879     xercesc::LocalFileInputSource in(f);
00880     xercesc::XMLString::release(&f);
00881     XMLInput::parse(in, pRoot, validate);
00882   }
00883 }
00884 
00885 } // end namespace
00886 } // end namespace

Documentation generated for frePPLe by  doxygen