Xml reader – cpp

The XML reader (xmlReader) in C++ brings together the xmlAttribute and xmlObject classes with this last class, xmlReader. It is the biggest of the three because it does most of the work, so I shall try to explain each part.

As before, an XML element has the form:

<tagname attributesname="attributesvalue">value</tagname>

The main structure stores the XML declaration (<?xml ... ?>) in _xmlMainDetails and the rest of the XML file in _xmlDetails; both are vectors of xmlObject.

The file is loaded with loadFile(filename), and printOuterXML() then prints out the XML that has been loaded. I shall have to add some more public functions to view (etc.) the loaded XML file, but this is just the basics of an XML reader. The rest are private functions, because they are the ones that load the XML file into the _xmlDetails variable.

class xmlReader {
  protected:
    vector<xmlObject> _xmlMainDetails;
    vector<xmlObject> _xmlDetails;
 
  public: 
    xmlReader();
 
    // open file and read in the xml file and place into the _xmlDetails
    bool loadFile(string filename);
    void printOuterXML();
 
  private :
    xmlObject readLine(string xmlToSplitUp, string* tagName);
    string readUntilCharacter(string line, char characterStart, char characterEnd, string *returnLine);
    xmlAttribute getAttribute(string attributeString);
    vector<xmlAttribute> getAttributesFromString(string str);
};

BADATTRIBUTE (like the other BAD... values) is a const string that I have included at the bottom of this post. It is thrown from code that is called inside a try {} catch {} block, so the thrown error string can be caught and reported by the caller.

The getAttribute method tries to obtain an attribute from its string parameter: if the string is in the style attributename="attributevalue", it stores the name and value in an xmlAttribute and returns it to the calling method; otherwise it throws BADATTRIBUTE.

/* xmlReader */
// Default constructor: there is nothing to set up, the two detail vectors
// simply start out empty.
xmlReader::xmlReader() {}
 
// Parses a single attribute written as attributeName="attributeValue"
// (single quotes are accepted as well, as XML allows both).
//
// attributeString : the text of exactly one attribute, with no surrounding
//                   whitespace.
// returns         : an xmlAttribute whose _attributeName is the text before
//                   the first '=' and whose _attributeValue is the text
//                   between the quotes (which may be empty, e.g. name="").
// throws          : BADATTRIBUTE (a string) when there is no '=', when the
//                   name is empty, or when the part after '=' is not wrapped
//                   in a matching pair of quote characters.
xmlAttribute xmlReader::getAttribute(string attributeString)
{
  xmlAttribute returnAttribute;

  // string::find reports "not found" as string::npos, so keep the result in
  // the unsigned size_type rather than squeezing it into an int.
  const string::size_type equalPos = attributeString.find('=');
  if (equalPos == string::npos || equalPos == 0)
    throw BADATTRIBUTE;

  // the value must be at least the two quote characters: ="" -> empty value.
  const string::size_type openQuote = equalPos + 1;
  const string::size_type closeQuote = attributeString.length() - 1;
  if (closeQuote <= openQuote)
    throw BADATTRIBUTE;

  // the same quote character has to open and close the value.
  const char quote = attributeString[openQuote];
  if ((quote != '"' && quote != '\'') || attributeString[closeQuote] != quote)
    throw BADATTRIBUTE;

  returnAttribute._attributeName = attributeString.substr(0, equalPos);
  returnAttribute._attributeValue = attributeString.substr(openQuote + 1, closeQuote - openQuote - 1);
  return returnAttribute;
}

The getAttributesFromString method handles a string that contains any number of attributes (zero or more): it processes each one in turn and calls the above method, getAttribute, to actually fill in each xmlAttribute.

// Splits a string holding zero or more attributes
// (name1="value1" name2="value2" ...) into individual attributes.
//
// str     : the attribute part of a tag; leading, trailing and repeated
//           whitespace is ignored.
// returns : one xmlAttribute per well-formed attribute, in the order found.
//
// An attribute that getAttribute rejects is reported on cout as
// "ERROR : <message>" and skipped; the remaining attributes are still read.
// The string is scanned with std::string operations only, so the length of an
// attribute is not limited by a fixed-size buffer, and whitespace inside a
// quoted value does not end the attribute.
vector<xmlAttribute> xmlReader::getAttributesFromString(string str)
{
    vector<xmlAttribute> returnAtt;
    const string whitespace(" \t\r\n\v\f");

    string::size_type tokenStart = str.find_first_not_of(whitespace);
    while (tokenStart != string::npos)
    {
      // walk to the end of this attribute: the first whitespace character
      // that is not inside a quoted value (or the end of the string).
      string::size_type tokenEnd = tokenStart;
      char openQuote = 0;
      while (tokenEnd < str.length())
      {
        const char current = str[tokenEnd];
        if (openQuote != 0)
        {
          if (current == openQuote)
            openQuote = 0;
        }
        else if (current == '"' || current == '\'')
          openQuote = current;
        else if (whitespace.find(current) != string::npos)
          break;
        ++tokenEnd;
      }

      // see if there is a real attribute attributeName="attributeValue"
      try {
        returnAtt.push_back(getAttribute(str.substr(tokenStart, tokenEnd - tokenStart)));
      } catch (const string& errorStr)   // badly formed attribute
      {
        cout << "ERROR : " << errorStr << endl;
      }

      // carry on from the end of this attribute, skipping the whitespace
      // that separates it from the next one.
      tokenStart = str.find_first_not_of(whitespace, tokenEnd);
    }
    return returnAtt;
}

readLine is the main part of the XML reading object. Here are the basics of what this method does.

If there is no tag name within the XML line, it throws an error.

If the XML line is the main definition (the XML declaration), it processes that style of line and pulls out any attributes that are present.

Otherwise it processes a normal XML line: it reads the attributes (if present), gets the value of the tag, and then checks that the end tag name is the same as the first tag name.

// Scans one line of xml and pulls out the tag name, attributes and value.
//
// xmlToSplitUp : one line of the xml file, e.g.
//                <tagname attributename="attributevalue">value</tagname>
// tagName      : out parameter; receives the tag name found on the line
//                (without its attributes for a normal tag).
// returns      : the xmlObject built from the line. For the xml declaration
//                (<?xml ... ?>) only the attributes are stored and
//                setXmlMainDetails(true) is called on the object.
// throws       : BADTAGNAME    - no tag name could be read from the line,
//                BADXMLVERSION - the declaration does not end with '?',
//                BADXMLTAGEND  - the end tag does not match the start tag.
//                (all are thrown as string values)
xmlObject xmlReader::readLine(string xmlToSplitUp, string* tagName)
{
  xmlObject returnObj;
  string returnLine;

  // pick out the tag name, if there is none then throw a bad tag name error.
  *tagName = readUntilCharacter(xmlToSplitUp, '<', '>', &returnLine);
  if (tagName->empty())
    throw BADTAGNAME;

  // Only a tag that *starts* with ?xml is the xml declaration. Looking for
  // "xml" anywhere in the tag would also match ordinary tags such as
  // <a href="file.xml">.
  if (tagName->compare(0, 4, "?xml") == 0)
  {
    // there should be ? at each end of the xml version statement
    const string declaration = readUntilCharacter(*tagName, '?', '?', &returnLine);
    if (returnLine != "?")
      throw BADXMLVERSION;

    // go past the "xml" characters and the whitespace that follows them; a
    // declaration with nothing after "xml" simply has no attributes.
    const string::size_type attsStart = declaration.find_first_not_of(" \t", 3);
    const string attributes = (attsStart == string::npos) ? string() : declaration.substr(attsStart);
    returnObj.setAttributeVector(getAttributesFromString(attributes));
    // mark the object so that the caller stores it with the main xml details.
    returnObj.setXmlMainDetails(true);
  }
  // NOTE(review): a one character tag name (e.g. <a>) falls through here and
  // an empty object is returned; kept as in the original - confirm whether
  // that is intended.
  else if (tagName->length() > 1)
  {
    // any attributes come after the first space in the tag
    const string::size_type spacePos = tagName->find(' ');
    if (spacePos != string::npos)
    {
      const string::size_type attsStart = tagName->find_first_not_of(" \t", spacePos);
      const string attributes = (attsStart == string::npos) ? string() : tagName->substr(attsStart);
      // keep only the name in tagName now that the attributes are split off
      *tagName = tagName->substr(0, spacePos);
      returnObj.setAttributeVector(getAttributesFromString(attributes));
    }

    if (returnLine.length() > 1)
    {
      // pull out the value in the xml line <tagname>VALUE</tagname>
      const string value = readUntilCharacter(returnLine, '>', '<', &returnLine);
      returnObj.setTagValue(value);
    }
    if (returnLine.length() > 1)
    {
      // pick out the end tag name and make sure it is the same as the first one.
      const string endTagName = readUntilCharacter(returnLine, '<', '>', &returnLine);
      if (endTagName != "/" + *tagName)
        throw BADXMLTAGEND;
    }
    returnObj.setTagName(*tagName);
  }
  return returnObj;
}

readUntilCharacter picks out the string between two character points. For example, given the string "a place to run to, here is it, codingfriends.com", suppose you want to pick out "here is it": there is a ',' on each side of it. So you call readUntilCharacter("a place to run to, here is it, codingfriends.com", ',', ',', &returnLine), which returns the text between the two commas; returnLine is set to the rest of the line, starting at the end character that was found (here ", codingfriends.com").

// Picks out the characters between two character points, and also hands back
// the rest of the line.
//
// line           : the text to search.
// characterStart : the returned text begins just after the first occurrence
//                  of this character.
// characterEnd   : the returned text stops just before the first occurrence
//                  of this character that follows characterStart.
// returnLine     : out parameter (must not be null). When characterEnd is
//                  found it receives the remainder of the line, beginning
//                  with characterEnd itself; otherwise it is left untouched.
// returns        : the text between the two characters (empty when they are
//                  next to each other); everything after characterStart when
//                  characterEnd is missing; an empty string when
//                  characterStart is missing.
string xmlReader::readUntilCharacter(string line, char characterStart, char characterEnd, string *returnLine)
{
  string returnString;

  const string::size_type firstChar = line.find(characterStart);
  if (firstChar != string::npos)
  {
    // setup the return string, even if a second part cannot be found.
    returnString = line.substr(firstChar + 1);

    // compare against npos rather than "> 0": an end character sitting
    // directly after the start character (index 0) is still a match.
    const string::size_type secChar = returnString.find(characterEnd);
    if (secChar != string::npos)
    {
      *returnLine = returnString.substr(secChar);
      returnString = returnString.substr(0, secChar);
    }
  }
  return returnString;
}

loadFile loads the XML file into the _xmlMainDetails and _xmlDetails members: it just checks whether the file is present and then tries to load it.

// Reads in the XML file and places each parsed line into the xmlObject vectors.
//
// filename : path of the xml file to read.
// returns  : true when the file could be opened and was read to the end,
//            false when it could not be opened (a message is written to cout).
//
// Every line is handed to readLine. Objects flagged as the xml declaration
// are appended to _xmlMainDetails, all others to _xmlDetails. A line that
// readLine rejects is reported on cout as "ERROR : <message>" and skipped;
// it does not stop the load or change the return value.
bool xmlReader::loadFile(string filename)
{
  ifstream xmlfile(filename.c_str());
  if (!xmlfile.is_open())
  {
      cout << "Unable to open the file" << endl;
      return false;
  }

  string line, tagName;
  // Loop on getline itself: testing eof() before reading would run the body
  // one extra time with an empty line after the last real line.
  while (getline(xmlfile, line))
  {
      try
      {
        // pick out the xml details from the line; tagName is also returned
        // in case any further processing is required.
        xmlObject xmlObj = readLine(line, &tagName);
        if (xmlObj.getXmlMainDetails())
        {
          _xmlMainDetails.push_back(xmlObj);
        }
        else
        {
          _xmlDetails.push_back(xmlObj);
        }
      }
      // if any error occurs during the reading of the xml line.
      catch (const string& errorStr)
      {
        cout << "ERROR : " << errorStr << endl;
      }
  }

  xmlfile.close();
  return true;
}

printOuterXML prints out the XML that was loaded from the XML file.

/* print Out the outer XML values */
void xmlReader::printOuterXML()
{
    cout << "XML Reader Main Object (Xml main details) " << endl;
    for (int i =0; i < _xmlMainDetails.size(); i++)
      _xmlMainDetails.at(i).printOutXmlObject();
 
    cout << "XML Reader xml details" << endl;
    for (int i =0; i < _xmlDetails.size(); i++)
      _xmlDetails.at(i).printOutXmlObject();
}

Here are the constant values (the buffer length and the error strings).

// Length, in characters, for fixed-size character buffers.
const int CHARACTERLENGHT = 80;

// Error messages; these are the string values thrown by the reader.
const string BADXMLVERSION("Xml version - first line ? - problem");
const string BADTAGNAME("Tag name was not present");
const string BADXMLTAGEND("End tag is not the same as tag name");
const string BADATTRIBUTE("Attribute in wrong format attributeName=\"attributeValue\"");

Leave a Reply

Your email address will not be published. Required fields are marked *