Problem in parsing

Hi, I am trying to parse an HTML document, but I am not able to do that correctly...
Here is my source code:
package com.wolfram.nutch.parse;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.nutch.parse.HTMLMetaTags;
import org.apache.nutch.parse.HtmlParseFilter;
import org.apache.nutch.parse.Parse;
import org.apache.nutch.protocol.Content;
import java.util.Enumeration;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.*;
import org.xml.*;
import javax.xml.parsers.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.*;
import javax.xml.transform.stream.*;
import java.io.*;
import org.apache.xerces.parsers.DOMParser;
/** Adds basic searchable fields to a document. */
public class WolframHtmlParseFilter implements HtmlParseFilter {
     public static final Log LOG = LogFactory
               .getLog(WolframHtmlParseFilter.class);
     private Configuration conf;
     public static final String META_KEYWORDS_NAME = "keywords";
     public static final String META_SUMMARY_NAME = "summary";
    public static final String META_SYNONYMS_NAME = "synonyms";
     public Parse filter(Content content, Parse parse, HTMLMetaTags metaTags,
               DocumentFragment doc) {
          // Trying to find the document's recommended term
          String keywords = null;
          String summary = null;
        String synonyms = null;
        Document d = doc.getOwnerDocument();
      String htmlfile = content.toString();
      //String htmlfile = "<html><title></title><img /> <img /><img /></html>";
      Reader reader;
      Document actualdoc = null;
      DOMParser parser = new DOMParser();
      try {
//           Create a factory
          DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
//           Use document builder factory
          DocumentBuilder builder = factory.newDocumentBuilder();
//          Parse the document
          reader=new CharArrayReader(htmlfile.toCharArray());
          parser.parse(new org.xml.sax.InputSource(new StringReader(htmlfile)));
          actualdoc = parser.getDocument();
      catch(Exception e) {
          System.err.println(e);
      System.out.println("the string is" + actualdoc);
      NodeList images = actualdoc.getElementsByTagName("img");
      int length = images.getLength();
      System.out.println("the length is" + length);
      for(int i = 0;i<length;i++)
          Node image = images.item(i);
          String nodename = image.getNodeName();
          String alttext = image.getAttributes().getNamedItem("alt").getNodeValue();
          System.out.println(alttext);
          if (!metaTags.getNoIndex()) {
               Properties generalMetaTags = metaTags.getGeneralTags();
               for (Enumeration tagNames = generalMetaTags.propertyNames(); tagNames
                         .hasMoreElements();) {
                    Object element = tagNames.nextElement();
                    if (element.equals("keywords")) {
                         keywords = generalMetaTags.getProperty("keywords");
                    if (element.equals("dc.keywords")) {
                         keywords = generalMetaTags.getProperty("dc.keywords");
                    if (element.equals("description")) {
                         summary = generalMetaTags.getProperty("description");
                    if (element.equals("dc.description")) {
                         summary = generalMetaTags.getProperty("dc.description");
                         System.out.println("in dc.Description");
                if (element.equals("synonyms")){
                    synonyms = generalMetaTags.getProperty("synonyms");
               if (keywords != null) {
                    parse.getData().getParseMeta()
                              .set(META_KEYWORDS_NAME, keywords);
               if (summary != null) {
                    parse.getData().getParseMeta().set(META_SUMMARY_NAME, summary);
            if (synonyms != null){
                parse.getData().getParseMeta().set(META_SYNONYMS_NAME, synonyms);
          return parse;
     public void setConf(Configuration conf) {
          this.conf = conf;
     public Configuration getConf() {
          return this.conf;
and the error I am getting is: [Fatal Error] :1:1: Content is not allowed in prolog.
org.xml.sax.SAXParseException: Content is not allowed in prolog.
the string isnull
Any ideas, please? I have been stuck on this problem for the last 3 days.
Any help is highly appreciated. Thanks.
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  

Obviously, don't use an XML parser for something that isn't XML. Either switch over to using an HTML parser that produces a DOM, or clean up the HTML beforehand so that it's well-formed XHTML.
Look at JTidy and TagSoup.

Similar Messages

  • Encoding problem (c++ parser v2).

    Hi,
    I have problems with parsing XML that contains locale specific chars.
    My ORACLE_HOME and ORA_NLS env. vars are set correctly.
    Here is a sample:
    <?xml version="1.0" encoding="ISO-8859-1"?>
    <ROOT>
    <TEST>i</TEST>
    </ROOT>
    After xmlparse() method call I get LPX-00225 error. But when I add two SPACES or non-locale specific characters in front of </TEST> tag xmlparse() works fine.
    Any suggestion?
    I'm using Oracle 8.1.7, 2.0.4.0.0 version of XDK for C++ on AIX4.3 and Compaq Tru64
    An other sample :
    <?xml version="1.0" encoding="ISO-8859-1"?>
    <ROOT>
    <TEST att="i">bla bla bla</TEST>
    </ROOT>
    I get LPX-00244 error.
    If i add two spaces after non-locale specific character it works fine.
    I've read that version 2.0.7.0.0 solves this problem, so when will this version be available on AIX 4.3 and Compaq Tru64?
    I've read that you are planning on shipping the NLS data file along with the XDK in a future release. I'm interested to know when, because I don't want to install the Oracle 8i client on all the machines which run XML applications.
    Thanks
    Didier

    I verified that your test case is fixed in 2.0.7. You will get that version with Oracle 9.0.0 on AIX and Compaq Unix.
    Unfortunately, there is no schedule for shipping the NLS data files with the XDK.

  • Problem in parsing white spaces using if_ixml_parser

    Hello People,
    I have a problem in parsing XML file in one of my programs.
    The program uses the method get_value( ) of class if_ixml_node to fetch a value. In the XML file when the value is spaceA, then after parsing, the value fetched is A and not spaceA.
    Is there any way to fetch the white spaces also?
    Here is the sample tag. The value inside the tags is " A" and after parsing I get "A" the blank is ignored.
    <ns1:IndiceCible> A</ns1:IndiceCible>
    DATA : l_node_fils       TYPE REF TO if_ixml_node.
    indice_val = l_node_fils->get_value( ).
    In some XML parsing forums I saw that if we add the option as shown below, then the parsing works properly. But this did not work in my case.
    <ns1:IndiceCible xml:space="preserve"> A</ns1:IndiceCible>
    The input XML file is as follows.
    <?xml version="1.0" encoding="UTF-8"?><SOAP-ENV:Envelope xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/"><SOAP-ENV:Header><env:Flow xmlns:env="http://xml.inetpsa.com/Structure/Informatique/DeveloppementSI/ReferentielEchange"><env:MessageID>0414
    2636F4888F4B4A579A21E10000000A52931C</env:MessageID><env:From>R3P INTERFACES</env:From><env:To>SAP</env:To><env:TimeStamp>2010-03-04T14:26:36.672+01:00</env:TimeStamp></env:Flow></SOAP-ENV:Header><SOAP-ENV:Body><env:Response xmlns:env="http://xml.inetpsa.
    com/Structure/Informatique/DeveloppementSI/ReferentielEchange"><env:Values><ns1:ListeTranscodifications xmlns:ns1="http://xml.inetpsa.com/ProduitProcess/Produit/Transcodification"><ns1:Transcodification id="1 "><ns1:Statut>KO</ns1:Statut><ns1:Message>ERRO
    R : Indice incorrect</ns1:Message><ns1:ReferenceCible>969099979A</ns1:ReferenceCible><ns1:IndiceCible> A</ns1:IndiceCible><ns1:ColoreCible>SS_TEINTE</ns1:ColoreCible></ns1:Transcodification></ns1:ListeTranscodifications></env:Values><env:Status><env:Code>
    0</env:Code><env:Label></env:Label></env:Status></env:Response></SOAP-ENV:Body></SOAP-ENV:Envelope>
    Regards,
    Praveen

    use IF_XML_PARSER~CL_ADD_PRESERVE_SPACE_ELEMENT method:
    PARAMETERS preserve TYPE flag AS checkbox.
      DATA lo_ixml          TYPE REF TO if_ixml.
      DATA lo_streamfactory TYPE REF TO if_ixml_stream_factory.
      DATA lo_parser        TYPE REF TO if_ixml_parser.
      DATA lo_istream       TYPE REF TO if_ixml_istream.
      DATA lo_document      TYPE REF TO if_ixml_document.
      DATA lo_node          TYPE REF TO if_ixml_node.
      DATA l_node_value          TYPE string.
      lo_ixml = cl_ixml=>create( ).
      lo_streamfactory = lo_ixml->create_stream_factory( ).
      lo_istream = lo_streamfactory->create_istream_string(
            string = '<?xml version="1.0" encoding="iso-8859-1"?><DATA>  WAAA</DATA>' ).
      lo_document = lo_ixml->create_document( ).
      lo_parser = lo_ixml->create_parser( stream_factory = lo_streamfactory
                                          istream        = lo_istream
                                          document       = lo_document ).
    IF preserve = 'X'.
      lo_parser->ADD_PRESERVE_SPACE_ELEMENT( ). "<========= HERE
    ENDIF.
      lo_parser->parse( ).
      lo_node = lo_document->find_from_name( name = 'DATA').
      l_node_value = lo_node->get_value( ).
      ASSERT ( preserve = 'X' AND l_node_value = '  WAAA' )
            OR ( preserve = space AND l_node_value = 'WAAA' ).

  • Problem in parsing an XML using SAX parser

    Hai All,
    I have got a problem in parsing an XML using SAX parser.
    I have an XML (sample below) which need to be parsed
    <line-items>
    <item num="1">
         <part-number>PN1234</part-number>
         <quantity uom="ea">10</quantity>
         <lpn>LPN1060</lpn>
         <reference num="1">Line ref 1</reference>
         <reference num="2">Line ref 2</reference>
         <reference num="3">Line ref 3</reference>
    </item>
    <item num="2">
         <part-number>PN1527</part-number>
         <quantity uom="lbs">5</quantity>
         <lpn>LPN2152</lpn>
         <reference num="1">Line ref 1</reference>
         <reference num="2">Line ref 2</reference>
         <reference num="3">Line ref 3</reference>
    </item>
    <item num="n">
    </item>
    </line-items>
    There can be any number of items( 1 to n). I need to parse these
    item values using SAX parser and invoke a stored procedure for
    each item with its
    values(partnumber,qty,lpn,refnum1,refnum2,refnum3).
    Suppose if there are 100 items, i need to invoke the stored
    procedure sp1() 100 times for each item.
    I need to invoke the stored procedure in endDocument() method of
    SAX event handler and not in endelement() method.
    What is the best way to store those values and invoke the stored
    procedure in enddocument() method.
    Any help would be greatly appreciated.
    Thanks in advance
    Pooja.

    VO or ValueObject is a trendy new name for Beans.
    So just create an item class with variables for each of the sub elements.
    <item>
    <part-number>PN1234</part-number>
    <quantity uom="ea">10</quantity>
    <lpn>LPN1060</lpn>
    <reference num="1">Line ref 1</reference>
    <reference num="2">Line ref 2</reference>
    <reference num="3">Line ref 3</reference>
    </item>
    public class ItemVO
    String partNumber;
    int quantity;
    String quantityType;
    String lpn;
    List references = new ArrayList();
    * @return Returns the lpn.
    public String getLpn()
    return this.lpn;
    * @param lpn The lpn to set.
    public void setLpn(String lpn)
    this.lpn = lpn;
    * @return Returns the partNumber.
    public String getPartNumber()
    return this.partNumber;
    * @param partNumber The partNumber to set.
    public void setPartNumber(String partNumber)
    this.partNumber = partNumber;
    * @return Returns the quantity.
    public int getQuantity()
    return this.quantity;
    * @param quantity The quantity to set.
    public void setQuantity(int quantity)
    this.quantity = quantity;
    * @return Returns the quantityType.
    public String getQuantityType()
    return this.quantityType;
    * @param quantityType The quantityType to set.
    public void setQuantityType(String quantityType)
    this.quantityType = quantityType;
    * @return Returns the references.
    public List getReferences()
    return this.references;
    * @param references The references to set.
    public void setReferences(List references)
    this.references = references;

  • Can someone help me with a problem of parsing an XML file?

    Hello,
    I'm having some problems parsing an xml file. I get a SAXNotSupportedException when setting a property value.
    Here is the piece of code where I have the problem:
    SAXParserFactory spf = SAXParserFactory.newInstance();
    spf.setNamespaceAware(true);
    SAXParser saxParser = spf.newSAXParser();
    XMLReader xmlReader = saxParser.getXMLReader();
    DefaultHandler defHandler = new DefaultHandler();
    xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", defHandler);
    and the log is:
    Problem with the parser org.xml.sax.SAXNotSupportedException: PAR012 For propertyID "http://xml.org/sax/properties/lexical-handler", the value "org.xml.sax.helpers.DefaultHandler@4ff4f74a" cannot be cast to LexicalHandler.
    http://xml.org/sax/properties/lexical-handler org.xml.sax.helpers.DefaultHandler@4ff4f74a LexicalHandler
    I've been working on this problem but I can't find the error.
    Does anyone have an idea of what to do to solve it?
    Thanx in advance,
    M@G

    before deciding which XML technology to use, you should see if your application fit in the category below:
    use SAX:
    1. The XML file is rather large (30 or 40+ MB)
    2. I don't need the xml document in memory. I will parse the document and store the data in my own object.
    use DOM or JDOM
    1. The XML file is relatively small (less than 30 MB) or I can increase the runtime memory for larger xml file.
    2. I will need to walk up and down the XML document tree several times.
    3. My application is in Java and it's not going to be rewritten in C++, etc (use JDOM)
    NOTE:
    JDOM is rather easier to use (for Java developers), but it is not a W3C-standardized XML parser.
    personally, i like JDOM for traversing the DOM.

  • Problem in Parsing an XML

    I have saved the XML data into a variable and want to retrieve the values in the XML and store them in separate variables. While trying to do this with parsing, I encounter an error in DOM creation. I have tried the following code in the Form:Ready event, but it is not working.
    When I place a message box above the line which creates the DOM, it displays the message. But the message is not displayed when I place it on the line after the one which creates the DOM. So I think the problem is with the line creating the DOM. Please help me out. It's very urgent; I have a delivery in 2 days.
    var temp = xfa.resolveNode("$data.nodes.item(0)");
    var test = temp.saveXML();
    // Now create the DOM:
    var x = XML.parse(test);
    var policy = x.resolveNodes("policies");
    var number = policy.item(0);
    var company = number.getElement("CNACompany").value;
    var temp = xfa.resolveNode("$data.nodes.item(0)");
    var test = temp.saveXML();
    // Now create the DOM:
    var x = XMLData.parse(test,false);
    var xPathExpr = "//data/policies/[PolicyNumber='WC 1234567890']";
    var number = XMLData.applyXPath(x, xPathExpr);
    With this code for creating DOM we are getting some junk value for 'x' as 57.13mm.
    Thanks in Advance...
    Pavan.

    Hi,
    I'm not familiar with the XML.parse() or XMLData.parse() functions. I can't identify the XML or XMLData objects. They are not declared and they are not XFA objects.
    Still, it looks like you need to get and set a form/data value.
    Here is an example getting the value of a form field:
    You have the following form:
      someText
      otherText
    Use the SOM expression of the CNACompany field to access its value:
    var CNAValue = form1.policies.CNACompany.rawValue;
    If the value is not merged in the form, try searching the data DOM instead. For example:
    var CNAValue = xfa.record.policies.CNACompany.value;
    To set the value of a form field or data value, do as follows:
    form1.policies.CNACompany.rawValue = newValue;
    xfa.record.policies.CNACompany.value = "newValue";
    Note: to get/set a value from the data file use the value property. To get/set a value from a form field, use the rawValue property.
    Hope this helps,
    Hélène
    Adobe Systems Inc.
    FYI
    The Adobe XML Form Object Model Reference posted on the Developer Center contains several scripting examples in Appendix A.
    LiveCycle Developer Center
    http://www.adobe.com/devnet/livecycle/designing_forms.html
    Adobe XML Form Object Model Reference
    http://www.adobe.com/devnet/livecycle/articles/Adobe_XML_Form_Object_Model_Reference.pdf

  • Facing Problem in parsing a string to date

    Hi,
    I was trying to change a string into date with date format ("EEEE,MMM,d,h:mm") but I always get the year as 1970.
    here is my code
    String strDate="Saturday,Jan 19 7:31";
    String dateFormat3="EEEE,MMM,d,h:mm";
         try {
         DateFormat myDateFormat = new SimpleDateFormat(dateFormat3);
         result1=myDateFormat.parse(strDate);
    catch(ParseException pe) {
                System.out.println("ERROR: could not parse date in string \"" +
            }any solution for it.

    This is my actual code
    import java.text.DateFormat;
    import java.text.ParseException;
    import java.text.SimpleDateFormat;
    import java.util.Date;
    import java.util.Locale;
    public class TestingDate {
          * @param args
         public static void main(String[] args) {
              // TODO Auto-generated method stub
              String dateFormat="EEEE, MMM d h:mm a";
              Date test=new Date(2007,0,19, 19, 31);
              System.out.println(" original date is "+test);
              String stringResult=DateToString(test,dateFormat);
              System.out.println("Date to string is "+stringResult);
              Date dateResult=stringToDate(stringResult,dateFormat);
              System.out.println(" String to date is "+dateResult);
              String stringResult2=DateToString(dateResult,dateFormat);
              System.out.println(" Date to string  is "+stringResult2);
    public static String DateToString(Date test, String dateFormat) {
             String result = null;
             try {
                  DateFormat myDateFormat = new SimpleDateFormat(dateFormat);
                     result = myDateFormat.format(test);
                     //System.out.println(" reslut date is "+result);
              } catch (Exception e) {
                   System.out.println(" Exception is "+e);
              return result;
    public static Date stringToDate(String strDate,String dateFormat1){
         Date result1=null;
         try {
              DateFormat myDateFormat = new SimpleDateFormat(dateFormat1);
              result1=myDateFormat.parse(strDate);
         catch(Exception e){
              System.out.println(" exception is "+e);
         return result1;
    }I am facing problem in getting the actual date. Please suggest the solution.

  • Performance Problem in parsing large XML file (15MB)

    Hi,
    I'm trying to parse a large XML file(15 MB) and facing a clear performance problem. A Simple XML Validation using the following code snippet:
    DBMS_LOB.fileopen(targetFile, DBMS_LOB.file_readonly);
    DBMS_LOB.loadClobfromFile
    tempCLOB,
    targetFile,
    DBMS_LOB.getLength(targetFile),
    dest_offset,
    src_offset,
    nls_charset_id(CONSTANT_CHARSET),
    lang_context,
    conv_warning
    DBMS_LOB.fileclose(targetFile);
    p_xml_document := XMLType(tempCLOB, p_schema_url, 0, 0);
    p_xml_document.schemaValidate();
    is taking 30 mins on a HP-UX (4GB ram, 2 CPU) machine (Oracle version : 9.2.0.4).
    Please explain what could be going wrong.
    Thanks In Advance,
    Vineet

    Thanks Mark,
    I'll open a TAR and also upload the schema and instance XML.
    If i'm not changing the track too much :-) one more thing in continuation:
    If i skip the Schema Validation step and directly insert the instance document into a Schema linked XMLType table, what does OracleXDB do in such a case?
    i'm getting a severe performance hit here too... the same file as above takes almost 40 mins to Insert.
    code snippet:
    DBMS_LOB.fileopen(targetFile, DBMS_LOB.file_readonly);
    DBMS_LOB.loadClobfromFile
    tempCLOB,
    targetFile,
    DBMS_LOB.getLength(targetFile),
    dest_offset,
    src_offset,
    nls_charset_id(CONSTANT_CHARSET),
    lang_context,
    conv_warning
    DBMS_LOB.fileclose(targetFile);
    p_xml_document := XMLType(tempCLOB, p_schema_url, 0, 0);
    -- p_xml_document.schemaValidate();
    insert into INCOMING_XML values(p_xml_document);
    Here table INCOMING_XML is :
    TABLE of SYS.XMLTYPE(XMLSchema "http://INCOMING_XML.xsd" Element "MatchingResponse") STORAGE Object-
    relational TYPE "XDBTYPE_MATCHING_RESPONSE"
    This table and type XDBTYPE_MATCHING_RESPONSE were created using the mapping provided in the registered XML Schema.
    Thanks,
    Vineet

  • Problem in parsing in ABAP mapping

    Hi all, in our ABAP mapping we are trying to create a DOM tree from an XML file, but we are getting an error in the following statement:
    l_rc = if_iparser->parse( ).
    when we track the error using
    i = l_error->get_line( ). (result 0)
    i = l_error->get_column( ). (result 2017)
    str = l_error->get_reason( ). (Expected '<' or '/>' tag)
    But the strange thing is that we can open the file using Stylus Studio / Altova / IE, so maybe the problem is not in the XML file. Has anybody faced this problem before?
    Kind regards
    Goutam

    Hi,
    Have a look at this link.
    https://www.sdn.sap.com/irj/servlet/prt/portal/prtroot/docs/library/uuid/46759682-0401-0010-1791-bd1972bc0b8a
    I guess,your code may have problem.
    Try doing this from document.
    iparser = ixmlfactory->create_parser( stream_factory = streamfactory
    istream = istream
    document = idocument ).
    iparser->parse( ).
    Regards,
    Akshay Jamgaonkar.
    Hope this will help.

  • Problem in Parsing File

    Hi,
    I have one parse.PROPERTIES file. In it, all data is stored as key-value pairs. Some keys have multiple values, and some keys have a multi-line value. I want to parse that file and store the result in a Map, indexed by key.
    Content of File is as FOLLOWS:
       value=server
       multilinevalue = {
                "width" : "100px",
                "values" : [ { "25GB" : "25 GB" },
                                    { "50GB" : "50 GB" },
                                    { "75GB" : "75 GB" },
                                    { "100GB" : "100 GB" },
                                    { "150GB" : "150 GB" },
                                    { "200GB" : "200 GB" },
                                    { "500GB" : "500 GB" },
                                    { "1 TB" : "1 TB" } ] }
        multiplevalue= { "status.x" : 0, "status.y" : 30, "status.width" : 270,     "status.height" : 30 }
                                  Can anyone give me code how I can parse it...
    Thank u,

    "Can anyone give me code how I can parse it..." This is not a difficult problem, and I'm sure someone can help you if you show some effort.

  • Problem in parsing XML using DOM Parser.

    Hi,
    I am parsing an XML using DOM Parser.
    When I try to get the attributes of a node, I don't get them in the order in which they are written. For example, this is the node:
    <Level0 label="News" link="/website/ing_news.nsf/ViewNewsForm?OpenForm&All" level="202" uid="COGN-4MNMT3" parentid="aaaa">
    When i try to print the attribute values i should get in the order:
    News, /website/ing_news.nsf/ViewNewsForm?OpenForm&All, 202, COGN-4MNMT3, aaaa
    BUT I AM GETTING IN THE ORDER:
    News, 202, /website/ing_news.nsf/ViewNewsForm?OpenForm&All, aaaa, COGN-4MNMT3
    Is there any way to sort this problem out?
    Thanks and Regards,
    Ashok

    Hi Guys,
    Thanks a lot for your replies.
    But i want to keep all the values as attributes only.
    the XML file is as shown below:
    <Menu>
    <Level0 label="News" link="/website/ing_news.nsf/ViewNewsForm?OpenForm&All" level="202" uid="COGN-4MNMT3" parentid="aaaa" children="3">
         <Level1 label="ING News" link="" level="1" uid="COGN-4MNN89" parentid="COGN-4MNMT3" children="3" >
              <Level2 label="All ING News" link="/website/ing_news.nsf/ViewNewsForm?OpenForm&All" level="2" uid="INGD-4MVTK2" parentid="COGN-4MNN89" children="0">
              </Level2>
    </Level1>
    </Level0>
    The code i was using to get attributes is:
    String strElementName = new String(node.getNodeName());
         // System.out.println("strElementName:"+node.getNodeName());
    NamedNodeMap attrs = node.getAttributes();
    if (attrs != null) {
    int iLength = attrs.getLength();
    for (int i = 0; i < iLength; i++) {
    String strAttributes = (String) attrs.item(i).getNodeName();
    String strValues = (String) attrs.item(i).getNodeValue();
    Also is it not possible to Enforce the order using some Schema/DTD in this case?
    TIA
    Ashok

  • Problem in parsing huge XML document

    Hi,
    I am getting a problem when trying to parse an XML document that contains more than 1 lakh (100,000) data entries using JAXP (i.e. NodeList). I get an out-of-memory exception.

    Which implementation of JAXP are you using? If it is a large file containing many of the same "documents", then consider using SAX as it is an event driven parser meaning it does not have to read the whole of the file before it can start processing it. Instead, it fires events when it sees the beginning and end of a "document" and also on completion of the elements. You can then create and destroy objects during the document processing stage which helps keep the memory consumption under control.

  • Problem in parsing XML using DOM

    I am getting one XML file as string like <?xml version="1.0" encoding="ISO-8859-1" ?> <DMSI-ACTIVITY-COMMENTS> </DMSI-ACTIVITY-COMMENTS>
    Every time I want to add new node <ACTIVITY>
              <NAME></NAME>
              <ID></ID>
              <COMMENT></COMMENT>
         </ACTIVITY> whenever user make any change in my application.
    I wrote code....
    InputStream inputStream = new ByteArrayInputStream(file.getBytes());
    Document doc = docBuilder.parse(inputStream);
    Element activityNode = doc.createElement("ACTIVITY");
    Element nameNode = doc.createElement("NAME");
    activityNode.appendChild(nameNode);
    Text nametextNode = doc.createTextNode(name);
    nameNode.appendChild(nametextNode);
    Element root = doc.getDocumentElement();
    root.appendChild(activityNode);
    String resultUDA = doc.toString();
    Problem: here the value of resultUDA is [#document: null]. I need the updated XML as a String.
    Can anyone give me a suggestion or another option to solve this issue?
    thanks in advance...

    I got it ....pls check on it
    http://www.theserverside.com/discussions/thread.tss?thread_id=26060

  • Problem in parsing a particular XML file.

    Hello, I have an XML file like this:
    <eGov_IT:Intestazione xmlns:eGov_IT="http://www.cnipa.it/schemas/2003/eGovIT/Busta1_0/" xmlns:SOAP_ENV="http://schemas.xmlsoap.org/soap/envelope/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.cnipa.it/schemas/2003/eGovIT/Busta1_0/
    E:\Progetti\EchoPorte\sviluppo\WEBCON~1\WEB-INF\risorse\Messaggio.xsd">
    <eGov_IT:IntestazioneMessaggio xmlns:eGov_IT="http://www.cnipa.it/schemas/2003/eGovIT/Busta1_0/">
    <eGov_IT:Collaborazione>PortaDelegata_PortaDiDominio_0000001_2007-10-05_12:26</eGov_IT:Collaborazione>
    <eGov_IT:Identificatore>PortaDelegata_PortaDiDominio_0000002_2007-10-05_12:26</eGov_IT:Identificatore>
    </eGov_IT:IntestazioneMessaggio>
    As you can see there are 2 elements in which the values are really similar. In fact in the Schema we use they have to match the same regular expression; here's the extract from my schema.
    <xsd:element name="Collaborazione" type="IdentificatoreType"/>
    <xsd:element name="Identificatore" type="IdentificatoreType"/>
    <xsd:simpleType name="IdentificatoreType">
    <xsd:restriction base="xsd:string">
    <xsd:pattern value="[\w]+_[\w]+_\d{7}_\d{4}\-\d{2}\-\d{2}_\d{2}:\d{2}"/>
    </xsd:restriction>
    </xsd:simpleType>
    I can CORRECTLY validate this expression using this code:
    public void validate (String doc, String schema) throws SAXParseException, SAXException
    try
    SchemaFactory schemaFactory = SchemaFactory.newInstance( XMLConstants.W3C_XML_SCHEMA_NS_URI );
    Schema schemaXSD = schemaFactory.newSchema( new File ( schema ) );
    Validator validator = schemaXSD.newValidator();
    DocumentBuilderFactory.newInstance().newDocumentBuilder();
    ByteArrayInputStream baisDoc = new ByteArrayInputStream(doc.getBytes());
    Document document = parser.parse(baisDoc);
    validator.validate( new DOMSource( document ) );
    And, in case the validation fails, I correctly gain a SAXParseException.
    The problem is that I can't understand if, in this case, the error is in the "Collaborazione" element or in the "Identificatore" element, because I get the following detailed message from the Exception:
    "cvc-pattern-valid: Value '' is not facet-valid with respect to pattern '[\w]+_[\w]+_\d{7}_\d{4}\-\d{2}\-\d{2}_\d{2}:\d{2}' for type 'IdentificatoreType'."
    How can I get more detailed information about this error?
    Thanks everybody,
    Cris

    Check here:
    http://forum.java.sun.com/thread.jspa?threadID=5223284
    This is the correct post with my problem.
    Thanks!
    Cristiano

  • Problem in parsing date having Chinese character when dateformat is 'MMM'

    I m calling jsp page using following code:
    var ratewin = window.showModalDialog("Details.jsp?startDate="+startDate,window, dlgSettings );
    In my javascript when checked by adding alerts I m getting correct values before passing to jsp,
    alert("startDate:"+startDate);
    In jsp page my code is like below:
    String startDate = request.getParameter("startDate");     
    but here I am getting garbage values in the month when the date format is 'MMM', because of which date parsing fails.
    This happens only with Chinese characters.
    The following 2 encoding declarations are already in my JSP page; can anyone help me find a solution?
         <%@ page pageEncoding="UTF-8" contentType="text/html;charset=UTF-8"%>
         <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8"/>
    I have even tried to read it as UTF-8 but still that's failing.

    This is my actual code
    import java.text.DateFormat;
    import java.text.ParseException;
    import java.text.SimpleDateFormat;
    import java.util.Date;
    import java.util.Locale;
    public class TestingDate {
          * @param args
         public static void main(String[] args) {
              // TODO Auto-generated method stub
              String dateFormat="EEEE, MMM d h:mm a";
              Date test=new Date(2007,0,19, 19, 31);
              System.out.println(" original date is "+test);
              String stringResult=DateToString(test,dateFormat);
              System.out.println("Date to string is "+stringResult);
              Date dateResult=stringToDate(stringResult,dateFormat);
              System.out.println(" String to date is "+dateResult);
              String stringResult2=DateToString(dateResult,dateFormat);
              System.out.println(" Date to string  is "+stringResult2);
    public static String DateToString(Date test, String dateFormat) {
             String result = null;
             try {
                  DateFormat myDateFormat = new SimpleDateFormat(dateFormat);
                     result = myDateFormat.format(test);
                     //System.out.println(" reslut date is "+result);
              } catch (Exception e) {
                   System.out.println(" Exception is "+e);
              return result;
    public static Date stringToDate(String strDate,String dateFormat1){
         Date result1=null;
         try {
              DateFormat myDateFormat = new SimpleDateFormat(dateFormat1);
              result1=myDateFormat.parse(strDate);
         catch(Exception e){
              System.out.println(" exception is "+e);
         return result1;
    }
    I am facing a problem in getting the actual date. Please suggest a solution.

Maybe you are looking for