cppAdaptive1

 view release on metacpan or  search on metacpan

src/dlib/xml_parser/xml_parser_kernel_1.h  view on Meta::CPAN

            );
   

        private:
    
            // -----------------------------------

            // attribute_list interface implementation
            class attrib_list : public attribute_list
            {
            public:
                // the list of attribute name/value pairs
                map list;

                bool is_in_list (
                    const std::string& key
                ) const
                {
                    return list.is_in_domain(key);
                }

                const std::string& operator[] (
                    const std::string& key
                ) const
                {
                    return list[key];
                }

                bool at_start (
                ) const { return list.at_start(); }

                void reset (
                ) const { return list.reset(); }

                bool current_element_valid (
                ) const { return list.current_element_valid(); }

                const type& element (
                ) const { return list.element(); }

                type& element (
                ) { return list.element(); }

                bool move_next (
                ) const { return list.move_next(); }

                unsigned long size (
                ) const { return list.size(); }
            };

            
            // -----------------------------------

            enum token_type
            {
                element_start, // the first tag of an element
                element_end,   // the last tag of an element
                empty_element, // the singular tag of an empty element
                pi,            // processing instruction
                chars,         // the non-markup data between tags
                chars_cdata,   // the data from a CDATA section
                eof,           // this token is returned when we reach the end of input
                error,         // this token indicates that the tokenizer couldn't
                               // determine which category the next token fits into
                dtd,           // this token is for an entire dtd 
                comment        // this is a token for comments
            };
            /*
                notes about the tokens:
                    the tokenizer guarantees that the following tokens to not 
                    contain the '<' character except as the first character of the token
                    element_start, element_end, empty_element, and pi.  they also only
                    contain the '>' characer as their last character.

                    it is also guaranteed that pi is at least of the form <??>.  that
                    is to say that it always always begins with <? and ends with ?>.

                    it is also guaranteed that all markup tokens will begin with the '<'
                    character and end with the '>'. there won't be any leading or
                    trailing whitespaces.   this whitespace is considered a chars token.
            */


            // private member functions
            inline void get_next_token(
                std::istream& in,
                std::string& token_text,
                int& token_kind,
                unsigned long& line_number
            );
            /*!
                ensures
                    gets the next token from in and puts it in token_text and
                    token_kind == the kind of the token found and
                    line_number is incremented every time a '\n' is encountered and
                    entity references are translated into the characters they represent
                    only for chars tokens
            !*/

            inline int parse_element (
                const std::string& token,
                std::string& name,
                attrib_list& atts
            );
            /*!
                requires
                    token is a token of kind start_element or empty_element
                ensures
                    gets the element name and puts it into the string name and
                    parses out the attributes and puts them into the attribute_list atts

                    return 0 upon success or
                    returns -1 if it failed to parse token
            !*/

            inline int parse_pi (
                const std::string& token,
                std::string& target,
                std::string& data
            );
            /*!

src/dlib/xml_parser/xml_parser_kernel_1.h  view on Meta::CPAN

// ----------------------------------------------------------------------------------------
 
    void xml_parser::
    clear(
    )
    {
        // unregister all event handlers
        eh_list.clear();
        dh_list.clear();
    }

// ----------------------------------------------------------------------------------------
        
    void xml_parser::
    parse (
        std::istream& in
    )
    {
        DLIB_CASSERT ( in.fail() == false ,
            "\tvoid xml_parser::parse"
            << "\n\tthe input stream must not be in the fail state"
            << "\n\tthis: " << this
            );


        // save which exceptions in will throw and make it so it won't throw any
        // for the life of this function
        std::ios::iostate old_exceptions = in.exceptions();
        // set it to not throw anything
        in.exceptions(std::ios::goodbit);


        try 
        {
            unsigned long line_number = 1;

            // skip any whitespace before the start of the document
            while (in.peek() == ' ' || in.peek() == '\t' || in.peek() == '\n' || in.peek() == '\r' ) 
            {
                if (in.peek() == '\n')
                    ++line_number;
                in.get();
            }



            stack tags; // this stack contains the last start tag seen
            bool seen_fatal_error = false;
            bool seen_root_tag = false;  // this is true after we have seen the root tag
            


            // notify all the document_handlers that we are about to being parsing
            for (unsigned long i = 0; i < dh_list.size(); ++i)
            {
                dh_list[i]->start_document();
            }


            std::string chars_buf; // used to collect chars data between consecutive
                                // chars and chars_cdata tokens so that 
                                // document_handlers receive all chars data between
                                // tags in one call

            // variables to be used with the parsing functions
            attrib_list atts;
            std::string name;
            std::string target;
            std::string data;

            

            // variables to use with the get_next_token() function
            std::string token_text;
            int token_kind;

            get_next_token(in,token_text,token_kind,line_number);


            while (token_kind != eof)
            {          
                bool is_empty = false;  // this becomes true when this token is an empty_element

                switch (token_kind)
                {


                case empty_element: is_empty = true;
                case element_start:
                    {
                        seen_root_tag = true;

                        int status = parse_element(token_text,name,atts);
                        // if there was no error parsing the element
                        if (status == 0)
                        {
                            // notify all the document_handlers
                            for (unsigned long i = 0; i < dh_list.size(); ++i)
                            {
                                dh_list[i]->start_element(line_number,name,atts);
                                if (is_empty)
                                    dh_list[i]->end_element(line_number,name);
                            }
                        }
                        else
                        {
                            seen_fatal_error = true;
                        }

                        // if this is an element_start token then push the name of
                        // the element on to the stack
                        if (token_kind == element_start)
                        {
                            tags.push(name);
                        }

                    }break;

                // ----------------------------------------

                case element_end:

src/dlib/xml_parser/xml_parser_kernel_1.h  view on Meta::CPAN

                            }
                        }
                        else
                        {
                            seen_fatal_error = true;
                        }


                    }break;

                // ----------------------------------------

                case pi:
                    {

                        int status = parse_pi (token_text,target,data);
                        // if there was no error parsing the element
                        if (status == 0)
                        {
                            // notify all the document_handlers
                            for (unsigned long i = 0; i < dh_list.size(); ++i)
                            {
                                dh_list[i]->processing_instruction(line_number,target,data);
                            }
                        }
                        else
                        {
                            // notify all the error_handlers
                            for (unsigned long i = 0; i < eh_list.size(); ++i)
                            {
                                eh_list[i]->error(line_number);
                            }
                        }
                        while (in.peek() == ' ' || in.peek() == '\t' || in.peek() == '\n' || in.peek() == '\r' ) 
                        {
                            if (in.peek() == '\n')
                                ++line_number;
                            in.get();
                        }


                    }break;

                // ----------------------------------------

                case chars:
                    {
                        if (tags.size() != 0)
                        {
                            chars_buf += token_text;
                        }
                        else if (token_text.find_first_not_of(" \t\r\n") != std::string::npos)
                        {
                            // you can't have non whitespace chars data outside the root element
                            seen_fatal_error = true;                        
                        }
                    }break;

                // ----------------------------------------

                case chars_cdata:
                    {
                        if (tags.size() != 0)
                        {
                            chars_buf += token_text;
                        }
                        else
                        {
                            // you can't have chars_data outside the root element
                            seen_fatal_error = true;
                        }
                    }break;

                // ----------------------------------------

                case eof:
                    break;

                // ----------------------------------------

                case error:
                    {
                        seen_fatal_error = true;
                    }break;

                // ----------------------------------------

                case dtd:       // fall though
                case comment:   // do nothing
                    break;

                // ----------------------------------------


                }

                // if there was a fatal error then quit loop
                if (seen_fatal_error)
                    break;

                // if we have seen the last tag then quit the loop
                if (tags.size() == 0 && seen_root_tag)
                    break;
                

                get_next_token(in,token_text,token_kind,line_number);

                // if the next token is not a chars or chars_cdata token then flush
                // the chars_buf to the document_handlers
                if ( (token_kind != chars) && 
                    (token_kind != chars_cdata) &&
                    (token_kind != dtd) && 
                    (token_kind != comment) &&
                    (chars_buf.size() != 0)
                    )
                {
                    // notify all the document_handlers
                    for (unsigned long i = 0; i < dh_list.size(); ++i)
                    {
                        dh_list[i]->characters(chars_buf);
                    }
                    chars_buf.erase();
                }


            } //while (token_kind != eof)




            // you can't have any unmatched tags or any fatal erros
            if (tags.size() != 0 || seen_fatal_error)
            {
                // notify all the error_handlers
                for (unsigned long i = 0; i < eh_list.size(); ++i)
                {
                    eh_list[i]->fatal_error(line_number);
                }
                
            }


            // notify all the document_handlers that we have ended parsing
            for (unsigned long i = 0; i < dh_list.size(); ++i)
            {
                dh_list[i]->end_document();
            }
        
        }
        catch (...)
        {
            // notify all the document_handlers that we have ended parsing
            for (unsigned long i = 0; i < dh_list.size(); ++i)
            {
                dh_list[i]->end_document();
            }

            // restore the old exception settings to in
            in.exceptions(old_exceptions);

            // don't forget to rethrow the exception
            throw;
        }

        // restore the old exception settings to in
        in.exceptions(old_exceptions);

    }

// ----------------------------------------------------------------------------------------
        

src/dlib/xml_parser/xml_parser_kernel_1.h  view on Meta::CPAN

        dh_list.add(dh_list.size(),temp);
    }

// ----------------------------------------------------------------------------------------
        
    void xml_parser::
    add_error_handler (
        error_handler& item
    )
    {
        error_handler* temp = &item;
        eh_list.add(eh_list.size(),temp);
    }

// ----------------------------------------------------------------------------------------
        
    void xml_parser::
    swap (
        xml_parser& item
    )
    {
        dh_list.swap(item.dh_list);
        eh_list.swap(item.eh_list);
    }
   
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
    // private member function definitions
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
        
    void xml_parser::
    get_next_token(
        std::istream& in,
        std::string& token_text,
        int& token_kind,
        unsigned long& line_number
    )
    {

        token_text.erase();

        std::istream::int_type ch1 = in.get();
        std::istream::int_type ch2;


        switch (ch1)
        {

        // -----------------------------------------

            // this is the start of some kind of a tag
        case '<':
            {
                ch2 = in.get();
                switch (ch2)
                {
                
                // ---------------------------------

                    // this is a dtd, comment, or chars_cdata token 
                case '!':
                    {
                        // if this is a CDATA section *******************************
                        if ( in.peek() == '[')
                        {
                            token_kind = chars_cdata;

                            // throw away the '['
                            in.get();

                            // make sure the next chars are CDATA[
                            std::istream::int_type ch = in.get();
                            if (ch != 'C')                                
                                token_kind = error;
                            ch = in.get();
                            if (ch != 'D')
                                token_kind = error;
                            ch = in.get();
                            if (ch != 'A')
                                token_kind = error;
                            ch = in.get();
                            if (ch != 'T')
                                token_kind = error;
                            ch = in.get();
                            if (ch != 'A')
                                token_kind = error;
                            ch = in.get();
                            if (ch != '[')
                                token_kind = error;
                            // if this is an error token then end
                            if (token_kind == error)
                                break;


                            // get the rest of the chars and put them into token_text
                            int brackets_seen = 0; // this is the number of ']' chars
                                                   // we have seen in a row
                            bool seen_closing = false; // true if we have seen ]]>
                            do
                            {
                                ch = in.get();

                                if (ch == '\n')
                                    ++line_number;

                                token_text += ch;

                                // if this is the closing 
                                if (brackets_seen == 2 && ch == '>')
                                    seen_closing = true;
                                // if we are seeing a bracket
                                else if (ch == ']')
                                    ++brackets_seen;
                                // if we didn't see a bracket
                                else
                                    brackets_seen = 0;


                            } while ( (!seen_closing) && (ch != EOF) );

                            // check if this is an error token
                            if (ch == EOF)
                            {
                                token_kind = error;
                            }
                            else



( run in 0.839 second using v1.01-cache-2.11-cpan-5a3173703d6 )