| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606 |
- //
- // SmallXmlParser.cs
- //
- // Author:
- // Atsushi Enomoto <atsushi@ximian.com>
- //
- // Copyright (C) 2005 Novell, Inc (http://www.novell.com)
- //
- // Permission is hereby granted, free of charge, to any person obtaining
- // a copy of this software and associated documentation files (the
- // "Software"), to deal in the Software without restriction, including
- // without limitation the rights to use, copy, modify, merge, publish,
- // distribute, sublicense, and/or sell copies of the Software, and to
- // permit persons to whom the Software is furnished to do so, subject to
- // the following conditions:
- //
- // The above copyright notice and this permission notice shall be
- // included in all copies or substantial portions of the Software.
- //
- // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
- // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- //
- //
- // small xml parser that is mostly compatible with
- //
- using System;
- using System.Collections;
- using System.Globalization;
- using System.IO;
- using System.Text;
- namespace Mono.Xml
- {
- public class SmallXmlParser
- {
/// <summary>
/// Receives the events raised by <see cref="SmallXmlParser"/> while it parses a document.
/// </summary>
public interface IContentHandler
{
	/// <summary>Called by Parse before any input is consumed.</summary>
	void OnStartParsing (SmallXmlParser parser);

	/// <summary>Called by Parse after the whole input was read and every element was closed.</summary>
	void OnEndParsing (SmallXmlParser parser);

	/// <summary>
	/// Called for every start tag and empty-element tag. The parser clears
	/// <paramref name="attrs"/> right after this call returns, so copy any value
	/// that has to outlive the callback.
	/// </summary>
	void OnStartElement (string name, IAttrList attrs);

	/// <summary>
	/// Called for every end tag, and directly after OnStartElement for an empty-element tag.
	/// </summary>
	void OnEndElement (string name);

	/// <summary>Called for every processing instruction with its target name and its text.</summary>
	void OnProcessingInstruction (string name, string text);

	/// <summary>Called with buffered character content when the parser reaches markup or the end of input.</summary>
	void OnChars (string text);

	/// <summary>Called instead of OnChars when the parser classified the buffered text as whitespace only.</summary>
	void OnIgnorableWhitespace (string text);
}
/// <summary>
/// Read-only, index-based view of the attributes of one start tag.
/// </summary>
public interface IAttrList
{
	/// <summary>Number of attributes in the list.</summary>
	int Length { get; }

	/// <summary>True when the list holds no attribute.</summary>
	bool IsEmpty { get; }

	/// <summary>All attribute names, in list order.</summary>
	string [] Names { get; }

	/// <summary>All attribute values, in list order.</summary>
	string [] Values { get; }

	/// <summary>Name of the attribute at position <paramref name="i"/>.</summary>
	string GetName (int i);

	/// <summary>Value of the attribute at position <paramref name="i"/>.</summary>
	string GetValue (int i);

	/// <summary>
	/// Value of the attribute called <paramref name="name"/>. The parser's own
	/// implementation returns null when no attribute has that name.
	/// </summary>
	string GetValue (string name);
}
// Attribute list handed to IContentHandler.OnStartElement. Names and values
// live in two parallel lists: entry i of one belongs to entry i of the other.
class AttrListImpl : IAttrList
{
	ArrayList attrNames = new ArrayList ();
	ArrayList attrValues = new ArrayList ();

	public int Length {
		get { return attrNames.Count; }
	}

	public bool IsEmpty {
		get { return attrNames.Count == 0; }
	}

	public string [] Names {
		get {
			// A fresh array per call, so callers cannot alter the list.
			string [] copy = new string [attrNames.Count];
			attrNames.CopyTo (copy);
			return copy;
		}
	}

	public string [] Values {
		get {
			string [] copy = new string [attrValues.Count];
			attrValues.CopyTo (copy);
			return copy;
		}
	}

	public string GetName (int i)
	{
		return (string) attrNames [i];
	}

	public string GetValue (int i)
	{
		return (string) attrValues [i];
	}

	// Returns the value of the first attribute called 'name', or null when
	// there is none.
	public string GetValue (string name)
	{
		int index = attrNames.IndexOf (name);
		if (index < 0)
			return null;
		return (string) attrValues [index];
	}

	// Empties both lists so the instance can be reused for the next tag.
	internal void Clear ()
	{
		attrNames.Clear ();
		attrValues.Clear ();
	}

	// Appends one attribute; no check for duplicate names is made.
	internal void Add (string name, string value)
	{
		attrNames.Add (name);
		attrValues.Add (value);
	}
}
// Receiver of the parse events; set by Parse and released by Cleanup.
IContentHandler handler;
// Input being parsed; set by Parse and released by Cleanup.
TextReader reader;
// Names of the elements that are currently open, innermost on top.
Stack elementNames = new Stack ();
// xml:space value that was in effect when each open element was pushed.
Stack xmlSpaces = new Stack ();
// Most recently seen xml:space attribute value, restored from xmlSpaces on end tags.
string xmlSpace;
// Shared accumulator for character content, attribute values and PI text.
StringBuilder buffer = new StringBuilder (200);
// Scratch space for ReadName; replaced by a larger array when a name does not fit.
char [] nameBuffer = new char [30];
// True while 'buffer' holds nothing but whitespace collected by HandleWhitespaces.
bool isWhitespace;
// Reused for every start tag and cleared after OnStartElement returns.
AttrListImpl attributes = new AttrListImpl ();
// Position reported in error messages; maintained by Read.
int line = 1;
int column;
// Raised by Read when it sees '\n' and lowered again in the same call.
bool resetColumn;

/// <summary>Creates a parser. All state is set up by the field initializers.</summary>
public SmallXmlParser ()
{
}
// Builds (does not throw) a parser exception that carries the given message
// together with the current line and column.
private Exception Error (string msg)
{
	SmallXmlParserException error = new SmallXmlParserException (msg, line, column);
	return error;
}
// Builds (does not throw) the error used when input ends too early. The
// message lists the names of the elements that are still open, comma-separated.
private Exception UnexpectedEndError ()
{
	// COMPACT FRAMEWORK NOTE: CopyTo is not visible through the Stack class
	ICollection openElements = elementNames;
	string [] names = new string [openElements.Count];
	openElements.CopyTo (names, 0);
	string stackContent = String.Join (",", names);
	return Error ("Unexpected end of stream. Element stack content is " + stackContent);
}
// Tells whether 'c' may appear in an XML name. With 'start' set, the stricter
// rule for the first character of a name applies: '-', '.', digits and
// combining/modifier characters are then rejected.
private bool IsNameChar (char c, bool start)
{
	if (c == ':' || c == '_')
		return true;
	if (c == '-' || c == '.')
		return !start;

	// Individual characters and one small range that are accepted
	// regardless of their Unicode category.
	if (c == '\u0559' || c == '\u06E5' || c == '\u06E6')
		return true;
	if (c >= '\u02BB' && c <= '\u02C1')
		return true;

	UnicodeCategory category = Char.GetUnicodeCategory (c);

	bool letterLike =
		category == UnicodeCategory.LowercaseLetter ||
		category == UnicodeCategory.UppercaseLetter ||
		category == UnicodeCategory.OtherLetter ||
		category == UnicodeCategory.TitlecaseLetter ||
		category == UnicodeCategory.LetterNumber;
	if (letterLike)
		return true;

	bool continuationOnly =
		category == UnicodeCategory.SpacingCombiningMark ||
		category == UnicodeCategory.EnclosingMark ||
		category == UnicodeCategory.NonSpacingMark ||
		category == UnicodeCategory.ModifierLetter ||
		category == UnicodeCategory.DecimalDigitNumber;
	if (continuationOnly)
		return !start;

	return false;
}
// True for the four whitespace characters this parser knows: space, tab,
// carriage return and line feed. Any other value, including -1 for the end
// of input, is not whitespace.
private bool IsWhitespace (int c)
{
	return c == ' ' || c == '\t' || c == '\r' || c == '\n';
}
/// <summary>
/// Consumes any run of whitespace at the current position. Finding none is not an error.
/// </summary>
public void SkipWhitespaces ()
{
	const bool whitespaceRequired = false;
	SkipWhitespaces (whitespaceRequired);
}
// Moves a run of whitespace from the input into 'buffer'. If something other
// than markup or the end of input follows, the buffered text is no longer
// whitespace-only and the flag is lowered.
private void HandleWhitespaces ()
{
	for (int next = Peek (); IsWhitespace (next); next = Peek ())
		buffer.Append ((char) Read ());

	int following = Peek ();
	if (following >= 0 && following != '<')
		isWhitespace = false;
}
/// <summary>
/// Consumes any run of whitespace at the current position.
/// </summary>
/// <param name="expected">
/// When true, at least one whitespace character has to be present;
/// otherwise a parser error is thrown.
/// </param>
public void SkipWhitespaces (bool expected)
{
	bool skippedAny = false;
	while (IsWhitespace (Peek ())) {
		Read ();
		skippedAny = true;
	}
	if (expected && !skippedAny)
		throw Error ("Whitespace is expected.");
}
// Returns what the underlying reader reports as its next character without
// consuming it. Callers in this class treat a negative value as end of input.
private int Peek ()
{
	int next = reader.Peek ();
	return next;
}
// Consumes one character from the reader and keeps 'line' and 'column' up to
// date: a line feed starts a new line with column 1, anything else (the end
// of input marker included) advances the column by one.
private int Read ()
{
	int ch = reader.Read ();
	if (ch == '\n')
		resetColumn = true;

	if (!resetColumn) {
		column++;
		return ch;
	}

	resetColumn = false;
	line++;
	column = 1;
	return ch;
}
/// <summary>
/// Consumes one character and verifies that it is <paramref name="c"/>.
/// Throws a parser error when the input has ended or a different character was read.
/// </summary>
public void Expect (int c)
{
	int actual = Read ();
	if (actual < 0)
		throw UnexpectedEndError ();
	if (actual == c)
		return;
	throw Error (String.Format ("Expected '{0}' but got {1}", (char) c, (char) actual));
}
// Collects characters up to, but not including, the terminator 'until'; the
// terminator itself is consumed. With 'handleReferences' set, an '&' starts a
// reference whose replacement goes into the result. The text is gathered in
// the shared 'buffer', which is emptied again before returning.
// Throws when the input ends before the terminator is found.
private string ReadUntil (char until, bool handleReferences)
{
	for (;;) {
		if (Peek () < 0)
			throw UnexpectedEndError ();

		char current = (char) Read ();
		if (current == until)
			break;

		if (handleReferences && current == '&') {
			ReadReference ();
			continue;
		}
		buffer.Append (current);
	}

	string collected = buffer.ToString ();
	buffer.Length = 0;
	return collected;
}
/// <summary>
/// Reads an XML name at the current position and returns it. Throws a parser
/// error when the next character cannot start a name.
/// </summary>
public string ReadName ()
{
	int first = Peek ();
	if (first < 0 || !IsNameChar ((char) first, true))
		throw Error ("XML name start character is expected.");

	int length = 0;
	while (true) {
		int next = Peek ();
		if (next < 0 || !IsNameChar ((char) next, false))
			break;

		if (length == nameBuffer.Length) {
			// The scratch buffer is full: continue in one twice as large.
			char [] grown = new char [length * 2];
			// COMPACT FRAMEWORK NOTE: Array.Copy(sourceArray, destinationArray, count) is not available.
			Array.Copy (nameBuffer, 0, grown, 0, length);
			nameBuffer = grown;
		}
		nameBuffer [length] = (char) next;
		length++;
		Read ();
	}

	if (length == 0)
		throw Error ("Valid XML name is expected.");
	return new string (nameBuffer, 0, length);
}
/// <summary>
/// Parses the whole of <paramref name="input"/> and reports what it finds to
/// <paramref name="handler"/>.
/// </summary>
/// <param name="input">Source of the XML text.</param>
/// <param name="handler">Receiver of the parse events.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="input"/> or <paramref name="handler"/> is null.
/// </exception>
/// <remarks>
/// A parser error is thrown for malformed input, including elements that are
/// still open when the input ends. The parser state is reset when this method
/// leaves, whether it succeeded or threw, so the instance can be reused.
/// </remarks>
public void Parse (TextReader input, IContentHandler handler)
{
	if (input == null)
		throw new ArgumentNullException ("input");
	if (handler == null)
		throw new ArgumentNullException ("handler");

	this.reader = input;
	this.handler = handler;
	try {
		handler.OnStartParsing (this);
		while (Peek () >= 0)
			ReadContent ();
		// Flush text that was still buffered when the input ended.
		HandleBufferedContent ();
		if (elementNames.Count > 0)
			throw Error (String.Format ("Insufficient close tag: {0}", elementNames.Peek ()));
		handler.OnEndParsing (this);
	} finally {
		// Errors capture line and column when they are created, so resetting
		// here loses nothing; without it a failed parse would leave stale
		// element names and buffered text behind for the next Parse call.
		Cleanup ();
	}
}
// Puts the parser back into its initial state: drops the reader and handler,
// rewinds the position and empties every collection and buffer.
private void Cleanup ()
{
	handler = null;
	reader = null;

	line = 1;
	column = 0;

	xmlSpace = null;
	isWhitespace = false;
	buffer.Length = 0;
	attributes.Clear ();

	// Under CF_1_0 the stacks are replaced instead of cleared
	// (presumably Stack.Clear is not usable on that profile).
#if CF_1_0
	elementNames = new Stack ();
	xmlSpaces = new Stack ();
#else
	elementNames.Clear ();
	xmlSpaces.Clear ();
#endif
}
/// <summary>
/// Reads one unit of content at the current position: a run of character
/// data, or one markup construct (start tag, empty-element tag, end tag,
/// processing instruction, comment or CDATA section), and raises the matching
/// handler events.
/// </summary>
/// <remarks>
/// Throws a parser error for malformed markup, for a document type
/// declaration (not supported) and for an end tag that does not match the
/// innermost open element.
/// </remarks>
public void ReadContent ()
{
	string name;
	if (IsWhitespace (Peek ())) {
		// Whitespace that starts a fresh buffer is a candidate for
		// OnIgnorableWhitespace; HandleWhitespaces withdraws that when
		// ordinary text follows.
		if (buffer.Length == 0)
			isWhitespace = true;
		HandleWhitespaces ();
	}
	if (Peek () != '<') {
		ReadCharacters ();
		return;
	}

	Read ();
	switch (Peek ()) {
	case '!': // declarations
		Read ();
		if (Peek () == '[') {
			Read ();
			if (ReadName () != "CDATA")
				throw Error ("Invalid declaration markup");
			Expect ('[');
			ReadCDATASection ();
			return;
		}
		else if (Peek () == '-') {
			ReadComment ();
			return;
		}
		else if (ReadName () != "DOCTYPE")
			throw Error ("Invalid declaration markup.");
		else
			throw Error ("This parser does not support document type.");
	case '?': // PIs
		HandleBufferedContent ();
		Read ();
		name = ReadName ();
		SkipWhitespaces ();
		string text = String.Empty;
		// Scan for the closing "?>". ReadUntil consumes the '?' it stops
		// at, so a '?' not followed by '>' belongs to the text. The loop
		// also has to run when the PI has no content ("<?target?>"):
		// skipping it would leave the '?' unread and the Expect below
		// would reject a well-formed instruction.
		while (true) {
			text += ReadUntil ('?', false);
			if (Peek () == '>')
				break;
			text += "?";
		}
		handler.OnProcessingInstruction (
			name, text);
		Expect ('>');
		return;
	case '/': // end tags
		HandleBufferedContent ();
		if (elementNames.Count == 0)
			throw UnexpectedEndError ();
		Read ();
		name = ReadName ();
		SkipWhitespaces ();
		string expected = (string) elementNames.Pop ();
		// Restore the xml:space value of the enclosing element, if any.
		xmlSpaces.Pop ();
		if (xmlSpaces.Count > 0)
			xmlSpace = (string) xmlSpaces.Peek ();
		else
			xmlSpace = null;
		if (name != expected)
			throw Error (String.Format ("End tag mismatch: expected {0} but found {1}", expected, name));
		handler.OnEndElement (name);
		Expect ('>');
		return;
	default: // start tags (including empty tags)
		HandleBufferedContent ();
		name = ReadName ();
		while (Peek () != '>' && Peek () != '/')
			ReadAttribute (attributes);
		handler.OnStartElement (name, attributes);
		// The attribute list is shared between tags; empty it for the next one.
		attributes.Clear ();
		SkipWhitespaces ();
		if (Peek () == '/') {
			// Empty-element tag: report the end right away, nothing to push.
			Read ();
			handler.OnEndElement (name);
		}
		else {
			elementNames.Push (name);
			xmlSpaces.Push (xmlSpace);
		}
		Expect ('>');
		return;
	}
}
// Delivers the buffered text, if any, to the handler: as ignorable whitespace
// when the buffer was classified as whitespace-only, as character data
// otherwise. Afterwards the buffer is empty and the flag is lowered.
private void HandleBufferedContent ()
{
	if (buffer.Length == 0)
		return;

	string pending = buffer.ToString ();
	if (!isWhitespace)
		handler.OnChars (pending);
	else
		handler.OnIgnorableWhitespace (pending);

	buffer.Length = 0;
	isWhitespace = false;
}
// Appends character data to 'buffer' until markup ('<') or the end of input
// is reached, resolving references on the way. The text is only buffered
// here; HandleBufferedContent delivers it later.
private void ReadCharacters ()
{
	// Real text is about to be buffered, so it is no longer whitespace-only.
	isWhitespace = false;
	while (true) {
		int next = Peek ();
		if (next == -1 || next == '<')
			return;

		if (next == '&') {
			Read ();
			ReadReference ();
		}
		else
			buffer.Append ((char) Read ());
	}
}
// Resolves the reference whose leading '&' has already been consumed and
// appends its replacement text to 'buffer'. Supported are numeric character
// references (decimal "&#NNN;" and hexadecimal "&#xHHH;") and the five
// predefined entities amp, quot, apos, lt and gt. The terminating ';' is
// consumed in every case. Throws a parser error for any other entity name
// and for malformed references.
private void ReadReference ()
{
	if (Peek () == '#') {
		// character reference
		Read ();
		int codePoint = ReadCharacterReference ();
		Expect (';');
		if (codePoint > 0xFFFF) {
			// Outside the Basic Multilingual Plane: a .NET string
			// stores such a character as a surrogate pair.
			int offset = codePoint - 0x10000;
			buffer.Append ((char) (0xD800 + (offset >> 10)));
			buffer.Append ((char) (0xDC00 + (offset & 0x3FF)));
		}
		else
			buffer.Append ((char) codePoint);
		return;
	}

	string name = ReadName ();
	Expect (';');
	switch (name) {
	case "amp":
		buffer.Append ('&');
		break;
	case "quot":
		buffer.Append ('"');
		break;
	case "apos":
		buffer.Append ('\'');
		break;
	case "lt":
		buffer.Append ('<');
		break;
	case "gt":
		buffer.Append ('>');
		break;
	default:
		throw Error ("General non-predefined entity reference is not supported in this parser.");
	}
}

// Reads the number of a character reference; "&#" has already been consumed.
// A leading 'x' selects hexadecimal, otherwise the digits are decimal.
// Reading stops in front of the first character that is not a digit of the
// selected base (normally the ';', which is left for the caller).
// Returns the code point. Throws a parser error when there is no digit at all
// or when the value exceeds U+10FFFF.
private int ReadCharacterReference ()
{
	bool hex = Peek () == 'x';
	if (hex)
		Read ();

	int n = 0;
	int digitCount = 0;
	for (int i = Peek (); i >= 0; i = Peek ()) {
		int digit;
		if ('0' <= i && i <= '9')
			digit = i - '0';
		else if (hex && 'A' <= i && i <= 'F')
			digit = i - 'A' + 10;
		else if (hex && 'a' <= i && i <= 'f')
			digit = i - 'a' + 10;
		else
			break;

		// Explicit parentheses matter: '+' binds tighter than '<<', so
		// "n << 4 + digit" would shift by (4 + digit) instead.
		n = (hex ? (n << 4) : (n * 10)) + digit;
		// Checking on every step also keeps 'n' from overflowing on
		// absurdly long digit strings.
		if (n > 0x10FFFF)
			throw Error ("Character reference is out of the Unicode range.");
		digitCount++;
		Read ();
	}

	if (digitCount == 0) {
		if (Peek () < 0)
			throw UnexpectedEndError ();
		throw Error ("Character reference requires at least one digit.");
	}
	return n;
}
// Reads one attribute of the form name = "value" (or 'value'), preceded by
// mandatory whitespace, and adds it to 'a'. When only whitespace stands
// before the end of the tag, that whitespace is consumed and nothing is
// added. The value of an xml:space attribute is also remembered in 'xmlSpace'.
private void ReadAttribute (AttrListImpl a)
{
	SkipWhitespaces (true);

	int next = Peek ();
	if (next == '/' || next == '>')
		// came here just to spend trailing whitespaces
		return;

	string name = ReadName ();
	SkipWhitespaces ();
	Expect ('=');
	SkipWhitespaces ();

	// The value runs up to the next occurrence of its opening quote.
	int quote = Read ();
	if (quote != '\'' && quote != '"')
		throw Error ("Invalid attribute value markup.");
	string value = ReadUntil ((char) quote, true);

	if (name == "xml:space")
		xmlSpace = value;
	a.Add (name, value);
}
// Reads the body of a CDATA section (the opening "<![CDATA[" has already been
// consumed) up to and including the closing "]]>" and appends the body,
// verbatim, to 'buffer'. Throws when the input ends inside the section.
private void ReadCDATASection ()
{
	int lengthBefore = buffer.Length;
	// Number of consecutive ']' seen but not yet written: they may turn
	// out to be part of the terminator.
	int nBracket = 0;
	while (true) {
		if (Peek () < 0)
			throw UnexpectedEndError ();
		char c = (char) Read ();
		if (c == ']')
			nBracket++;
		else if (c == '>' && nBracket > 1) {
			// The last two brackets form the terminator; any before
			// them are content.
			for (int i = nBracket; i > 2; i--)
				buffer.Append (']');
			break;
		}
		else {
			// The pending brackets were ordinary content after all.
			for (int i = 0; i < nBracket; i++)
				buffer.Append (']');
			nBracket = 0;
			buffer.Append (c);
		}
	}
	// CDATA content is character data. Without this, a section that
	// follows buffered whitespace would be reported together with it
	// through OnIgnorableWhitespace instead of OnChars.
	if (buffer.Length > lengthBefore)
		isWhitespace = false;
}
// Skips a comment. The caller has consumed "<!"; this method consumes the
// opening "--", the comment text and the closing "-->". Nothing is reported
// to the handler. Throws a parser error when "--" occurs inside the comment
// or when the input ends before the comment is closed.
private void ReadComment ()
{
	Expect ('-');
	Expect ('-');
	while (true) {
		// Read returns a negative value at the end of input and keeps
		// doing so; without these checks an unterminated comment would
		// loop forever.
		int c = Read ();
		if (c < 0)
			throw UnexpectedEndError ();
		if (c != '-')
			continue;

		c = Read ();
		if (c < 0)
			throw UnexpectedEndError ();
		if (c != '-')
			continue;

		c = Read ();
		if (c < 0)
			throw UnexpectedEndError ();
		if (c != '>')
			throw Error ("'--' is not allowed inside comment markup.");
		break;
	}
}
- }
// Error raised by SmallXmlParser. The position at which the problem was
// detected is available through Line and Column and is also appended to the
// message text.
internal class SmallXmlParserException : SystemException
{
	readonly int lineNumber;
	readonly int columnNumber;

	public SmallXmlParserException (string msg, int line, int column)
		: base (String.Format ("{0}. At ({1},{2})", msg, line, column))
	{
		lineNumber = line;
		columnNumber = column;
	}

	// Line passed to the constructor.
	public int Line {
		get { return lineNumber; }
	}

	// Column passed to the constructor.
	public int Column {
		get { return columnNumber; }
	}
}
- }
|