You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
207 lines
4.6 KiB
207 lines
4.6 KiB
#!/usr/bin/ruby
# encoding: utf-8

require 'antlr3/test/functional'

# Functional test for an ANTLR3 lexer-only grammar that recognises a small
# XML document.
#
# DOCUMENT is the only non-fragment rule, so it is the single token the lexer
# produces; every other rule is a fragment that DOCUMENT reaches directly or
# indirectly. The embedded actions call `say(...)` as constructs are
# recognised, and the spec compares `lexer.output` with the expected
# transcript of those calls.
class XMLLexerTest < ANTLR3::Test::Functional
  # The heredoc is single-quoted (<<-'END'), so Ruby performs no interpolation
  # or backslash processing: `#{ text }`, `\"`, `\%` and `'\\"'` reach the
  # grammar compiler exactly as written.
  # NOTE(review): `\%` presumably stops ANTLR from reading `%` as template
  # syntax inside the action -- confirm against the ANTLR3 Ruby target docs.
  inline_grammar( <<-'END' )
    lexer grammar XML;
    options { language = Ruby; }

    @members {
      include ANTLR3::Test::CaptureOutput
      include ANTLR3::Test::RaiseErrors

      def quote(text)
        text = text.gsub(/\"/, '\\"')
        \%("#{ text }")
      end
    }

    DOCUMENT
        : XMLDECL? WS? DOCTYPE? WS? ELEMENT WS?
        ;

    fragment DOCTYPE
        :
            '<!DOCTYPE' WS rootElementName=GENERIC_ID
            {say("ROOTELEMENT: " + $rootElementName.text)}
            WS
            (
                ( 'SYSTEM' WS sys1=VALUE
                    {say("SYSTEM: " + $sys1.text)}

                | 'PUBLIC' WS pub=VALUE WS sys2=VALUE
                    {say("PUBLIC: " + $pub.text)}
                    {say("SYSTEM: " + $sys2.text)}
                )
                ( WS )?
            )?
            ( dtd=INTERNAL_DTD
                {say("INTERNAL DTD: " + $dtd.text)}
            )?
            '>'
        ;

    fragment INTERNAL_DTD : '[' (options {greedy=false;} : .)* ']' ;

    fragment PI :
            '<?' target=GENERIC_ID WS?
            {say("PI: " + $target.text)}
            ( ATTRIBUTE WS? )* '?>'
        ;

    fragment XMLDECL :
            '<?' ('x'|'X') ('m'|'M') ('l'|'L') WS?
            {say("XML declaration")}
            ( ATTRIBUTE WS? )* '?>'
        ;


    fragment ELEMENT
        : ( START_TAG
                (ELEMENT
                | t=PCDATA
                    {say("PCDATA: " << quote($t.text))}
                | t=CDATA
                    {say("CDATA: " << quote($t.text))}
                | t=COMMENT
                    {say("Comment: " << quote($t.text))}
                | pi=PI
                )*
                END_TAG
            | EMPTY_ELEMENT
            )
        ;

    fragment START_TAG
        : '<' WS? name=GENERIC_ID WS?
            {say("Start Tag: " + $name.text)}
            ( ATTRIBUTE WS? )* '>'
        ;

    fragment EMPTY_ELEMENT
        : '<' WS? name=GENERIC_ID WS?
            {say("Empty Element: " + $name.text)}
            ( ATTRIBUTE WS? )* '/>'
        ;

    fragment ATTRIBUTE
        : name=GENERIC_ID WS? '=' WS? value=VALUE
            {say("Attr: " + $name.text + " = "+ $value.text)}
        ;

    fragment END_TAG
        : '</' WS? name=GENERIC_ID WS? '>'
            {say("End Tag: " + $name.text)}
        ;

    fragment COMMENT
        : '<!--' (options {greedy=false;} : .)* '-->'
        ;

    fragment CDATA
        : '<![CDATA[' (options {greedy=false;} : .)* ']]>'
        ;

    fragment PCDATA : (~'<')+ ;

    fragment VALUE :
            ( '\"' (~'\"')* '\"'
            | '\'' (~'\'')* '\''
            )
        ;

    fragment GENERIC_ID
        : ( LETTER | '_' | ':')
            ( options {greedy=true;} : LETTER | '0'..'9' | '.' | '-' | '_' | ':' )*
        ;

    fragment LETTER
        : 'a'..'z'
        | 'A'..'Z'
        ;

    fragment WS :
            ( ' '
            | '\t'
            | ( '\n'
                | '\r\n'
                | '\r'
                )
            )+
        ;
  END

  # Lexes a sample document and checks the transcript written by the grammar
  # actions.
  #
  # The character data uses the entity `&lt;` instead of a literal `<`:
  # PCDATA is (~'<')+, so a bare `<` can never be part of a PCDATA token, and
  # a `<` followed by a newline matches none of the ELEMENT alternatives.
  #
  # Both heredoc bodies are deliberately flush-left so that their text is
  # exact regardless of how `fixed_indent( 0 )` treats leading whitespace
  # (presumably it normalises indentation to column 0 -- verify before
  # indenting them).
  it "should be valid" do
    lexer = XML::Lexer.new( <<-'END'.fixed_indent( 0 ) )
<?xml version='1.0'?>
<!DOCTYPE component [
<!ELEMENT component (PCDATA|sub)*>
<!ATTLIST component
attr CDATA #IMPLIED
attr2 CDATA #IMPLIED
>
<!ELMENT sub EMPTY>

]>
<component attr="val'ue" attr2='val"ue'>
<!-- This is a comment -->
Text
<![CDATA[huhu]]>
öäüß
&
&lt;
<?xtal cursor='11'?>
<sub/>
<sub></sub>
</component>
    END

    # Drain the lexer so that every grammar action runs; the tokens themselves
    # are not inspected, only the text the actions emitted.
    lexer.map { |tk| tk }

    lexer.output.should == <<-'END'.fixed_indent( 0 )
XML declaration
Attr: version = '1.0'
ROOTELEMENT: component
INTERNAL DTD: [
<!ELEMENT component (PCDATA|sub)*>
<!ATTLIST component
attr CDATA #IMPLIED
attr2 CDATA #IMPLIED
>
<!ELMENT sub EMPTY>

]
Start Tag: component
Attr: attr = "val'ue"
Attr: attr2 = 'val"ue'
PCDATA: "
"
Comment: "<!-- This is a comment -->"
PCDATA: "
Text
"
CDATA: "<![CDATA[huhu]]>"
PCDATA: "
öäüß
&
&lt;
"
PI: xtal
Attr: cursor = '11'
PCDATA: "
"
Empty Element: sub
PCDATA: "
"
Start Tag: sub
End Tag: sub
PCDATA: "
"
End Tag: component
    END
  end

end
|