I’ve been searching for a complete EBNF for XML 1.1 without much success. I found one for XML 1.0, but I was hoping to avoid manually patching it for the XML 1.1 changes.
In the end, I decided that it would be easiest to just parse the EBNF directly out of the specification. Here it is, for reference:
| [1] | document |
::= | ( prolog element Misc* ) - ( Char* RestrictedChar Char* ) |
| [2] | Char |
::= | [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] |
/* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */ |
| [2a] | RestrictedChar |
::= | [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F] |
| [3] | S |
::= | (#x20 | #x9 | #xD | #xA)+ |
| [4] | NameStartChar |
::= | ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] |
| [4a] | NameChar |
::= | NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040] |
| [5] | Name |
::= | NameStartChar (NameChar)* |
| [6] | Names |
::= | Name (#x20 Name)* |
| [7] | Nmtoken |
::= | (NameChar)+ |
| [8] | Nmtokens |
::= | Nmtoken (#x20 Nmtoken)* |
| [9] | EntityValue |
::= | '"' ([^%&"] | PEReference | Reference)* '"' |
| "'" ([^%&'] | PEReference | Reference)* "'" |
|||
| [10] | AttValue |
::= | '"' ([^<&"] | Reference)* '"' |
| "'" ([^<&'] | Reference)* "'" |
|||
| [11] | SystemLiteral |
::= | ('"' [^"]* '"') | ("'" [^']* "'") |
| [12] | PubidLiteral |
::= | '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" |
| [13] | PubidChar |
::= | #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] |
| [14] | CharData |
::= | [^<&]* - ([^<&]* ']]>' [^<&]*) |
| [15] | Comment |
::= | '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' |
| [16] | PI |
::= | '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' |
| [17] | PITarget |
::= | Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) |
| [18] | CDSect |
::= | CDStart CData CDEnd |
| [19] | CDStart |
::= | '<![CDATA[' |
| [20] | CData |
::= | (Char* - (Char* ']]>' Char*)) |
| [21] | CDEnd |
::= | ']]>' |
| [22] | prolog |
::= | XMLDecl Misc* (doctypedecl Misc*)? |
| [23] | XMLDecl |
::= | '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' |
| [24] | VersionInfo |
::= | S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') |
| [25] | Eq |
::= | S? '=' S? |
| [26] | VersionNum |
::= | '1.1' |
| [27] | Misc |
::= | Comment | PI | S |
| [28] | doctypedecl |
::= | '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' |
[VC: Root Element Type] |
| [WFC: External Subset] | ||||
| [28a] | DeclSep |
::= | PEReference | S |
[WFC: PE Between Declarations] |
| [28b] | intSubset |
::= | (markupdecl | DeclSep)* |
|
| [29] | markupdecl |
::= | elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment |
[VC: Proper Declaration/PE Nesting] |
| [WFC: PEs in Internal Subset] |
| [30] | extSubset |
::= | TextDecl? extSubsetDecl |
| [31] | extSubsetDecl |
::= | ( markupdecl | conditionalSect | DeclSep)* |
| [32] | SDDecl |
::= | S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) |
[VC: Standalone Document Declaration] |
| [39] | element |
::= | EmptyElemTag |
|
| STag content ETag |
[WFC: Element Type Match] | |||
| [VC: Element Valid] |
| [40] | STag |
::= | '<' Name (S Attribute)* S? '>' |
[WFC: Unique Att Spec] |
| [41] | Attribute |
::= | Name Eq AttValue |
[VC: Attribute Value Type] |
| [WFC: No External Entity References] | ||||
| [WFC: No < in Attribute Values] |
| [42] | ETag |
::= | '</' Name S? '>' |
| [43] | content |
::= | CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* |
| [44] | EmptyElemTag |
::= | '<' Name (S Attribute)* S? '/>' |
[WFC: Unique Att Spec] |
| [45] | elementdecl |
::= | '<!ELEMENT' S Name S contentspec S? '>' |
[VC: Unique Element Type Declaration] |
| [46] | contentspec |
::= | 'EMPTY' | 'ANY' | Mixed | children |
| [47] | children |
::= | (choice | seq) ('?' | '*' | '+')? |
|
| [48] | cp |
::= | (Name | choice | seq) ('?' | '*' | '+')? |
|
| [49] | choice |
::= | '(' S? cp ( S? '|' S? cp )+ S? ')' |
[VC: Proper Group/PE Nesting] |
| [50] | seq |
::= | '(' S? cp ( S? ',' S? cp )* S? ')' |
[VC: Proper Group/PE Nesting] |
| [51] | Mixed |
::= | '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
|
| '(' S? '#PCDATA' S? ')' |
[VC: Proper Group/PE Nesting] | |||
| [VC: No Duplicate Types] |
| [52] | AttlistDecl |
::= | '<!ATTLIST' S Name AttDef* S? '>' |
| [53] | AttDef |
::= | S Name S AttType S DefaultDecl |
| [54] | AttType |
::= | StringType | TokenizedType | EnumeratedType |
|
| [55] | StringType |
::= | 'CDATA' |
|
| [56] | TokenizedType |
::= | 'ID' |
[VC: ID] |
| [VC: One ID per Element Type] | ||||
| [VC: ID Attribute Default] | ||||
| 'IDREF' |
[VC: IDREF] | |||
| 'IDREFS' |
[VC: IDREF] | |||
| 'ENTITY' |
[VC: Entity Name] | |||
| 'ENTITIES' |
[VC: Entity Name] | |||
| 'NMTOKEN' |
[VC: Name Token] | |||
| 'NMTOKENS' |
[VC: Name Token] |
| [57] | EnumeratedType |
::= | NotationType | Enumeration |
|
| [58] | NotationType |
::= | 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' |
[VC: Notation Attributes] |
| [VC: One Notation Per Element Type] | ||||
| [VC: No Notation on Empty Element] | ||||
| [VC: No Duplicate Tokens] | ||||
| [59] | Enumeration |
::= | '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' |
[VC: Enumeration] |
| [VC: No Duplicate Tokens] |
| [60] | DefaultDecl |
::= | '#REQUIRED' | '#IMPLIED' |
|
| (('#FIXED' S)? AttValue) |
[VC: Required Attribute] | |||
| [VC: Attribute Default Value Syntactically Correct] | ||||
| [WFC: No < in Attribute Values] | ||||
| [VC: Fixed Attribute Default] | ||||
| [WFC: No External Entity References] |
| [61] | conditionalSect |
::= | includeSect | ignoreSect |
|
| [62] | includeSect |
::= | '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' |
[VC: Proper Conditional Section/PE Nesting] |
| [63] | ignoreSect |
::= | '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>' |
[VC: Proper Conditional Section/PE Nesting] |
| [64] | ignoreSectContents |
::= | Ignore ('<![' ignoreSectContents ']]>' Ignore)* |
|
| [65] | Ignore |
::= | Char* - (Char* ('<![' | ']]>') Char*) |
| [66] | CharRef |
::= | '&#' [0-9]+ ';' |
|
| '&#x' [0-9a-fA-F]+ ';' |
[WFC: Legal Character] |
| [67] | Reference |
::= | EntityRef | CharRef |
|
| [68] | EntityRef |
::= | '&' Name ';' |
[WFC: Entity Declared] |
| [VC: Entity Declared] | ||||
| [WFC: Parsed Entity] | ||||
| [WFC: No Recursion] | ||||
| [69] | PEReference |
::= | '%' Name ';' |
[VC: Entity Declared] |
| [WFC: No Recursion] | ||||
| [WFC: In DTD] |
| [70] | EntityDecl |
::= | GEDecl | PEDecl |
| [71] | GEDecl |
::= | '<!ENTITY' S Name S EntityDef S? '>' |
| [72] | PEDecl |
::= | '<!ENTITY' S '%' S Name S PEDef S? '>' |
| [73] | EntityDef |
::= | EntityValue | (ExternalID NDataDecl?) |
| [74] | PEDef |
::= | EntityValue | ExternalID |
| [75] | ExternalID |
::= | 'SYSTEM' S SystemLiteral |
|
| 'PUBLIC' S PubidLiteral S SystemLiteral |
||||
| [76] | NDataDecl |
::= | S 'NDATA' S Name |
[VC: Notation Declared] |
| [77] | TextDecl |
::= | '<?xml' VersionInfo? EncodingDecl S? '?>' |
| [78] | extParsedEnt |
::= | ( TextDecl? content ) - ( Char* RestrictedChar Char* ) |
| [80] | EncodingDecl |
::= | S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) |
|
| [81] | EncName |
::= | [A-Za-z] ([A-Za-z0-9._] | '-')* |
/* Encoding name contains only Latin characters */ |
| [82] | NotationDecl |
::= | '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' |
[VC: Unique Notation Name] |
| [83] | PublicID |
::= | 'PUBLIC' S PubidLiteral |
