Alvis-NLPPlatform

 view release on metacpan or  search on metacpan

etc/alvis-nlpplatform/dtd/enriched-document.dtd  view on Meta::CPAN

<!-- $Id: enriched-document.dtd,v 1.1 2007/03/19 09:46:48 ht Exp $ -->

<!-- This DTD prescribes the format of Alvis enriched document records -->


<!-- Top-level container: holds zero or more documentRecord elements, so an
     empty collection is valid. No other element declared in this DTD
     contains documentCollection. -->
<!ELEMENT documentCollection (documentRecord*)>


<!-- One enriched document record. The acquisition child is mandatory;
     linguisticAnalysis and relevance are optional and, when present, must
     appear in that order after acquisition. -->
<!ELEMENT documentRecord (acquisition, linguisticAnalysis?, relevance?)>
<!-- id is required but is declared CDATA rather than ID, so this DTD does
     not itself enforce uniqueness of record identifiers. -->
<!ATTLIST documentRecord id CDATA #REQUIRED>


<!-- Acquisition part of a record. acquisitionData and canonicalDocument are
     mandatory; originalDocument, metaData, links and analysis are optional.
     Children must appear in the order listed in the content model. -->
<!ELEMENT acquisition (acquisitionData, originalDocument?, canonicalDocument,
		       metaData?, links?, analysis?)>

<!-- Content model: modifiedDate comes first and urls comes last, both
     required; expiryDate, checkedDate and httpServer are optional and sit
     between them in that order. -->
<!ELEMENT acquisitionData (modifiedDate, expiryDate?, checkedDate?,
			   httpServer?, urls)>
<!-- The following leaf elements are plain character data; this DTD places
     no constraint on the format of their values. -->
<!ELEMENT modifiedDate (#PCDATA)>
<!ELEMENT expiryDate (#PCDATA)>
<!ELEMENT checkedDate (#PCDATA)>
<!ELEMENT httpServer (#PCDATA)>
<!-- urls is a required wrapper but may be empty: zero or more url children. -->
<!ELEMENT urls (url*)>
<!ELEMENT url (#PCDATA)>

<!-- The original document, carried as character data. mimeType and charSet
     are required attributes; compression and encoding are optional. All four
     are declared CDATA, so the value lists in the comments below are
     conventions that this DTD does not enforce. -->
<!ELEMENT originalDocument (#PCDATA)>
<!-- The "encoding" attribute may be "base64" or "quoted-printable".
     NOTE(review): the value list further below additionally names "xml";
     confirm which of the two lists is authoritative. -->
<!ATTLIST originalDocument mimeType CDATA #REQUIRED
                           charSet CDATA #REQUIRED
                           compression CDATA #IMPLIED
                           encoding CDATA #IMPLIED>
<!-- originalDocument.mimeType chosen from IANA's list -->
<!-- originalDocument.charSet chosen from IANA's list -->
<!-- originalDocument.compression may take the following values:
	"deflate", "gzip" -->
<!-- originalDocument.encoding may take the following values:
	"quoted-printable", "base64", "xml" -->

<!-- Canonical form of the document: a sequence of zero or more sections. -->
<!ELEMENT canonicalDocument (section*)>
<!-- Mixed content: text freely interleaved with list, ulink and nested
     section elements, in any order and any number. Sections can therefore
     nest to arbitrary depth. -->
<!ELEMENT section (#PCDATA|list|ulink|section)*>
<!-- title is optional. -->
<!ATTLIST section title CDATA #IMPLIED>
<!-- A list holds zero or more items and nothing else (no direct text). -->
<!ELEMENT list (item*)>
<!-- Mixed content: text interleaved with nested lists and ulinks. Unlike
     section, an item cannot contain a section. -->
<!ELEMENT item (#PCDATA|list|ulink)*>
<!-- A ulink contains text only; its url attribute is optional, so a ulink
     without a target is valid against this DTD. -->
<!ELEMENT ulink (#PCDATA)>
<!ATTLIST ulink url CDATA #IMPLIED>

<!-- Container for zero or more meta entries. -->
<!ELEMENT metaData (meta*)>
<!-- Each meta entry is character data labelled by a required name
     attribute. The name is declared CDATA, so the DTD does not restrict it
     to any fixed vocabulary. -->
<!ELEMENT meta (#PCDATA)>
<!ATTLIST meta name CDATA #REQUIRED>
<!-- meta.name may take values chosen from the Dublin Core element set -->

<!-- Link information. All three children are optional, but when present
     they must appear in the order outlinks, inlinks, inlinkHosts. -->
<!ELEMENT links (outlinks?, inlinks?, inlinkHosts?)>
<!-- outlinks and inlinks share the same content model: zero or more link
     elements. -->
<!ELEMENT outlinks (link*)>
<!ELEMENT inlinks (link*)>
<!-- inlinkHosts is plain character data, not a list of link elements. -->
<!ELEMENT inlinkHosts (#PCDATA)>
<!-- A link has an optional anchorText followed by a required location. -->
<!ELEMENT link (anchorText?, location)>
<!-- type is required; it is declared CDATA, so the values listed below are
     not enforced by the DTD. -->
<!ATTLIST link type CDATA #REQUIRED>
<!-- link.type may take the following values: "a", "img", "frame" -->
<!ELEMENT anchorText (#PCDATA)>
<!-- location is character data with an optional documentId attribute. -->
<!ELEMENT location (#PCDATA)>
<!ATTLIST location documentId CDATA #IMPLIED>

<!-- Analysis results, in fixed order: at most one domain, then any number
     of property elements, then any number of ranking elements, then any
     number of topic elements. An empty analysis element is valid. -->
<!ELEMENT analysis (domain?, property*, ranking*, topic*)>
<!ELEMENT domain (#PCDATA)>
<!-- A property is character data labelled by a required name attribute. -->
<!ELEMENT property (#PCDATA)>
<!ATTLIST property name CDATA #REQUIRED>
<!-- A ranking is character data labelled by a required scheme attribute. -->
<!ELEMENT ranking (#PCDATA)>
<!ATTLIST ranking scheme CDATA #REQUIRED>
<!-- A topic has a required class child followed by an optional terms child.
     Both score attributes are required; they are declared CDATA, so the DTD
     does not check that they are numeric. -->
<!ELEMENT topic (class, terms?)>
<!ATTLIST topic absoluteScore CDATA #REQUIRED
		relativeScore CDATA #REQUIRED>
<!ELEMENT class (#PCDATA)>
<!ELEMENT terms (#PCDATA)>


<!-- Linguistic analysis part of a record. Every child is optional and may
     occur at most once; those present must follow the order token, word,
     sentence, tagging, phrase, sectioning. Each child is currently declared
     as unstructured character data (see the per-element notes below). -->
<!ELEMENT linguisticAnalysis (token?, word?, sentence?, tagging?,
			      phrase?, sectioning?)>

<!-- This will be structured in the final version of the format -->
<!ELEMENT token (#PCDATA)>

<!-- This will be structured in the final version of the format -->
<!ELEMENT word (#PCDATA)>

<!-- This will be structured in the final version of the format -->
<!ELEMENT sentence (#PCDATA)>

<!-- This will be structured in the final version of the format -->
<!ELEMENT tagging (#PCDATA)>

<!-- This will be structured in the final version of the format -->
<!ELEMENT phrase (#PCDATA)>

<!-- This will be structured in the final version of the format -->
<!ELEMENT sectioning (#PCDATA)>


<!-- Optional last child of documentRecord. Currently declared as
     unstructured character data with no attributes. -->
<!-- This will be structured in the final version of the format -->
<!ELEMENT relevance (#PCDATA)>




( run in 1.574 second using v1.01-cache-2.11-cpan-e1769b4cff6 )