//[4]   	NameStartChar	   ::=   	":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
//[4a]   	NameChar	   ::=   	NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
//[5]   	Name	   ::=   	NameStartChar (NameChar)*
// First character of an XML name. Compared with production [4] this class
// leaves out ':' (the colon is matched explicitly by tagNamePattern below)
// and the supplementary range noted in the trailing comment.
var nameStartChar = /[A-Z_a-z\xC0-\xD6\xD8-\xF6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]///\u10000-\uEFFFF
// Any later character of a name: everything in nameStartChar (its source with
// the surrounding brackets sliced off) plus '-', '.', digits and the extra
// ranges of production [4a].
var nameChar = new RegExp("[\\-\\.0-9"+nameStartChar.source.slice(1,-1)+"\\u00B7\\u0300-\\u036F\\u203F-\\u2040]");
// A whole tag or attribute name: `name` or `prefix:name` (at most one colon).
var tagNamePattern = new RegExp('^'+nameStartChar.source+nameChar.source+'*(?:\:'+nameStartChar.source+nameChar.source+'*)?$');
//var tagNamePattern = /^[a-zA-Z_][\w\-\.]*(?:\:[a-zA-Z_][\w\-\.]*)?$/
//var handlers = 'resolveEntity,getExternalSubset,characters,endDocument,endElement,endPrefixMapping,ignorableWhitespace,processingInstruction,setDocumentLocator,skippedEntity,startDocument,startElement,startPrefixMapping,notationDecl,unparsedEntityDecl,error,fatalError,warning,attributeDecl,elementDecl,externalEntityDecl,internalEntityDecl,comment,endCDATA,endDTD,endEntity,startCDATA,startDTD,startEntity'.split(',')

//S_TAG, S_ATTR, S_EQ, S_ATTR_NOQUOT_VALUE
//S_ATTR_SPACE, S_ATTR_END, S_TAG_SPACE, S_TAG_CLOSE
// States of the start-tag scanner in parseElementStartPart.
var S_TAG = 0;//reading the tag name
var S_ATTR = 1;//reading an attribute name
var S_ATTR_SPACE=2;//attribute name finished, whitespace seen after it
var S_EQ = 3;//'=' seen, waiting for the attribute value
var S_ATTR_NOQUOT_VALUE = 4;//reading an attribute value that has no opening quote
var S_ATTR_END = 5;//quoted attribute value just closed, no whitespace yet
var S_TAG_SPACE = 6;//whitespace after the tag name or after an attribute value
var S_TAG_CLOSE = 7;//'/' of a self-closing element (<el />) seen
/**
 * SAX-style reader. It holds no state of its own; `parse` reads
 * `this.domBuilder` and `this.errorHandler`, so both must be assigned on the
 * instance before parsing.
 */
function XMLReader(){

}

XMLReader.prototype = {
	/**
	 * Parses `source`, reporting everything to `this.domBuilder`.
	 *
	 * @param source        markup text to parse
	 * @param defaultNSMap  prefix -> namespace URI map for the root scope;
	 *                      a shallow copy is handed to the parser
	 * @param entityMap     entity name -> replacement text
	 */
	parse:function(source,defaultNSMap,entityMap){
		var domBuilder = this.domBuilder;
		domBuilder.startDocument();
		// work on a copy so the parser never receives the caller's map object
		var nsMapCopy = {};
		_copy(defaultNSMap,nsMapCopy);
		parse(source,nsMapCopy,entityMap,
				domBuilder,this.errorHandler);
		domBuilder.endDocument();
	}
}
/**
 * Main SAX loop: scans `source` left to right and reports text runs, end
 * tags, processing instructions, declarations and start tags to `domBuilder`.
 *
 * @param source            markup text to parse
 * @param defaultNSMapCopy  prefix -> namespace URI map used for the root scope
 * @param entityMap         entity name -> replacement text
 * @param domBuilder        receiver of the SAX callbacks; its `locator`
 *                          (if any) is kept up to date
 * @param errorHandler      object with warning/error/fatalError methods
 */
function parse(source,defaultNSMapCopy,entityMap,domBuilder,errorHandler){
	function fixedFromCharCode(code) {
		// String.fromCharCode does not support code points above 0xFFFF
		// directly, so build the surrogate pair by hand.
		if (code > 0xffff) {
			code -= 0x10000;
			var surrogate1 = 0xd800 + (code >> 10)
				, surrogate2 = 0xdc00 + (code & 0x3ff);

			return String.fromCharCode(surrogate1, surrogate2);
		} else {
			return String.fromCharCode(code);
		}
	}
	// Replacement callback for /&#?\w+;/g: `a` is the whole reference, e.g. "&amp;".
	function entityReplacer(a){
		var k = a.slice(1,-1);
		// Own properties only: with `in`, names such as "constructor" or
		// "toString" would resolve to Object.prototype members.
		if(Object.prototype.hasOwnProperty.call(entityMap,k)){
			return entityMap[k];
		}else if(k.charAt(0) === '#'){
			// "#x41" becomes "0x41" so parseInt reads it as hexadecimal
			return fixedFromCharCode(parseInt(k.substr(1).replace('x','0x')))
		}else{
			errorHandler.error('entity not found:'+a);
			return a;
		}
	}
	// Emits source[start, end) as character data and advances `start`.
	function appendText(end){
		if(end>start){
			var xt = source.substring(start,end).replace(/&#?\w+;/g,entityReplacer);
			locator&&position(start);
			// Pass the decoded length: the raw span length would truncate the
			// text when an entity expands to more characters than its reference.
			domBuilder.characters(xt,0,xt.length);
			start = end
		}
	}
	// Moves the locator to source offset `p` (`m` is only a scratch variable).
	function position(p,m){
		while(p>=lineEnd && (m = linePattern.exec(source))){
			lineStart = m.index;
			lineEnd = lineStart + m[0].length;
			locator.lineNumber++;
			//console.log('line++:',locator,startPos,endPos)
		}
		locator.columnNumber = p-lineStart+1;
	}
	var lineStart = 0;
	var lineEnd = 0;
	var linePattern = /.*(?:\r\n?|\n)|.*$/g
	var locator = domBuilder.locator;

	// Stack of open elements; the bottom entry only carries the root namespace map.
	var parseStack = [{currentNSMap:defaultNSMapCopy}]
	var closeMap = {};
	var start = 0;
	var end;
	while(true){
		try{
			var tagStart = source.indexOf('<',start);
			if(tagStart<0){
				// No more markup: attach any non-blank tail as a text node.
				if(!source.substr(start).match(/^\s*$/)){
					var doc = domBuilder.doc;
					var text = doc.createTextNode(source.substr(start));
					doc.appendChild(text);
					domBuilder.currentElement = text;
				}
				return;
			}
			if(tagStart>start){
				appendText(tagStart);
			}
			switch(source.charAt(tagStart+1)){
			case '/':// end tag
				end = source.indexOf('>',tagStart+3);
				var tagName = source.substring(tagStart+2,end);
				var config = parseStack.pop();
				if(end<0){
					tagName = source.substring(tagStart+2).replace(/[\s<].*/,'');
					//console.error('#@@@@@@'+tagName)
					errorHandler.error("end tag name: "+tagName+' is not complete:'+config.tagName);
					end = tagStart+1+tagName.length;
				}else if(tagName.match(/\s</)){
					tagName = tagName.replace(/[\s<].*/,'');
					errorHandler.error("end tag name: "+tagName+' maybe not complete');
					end = tagStart+1+tagName.length;
				}
				//console.error(parseStack.length,parseStack)
				//console.error(config);
				var localNSMap = config.localNSMap;
				var endMatch = config.tagName == tagName;
				var endIgnoreCaseMach = endMatch || config.tagName&&config.tagName.toLowerCase() == tagName.toLowerCase()
				if(endIgnoreCaseMach){
					domBuilder.endElement(config.uri,config.localName,tagName);
					if(localNSMap){
						for(var prefix in localNSMap){
							domBuilder.endPrefixMapping(prefix) ;
						}
					}
					if(!endMatch){
						errorHandler.fatalError("end tag name: "+tagName+' is not match the current start tagName:'+config.tagName );
					}
				}else{
					// Not the element that is open: leave it on the stack.
					parseStack.push(config)
				}

				end++;
				break;
			case '?':// <?...?>
				locator&&position(tagStart);
				end = parseInstruction(source,tagStart,domBuilder);
				break;
			case '!':// <!doctype,<![CDATA,<!--
				locator&&position(tagStart);
				end = parseDCC(source,tagStart,domBuilder,errorHandler);
				break;
			default:// start tag
				locator&&position(tagStart);
				var el = new ElementAttributes();
				var currentNSMap = parseStack[parseStack.length-1].currentNSMap;
				//elStartEnd
				end = parseElementStartPart(source,tagStart,el,currentNSMap,entityReplacer,errorHandler);
				var len = el.length;

				if(!el.closed && fixSelfClosed(source,end,el.tagName,closeMap)){
					el.closed = true;
					if(!entityMap.nbsp){
						errorHandler.warning('unclosed xml attribute');
					}
				}
				if(locator && len){
					// Give every attribute its own locator snapshot, then
					// report the element itself with the tag-start position.
					var locator2 = copyLocator(locator,{});
					for(var i = 0;i<len;i++){
						var a = el[i];
						position(a.offset);
						a.locator = copyLocator(locator,{});
					}
					domBuilder.locator = locator2
					if(appendElement(el,domBuilder,currentNSMap)){
						parseStack.push(el)
					}
					domBuilder.locator = locator;
				}else{
					if(appendElement(el,domBuilder,currentNSMap)){
						parseStack.push(el)
					}
				}

				if(el.uri === 'http://www.w3.org/1999/xhtml' && !el.closed){
					end = parseHtmlSpecialContent(source,end,el.tagName,entityReplacer,domBuilder)
				}else{
					end++;
				}
			}
		}catch(e){
			errorHandler.error('element parse error: '+e)
			end = -1;
			//throw e;
		}
		if(end>start){
			start = end;
		}else{
			//TODO: the SAX scan may step backwards here, so positions can be wrong
			appendText(Math.max(tagStart,start)+1);
		}
	}
}
/**
 * Copies the position fields of locator `f` onto `t`.
 *
 * @param f  source object providing lineNumber and columnNumber
 * @param t  target object that receives both fields
 * @return   `t`
 */
function copyLocator(f,t){
	t.lineNumber = f.lineNumber;
	t.columnNumber = f.columnNumber;
	return t;
}
/**
 * Scans one start tag beginning at `source[start] === '<'` and records the
 * tag name and attributes on `el` (via el.setTagName / el.add / el.closed).
 * It is a character-driven state machine over the S_* states.
 *
 * @param source          full document text
 * @param start           index of the '<' that opens the tag
 * @param el              attribute collector being filled
 * @param currentNSMap    namespace map in scope; only the default namespace is
 *                        read, to silence warnings for HTML boolean attributes
 * @param entityReplacer  replacement callback applied to attribute values
 * @param errorHandler    receives warning()/error() for recoverable problems
 * @return index of the closing '>' of the start tag, or the index just past
 *         the end of `source` when the input ends inside the tag
 * @throws Error for malformed attribute syntax that cannot be recovered
 */
function parseElementStartPart(source,start,el,currentNSMap,entityReplacer,errorHandler){
	var attrName;
	var value;
	var p = ++start;
	var s = S_TAG;//status
	while(true){
		var c = source.charAt(p);
		switch(c){
		case '=':
			if(s === S_ATTR){//attrName
				attrName = source.slice(start,p);
				s = S_EQ;
			}else if(s === S_ATTR_SPACE){
				s = S_EQ;
			}else{
				//fatalError: equal must after attrName or space after attrName
				throw new Error('attribute equal must after attrName');
			}
			break;
		case '\'':
		case '"':
			if(s === S_EQ || s === S_ATTR //|| s == S_ATTR_SPACE
				){//equal
				if(s === S_ATTR){
					errorHandler.warning('attribute value must after "="')
					attrName = source.slice(start,p)
				}
				start = p+1;
				// jump straight to the matching closing quote
				p = source.indexOf(c,start)
				if(p>0){
					value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer);
					el.add(attrName,value,start-1);
					s = S_ATTR_END;
				}else{
					//fatalError: no end quot match
					throw new Error('attribute value no end \''+c+'\' match');
				}
			}else if(s == S_ATTR_NOQUOT_VALUE){
				// value started without a quote but ends with one
				value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer);
				el.add(attrName,value,start);
				errorHandler.warning('attribute "'+attrName+'" missed start quot('+c+')!!');
				start = p+1;
				s = S_ATTR_END
			}else{
				//fatalError: no equal before
				throw new Error('attribute value must after "="');
			}
			break;
		case '/':
			switch(s){
			case S_TAG:
				el.setTagName(source.slice(start,p));
				// falls through
			case S_ATTR_END:
			case S_TAG_SPACE:
			case S_TAG_CLOSE:
				s =S_TAG_CLOSE;
				el.closed = true;
				// falls through
			case S_ATTR_NOQUOT_VALUE:
			case S_ATTR:
				// '/' stays part of the pending name/value; the '>' branch
				// strips a trailing '/' and marks the element closed
				break;
			case S_ATTR_SPACE:
				// `<el attr />`: the attribute is still pending (it is added
				// when '>' arrives) but the element is self-closing
				el.closed = true;
				break;
			//case S_EQ:
			default:
				throw new Error("attribute invalid close char('/')")
			}
			break;
		case ''://end document
			//throw new Error('unexpected end of input')
			errorHandler.error('unexpected end of input');
			if(s == S_TAG){
				el.setTagName(source.slice(start,p));
			}
			return p;
		case '>':
			switch(s){
			case S_TAG:
				el.setTagName(source.slice(start,p));
				// falls through
			case S_ATTR_END:
			case S_TAG_SPACE:
			case S_TAG_CLOSE:
				break;//normal
			case S_ATTR_NOQUOT_VALUE://Compatible state
			case S_ATTR:
				value = source.slice(start,p);
				if(value.slice(-1) === '/'){
					el.closed = true;
					value = value.slice(0,-1)
				}
				// falls through
			case S_ATTR_SPACE:
				if(s === S_ATTR_SPACE){
					value = attrName;
				}
				if(s == S_ATTR_NOQUOT_VALUE){
					errorHandler.warning('attribute "'+value+'" missed quot(")!!');
					el.add(attrName,value.replace(/&#?\w+;/g,entityReplacer),start)
				}else{
					// attribute without a value: its name doubles as its value
					if(currentNSMap[''] !== 'http://www.w3.org/1999/xhtml' || !value.match(/^(?:disabled|checked|selected)$/i)){
						errorHandler.warning('attribute "'+value+'" missed value!! "'+value+'" instead!!')
					}
					el.add(value,value,start)
				}
				break;
			case S_EQ:
				throw new Error('attribute value missed!!');
			}
			return p;
		/*xml space '\x20' | #x9 | #xD | #xA; */
		case '\u0080':
			c = ' ';
			// falls through: U+0080 is handled like whitespace
		default:
			if(c<= ' '){//space
				switch(s){
				case S_TAG:
					el.setTagName(source.slice(start,p));//tagName
					s = S_TAG_SPACE;
					break;
				case S_ATTR:
					attrName = source.slice(start,p)
					s = S_ATTR_SPACE;
					break;
				case S_ATTR_NOQUOT_VALUE:
					value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer);
					errorHandler.warning('attribute "'+value+'" missed quot(")!!');
					el.add(attrName,value,start)
					// falls through
				case S_ATTR_END:
					s = S_TAG_SPACE;
					break;
				//S_TAG_SPACE, S_EQ, S_ATTR_SPACE, S_TAG_CLOSE: extra whitespace is ignored
				}
			}else{//not space
				switch(s){
				//S_TAG, S_ATTR, S_ATTR_NOQUOT_VALUE: keep accumulating characters
				case S_ATTR_SPACE:
					// a new name starts, so the previous attribute had no value
					if(currentNSMap[''] !== 'http://www.w3.org/1999/xhtml' || !attrName.match(/^(?:disabled|checked|selected)$/i)){
						errorHandler.warning('attribute "'+attrName+'" missed value!! "'+attrName+'" instead2!!')
					}
					el.add(attrName,attrName,start);
					start = p;
					s = S_ATTR;
					break;
				case S_ATTR_END:
					errorHandler.warning('attribute space is required"'+attrName+'"!!')
					// falls through
				case S_TAG_SPACE:
					s = S_ATTR;
					start = p;
					break;
				case S_EQ:
					s = S_ATTR_NOQUOT_VALUE;
					start = p;
					break;
				case S_TAG_CLOSE:
					throw new Error("elements closed character '/' and '>' must be connected to");
				}
			}
		}//end outer switch
		p++;
	}
}
/**
 * Resolves namespaces for a freshly scanned start tag and reports it to
 * `domBuilder`.
 *
 * Namespace declarations (xmlns / xmlns:prefix) found on `el` are applied to
 * a copy of `currentNSMap`, so the caller's map is never modified. Each
 * attribute gets `localName` (and `prefix`/`uri` where applicable); `el` gets
 * `prefix`, `localName` and `uri`.
 *
 * @param el            array-like attribute collector with tagName/closed
 * @param domBuilder    receives startPrefixMapping/startElement and, for a
 *                      closed element, endElement/endPrefixMapping
 * @param currentNSMap  prefix -> namespace URI map of the enclosing scope
 * @return true when the element stays open (the caller must push `el` on its
 *         stack); undefined when it was closed and already ended
 */
function appendElement(el,domBuilder,currentNSMap){
	var tagName = el.tagName;
	var localNSMap = null;
	var prefix, localName, nsPrefix;
	var i = el.length;
	// Pass 1: split attribute names and collect namespace declarations.
	while(i--){
		var a = el[i];
		var qName = a.qName;
		var value = a.value;
		var nsp = qName.indexOf(':');
		if(nsp>0){
			prefix = a.prefix = qName.slice(0,nsp);
			localName = qName.slice(nsp+1);
			nsPrefix = prefix === 'xmlns' && localName
		}else{
			localName = qName;
			prefix = null
			nsPrefix = qName === 'xmlns' && ''
		}
		//can not set prefix,because prefix !== ''
		a.localName = localName ;
		// nsPrefix is `false` for ordinary attributes and a string ('' for
		// the default namespace) for declarations
		if(nsPrefix !== false){//hack!!
			if(localNSMap == null){
				localNSMap = {}
				// first declaration on this element: switch to a private copy
				var inheritedNSMap = currentNSMap;
				currentNSMap = {};
				_copy(inheritedNSMap,currentNSMap)
			}
			currentNSMap[nsPrefix] = localNSMap[nsPrefix] = value;
			a.uri = 'http://www.w3.org/2000/xmlns/'
			domBuilder.startPrefixMapping(nsPrefix, value)
		}
	}
	// Pass 2: now that every declaration is known, resolve attribute prefixes.
	i = el.length;
	while(i--){
		a = el[i];
		prefix = a.prefix;
		if(prefix){//no prefix attribute has no namespace
			if(prefix === 'xml'){
				// the xml prefix is always bound to this namespace; it must
				// not be overwritten by a lookup in the scope map
				a.uri = 'http://www.w3.org/XML/1998/namespace';
			}else if(prefix !== 'xmlns'){
				a.uri = currentNSMap[prefix || '']
			}
		}
	}
	nsp = tagName.indexOf(':');
	if(nsp>0){
		prefix = el.prefix = tagName.slice(0,nsp);
		localName = el.localName = tagName.slice(nsp+1);
	}else{
		prefix = null;//important!!
		localName = el.localName = tagName;
	}
	//no prefix element has default namespace
	var ns = el.uri = currentNSMap[prefix || ''];
	domBuilder.startElement(ns,localName,tagName,el);
	//endPrefixMapping and startPrefixMapping have not any help for dom builder
	if(el.closed){
		domBuilder.endElement(ns,localName,tagName);
		if(localNSMap){
			for(prefix in localNSMap){
				domBuilder.endPrefixMapping(prefix)
			}
		}
	}else{
		// kept for the matching end tag
		el.currentNSMap = currentNSMap;
		el.localNSMap = localNSMap;
		return true;
	}
}
/**
 * Handles the raw-text content of HTML <script> and <textarea> elements.
 *
 * When the content contains '&' or '<', it is reported in one piece through
 * domBuilder.characters: script content verbatim, textarea content with
 * entity references replaced.
 *
 * @param source          full document text
 * @param elStartEnd      index of the '>' that ends the start tag
 * @param tagName         tag name exactly as written in the start tag
 * @param entityReplacer  replacement callback for entity references
 * @param domBuilder      receiver of characters()
 * @return index where parsing continues: the '<' of the closing tag when the
 *         content was consumed here, otherwise elStartEnd+1
 */
function parseHtmlSpecialContent(source,elStartEnd,tagName,entityReplacer,domBuilder){
	if(/^(?:script|textarea)$/i.test(tagName)){
		var elEndStart = source.indexOf('</'+tagName+'>',elStartEnd);
		// Without a literal closing tag there is no content span to extract.
		// substring() with -1 would swap its arguments and return the text
		// *before* the element, so fall back to ordinary parsing instead.
		if(elEndStart>=0){
			var text = source.substring(elStartEnd+1,elEndStart);
			if(/[&<]/.test(text)){
				if(!/^script$/i.test(tagName)){
					// textarea: entity references are still decoded
					text = text.replace(/&#?\w+;/g,entityReplacer);
				}
				domBuilder.characters(text,0,text.length);
				return elEndStart;
			}
		}
	}
	return elStartEnd+1;
}
/**
 * Tells whether an element that was not written as self-closing has no
 * closing tag after its start tag, so the caller can treat it as closed.
 *
 * The position of the last `</tagName>` (or, failing that, the last
 * `</tagName` prefix) is looked up once per tag name and cached in `closeMap`.
 *
 * @param source      full document text
 * @param elStartEnd  index of the end of the start tag
 * @param tagName     tag name to look for
 * @param closeMap    cache object: tag name -> last closing-tag position
 * @return true when no closing tag occurs at or after elStartEnd
 */
function fixSelfClosed(source,elStartEnd,tagName,closeMap){
	// Own-property check: for names such as "constructor" a plain lookup
	// would return an inherited member and the cache would look populated.
	var pos = Object.prototype.hasOwnProperty.call(closeMap,tagName) ? closeMap[tagName] : null;
	if(pos == null){
		pos = source.lastIndexOf('</'+tagName+'>')
		if(pos<elStartEnd){//closing tag not properly terminated
			pos = source.lastIndexOf('</'+tagName)
		}
		closeMap[tagName] =pos
	}
	return pos<elStartEnd;
}
/**
 * Shallow-copies every enumerable property of `source` (inherited ones
 * included, as for-in visits them) onto `target`.
 */
function _copy(source,target){
	for(var n in source){
		target[n] = source[n];
	}
}
/**
 * Parses a construct starting with '<!' at `start`: a comment, a CDATA
 * section or a DOCTYPE declaration.
 *
 * @param source        full document text
 * @param start         index of the '<' of '<!'
 * @param domBuilder    receives comment / startCDATA / characters / endCDATA /
 *                      startDTD / endDTD
 * @param errorHandler  receives error() for unterminated constructs
 * @return index just past the construct, or -1 when it could not be parsed
 */
function parseDCC(source,start,domBuilder,errorHandler){//sure start with '<!'
	var next= source.charAt(start+2)
	var end;
	switch(next){
	case '-':
		if(source.charAt(start + 3) === '-'){
			end = source.indexOf('-->',start+4);
			//append comment source.substring(4,end)//<!--
			if(end>start){
				domBuilder.comment(source,start+4,end-start-4);
				return end+3;
			}else{
				errorHandler.error("Unclosed comment");
				return -1;
			}
		}else{
			//error
			return -1;
		}
	default:
		if(source.substr(start+3,6) == 'CDATA['){
			end = source.indexOf(']]>',start+9);
			if(end<0){
				// Without the terminator the length below would be negative
				// and the returned offset (2) would point into the document.
				errorHandler.error("Unclosed CDATA");
				return -1;
			}
			domBuilder.startCDATA();
			domBuilder.characters(source,start+9,end-start-9);
			domBuilder.endCDATA()
			return end+3;
		}
		//<!DOCTYPE
		//startDTD(java.lang.String name, java.lang.String publicId, java.lang.String systemId)
		var matchs = split(source,start);
		var len = matchs.length;
		if(len>1 && /!doctype/i.test(matchs[0][0])){
			var name = matchs[1][0];
			// each of these is `false` when the corresponding token is absent
			var pubid = len>3 && /^public$/i.test(matchs[2][0]) && matchs[3][0]
			var sysid = len>4 && matchs[4][0];
			var lastMatch = matchs[len-1]
			// strip the surrounding quotes from the identifiers
			domBuilder.startDTD(name,pubid && pubid.replace(/^(['"])(.*?)\1$/,'$2'),
					sysid && sysid.replace(/^(['"])(.*?)\1$/,'$2'));
			domBuilder.endDTD();

			return lastMatch.index+lastMatch[0].length
		}
	}
	return -1;
}
/**
 * Parses a processing instruction `<?target data?>` starting at `start`.
 *
 * @param source      full document text
 * @param start       index of the '<' of '<?'
 * @param domBuilder  receives processingInstruction(target, data)
 * @return index just past the closing '?>', or -1 when the instruction is
 *         unterminated or malformed
 */
function parseInstruction(source,start,domBuilder){
	var end = source.indexOf('?>',start);
	// indexOf reports "not found" as -1, which is truthy; a plain truthiness
	// test would slice the wrong part of the document in that case.
	if(end>start){
		var match = source.substring(start,end).match(/^<\?(\S*)\s*([\s\S]*?)\s*$/);
		if(match){
			domBuilder.processingInstruction(match[1], match[2]) ;
			return end+2;
		}else{//error
			return -1;
		}
	}
	return -1;
}
/**
 * Array-like collector for one start tag: the tag name plus its attributes,
 * stored under the numeric keys 0..length-1.
 *
 * @param source  accepted but not used
 */
function ElementAttributes(source){

}
ElementAttributes.prototype = {
	/**
	 * Stores the tag name after validating it against tagNamePattern.
	 * @throws Error when the name is not a valid (optionally prefixed) name
	 */
	setTagName:function(tagName){
		if(!tagNamePattern.test(tagName)){
			throw new Error('invalid tagName:'+tagName)
		}
		this.tagName = tagName
	},
	/**
	 * Appends an attribute record {qName, value, offset}.
	 * @throws Error when qName is not a valid (optionally prefixed) name
	 */
	add:function(qName,value,offset){
		if(!tagNamePattern.test(qName)){
			throw new Error('invalid attribute:'+qName)
		}
		this[this.length++] = {qName:qName,value:value,offset:offset}
	},
	// number of attributes; shadowed by an own property on the first add()
	length:0,
	// Accessors for the i-th attribute. localName, locator and uri are not
	// set by add(); they are filled in by other code after scanning.
	getLocalName:function(i){return this[i].localName},
	getLocator:function(i){return this[i].locator},
	getQName:function(i){return this[i].qName},
	getURI:function(i){return this[i].uri},
	getValue:function(i){return this[i].value}
}
/**
 * Makes `parent` the prototype of `thiz` and returns the resulting object.
 * This default version assigns __proto__ on `thiz` itself.
 */
function _set_proto_(thiz,parent){
	thiz.__proto__ = parent;
	return thiz;
}
// Feature test: if assigning __proto__ had no effect on the prototype chain,
// replace the helper with one that builds a new object inheriting from
// `parent` and copies the enumerable properties of `thiz` onto it.
if(!(_set_proto_({},_set_proto_.prototype) instanceof _set_proto_)){
	_set_proto_ = function(thiz,parent){
		function p(){};
		p.prototype = parent;
		p = new p();
		for(parent in thiz){
			p[parent] = thiz[parent];
		}
		return p;
	}
}
/**
 * Tokenizes a declaration such as <!DOCTYPE ...> starting at `start`.
 *
 * Tokens are quoted strings, runs of characters other than whitespace and
 * `< > / =` (optionally followed by '='), and the terminators `>`, `/>`
 * and `<`. The first token (the opening '<') is skipped.
 *
 * @param source  full document text
 * @param start   index of the opening '<'
 * @return array of RegExp match objects ending with the terminator token, or
 *         undefined when the input ends before a terminator is found
 */
function split(source,start){
	var match;
	var buf = [];
	var reg = /'[^']+'|"[^"]+"|[^\s<>\/=]+=?|(\/?\s*>|<)/g;
	reg.lastIndex = start;
	reg.exec(source);//skip <
	while(match = reg.exec(source)){
		buf.push(match);
		// group 1 only participates for a terminator token
		if(match[1])return buf;
	}
}
// Module export: the XMLReader constructor is the only name published here.
exports.XMLReader = XMLReader;