The xmlParseXMLDecl function in parser.c in libxml2 before 2.9.3 allows context-dependent attackers to obtain sensitive information via an (1) unterminated encoding value or (2) incomplete XML declaration in XML data, which triggers an out-of-bounds heap read.
/**
* xmlParseEncName:
* @ctxt: an XML parser context
*
* parse the XML encoding name
*
* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
*
* Returns the encoding name value or NULL
*/xmlChar*xmlParseEncName(xmlParserCtxtPtrctxt){xmlChar*buf=NULL;intlen=0;intsize=10;xmlCharcur;cur=CUR;if(((cur>='a')&&(cur<='z'))||((cur>='A')&&(cur<='Z'))){buf=(xmlChar*)xmlMallocAtomic(size*sizeof(xmlChar));if(buf==NULL){xmlErrMemory(ctxt,NULL);return(NULL);}buf[len++]=cur;NEXT;cur=CUR;while(((cur>='a')&&(cur<='z'))||((cur>='A')&&(cur<='Z'))||((cur>='0')&&(cur<='9'))||(cur=='.')||(cur=='_')||(cur=='-')){if(len+1>=size){xmlChar*tmp;size*=2;tmp=(xmlChar*)xmlRealloc(buf,size*sizeof(xmlChar));if(tmp==NULL){xmlErrMemory(ctxt,NULL);xmlFree(buf);return(NULL);}buf=tmp;}buf[len++]=cur;NEXT;cur=CUR;if(cur==0){SHRINK;GROW;cur=CUR;}}buf[len]=0;}else{xmlFatalErr(ctxt,XML_ERR_ENCODING_NAME,NULL);}return(buf);}
constxmlChar*xmlParseEncodingDecl(xmlParserCtxtPtrctxt){// ...
/*
* UTF-16 encoding stwich has already taken place at this stage,
* more over the little-endian/big-endian selection is already done
*/if((encoding!=NULL)&&((!xmlStrcasecmp(encoding,BAD_CAST"UTF-16"))||(!xmlStrcasecmp(encoding,BAD_CAST"UTF16")))){/*
* If no encoding was passed to the parser, that we are
* using UTF-16 and no decoder is present i.e. the
* document is apparently UTF-8 compatible, then raise an
* encoding mismatch fatal error
*/if((ctxt->encoding==NULL)&&(ctxt->input->buf!=NULL)&&(ctxt->input->buf->encoder==NULL)){xmlFatalErrMsg(ctxt,XML_ERR_INVALID_ENCODING,"Document labelled UTF-16 but has UTF-8 content\n");}if(ctxt->encoding!=NULL)xmlFree((xmlChar*)ctxt->encoding);ctxt->encoding=encoding;}/*
* UTF-8 encoding is handled natively
*/elseif((encoding!=NULL)&&((!xmlStrcasecmp(encoding,BAD_CAST"UTF-8"))||(!xmlStrcasecmp(encoding,BAD_CAST"UTF8")))){if(ctxt->encoding!=NULL)xmlFree((xmlChar*)ctxt->encoding);ctxt->encoding=encoding;}elseif(encoding!=NULL){xmlCharEncodingHandlerPtrhandler;if(ctxt->input->encoding!=NULL)xmlFree((xmlChar*)ctxt->input->encoding);ctxt->input->encoding=encoding;handler=xmlFindCharEncodingHandler((constchar*)encoding);if(handler!=NULL){xmlSwitchToEncoding(ctxt,handler);}else{xmlFatalErrMsgStr(ctxt,XML_ERR_UNSUPPORTED_ENCODING,"Unsupported encoding %s\n",encoding);return(NULL);}}// ...
return(encoding);}
intxmlSwitchToEncoding(xmlParserCtxtPtrctxt,xmlCharEncodingHandlerPtrhandler){return(xmlSwitchToEncodingInt(ctxt,handler,-1));}staticintxmlSwitchToEncodingInt(xmlParserCtxtPtrctxt,xmlCharEncodingHandlerPtrhandler,intlen){intret=0;if(handler!=NULL){if(ctxt->input!=NULL){ret=xmlSwitchInputEncodingInt(ctxt,ctxt->input,handler,len);}else{xmlErrInternal(ctxt,"xmlSwitchToEncoding : no input\n",NULL);return(-1);}/*
* The parsing is now done in UTF8 natively
*/ctxt->charset=XML_CHAR_ENCODING_UTF8;}elsereturn(-1);return(ret);}
/*
 * xmlSwitchInputEncodingInt (first part; the listing is interrupted after
 * the xmlBufCreate() call and continues further down):
 *
 * Installs @handler as the encoder of @input's buffer.
 *
 * Returns -1 if @handler or @input is NULL.
 *
 * If the buffer already has an encoder:
 *  - the same handler: nothing to do, returns 0;
 *  - a different one: the old encoder is closed with xmlCharEncCloseFunc(),
 *    @handler replaces it, and 0 is returned without touching the content.
 *
 * Otherwise @handler is installed and, if the buffer is not empty:
 *  - a byte order mark at input->cur matching the handler name is skipped
 *    (FF FE for "UTF-16LE"/"UTF-16", FE FF for "UTF-16BE",
 *    EF BB BF for "UTF-8");
 *  - the bytes before input->cur are dropped from the buffer;
 *  - that buffer becomes input->buf->raw and a fresh buffer obtained from
 *    xmlBufCreate() becomes input->buf->buffer;
 *  - rawconsumed is set to the number of dropped bytes and `use` records
 *    how many raw bytes are pending conversion.
 */
staticintxmlSwitchInputEncodingInt(xmlParserCtxtPtrctxt,xmlParserInputPtrinput,xmlCharEncodingHandlerPtrhandler,intlen){intnbchars;if(handler==NULL)return(-1);if(input==NULL)return(-1);if(input->buf!=NULL){if(input->buf->encoder!=NULL){/*
* Check in case the auto encoding detection triggered
* in already.
*/if(input->buf->encoder==handler)return(0);/*
* "UTF-16" can be used for both LE and BE
if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name,
BAD_CAST "UTF-16", 6)) &&
(!xmlStrncmp(BAD_CAST handler->name,
BAD_CAST "UTF-16", 6))) {
return(0);
}
*//*
* Note: this is a bit dangerous, but that's what it
* takes to use nearly compatible signature for different
* encodings.
*/xmlCharEncCloseFunc(input->buf->encoder);input->buf->encoder=handler;return(0);}input->buf->encoder=handler;// ...
/*
* Is there already some content down the pipe to convert ?
*/if(xmlBufIsEmpty(input->buf->buffer)==0){intprocessed;unsignedintuse;/*
* Specific handling of the Byte Order Mark for
* UTF-16
*/if((handler->name!=NULL)&&(!strcmp(handler->name,"UTF-16LE")||!strcmp(handler->name,"UTF-16"))&&(input->cur[0]==0xFF)&&(input->cur[1]==0xFE)){input->cur+=2;}if((handler->name!=NULL)&&(!strcmp(handler->name,"UTF-16BE"))&&(input->cur[0]==0xFE)&&(input->cur[1]==0xFF)){input->cur+=2;}/*
* Errata on XML-1.0 June 20 2001
* Specific handling of the Byte Order Mark for
* UTF-8
*/if((handler->name!=NULL)&&(!strcmp(handler->name,"UTF-8"))&&(input->cur[0]==0xEF)&&(input->cur[1]==0xBB)&&(input->cur[2]==0xBF)){input->cur+=3;}/*
* Shrink the current input buffer.
* Move it as the raw buffer and create a new input buffer
*/processed=input->cur-input->base;xmlBufShrink(input->buf->buffer,processed);input->buf->raw=input->buf->buffer;input->buf->buffer=xmlBufCreate();input->buf->rawconsumed=processed;use=xmlBufUse(input->buf->raw);
/*
* NOTE(review): the result of xmlBufCreate() is stored without a NULL check
* in the code shown here, and the BOM tests read input->cur[1]/[2] without
* a visible length check - confirm both are safe in the full source.
*/
/**
* xmlBufCreate:
*
* routine to create an XML buffer.
* returns the new structure.
*/xmlBufPtrxmlBufCreate(void){xmlBufPtrret;ret=(xmlBufPtr)xmlMalloc(sizeof(xmlBuf));if(ret==NULL){xmlBufMemoryError(NULL,"creating buffer");return(NULL);}ret->compat_use=0;ret->use=0;ret->error=0;ret->buffer=NULL;ret->size=xmlDefaultBufferSize;ret->compat_size=xmlDefaultBufferSize;ret->alloc=xmlBufferAllocScheme;ret->content=(xmlChar*)xmlMallocAtomic(ret->size*sizeof(xmlChar));if(ret->content==NULL){xmlBufMemoryError(ret,"creating buffer");xmlFree(ret);return(NULL);}ret->content[0]=0;ret->contentIO=NULL;return(ret);}
/*
 * Remainder of xmlSwitchInputEncodingInt (the listing resumes here):
 *
 * Converts the pending raw bytes into the new input buffer. HTML contexts
 * call xmlCharEncInput(input->buf, 1); all others call
 * xmlCharEncFirstLineInput(input->buf, len) so that only the start of the
 * document is converted at this point.
 *
 * A negative conversion result is reported with xmlErrInternal() and makes
 * the function return -1. Otherwise rawconsumed is increased by the number
 * of raw bytes the conversion used up (use minus what is still left in
 * raw), xmlBufResetInput() is called on the new buffer (presumably to
 * re-point input->base/cur/end at it - confirm), and 0 is returned.
 *
 * When input->buf is NULL, a zero input->length is an error (-1); any other
 * length returns 0 without doing anything.
 */
// ...
if(ctxt->html){/*
* convert as much as possible of the buffer
*/nbchars=xmlCharEncInput(input->buf,1);}else{/*
* convert just enough to get
* '<?xml version="1.0" encoding="xxx"?>'
* parsed with the autodetected encoding
* into the parser reading buffer.
*/nbchars=xmlCharEncFirstLineInput(input->buf,len);}if(nbchars<0){xmlErrInternal(ctxt,"switching encoding: encoder error\n",NULL);return(-1);}input->buf->rawconsumed+=use-xmlBufUse(input->buf->raw);xmlBufResetInput(input->buf->buffer,input);}return(0);}elseif(input->length==0){/*
* When parsing a static memory array one must know the
* size to be able to convert the buffer.
*/xmlErrInternal(ctxt,"switching encoding : no input\n",NULL);return(-1);}return(0);}
intxmlCharEncFirstLineInput(xmlParserInputBufferPtrinput,intlen){intret=-2;size_twritten;size_ttoconv;intc_in;intc_out;xmlBufPtrin;xmlBufPtrout;if((input==NULL)||(input->encoder==NULL)||(input->buffer==NULL)||(input->raw==NULL))return(-1);out=input->buffer;in=input->raw;toconv=xmlBufUse(in);if(toconv==0)return(0);written=xmlBufAvail(out)-1;/* count '\0' *//*
* echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
* 45 chars should be sufficient to reach the end of the encoding
* declaration without going too far inside the document content.
* on UTF-16 this means 90bytes, on UCS4 this means 180
* The actual value depending on guessed encoding is passed as @len
* if provided
*/if(len>=0){if(toconv>(unsignedint)len)toconv=len;}else{if(toconv>180)toconv=180;}if(toconv*2>=written){xmlBufGrow(out,toconv*2);written=xmlBufAvail(out)-1;}if(written>360)written=360;c_in=toconv;c_out=written;if(input->encoder->input!=NULL){ret=input->encoder->input(xmlBufEnd(out),&c_out,xmlBufContent(in),&c_in);xmlBufShrink(in,c_in);xmlBufAddLen(out,c_out);}#ifdef LIBXML_ICONV_ENABLED
elseif(input->encoder->iconv_in!=NULL){ret=xmlIconvWrapper(input->encoder->iconv_in,xmlBufEnd(out),&c_out,xmlBufContent(in),&c_in);xmlBufShrink(in,c_in);xmlBufAddLen(out,c_out);if(ret==-1)ret=-3;}#endif /* LIBXML_ICONV_ENABLED */#ifdef LIBXML_ICU_ENABLED
elseif(input->encoder->uconv_in!=NULL){ret=xmlUconvWrapper(input->encoder->uconv_in,1,xmlBufEnd(out),&c_out,xmlBufContent(in),&c_in);xmlBufShrink(in,c_in);xmlBufAddLen(out,c_out);if(ret==-1)ret=-3;}#endif /* LIBXML_ICU_ENABLED */switch(ret){case0:#ifdef DEBUG_ENCODING
xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input\n",c_in,c_out);#endif
break;case-1:#ifdef DEBUG_ENCODING
xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",c_in,c_out,(int)xmlBufUse(in));#endif
break;case-3:#ifdef DEBUG_ENCODING
xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",c_in,c_out,(int)xmlBufUse(in));#endif
break;case-2:{charbuf[50];constxmlChar*content=xmlBufContent(in);snprintf(&buf[0],49,"0x%02X 0x%02X 0x%02X 0x%02X",content[0],content[1],content[2],content[3]);buf[49]=0;xmlEncodingErr(XML_I18N_CONV_FAILED,"input conversion failed due to input error, bytes %s\n",buf);}}/*
* Ignore when input buffer is not on a boundary
*/if(ret==-3)ret=0;if(ret==-1)ret=0;return(ret);}
/*
 * xmlParseXMLDecl (excerpt; the parts marked "// ..." are elided):
 *
 * Shows the handling that follows the version part of the XML declaration.
 * The encoding declaration is parsed; if that left
 * XML_ERR_UNSUPPORTED_ENCODING in ctxt->errNo, parsing of the declaration
 * stops. Otherwise, when an input encoding was recorded and the next
 * character is not a blank, the declaration must end right here with "?>";
 * anything else raises XML_ERR_SPACE_REQUIRED.
 *
 * NOTE(review): the return value of xmlParseEncodingDecl() is not used and
 * only XML_ERR_UNSUPPORTED_ENCODING stops parsing here. Confirm that other
 * failures inside it (for example a failed encoding switch) cannot leave
 * ctxt->input in a state where the RAW/NXT reads below go past the buffer.
 */
voidxmlParseXMLDecl(xmlParserCtxtPtrctxt){// ...
xmlParseEncodingDecl(ctxt);if(ctxt->errNo==XML_ERR_UNSUPPORTED_ENCODING){/*
* The XML REC instructs us to stop parsing right here
*/return;}/*
* We may have the standalone status.
*/if((ctxt->input->encoding!=NULL)&&(!IS_BLANK_CH(RAW))){if((RAW=='?')&&(NXT(1)=='>')){SKIP(2);return;}xmlFatalErrMsg(ctxt,XML_ERR_SPACE_REQUIRED,"Blank needed here\n");}/*
* We can grow the input buffer freely at that point
*/GROW;// ...
}
/*
 * xmlParserInputGrow (excerpt; the beginning of the body is elided):
 *
 * Tail of the function: re-synchronises the input pointers with the
 * underlying buffer. If the buffer content no longer lives at in->base,
 * in->cur is rebuilt so that it keeps the same offset from the new base.
 * in->end is then taken from the end of the current buffer content, and
 * `ret` (computed in the elided part) is returned.
 *
 * NOTE(review): nothing in the visible code checks that the preserved
 * offset `indx` still lies within the new content, so in->cur may end up
 * beyond in->end if the buffer was replaced by a shorter one - confirm
 * against the callers.
 */
intxmlParserInputGrow(xmlParserInputPtrin,intlen){// ...
content=xmlBufContent(in->buf->buffer);if(in->base!=content){/*
* the buffer has been reallocated
*/indx=in->cur-in->base;in->base=content;in->cur=&content[indx];}in->end=xmlBufEnd(in->buf->buffer);CHECK_BUFFER(in);return(ret);}
问题就出在这里：xmlParserInputGrow 把 ctxt->input 的缓冲区切换到了之前申请的 ctxt->input->buf->buffer->content 上，并同时更新了 cur 和 end。但 cur 是按旧的偏移量 cur - base 计算的，end 却是根据新申请的 buffer 计算的，因此 cur 可能落在 end 之后，后续读取就会越界：
/*
* xmlDictComputeFastQKey:
*
* Calculate a hash key for two strings using a fast hash function
* that works well for low hash table fill.
*
* Neither of the two strings must be NULL.
*/staticunsignedlongxmlDictComputeFastQKey(constxmlChar*prefix,intplen,constxmlChar*name,intlen,intseed){unsignedlongvalue=(unsignedlong)seed;if(plen==0)value+=30*(unsignedlong)':';elsevalue+=30*(*prefix);if(len>10){value+=name[len-(plen+1+1)];len=10;if(plen>10)plen=10;}// ...
}
/**
* xmlDictQLookup:
* @dict: the dictionary
* @prefix: the prefix
* @name: the name
*
* Add the QName @prefix:@name to the hash @dict if not present.
*
* A NULL @prefix turns the call into a plain xmlDictLookup() of @name.
* The visible part computes the combined length (prefix + ':' + name) and
* the hash key, then searches the bucket for an existing entry; the code
* that inserts a new entry is elided ("// ...").
*
* Returns the internal copy of the QName or NULL in case of internal error
* (including a NULL @dict or @name)
*/constxmlChar*xmlDictQLookup(xmlDictPtrdict,constxmlChar*prefix,constxmlChar*name){unsignedlongokey,key,nbi=0;xmlDictEntryPtrentry;xmlDictEntryPtrinsert;constxmlChar*ret;unsignedintlen,plen,l;if((dict==NULL)||(name==NULL))return(NULL);if(prefix==NULL)return(xmlDictLookup(dict,name,-1));l=len=strlen((constchar*)name);plen=strlen((constchar*)prefix);len+=1+plen;/*
* Check for duplicate and insertion location.
* Every entry chained in the bucket is compared by full hash, total
* length and content. The loop stops on the last entry of the chain,
* which is compared separately, so `insert` is left pointing at the
* chain tail (or is NULL when the bucket is unused). nbi counts the
* entries walked before the tail.
*/okey=xmlDictComputeQKey(dict,prefix,plen,name,l);key=okey%dict->size;if(dict->dict[key].valid==0){insert=NULL;}else{for(insert=&(dict->dict[key]);insert->next!=NULL;insert=insert->next){if((insert->okey==okey)&&(insert->len==len)&&(xmlStrQEqual(prefix,name,insert->name)))return(insert->name);nbi++;}if((insert->okey==okey)&&(insert->len==len)&&(xmlStrQEqual(prefix,name,insert->name)))return(insert->name);}// ...
}