Because the content is in HTML format, simply truncating it to its first few characters is obviously inappropriate: the cut can land in the middle of a tag or leave tags unclosed. And if you strip all the HTML formatting first and then truncate, the excerpt loses its formatting, so that does not achieve the desired effect either. After searching online, I wrote the following code, which should meet the basic requirements. (It is written in JS because that is easy to debug.)
/**
 * Helpers for cutting an excerpt out of an HTML fragment without leaving
 * unbalanced markup behind.
 *
 * Public members: br.spTags, br.contain(arr, it), br.subArtc(article, worldNum).
 */
var br = (function () {
  var api = {};

  /* Tags that do not need to appear in pairs (HTML void elements). */
  api.spTags = [
    "img", "br", "hr",
    "area", "base", "col", "embed", "input", "link", "meta", "param",
    "source", "track", "wbr"
  ];

  /**
   * Tells whether `it` occurs in `arr`.
   *
   * @param {Array} arr - list to search.
   * @param {*} it - value to look for.
   * @returns {boolean} true when some element loosely equals `it`.
   */
  api.contain = function (arr, it) {
    for (var i = 0, len = arr.length; i < len; i++) {
      /* Loose equality is kept on purpose so that e.g. "1" still matches 1. */
      if (arr[i] == it) {
        return true;
      }
    }
    return false;
  };

  /*
   * Returns the index of the ">" that closes the tag starting at `start`,
   * or -1 when the tag is never closed.
   */
  function findTagEnd(html, start) {
    if (html.substring(start, start + 4) === "<!--") {
      /* A comment ends at "-->", whatever it contains. */
      var commentEnd = html.indexOf("-->", start + 4);
      return commentEnd === -1 ? -1 : commentEnd + 2;
    }
    var quote = "";
    for (var i = start + 1, len = html.length; i < len; i++) {
      var ch = html.charAt(i);
      if (quote) {
        if (ch === quote) {
          quote = "";
        }
      } else if (ch === '"' || ch === "'") {
        /* A ">" inside a quoted attribute value does not end the tag. */
        quote = ch;
      } else if (ch === ">") {
        return i;
      }
    }
    /* Unbalanced quote: fall back to the first ">" after the "<". */
    return html.indexOf(">", start);
  }

  /*
   * Updates the stack of currently open elements for one complete tag
   * (`tag` runs from "<" to ">" inclusive).
   */
  function trackTag(tag, openTags) {
    var match = /^<(\/?)([^\s\/>]+)/.exec(tag);
    if (!match) {
      return; /* "<>", "< 3>" and the like carry no tag name. */
    }
    var name = match[2];
    var first = name.charAt(0);
    if (first === "!" || first === "?") {
      return; /* Comments, doctype, processing instructions: never paired. */
    }
    var lower = name.toLowerCase();
    if (match[1]) {
      /* End tag: close the nearest matching element and everything inside it. */
      for (var k = openTags.length - 1; k >= 0; k--) {
        if (openTags[k].toLowerCase() === lower) {
          openTags.length = k;
          break;
        }
      }
      return;
    }
    var isSelfClosed = /\/\s*>$/.test(tag);
    if (!isSelfClosed && !api.contain(api.spTags, lower)) {
      openTags.push(name);
    }
  }

  /*
   * Returns the source text of the single visible character at `pos`:
   * a whole character reference ("&amp;"), a whole surrogate pair, or one
   * UTF-16 unit. Keeps the cut from landing in the middle of either.
   */
  function readChar(html, pos) {
    var ch = html.charAt(pos);
    if (ch === "&") {
      var entity = /^&(?:#\d+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*);/
        .exec(html.substring(pos, pos + 40));
      if (entity) {
        return entity[0];
      }
    }
    var code = html.charCodeAt(pos);
    if (code >= 0xD800 && code <= 0xDBFF && pos + 1 < html.length) {
      var next = html.charCodeAt(pos + 1);
      if (next >= 0xDC00 && next <= 0xDFFF) {
        return html.substring(pos, pos + 2);
      }
    }
    return ch;
  }

  /**
   * Truncates an HTML fragment to its first `worldNum` visible characters.
   * Tags are copied through without being counted, and every element that is
   * still open at the cut point gets its closing tag appended, innermost
   * first.
   *
   * @param {string} article - HTML source; null/undefined is treated as "".
   * @param {number} worldNum - number of text characters to keep. Tags do not
   *   count; a character reference or a surrogate pair counts as one. When
   *   omitted or not a number the text is not shortened (tags are still
   *   balanced). Zero or a negative value yields "".
   * @returns {string} the shortened, tag-balanced HTML.
   */
  api.subArtc = function (article, worldNum) {
    var html = article == null ? "" : String(article);
    var limit = worldNum == null ? Infinity : Number(worldNum);
    if (isNaN(limit)) {
      limit = Infinity;
    }
    if (limit <= 0) {
      return "";
    }

    var pieces = [];
    var openTags = []; /* Stack of element names still waiting for an end tag. */
    var counted = 0;
    var pos = 0;
    var len = html.length;

    while (pos < len && counted < limit) {
      if (html.charAt(pos) === "<") {
        var end = findTagEnd(html, pos);
        if (end === -1) {
          /* Unterminated tag: keep the remainder verbatim and stop. */
          pieces.push(html.substring(pos));
          break;
        }
        var tag = html.substring(pos, end + 1);
        pieces.push(tag);
        trackTag(tag, openTags);
        pos = end + 1;
      } else {
        var chunk = readChar(html, pos);
        pieces.push(chunk);
        pos += chunk.length;
        counted++;
      }
    }

    /* Close what is still open, innermost element first. */
    for (var k = openTags.length - 1; k >= 0; k--) {
      pieces.push("</" + openTags[k] + ">");
    }
    return pieces.join("");
  };

  return api;
})();
Basic idea:
1. Skip over the markup and count only the actual text characters of the content. For example, to display the first 100 characters of the content, scan past the tags to find the real index of the 100th text character, then cut the string at that index.
2. Based on the obtained string, get the start tag and end tag that exist in the string. Note: The start tag here starts with "<", and the next character is not "/".
3. Eliminate the tags that do not need to appear in pairs among the start tags obtained in 2. Such as br, img, hr, etc.
4. Compare the start tags remaining after step 3 with the end tags obtained in step 2. For every start tag that has no matching end tag, add the missing end tag at the appropriate position.
This function has not been rigorously tested. If you are interested, you can help test it. If you have better ideas, you can also reply to the discussion.
Author: cnblogs bravfing
Statement:The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn