ICode9

精准搜索请尝试: 精确搜索
首页 > 其他分享 > 文章详细

HTML文本截取库

2021-07-05 01:03:49  阅读:167  来源: 互联网

标签:cut 截取 else HTML limit charArr 文本 options row


/**
 * Truncate an HTML string to a limited number of visible text characters.
 *
 * The markup is split into "rows" (one per tag and one per text run). Text
 * rows are counted until `limit` is reached; the row that reaches the limit
 * is cut and gets `suffix` (plus an optional "more" link) appended. Text
 * after the cut is dropped, and so are tags that were opened after the cut.
 * Closing tags whose opening tag appeared before the cut are kept.
 *
 * Rows are re-joined with a single space, so the returned markup has spaces
 * between tags and text, and every run of spaces in the result is collapsed
 * to one space (regardless of `preserveWhiteSpace`, which only affects how
 * characters are counted and cut).
 *
 * @param  {String} html - markup to truncate; `null`/`undefined` is treated as ''
 * @param  {Object} [options]
 * @param  {Number}  [options.limit=100] - maximum number of text characters
 * @param  {Boolean} [options.preserveTags=true] - keep tags in the output
 * @param  {Boolean} [options.wordBreak=false] - allow cutting in the middle of a word
 * @param  {String}  [options.suffix='...'] - appended to the truncated text
 * @param  {String}  [options.moreLink=''] - href of a link appended after the suffix
 * @param  {String}  [options.moreText='»'] - text of that link
 * @param  {Boolean} [options.preserveWhiteSpace=false] - count every space
 *   instead of counting a run of spaces as one character
 * @return {Object} `{ html, more }` - the resulting markup and whether a cut happened
 */
export const trimHtml = (html, options) => {

  const opts = options || {};

  const limit = opts.limit || 100;
  const preserveTags = (typeof opts.preserveTags !== 'undefined') ? opts.preserveTags : true;
  const wordBreak = (typeof opts.wordBreak !== 'undefined') ? opts.wordBreak : false;
  const suffix = opts.suffix || '...';
  const moreLink = opts.moreLink || '';
  const moreText = opts.moreText || '»';
  const preserveWhiteSpace = opts.preserveWhiteSpace || false;

  // A missing input yields an empty result instead of a TypeError.
  const source = html == null ? '' : String(html);

  // Put every tag on its own line, then split: each row is a tag or a text run.
  const rows = source.replace(/</g, "\n<")
    .replace(/>/g, ">\n")
    .replace(/\n\n/g, "\n")
    .replace(/^\n/g, "")
    .replace(/\n$/g, "")
    .split("\n");

  let sum = 0; // text characters emitted so far
  let more = false;
  const tagStack = []; // tags opened after the limit was reached

  for (let i = 0; i < rows.length; i++) {

    let row = rows[i];

    if (!row.length) {
      continue;
    }

    if (row[0] !== "<") {

      // count multiple spaces as one character
      const charArr = getCharArr(preserveWhiteSpace ? row : row.replace(/[ ]+/g, ' '));

      if (sum >= limit) {
        row = '';
      } else if ((sum + charArr.length) >= limit) {

        let cut = limit - sum;

        if (charArr[cut - 1] === ' ') {
          // The cut lands right after a space: back up over trailing spaces.
          while (cut) {
            cut -= 1;
            if (charArr[cut - 1] !== ' ') {
              break;
            }
          }
        } else if (!wordBreak) {
          // Do not break in the middle of a word: extend the cut to the next
          // space, or to the end of this text run when there is none.
          const add = charArr.slice(cut).indexOf(' ');
          cut = (add !== -1) ? cut + add : charArr.length;
        }

        row = charArr.slice(0, cut).join('') + suffix;

        if (moreLink) {
          // NOTE(review): moreLink and moreText are inserted verbatim; callers
          // must pass trusted or already-escaped values.
          row += '<a href="' + moreLink + '" style="display:inline">' + moreText + '</a>';
        }

        sum = limit;
        more = true;
      } else {
        sum += charArr.length;
      }
    } else if (!preserveTags) {
      row = '';
    } else if (sum >= limit) {

      const tagMatch = row.match(/[a-zA-Z]+/);
      const tagName = tagMatch ? tagMatch[0] : '';

      if (!tagName) {
        row = '';
      } else if (row.substring(0, 2) !== '</') {
        // Opening tag after the cut: drop it, but remember it so that its
        // closing tag is dropped as well.
        tagStack.push(tagName);
        row = '';
      } else {
        // Closing tag: discard unclosed tags stacked above its opener.
        while (tagStack.length && tagStack[tagStack.length - 1] !== tagName) {
          tagStack.pop();
        }

        // A non-empty stack means the opener was dropped, so drop the closer.
        // An empty stack means the opener came before the cut: keep the closer.
        if (tagStack.length) {
          row = '';
        }

        tagStack.pop();
      }
    }

    rows[i] = row;
  }

  return {
    // Dropped rows leave long runs of separators; collapse each run fully.
    html: rows.join(' ').replace(/ {2,}/g, ' '),
    more: more
  };
};

/**
 * Split a text run into display characters.
 *
 * An HTML character reference such as `&amp;`, `&Eacute;` or `&#x1F600;` is
 * returned as a single element, so it counts as one character and is never
 * cut in half. All other text is split by Unicode code point, so a character
 * stored as a surrogate pair (e.g. an emoji) also stays in one piece.
 *
 * @param  {String} rowCut - text to split
 * @return {String[]} one element per display character, in order
 */
function getCharArr(rowCut) {
  // Sticky so the match is anchored at `lastIndex`; case-insensitive because
  // entity names and hex digits may contain upper-case letters.
  const entityPattern = /&[a-z0-9#]+;/iy;
  const charArr = [];
  let i = 0;

  while (i < rowCut.length) {
    let char;
    let match = null;

    if (rowCut[i] === '&') {
      entityPattern.lastIndex = i;
      match = entityPattern.exec(rowCut);
    }

    if (match) {
      char = match[0];
    } else {
      // Take the whole code point, which may span two UTF-16 code units.
      char = String.fromCodePoint(rowCut.codePointAt(i));
    }

    charArr.push(char);
    i += char.length;
  }

  return charArr;
}

  

标签:cut,截取,else,HTML,limit,charArr,文本,options,row
来源: https://www.cnblogs.com/amanda-man/p/14970546.html

本站声明: 1. iCode9 技术分享网(下文简称本站)提供的所有内容,仅供技术学习、探讨和分享;
2. 关于本站的所有留言、评论、转载及引用,纯属内容发起人的个人观点,与本站观点和立场无关;
3. 关于本站的所有言论和文字,纯属内容发起人的个人观点,与本站观点和立场无关;
4. 本站文章均是网友提供,不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属;如您发现该文章侵犯了您的权益,可联系我们第一时间进行删除;
5. 本站为非盈利性的个人网站,所有内容不会用来进行牟利,也不会利用任何形式的广告来间接获益,纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。

专注分享技术,共同学习,共同进步。侵权联系[81616952@qq.com]

Copyright (C)ICode9.com, All Rights Reserved.

ICode9版权所有