Maison  >  Questions et réponses  >  le corps du texte

Comparez deux textes HTML et marquez les différences

Je veux marquer la différence entre deux textes HTML, j'ai trouvé une solution, tout fonctionne comme prévu (les fichiers supprimés reçoivent le nom de classe "del", le texte inséré reçoit le nom de classe "ins" et les styles associés), il n'y a qu'une seule chose qui ne va pas - pour une raison quelconque, le texte est marqué au mauvais endroit.

var Match,
  calculate_operations,
  consecutive_where,
  create_index,
  diff,
  find_match,
  find_matching_blocks,
  html_to_tokens,
  is_end_of_tag,
  is_start_of_tag,
  is_tag,
  is_whitespace,
  isnt_tag,
  op_map,
  define,
  recursively_find_matching_blocks,
  render_operations,
  wrap;

is_end_of_tag = function (char) {
  return char === '>';
};

is_start_of_tag = function (char) {
  return char === '<';
};

is_whitespace = function (char) {
  return /^\s+$/.test(char);
};

is_tag = function (token) {
  return /^\s*<[^>]+>\s*$/.test(token);
};

isnt_tag = function (token) {
  return !is_tag(token);
};

Match = class Match {
  constructor(start_in_before1, start_in_after1, length1) {
    console.log('lalala');

    this.start_in_before = start_in_before1;
    this.start_in_after = start_in_after1;
    this.length = length1;
    this.end_in_before = this.start_in_before + this.length - 1;
    this.end_in_after = this.start_in_after + this.length - 1;
  }
};

html_to_tokens = function (html) {
  var char, current_word, i, len, mode, words, letters;
  mode = 'char';
  current_word = '';
  // letters = [];
  words = [];
  for (i = 0, len = html.length; i < len; i++) {
    char = html[i];
    // letters.push(char);
    switch (mode) {
      case 'tag':
        if (is_end_of_tag(char)) {
          current_word += '>';
          words.push(current_word);
          current_word = '';
          if (is_whitespace(char)) {
            mode = 'whitespace';
          } else {
            mode = 'char';
          }
        } else {
          current_word += char;
        }
        break;
      case 'char':
        if (is_start_of_tag(char)) {
          if (current_word) {
            words.push(current_word);
          }
          current_word = '<';
          mode = 'tag';
        } else if (/\s/.test(char)) {
          if (current_word) {
            words.push(current_word);
          }
          current_word = char;
          mode = 'whitespace';
        } else if (/[\w\#@]+/i.test(char)) {
          current_word += char;
        } else {
          if (current_word) {
            words.push(current_word);
          }
          current_word = char;
        }
        break;
      case 'whitespace':
        if (is_start_of_tag(char)) {
          if (current_word) {
            words.push(current_word);
          }
          current_word = '<';
          mode = 'tag';
        } else if (is_whitespace(char)) {
          current_word += char;
        } else {
          if (current_word) {
            words.push(current_word);
          }
          current_word = char;
          mode = 'char';
        }
        break;
      default:
        throw new Error(`Unknown mode ${mode}`);
    }
  }
  if (current_word) {
    words.push(current_word);
  }

  return words;
};

find_match = function (
  before_tokens,
  after_tokens,
  index_of_before_locations_in_after_tokens,
  start_in_before,
  end_in_before,
  start_in_after,
  end_in_after
) {
  var best_match_in_after,
    best_match_in_before,
    best_match_length,
    i,
    index_in_after,
    index_in_before,
    j,
    len,
    locations_in_after,
    looking_for,
    match,
    match_length_at,
    new_match_length,
    new_match_length_at,
    ref,
    ref1;
  best_match_in_before = start_in_before;
  best_match_in_after = start_in_after;
  best_match_length = 0;
  match_length_at = {};
  for (
    index_in_before = i = ref = start_in_before, ref1 = end_in_before;
    ref <= ref1 ? i < ref1 : i > ref1;
    index_in_before = ref <= ref1 ? ++i : --i
  ) {
    new_match_length_at = {};
    looking_for = before_tokens[index_in_before];
    locations_in_after = index_of_before_locations_in_after_tokens[looking_for];

    for (j = 0, len = locations_in_after.length; j < len; j++) {
      index_in_after = locations_in_after[j];
      if (index_in_after < start_in_after) {
        continue;
      }
      if (index_in_after >= end_in_after) {
        break;
      }
      if (match_length_at[index_in_after - 1] == null) {
        match_length_at[index_in_after - 1] = 0;
      }
      new_match_length = match_length_at[index_in_after - 1] + 1;
      new_match_length_at[index_in_after] = new_match_length;
      if (new_match_length > best_match_length) {
        best_match_in_before = index_in_before - new_match_length + 1;
        best_match_in_after = index_in_after - new_match_length + 1;
        best_match_length = new_match_length;
      }
    }
    match_length_at = new_match_length_at;
  }
  if (best_match_length !== 0) {
    match = new Match(
      best_match_in_before,
      best_match_in_after,
      best_match_length
    );
  }
  return match;
};

recursively_find_matching_blocks = function (
  before_tokens,
  after_tokens,
  index_of_before_locations_in_after_tokens,
  start_in_before,
  end_in_before,
  start_in_after,
  end_in_after,
  matching_blocks
) {
  var match;
  match = find_match(
    before_tokens,
    after_tokens,
    index_of_before_locations_in_after_tokens,
    start_in_before,
    end_in_before,
    start_in_after,
    end_in_after
  );
  if (match != null) {
    if (
      start_in_before < match.start_in_before &&
      start_in_after < match.start_in_after
    ) {
      recursively_find_matching_blocks(
        before_tokens,
        after_tokens,
        index_of_before_locations_in_after_tokens,
        start_in_before,
        match.start_in_before,
        start_in_after,
        match.start_in_after,
        matching_blocks
      );
    }
    matching_blocks.push(match);
    if (
      match.end_in_before <= end_in_before &&
      match.end_in_after <= end_in_after
    ) {
      recursively_find_matching_blocks(
        before_tokens,
        after_tokens,
        index_of_before_locations_in_after_tokens,
        match.end_in_before + 1,
        end_in_before,
        match.end_in_after + 1,
        end_in_after,
        matching_blocks
      );
    }
  }
  return matching_blocks;
};

create_index = function (p) {
  var i, idx, index, len, ref, token;
  if (p.find_these == null) {
    throw new Error('params must have find_these key');
  }
  if (p.in_these == null) {
    throw new Error('params must have in_these key');
  }
  index = {};
  const arr = html_to_tokens(p.find_these);
  const arr_number = [];
  for (var i = 0; i < arr.length; i++) {
    arr_number.push(arr[i].length);
  }
  ref = p.find_these;

  for (i = 0, len = ref.length; i < len; i++) {
    token = ref[i];
    index[token] = [];
    idx = p.in_these.indexOf(token);
    while (idx !== -1) {
      index[token].push(idx);
      idx = p.in_these.indexOf(token, idx + 1);
    }
  }
  console.log(token, 'token');
  console.log(arr_number, 'arr_number');
  return index;
};

find_matching_blocks = function (before_tokens, after_tokens) {
  var index_of_before_locations_in_after_tokens, matching_blocks;
  matching_blocks = [];
  index_of_before_locations_in_after_tokens = create_index({
    find_these: before_tokens,
    in_these: after_tokens,
  });
  console.log(
    index_of_before_locations_in_after_tokens,
    'index_of_before_locations_in_after_tokens '
  );
  return recursively_find_matching_blocks(
    before_tokens,
    after_tokens,
    index_of_before_locations_in_after_tokens,
    0,
    before_tokens.length,
    0,
    after_tokens.length,
    matching_blocks
  );
};

calculate_operations = function (before_tokens, after_tokens) {
  var action_map,
    action_up_to_match_positions,
    i,
    index,
    is_single_whitespace,
    j,
    last_op,
    len,
    len1,
    match,
    match_starts_at_current_position_in_after,
    match_starts_at_current_position_in_before,
    matches,
    op,
    operations,
    position_in_after,
    position_in_before,
    post_processed;
  if (before_tokens == null) {
    throw new Error('before_tokens?');
  }
  if (after_tokens == null) {
    throw new Error('after_tokens?');
  }
  position_in_before = position_in_after = 0;
  operations = [];
  action_map = {
    'false,false': 'replace',
    'true,false': 'insert',
    'false,true': 'delete',
    'true,true': 'none',
  };
  matches = find_matching_blocks(before_tokens, after_tokens);
  console.log(matches, 'matches');
  matches.push(new Match(before_tokens.length, after_tokens.length, 0));
  for (index = i = 0, len = matches.length; i < len; index = ++i) {
    match = matches[index];
    match_starts_at_current_position_in_before =
      position_in_before === match.start_in_before;
    match_starts_at_current_position_in_after =
      position_in_after === match.start_in_after;
    action_up_to_match_positions =
      action_map[
        [
          match_starts_at_current_position_in_before,
          match_starts_at_current_position_in_after,
        ].toString()
      ];
    if (action_up_to_match_positions !== 'none') {
      operations.push({
        action: action_up_to_match_positions,
        start_in_before: position_in_before,
        end_in_before:
          action_up_to_match_positions !== 'insert'
            ? match.start_in_before - 1
            : void 0,
        start_in_after: position_in_after,
        end_in_after:
          action_up_to_match_positions !== 'delete'
            ? match.start_in_after - 1
            : void 0,
      });
    }
    if (match.length !== 0) {
      operations.push({
        action: 'equal',
        start_in_before: match.start_in_before,
        end_in_before: match.end_in_before,
        start_in_after: match.start_in_after,
        end_in_after: match.end_in_after,
      });
    }
    position_in_before = match.end_in_before + 1;
    position_in_after = match.end_in_after + 1;
  }
  post_processed = [];
  last_op = {
    action: 'none',
  };
  is_single_whitespace = function (op) {
    if (op.action !== 'equal') {
      return false;
    }
    if (op.end_in_before - op.start_in_before !== 0) {
      return false;
    }
    return /^\s$/.test(
      before_tokens.slice(op.start_in_before, +op.end_in_before + 1 || 9e9)
    );
  };
  for (j = 0, len1 = operations.length; j < len1; j++) {
    op = operations[j];
    if (
      (is_single_whitespace(op) && last_op.action === 'replace') ||
      (op.action === 'replace' && last_op.action === 'replace')
    ) {
      last_op.end_in_before = op.end_in_before;
      last_op.end_in_after = op.end_in_after;
    } else {
      post_processed.push(op);
      last_op = op;
    }
  }
  return post_processed;
};

consecutive_where = function (start, content, predicate) {
  var answer, i, index, last_matching_index, len, token;
  content = content.slice(start, +content.length + 1 || 9e9);
  // console.log(content, 'content');
  // console.log(predicate, 'predicate');
  // console.log(start, 'start');

  last_matching_index = void 0;
  for (index = i = 0, len = content.length; i < len; index = ++i) {
    token = content[index];
    answer = predicate(token);
    if (answer === true) {
      last_matching_index = index;
    }
    if (answer === false) {
      break;
    }
  }
  if (last_matching_index != null) {
    return content.slice(0, +last_matching_index + 1 || 9e9);
  }
  return [];
};

wrap = function (tag, content) {
  var length, non_tags, position, rendering, tags;
  rendering = '';
  position = 0;
  length = content.length;
  while (true) {
    if (position >= length) {
      break;
    }
    non_tags = consecutive_where(position, content, isnt_tag);
    position += non_tags.length;
    if (non_tags.length !== 0) {
      rendering += `<${tag}>${non_tags.join('')}</${tag}>`;
    }
    if (position >= length) {
      break;
    }
    tags = consecutive_where(position, content, is_tag);
    position += tags.length;
    rendering += tags.join('');
  }
  return rendering;
};

op_map = {
  equal: function (op, before_tokens, after_tokens) {
    return before_tokens
      .slice(op.start_in_before, +op.end_in_before + 1 || 9e9)
      .join('');
  },
  insert: function (op, before_tokens, after_tokens) {
    var val;
    const token_length = after_tokens.map(element => element.length);
    val = after_tokens.slice(op.start_in_after, op.end_in_after + 1 || 9e9);

    return wrap('ins', val);
  },
  delete: function (op, before_tokens, after_tokens) {
    var val;
    val = before_tokens.slice(op.start_in_before, +op.end_in_before + 1 || 9e9);

    return wrap('del', val);
  },
};

op_map.replace = function (op, before_tokens, after_tokens) {
  return (
    op_map.delete(op, before_tokens, after_tokens) +
    op_map.insert(op, before_tokens, after_tokens)
  );
};

render_operations = function (before_tokens, after_tokens, operations) {
  var i, len, op, rendering;
  rendering = '';
  console.log(operations.length, 'operations.length');
  console.log(operations, 'operations');
  for (i = 0, len = operations.length; i < len; i++) {
    op = operations[i];

    rendering += op_map[op.action](op, before_tokens, after_tokens);
  }
  return rendering;
};

diff = function (before, after) {
  var ops;
  if (before === after) {
    return before;
  }
  before = html_to_tokens(before);
  after = html_to_tokens(after);
  ops = calculate_operations(before, after);
  return render_operations(before, after, ops);
};

diff.html_to_tokens = html_to_tokens;

diff.find_matching_blocks = find_matching_blocks;

find_matching_blocks.find_match = find_match;

find_matching_blocks.create_index = create_index;

diff.calculate_operations = calculate_operations;

diff.render_operations = render_operations;

if (typeof define === 'function') {
  define([], function () {
    return diff;
  });
} else if (typeof module !== 'undefined' && module !== null) {
  module.exports = diff;
} else {
  this.htmldiff = diff;
}

Appelez le code que j'ai écrit :

let oldTxt = Match.html_to_tokens(a);
    let newTxt = Match.html_to_tokens(b);
    let ops = Match.calculate_operations(a, b);
    const render = Match.render_operations(newTxt, oldTxt, ops);
    document.getElementById('output').innerHTML = render;

a est le premier code HTML, b est le code HTML modifié (non stringifié)

Des idées sur la façon de procéder ? Merci pour votre aide!

Je sais que l'erreur se trouve quelque part dans la fonction "create_index". Parce que je calcule la longueur du texte et que je la compare ensuite à la longueur du texte, chaque balise/mot compte pour un.

Code Sandbox

P粉478445671P粉478445671377 Il y a quelques jours430

répondre à tous(1)je répondrai

  • P粉677573079

    P粉6775730792023-09-11 13:45:31

    Je l'ai résolu ! Pour tous ceux qui souhaitent utiliser ce code, activez simplement cette partie :

    let oldTxt = Match.html_to_tokens(a);
        let newTxt = Match.html_to_tokens(b);
        let ops = Match.calculate_operations(a, b);
        const render = Match.render_operations(newTxt, oldTxt, ops);
        document.getElementById('output').innerHTML = render;

    À ceci :

        a = Match.html_to_tokens(a);
        b = Match.html_to_tokens(b);
        let ops = Match.calculate_operations(a, b);
        console.log(ops, 'ops');
        const render = Match.render_operations(a, b, ops);
        document.getElementById('output').innerHTML = render;

    où a et b sont deux textes html : a (premier), b (changement)

    Vous pouvez essayer le nouveau code ici : CodeSandbox résolu !

    répondre
    0
  • Annulerrépondre