/*
 * libci_diff, a library to compute a binary copy/insert diff.
 * Copyright (C) 2001 Peter Seiderer <Peter.Seiderer@ciselant.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.

 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

#include "ci_diff.h"
#include "ci_buf.h"
#include "ci_hash.h"
#include <zlib.h>

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>

#define TMP_BUF_LEN        64
#define HASH_TABLE_SIZE  4096
#define JUNK_SIZE          4

/*
 * Copy command: names the half-open byte range [begin, end) of the
 * reference buffer that is to be reproduced in the output
 * (length is end - begin).
 */
typedef struct ci_copy_cmd_s {
  size_t  begin;  /* offset of the first byte to copy */
  size_t  end;    /* offset one past the last byte to copy */
} ci_copy_cmd_t;

/*
 * Insert command: buf_len literal bytes to be emitted into the output.
 * For commands linked into a list the bytes are a heap copy that is
 * released by _ci_free_cmd(); the scratch command filled in by
 * _ci_find_match() merely points into the caller's buffer.
 */
typedef struct ci_insert_cmd_s {
  void    *buf;      /* literal bytes */
  size_t  buf_len;   /* number of bytes in buf */
} ci_insert_cmd_t;

/*
 * One node of a singly linked list of delta commands.  `type` selects
 * which member of the `cmd` union is valid.
 */
typedef struct ci_cmd_s {
  char type;  /* 'c': copy, 'i': insert */
  union {
    ci_copy_cmd_t    copy;    /* valid when type == 'c' */
    ci_insert_cmd_t  insert;  /* valid when type == 'i' */
  } cmd;
  struct ci_cmd_s *next;      /* following command, NULL at the end */
} ci_cmd_t;


/* Build the command list that rebuilds s2 (n2 bytes) from s1 (n1 bytes). */
static int _ci_diff_b( ci_cmd_t **delta_cmd,
		       void *s1,
		       size_t n1,
		       void *s2,
		       size_t n2 );

/* Fill *cmd with the next copy or insert command for s2 at `index`. */
static void _ci_find_match( ci_cmd_t *cmd,
			    void *s1,
			    size_t n1,
			    ci_hash_table_t *hash_table,
			    void *s2,
			    size_t n2,
			    size_t index );


/* Serialize a command list into the binary delta format (frees the list). */
static int _ci_delta_to_b( void **delta,
			   size_t *delta_len,
			   ci_cmd_t *delta_cmd );


/* Serialize a command list into the textual delta format (frees the list). */
static int _ci_delta_to_s( char **delta,
			   size_t *delta_len,
			   ci_cmd_t *delta_cmd );


/* Release a single command / a whole command list. */
static void _ci_free_cmd( ci_cmd_t *delta_cmd );
static void _ci_free_cmd_list(ci_cmd_t *cmd_list);

/* Allocate a new command and link it behind `cmd` (which may be NULL). */
static ci_cmd_t *_ci_add_copy_cmd(ci_cmd_t *cmd, size_t begin, size_t end);
static ci_cmd_t *_ci_add_insert_cmd(ci_cmd_t *cmd, void *s, size_t n);
/* Extend the payload of an existing insert command by n bytes from s. */
static ci_cmd_t *_ci_append_insert_cmd(ci_cmd_t *cmd, void *s, size_t n);

/* Parse a binary / textual delta back into a command list. */
static int _ci_b_to_cmd(ci_cmd_t **cmd_list, void *delta, size_t delta_len);
static int _ci_s_to_cmd(ci_cmd_t **cmd_list, char *delta, size_t delta_len);

/* Execute a command list against the reference buffer s2 (frees the list). */
static int _ci_apply_cmd_to_b(void **result,
			      size_t *result_len,
			      void *s2,
			      size_t n2,
			      ci_cmd_t *cmd_list);


/*
 * Compute a binary delta that rebuilds s2 (n2 bytes) from s1 (n1 bytes).
 *
 * Copy commands in the delta refer to byte ranges of s1; insert commands
 * carry literal bytes taken from s2.
 *
 * delta      receives the serialized delta buffer on success
 * delta_len  receives its length in bytes
 *
 * Returns 0 on success, otherwise the error code reported by the diff
 * or serialization step.
 */
int ci_diff_b( void **delta,
               size_t *delta_len,
               void *s1,
               size_t n1,
               void *s2,
               size_t n2 )
{
  ci_cmd_t *commands;
  int status = _ci_diff_b(&commands, s1, n1, s2, n2);

  if (status == 0) {
    /* the serializer takes over (and frees) the command list */
    status = _ci_delta_to_b(delta, delta_len, commands);
  }
  return status;
}

/*
 * Compute a textual delta that rebuilds the NUL-terminated string s2
 * from the NUL-terminated string s1.
 *
 * delta  receives the NUL-terminated textual delta on success
 *
 * Returns 0 on success, otherwise the error code reported by the diff
 * or serialization step.
 */
int ci_diff_s( char **delta,
               char *s1,
               char *s2 )
{
  ci_cmd_t *commands;
  size_t ignored_len;  /* the serializer wants a length slot; callers of this variant do not */
  int status;

  status = _ci_diff_b(&commands, s1, strlen(s1), s2, strlen(s2));
  if (status != 0) {
    return status;
  }
  return _ci_delta_to_s(delta, &ignored_len, commands);
}

/*
 * Compute a textual delta that rebuilds s2 (n2 bytes) from s1 (n1 bytes);
 * the inputs are given with explicit lengths.
 *
 * delta      receives the textual delta on success
 * delta_len  receives its length (not counting the trailing NUL)
 *
 * Returns 0 on success, otherwise the error code reported by the diff
 * or serialization step.
 */
int ci_diff_sn( char **delta,
                size_t *delta_len,
                char *s1,
                size_t n1,
                char *s2,
                size_t n2 )
{
  ci_cmd_t *commands;
  int status = _ci_diff_b(&commands, s1, n1, s2, n2);

  if (status == 0) {
    /* the serializer takes over (and frees) the command list */
    status = _ci_delta_to_s(delta, delta_len, commands);
  }
  return status;
}

/*
 * Build the list of copy/insert commands that rebuilds s2 from s1.
 *
 * s1 is indexed in non-overlapping chunks of JUNK_SIZE bytes: the
 * Adler-32 checksum of every chunk is stored in a hash table together
 * with the chunk's offset.  s2 is then walked front to back; at every
 * position _ci_find_match() yields either a copy command (a range of s1)
 * or an insert command (literal bytes of s2).  Consecutive inserts are
 * merged into a single command.
 *
 * delta_cmd  receives the head of the command list (NULL if n2 == 0);
 *            ownership passes to the caller
 *
 * Returns 0 on success, ENOMEM on allocation failure, the error code of
 * ci_insert_hash() if indexing fails, or ENOMSG if the matcher produced
 * a command that makes no progress.
 */
static int _ci_diff_b( ci_cmd_t **delta_cmd,
                       void *s1,
                       size_t n1,
                       void *s2,
                       size_t n2 )
{
  ci_cmd_t *cmd_list = NULL;
  ci_cmd_t *last_cmd = NULL;
  ci_cmd_t *tmp_cmd;
  ci_hash_table_t *hash_table;
  size_t index;

  if ( (hash_table = ci_new_hash(HASH_TABLE_SIZE)) == NULL) {
    return ENOMEM;
  }

  /* Index every complete chunk of s1, including the one that ends
   * exactly at n1.  The guard on n1 keeps the unsigned subtraction
   * from wrapping for inputs shorter than one chunk. */
  index = 0;
  while ( (n1 >= JUNK_SIZE) && (index <= n1 - JUNK_SIZE) ) {
    int ret;
    uLong adler = adler32(0L, Z_NULL, 0);
    adler = adler32(adler, (const Bytef *)s1 + index, JUNK_SIZE);
    if ( (ret = ci_insert_hash(hash_table, adler, index)) != 0) {
      ci_free_hash(hash_table);
      return ret;
    }
    index += JUNK_SIZE;
  }

  index = 0;
  while (index < n2) {
    ci_cmd_t cmd;
    size_t step;

    _ci_find_match(&cmd, s1, n1, hash_table, s2, n2, index);

    switch (cmd.type) {
    case 'c':
      step = cmd.cmd.copy.end - cmd.cmd.copy.begin;
      break;
    case 'i':
      step = cmd.cmd.insert.buf_len;
      break;
    default:
      step = 0;
      break;
    }
    if (step == 0) {
      /* this should never happen: bail out instead of looping forever */
      ci_free_hash(hash_table);
      _ci_free_cmd_list(cmd_list);
      return ENOMSG;
    }
    index += step;

    if ( (cmd.type == 'i') && (last_cmd != NULL) && (last_cmd->type == 'i') ) {
      /* merge adjacent literal runs into one insert command */
      if (_ci_append_insert_cmd(last_cmd, cmd.cmd.insert.buf, cmd.cmd.insert.buf_len) == NULL) {
        ci_free_hash(hash_table);
        _ci_free_cmd_list(cmd_list);
        return ENOMEM;
      }
      continue;
    }

    if (cmd.type == 'c') {
      tmp_cmd = _ci_add_copy_cmd(last_cmd, cmd.cmd.copy.begin, cmd.cmd.copy.end);
    } else {
      tmp_cmd = _ci_add_insert_cmd(last_cmd, cmd.cmd.insert.buf, cmd.cmd.insert.buf_len);
    }
    if (tmp_cmd == NULL) {
      ci_free_hash(hash_table);
      _ci_free_cmd_list(cmd_list);
      return ENOMEM;
    }
    if (cmd_list == NULL) {
      cmd_list = tmp_cmd;
    }
    last_cmd = tmp_cmd;
  }

  ci_free_hash(hash_table);

  *delta_cmd = cmd_list;
  return 0;
}

/*
 * Decide how the bytes of s2 starting at `index` are encoded.
 *
 * cmd         receives the resulting command (not linked into any list;
 *             an insert payload points straight into s2)
 * s1, n1      reference buffer and its length
 * hash_table  chunk checksums of s1 mapped to chunk offsets
 * s2, n2      target buffer and its length; the caller guarantees
 *             index < n2
 *
 * Outcome:
 *  - fewer than JUNK_SIZE bytes remain: insert all remaining bytes;
 *  - the checksum of the next JUNK_SIZE bytes is unknown: insert 1 byte;
 *  - otherwise compare s1 (from the recorded offset) with s2 byte by
 *    byte; a non-empty common run becomes a copy command, an empty one
 *    (checksum collision) becomes a 1 byte insert.
 */
static void _ci_find_match( ci_cmd_t *cmd,
                            void *s1,
                            size_t n1,
                            ci_hash_table_t *hash_table,
                            void *s2,
                            size_t n2,
                            size_t index )
{
  const unsigned char *ref = (const unsigned char *)s1;
  unsigned char *target = (unsigned char *)s2;
  ci_hash_entry_t *hash_entry;
  uLong adler;
  size_t i_s1;
  size_t i_s2;
  size_t copy_len;

  if ( (n2 - index) < JUNK_SIZE ) {
    cmd->type = 'i';
    cmd->cmd.insert.buf = target + index;
    cmd->cmd.insert.buf_len = n2 - index;
    return;
  }

  adler = adler32(0L, Z_NULL, 0);
  adler = adler32(adler, target + index, JUNK_SIZE);
  if ( (hash_entry = ci_find_hash(hash_table, adler)) == NULL) {
    cmd->type = 'i';
    cmd->cmd.insert.buf = target + index;
    cmd->cmd.insert.buf_len = 1;
    return;
  }

  i_s1 = *hash_entry;
  i_s2 = index;
  copy_len = 0;

  /* Bounds are tested before the bytes are read so that neither buffer
   * is accessed past its end. */
  while ( (i_s1 < n1) &&
          (i_s2 < n2) &&
          (ref[i_s1] == target[i_s2]) ) {
    i_s1++;
    i_s2++;
    copy_len++;
  }
  if (copy_len == 0) {
    cmd->type = 'i';
    cmd->cmd.insert.buf = target + index;
    cmd->cmd.insert.buf_len = 1;
    return;
  }
  cmd->type = 'c';
  cmd->cmd.copy.begin = *hash_entry;
  cmd->cmd.copy.end = *hash_entry + copy_len;
}

/*
 * Serialize a command list into the binary delta format and free the
 * list (on success and on failure alike).
 *
 * Layout per command, using the host's native size_t representation:
 *   copy:    'c' <begin: size_t> <end: size_t>
 *   insert:  'i' <len: size_t>   <len literal bytes>
 *
 * delta      receives the serialized buffer, detached from the ci_buf
 * delta_len  receives its length in bytes
 *
 * Returns 0 on success.  On failure returns the errno left behind by
 * the ci_buf call (ENOMEM if errno was not set) or ENOMSG for a command
 * of unknown type; *delta and *delta_len are then left untouched.
 *
 * NOTE(review): assumes ci_append_buf() releases the buffer itself when
 * it returns NULL -- confirm against ci_buf.c.
 */
static int _ci_delta_to_b( void **delta,
                           size_t *delta_len,
                           ci_cmd_t *delta_cmd )
{
  ci_cmd_t *next;
  ci_buf_t *buf;
  int err = 0;

  if ( (buf = ci_new_buf()) == NULL ) {
    /* fall through to the cleanup below so the list is not leaked */
    err = (errno != 0) ? errno : ENOMEM;
  }

  while ( (err == 0) && (delta_cmd != NULL) ) {
    if (delta_cmd->type == 'c') {
      if ( ((buf = ci_append_buf(buf, "c", 1)) == NULL) ||
           ((buf = ci_append_buf(buf, &delta_cmd->cmd.copy.begin, sizeof(size_t))) == NULL) ||
           ((buf = ci_append_buf(buf, &delta_cmd->cmd.copy.end, sizeof(size_t))) == NULL) ) {
        /* capture errno right away, before any free() can disturb it */
        err = (errno != 0) ? errno : ENOMEM;
        break;
      }
    } else if (delta_cmd->type == 'i') {
      if ( ((buf = ci_append_buf(buf, "i", 1)) == NULL) ||
           ((buf = ci_append_buf(buf, &delta_cmd->cmd.insert.buf_len, sizeof(size_t))) == NULL) ||
           ((buf = ci_append_buf(buf, delta_cmd->cmd.insert.buf, delta_cmd->cmd.insert.buf_len)) == NULL) ) {
        err = (errno != 0) ? errno : ENOMEM;
        break;
      }
    } else {
      /* this should not happen */
      ci_free_buf(buf);
      err = ENOMSG;
      break;
    }
    next = delta_cmd->next;
    _ci_free_cmd(delta_cmd);
    delta_cmd = next;
  }

  if (err != 0) {
    /* something failed: release the commands not yet consumed */
    while (delta_cmd != NULL) {
      next = delta_cmd->next;
      _ci_free_cmd(delta_cmd);
      delta_cmd = next;
    }
    return err;
  }

  /* hand the raw storage to the caller and discard the empty wrapper */
  *delta = buf->buf;
  *delta_len = buf->buf_len;
  buf->buf = NULL;
  buf->buf_pos = 0;
  buf->buf_len = 0;
  ci_free_buf(buf);

  return 0;
}

/*
 * Serialize a command list into the textual delta format and free the
 * list (on success and on failure alike).
 *
 * Layout per command (numbers in decimal):
 *   copy:    "c<begin>:<end>:"
 *   insert:  "i<len>:" followed by <len> literal bytes
 *
 * delta      receives the serialized, NUL-terminated buffer
 * delta_len  receives its length, not counting the terminating NUL
 *
 * Returns 0 on success.  On failure returns the errno left behind by
 * the ci_buf call (ENOMEM if errno was not set), ERANGE if a command
 * header could not be formatted, or ENOMSG for a command of unknown
 * type; *delta and *delta_len are then left untouched.
 *
 * NOTE(review): assumes ci_append_buf() releases the buffer itself when
 * it returns NULL -- confirm against ci_buf.c.
 */
static int _ci_delta_to_s( char **delta,
                           size_t *delta_len,
                           ci_cmd_t *delta_cmd )
{
  ci_cmd_t *next;
  char tmp_buf[TMP_BUF_LEN];
  int  len;
  int  err = 0;
  ci_buf_t *buf;

  if ( (buf = ci_new_buf()) == NULL ) {
    /* fall through to the cleanup below so the list is not leaked */
    err = (errno != 0) ? errno : ENOMEM;
  }

  while ( (err == 0) && (delta_cmd != NULL) ) {
    /* %zu is the conversion that matches size_t */
    if (delta_cmd->type == 'c') {
      len = snprintf(tmp_buf, TMP_BUF_LEN, "c%zu:%zu:",
                     delta_cmd->cmd.copy.begin,
                     delta_cmd->cmd.copy.end);
    } else if (delta_cmd->type == 'i') {
      len = snprintf(tmp_buf, TMP_BUF_LEN, "i%zu:",
                     delta_cmd->cmd.insert.buf_len);
    } else {
      /* this should not happen */
      ci_free_buf(buf);
      err = ENOMSG;
      break;
    }
    if ( (len < 0) || (len >= TMP_BUF_LEN) ) {
      /* formatting failed or the header was truncated */
      ci_free_buf(buf);
      err = ERANGE;
      break;
    }
    if ( (buf = ci_append_buf(buf, tmp_buf, len)) == NULL) {
      /* capture errno right away, before any free() can disturb it */
      err = (errno != 0) ? errno : ENOMEM;
      break;
    }
    if ( (delta_cmd->type == 'i') &&
         ((buf = ci_append_buf(buf, delta_cmd->cmd.insert.buf, delta_cmd->cmd.insert.buf_len)) == NULL) ) {
      err = (errno != 0) ? errno : ENOMEM;
      break;
    }
    next = delta_cmd->next;
    _ci_free_cmd(delta_cmd);
    delta_cmd = next;
  }

  if (err != 0) {
    /* something failed: release the commands not yet consumed */
    while (delta_cmd != NULL) {
      next = delta_cmd->next;
      _ci_free_cmd(delta_cmd);
      delta_cmd = next;
    }
    return err;
  }

  /* not strictly needed, but convenient: NUL-terminate the text */
  if ( (buf = ci_append_buf(buf, "\0", 1)) == NULL) {
    return (errno != 0) ? errno : ENOMEM;
  }

  /* hand the raw storage to the caller and discard the empty wrapper */
  *delta = buf->buf;
  *delta_len = buf->buf_len-1; /* minus the '\0' char (see above) */
  buf->buf = NULL;
  buf->buf_pos = 0;
  buf->buf_len = 0;
  ci_free_buf(buf);

  return 0;
}

/*
 * Release one command node.  Insert commands own their payload buffer,
 * which is released as well; copy commands carry no heap data.
 * delta_cmd must not be NULL.
 */
static void _ci_free_cmd( ci_cmd_t *delta_cmd )
{
  if (delta_cmd->type == 'i') {
    /* free(NULL) is a no-op, so an empty payload needs no special case */
    free(delta_cmd->cmd.insert.buf);
  }
  free(delta_cmd);
}

/*
 * Release every node of a command list, starting at cmd_list.
 * A NULL list is accepted and does nothing.
 */
static void _ci_free_cmd_list(ci_cmd_t *cmd_list)
{
  ci_cmd_t *node = cmd_list;

  while (node != NULL) {
    /* read the link before the node is destroyed */
    ci_cmd_t *following = node->next;
    _ci_free_cmd(node);
    node = following;
  }
}

/*
 * Allocate a copy command for the range [begin, end) and, if cmd is not
 * NULL, link it as cmd's successor.
 *
 * Returns the new node, or NULL if the allocation failed (cmd is left
 * unchanged in that case).
 */
static ci_cmd_t *_ci_add_copy_cmd(ci_cmd_t *cmd, size_t begin, size_t end)
{
  ci_cmd_t *node = (ci_cmd_t *)malloc(sizeof(ci_cmd_t));

  if (node == NULL) {
    return NULL;
  }

  node->type = 'c';
  node->cmd.copy.begin = begin;
  node->cmd.copy.end = end;
  node->next = NULL;

  if (cmd != NULL) {
    cmd->next = node;
  }
  return node;
}

/*
 * Allocate an insert command holding a private copy of the n bytes at s
 * and, if cmd is not NULL, link it as cmd's successor.
 *
 * n may be 0; s is not read in that case.
 *
 * Returns the new node, or NULL if an allocation failed (cmd is left
 * unchanged in that case).
 */
static ci_cmd_t *_ci_add_insert_cmd(ci_cmd_t *cmd, void *s, size_t n)
{
  ci_cmd_t *tmp;

  if ( (tmp = (ci_cmd_t *)malloc(sizeof(ci_cmd_t))) == NULL) {
    return NULL;
  }
  tmp->next = NULL;
  tmp->type = 'i';
  /* malloc(0) is allowed to return NULL; request at least one byte so
   * that NULL always means "out of memory" */
  if ( (tmp->cmd.insert.buf = malloc(n != 0 ? n : 1)) == NULL) {
    free(tmp);
    return NULL;
  }
  if (n != 0) {
    memcpy(tmp->cmd.insert.buf, s, n);
  }
  tmp->cmd.insert.buf_len = n;
  if (cmd != NULL) {
    cmd->next = tmp;
  }
  return tmp;
}

/*
 * Extend the payload of the insert command cmd by the n bytes at s.
 *
 * Returns cmd on success.  Returns NULL if the combined length would
 * overflow size_t or the reallocation fails; cmd then still owns its
 * original, unmodified payload, so freeing the command stays safe.
 */
static ci_cmd_t *_ci_append_insert_cmd(ci_cmd_t *cmd, void *s, size_t n)
{
  size_t old_len = cmd->cmd.insert.buf_len;
  void *grown;

  if (n == 0) {
    return cmd;
  }
  if (n > (size_t)-1 - old_len) {
    return NULL;
  }
  /* keep the old pointer until realloc has succeeded, otherwise the
   * existing payload would be leaked on failure */
  if ( (grown = realloc(cmd->cmd.insert.buf, old_len + n)) == NULL ) {
    return NULL;
  }
  memcpy((unsigned char *)grown + old_len, s, n);
  cmd->cmd.insert.buf = grown;
  cmd->cmd.insert.buf_len = old_len + n;
  return cmd;
}


/*
 * Apply a binary delta to the reference buffer s2 (n2 bytes).
 *
 * s1         receives the reconstructed buffer on success
 * n1         receives its length in bytes
 * delta      binary delta of delta_len bytes
 *
 * Returns 0 on success, otherwise the error code reported by the parser
 * or by the apply step.
 */
int ci_apply_b( void **s1,
                size_t *n1,
                void *s2,
                size_t n2,
                void *delta,
                size_t delta_len )
{
  ci_cmd_t *commands;
  int status = _ci_b_to_cmd(&commands, delta, delta_len);

  if (status == 0) {
    /* the apply step takes over (and frees) the command list */
    status = _ci_apply_cmd_to_b(s1, n1, s2, n2, commands);
  }
  return status;
}

/*
 * Apply a NUL-terminated textual delta to the NUL-terminated reference
 * string s2.
 *
 * s1  receives the reconstructed, NUL-terminated string on success.
 *     It is set to NULL if the result was built but could not be
 *     terminated (ENOMEM), and left untouched on earlier failures.
 *
 * Returns 0 on success, ENOMEM if the result could not be grown for the
 * terminator, otherwise the error code reported by the parser or by the
 * apply step.
 */
int ci_apply_s( char **s1,
                char *s2,
                char *delta)
{
  int ret;
  ci_cmd_t *cmd_list;
  void *result = NULL;
  size_t result_len;
  char *terminated;

  if ( (ret = _ci_s_to_cmd(&cmd_list, delta, strlen(delta))) != 0 ) {
    return ret;
  }

  if ( (ret = _ci_apply_cmd_to_b(&result, &result_len, (void *)s2, strlen(s2), cmd_list)) != 0 ) {
    return ret;
  }

  /* realloc may move the block: the returned pointer must be used */
  if ( (terminated = realloc(result, result_len + 1)) == NULL) {
    free(result);
    *s1 = NULL;
    return ENOMEM;
  }
  terminated[result_len] = '\0';
  *s1 = terminated;
  return 0;
}

/*
 * Apply a textual delta of delta_len bytes to the reference buffer s2
 * (n2 bytes).
 *
 * s1  receives the reconstructed buffer on success; a NUL byte is
 *     stored right behind the data.  It is set to NULL if the result
 *     was built but could not be terminated (ENOMEM), and left
 *     untouched on earlier failures.
 * n1  receives the length of the result, not counting that NUL;
 *     written on success only
 *
 * Returns 0 on success, ENOMEM if the result could not be grown for the
 * terminator, otherwise the error code reported by the parser or by the
 * apply step.
 */
int ci_apply_sn( char **s1,
                 size_t *n1,
                 char *s2,
                 size_t n2,
                 char *delta,
                 size_t delta_len )
{
  int ret;
  ci_cmd_t *cmd_list;
  void *result = NULL;
  size_t result_len;
  char *terminated;

  if ( (ret = _ci_s_to_cmd(&cmd_list, delta, delta_len)) != 0 ) {
    return ret;
  }

  if ( (ret = _ci_apply_cmd_to_b(&result, &result_len, (void *)s2, n2, cmd_list)) != 0 ) {
    return ret;
  }

  /* realloc may move the block: the returned pointer must be used */
  if ( (terminated = realloc(result, result_len + 1)) == NULL) {
    free(result);
    *s1 = NULL;
    return ENOMEM;
  }
  terminated[result_len] = '\0';
  *s1 = terminated;
  *n1 = result_len;
  return 0;
}

static int _ci_s_to_cmd( ci_cmd_t **cmd_list,
			 char *delta,
			 size_t delta_len )
{
  size_t index;
  char tmp_buf[TMP_BUF_LEN];
  int tmp_buf_i;
  enum state_enum { START,
		    COPY_BEGIN,
		    COPY_END,
		    INSERT_LENGTH,
		    INSERT_BUF } state;

  ci_cmd_t cmd;
  ci_cmd_t *tmp_cmd;
  ci_cmd_t *first_cmd;
  ci_cmd_t *last_cmd;
  char *endptr;
  size_t buf_len;
  
  index = 0;
  tmp_cmd = NULL;
  first_cmd = NULL;
  last_cmd = NULL;
  state = START;
  while (index < delta_len) {
    switch (state) {

    case START:
      if (delta[index] == 'c') {
	state = COPY_BEGIN;
	cmd.type ='c';
      } else if (delta[index] == 'i') {
	state = INSERT_LENGTH;
	cmd.type = 'i';
      } else {
	_ci_free_cmd_list(first_cmd);
	return EBADR;
      }
      tmp_buf_i = 0;
      break;

    case COPY_BEGIN:
      if ( strchr("0123456789", delta[index]) != NULL ) {
	tmp_buf[tmp_buf_i++] = delta[index];
      } else if (delta[index] == ':') {
	state = COPY_END;
	tmp_buf[tmp_buf_i++] = '\0';
	if ( (*tmp_buf == '\0') ||
	     ((cmd.cmd.copy.begin=strtoul(tmp_buf,&endptr,10))==ULONG_MAX) ||
	     (*endptr != '\0') ){
	  _ci_free_cmd_list(first_cmd);
	  return ERANGE;
	}
	tmp_buf_i = 0;
      } else {
	_ci_free_cmd_list(first_cmd);
	return EBADR;
      }
      break;

    case COPY_END:
      if ( strchr("0123456789", delta[index]) != NULL ) {
	tmp_buf[tmp_buf_i++] = delta[index];
      } else if (delta[index] == ':') {
	state = START;
	tmp_buf[tmp_buf_i++] = '\0';
	if ( (*tmp_buf == '\0') ||
	     ((cmd.cmd.copy.end=strtoul(tmp_buf,&endptr,10))==ULONG_MAX) ||
	     (*endptr != '\0') ){
	  _ci_free_cmd_list(first_cmd);
	  return ERANGE;
	}
	tmp_buf_i = 0;
	if ( (tmp_cmd = _ci_add_copy_cmd(last_cmd, cmd.cmd.copy.begin, cmd.cmd.copy.end)) == NULL) {
	  _ci_free_cmd_list(first_cmd);
	  return ENOMEM;
	}
      } else {
	_ci_free_cmd_list(first_cmd);
	return EBADR;
      }
      break;

    case INSERT_LENGTH:
      if ( strchr("0123456789", delta[index]) != NULL ) {
	tmp_buf[tmp_buf_i++] = delta[index];
      } else if (delta[index] == ':') {
	state = INSERT_BUF;
	tmp_buf[tmp_buf_i++] = '\0';
	if ( (*tmp_buf == '\0') ||
	     ((cmd.cmd.insert.buf_len=strtoul(tmp_buf,&endptr,10))==ULONG_MAX) ||
	     (*endptr != '\0') ){
	  _ci_free_cmd_list(first_cmd);
	  return ERANGE;
	}
	cmd.cmd.insert.buf = delta + index + 1;
	tmp_buf_i = 0;
	buf_len = 0;
      } else {
	_ci_free_cmd_list(first_cmd);
	return EBADR;
      }
      break;

    case INSERT_BUF:
      buf_len++;
      if (buf_len < cmd.cmd.insert.buf_len) {
	/* skip one character */
      } else {
	state = START;
	if ( (tmp_cmd = _ci_add_insert_cmd(last_cmd, cmd.cmd.insert.buf, cmd.cmd.insert.buf_len)) == NULL) {
	  _ci_free_cmd_list(first_cmd);
	  return ENOMEM;
	}
      }
      break;
    }
    if ( (first_cmd == NULL) && (tmp_cmd != NULL) ) {
      first_cmd = tmp_cmd;
    }
    if ( tmp_cmd != NULL) {
      last_cmd = tmp_cmd;
      tmp_cmd = NULL;
    }
    index ++;
  }
  if (state != START) {
    _ci_free_cmd_list(first_cmd);
    return EBADR;
  }
  
  *cmd_list = first_cmd;

  return 0;
}


/*
 * Parse a binary delta into a command list.
 *
 * Layout per command, using the host's native size_t representation:
 *   copy:    'c' <begin: size_t> <end: size_t>
 *   insert:  'i' <len: size_t>   <len literal bytes>
 *
 * cmd_list  receives the head of the list on success (NULL for an empty
 *           delta); ownership passes to the caller
 *
 * A zero-length insert is accepted and produces no command.
 *
 * Returns 0 on success, EBADR for an unknown opcode or a command that
 * extends past delta_len, ENOMEM on allocation failure.  On failure
 * every command built so far is freed and *cmd_list is left untouched.
 */
static int _ci_b_to_cmd( ci_cmd_t **cmd_list,
                         void *delta,
                         size_t delta_len )
{
  unsigned char *delta_array = (unsigned char *)delta;
  ci_cmd_t *first_cmd = NULL;
  ci_cmd_t *last_cmd = NULL;
  size_t index = 0;

  while (index < delta_len) {
    ci_cmd_t *tmp_cmd = NULL;
    /* bytes available behind the opcode byte */
    size_t remaining = delta_len - index - 1;

    if (delta_array[index] == 'c') {
      size_t begin;
      size_t end;

      if (remaining < 2 * sizeof(size_t)) {
        _ci_free_cmd_list(first_cmd);
        return EBADR;
      }
      /* the fields are not aligned inside the stream: copy them out */
      memcpy(&begin, delta_array + index + 1, sizeof(size_t));
      memcpy(&end, delta_array + index + 1 + sizeof(size_t), sizeof(size_t));
      if ( (tmp_cmd = _ci_add_copy_cmd(last_cmd, begin, end)) == NULL) {
        _ci_free_cmd_list(first_cmd);
        return ENOMEM;
      }
      index += 1 + 2 * sizeof(size_t);
    } else if (delta_array[index] == 'i') {
      size_t buf_len;

      if (remaining < sizeof(size_t)) {
        _ci_free_cmd_list(first_cmd);
        return EBADR;
      }
      memcpy(&buf_len, delta_array + index + 1, sizeof(size_t));
      /* compare against what is left instead of adding, so a huge
       * length cannot wrap around */
      if (buf_len > remaining - sizeof(size_t)) {
        _ci_free_cmd_list(first_cmd);
        return EBADR;
      }
      if (buf_len != 0) {
        if ( (tmp_cmd = _ci_add_insert_cmd(last_cmd, delta_array + index + 1 + sizeof(size_t), buf_len)) == NULL) {
          _ci_free_cmd_list(first_cmd);
          return ENOMEM;
        }
      }
      index += 1 + sizeof(size_t) + buf_len;
    } else {
      _ci_free_cmd_list(first_cmd);
      return EBADR;
    }

    if (tmp_cmd != NULL) {
      if (first_cmd == NULL) {
        first_cmd = tmp_cmd;
      }
      last_cmd = tmp_cmd;
    }
  }

  *cmd_list = first_cmd;
  return 0;
}

/*
 * Execute a command list against the reference buffer s2 (n2 bytes) and
 * free the list (on success and on failure alike).
 *
 * Copy commands append the range [begin, end) of s2 to the output,
 * insert commands append their literal payload.
 *
 * result      receives the output buffer, detached from the ci_buf
 * result_len  receives its length in bytes
 *
 * Returns 0 on success.  On failure returns ERANGE if a copy range is
 * reversed or reaches past n2, ENOMSG for a command of unknown type, or
 * the errno left behind by the ci_buf call (ENOMEM if errno was not
 * set); *result and *result_len are then left untouched.
 *
 * NOTE(review): assumes ci_append_buf() releases the buffer itself when
 * it returns NULL -- confirm against ci_buf.c.
 */
static int _ci_apply_cmd_to_b(void **result,
                              size_t *result_len,
                              void *s2,
                              size_t n2,
                              ci_cmd_t *cmd_list)
{
  ci_cmd_t *next;
  ci_buf_t *buf;
  int err = 0;

  if ( (buf = ci_new_buf()) == NULL ) {
    /* fall through to the cleanup below so the list is not leaked */
    err = (errno != 0) ? errno : ENOMEM;
  }

  while ( (err == 0) && (cmd_list != NULL) ) {
    if (cmd_list->type == 'c') {
      size_t begin = cmd_list->cmd.copy.begin;
      size_t end = cmd_list->cmd.copy.end;

      /* the delta is external input: never read outside of s2 */
      if ( (begin > end) || (end > n2) ) {
        ci_free_buf(buf);
        err = ERANGE;
        break;
      }
      if ( (buf = ci_append_buf(buf, (unsigned char *)s2 + begin, end - begin)) == NULL) {
        /* capture errno right away, before any free() can disturb it */
        err = (errno != 0) ? errno : ENOMEM;
        break;
      }
    } else if (cmd_list->type == 'i') {
      if ( (buf = ci_append_buf(buf, cmd_list->cmd.insert.buf, cmd_list->cmd.insert.buf_len)) == NULL) {
        err = (errno != 0) ? errno : ENOMEM;
        break;
      }
    } else {
      /* this should not happen */
      ci_free_buf(buf);
      err = ENOMSG;
      break;
    }
    next = cmd_list->next;
    _ci_free_cmd(cmd_list);
    cmd_list = next;
  }

  if (err != 0) {
    /* something failed: release the commands not yet consumed */
    while (cmd_list != NULL) {
      next = cmd_list->next;
      _ci_free_cmd(cmd_list);
      cmd_list = next;
    }
    return err;
  }

  /* hand the raw storage to the caller and discard the empty wrapper */
  *result = buf->buf;
  *result_len = buf->buf_len;

  buf->buf = NULL;
  buf->buf_pos = 0;
  buf->buf_len = 0;
  ci_free_buf(buf);

  return 0;
}
