#include <stdlib.h>
#include <string.h>
#include <stdint.h>

#include "ant.h"
#include "errors.h"
#include "runtime.h"
#include "internal.h"
#include "descriptors.h"
#include "utf8.h"

#include "modules/textcodec.h"
#include "modules/buffer.h"
#include "modules/symbol.h"

// Prototype objects for TextEncoder / TextDecoder instances. Both start out as
// 0, are assigned in init_textcodec_module(), and are handed to
// js_instance_proto_from_new_target() by the respective constructors.
static ant_value_t g_textencoder_proto = 0;
static ant_value_t g_textdecoder_proto = 0;

/*
 * Allocate a zero-initialised decoder state and record its configuration.
 *
 * enc        - encoding the decoder will use
 * fatal      - stored in the state's `fatal` flag
 * ignore_bom - stored in the state's `ignore_bom` flag
 *
 * Returns the new state, or NULL when the allocation fails. The result comes
 * from calloc(), so it is released with free().
 */
td_state_t *td_state_new(td_encoding_t enc, bool fatal, bool ignore_bom) {
  td_state_t *state = calloc(1, sizeof *state);
  if (state != NULL) {
    state->encoding = enc;
    state->fatal = fatal;
    state->ignore_bom = ignore_bom;
  }
  return state;
}

/*
 * Recover the decoder state pointer stored in an object's SLOT_DATA slot.
 * The pointer is kept as a numeric value; any non-numeric slot content
 * yields NULL.
 */
static td_state_t *td_get_state(ant_value_t obj) {
  ant_value_t slot = js_get_slot(obj, SLOT_DATA);
  if (vtype(slot) == T_NUM) {
    uintptr_t addr = (uintptr_t)(size_t)js_getnum(slot);
    return (td_state_t *)addr;
  }
  return NULL;
}

/*
 * Finalizer installed on TextDecoder instances: scans the object's extra
 * slots for the first numeric SLOT_DATA entry and frees the decoder state
 * it points to. Objects without extra slots are left untouched.
 */
static void td_finalize(ant_t *js, ant_object_t *obj) {
  ant_extra_slot_t *slots = (ant_extra_slot_t *)obj->extra_slots;
  if (!obj->extra_slots) return;

  for (uint8_t idx = 0; idx < obj->extra_count; idx++) {
    ant_extra_slot_t *slot = &slots[idx];
    if (slot->slot != SLOT_DATA) continue;
    if (vtype(slot->value) != T_NUM) continue;

    td_state_t *state = (td_state_t *)(uintptr_t)(size_t)js_getnum(slot->value);
    free(state);
    return;
  }
}

/*
 * Map an encoding label to a td_encoding_t value.
 *
 * s   - label bytes (need not be NUL-terminated); the caller is expected to
 *       have trimmed surrounding whitespace already
 * len - number of bytes in `s`
 *
 * Returns the encoding as an int, or -1 when the label is not recognised.
 *
 * Matching is ASCII case-insensitive and independent of the process locale.
 * Label lengths are derived from the literals at compile time, so they cannot
 * drift out of sync with the text (the hand-written lengths for
 * "x-unicode20utf8" and "iso-10646-ucs-2" used to be wrong, which made those
 * two labels unmatchable).
 */
static int resolve_encoding(const char *s, size_t len) {
#define TD_LABEL(text, encoding) { (text), sizeof(text) - 1, (encoding) }
  /* All labels are stored in lowercase; the comparison below relies on that. */
  static const struct { const char *label; size_t len; td_encoding_t enc; } map[] = {
    TD_LABEL("unicode-1-1-utf-8", TD_ENC_UTF8),
    TD_LABEL("unicode11utf8",     TD_ENC_UTF8),
    TD_LABEL("unicode20utf8",     TD_ENC_UTF8),
    TD_LABEL("utf-8",             TD_ENC_UTF8),
    TD_LABEL("utf8",              TD_ENC_UTF8),
    TD_LABEL("x-unicode20utf8",   TD_ENC_UTF8),
    TD_LABEL("windows-1252",      TD_ENC_WINDOWS_1252),
    TD_LABEL("ascii",             TD_ENC_WINDOWS_1252),
    TD_LABEL("unicodefffe",       TD_ENC_UTF16BE),
    TD_LABEL("utf-16be",          TD_ENC_UTF16BE),
    TD_LABEL("csunicode",         TD_ENC_UTF16LE),
    TD_LABEL("iso-10646-ucs-2",   TD_ENC_UTF16LE),
    TD_LABEL("ucs-2",             TD_ENC_UTF16LE),
    TD_LABEL("unicode",           TD_ENC_UTF16LE),
    TD_LABEL("unicodefeff",       TD_ENC_UTF16LE),
    TD_LABEL("utf-16",            TD_ENC_UTF16LE),
    TD_LABEL("utf-16le",          TD_ENC_UTF16LE),
    TD_LABEL("iso-8859-2",        TD_ENC_ISO_8859_2),
  };
#undef TD_LABEL

  for (size_t i = 0; i < sizeof map / sizeof map[0]; i++) {
    if (len != map[i].len) continue;

    size_t k = 0;
    while (k < len) {
      unsigned char c = (unsigned char)s[k];
      /* Fold only A-Z; every other byte must match exactly. */
      if (c >= 'A' && c <= 'Z') c = (unsigned char)(c + ('a' - 'A'));
      if (c != (unsigned char)map[i].label[k]) break;
      k++;
    }
    if (k == len) return (int)map[i].enc;
  }
  return -1;
}

/*
 * Return the lowercase name reported for an encoding. Any value that is not
 * one of the UTF-16, windows-1252 or iso-8859-2 constants maps to "utf-8".
 */
static const char *encoding_name(td_encoding_t enc) {
  if (enc == TD_ENC_UTF16LE) return "utf-16le";
  if (enc == TD_ENC_UTF16BE) return "utf-16be";
  if (enc == TD_ENC_WINDOWS_1252) return "windows-1252";
  if (enc == TD_ENC_ISO_8859_2) return "iso-8859-2";
  return "utf-8";
}

/* True for the five ASCII whitespace bytes: TAB, LF, FF, CR and SPACE. */
static int label_is_ascii_ws(unsigned char c) {
  return c == 0x09 || c == 0x0A || c == 0x0C || c == 0x0D || c == 0x20;
}

/*
 * Strip leading and trailing ASCII whitespace from an encoding label.
 *
 * s       - label bytes (need not be NUL-terminated)
 * len     - number of bytes in `s`
 * out_len - receives the length of the trimmed label
 *
 * Returns a pointer into `s` at the first non-whitespace byte; nothing is
 * copied or modified.
 *
 * Only TAB, LF, FF, CR and SPACE are removed. Other bytes <= 0x20 (NUL,
 * vertical tab, ...) are kept so that labels containing them fail the lookup
 * instead of being silently accepted.
 */
static const char *trim_label(const char *s, size_t len, size_t *out_len) {
  while (len > 0 && label_is_ascii_ws((unsigned char)s[0])) { s++; len--; }
  while (len > 0 && label_is_ascii_ws((unsigned char)s[len - 1])) { len--; }
  *out_len = len;
  return s;
}

/* Getter for TextEncoder.prototype.encoding: always the string "utf-8". */
static ant_value_t js_textencoder_get_encoding(ant_t *js, ant_value_t *args, int nargs) {
  static const char name[] = "utf-8";
  return js_mkstr(js, name, sizeof name - 1);
}

/*
 * Copy `str_len` bytes of `str` into a fresh array buffer and wrap it in a
 * Uint8Array.
 *
 * A three-byte sequence starting ED A0..BF (the UTF-8 form of a surrogate
 * code point) is replaced by EF BF BD (U+FFFD); the replacement has the same
 * length, so the output is always exactly `str_len` bytes. Such a lead pair
 * without a third byte at the very end of the input is copied unchanged.
 *
 * Returns an "out of memory" error value if the buffer cannot be created.
 */
ant_value_t te_encode(ant_t *js, const char *str, size_t str_len) {
  static const uint8_t replacement[3] = { 0xEF, 0xBF, 0xBD };

  ArrayBufferData *ab = create_array_buffer_data(str_len);
  if (!ab) return js_mkerr(js, "out of memory");

  if (str_len > 0) {
    const uint8_t *in = (const uint8_t *)str;
    uint8_t *dst = ab->data;

    for (size_t pos = 0; pos < str_len; ) {
      bool is_surrogate = in[pos] == 0xED
        && pos + 2 < str_len
        && in[pos + 1] >= 0xA0
        && in[pos + 1] <= 0xBF;

      if (!is_surrogate) {
        dst[pos] = in[pos];
        pos++;
        continue;
      }

      memcpy(dst + pos, replacement, sizeof replacement);
      pos += sizeof replacement;
    }
  }

  return create_typed_array(js, TYPED_ARRAY_UINT8, ab, 0, str_len, "Uint8Array");
}

/*
 * TextEncoder.prototype.encode(input).
 *
 * A missing or undefined argument encodes the empty string; a non-string
 * argument is converted with js_tostring_val() first (a conversion error is
 * returned as-is). The resulting bytes are handed to te_encode().
 */
static ant_value_t js_textencoder_encode(ant_t *js, ant_value_t *args, int nargs) {
  const char *text = "";
  size_t text_len = 0;

  if (nargs > 0 && vtype(args[0]) != T_UNDEF) {
    ant_value_t input = args[0];
    if (vtype(input) != T_STR) {
      input = js_tostring_val(js, input);
      if (is_err(input)) return input;
    }

    text = js_getstr(js, input, &text_len);
    if (text == NULL) {
      text = "";
      text_len = 0;
    }
  }

  return te_encode(js, text, text_len);
}

/*
 * TextEncoder.prototype.encodeInto(source, destination).
 *
 * Encodes `source` (converted to a string when it is neither a string nor
 * undefined) into the typed array `destination`, stopping before the first
 * code point that does not fit completely.
 *
 * Returns an object { read, written }: `written` is the number of bytes
 * stored, `read` counts consumed input in UTF-16 code units (2 for code
 * points in 0x10000..0x10FFFF, otherwise 1). Surrogate code points and
 * undecodable input are written as U+FFFD.
 *
 * Errors: TypeError when fewer than two arguments are given or the second
 * argument has no typed-array data; a string-conversion error is returned
 * unchanged.
 */
static ant_value_t js_textencoder_encode_into(ant_t *js, ant_value_t *args, int nargs) {
  if (nargs < 2) return js_mkerr_typed(js, JS_ERR_TYPE, "encodeInto requires 2 arguments");

  size_t str_len = 0;
  const char *str = "";
  if (vtype(args[0]) == T_STR) {
    str = js_getstr(js, args[0], &str_len);
    if (!str) { str = ""; str_len = 0; }
  } else if (vtype(args[0]) != T_UNDEF) {
    ant_value_t sv = js_tostring_val(js, args[0]);
    if (is_err(sv)) return sv;
    str = js_getstr(js, sv, &str_len);
    if (!str) { str = ""; str_len = 0; }
  }

  TypedArrayData *ta = buffer_get_typedarray_data(args[1]);
  if (!ta) return js_mkerr_typed(js, JS_ERR_TYPE, "Second argument must be a Uint8Array");

  uint8_t *dest = (ta->buffer && !ta->buffer->is_detached)
    ? ta->buffer->data + ta->byte_offset : NULL;
  /* Without backing storage nothing can be stored, so nothing may be
   * reported as written or read either. */
  size_t available = dest ? ta->byte_length : 0;

  const utf8proc_uint8_t *src = (const utf8proc_uint8_t *)str;
  utf8proc_ssize_t src_len = (utf8proc_ssize_t)str_len;
  utf8proc_ssize_t pos = 0;

  size_t written = 0;
  size_t read_units = 0;

  while (pos < src_len) {
    /* Start from "invalid" so a failing utf8_next() that leaves cp untouched
     * is not read as an indeterminate value. */
    utf8proc_int32_t cp = -1;
    utf8proc_ssize_t n = utf8_next(src + pos, src_len - pos, &cp);
    /* NOTE(review): utf8_next() is assumed to return the number of bytes
     * consumed; guard against a non-positive result so the loop always
     * advances. */
    if (n <= 0) { cp = -1; n = 1; }

    utf8proc_uint8_t tmp[4];
    utf8proc_ssize_t enc_len = 0;
    bool is_surrogate = (cp >= 0xD800 && cp <= 0xDFFF);

    if (cp >= 0 && !is_surrogate) enc_len = utf8proc_encode_char(cp, tmp);
    if (enc_len <= 0) {
      /* Surrogates, decode failures and unencodable values become U+FFFD. */
      tmp[0] = 0xEF; tmp[1] = 0xBF; tmp[2] = 0xBD;
      enc_len = 3;
    }

    /* Never write a partial code point. */
    if (written + (size_t)enc_len > available) break;
    if (dest) memcpy(dest + written, tmp, (size_t)enc_len);

    written += (size_t)enc_len;
    pos += n;
    read_units += (cp >= 0x10000 && cp <= 0x10FFFF) ? 2 : 1;
  }

  ant_value_t result = js_mkobj(js);
  js_set(js, result, "read", js_mknum((double)read_units));
  js_set(js, result, "written", js_mknum((double)written));

  return result;
}

/*
 * TextEncoder constructor. Fails with a TypeError when new_target is
 * undefined; otherwise creates an empty object and, when a prototype object
 * can be resolved from new_target, installs it on the instance.
 */
static ant_value_t js_textencoder_ctor(ant_t *js, ant_value_t *args, int nargs) {
  if (vtype(js->new_target) == T_UNDEF) {
    return js_mkerr_typed(js, JS_ERR_TYPE, "TextEncoder constructor requires 'new'");
  }

  ant_value_t instance = js_mkobj(js);
  ant_value_t instance_proto = js_instance_proto_from_new_target(js, g_textencoder_proto);
  if (is_object_type(instance_proto)) {
    js_set_proto_init(instance, instance_proto);
  }

  return instance;
}

/*
 * Getter for TextDecoder.prototype.encoding. Reports the name of the
 * receiver's encoding, falling back to "utf-8" when the receiver carries no
 * decoder state.
 */
static ant_value_t js_textdecoder_get_encoding(ant_t *js, ant_value_t *args, int nargs) {
  td_encoding_t enc = TD_ENC_UTF8;
  td_state_t *state = td_get_state(js->this_val);
  if (state != NULL) enc = state->encoding;

  const char *label = encoding_name(enc);
  return js_mkstr(js, label, strlen(label));
}

/* Getter for TextDecoder.prototype.fatal; false when the receiver has no decoder state. */
static ant_value_t js_textdecoder_get_fatal(ant_t *js, ant_value_t *args, int nargs) {
  td_state_t *state = td_get_state(js->this_val);
  if (state == NULL) return js_false;
  return state->fatal ? js_true : js_false;
}

/* Getter for TextDecoder.prototype.ignoreBOM; false when the receiver has no decoder state. */
static ant_value_t js_textdecoder_get_ignore_bom(ant_t *js, ant_value_t *args, int nargs) {
  td_state_t *state = td_get_state(js->this_val);
  if (state == NULL) return js_false;
  return state->ignore_bom ? js_true : js_false;
}

/*
 * Read one 16-bit code unit from the two bytes at `p`.
 * `be` selects big-endian byte order; otherwise the bytes are little-endian.
 */
static inline uint16_t u16_read(const uint8_t *p, bool be) {
  const unsigned high = be ? p[0] : p[1];
  const unsigned low  = be ? p[1] : p[0];
  return (uint16_t)((high << 8) | low);
}

/*
 * Append the UTF-8 encoding of `cp` to `out` at offset `o` and return the
 * offset just past it. When utf8proc_encode_char() produces nothing, the
 * offset is returned unchanged.
 */
static inline size_t u8_emit(char *out, size_t o, utf8proc_int32_t cp) {
  utf8proc_uint8_t *dst = (utf8proc_uint8_t *)(out + o);
  utf8proc_ssize_t produced = utf8proc_encode_char(cp, dst);
  if (produced <= 0) return o;
  return o + (size_t)produced;
}

/*
 * Write the three-byte UTF-8 encoding of U+FFFD (EF BF BD) to `out` at
 * offset `o` and return the offset just past it.
 */
static inline size_t u8_fffd(char *out, size_t o) {
  static const unsigned char replacement[3] = { 0xEF, 0xBF, 0xBD };
  memcpy(out + o, replacement, sizeof replacement);
  return o + sizeof replacement;
}

// UTF-16 surrogate helpers.
// U16_IS_HIGH: code unit is in 0xD800..0xDBFF (high/lead surrogate).
// U16_IS_LOW:  code unit is in 0xDC00..0xDFFF (low/trail surrogate).
// U16_PAIR:    combine a high and a low surrogate into the code point they
//              encode (0x10000 plus the two 10-bit payloads).
// Each macro evaluates its argument(s) more than once or inside arithmetic,
// so pass side-effect-free expressions.
#define U16_IS_HIGH(cu) ((cu) >= 0xD800 && (cu) <= 0xDBFF)
#define U16_IS_LOW(cu)  ((cu) >= 0xDC00 && (cu) <= 0xDFFF)
#define U16_PAIR(hi,lo) (0x10000 + ((uint32_t)((hi) - 0xD800) << 10) + ((lo) - 0xDC00))

/*
 * Map one windows-1252 byte to its Unicode code point.
 * Bytes 0x80..0x9F go through the lookup table; every other byte maps to the
 * code point with the same numeric value.
 */
static uint32_t decode_windows_1252_byte(uint8_t byte) {
  /* Code points for bytes 0x80..0x9F, indexed by (byte - 0x80). */
  static const uint16_t c1_range[32] = {
    /* 0x80 */ 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
    /* 0x88 */ 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F,
    /* 0x90 */ 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
    /* 0x98 */ 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178,
  };

  const int in_table = (byte >= 0x80 && byte < 0xA0);
  return in_table ? (uint32_t)c1_range[byte - 0x80] : (uint32_t)byte;
}

/*
 * Map one iso-8859-2 byte to its Unicode code point.
 * Bytes below 0xA0 map to the code point with the same numeric value; bytes
 * 0xA0..0xFF go through the lookup table.
 */
static uint32_t decode_iso_8859_2_byte(uint8_t byte) {
  /* Code points for bytes 0xA0..0xFF, indexed by (byte - 0xA0). */
  static const uint16_t high_half[96] = {
    /* 0xA0 */ 0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7,
    /* 0xA8 */ 0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B,
    /* 0xB0 */ 0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7,
    /* 0xB8 */ 0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C,
    /* 0xC0 */ 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
    /* 0xC8 */ 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
    /* 0xD0 */ 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
    /* 0xD8 */ 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
    /* 0xE0 */ 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
    /* 0xE8 */ 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
    /* 0xF0 */ 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
    /* 0xF8 */ 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9,
  };

  return (byte >= 0xA0) ? (uint32_t)high_half[byte - 0xA0] : (uint32_t)byte;
}

/*
 * Decode `len` bytes of a single-byte encoding into UTF-8 at `out`.
 * windows-1252 is used when st->encoding says so; any other encoding value
 * is decoded as iso-8859-2. Returns the number of bytes written to `out`.
 */
static utf8proc_ssize_t decode_single_byte(td_state_t *st, const uint8_t *src, size_t len, char *out) {
  /* The encoding cannot change mid-call, so pick the byte mapper once. */
  uint32_t (*map_byte)(uint8_t) = (st->encoding == TD_ENC_WINDOWS_1252)
    ? decode_windows_1252_byte
    : decode_iso_8859_2_byte;

  size_t out_pos = 0;
  const uint8_t *end = src + len;
  for (const uint8_t *p = src; p != end; p++) {
    out_pos = u8_emit(out, out_pos, (utf8proc_int32_t)map_byte(*p));
  }
  return (utf8proc_ssize_t)out_pos;
}

/*
 * Decode `len` bytes of UTF-16 from `src` into UTF-8 at `out`.
 *
 * st     - decoder state; st->encoding selects big- vs little-endian, and
 *          bom_seen / pending / pending_len are updated here
 * src    - input bytes (td_decode passes any previously pending bytes
 *          prepended to the new input)
 * len    - number of input bytes
 * out    - output buffer; td_decode sizes it at 3 bytes per input byte + 1
 * stream - true when more input may follow: an incomplete tail is stashed
 *          in st->pending instead of being treated as an error
 *
 * Returns the number of bytes written to `out`, or -1 when st->fatal is set
 * and malformed input is encountered.
 *
 * The three labels below live inside the loop body: `break` leaves the loop
 * and `continue` resumes it. pend_tail is also entered by a goto from before
 * the loop.
 */
static utf8proc_ssize_t utf16_decode(td_state_t *st, const uint8_t *src, size_t len, char *out, bool stream) {
  bool be = (st->encoding == TD_ENC_UTF16BE);
  size_t i = 0, o = 0;
  size_t avail;

  // BOM handling happens once per decode run, as soon as two bytes are
  // available. A leading U+FEFF is skipped unless ignore_bom is set.
  if (!st->bom_seen) {
    // NOTE: with i == 0 this jump reads src[0], so len must be >= 1;
    // td_decode returns early when there are no bytes at all.
    if (len < 2) goto pend_tail;
    if (u16_read(src, be) == 0xFEFF && !st->ignore_bom) i = 2;
    st->bom_seen = true;
  }

  while (i < len) {
    avail = len - i;

    // Exactly one byte left: not enough for a code unit.
    if (avail < 2) goto pend_tail;
    uint16_t cu = u16_read(src + i, be);
    i += 2;

    // Non-surrogate code unit: it is the code point itself.
    if (!U16_IS_HIGH(cu) && !U16_IS_LOW(cu)) {
      o = u8_emit(out, o, (utf8proc_int32_t)cu);
      continue;
    }

    // A low surrogate with no preceding high surrogate is malformed.
    if (U16_IS_LOW(cu)) goto err;

    // cu is a high surrogate; a full low surrogate must follow.
    avail = len - i;
    if (avail < 2) goto pend_hi;

    uint16_t lo = u16_read(src + i, be);
    if (U16_IS_LOW(lo)) { i += 2; o = u8_emit(out, o, (utf8proc_int32_t)U16_PAIR(cu, lo)); continue; }

    // High surrogate followed by something else: `i` is left pointing at the
    // following unit so the next iteration decodes it on its own.
    goto err;

  pend_tail:
    // A single trailing byte. Streaming: keep it for the next call.
    // Otherwise: fail when fatal, else emit one U+FFFD.
    if (stream) { st->pending[0] = src[i]; st->pending_len = 1; }
    else { if (st->fatal) return -1; o = u8_fffd(out, o); }
    break;

  pend_hi:
    // Input ends after a high surrogate (plus at most one extra byte).
    // Streaming: keep the surrogate and any extra byte (2 or 3 bytes, starting
    // at i - 2). Otherwise: fail when fatal, else emit U+FFFD for the
    // surrogate and a second U+FFFD when an odd byte trails it.
    if (stream) { st->pending_len = (int)(len - (i - 2)); memcpy(st->pending, src + i - 2, (size_t)st->pending_len); }
    else { if (st->fatal) return -1; o = u8_fffd(out, o); if (avail == 1) o = u8_fffd(out, o); }
    break;

  err:
    // Unpaired surrogate: fail when fatal, else substitute U+FFFD and go on.
    if (st->fatal) return -1;
    o = u8_fffd(out, o);
    continue;
  }

  return (utf8proc_ssize_t)o;
}

// Drop the surrogate helper macros so they do not stay defined for the rest
// of the file.
#undef U16_IS_HIGH
#undef U16_IS_LOW
#undef U16_PAIR

/*
 * Decode a chunk of bytes with the given decoder state and return the result
 * as a JS string.
 *
 * js          - runtime used to create the result or error value
 * st          - decoder state; pending bytes from an earlier streaming call
 *               are prepended to `input`, and pending / bom_seen are updated
 * input       - bytes to decode (may be NULL when input_len is 0)
 * input_len   - number of bytes in `input`
 * stream_mode - true when more input may follow; an incomplete trailing
 *               sequence is then carried over to the next call
 *
 * Returns the decoded string, an "out of memory" error value, or a TypeError
 * when the decoder reports invalid data (negative result).
 *
 * A non-streaming call always leaves bom_seen cleared, including when it
 * fails, so the next call starts a fresh run and handles a BOM again.
 */
ant_value_t td_decode(ant_t *js, td_state_t *st, const uint8_t *input, size_t input_len, bool stream_mode) {
  size_t total = (size_t)st->pending_len + input_len;
  if (total == 0) {
    if (!stream_mode) st->bom_seen = false;
    return js_mkstr(js, "", 0);
  }

  /* The output buffer is sized at 3 bytes per input byte plus one; refuse
   * inputs for which that computation would wrap around. */
  if (total > (SIZE_MAX - 1) / 3) return js_mkerr(js, "out of memory");

  /* With carried-over bytes, decode from a scratch copy of pending + input;
   * otherwise decode straight from the caller's buffer. */
  uint8_t *work = NULL;
  const uint8_t *src;
  if (st->pending_len > 0) {
    work = malloc(total);
    if (!work) return js_mkerr(js, "out of memory");
    memcpy(work, st->pending, (size_t)st->pending_len);
    if (input && input_len > 0) memcpy(work + st->pending_len, input, input_len);
    src = work;
  } else {
    src = input;
  }
  st->pending_len = 0;

  char *out = malloc(total * 3 + 1);
  if (!out) { free(work); return js_mkerr(js, "out of memory"); }

  utf8proc_ssize_t n;
  if (st->encoding == TD_ENC_UTF16LE || st->encoding == TD_ENC_UTF16BE) {
    n = utf16_decode(st, src, total, out, stream_mode);
  } else if (st->encoding == TD_ENC_WINDOWS_1252 || st->encoding == TD_ENC_ISO_8859_2) {
    /* Single-byte encodings carry no state between calls. */
    n = decode_single_byte(st, src, total, out);
    st->pending_len = 0;
    st->bom_seen = false;
  } else {
    utf8_dec_t dec = { .ignore_bom = st->ignore_bom, .bom_seen = st->bom_seen };
    n = utf8_whatwg_decode(&dec, src, total, out, st->fatal, stream_mode);
    st->pending_len = dec.pend_pos;
    memcpy(st->pending, dec.pend_buf, (size_t)dec.pend_pos);
    st->bom_seen = dec.bom_seen;
  }

  /* Reset before the error check: previously a fatal UTF-16 error skipped
   * this and left bom_seen set, so the following decode() kept a leading BOM
   * in its output. */
  if (!stream_mode) st->bom_seen = false;

  if (n < 0) {
    free(work);
    free(out);
    return js_mkerr_typed(js, JS_ERR_TYPE, "The encoded data was not valid.");
  }

  ant_value_t result = js_mkstr(js, out, (size_t)n);
  free(work);
  free(out);

  return result;
}

/*
 * TextDecoder.prototype.decode(input, options).
 *
 * Reads the truthiness of options.stream when `options` is an object, takes
 * the bytes of `input` when it is an object (otherwise decodes zero bytes),
 * and forwards everything to td_decode(). Fails with a TypeError when the
 * receiver carries no decoder state.
 */
static ant_value_t js_textdecoder_decode(ant_t *js, ant_value_t *args, int nargs) {
  td_state_t *state = td_get_state(js->this_val);
  if (state == NULL) {
    return js_mkerr_typed(js, JS_ERR_TYPE, "Invalid TextDecoder");
  }

  /* The option is read before the input bytes are fetched. */
  bool streaming = false;
  if (nargs > 1 && is_object_type(args[1])) {
    ant_value_t stream_opt = js_get(js, args[1], "stream");
    streaming = js_truthy(js, stream_opt);
  }

  const uint8_t *bytes = NULL;
  size_t byte_count = 0;
  if (nargs > 0 && is_object_type(args[0])) {
    buffer_source_get_bytes(js, args[0], &bytes, &byte_count);
  }

  return td_decode(js, state, bytes, byte_count, streaming);
}

/*
 * TextDecoder constructor: new TextDecoder(label, options).
 *
 * label   - optional encoding label; non-strings are converted with
 *           coerce_to_str(), the result is trimmed and looked up. An unknown
 *           label produces a RangeError. Defaults to UTF-8.
 * options - optional object; `fatal` and `ignoreBOM` are read by truthiness
 *           when present, both default to false.
 *
 * The decoder state is heap-allocated, stored in the instance's SLOT_DATA
 * slot and released by td_finalize(). Fails with a TypeError when
 * new_target is undefined.
 */
static ant_value_t js_textdecoder_ctor(ant_t *js, ant_value_t *args, int nargs) {
  if (vtype(js->new_target) == T_UNDEF) {
    return js_mkerr_typed(js, JS_ERR_TYPE, "TextDecoder constructor requires 'new'");
  }

  /* Resolve the encoding label, if one was supplied. */
  td_encoding_t enc = TD_ENC_UTF8;
  if (nargs > 0 && !is_undefined(args[0])) {
    ant_value_t label = (vtype(args[0]) == T_STR) ? args[0] : coerce_to_str(js, args[0]);
    if (is_err(label)) return label;

    size_t raw_len;
    const char *raw = js_getstr(js, label, &raw_len);
    if (raw != NULL) {
      size_t name_len;
      const char *name = trim_label(raw, raw_len, &name_len);
      int found = resolve_encoding(name, name_len);

      if (found < 0) {
        return js_mkerr_typed(
          js, JS_ERR_RANGE, "Failed to construct 'TextDecoder': The encoding label provided ('%.*s') is invalid.",
          (int)name_len, name
        );
      }

      enc = (td_encoding_t)found;
    }
  }

  /* Read the option flags; a property that is undefined keeps the default. */
  bool fatal = false;
  bool ignore_bom = false;

  if (nargs > 1 && is_object_type(args[1])) {
    ant_value_t fatal_opt = js_getprop_fallback(js, args[1], "fatal");
    if (is_err(fatal_opt)) return fatal_opt;
    if (vtype(fatal_opt) != T_UNDEF) fatal = js_truthy(js, fatal_opt);

    ant_value_t bom_opt = js_getprop_fallback(js, args[1], "ignoreBOM");
    if (is_err(bom_opt)) return bom_opt;
    if (vtype(bom_opt) != T_UNDEF) ignore_bom = js_truthy(js, bom_opt);
  }

  td_state_t *state = td_state_new(enc, fatal, ignore_bom);
  if (state == NULL) return js_mkerr(js, "out of memory");

  /* Build the instance and attach the state to it. */
  ant_value_t instance = js_mkobj(js);
  ant_value_t instance_proto = js_instance_proto_from_new_target(js, g_textdecoder_proto);

  if (is_object_type(instance_proto)) {
    js_set_proto_init(instance, instance_proto);
  }
  js_set_slot(instance, SLOT_DATA, ANT_PTR(state));
  js_set_finalizer(instance, td_finalize);

  return instance;
}

void init_textcodec_module(void) {
  ant_t *js = rt->js;
  ant_value_t g = js_glob(js);

  g_textencoder_proto = js_mkobj(js);
  js_set_getter_desc(js, g_textencoder_proto, "encoding", 8, js_mkfun(js_textencoder_get_encoding), JS_DESC_C);
  js_set(js, g_textencoder_proto, "encode",     js_mkfun(js_textencoder_encode));
  js_set(js, g_textencoder_proto, "encodeInto", js_mkfun(js_textencoder_encode_into));
  js_set_sym(js, g_textencoder_proto, get_toStringTag_sym(), js_mkstr(js, "TextEncoder", 11));
  
  ant_value_t te_ctor = js_make_ctor(js, js_textencoder_ctor, g_textencoder_proto, "TextEncoder", 11);
  js_set(js, g, "TextEncoder", te_ctor);
  js_set_descriptor(js, g, "TextEncoder", 11, JS_DESC_W | JS_DESC_C);

  g_textdecoder_proto = js_mkobj(js);
  js_set_getter_desc(js, g_textdecoder_proto, "encoding",  8, js_mkfun(js_textdecoder_get_encoding),  JS_DESC_C);
  js_set_getter_desc(js, g_textdecoder_proto, "fatal",     5, js_mkfun(js_textdecoder_get_fatal),     JS_DESC_C);
  js_set_getter_desc(js, g_textdecoder_proto, "ignoreBOM", 9, js_mkfun(js_textdecoder_get_ignore_bom), JS_DESC_C);
  js_set(js, g_textdecoder_proto, "decode", js_mkfun(js_textdecoder_decode));
  js_set_sym(js, g_textdecoder_proto, get_toStringTag_sym(), js_mkstr(js, "TextDecoder", 11));
  
  ant_value_t td_ctor = js_make_ctor(js, js_textdecoder_ctor, g_textdecoder_proto, "TextDecoder", 11);
  js_set(js, g, "TextDecoder", td_ctor);
  js_set_descriptor(js, g, "TextDecoder", 11, JS_DESC_W | JS_DESC_C);
}
