import _ from 'lodash';

import { MetadataTypes } from '../constants';

/**
 * Suggest a type for every column that does not have one yet.
 *
 * Columns whose type is null/undefined receive a basic suggestion derived
 * from statistics gathered over `rows`. If none of the incoming columns is
 * typed 'text', one of the newly typed columns may additionally be switched
 * to 'text', replacing its basic suggestion.
 *
 * @param {Array} columns - column objects; each is read for `type` and must
 *   provide `update(changes)`, whose return value is stored in the result
 * @param {Array} rows - row data handed on to the statistics pass
 * @returns {Array} a new array of columns; the `columns` array is not modified
 */
export function updateColumnsWithMissingTypes(columns, rows) {
  const untypedColumnStats = collectStatsOnUntypedColumns(columns, rows);

  // Work on a shallow copy so the caller's array is left untouched.
  const result = Array.from(columns);

  // First pass: give each untyped column its basic suggestion.
  untypedColumnStats.forEach(stats => {
    const type = suggestType(stats);
    const { index } = stats;
    result[index] = columns[index].update({ type });
  });

  // Second pass only applies when no incoming column is text already.
  const existingTypes = new Set(columns.map(column => column.type));
  if (existingTypes.has('text')) {
    return result;
  }

  // Pick a text column, overriding the basic suggestion made above.
  const textIndex = suggestTextColumn(untypedColumnStats, result);
  // There is no candidate when, for instance, every column was already typed.
  if (textIndex != null) {
    result[textIndex] = columns[textIndex].update({ type: 'text' });
  }
  return result;
}

/**
 * Gather statistics for every column that has no type yet.
 *
 * @param {Array} columns - column objects; those whose `type` is null or
 *   undefined are selected
 * @param {Array} rows - row data passed through to `columnStats`
 * @returns {Array} one `columnStats` result per untyped column, in column order
 */
function collectStatsOnUntypedColumns(columns, rows) {
  const untypedColumns = columns.filter(column => column.type == null);
  return untypedColumns.map(column => columnStats(column, rows));
}

/**
 * Summarise the cells of one column across all rows.
 *
 * @param {Object} column - read for `index` (position of the cell within each
 *   row) and `name`
 * @param {Array} rows - each row is indexed by `column.index`; null/undefined
 *   cells are ignored
 * @returns {Object} `{ index, name, totalValues, validNumbers, validDates,
 *   averageLength }`, where `averageLength` is the mean `rawValue.length` over
 *   cells with `hasValue` set, or 0 when there are no such cells
 */
function columnStats(column, rows) {
  const { index, name } = column;
  const counts = { totalValues: 0, validNumbers: 0, validDates: 0 };
  let textLengthSum = 0;
  let cellsWithValue = 0;

  rows.forEach(row => {
    const cell = row[index];
    if (cell == null) {
      return;
    }
    counts.totalValues += cell.stringValues.length;
    counts.validNumbers += cell.numberValues.length;
    // cell.dateValues is deliberately not used here, for performance
    counts.validDates += countValidDates(cell);
    if (!cell.hasValue) {
      return;
    }
    textLengthSum += cell.rawValue.length;
    cellsWithValue += 1;
  });

  const averageLength = cellsWithValue === 0 ? 0 : textLengthSum / cellsWithValue;
  return { index, name, ...counts, averageLength };
}

// The constants below are regular-expression *source fragments* kept as
// strings (hence the doubled backslashes) so that they can be composed with
// template literals. They are assembled into MULTI_FORMAT_DATE_RE at the end.
// The patterns only constrain digit ranges: they do not check that a day exists
// in the given month, nor that hour/minute/second digits are in range.

// month: 01-12
const ISO_MONTH_RE = '(0[1-9]|1[0-2])'; // must be two digits
const MDY_MONTH_RE = `([1-9]|${ISO_MONTH_RE})`; // can omit leading 0

// day of month: 01-31
const ISO_DAY_RE = '(0[1-9]|[1-2]\\d|3[01])'; // must be two digits
const MDY_DAY_RE = `([1-9]|${ISO_DAY_RE})`; // can omit leading 0

const ISO_YEAR_RE = '\\d{4}'; // must be four digits
const MDY_YEAR_RE = `(\\d{2}|${ISO_YEAR_RE})`; // can be two or four digits

// month/day/year separated by slashes, e.g. 1/31/20 or 01/31/2020
const MDY_DATE_RE = `${MDY_MONTH_RE}\\/${MDY_DAY_RE}\\/${MDY_YEAR_RE}`;
// year-month-day separated by hyphens, e.g. 2020-01-31
const ISO_DATE_RE = `${ISO_YEAR_RE}-${ISO_MONTH_RE}-${ISO_DAY_RE}`;

// hours and minutes required, seconds optional, am/pm suffix optional
// (the suffix must be preceded by one space and be all-lowercase or all-uppercase)
const MDY_TIME_RE = '\\d{1,2}:\\d{2}(:\\d{2})?( (am|pm|AM|PM))?';
// hours and minutes required, seconds and millis optional, timezone optional
// (the timezone is either "Z" or a signed hh:mm offset)
const ISO_TIME_RE = '\\d{2}:\\d{2}(:\\d{2}(\\.\\d+)?)?(Z|[+-]\\d{2}:\\d{2})?';

// a date, optionally followed by a time: separated by a single space for the
// M/D/Y form, and by either "T" or a single space for the ISO form
const FULL_MDY_RE = `${MDY_DATE_RE}( ${MDY_TIME_RE})?`;
const FULL_ISO_RE = `${ISO_DATE_RE}([T ]${ISO_TIME_RE})?`;

// Anchored at both ends, so the whole input must be a date in one of the two
// formats; surrounding whitespace or extra text makes the match fail.
const MULTI_FORMAT_DATE_RE = new RegExp(`^(${FULL_MDY_RE}|${FULL_ISO_RE})$`);

/**
 * Report whether `value`, taken as a whole, matches MULTI_FORMAT_DATE_RE.
 *
 * @param {string} value - text to check; `RegExp.prototype.test` converts
 *   non-string input to a string before matching
 * @returns {boolean} true when the entire value matches the pattern
 */
export function looksLikeADate(value) {
  const matchesDatePattern = MULTI_FORMAT_DATE_RE.test(value);
  return matchesDatePattern;
}

/**
 * Count how many of a cell's string values look like dates.
 *
 * @param {Object} cell - read for its `stringValues` array
 * @returns {number} the number of entries for which `looksLikeADate` is true
 */
function countValidDates(cell) {
  return cell.stringValues.reduce(
    (dateCount, candidate) => (looksLikeADate(candidate) ? dateCount + 1 : dateCount),
    0
  );
}

// Case-insensitive match for "nps", "score" or "csat" appearing as a separate
// term: the character on either side, if any, must not be an ASCII letter.
const SCORE_COLUMN_RE = /(^|[^A-Za-z])(nps|score|csat)($|[^A-Za-z])/i;

/**
 * Pick a basic type suggestion from a column's statistics.
 *
 * A column is suggested as a date when at least 95% of its values look like
 * dates; otherwise as numeric when at least 95% of its values are numbers
 * (a score if the column name matches SCORE_COLUMN_RE, a plain number if
 * not); otherwise as a category. When all three counts are 0 the ratios are
 * NaN, neither comparison passes, and the result is a category.
 *
 * @param {Object} stats - read for `name`, `totalValues`, `validNumbers`
 *   and `validDates`
 * @returns one of MetadataTypes.DATE, .SCORE, .NUMBER or .CATEGORY
 */
function suggestType({ name, totalValues, validNumbers, validDates }) {
  const requiredShare = 0.95;

  const isMostlyDates = validDates / totalValues >= requiredShare;
  if (isMostlyDates) {
    return MetadataTypes.DATE;
  }

  const isMostlyNumbers = validNumbers / totalValues >= requiredShare;
  if (!isMostlyNumbers) {
    return MetadataTypes.CATEGORY;
  }

  return SCORE_COLUMN_RE.test(name) ? MetadataTypes.SCORE : MetadataTypes.NUMBER;
}

/**
 * Choose which column, if any, should be suggested as the text column.
 *
 * @param {Array} untypedColumnStats - stats objects; each is read for `index`
 *   and `averageLength`
 * @param {Array} columns - columns looked up by `stats.index`; each is read
 *   for `type` and `empty`
 * @returns the `index` of the chosen stats entry, or undefined when there is
 *   no candidate
 */
function suggestTextColumn(untypedColumnStats, columns) {
  // Candidates are columns currently typed as category that are not empty.
  const isCandidate = ({ index }) => {
    const column = columns[index];
    if (column.type !== MetadataTypes.CATEGORY) {
      return false;
    }
    return !column.empty;
  };
  const candidates = untypedColumnStats.filter(isCandidate);

  // The winner is the candidate whose text is longest on average.
  const longest = _.maxBy(candidates, 'averageLength');
  return longest == null ? undefined : longest.index;
}
