import {
  DateRange as TimeDimensionDateRange,
  DeeplyReadonly,
  Query,
  QueryRecordType,
  ResultSet,
  TimeDimension,
  TimeDimensionGranularity,
} from '@cubejs-client/core';
import * as d3 from 'd3';
import { add, Duration, parseISO } from 'date-fns';
import _ from 'lodash';
import invariant from 'tiny-invariant';

/**
 * One row of the array returned by `tidyRawData` for a query of type `TQuery`.
 *
 * The row type is the intersection of:
 *   - every query measure mapped to a `number`,
 *   - every query dimension mapped to a `string` (or `string | null` when
 *     `KeepNullDimensions` is `true`),
 *   - the `Date`-valued time dimension fields produced by
 *     `TidyQueryRecordTimeDimensions`.
 */
export type TidyQueryRecord<
  TQuery extends DeeplyReadonly<Query>,
  KeepNullDimensions extends boolean = false,
> = {
  [K in QueryMeasure<TQuery>]: number;
} & {
  [K in QueryDimension<TQuery>]: KeepNullDimensions extends true
    ? string | null
    : string;
} & TidyQueryRecordTimeDimensions<TQuery>;

/**
 * Element type of the query's `measures` array (a union of the measure names
 * when the query type carries literal names). `NonNullable` strips the
 * `undefined` that comes from `measures` being optional.
 */
type QueryMeasure<TQuery extends DeeplyReadonly<Query>> = NonNullable<
  TQuery['measures']
>[number];
/**
 * Element type of the query's `dimensions` array (a union of the dimension
 * names when the query type carries literal names). `NonNullable` strips the
 * `undefined` that comes from `dimensions` being optional.
 */
type QueryDimension<TQuery extends DeeplyReadonly<Query>> = NonNullable<
  TQuery['dimensions']
>[number];

/**
 * Maps every time member key extracted from the query's `timeDimensions` to a
 * `Date`. When the query type has no `timeDimensions` property this resolves
 * to `unknown`, which leaves an intersection (`&`) with it unchanged.
 */
type TidyQueryRecordTimeDimensions<TQuery extends DeeplyReadonly<Query>> =
  TQuery extends { timeDimensions: infer U }
    ? {
        [K in ExtractTimeMembers<U>]: Date;
      }
    : unknown;

/**
 * Recursively walks a tuple of time dimensions and collects the union of
 * `ExtractTimeMember` over its elements. Any `T` that is not a non-empty tuple
 * (including the empty tuple that ends the recursion) yields `never`.
 */
type ExtractTimeMembers<T> = T extends readonly [infer First, ...infer Rest]
  ? ExtractTimeMember<First> | ExtractTimeMembers<Rest>
  : never;

/**
 * For a time dimension type that has both a `dimension` and a `granularity`,
 * yields the two row keys it contributes: the bare dimension name and
 * `<dimension>.<granularity>`. Yields `never` for any other type (for example
 * a time dimension without a granularity).
 */
type ExtractTimeMember<T> = T extends {
  dimension: infer Dimension;
  granularity: infer Granularity;
}
  ? Dimension | `${Dimension & string}.${Granularity & string}`
  : never;

/** Options accepted by `tidyRawData`. */
type TidyRawDataOptions = {
  // When true (the default applied by `tidyRawData`), placeholder rows are
  // appended for time dimension values that have no data.
  fillMissingDates?: boolean;
  // When truthy, `null` dimension values are kept as `null` instead of being
  // converted with `String(...)`.
  keepNullDimensions?: boolean;
};

/**
 * Converts a cube.js result set into 'tidy' form: a flat array of rows, where
 * every row is an object holding
 *   - one string field per query dimension (`null` values are kept as `null`
 *     only when `options.keepNullDimensions` is set),
 *   - one numeric field per query measure (`NaN` when the value is empty),
 *   - two `Date` fields per query timeDimension that has a granularity, keyed
 *     by `dimension` and by `dimension.granularity`.
 *
 * Rows are ordered by their time dimension. Afterwards, unless
 * `options.fillMissingDates` is `false`, and if the query has a timeDimension
 * with a granularity, placeholder rows are appended so that every step of
 * that timeDimension's dateRange is covered by at least one row.
 *
 * @param results result set returned by a cube.js query
 * @param options see `TidyRawDataOptions`; `fillMissingDates` defaults to true
 * @returns the tidy rows
 */
export default function tidyRawData<
  TQuery extends DeeplyReadonly<Query>,
  KeepNullDimensions extends boolean = false,
>(
  results: ResultSet<QueryRecordType<TQuery>>,
  options: TidyRawDataOptions = { fillMissingDates: true },
): TidyQueryRecord<TQuery, KeepNullDimensions>[] {
  const { fillMissingDates: shouldFill = true, keepNullDimensions } = options;
  const { query, rawData } = composeCompareDateRangeData(results);

  const typedRows = normalizeTypes(rawData, query, { keepNullDimensions });
  const orderedRows = sortByTimeDimension(typedRows, query);

  return shouldFill ? fillTimeDimension(orderedRows, query) : orderedRows;
}

/**
 * Coerces the fields of raw cube.js records to predictable types:
 *   - measures become numbers (`NaN` when empty),
 *   - dimensions become strings (a `null` value is kept as `null` only when
 *     `options.keepNullDimensions` is set),
 *   - timeDimensions that have a granularity become `Date`s, under both the
 *     `dimension` key and the `dimension.granularity` key.
 *
 * Fields not named by the query are dropped from the result.
 */
function normalizeTypes<TQuery extends DeeplyReadonly<Query>>(
  records: QueryRecordType<TQuery>[],
  query: TQuery,
  options: TidyRawDataOptions,
) {
  const { measures: measureNames = [], dimensions: dimensionNames = [] } =
    query;
  const granularTimeDimensions = (query.timeDimensions ?? []).filter(
    ({ granularity }) => granularity,
  );

  const tidied = records.map((record) => {
    // Entries are collected in measure → dimension → timeDimension order so
    // that later groups win when a key is repeated.
    const entries: [string, unknown][] = [];

    for (const measure of measureNames) {
      entries.push([measure, normalizeNumber(record[measure])]);
    }

    for (const dimension of dimensionNames) {
      const value = record[dimension];
      const keepAsNull = options.keepNullDimensions && value === null;
      entries.push([dimension, keepAsNull ? value : String(value)]);
    }

    for (const { dimension, granularity } of granularTimeDimensions) {
      const granularKey = `${dimension}.${granularity}`;
      entries.push([dimension, parseISO(record[dimension])]);
      entries.push([granularKey, parseISO(record[granularKey])]);
    }

    return Object.fromEntries(entries);
  });

  return tidied as unknown as TidyQueryRecord<TQuery>[];
}

/**
 * Coerces a raw value to a number: `null` / `undefined` become `NaN`, numbers
 * pass through untouched, and everything else goes through `Number(...)`.
 */
function normalizeNumber(value: unknown) {
  if (value == null) {
    return NaN;
  }
  return typeof value === 'number' ? value : Number(value);
}

/**
 * Given normalized records from a cubejs query, appends placeholder rows such
 * that the timeDimension (with granularity) has at least one row per step in
 * the query's `dateRange`. Each missing time value adds one row per distinct
 * combination of dimension values seen in the existing data, with every
 * measure set to NaN. If `records` is empty there are no combinations, so
 * nothing is added.
 *
 * This is useful if you want to plot the data along the timeDimension axis.
 *
 * Records are returned unchanged when the query has no timeDimension with a
 * granularity. The input array is never mutated; placeholder rows are appended
 * to a copy.
 *
 * @param records normalized rows (time dimension fields must be `Date`s)
 * @param query the query that produced `records`
 * @returns the original rows followed by the placeholder rows
 * @throws (via `invariant`) when the query has more than one timeDimension
 *   with a granularity, when that timeDimension has no `dateRange`, or when
 *   the granularity step does not advance the date
 */
function fillTimeDimension<TQuery extends DeeplyReadonly<Query>>(
  records: Record<string, any>[],
  query: TQuery,
): TidyQueryRecord<TQuery>[] {
  const timeDimensions =
    query.timeDimensions?.filter(
      (timeDimension_) => timeDimension_.granularity,
    ) ?? [];

  if (timeDimensions.length === 0) return records as TidyQueryRecord<TQuery>[];
  invariant(
    timeDimensions.length === 1,
    'Only one time dimension with granularity is supported',
  );
  const { dimension, dateRange, granularity } = timeDimensions[0];

  invariant(
    granularity !== undefined,
    'You should never see this message if the filter above works',
  );
  invariant(dateRange !== undefined, 'Only range timeDimensions are supported');
  const [start, end] = [parseISO(dateRange[0]), parseISO(dateRange[1])];

  // Key by timestamp: two `Date` objects for the same instant are distinct
  // objects, so lookups must compare their numeric value.
  const seenTimestamps = new Set(
    records.map((record) => Number(record[dimension])),
  );

  const step = getGranularityStep(granularity);
  const missingDates: Date[] = [];
  for (let dt = start; dt <= end; ) {
    if (!seenTimestamps.has(dt.getTime())) {
      missingDates.push(dt);
    }
    const next = add(dt, step);
    // A step that does not move the date would make this loop run forever.
    invariant(
      next.getTime() !== dt.getTime(),
      `Granularity "${granularity}" does not advance the date`,
    );
    dt = next;
  }

  const dimensions = query.dimensions ?? [];
  const placeholderMeasures = Object.fromEntries(
    query.measures?.map((measure) => [measure, NaN]) ?? [],
  );

  // Collect each distinct combination of dimension values, in order of first
  // appearance. The values themselves are kept (rather than being rebuilt
  // from a joined string) so values containing ',' and `null` values survive.
  const combinations = new Map<string, Record<string, unknown>>();
  for (const record of records) {
    const values = dimensions.map((dim) => record[dim]);
    const key = JSON.stringify(values);
    if (!combinations.has(key)) {
      combinations.set(
        key,
        Object.fromEntries(dimensions.map((dim, i) => [dim, values[i]])),
      );
    }
  }
  const placeholderRows = Array.from(combinations.values(), (combination) => ({
    ...combination,
    ...placeholderMeasures,
  }));

  const newRecords = Array.from(records);
  for (const dt of missingDates) {
    for (const placeholderRow of placeholderRows) {
      newRecords.push({
        ...placeholderRow,
        [dimension]: dt,
        [`${dimension}.${granularity}`]: dt,
      });
    }
  }
  return newRecords as TidyQueryRecord<TQuery>[];
}

/**
 * Returns the date-fns `Duration` that advances a date by one unit of the
 * given granularity.
 *
 * Most granularities map directly onto the plural `Duration` field
 * (`day` → `{ days: 1 }`). `quarter` is special-cased: `Duration` has no
 * `quarters` field and `add` ignores unknown fields, which would produce a
 * zero-length step, so a quarter is expressed as three months.
 */
function getGranularityStep(granularity: TimeDimensionGranularity): Duration {
  if (granularity === 'quarter') {
    return { months: 3 };
  }
  return { [`${granularity}s`]: 1 };
}

/**
 * When a cubejs query uses compareDateRange in a time dimension, a number of
 * ResultSet methods are disallowed. To access the raw data and the query, the
 * result set must be decomposed, which yields one ResultSet per date range in
 * the query. For convenience this function "recomposes" those ResultSets by
 * appending their rawData together and merging their query.timeDimensions, so
 * that callers get a single query and data array for data visualization.
 *
 * Time dimensions that share the same dimension and granularity are merged
 * into one entry whose dateRange spans the earliest start and the latest end.
 *
 * The query objects owned by the decomposed results are never modified: the
 * merged time dimensions are written to a shallow copy of the first query.
 *
 * @param results result set to decompose and then combine data from
 * @returns the combined query and the concatenated rawData of every result
 * @throws Error when the result set decomposes into no results
 */
function composeCompareDateRangeData<TQuery extends DeeplyReadonly<Query>>(
  results: ResultSet<QueryRecordType<TQuery>>,
) {
  const decomposed = results.decompose() as ResultSet<
    QueryRecordType<TQuery>
  >[];
  let query: Query | undefined;
  let rawData: QueryRecordType<TQuery>[] | undefined;

  for (const result of decomposed) {
    const resultQuery = result.query();

    if (query === undefined) {
      // Copy so that assigning `timeDimensions` below cannot leak into the
      // object held by the result set.
      query = { ...resultQuery };
    } else {
      // Sort so that entries with the same dimension + granularity become
      // neighbours, then fold neighbours together.
      const combined = (query.timeDimensions ?? [])
        .concat(resultQuery.timeDimensions ?? [])
        .sort((a, b) => {
          if (a.dimension === b.dimension) {
            return (a.granularity ?? '').localeCompare(b.granularity ?? '');
          }
          return (a.dimension ?? '').localeCompare(b.dimension ?? '');
        });

      const merged: TimeDimension[] = [];
      for (const current of combined) {
        const previous = merged[merged.length - 1];
        if (
          previous !== undefined &&
          previous.dimension === current.dimension &&
          previous.granularity === current.granularity
        ) {
          // Replace the previous entry with one covering both date ranges.
          merged[merged.length - 1] = {
            ...previous,
            dateRange: [
              dateRangeMin([
                dateRangeMin(previous.dateRange ?? ''),
                dateRangeMin(current.dateRange ?? ''),
              ]),
              dateRangeMax([
                dateRangeMax(previous.dateRange ?? ''),
                dateRangeMax(current.dateRange ?? ''),
              ]),
            ],
          };
        } else {
          merged.push(current);
        }
      }
      query.timeDimensions = merged;
    }

    const resultData = result.rawData();
    rawData = rawData === undefined ? resultData : rawData.concat(resultData);
  }

  if (rawData === undefined || query === undefined) {
    throw new Error('No data or query was parseable from the query result');
  }
  return { query: query as TQuery, rawData };
}

/**
 * Returns the smallest entry of a date range, compared as strings. A range
 * that is itself a plain string is returned unchanged.
 */
function dateRangeMin(range: Readonly<TimeDimensionDateRange>) {
  return typeof range === 'string'
    ? range
    : range.reduce((smallest, candidate) =>
        candidate <= smallest ? candidate : smallest,
      );
}

/**
 * Returns the largest entry of a date range, compared as strings. A range
 * that is itself a plain string is returned unchanged.
 */
function dateRangeMax(range: Readonly<TimeDimensionDateRange>) {
  return typeof range === 'string'
    ? range
    : range.reduce((largest, candidate) =>
        candidate >= largest ? candidate : largest,
      );
}

/**
 * Sorts records in place, in ascending order of their time dimension value,
 * and returns the same array.
 *
 * Ordering by time helps highcharts render columns properly when filters are
 * applied out of order, which causes the data to be returned out of order.
 *
 * For each record, the value compared is that of the first timeDimension
 * (with a granularity) for which the record holds a truthy value. This
 * assumes that a record's time value is the same across different
 * timeDimensions. Pairs where either record has no such value compare as
 * equal.
 */
function sortByTimeDimension<TQuery extends DeeplyReadonly<Query>>(
  records: TidyQueryRecord<TQuery>[],
  query: TQuery,
): TidyQueryRecord<TQuery>[] {
  const granularTimeDimensions = (query.timeDimensions ?? []).filter(
    ({ granularity }) => granularity,
  );

  return records.sort((left, right) => {
    const leftTime = granularTimeDimensions.find(
      (timeDimension) => left[timeDimension.dimension as keyof typeof left],
    );
    const rightTime = granularTimeDimensions.find(
      (timeDimension) => right[timeDimension.dimension as keyof typeof right],
    );

    if (leftTime === undefined || rightTime === undefined) {
      return 0;
    }
    // Subtracting two Dates yields the difference of their timestamps.
    return (
      left[leftTime.dimension as keyof typeof left] -
      right[rightTime.dimension as keyof typeof right]
    );
  });
}
