import { z } from 'zod'

// TODO: Import these constants elsewhere (from this module)
/** Entry that the `ghg_flights` and `flight_legs` spec arrays must contain. */
export const GHG_FLIGHTS_KEY = '__ghg_flights_key'
/** Entry that the `ghg_trains` and `trains` spec arrays must contain. */
export const GHG_TRAINS_KEY = '__ghg_trains_key'

// Every optional dataset spec is a flat list of strings.
const StringListSpec = z.array(z.string())

// Specs for datasets that may be absent from the payload; `.partial()` makes
// each key optional.
const OptionalSpecs = z
  .object({
    cars: StringListSpec,
    food: StringListSpec,
    paper: StringListSpec,
    water: StringListSpec,
    energy: StringListSpec,
    energy_efficiency_achieved: StringListSpec,
    demographics: StringListSpec,
    hr: StringListSpec,
    // TODO: `buildings` is left out of both spec and dataset validation until
    // a new spec format consistent with the other datasets is created.
    // It is the odd one out: the only dataset typed Record<string, 2D Array>
    // (airports and train_stations are Record<string, Array>).
    // For now the buildings spec is only used for the buildings export in /energy.
    // buildings: z.array(z.union([z.string(), z.array(z.string())])),
  })
  .partial()

/**
 * Builds the custom error payload used when a spec array lacks a mandatory key.
 *
 * @param key - The key that was expected in the spec array.
 * @returns An object with the fixed `missing_key` message and the key as a param.
 */
function missingKeyMessage(key: string) {
  const params = { key }

  return { message: `missing_key`, params }
}

/**
 * Builds a string-array spec schema that is only valid when `key` is one of
 * its entries; otherwise the `missing_key` error payload is reported.
 */
function specContainingKey(key: string) {
  return z
    .array(z.string())
    .refine((entries) => entries.includes(key), missingKeyMessage(key))
}

// Specs that must always be present in `config.data_specs`.
const RequiredSpecs = z
  .object({
    ghg_flights: specContainingKey(GHG_FLIGHTS_KEY),
    ghg_trains: specContainingKey(GHG_TRAINS_KEY),
    // These two specs may group entries: each item is a string or a list of strings.
    airports: z.array(z.union([z.string(), z.array(z.string())])),
    train_stations: z.array(z.union([z.string(), z.array(z.string())])),
    flight_legs: specContainingKey(GHG_FLIGHTS_KEY),
    trains: specContainingKey(GHG_TRAINS_KEY),
  })
  .required()

// Generic tabular dataset: an array of rows, where every cell is a number,
// string, null or boolean.
const GenericDataSchema = z
  .union([z.number(), z.string(), z.null(), z.boolean()])
  .array()
  .array()
// Datasets that may be absent from the payload. Each one, when present, must
// match GenericDataSchema; `.partial()` makes every key optional.
const OptionalProps = z
  .object({
    cars: GenericDataSchema,
    food: GenericDataSchema,
    paper: GenericDataSchema,
    water: GenericDataSchema,
    energy: GenericDataSchema,
    energy_efficiency_achieved: GenericDataSchema,
    demographics: GenericDataSchema,
    hr: GenericDataSchema,
    // See TODO in OptionalSpecs above for buildings.
    // buildings: z.record(
    //   z.string(),
    //   z.array(
    //     z.object({
    //       campus: z.string(),
    //       code: z.string(),
    //       name: z.string(),
    //       location: z.string(),
    //       ownership: z.string(),
    //       'era (m2)': z.number(),
    //     })
    //   )
    // ),
  })
  .partial()

// Schema used for `config.data_specs`: the required specs plus the optional ones.
const DataSpecs = RequiredSpecs.merge(OptionalSpecs)

// Top-level keys that every payload must provide: the config object and the
// travel-related datasets.
const RequiredProps = z
  .object({
    config: z.object({
      // String-to-string lookup tables
      org_units: z.record(z.string()),
      org_colors: z.record(z.string()),
      countries: z.record(z.string()),
      continents: z.record(z.string()),
      home_airport: z.string(),
      home_train_station: z.string(),
      data_specs: DataSpecs,
    }),
    // Row-based datasets (arrays of rows)
    ghg_flights: z.union([z.number(), z.string()]).array().array(),
    ghg_trains: z.number().array().array(),
    // Keyed datasets: each key maps to one array of strings/numbers
    airports: z.record(z.union([z.string(), z.number()]).array()),
    train_stations: z.record(z.union([z.string(), z.number()]).array()),
    flight_legs: z
      .union([z.string(), z.boolean(), z.null(), z.number()])
      .array()
      .array(),
    trains: z
      .union([z.string(), z.boolean(), z.null(), z.number()])
      .array()
      .array(),
  })
  .required()

// Complete payload schema: required config/datasets merged with the optional datasets.
const DataSchema = RequiredProps.merge(OptionalProps)

/** Static type inferred from `DataSchema`. */
export type TData = z.infer<typeof DataSchema>

/**
 * Validates a raw payload in two steps: it is first parsed against
 * `DataSchema`, then each dataset's row length is cross-checked against the
 * matching entry in `config.data_specs`.
 *
 * Nothing is returned; a failure in either step surfaces as a thrown error.
 *
 * @param data - The untrusted payload to validate.
 * @throws {z.ZodError} When the schema parse or the length cross-check fails.
 */
export function validateData(data: Record<string, unknown>) {
  validateDataSpecsLengthWithDatasetLength(DataSchema.parse(data))
}

/** Name of any dataset that can have an entry in `config.data_specs`. */
type TSpecsKey = keyof TData['config']['data_specs']
/** A spec reduced to its dataset name and its flattened entry count. */
type TConfigEntry = {
  name: TSpecsKey
  length: number
}

/**
 * Cross-checks every entry of `config.data_specs` against the dataset with the
 * same name:
 *
 * - array datasets: every row must have exactly as many items as the
 *   (flattened) spec has entries;
 * - `airports` / `train_stations` (records): every value must have one item
 *   fewer than the flattened spec, because the spec also counts the travel
 *   stop identifier, which is the record key rather than part of the value.
 *
 * Specs whose dataset is absent from `data` are skipped.
 *
 * @param data - Payload that already passed `DataSchema` parsing.
 * @throws {z.ZodError} With one `custom` issue per mismatching dataset.
 */
function validateDataSpecsLengthWithDatasetLength(data: TData) {
  const dataSpecEntries = Object.entries(data.config.data_specs) as Array<
    [TSpecsKey, TData['config']['data_specs'][TSpecsKey]]
  >

  // Flatten nested groups before counting. Depth 1 would suffice today
  // (nothing is nested two levels deep so far); the extra level is harmless.
  const specs: Array<TConfigEntry> = dataSpecEntries.map(([name, entries]) => ({
    name,
    length: Array.isArray(entries) ? entries.flat(2).length : 0,
  }))

  const mismatches = specs.filter(({ name, length }) => {
    const dataset = data[name]

    // No dataset for this spec: nothing to compare against
    if (dataset === undefined) {
      return false
    }

    if (Array.isArray(dataset)) {
      return !dataset.every((item) => item.length === length)
    }

    // These data types are records; only match keys
    // (data type is validated by Zod)
    if (['airports', 'train_stations'].includes(name)) {
      return !Object.values(dataset).every(
        (item) => item.length === length - 1
        // -1 because the flattened spec also counts the travelStopIdentifier
      )
    }

    return false
  })

  if (mismatches.length) {
    const issues = mismatches.map((spec) => ({
      code: 'custom' as const,
      // Points at the spec named in the message: config.data_specs.<name>
      path: ['config', 'data_specs', spec.name],
      message: `Mismatch in config.data_specs.${spec.name} and ${spec.name} length, expected ${spec.length}`,
    }))

    throw new z.ZodError(issues)
  }
}
