import removeSubstring from '@lib/utilities/text/removeSubstring'
import { STAGES_OF_DISEASE } from '@modules/trials/constants'
import uniq from 'lodash/uniq'

/**
 * Builds the list of condition strings to use for search.
 *
 * The primary conditions (`conditions` plus `searchCondition`) are combined
 * with the `conditionAncestors` that pass the exclusion-list filter. Every
 * entry is then normalized by removing stage-of-disease labels, parenthesized
 * content, AJCC version suffixes and any `additionalExclusions`. Entries that
 * end up falsy are dropped and the result is de-duplicated.
 *
 * @param additionalExclusions - extra substrings passed to `removeSubstring`
 *   for every condition (defaults to none)
 * @param conditionAncestors - ancestor conditions; those containing an
 *   excluded substring are left out
 * @param conditions - primary conditions (defaults to none)
 * @param searchCondition - optional extra primary condition
 * @returns the normalized conditions, unique, in first-occurrence order
 */
export default function getSearchableConditions({
  additionalExclusions = [],
  conditionAncestors,
  conditions = [],
  searchCondition,
}: {
  additionalExclusions?: string[]
  conditionAncestors: string[]
  conditions?: string[]
  searchCondition?: string
}) {
  // We leave the primary conditions out of the exclusion list filter
  // because otherwise some trials may be entirely hidden from search
  const filteredAdditionalConditions = conditionAncestors.filter(
    (c) => !containsExcludedSubstring(c),
  )

  // Drop a missing/empty searchCondition (and any empty primary condition)
  const allConditions = [...conditions, searchCondition].filter(
    (c): c is string => Boolean(c),
  )

  const normalizedConditions = [
    ...allConditions,
    ...filteredAdditionalConditions,
  ]
    .map(removeStageOfDiseaseLabels)
    .map(removeParenthesesContent)
    .map(removeAcjjVersions)
    .map((condition) => removeSubstring(condition, additionalExclusions))
    .filter(Boolean)

  // De-duplicate AFTER normalization: two inputs that differ only by a removed
  // label (ex. 'Stage II Breast Cancer' and 'Breast Cancer') normalize to the
  // same string and should only be returned once.
  return uniq(normalizedConditions)
}

// Substrings that disqualify a condition from being added to the search
// conditions. Matching is case-insensitive (see containsExcludedSubstring).
// Note: this list may later be loaded from a file or table instead
// https://linear.app/withpower/issue/POW-2196/add-exclusion-list-for-substrings-to-avoid-pushing-to-searchconditions
const EXCLUDED_SUBSTRINGS: string[] = ['Prostatic', 'Neoplasms']

/**
 * Returns true when `condition` contains any entry of EXCLUDED_SUBSTRINGS,
 * ignoring case.
 */
function containsExcludedSubstring(condition: string) {
  const lowered = condition.toLowerCase()
  for (const excluded of EXCLUDED_SUBSTRINGS) {
    if (lowered.includes(excluded.toLowerCase())) {
      return true
    }
  }
  return false
}

/**
 * Strips every parenthesized group, together with the whitespace directly in
 * front of it, from the condition string.
 *
 * Each pass only removes groups that contain no other parentheses, so the
 * replacement is repeated until the string stops changing; this is what
 * handles nested groups such as 'A (b (c))'. Unbalanced parentheses are left
 * untouched.
 */
function removeParenthesesContent(condition: string) {
  const innermostGroup = /\s*\([^()]*\)/g
  let current = condition
  for (;;) {
    const stripped = current.replace(innermostGroup, '')
    if (stripped === current) {
      return stripped
    }
    current = stripped
  }
}

/**
 * Removes stage-of-disease labels from a condition.
 * ex. Stage II Breast Cancer -> Breast Cancer
 */
function removeStageOfDiseaseLabels(condition: string) {
  // Common sub-stage variants of every label that mentions 'Stage'
  // ex. Stage II -> Stage IIA, Stage IIB
  const subStageLabels = STAGES_OF_DISEASE.filter((label) =>
    label.includes('Stage'),
  ).flatMap((label) => [`${label}A`, `${label}B`])

  // Longest labels go first, otherwise a shorter label could eat the front of
  // a longer one and leave stray characters behind
  // ex. Stage II Breast Cancer -> removes 'Stage I' -> I Breast Cancer
  const longestFirst = [...STAGES_OF_DISEASE, ...subStageLabels].sort(
    (first, second) => second.length - first.length,
  )

  return removeSubstring(condition, longestFirst)
}

// Removes the 'AJCC' version suffix of a condition, including a leading
// '-' or ',' separator (added by addSeparatorRegex)
// Eg. 'Breast Cancer, AJCC v8' -> 'Breast Cancer'
// Eg. 'Breast Cancer - AJCC v8 or v9' -> 'Breast Cancer'
function removeAcjjVersions(condition: string) {
  // The two-version form ('v8 or v9' / 'v8 and v9') is listed before the
  // single-version form
  const versionPatterns = [/AJCC\sv\d\s(or|and)\sv\d/, /AJCC\sv\d/].map(
    (pattern) => addSeparatorRegex(pattern),
  )
  return removeSubstring(condition, versionPatterns, false)
}

/**
 * Wraps a pattern so that it also captures an optional separator ('-' or ',',
 * optionally followed by one whitespace character) directly in front of it.
 * Eg. 'Recurrent' matches 'Recurrent', '- Recurrent', ', Recurrent'
 *
 * @param condition - a regex source string (used as-is, NOT escaped) or a
 *   RegExp whose source is wrapped
 * @returns a new RegExp where group 1 is the separator plus the pattern and
 *   group 2 is the pattern alone. When a RegExp is passed in, its flags are
 *   carried over so that e.g. a case-insensitive pattern stays
 *   case-insensitive.
 */
function addSeparatorRegex(condition: RegExp | string) {
  const separatorRegexString = '(?:[-,]\\s?)?'
  const source = typeof condition === 'string' ? condition : condition.source
  // Rebuilding from `.source` alone would silently drop the original flags
  const flags = typeof condition === 'string' ? '' : condition.flags
  return new RegExp(`(${separatorRegexString}(${source}))`, flags)
}
