import sanitizeSentence from '@lib/utilities/text/sanitizeSentence'
import uniq from 'lodash/uniq'
import extractInclusionCriteria from './extractInclusionCriteria'

/**
 * Pulls a short, de-duplicated list of inclusion criteria out of an
 * unstructured eligibility string.
 *
 * Criteria pulled from ct.gov are generally of the form:
 *  'Inclusion criteria\n\n criteria 1\n criteria2\n\n Exclusion criteria \n\n criteria 3'
 *
 * The inclusion portion is obtained from `extractInclusionCriteria`, split on
 * newlines, stripped of header / empty / lead-in lines, passed through
 * `joinMultiLineCriteria`, de-duplicated, and finally mapped through
 * `sanitizeSentence`.
 *
 * @param criteria - an unstructured string that contains inclusion and exclusion criteria
 * @returns at most the first 10 resulting criteria
 */
export const parseEligibilityCriteria = (criteria: string) => {
  const maxHeaderLength = 20
  const maxCriteria = 10

  // Criteria are laid out one per line
  const lines = extractInclusionCriteria(criteria).split('\n')

  // A short line mentioning "inclusion criteria" is the section header, not a criterion
  const isHeader = (line: string) =>
    line.length < maxHeaderLength &&
    line.toLowerCase().includes('inclusion criteria')

  // Text after the last space (the whole line when it contains no space)
  const lastWord = (line: string) => line.slice(line.lastIndexOf(' ') + 1)

  // Single pass over the lines: keep only those that can stand alone as a criterion
  const candidates = lines.filter((line) => {
    if (isHeader(line)) {
      return false
    }
    if (line === '') {
      return false
    }
    // Lines ending with ':' or the word 'as' only introduce the criteria that
    // follow them, so they are dropped for now
    if (line.endsWith(':')) {
      return false
    }
    return lastWord(line) !== 'as'
  })

  const merged = joinMultiLineCriteria(candidates)

  // NOTE(review): sanitizeSentence is presumably a tidy-up step (removing
  // trailing characters) - confirm against its implementation
  const cleaned = uniq(merged).map(sanitizeSentence)

  return cleaned.slice(0, maxCriteria)
}

/**
 * Tells whether a line ends with the standalone word "or" (case-insensitive,
 * surrounding whitespace ignored).
 *
 * The word boundary matters: without it, lines ending in words such as
 * "tumor", "prior" or "donor" would be mistaken for an unfinished alternative.
 *
 * @param s - the line to inspect
 * @returns true when the trimmed line ends with the word "or"
 */
const endsWithOr = (s: string) => /\bor$/i.test(s.trim())

/**
 * Joins criteria that were split across several lines by a trailing "or".
 *
 * A line ending in "or" is concatenated (space separated) with the lines that
 * follow it, up to and including the first line that does not end in "or".
 * Lines that are not part of such a group are passed through unchanged.
 * A group that never reaches a closing line (the input ends while still
 * waiting for the alternative) is incomplete and is dropped.
 *
 * The input array is not modified.
 *
 * @param sections - criteria lines, in document order
 * @returns a new array with every "or" group collapsed into a single entry
 */
const joinMultiLineCriteria = (sections: string[]) => {
  const joined: string[] = []
  // Text accumulated for a group that is still waiting for its closing line
  let pending: string | undefined

  for (const section of sections) {
    const candidate = pending === undefined ? section : `${pending} ${section}`

    if (endsWithOr(section)) {
      // Another alternative is expected on the next line
      pending = candidate
      continue
    }

    // The group (or standalone line) is complete. The guard drops the rare
    // case where a blank closing line leaves the text still ending in "or".
    if (!endsWithOr(candidate)) {
      joined.push(candidate)
    }
    pending = undefined
  }

  // Anything still pending here never received its alternative and is dropped
  return joined
}
