import { ArticleMetadata } from '@core/domain/models/reviewItemMetadata/articleMetadata'
import { RegistrationMetadata } from '@core/domain/models/reviewItemMetadata/registrationMetadata'

/**
 * Builds the pattern that captures the value of one or more RIS tags.
 *
 * The lookbehind deliberately stops at the dash (no trailing `\s`): a tag with
 * an empty value and no trailing space (`VL  -` directly followed by a line
 * break) would otherwise let that `\s` consume the newline and capture the
 * *next* tag line as the value. Leading whitespace is removed by the callers'
 * `trim()` instead.
 *
 * A value ends right before a line that looks like the next tag
 * (optional capitals/digits, whitespace, dash), so wrapped lines stay part of
 * the value.
 *
 * @param tags Alternation of tag names, e.g. `'TI|T1'`.
 */
function risFieldRegex(tags: string): RegExp {
  return new RegExp(
    '(?<=(?:' + tags + ')\\s{2}-)(.*?)(?=\\n[A-Z\\d]*?\\s+-)',
    'gs',
  )
}

// Built once instead of once per record. They carry the `g` flag but are only
// used through String.prototype.match, which resets `lastIndex` on every call,
// so sharing them between records and calls is safe.
const RIS_FIELD_PATTERNS = {
  potentialPdfUrl: risFieldRegex('L1|L2'),
  volumeNumber: risFieldRegex('VL'),
  startPage: risFieldRegex('SP'),
  endPage: risFieldRegex('EP'),
  issueNumber: risFieldRegex('IS'),
  abstract: risFieldRegex('AB|N2'),
  title: risFieldRegex('TI|T1'),
  authors: risFieldRegex('A1|AU'),
  doi: risFieldRegex('DO'),
  publishYear: risFieldRegex('Y1|PY|YR'),
  url: risFieldRegex('UR|SO'),
  journal: risFieldRegex('JO'),
  registrationId: risFieldRegex('ID'),
  registrationDate: risFieldRegex('DP'),
}

// Not global on purpose: only the first numeric U1 value is read.
const RIS_PMID_PATTERN = /U1\s{2}-\s(\d+)/

/** Returns the trimmed first value matched by `pattern`, or `undefined` when the tag is absent. */
function firstRisValue(record: string, pattern: RegExp): string | undefined {
  return record.match(pattern)?.[0]?.trim()
}

/** Collapses every whitespace run (including line wraps) to one space and trims. */
function collapseWhitespace(text: string): string {
  return text.replace(/\s+/g, ' ').trim()
}

/**
 * Collects all author names of a record.
 *
 * - A value containing `;` is treated as a list and split on `;`.
 * - A value with exactly one `,` is treated as "Last, First" and returned as
 *   "Last First" (single space).
 * - A value with several `,` is treated as a comma-separated list.
 * - Empty names (empty tag, trailing separator) are dropped.
 */
function parseRisAuthors(record: string): string[] {
  const names: string[] = []
  for (const rawValue of record.match(RIS_FIELD_PATTERNS.authors) ?? []) {
    const value = collapseWhitespace(rawValue)
    if (value.includes(';')) {
      names.push(...value.split(';').map((name) => name.trim()))
    } else if (value.includes(',')) {
      const parts = value.split(',').map((part) => part.trim())
      if (parts.length === 2) {
        names.push(parts.join(' ').trim())
      } else {
        names.push(...parts)
      }
    } else {
      names.push(value)
    }
  }
  return names.filter((name) => name !== '')
}

/**
 * Parses RIS-formatted text into one metadata object per record.
 *
 * Before parsing, `\r\n` sequences and non-breaking spaces (`\xa0`) are
 * replaced by `\n`. A record is everything up to and including an `ER  -`
 * marker; text after the last marker is ignored. Missing string fields are
 * returned as `''`; `pmid` and `potentialPdfUrl` are `undefined` when absent.
 *
 * @param content Raw RIS text.
 * @returns The parsed records in input order; an empty array when no
 *   terminated record is found.
 */
export function parseRISText<T extends ArticleMetadata & RegistrationMetadata>(
  content: string,
): T[] {
  const normalized = content.replace(/\r\n|\xa0/gm, '\n')
  const rawStudies = normalized.match(/(.*?)ER\s{2}-/gs) ?? []

  return rawStudies.map((record) => {
    const startPage = firstRisValue(record, RIS_FIELD_PATTERNS.startPage) ?? ''
    const endPage = firstRisValue(record, RIS_FIELD_PATTERNS.endPage) ?? ''
    // Only join with a dash when both ends of the range are present.
    const separator = startPage !== '' && endPage !== '' ? '-' : ''

    // The object is assembled from loosely-typed text, so it cannot be checked
    // against the caller-chosen T here; the cast mirrors the function contract.
    return {
      pagesNumber: startPage + separator + endPage,
      issueNumber: firstRisValue(record, RIS_FIELD_PATTERNS.issueNumber) ?? '',
      volumeNumber:
        firstRisValue(record, RIS_FIELD_PATTERNS.volumeNumber) ?? '',
      title: collapseWhitespace(
        firstRisValue(record, RIS_FIELD_PATTERNS.title) ?? '',
      ),
      abstract: collapseWhitespace(
        firstRisValue(record, RIS_FIELD_PATTERNS.abstract) ?? '',
      ),
      authors: parseRisAuthors(record),
      doi: firstRisValue(record, RIS_FIELD_PATTERNS.doi) ?? '',
      url: firstRisValue(record, RIS_FIELD_PATTERNS.url) ?? '',
      // Keep only the leading four characters (the year of a longer date value).
      publishYear: (
        firstRisValue(record, RIS_FIELD_PATTERNS.publishYear) ?? ''
      ).substring(0, 4),
      rawData: record,
      journal: collapseWhitespace(
        firstRisValue(record, RIS_FIELD_PATTERNS.journal) ?? '',
      ),
      pmid: record.match(RIS_PMID_PATTERN)?.[1],
      potentialPdfUrl: firstRisValue(
        record,
        RIS_FIELD_PATTERNS.potentialPdfUrl,
      ),
      registrationDate:
        firstRisValue(record, RIS_FIELD_PATTERNS.registrationDate) ?? '',
      registrationId:
        firstRisValue(record, RIS_FIELD_PATTERNS.registrationId) ?? '',
    } as unknown as T
  })
}
