import peg from 'pegjs';
import SyntacticActionsPlugin from 'pegjs-syntactic-actions';

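/*
 * Sample annotations the grammar below is meant to recognise
 * (start marker, keyword, separator, optional person title, value):
 *
 *   *CAN=SIG. blah blah
 *   *CAN=SIG. blah blah;
 *   *CAN=SIG. blah blah *
 *
 * Keyword abbreviations seen in the input:
 *   IM, IMP, CAN, CANC, PM, AVV, AVV DIF
 */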
/**
 * PEG.js parser for "key / value" annotations embedded in free text,
 * generated once at module load from the inline grammar below.
 *
 * Grammar overview:
 * - `start` repeatedly captures the text preceding the next `keyvalue`
 *   match (advancing a whole run of `wchar` or a single character at a time),
 *   then the `keyvalue` itself, and finally captures the rest of the input
 *   via `remainder`.
 * - `startunsafe` has the same shape but uses `keyvalueunsafe`, in which the
 *   start marker (`startkey`) is optional and one or more plain spaces are
 *   accepted in place of the `=` / `:` separator.
 * - `keyword` is an ordered choice over the imp/canc/pm/avv/operator/other
 *   keyword rules; `persontitle` is an ordered choice over the sig/dott
 *   male/female title rules.
 * - `value` stops at the characters and patterns listed in
 *   `valueexcludepatterns` and, after whitespace / `.` / `-`, at anything
 *   matching `separatedvalueterminators`.
 * - The `word` and `integer` rules are defined but not referenced by any
 *   other rule in this grammar.
 *
 * Because the grammar lives in a JavaScript template literal, every backslash
 * intended for PEG.js is doubled (e.g. `\\n` reaches PEG.js as `\n`), while
 * `\u00C0` / `\uFFFF` are resolved by JavaScript before PEG.js sees them.
 *
 * NOTE(review): the functions below read `rule`, `text` and `children` from
 * the parse result; that node shape is presumably produced by
 * SyntacticActionsPlugin - confirm against the plugin documentation.
 */
const parser = peg.generate(
  `
start = ($((!keyvalue (wchar+ /.))*) keyvalue)* $(remainder)
startunsafe = ($((!keyvalueunsafe (wchar+ /.))*) keyvalueunsafe)* $(remainder)

keyvalue = $(startkey) keyword $(ws* keyvaluesep ws* (valuecounter ws*)?) (persontitle ws+)? value $(endvalue)?

keyvalueunsafe = $(startkey)? keyword $((ws* keyvaluesep ws* / ' '+) (valuecounter ws*)?) (persontitle ws+)? value $(endvalue)?

endvalue = ';'

valuecounter = $(digit+ ws* [°.)-])

keyvaluesep = $([=:])
  
startkey = $([@*#])

simplevalueterminators = $([/()\\\\+&;:=\\[\\]\\n,])
immediatevalueterminators = $(simplevalueterminators)
separatedvalueterminators = $(complexvalueterminators)

freeattribute = $( ('liber'i [aeioAEIO]? / 'lib'i) '.'?)
nonfreeattribute = $( ('detenut'i [aeioAEIO]? / 'det'i) '.'?)
presentattribute = $( ('presente'i / 'pre'i 's'i?) '.'?)
nonpresentattribute = $( ('non'i presentattribute / ('assente'i / 'ass'i) '.'?))
bornattribute = $( 'nat'i [aeioAEIO] (ws+ 'a'i)? ws+)
operatoractivity = $( 'si'i ws+ 'assenta'i / 'avvertit'i [oaieOAIE] )
publicattribute = $( 'd\\'ufficio'i )
trustedattribute = $( ('di'i ws+)? ('fiducia'i / 'fid'i '.'?) )
lawyerterminator = $( ( 'per'i / 'e'i / 'foro'i / trustedattribute) (ws+ / ':') )
complexvalueterminators = $( freeattribute (ws* ','? ws*) ( presentattribute / nonpresentattribute) / presentattribute / nonpresentattribute / nonfreeattribute / bornattribute / operatoractivity / lawyerterminator / ((impkeyword / canckeyword / pmkeyword / avvkeyword) ws+))

impkeyword = $( ( 'imputat'i [aeioAEIO]? / 'im'i 'p'i? ) (ws* digit+)? '.'? )
canckeyword = $( ( 'cancelliere'i / 'can'i 'c'i? ) '.'? )
pmkeyword = $( 'p'i (ws* '.'?) 'm'i '.'? )
avvtitle = $('avvocat'i ('essa'i / 'esse'i / [aeioAEIO]? ) / 'avv'i (('.'? ws+)? 'ssa'i)?)
plaintiffrole = $( 'pc'i / [pP] ws* '.'? ws* ('civ'i / [cC]) (ws* '.')? )
representationrole = $( ('difes'i [aeioAEIO]? / 'dif'i) '.'? (ws+ 'da'i)? ) &ws
avvkeyword = $(('con'i ws+)? (( avvtitle ('.'? ws+ ('dif'i / 'difesa'i))? / ('dif'i '.'?) (ws+ avvtitle)? ) '.'?) (ws+ (plaintiffrole / trustedattribute / publicattribute ) '.'?)*)
operatortoken = $( 'operatore'i / 'op'i '.'? )
soundmantoken = $( 'fonico'i / 'fon'i '.'? )
operatorkeyword = $( operatortoken ws* soundmantoken / operatortoken / soundmantoken )
otherkeyword = $(( 'nr'i / 'rg'i 'nr'i? / 'e'i ) '.'?)
keyword = impkeyword / canckeyword / pmkeyword / avvkeyword / operatorkeyword / otherkeyword

sigtitlemale = $( 'signor'i 'i'i? / 'sig'i '.'?)
sigtitlefemale = $( 'signor'i [aeAE]? / 'sig'i '.'? (ws*  'ra'i))
dotttitlemale = $( 'dottor'i [eiEI]? / ('dot'i 't'i? / 'dr'i ) '.'?)
dotttitlefemale = $( 'dottoress'i [aeAE]? / ('dot'i 't'i? / 'dr'i ) '.'? (ws*  'ssa'i))
persontitle = sigtitlefemale / sigtitlemale / dotttitlefemale / dotttitlemale

valueexcludepatterns = ([\\n] / endvalue / ws* (startkey keyword / startkey / keyvaluesep / keyword keyvaluesep) / operatoractivity / immediatevalueterminators)
valuetoken = $(!valueexcludepatterns ![ \\t\\r.-] .)+
valuepluscounter = $(ws* '+' ws* digit+)
value = $( (valuetoken / [ \\t\\r.-]+ !separatedvalueterminators valuetoken)* valuepluscounter? )


ws = $([ \\t\\n\\r])
    
integer = $(digit+)
    
digit = $([0-9])
    
word = $(wchar+)
    
wchar = $( latinGlyph / extendedGlyph )
    
latinGlyph = [a-z]i
    
extendedGlyph = [\u00C0-\uFFFF]
    
remainder = $(.*)
`,
  {
    // Both entry points must be listed so `parser.parse` can be called with
    // `startRule: 'startunsafe'` as well as with the default `start` rule.
    allowedStartRules: ['start', 'startunsafe'],
    // NOTE(review): presumably replaces the default rule results with
    // rule/text/children nodes - confirm against the plugin documentation.
    plugins: [new SyntacticActionsPlugin()],
  }
);

/**
 * Translates the rule name of a keyword's first child into the `type`
 * reported on the resulting key. Returns `undefined` for unknown rules.
 */
function keywordTypeFor(rule) {
  switch (rule) {
    case 'impkeyword':
      return 'defendant';
    case 'pmkeyword':
      return 'prosecutor';
    case 'canckeyword':
      return 'chancellor';
    case 'avvkeyword':
      return 'lawyer';
    case 'operatorkeyword':
      return 'operator';
    case 'otherkeyword':
      return 'other';
    default:
      return undefined;
  }
}

/**
 * Builds the `key` descriptor (`text` plus optional `type`) from a
 * `keyword` child node.
 */
function describeKeyword(item) {
  const descriptor = { text: item.text };
  if (Array.isArray(item.children) && item.children.length > 0) {
    const type = keywordTypeFor(item.children[0].rule);
    // Leave `type` absent (not `undefined`) when the rule is not recognised.
    if (type !== undefined) {
      descriptor.type = type;
    }
  }
  return descriptor;
}

/**
 * Builds the `persontitle` descriptor from a `persontitle` child node,
 * deriving `gender` and `title` from the name of the matched sub-rule.
 */
function describePersonTitle(item) {
  const descriptor = { text: item.text };
  if (!Array.isArray(item.children) || item.children.length === 0) {
    return descriptor;
  }
  const { rule } = item.children[0];
  descriptor.type = rule;
  // 'female' must be tested first: every '...female' name also ends in 'male'.
  if (rule.endsWith('female')) {
    descriptor.gender = 'female';
  } else if (rule.endsWith('male')) {
    descriptor.gender = 'male';
  }
  if (rule.startsWith('dott')) {
    descriptor.title = 'dr';
  } else if (rule.startsWith('sig')) {
    descriptor.title = 'mr';
  }
  return descriptor;
}

/**
 * Condenses a key/value parse node into `{ ...node, item }` without `children`.
 *
 * `item` may carry `key` (from the `keyword` child), `persontitle` and
 * `value` (text of the `value` child). Children whose `text` is undefined are
 * ignored.
 *
 * @param {object} node - Parse node; returned untouched if it has no `children` property.
 * @returns {object|string} The condensed node, or `node.text` when the
 *   keyword's type is 'other'.
 */
function processKeyValue(node) {
  if (!('children' in node)) {
    return node;
  }

  const entry = {};
  node.children.forEach((child) => {
    if (child.text === undefined) {
      return;
    }
    switch (child.rule) {
      case 'keyword':
        entry.key = describeKeyword(child);
        break;
      case 'value':
        entry.value = child.text;
        break;
      case 'persontitle':
        entry.persontitle = describePersonTitle(child);
        break;
      default:
        break;
    }
  });

  const isOtherKeyword = Boolean(entry.key) && entry.key.type === 'other';
  if (isOtherKeyword) {
    return node.text;
  }

  const condensed = { ...node, item: entry };
  delete condensed.children;
  return condensed;
}

/**
 * Unwraps a start-rule node into the list of entries it contains.
 *
 * @param {object} node - Start node.
 * @param {boolean} [skipStrings=false] - When the node has no `children`
 *   property, controls whether its text is dropped (`[]`) or kept (`[node.text]`).
 * @returns {Array} `node.children` when present, otherwise the fallback list.
 */
function processStart(node, skipStrings = false) {
  if ('children' in node) {
    return node.children;
  }
  return skipStrings ? [] : [node.text];
}

/**
 * Recursively normalises a raw parse tree.
 *
 * - `null`, `undefined` and `''` become `null`.
 * - Strings are dropped (`null`) when `skipStrings` is true, kept otherwise.
 * - Arrays are flattened two levels deep, each element is normalised and
 *   `null` results are removed.
 * - Nodes with a `children` property get their children normalised into an
 *   array. Key/value nodes (`keyvalue` / `keyvalueunsafe`) are then condensed
 *   by `processKeyValue` and lose their `rule`; start nodes (`start` /
 *   `startunsafe`) are unwrapped by `processStart`.
 * - Anything else is returned unchanged.
 *
 * A key/value node that yields no usable entry (no `item`, an empty value, or
 * a plain string returned by `processKeyValue`) is reduced to its text, or to
 * `null` when `skipStrings` is true.
 *
 * @param {*} node - Raw parse result (node object, array, string or nullish).
 * @param {boolean} [skipStrings=true] - Drop plain-text fragments from the result.
 * @returns {*} The normalised value, or `null` when nothing is left.
 */
function refactorTree(node, skipStrings = true) {
  if (node === null || node === undefined || node === '') {
    return null;
  }
  if (typeof node === 'string' || node instanceof String) {
    return skipStrings ? null : node;
  }
  if (Array.isArray(node)) {
    return node
      .flat(2)
      .map((child) => refactorTree(child, skipStrings))
      .filter((child) => child !== null);
  }
  if (typeof node !== 'object' || !('children' in node)) {
    return node;
  }

  let children = refactorTree(node.children, skipStrings);
  if (!Array.isArray(children)) {
    // A single child (or none) is still exposed as a list.
    children = children === null ? [] : [children];
  }
  let result = { ...node, children };

  if (result.rule === 'keyvalue' || result.rule === 'keyvalueunsafe') {
    const processed = processKeyValue(result);
    if (typeof processed === 'string' || processed instanceof String) {
      // processKeyValue hands back the raw text for 'other' keywords. Treat it
      // like any other text fragment instead of dereferencing `.text` on it,
      // which used to yield `undefined` and throw on the next property read.
      return skipStrings ? null : processed;
    }
    result = processed;
    delete result.rule;
    if (!result.item || !result.item.value) {
      // No usable value: fall back to the matched text (or drop it).
      if (skipStrings) {
        return null;
      }
      return typeof result.text === 'string' ? result.text : null;
    }
  }

  if (result.rule === 'start' || result.rule === 'startunsafe') {
    result = processStart(result, skipStrings);
  }
  return result;
}

/**
 * Parses `text` with the module-level parser and normalises the result.
 *
 * Best effort: if parsing or normalisation throws, the input is returned as a
 * single text fragment (or nothing when `skipStrings` is set).
 *
 * @param {string} text - Input to parse.
 * @param {boolean} [skipStrings=false] - Exclude plain-text fragments from the result.
 * @param {boolean} [unsafe=false] - Parse with the `startunsafe` start rule.
 * @returns {Array} Normalised entries, in input order.
 */
function basicParse(text, skipStrings = false, unsafe = false) {
  const isTextFragment = (entry) => typeof entry === 'string' || entry instanceof String;
  const parseOptions = unsafe ? { startRule: 'startunsafe' } : {};
  try {
    const tree = parser.parse(text, parseOptions);
    const entries = refactorTree(tree, skipStrings);
    return entries.filter((entry) => !skipStrings || !isTextFragment(entry));
  } catch (error) {
    return skipStrings ? [] : [text];
  }
}

/**
 * Public entry point: extracts key/value entries from `string`.
 *
 * Delegates to `basicParse`; unlike that function, plain-text fragments are
 * skipped by default.
 *
 * @param {string} string - Input to parse.
 * @param {boolean} [skipStrings=true] - Exclude plain-text fragments from the result.
 * @param {boolean} [unsafe=false] - Parse with the `startunsafe` start rule.
 * @returns {Array} Whatever `basicParse` returns for these arguments.
 */
function parseKeyValues(string, skipStrings = true, unsafe = false) {
  const entries = basicParse(string, skipStrings, unsafe);
  return entries;
}

export { parseKeyValues };

export default parseKeyValues;
