/**
 * File extensions grouped under the plain-text category.
 *
 * Every value is a lowercase extension that includes its leading dot
 * (for example `'.txt'`).
 * NOTE(review): callers matching against a file name presumably need to
 * lowercase the extension first — verify at the call sites.
 */
export enum PlainTextTypes {
  Eml = '.eml',
  Html = '.html',
  Json = '.json',
  Md = '.md',
  Msg = '.msg',
  Rst = '.rst',
  Rtf = '.rtf',
  Txt = '.txt',
  Xml = '.xml'
}

/**
 * File extensions grouped under the image category.
 *
 * Every value is a lowercase extension that includes its leading dot.
 * NOTE(review): only the `.jpeg` spelling has a member; the common `.jpg`
 * spelling does not — confirm whether that omission is intentional.
 */
export enum ImageTypes {
  Jpeg = '.jpeg',
  Png = '.png'
}

/**
 * File extensions grouped under the document category (office documents,
 * spreadsheets/delimited data, e-books, and PDF).
 *
 * Every value is a lowercase extension that includes its leading dot.
 * NOTE(review): `.xlsx` has a member but the legacy `.xls` does not, even
 * though legacy `.doc` and `.ppt` are listed — confirm whether intentional.
 */
export enum DocumentTypes {
  Csv = '.csv',
  Doc = '.doc',
  Docx = '.docx',
  Epub = '.epub',
  Odt = '.odt',
  Pdf = '.pdf',
  Ppt = '.ppt',
  Pptx = '.pptx',
  Tsv = '.tsv',
  Xlsx = '.xlsx'
}

/**
 * Single lookup object combining the members of `PlainTextTypes`,
 * `ImageTypes`, and `DocumentTypes`: keys are the enum member names and
 * values are the corresponding extension strings.
 *
 * The enums are merged with object spread, so a member name that appeared
 * in more than one enum would be silently overwritten by the later spread;
 * member names must therefore stay unique across the three enums.
 * `as const` makes the properties readonly at the type level only — the
 * object is not frozen at runtime.
 */
export const SupportedFileTypes = {
  ...PlainTextTypes,
  ...ImageTypes,
  ...DocumentTypes
} as const;

/**
 * Every extension string held by `SupportedFileTypes` (each with its
 * leading dot), in that object's property order: plain-text extensions
 * first, then image, then document.
 *
 * This is a regular mutable array; its inferred type is also reused via
 * `typeof SUPPORTED_EXTENSIONS`, so changing how it is built changes that
 * type as well.
 */
export const SUPPORTED_EXTENSIONS = Object.values(SupportedFileTypes);

/**
 * Request parameters for the Unstructured API. Every property is optional.
 *
 * @see https://unstructured-io.github.io/unstructured/api.html
 */
export type ApiParams = {
  coordinates?: boolean;

  /** Defaults to `utf_8` when not provided. */
  encoding?: string;

  /** OCR language code, e.g. `eng`, `kor`. */
  ocr_languages?: string;

  /** Defaults to `json` when not provided. */
  output_format?: 'json' | 'text/csv';

  include_page_breaks?: boolean;

  /**
   * Available when processing PDF/image files.
   * Defaults to `fast` when not provided.
   */
  strategy?: 'hi_res' | 'fast' | 'ocr_only' | 'auto';

  /** Extracts the table structure from PDF files when using the `hi_res` strategy. */
  pdf_infer_table_structure?: boolean;

  /**
   * By default, table extraction is skipped for PDFs, images, and Excel
   * files (pdf, jpg, png, xlsx, and xls).
   * Table extraction only works with the `hi_res` strategy.
   *
   * NOTE(review): `jpg` and `xls` are not among the extensions that
   * `SUPPORTED_EXTENSIONS` is built from, and those values carry a leading
   * dot while the defaults above do not — confirm the format the API expects.
   */
  skip_infer_table_types?: typeof SUPPORTED_EXTENSIONS;

  xml_keep_tags?: boolean;
};
