{"version":3,"sources":["../../lib/Tabletojson.ts"],"sourcesContent":["import * as cheerio from 'cheerio';\nimport {CallbackFunction, TableToJsonOptions} from './index';\n\nexport class Tabletojson {\n static convert(\n html: string,\n options: TableToJsonOptions = {\n useFirstRowForHeadings: false,\n stripHtmlFromHeadings: true,\n stripHtmlFromCells: true,\n stripHtml: null,\n forceIndexAsNumber: false,\n countDuplicateHeadings: true,\n ignoreColumns: null,\n onlyColumns: null,\n ignoreHiddenRows: true,\n headings: null,\n containsClasses: null,\n id: null,\n limitrows: null,\n },\n ): any[] {\n options = Object.assign(\n {\n useFirstRowForHeadings: false,\n stripHtmlFromHeadings: true,\n stripHtmlFromCells: true,\n stripHtml: null,\n forceIndexAsNumber: false,\n countDuplicateHeadings: true,\n ignoreColumns: null,\n onlyColumns: null,\n ignoreHiddenRows: true,\n headings: null,\n containsClasses: null,\n id: null,\n limitrows: null,\n },\n options,\n );\n\n if (options.stripHtml === true) {\n options.stripHtmlFromHeadings = true;\n options.stripHtmlFromCells = true;\n } else if (options.stripHtml === false) {\n options.stripHtmlFromHeadings = false;\n options.stripHtmlFromCells = false;\n }\n\n const jsonResponse: any[] = [];\n let suffix;\n\n const $ = cheerio.load(html);\n\n let additionalSelectors = options.containsClasses ? `.${options.containsClasses.join('.')}` : '';\n additionalSelectors = options.id ? `${additionalSelectors}#${options.id}` : '';\n\n $(`table${additionalSelectors}`).each((_i, table) => {\n const tableAsJson: any[] = [];\n const alreadySeen: any = {};\n // Get column headings\n // @fixme Doesn't support vertical column headings.\n // @todo Try to support badly formated tables.\n const columnHeadings: string[] = [];\n\n let trs: cheerio.Cheerio = $(table).find('tr');\n\n if (options.useFirstRowForHeadings) {\n trs = $(trs[0]);\n }\n let headingsCounter: number = 0;\n // Use headings for objects key evaluation\n trs.each((_index: number, row: cheerio.Element) => {\n const cells: cheerio.Cheerio = options.useFirstRowForHeadings\n ? $(row).find('td, th')\n : $(row).find('th');\n cells.each((j: number, cell: cheerio.Element) => {\n if (options.onlyColumns && !options.onlyColumns.includes(j)) return;\n if (options.ignoreColumns && !options.onlyColumns && options.ignoreColumns.includes(j)) return;\n let value: string = '';\n\n if (options.headings) {\n value = options.headings[headingsCounter++];\n } else {\n const cheerioCell: cheerio.Cheerio = $(cell);\n const cheerioCellText: string = cheerioCell.text();\n const cheerioCellHtml: string | null = cheerioCell.html();\n\n value = options.stripHtmlFromHeadings\n ? cheerioCellText.trim()\n : cheerioCellHtml\n ? cheerioCellHtml.trim()\n : '';\n }\n\n const seen: any = alreadySeen[value];\n if (seen && options.countDuplicateHeadings) {\n suffix = ++alreadySeen[value];\n columnHeadings[j] = value !== '' ? `${value}_${suffix}` : `${j}`;\n } else {\n alreadySeen[value] = 1;\n columnHeadings[j] = value;\n }\n });\n });\n\n let rowspans: any[] = [];\n\n // Fetch each row\n $(table)\n .find('tr')\n .each(function (i, row) {\n const rowAsJson: any = {};\n\n function setColumn(j: number, content: string) {\n if (columnHeadings[j] && !options.forceIndexAsNumber) {\n rowAsJson[columnHeadings[j]] = content;\n } else {\n rowAsJson[j] = content;\n }\n }\n\n // Add content from rowspans\n rowspans.forEach((rowspan: any, index: number) => {\n if (!rowspan) return;\n\n setColumn(index, rowspan.content);\n\n rowspan.value--;\n });\n const nextrowspans: any[] = [...rowspans];\n\n const cells: cheerio.Cheerio = options.useFirstRowForHeadings\n ? $(row).find('td, th')\n : $(row).find('td');\n cells.each((j: number, cell: cheerio.Element) => {\n // ignoreHiddenRows\n if (options.ignoreHiddenRows) {\n const style: string | undefined = $(row).attr('style');\n if (style) {\n const m = style.match(/.*display.*:.*none.*/g);\n if (m && m.length > 0) return;\n }\n }\n\n // Apply rowspans offsets\n let aux: number = j;\n j = 0;\n do {\n while (rowspans[j]) j++;\n while (aux && !rowspans[j]) {\n j++;\n aux--;\n }\n } while (aux);\n\n if (options.onlyColumns && !options.onlyColumns.includes(j)) return;\n if (options.ignoreColumns && !options.onlyColumns && options.ignoreColumns.includes(j)) return;\n\n const cheerioCell: cheerio.Cheerio = $(cell);\n const cheerioCellText: string = cheerioCell.text();\n const cheerioCellHtml: string | null = cheerioCell.html();\n const cheerioCellRowspan: string | undefined = cheerioCell.attr('rowspan');\n\n const content: string = options.stripHtmlFromCells\n ? cheerioCellText.trim()\n : cheerioCellHtml\n ? cheerioCellHtml.trim()\n : '';\n\n setColumn(j, content);\n\n // Check rowspan\n const value: number = cheerioCellRowspan ? parseInt(cheerioCellRowspan, 10) - 1 : 0;\n if (value > 0) nextrowspans[j] = {content, value};\n });\n\n rowspans = nextrowspans;\n rowspans.forEach((rowspan: any, index: number) => {\n if (rowspan && rowspan.value === 0) rowspans[index] = null;\n });\n\n // Skip blank rows\n if (JSON.stringify(rowAsJson) !== '{}') tableAsJson.push(rowAsJson);\n\n if (options.limitrows && i === options.limitrows) {\n return false;\n }\n });\n\n // Add the table to the response\n const dataContained: boolean = tableAsJson.length > 0;\n const pushToJsonResult: boolean = Array.isArray(tableAsJson) && dataContained;\n if (!pushToJsonResult) {\n return true;\n }\n jsonResponse.push(tableAsJson);\n });\n\n return jsonResponse;\n }\n\n /**\n * Convert an HTML Page for a given URL\n * @param url URL to be called\n * @param callbackFunctionOrOptions {Object} Options for html conversion\n * @param callbackFunctionOrOptions.useFirstRowForHeadings Use the first row as header [default=false]\n * @param callbackFunctionOrOptions.stripHtmlFromHeadings Strip all HTML from headings [default=true]\n * @param callbackFunctionOrOptions.stripHtmlFromCells Strip HTML from cells [default=true]\n * @param callbackFunctionOrOptions.stripHtml Strip off HTML [default=null] if set true stripHtmlFromHeadings and stripHtmlFromCells will also be true\n * @param callbackFunctionOrOptions.forceIndexAsNumber Force the index to be used as number [default=false]\n * @param callbackFunctionOrOptions.countDuplicateHeadings If given a _ will be added to the duplicate key [default=false]\n * @param callbackFunctionOrOptions.ignoreColumns {Array} Array of column indices to ignored [default=null]\n * @param callbackFunctionOrOptions.onlyColumns {Array} Array of column indices to be used. Overrides ignoreColumn [default=null]\n * @param callbackFunctionOrOptions.ignoreHiddenRows Ignoring hidden rows [default=true]\n * @param callbackFunctionOrOptions.headings {Array} Array of Strings to be used as headings [default=null]\n * @param callbackFunctionOrOptions.headings {Array} Array of classes to find a specific table [default=null]\n * @param callbackFunctionOrOptions.limitrows {Integer} Integer that limits the result of all rows to a given amount of data [default=null]\n * @param callbackFunctionOrOptions.request Options to be passed to request object\n * @param callbackFunction Callback function to be called when the conversion finished\n * @return {Promise<*>} Promise containing the result\n */\n static async convertUrl(\n url: string,\n callbackFunctionOrOptions?: TableToJsonOptions | CallbackFunction,\n callbackFunction?: CallbackFunction,\n ): Promise {\n let options: TableToJsonOptions;\n let callback = null;\n let fetchOptions: RequestInit;\n\n if (\n callbackFunction &&\n typeof callbackFunction === 'function' &&\n typeof callbackFunctionOrOptions === 'object'\n ) {\n // If both options and callback passed\n options = callbackFunctionOrOptions;\n // If you need to pass in options for request (proxy)\n // add them to callbackFunctionOrOptions.request\n fetchOptions = options.fetchOptions || {};\n callback = callbackFunction;\n\n // Use a callback (if passed)\n const result = await fetch(url, fetchOptions);\n const resultMimetype = result.headers.get('content-type');\n if (resultMimetype && !resultMimetype.includes('text/')) {\n throw new Error('Tabletojson can just handle text/** mimetypes');\n }\n return callback.call(this, Tabletojson.convert(await result.text(), options));\n } else if (typeof callbackFunctionOrOptions === 'function') {\n // If only callback passed, invoke with no options\n callback = callbackFunctionOrOptions;\n\n // Use a callback (if passed)\n const result = await fetch(url);\n const resultMimetype = result.headers.get('content-type');\n if (resultMimetype && !resultMimetype.includes('text/')) {\n throw new Error('Tabletojson can just handle text/** mimetypes');\n }\n return callback.call(this, Tabletojson.convert(await result.text()));\n } else {\n // If neither argument is callback, return a promise\n options = callbackFunctionOrOptions || {};\n // If you need to pass in options for request (proxy)\n // add them to callbackFunctionOrOptions.request\n fetchOptions = options.fetchOptions || {};\n const result = await fetch(url);\n const resultMimetype = result.headers.get('content-type');\n if (resultMimetype && !resultMimetype.includes('text/')) {\n throw new Error('Tabletojson can just handle text/** mimetypes');\n }\n return Tabletojson.convert(await result.text(), options);\n }\n }\n}\n"],"names":["cheerio","Tabletojson","convert","html","options","useFirstRowForHeadings","stripHtmlFromHeadings","stripHtmlFromCells","stripHtml","forceIndexAsNumber","countDuplicateHeadings","ignoreColumns","onlyColumns","ignoreHiddenRows","headings","containsClasses","id","limitrows","Object","assign","jsonResponse","suffix","$","load","additionalSelectors","join","each","_i","table","tableAsJson","alreadySeen","columnHeadings","trs","find","headingsCounter","_index","row","cells","j","cell","includes","value","cheerioCell","cheerioCellText","text","cheerioCellHtml","trim","seen","rowspans","i","rowAsJson","setColumn","content","forEach","rowspan","index","nextrowspans","style","attr","m","match","length","aux","cheerioCellRowspan","parseInt","JSON","stringify","push","dataContained","pushToJsonResult","Array","isArray","convertUrl","url","callbackFunctionOrOptions","callbackFunction","callback","fetchOptions","result","fetch","resultMimetype","headers","get","Error","call"],"mappings":"AAAA,YAAYA,aAAa,UAAU;AAGnC,OAAO,MAAMC;IACT,OAAOC,QACHC,IAAY,EACZC,UAA8B;QAC1BC,wBAAwB;QACxBC,uBAAuB;QACvBC,oBAAoB;QACpBC,WAAW;QACXC,oBAAoB;QACpBC,wBAAwB;QACxBC,eAAe;QACfC,aAAa;QACbC,kBAAkB;QAClBC,UAAU;QACVC,iBAAiB;QACjBC,IAAI;QACJC,WAAW;IACf,CAAC,EACI;QACLb,UAAUc,OAAOC,MAAM,CACnB;YACId,wBAAwB;YACxBC,uBAAuB;YACvBC,oBAAoB;YACpBC,WAAW;YACXC,oBAAoB;YACpBC,wBAAwB;YACxBC,eAAe;YACfC,aAAa;YACbC,kBAAkB;YAClBC,UAAU;YACVC,iBAAiB;YACjBC,IAAI;YACJC,WAAW;QACf,GACAb;QAGJ,IAAIA,QAAQI,SAAS,KAAK,MAAM;YAC5BJ,QAAQE,qBAAqB,GAAG;YAChCF,QAAQG,kBAAkB,GAAG;QACjC,OAAO,IAAIH,QAAQI,SAAS,KAAK,OAAO;YACpCJ,QAAQE,qBAAqB,GAAG;YAChCF,QAAQG,kBAAkB,GAAG;QACjC;QAEA,MAAMa,eAAsB,EAAE;QAC9B,IAAIC;QAEJ,MAAMC,IAAItB,QAAQuB,IAAI,CAACpB;QAEvB,IAAIqB,sBAAsBpB,QAAQW,eAAe,GAAG,CAAC,CAAC,EAAEX,QAAQW,eAAe,CAACU,IAAI,CAAC,KAAK,CAAC,GAAG;QAC9FD,sBAAsBpB,QAAQY,EAAE,GAAG,CAAC,EAAEQ,oBAAoB,CAAC,EAAEpB,QAAQY,EAAE,CAAC,CAAC,GAAG;QAE5EM,EAAE,CAAC,KAAK,EAAEE,oBAAoB,CAAC,EAAEE,IAAI,CAAC,CAACC,IAAIC;YACvC,MAAMC,cAAqB,EAAE;YAC7B,MAAMC,cAAmB,CAAC;YAC1B,sBAAsB;YACtB,mDAAmD;YACnD,8CAA8C;YAC9C,MAAMC,iBAA2B,EAAE;YAEnC,IAAIC,MAAuBV,EAAEM,OAAOK,IAAI,CAAC;YAEzC,IAAI7B,QAAQC,sBAAsB,EAAE;gBAChC2B,MAAMV,EAAEU,GAAG,CAAC,EAAE;YAClB;YACA,IAAIE,kBAA0B;YAC9B,0CAA0C;YAC1CF,IAAIN,IAAI,CAAC,CAACS,QAAgBC;gBACtB,MAAMC,QAAyBjC,QAAQC,sBAAsB,GACvDiB,EAAEc,KAAKH,IAAI,CAAC,YACZX,EAAEc,KAAKH,IAAI,CAAC;gBAClBI,MAAMX,IAAI,CAAC,CAACY,GAAWC;oBACnB,IAAInC,QAAQQ,WAAW,IAAI,CAACR,QAAQQ,WAAW,CAAC4B,QAAQ,CAACF,IAAI;oBAC7D,IAAIlC,QAAQO,aAAa,IAAI,CAACP,QAAQQ,WAAW,IAAIR,QAAQO,aAAa,CAAC6B,QAAQ,CAACF,IAAI;oBACxF,IAAIG,QAAgB;oBAEpB,IAAIrC,QAAQU,QAAQ,EAAE;wBAClB2B,QAAQrC,QAAQU,QAAQ,CAACoB,kBAAkB;oBAC/C,OAAO;wBACH,MAAMQ,cAA+BpB,EAAEiB;wBACvC,MAAMI,kBAA0BD,YAAYE,IAAI;wBAChD,MAAMC,kBAAiCH,YAAYvC,IAAI;wBAEvDsC,QAAQrC,QAAQE,qBAAqB,GAC/BqC,gBAAgBG,IAAI,KACpBD,kBACAA,gBAAgBC,IAAI,KACpB;oBACV;oBAEA,MAAMC,OAAYjB,WAAW,CAACW,MAAM;oBACpC,IAAIM,QAAQ3C,QAAQM,sBAAsB,EAAE;wBACxCW,SAAS,EAAES,WAAW,CAACW,MAAM;wBAC7BV,cAAc,CAACO,EAAE,GAAGG,UAAU,KAAK,CAAC,EAAEA,MAAM,CAAC,EAAEpB,OAAO,CAAC,GAAG,CAAC,EAAEiB,EAAE,CAAC;oBACpE,OAAO;wBACHR,WAAW,CAACW,MAAM,GAAG;wBACrBV,cAAc,CAACO,EAAE,GAAGG;oBACxB;gBACJ;YACJ;YAEA,IAAIO,WAAkB,EAAE;YAExB,iBAAiB;YACjB1B,EAAEM,OACGK,IAAI,CAAC,MACLP,IAAI,CAAC,SAAUuB,CAAC,EAAEb,GAAG;gBAClB,MAAMc,YAAiB,CAAC;gBAExB,SAASC,UAAUb,CAAS,EAAEc,OAAe;oBACzC,IAAIrB,cAAc,CAACO,EAAE,IAAI,CAAClC,QAAQK,kBAAkB,EAAE;wBAClDyC,SAAS,CAACnB,cAAc,CAACO,EAAE,CAAC,GAAGc;oBACnC,OAAO;wBACHF,SAAS,CAACZ,EAAE,GAAGc;oBACnB;gBACJ;gBAEA,4BAA4B;gBAC5BJ,SAASK,OAAO,CAAC,CAACC,SAAcC;oBAC5B,IAAI,CAACD,SAAS;oBAEdH,UAAUI,OAAOD,QAAQF,OAAO;oBAEhCE,QAAQb,KAAK;gBACjB;gBACA,MAAMe,eAAsB;uBAAIR;iBAAS;gBAEzC,MAAMX,QAAyBjC,QAAQC,sBAAsB,GACvDiB,EAAEc,KAAKH,IAAI,CAAC,YACZX,EAAEc,KAAKH,IAAI,CAAC;gBAClBI,MAAMX,IAAI,CAAC,CAACY,GAAWC;oBACnB,mBAAmB;oBACnB,IAAInC,QAAQS,gBAAgB,EAAE;wBAC1B,MAAM4C,QAA4BnC,EAAEc,KAAKsB,IAAI,CAAC;wBAC9C,IAAID,OAAO;4BACP,MAAME,IAAIF,MAAMG,KAAK,CAAC;4BACtB,IAAID,KAAKA,EAAEE,MAAM,GAAG,GAAG;wBAC3B;oBACJ;oBAEA,yBAAyB;oBACzB,IAAIC,MAAcxB;oBAClBA,IAAI;oBACJ,GAAG;wBACC,MAAOU,QAAQ,CAACV,EAAE,CAAEA;wBACpB,MAAOwB,OAAO,CAACd,QAAQ,CAACV,EAAE,CAAE;4BACxBA;4BACAwB;wBACJ;oBACJ,QAASA,IAAK;oBAEd,IAAI1D,QAAQQ,WAAW,IAAI,CAACR,QAAQQ,WAAW,CAAC4B,QAAQ,CAACF,IAAI;oBAC7D,IAAIlC,QAAQO,aAAa,IAAI,CAACP,QAAQQ,WAAW,IAAIR,QAAQO,aAAa,CAAC6B,QAAQ,CAACF,IAAI;oBAExF,MAAMI,cAA+BpB,EAAEiB;oBACvC,MAAMI,kBAA0BD,YAAYE,IAAI;oBAChD,MAAMC,kBAAiCH,YAAYvC,IAAI;oBACvD,MAAM4D,qBAAyCrB,YAAYgB,IAAI,CAAC;oBAEhE,MAAMN,UAAkBhD,QAAQG,kBAAkB,GAC5CoC,gBAAgBG,IAAI,KACpBD,kBACAA,gBAAgBC,IAAI,KACpB;oBAENK,UAAUb,GAAGc;oBAEb,gBAAgB;oBAChB,MAAMX,QAAgBsB,qBAAqBC,SAASD,oBAAoB,MAAM,IAAI;oBAClF,IAAItB,QAAQ,GAAGe,YAAY,CAAClB,EAAE,GAAG;wBAACc;wBAASX;oBAAK;gBACpD;gBAEAO,WAAWQ;gBACXR,SAASK,OAAO,CAAC,CAACC,SAAcC;oBAC5B,IAAID,WAAWA,QAAQb,KAAK,KAAK,GAAGO,QAAQ,CAACO,MAAM,GAAG;gBAC1D;gBAEA,kBAAkB;gBAClB,IAAIU,KAAKC,SAAS,CAAChB,eAAe,MAAMrB,YAAYsC,IAAI,CAACjB;gBAEzD,IAAI9C,QAAQa,SAAS,IAAIgC,MAAM7C,QAAQa,SAAS,EAAE;oBAC9C,OAAO;gBACX;YACJ;YAEJ,gCAAgC;YAChC,MAAMmD,gBAAyBvC,YAAYgC,MAAM,GAAG;YACpD,MAAMQ,mBAA4BC,MAAMC,OAAO,CAAC1C,gBAAgBuC;YAChE,IAAI,CAACC,kBAAkB;gBACnB,OAAO;YACX;YACAjD,aAAa+C,IAAI,CAACtC;QACtB;QAEA,OAAOT;IACX;IAEA;;;;;;;;;;;;;;;;;;;KAmBC,GACD,aAAaoD,WACTC,GAAW,EACXC,yBAAiE,EACjEC,gBAAmC,EACvB;QACZ,IAAIvE;QACJ,IAAIwE,WAAW;QACf,IAAIC;QAEJ,IACIF,oBACA,OAAOA,qBAAqB,cAC5B,OAAOD,8BAA8B,UACvC;YACE,sCAAsC;YACtCtE,UAAUsE;YACV,qDAAqD;YACrD,gDAAgD;YAChDG,eAAezE,QAAQyE,YAAY,IAAI,CAAC;YACxCD,WAAWD;YAEX,6BAA6B;YAC7B,MAAMG,SAAS,MAAMC,MAAMN,KAAKI;YAChC,MAAMG,iBAAiBF,OAAOG,OAAO,CAACC,GAAG,CAAC;YAC1C,IAAIF,kBAAkB,CAACA,eAAexC,QAAQ,CAAC,UAAU;gBACrD,MAAM,IAAI2C,MAAM;YACpB;YACA,OAAOP,SAASQ,IAAI,CAAC,IAAI,EAAEnF,YAAYC,OAAO,CAAC,MAAM4E,OAAOlC,IAAI,IAAIxC;QACxE,OAAO,IAAI,OAAOsE,8BAA8B,YAAY;YACxD,kDAAkD;YAClDE,WAAWF;YAEX,6BAA6B;YAC7B,MAAMI,SAAS,MAAMC,MAAMN;YAC3B,MAAMO,iBAAiBF,OAAOG,OAAO,CAACC,GAAG,CAAC;YAC1C,IAAIF,kBAAkB,CAACA,eAAexC,QAAQ,CAAC,UAAU;gBACrD,MAAM,IAAI2C,MAAM;YACpB;YACA,OAAOP,SAASQ,IAAI,CAAC,IAAI,EAAEnF,YAAYC,OAAO,CAAC,MAAM4E,OAAOlC,IAAI;QACpE,OAAO;YACH,oDAAoD;YACpDxC,UAAUsE,6BAA6B,CAAC;YACxC,qDAAqD;YACrD,gDAAgD;YAChDG,eAAezE,QAAQyE,YAAY,IAAI,CAAC;YACxC,MAAMC,SAAS,MAAMC,MAAMN;YAC3B,MAAMO,iBAAiBF,OAAOG,OAAO,CAACC,GAAG,CAAC;YAC1C,IAAIF,kBAAkB,CAACA,eAAexC,QAAQ,CAAC,UAAU;gBACrD,MAAM,IAAI2C,MAAM;YACpB;YACA,OAAOlF,YAAYC,OAAO,CAAC,MAAM4E,OAAOlC,IAAI,IAAIxC;QACpD;IACJ;AACJ"}