'use strict';
import fs from 'fs';
import path from 'path';
import _ from 'lodash';
import {Tabletojson as tabletojson} from '../lib/Tabletojson';
describe('TableToJSON Local', function () {
// Fixture markup shared by the tests below; both start empty and are filled once in beforeAll.
let html = '';
let noTables = '';
// Reads both fixture files synchronously as UTF-8. The relative paths are resolved
// against the working directory of the test process, not against this file.
beforeAll(() => {
    const readFixture = (relativePath) => fs.readFileSync(path.resolve(process.cwd(), relativePath), 'utf8');
    html = readFixture('test/tables.html');
    noTables = readFixture('test/notables.html');
});
it('Options: Strip HTML from header AND from body', async () => {
    // Both stripping flags are switched on explicitly.
    const options = {
        stripHtmlFromHeadings: true,
        stripHtmlFromCells: true,
    };
    const tables = tabletojson.convert(html, options);
    expect(tables).toBeDefined();
    // First row of the first converted table: plain 'Age' key holding the plain value '2'.
    const firstRow = tables[0][0];
    expect(_.has(firstRow, 'Age')).toBeTruthy();
    expect(firstRow.Age).toBe('2');
});
it('Options: Strip HTML from header AND from body using stripHtml-shortcut ', async () => {
    // A single stripHtml flag is passed instead of the two separate stripping options.
    const tables = tabletojson.convert(html, {stripHtml: true});
    expect(tables).toBeDefined();
    // Same expectations as with both explicit flags: plain 'Age' key, plain value '2'.
    const firstRow = tables[0][0];
    expect(_.has(firstRow, 'Age')).toBeTruthy();
    expect(firstRow.Age).toBe('2');
});
it('Options: Strip HTML from header but not from body', async () => {
    // Headings are stripped, cell contents are left untouched.
    const options = {
        stripHtmlFromHeadings: true,
        stripHtmlFromCells: false,
    };
    const tables = tabletojson.convert(html, options);
    expect(tables).toBeDefined();
    const firstRow = tables[0][0];
    expect(_.has(firstRow, 'Age')).toBeTruthy();
    // NOTE(review): with stripHtmlFromCells disabled this still expects the plain value '2',
    // exactly like the fully stripped case - confirm against test/tables.html that the cell
    // carries no markup, otherwise this assertion does not exercise the option.
    expect(firstRow.Age).toBe('2');
});
it('Options: Strip HTML from body but not from header', async () => {
    // Cell contents are stripped, headings are left untouched.
    const options = {
        stripHtmlFromHeadings: false,
        stripHtmlFromCells: true,
    };
    const tables = tabletojson.convert(html, options);
    expect(tables).toBeDefined();
    const firstRow = tables[0][0];
    // NOTE(review): with stripHtmlFromHeadings disabled the key is still expected to be the
    // plain 'Age' - confirm against test/tables.html that the heading carries no markup,
    // otherwise this assertion does not exercise the option.
    expect(_.has(firstRow, 'Age')).toBeTruthy();
    expect(firstRow['Age']).toBe('2');
});
// ADDED TO FIX: https://github.com/maugenst/tabletojson/issues/15
// The 'Age' heading must be usable as a key in the first AND in the second converted table.
it('Double Header Entry: handle double header entries in different tables', async () => {
    const tables = tabletojson.convert(html);
    expect(tables).toBeDefined();
    for (const tableIndex of [0, 1]) {
        const table = tables[tableIndex];
        expect(_.has(table[0], 'Age')).toBeTruthy();
    }
});
it('Double Header Entry: handle double header entries', async () => {
    const tables = tabletojson.convert(html);
    expect(tables).toBeDefined();
    // With default options the first row is expected to carry both the original key
    // and a '_2'-suffixed key for the repeated heading.
    const firstRow = tables[0][0];
    for (const key of ['isDumb', 'isDumb_2']) {
        expect(_.has(firstRow, key)).toBeTruthy();
    }
});
it('Directly local html content: Table with header', async () => {
    const tables = tabletojson.convert(html);
    expect(tables).toBeDefined();
    // The headings of the first table become the keys of its row objects.
    const firstRow = tables[0][0];
    for (const heading of ['Dog', 'Race', 'Age']) {
        expect(_.has(firstRow, heading)).toBeTruthy();
    }
});
it('Do not strip HTML from header', async () => {
    // Stripping is disabled through the stripHtml shortcut.
    const tables = tabletojson.convert(html, {stripHtml: false});
    expect(tables).toBeDefined();
    // NOTE(review): the expected keys are plain text even though stripping is off -
    // confirm against test/tables.html that none of these headings contains markup.
    const firstRow = tables[0][0];
    for (const heading of ['Dog', 'Race', 'Age']) {
        expect(_.has(firstRow, heading)).toBeTruthy();
    }
});
it('Directly passing html content: Table without header', async () => {
    const tables = tabletojson.convert(html);
    expect(tables).toBeDefined();
    // Third table: rows are expected to be keyed by the column index as a string.
    const firstRow = tables[2][0];
    const expectedCells = [
        ['0', 'Dog'],
        ['1', 'Race'],
        ['2', 'Age'],
    ];
    // All keys are checked for existence first, then their values.
    for (const [columnKey] of expectedCells) {
        expect(_.has(firstRow, columnKey)).toBeTruthy();
    }
    for (const [columnKey, text] of expectedCells) {
        expect(firstRow[columnKey]).toBe(text);
    }
});
// ADDED TO FIX: https://github.com/maugenst/tabletojson/issues/14
// Expects the named headings ('Dog', 'Height') as keys and the column index ('1', '2', '4')
// as key for the remaining columns of the fourth table.
it('Empty header: to be converted into their column count and not to the underline field name', async () => {
    const tables = tabletojson.convert(html);
    expect(tables).toBeDefined();
    const fourthTable = tables[3];
    const firstRow = fourthTable[0];
    for (const key of ['Dog', '1', '2', 'Height', '4']) {
        expect(_.has(firstRow, key)).toBeTruthy();
    }
});
// ADDED TO FIX: https://github.com/maugenst/tabletojson/pull/18
// With counting disabled no '_2'-suffixed keys are expected; PLACE and VALUE are
// expected to hold 'def'/'2' in the first row and 'jkl'/'4' in the second.
it('Double Header Entry: countDuplicateHeadings:false', async () => {
    const tables = tabletojson.convert(html, {countDuplicateHeadings: false});
    expect(tables).toBeDefined();
    const table = tables[4];
    // Key presence/absence is verified for both rows before any value is compared.
    for (const rowIndex of [0, 1]) {
        const row = table[rowIndex];
        for (const key of ['PLACE', 'VALUE']) {
            expect(_.has(row, key)).toBeTruthy();
        }
        for (const key of ['PLACE_2', 'VALUE_2']) {
            expect(_.has(row, key)).toBeFalsy();
        }
    }
    const expectedRows = [
        {PLACE: 'def', VALUE: '2'},
        {PLACE: 'jkl', VALUE: '4'},
    ];
    expectedRows.forEach((expected, rowIndex) => {
        expect(table[rowIndex].PLACE).toBe(expected.PLACE);
        expect(table[rowIndex].VALUE).toBe(expected.VALUE);
    });
});
// ADDED TO FIX: https://github.com/maugenst/tabletojson/pull/18
// With counting enabled each row is expected to expose PLACE/VALUE plus the
// '_2'-suffixed keys for the repeated headings, each with its own value.
it('Double Header Entry: countDuplicateHeadings:true', async () => {
    const tables = tabletojson.convert(html, {countDuplicateHeadings: true});
    expect(tables).toBeDefined();
    const table = tables[4];
    const headings = ['PLACE', 'VALUE', 'PLACE_2', 'VALUE_2'];
    const expectedRows = [
        {PLACE: 'abc', VALUE: '1', PLACE_2: 'def', VALUE_2: '2'},
        {PLACE: 'ghi', VALUE: '3', PLACE_2: 'jkl', VALUE_2: '4'},
    ];
    // Key presence is verified for both rows before any value is compared.
    expectedRows.forEach((unused, rowIndex) => {
        for (const heading of headings) {
            expect(_.has(table[rowIndex], heading)).toBeTruthy();
        }
    });
    expectedRows.forEach((expected, rowIndex) => {
        for (const heading of headings) {
            expect(table[rowIndex][heading]).toBe(expected[heading]);
        }
    });
});
// FEATURE 'ignoreColumns'
// Columns 2 and 3 are ignored; the rows are expected to lack WEIGHT and SEX
// while NAME, PLACE and AGE keep their values.
it('Option: ignoreColumns: [2, 3]', async () => {
    const tables = tabletojson.convert(html, {ignoreColumns: [2, 3]});
    expect(tables).toBeDefined();
    const table = tables[5];
    const expectedRows = [
        {NAME: 'Mel', PLACE: '1', AGE: '23'},
        {NAME: 'Tom', PLACE: '2', AGE: '54'},
        {NAME: 'Bill', PLACE: '3', AGE: '31'},
    ];
    // Per row: key checks first, then the values of the remaining columns.
    expectedRows.forEach((expected, rowIndex) => {
        const row = table[rowIndex];
        expect(_.has(row, 'NAME')).toBeTruthy();
        expect(_.has(row, 'PLACE')).toBeTruthy();
        expect(_.has(row, 'WEIGHT')).toBeFalsy();
        expect(_.has(row, 'SEX')).toBeFalsy();
        expect(_.has(row, 'AGE')).toBeTruthy();
        expect(row.NAME).toBe(expected.NAME);
        expect(row.PLACE).toBe(expected.PLACE);
        expect(row.AGE).toBe(expected.AGE);
    });
});
// FEATURE 'onlyColumns'
// Besides onlyColumns: [0, 4] this also passes ignoreColumns: [2, 4]. AGE is still
// expected in the result although index 4 is listed in ignoreColumns, so the test
// additionally pins that onlyColumns wins over ignoreColumns.
it('Option: onlyColumns: [0, 4]', async () => {
    const options = {
        onlyColumns: [0, 4],
        ignoreColumns: [2, 4],
    };
    const tables = tabletojson.convert(html, options);
    expect(tables).toBeDefined();
    const table = tables[5];
    const expectedRows = [
        {NAME: 'Mel', AGE: '23'},
        {NAME: 'Tom', AGE: '54'},
        {NAME: 'Bill', AGE: '31'},
    ];
    // Per row: key checks first, then the values of the two remaining columns.
    expectedRows.forEach((expected, rowIndex) => {
        const row = table[rowIndex];
        expect(_.has(row, 'NAME')).toBeTruthy();
        expect(_.has(row, 'PLACE')).toBeFalsy();
        expect(_.has(row, 'WEIGHT')).toBeFalsy();
        expect(_.has(row, 'SEX')).toBeFalsy();
        expect(_.has(row, 'AGE')).toBeTruthy();
        expect(row.NAME).toBe(expected.NAME);
        expect(row.AGE).toBe(expected.AGE);
    });
});
// FEATURE 'ignoreHiddenRows:true'
// All five headings must be present and exactly three rows are expected.
it('Option: ignoreHiddenRows:true', async () => {
    const tables = tabletojson.convert(html, {ignoreHiddenRows: true});
    expect(tables).toBeDefined();
    const table = tables[5];
    const firstRow = table[0];
    for (const heading of ['NAME', 'PLACE', 'WEIGHT', 'SEX', 'AGE']) {
        expect(_.has(firstRow, heading)).toBeTruthy();
    }
    expect(table.length).toBe(3);
});
// FEATURE 'ignoreHiddenRows:false'
// All five headings must be present and exactly four rows are expected.
it('Option: ignoreHiddenRows:false', async () => {
    const tables = tabletojson.convert(html, {ignoreHiddenRows: false});
    expect(tables).toBeDefined();
    const table = tables[5];
    const firstRow = table[0];
    for (const heading of ['NAME', 'PLACE', 'WEIGHT', 'SEX', 'AGE']) {
        expect(_.has(firstRow, heading)).toBeTruthy();
    }
    expect(table.length).toBe(4);
});
// FEATURE 'headings: ['A', 'B', 'C', 'D', 'E']'
// Five custom headings are supplied; each must show up as a key and three rows are expected.
it('Option: headings: ["A","B","C","D","E"]', async () => {
    const customHeadings = ['A', 'B', 'C', 'D', 'E'];
    const tables = tabletojson.convert(html, {headings: customHeadings});
    expect(tables).toBeDefined();
    const table = tables[5];
    const firstRow = table[0];
    for (const key of ['A', 'B', 'C', 'D', 'E']) {
        expect(_.has(firstRow, key)).toBeTruthy();
    }
    expect(table.length).toBe(3);
});
// FEATURE 'headings: ['A', 'B', 'C']'
// Only three custom headings are supplied: A, B and C must be keys, D and E must not,
// and three rows are expected.
it('Option: headings: ["A","B","C"]', async () => {
    const tables = tabletojson.convert(html, {headings: ['A', 'B', 'C']});
    expect(tables).toBeDefined();
    const table = tables[5];
    const firstRow = table[0];
    for (const key of ['A', 'B', 'C']) {
        expect(_.has(firstRow, key)).toBeTruthy();
    }
    for (const key of ['D', 'E']) {
        expect(_.has(firstRow, key)).toBeFalsy();
    }
    expect(table.length).toBe(3);
});
/**
 * Rows this test expects, shown with the table's own headings:
 * | NAME | PLACE | WEIGHT | SEX | AGE |
 * | Mel  | 1     | 58     | W   | 23  |
 * | Tom  | 2     | 78     | M   | 54  |
 * | Bill | 3     | 92     | M   | 31  |
 */
// FEATURE 'headings: ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I']'
// Nine custom headings are supplied while only five columns are asserted: A..E must be
// present as keys and carry the cell values listed above. Nothing is asserted about F..I.
// The title previously read "C","E","E"; it now matches the headings actually passed.
it('Option: headings: ["A","B","C","D","E","F","G","H","I"]', async function () {
    const converted = tabletojson.convert(html, {
        headings: ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I'],
    });
    expect(converted).toBeDefined();
    const table = converted[5];
    const assertedKeys = ['A', 'B', 'C', 'D', 'E'];
    for (const key of assertedKeys) {
        expect(_.has(table[0], key)).toBeTruthy();
    }
    expect(table.length).toBe(3);
    // Expected cell values per row, in the order of the asserted keys.
    const expectedRows = [
        ['Mel', '1', '58', 'W', '23'],
        ['Tom', '2', '78', 'M', '54'],
        ['Bill', '3', '92', 'M', '31'],
    ];
    expectedRows.forEach((expectedCells, rowIndex) => {
        assertedKeys.forEach((key, columnIndex) => {
            expect(table[rowIndex][key]).toEqual(expectedCells[columnIndex]);
        });
    });
});
/**
 * Source rows referenced by the expectations below:
 * | NAME | PLACE | WEIGHT | SEX | AGE |
 * | Mel  | 1     | 58     | W   | 23  |
 * | Tom  | 2     | 78     | M   | 54  |
 * | Bill | 3     | 92     | M   | 31  |
 */
// FEATURE 'headings: ['A', 'B', 'C'] && ignoreColumns: [1, 2]'
// Expects the three custom headings to label the columns left after ignoring
// indexes 1 and 2: A holds the name, B the sex and C the age.
it('Option: headings: ["A","B","C"] && ignoreColumns: [1, 2]', async () => {
    const options = {
        headings: ['A', 'B', 'C'],
        ignoreColumns: [1, 2],
    };
    const tables = tabletojson.convert(html, options);
    expect(tables).toBeDefined();
    const table = tables[5];
    const firstRow = table[0];
    for (const key of ['A', 'B', 'C']) {
        expect(_.has(firstRow, key)).toBeTruthy();
    }
    for (const key of ['D', 'E']) {
        expect(_.has(firstRow, key)).toBeFalsy();
    }
    expect(table.length).toBe(3);
    const expectedRows = [
        {A: 'Mel', B: 'W', C: '23'},
        {A: 'Tom', B: 'M', C: '54'},
        {A: 'Bill', B: 'M', C: '31'},
    ];
    expectedRows.forEach((expected, rowIndex) => {
        const row = table[rowIndex];
        expect(row.A).toEqual(expected.A);
        expect(row.B).toEqual(expected.B);
        expect(row.C).toEqual(expected.C);
    });
});
/**
 * Source rows referenced by the expectations below:
 * | NAME | PLACE | WEIGHT | SEX | AGE |
 * | Mel  | 1     | 58     | W   | 23  |
 * | Tom  | 2     | 78     | M   | 54  |
 * | Bill | 3     | 92     | M   | 31  |
 */
// FEATURE 'headings: ['A', 'B', 'C'] && ignoreColumns: [1, 2] && onlyColumns: [0, 4]'
// Expects only two keys to remain: A holding the name and B holding the age.
// The third custom heading C must not appear.
it('Option: headings: ["A","B","C"] && ignoreColumns: [1, 2] && onlyColumns: [0, 4]', async () => {
    const options = {
        headings: ['A', 'B', 'C'],
        ignoreColumns: [1, 2],
        onlyColumns: [0, 4],
    };
    const tables = tabletojson.convert(html, options);
    expect(tables).toBeDefined();
    const table = tables[5];
    const firstRow = table[0];
    for (const key of ['A', 'B']) {
        expect(_.has(firstRow, key)).toBeTruthy();
    }
    for (const key of ['C', 'D', 'E']) {
        expect(_.has(firstRow, key)).toBeFalsy();
    }
    expect(table.length).toBe(3);
    const expectedRows = [
        {A: 'Mel', B: '23'},
        {A: 'Tom', B: '54'},
        {A: 'Bill', B: '31'},
    ];
    expectedRows.forEach((expected, rowIndex) => {
        const row = table[rowIndex];
        expect(row.A).toEqual(expected.A);
        expect(row.B).toEqual(expected.B);
    });
});
/**
 * Source rows referenced by the expectations below:
 * | NAME | PLACE | WEIGHT | SEX | AGE |
 * | Mel  | 1     | 58     | W   | 23  |
 * | Tom  | 2     | 78     | M   | 54  |
 * | Bill | 3     | 92     | M   | 31  |
 */
// FEATURE 'headings: ['A'] && ignoreColumns: [1, 2] && onlyColumns: [0, 4]'
// Only one custom heading is supplied: A must hold the name and none of B..E may appear.
it('Option: headings: ["A"] && ignoreColumns: [1, 2] && onlyColumns: [0, 4]', async () => {
    const options = {
        headings: ['A'],
        ignoreColumns: [1, 2],
        onlyColumns: [0, 4],
    };
    const tables = tabletojson.convert(html, options);
    expect(tables).toBeDefined();
    const table = tables[5];
    const firstRow = table[0];
    expect(_.has(firstRow, 'A')).toBeTruthy();
    for (const key of ['B', 'C', 'D', 'E']) {
        expect(_.has(firstRow, key)).toBeFalsy();
    }
    expect(table.length).toBe(3);
    ['Mel', 'Tom', 'Bill'].forEach((name, rowIndex) => {
        expect(table[rowIndex].A).toEqual(name);
    });
});
// FIX/TEST: https://github.com/maugenst/tabletojson/issues/19
// Non-ASCII headings and cell texts must come through the conversion unchanged.
it('Test to check conversion and handling of Kanji, Hiragana, Katakana and latin texts', async () => {
    const tables = tabletojson.convert(html);
    expect(tables).toBeDefined();
    const firstRow = tables[6][0];
    const expectedCells = [
        ['Kanji', '私'],
        ['Hiragana', 'わたし'],
        ['Katakana', 'ワタシ'],
        ['Rōmaji', 'watashi'],
        ['English', 'I, me'],
    ];
    // All headings are checked for existence first, then their values.
    for (const [heading] of expectedCells) {
        expect(_.has(firstRow, heading)).toBeTruthy();
    }
    for (const [heading, text] of expectedCells) {
        expect(firstRow[heading]).toEqual(text);
    }
});
// ENHANCEMENT: https://github.com/maugenst/tabletojson/issues/30
// The same table is converted twice: without a limit it is expected to yield 200 rows,
// with limitrows: 5 exactly 5 rows.
it('limit results to only get a configurable amount of rows', async () => {
    const unlimited = tabletojson.convert(html);
    expect(unlimited).toBeDefined();
    const allRows = unlimited[9];
    expect(allRows.length).toBe(200);

    const limited = tabletojson.convert(html, {limitrows: 5});
    expect(limited).toBeDefined();
    const limitedRows = limited[9];
    expect(limitedRows.length).toBe(5);
});
// ENHANCEMENT: Coverage improvement to also cover rowspan tables
// | PARENT | CHILD | AGE |
// |        | Sue   | 15  |
// | Marry  | Steve | 12  |
// |        | Tom   | 3   |
// Every one of the three rows is expected to report the spanning parent cell.
it('Rowspan usage leads to correct object representation', async () => {
    // Restrict the conversion to the table with id 'table11'; exactly one result is expected.
    const tables = tabletojson.convert(html, {id: ['table11']});
    expect(tables).toBeDefined();
    expect(tables.length).toBe(1);
    const table = tables[0];
    expect(table.length).toBe(3);
    expect(_.has(table[0], 'Parent')).toBeTruthy();
    for (const rowIndex of [0, 1, 2]) {
        expect(table[rowIndex].Parent).toBe('Marry');
    }
});
// ENHANCEMENT: Coverage improvement to also cover complex rowspan tables
// | PARENT | CHILD | AGE |
// +--------+-------+-----+
// |        | Sue   | 15  |
// +        +-------+-----+
// | Marry  | Steve | 12  |
// +        +-------+-----+
// |        |       |     |
// +--------+ Tom   | 3   +
// |        |       |     |
// + Taylor +-------+-----+
// |        | Peter | 17  |
// +--------+-------+-----+
// Five rows are expected; the 'Tom'/'3' cells appear in both row 2 and row 3,
// under parent 'Marry' and parent 'Taylor' respectively.
it('Complex rowspan usage leads to correct object representation', async () => {
    // Restrict the conversion to the table with id 'table12'; exactly one result is expected.
    const tables = tabletojson.convert(html, {id: ['table12']});
    expect(tables).toBeDefined();
    expect(tables.length).toBe(1);
    const table = tables[0];
    expect(table.length).toBe(5);
    expect(_.has(table[0], 'Parent')).toBeTruthy();
    const expectedRows = [
        {Parent: 'Marry', Child: 'Sue', Age: '15'},
        {Parent: 'Marry', Child: 'Steve', Age: '12'},
        {Parent: 'Marry', Child: 'Tom', Age: '3'},
        {Parent: 'Taylor', Child: 'Tom', Age: '3'},
        {Parent: 'Taylor', Child: 'Peter', Age: '17'},
    ];
    // Compared column by column: every Parent value first, then Child, then Age.
    for (const column of ['Parent', 'Child', 'Age']) {
        expectedRows.forEach((expected, rowIndex) => {
            expect(table[rowIndex][column]).toBe(expected[column]);
        });
    }
});
it('Options: containsClasses', async () => {
    // Passes containsClasses: ['table'] and inspects the first table that comes back.
    const tables = tabletojson.convert(html, {containsClasses: ['table']});
    expect(tables).toBeDefined();
    const firstRow = tables[0][0];
    expect(_.has(firstRow, 'Age')).toBeTruthy();
    expect(firstRow.Age).toBe('2');
});
it('Options: byId', async () => {
    // Passes id: ['table9'] and expects exactly one table in the result.
    const tables = tabletojson.convert(html, {id: ['table9']});
    expect(tables).toBeDefined();
    expect(tables.length).toBe(1);
    const firstRow = tables[0][0];
    expect(_.has(firstRow, 'Age')).toBeTruthy();
    expect(firstRow.Age).toBe('2');
});
it('Options: useFirstRowForHeadings', async () => {
    const options = {
        id: ['table13'],
        useFirstRowForHeadings: true,
    };
    const tables = tabletojson.convert(html, options);
    expect(tables).toBeDefined();
    expect(tables.length).toBe(1);
    const table = tables[0];
    expect(_.has(table[0], 'Age')).toBeTruthy();
    // The first row supplies the keys and is also expected to stay in the output as
    // data (each key maps to itself); the second row holds the first real record.
    const expectedRows = [
        {Dog: 'Dog', Race: 'Race', Age: 'Age'},
        {Dog: 'Donald', Race: 'Bobtail', Age: '2'},
    ];
    expectedRows.forEach((expected, rowIndex) => {
        const row = table[rowIndex];
        expect(row.Dog).toEqual(expected.Dog);
        expect(row.Race).toEqual(expected.Race);
        expect(row.Age).toEqual(expected.Age);
    });
});
it('Converting a table with no content', async () => {
    // Passes id: ['table14'] and expects an empty array as the result.
    const tables = tabletojson.convert(html, {id: ['table14']});
    expect(tables).toBeDefined();
    const isArray = Array.isArray(tables);
    expect(isArray).toBeTruthy();
    expect(tables.length).toBe(0);
});
it('Options: converting an html page with no tables', async () => {
    // Uses the second fixture (noTables) and expects a defined result without any entries.
    const tables = tabletojson.convert(noTables);
    expect(tables).toBeDefined();
    expect(tables.length).toBe(0);
});
});