package com.pidorashque;

import com.google.common.collect.ArrayTable;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.net.URL;
import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

/**
 * Fetches an HTML document, splits the rows of its table(s) into fixed-size groups and
 * converts every group into a Guava {@link ArrayTable} that is finally printed to stdout.
 *
 * <p>Each group consists of {@link #ROWS_PER_TABLE} consecutive {@code <tr>} elements: the
 * first one supplies the column keys, the remaining ones supply the cell values.
 */
public class App
{
    /** Number of consecutive {@code <tr>} rows per group: one header row plus the data rows. */
    private static final int ROWS_PER_TABLE = 5;

    /** Matches text made up exclusively of non-word characters; compiled once instead of per cell. */
    private static final Pattern NON_WORD_ONLY = Pattern.compile("\\W+");

    /**
     * Downloads the page, builds one table per complete group of rows and prints the list.
     *
     * <p>The very first {@code <tr>} of the document is always skipped. Rows left over after
     * the last complete group (fewer than {@link #ROWS_PER_TABLE}) are ignored.
     *
     * @param args command-line arguments; not used
     * @throws IOException if the URL is malformed or the document cannot be fetched/parsed
     */
    public static void main( String[] args ) throws IOException {
        final Document doc = Jsoup.parse(new URL("http://p...content-available-to-author-only...n.com/raw.php?i=E7BGzemF"), 10000);
        final Elements rows = doc.select("table tr");

        // subList(1, 0) would throw on a document without any rows, so keep the empty list as is.
        final List<Element> dataRows = rows.isEmpty() ? rows : rows.subList(1, rows.size());

        // Row keys are identical for every group (1 .. ROWS_PER_TABLE - 1), so build them once.
        final List<Integer> rowKeys = IntStream.range(1, ROWS_PER_TABLE).boxed().collect(Collectors.toList());

        // Integer division deliberately drops an incomplete trailing group.
        final List<ArrayTable<Integer, String, String>> tables = IntStream.range(0, dataRows.size() / ROWS_PER_TABLE)
                .mapToObj(i -> {
                    final int start = i * ROWS_PER_TABLE;
                    return toTable(dataRows.subList(start, start + ROWS_PER_TABLE), rowKeys);
                })
                .collect(Collectors.toList());

        System.out.println(tables);
    }

    /**
     * Converts one group of rows into a table.
     *
     * <p>Column keys are the distinct texts of the usable cells of the first row. For each
     * following row the first {@code columnKeys.size()} usable cells are written positionally
     * into the table; positions without a cell keep the table's initial value.
     *
     * <p>NOTE(review): {@code ArrayTable.create} rejects an empty column-key list when row keys
     * are present, so a group whose header row has no usable cells still fails here - confirm
     * whether such groups can occur in the input.
     *
     * @param group   the header row followed by the data rows of one group
     * @param rowKeys the row keys of the resulting table, one per data row
     * @return the populated table
     */
    private static ArrayTable<Integer, String, String> toTable(final List<Element> group, final List<Integer> rowKeys) {
        final List<String> columnKeys = group.get(0).children().stream()
                .filter(App::isDataCell)
                .map(Element::text)
                .distinct()
                .collect(Collectors.toList());

        final ArrayTable<Integer, String, String> table = ArrayTable.create(rowKeys, columnKeys);

        for (int r = 1; r < group.size(); r++) {
            final List<Element> cells = group.get(r).children().stream()
                    .filter(App::isDataCell)
                    .limit(columnKeys.size())
                    .collect(Collectors.toList());

            // set(int, int, V) addresses by zero-based index, hence r - 1 for the row.
            for (int c = 0; c < cells.size(); c++) {
                table.set(r - 1, c, cells.get(c).text());
            }
        }
        return table;
    }

    /**
     * Tells whether a cell carries usable content: it must be a {@code <td>}, must not have
     * {@code colspan="5"} (presumably full-width separator cells - verify against the page)
     * and its text must not consist solely of non-word characters.
     *
     * @param cell a child element of a table row
     * @return {@code true} if the cell should contribute a column key or a value
     */
    private static boolean isDataCell(final Element cell) {
        return cell.tagName().equals("td")
                && !cell.attr("colspan").equals("5")
                && !NON_WORD_ONLY.matcher(cell.text()).matches();
    }
}
