<?php

/**
 * Bash.org scrapper.
 *
 * One-liners: SELECT * FROM quotes WHERE quote NOT LIKE '%' || CHAR(13) || '%'
 *
 * @author Adam Klvač <adam@klva.cz>
 */

define('BASH_BROWSE_URL', 'http://bash.org/?browse&p=%d');

libxml_use_internal_errors(true);

$db = new PDO('sqlite:bash.db', null, null, [
    PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION,
    PDO::ATTR_DEFAULT_FETCH_MODE => PDO::FETCH_OBJ,
]);

$version = (int) $db->query('PRAGMA user_version')->fetchColumn();

switch (true) {

    case $version < 1:
        printf("DB migration: 0\n");
        $db->exec('CREATE TABLE quotes (id INTEGER PRIMARY KEY, score INTEGER, quote TEXT);');
        $db->exec('CREATE TABLE config (key TEXT PRIMARY KEY, value TEXT);');
        $db->exec('PRAGMA user_version = 1;');
        break;

}

$insert = $db->prepare("INSERT OR IGNORE INTO quotes (id, score, quote) VALUES (?, ?, ?)");

for ($page = get_page();; set_page($page), $page++) {

    $url = sprintf(BASH_BROWSE_URL, $page);
    printf("GET %s\n", $url);

    $file = file_get_contents($url);

    while (($file = file_get_contents($url)) === false) {
        sleep(60);
    }

    $dom = new DOMDocument;

    if (!$dom->loadHTML(mb_convert_encoding($file, 'ISO-8859-1', 'UTF-8'))) {
        exit(1);
    }

    $xPath = new DOMXPath($dom);

    $quotes = [];

    foreach ($xPath->query('//p[@class="quote"]') as $element) {

        if (!preg_match('~^#(?<id>\d+) \+\((?<score>-?\d+)\)- \[X]$~', $element->nodeValue, $matches)) {
            fprintf(STDERR, "p.quote: preg_match() failed [%s]\n", $element->nodeValue);
            exit(2);
        }

        $quotes[] = [
            'id' =>    (int) $matches['id'],
            'score' => (int) $matches['score'],
        ];

    }

    if (count($quotes) === 0) {
        fprintf(STDERR, "no quotes\n");
        exit(3);
    }

    $i = 0;

    foreach ($xPath->query('//p[@class="qt"]') as $element) {

        if ($i === count($quotes)) {
            fprintf(STDERR, "p.qt: count() exceeded\n");
            exit(4);
        }

        $text = $element->nodeValue;

        $quotes[$i]['quote'] = $text;

        $i++;

    }

    if ($i !== count($quotes)) {
        fprintf(STDERR, "count mismatch\n");
        exit(5);
    }

    foreach ($quotes as $quote) {

        $insert->execute([
            $quote['id'],
            $quote['score'],
            $quote['quote'],
        ]);

    }

    if (strpos($file, '"qa">End<') === false) {
        printf("Done.\n");
        exit;
    }

    sleep(15);

}

function get_page(): int
{
    global $db;
    return $db->query("SELECT value FROM config WHERE key = 'page'")->fetchColumn() ?: 1;
}

function set_page(int $page): void
{
    global $db;
    $db->exec("INSERT OR REPLACE INTO config VALUES ('page', $page);");
}