more ausbudget work
[disclosr.git] / admin / importAusbudget.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
<?php
 
include_once("../include/common.inc.php");
require($basePath . 'lib/phpquery/phpQuery/phpQuery.php');
 
// Pin character classification (LC_CTYPE) to the portable "C" locale for the
// rest of this script.
// NOTE(review): presumably done so locale-sensitive string handling behaves the
// same regardless of the server's configured locale - confirm the motivation.
setlocale(LC_CTYPE, 'C');
 
/**
 * Collect the text found beneath a DOM node, in document order.
 *
 * A node that exposes no (non-empty) tagName is treated as a leaf and its
 * textContent is appended directly. Any other node is descended into, child
 * by child, threading the accumulated string through each recursive call.
 *
 * @param object $Node Node to read; expected to expose tagName, textContent,
 *                     firstChild and nextSibling the way DOM nodes do.
 * @param string $Text Text gathered so far; the result is appended to it.
 * @return string $Text followed by the text collected from $Node.
 */
function getTextFromNode($Node, $Text = "") {
    $hasTagName = isset($Node->tagName) && $Node->tagName != null;
    if (!$hasTagName) {
        // Leaf case: nothing to descend into, take the node's own text.
        return $Text . $Node->textContent;
    }

    // Walk the sibling chain starting at the first child; an element with no
    // children yields a null start and the loop body never runs.
    for ($child = $Node->firstChild; isset($child); $child = $child->nextSibling) {
        $Text = getTextFromNode($child, $Text);
    }

    return $Text;
}
 
// Directory holding the saved budget HTML pages to scan.
$dir = "./ausbudget/";
$dhandle = opendir($dir);
if ($dhandle) {
    // Walk the directory entries until one regular file has been processed.
    while (false !== ($fname = readdir($dhandle))) {
        // Skip the "." / ".." entries and anything that is not a plain file;
        // file_get_contents() cannot read a directory.
        if ($fname == '.' || $fname == '..' || !is_file($dir . $fname)) {
            continue;
        }
        // The name is echoed into HTML, so escape it.
        echo htmlspecialchars($fname) . " <br>";

        $contents = file_get_contents($dir . $fname);
        if ($contents === false) {
            // Unreadable file: try the next entry instead of parsing `false`.
            continue;
        }

        $html = phpQuery::newDocumentHTML($contents);
        phpQuery::selectDocument($html);
        foreach (pq('table')->elements as $table) {
            echo "loltable";
            // Scope the row query to the current table. An unscoped pq('tr')
            // matches every row in the document and would repeat all rows
            // once per table.
            foreach (pq('tr', $table)->elements as $row) {
                echo "lolrow";
                $rowText = getTextFromNode($row);
                // Rows mentioning "Twitter" are not printed.
                if (strpos($rowText, "Twitter") === false) {
                    // First cell in bold, then the text of all cells; both are
                    // scraped text, so escape before emitting as HTML.
                    echo "<b>" . htmlspecialchars(pq("td:first", $row)->text()) . "</b><br>";
                    echo htmlspecialchars(pq("td", $row)->text()) . "<br>";
                }
            }
        }

        // Stop after the first file that was actually processed. This sits
        // after the skip checks so that "." / ".." or a subdirectory coming
        // first from readdir() does not end the loop with nothing parsed.
        break;
    }
    closedir($dhandle);
}
?>