From: Maxious Date: Tue, 20 Nov 2012 04:08:55 +0000 Subject: more ausbudget import X-Git-Url: https://maxious.lambdacomplex.org/git/?p=disclosr.git&a=commitdiff&h=d4310e86474c5818df61ae245c745e9ae0237e57 --- more ausbudget import Former-commit-id: 1c41e9c73321cf18e58277483900ca60392d79c7 --- --- a/admin/importAusbudget.php +++ b/admin/importAusbudget.php @@ -5,47 +5,64 @@ setlocale(LC_CTYPE, 'C'); -function getTextFromNode($Node, $Text = "") { - if (!isset($Node->tagName) || $Node->tagName == null) { - return $Text . $Node->textContent; - } - $Node = $Node->firstChild; - if (isset($Node) && $Node != null) { - $Text = getTextFromNode($Node, $Text); - } - while (isset($Node->nextSibling) && $Node->nextSibling != null) { - $Text = getTextFromNode($Node->nextSibling, $Text); - $Node = $Node->nextSibling; - } - return $Text; -} - $dir = "./ausbudget/"; $dhandle = opendir("./ausbudget/"); -// define an array to hold the files -$files = array(); -if ($dhandle) { - // loop through all of the files - while (false !== ($fname = readdir($dhandle))) { - if (($fname != '.') && ($fname != '..')) { - echo "$fname
"; - $html = phpQuery::newDocumentHTML(file_get_contents($dir . $fname)); - phpQuery::selectDocument($html); - foreach (pq('table')->elements as $table) { - echo "loltable"; - //echo $table->ownerDocument->saveXML($table); - foreach (pq('tr')->elements as $row) { - echo "lolrow"; - $rowText = getTextFromNode($row); - if (strpos($rowText,"Twitter") === false) { - echo "".pq("td:first",$row)->text()."
"; - echo pq("td",$row)->text() . "
"; +$headers = Array("Table ID", "Portfolio", "Agency", "Program", "Scheme", "2011-2012", "2012-2013", "Difference", "Source"); + +$fp = fopen('php://output', 'w'); +if ($fp) { + header('Content-Type: text/csv; charset=utf-8'); + header('Content-Disposition: attachment; filename="export.' . date("c") . '.csv"'); + header('Pragma: no-cache'); + header('Expires: 0'); + fputcsv($fp, $headers); + if ($dhandle) { + // loop through all of the files + while (false !== ($fname = readdir($dhandle))) { + if (($fname != '.') && ($fname != '..')) { + //echo "$fname
"; + $html = phpQuery::newDocumentHTML(file_get_contents($dir . $fname)); + phpQuery::selectDocument($html); + foreach (pq('table')->elements as $table) { + $data = Array(); + ////echo "loltable"; + //echo $table->ownerDocument->saveXML($table); + foreach (pq('tr',$table)->elements as $row) { + //echo "lolrow"; + $rowText = pq($row)->text(); + if (strpos($rowText, "Twitter") === false) { + + $key = trim(pq("td:first", $row)->text()); + //echo "$key
"; + $value = trim(str_replace(pq("td:first", $row)->text(), "", pq("td", $row)->text())); + if ($key == "2011-2012" || $key == "2012-2013") { + $eValue = explode("-", $value); + $value = trim(str_replace(Array("$", ",", "\n"), "", $eValue[0])); + } + //echo "$value
"; + $data[$key] = trim(str_replace(Array("\n"), "", $value)); + if ($key == "Source") { + + foreach ($headers as $fieldName) { + if (isset($data[$fieldName])) { + + $csvrow[] = $data[$fieldName]; + } else { + $csvrow[] = ""; + } + } + + fputcsv($fp, array_values($csvrow)); + $data = Array(); + $csvrow = Array(); + } + } } } } } - break; } + die; } ?>