more ausbudget import
more ausbudget import


Former-commit-id: 1c41e9c73321cf18e58277483900ca60392d79c7

<?php

// Shared project bootstrap, then the phpQuery HTML-querying library.
// NOTE(review): $basePath is not defined in this file; presumably
// common.inc.php sets it -- confirm.
include_once("../include/common.inc.php");
require($basePath . 'lib/phpquery/phpQuery/phpQuery.php');

// Use the "C" locale for character classification (LC_CTYPE).
setlocale(LC_CTYPE, 'C');
   
/**
 * Append the text found under $Node to $Text and return the result.
 *
 * A node without a (non-empty) tagName is treated as a leaf and contributes
 * its textContent directly. Any other node contributes the text of each of
 * its children, visited in order from firstChild along the nextSibling chain.
 * The siblings of $Node itself are not visited.
 *
 * @param object $Node Node exposing tagName / firstChild / nextSibling /
 *                     textContent properties.
 * @param string $Text Text accumulated so far; new text is appended to it.
 * @return string The accumulated text.
 */
function getTextFromNode($Node, $Text = "") {
    $isLeaf = !isset($Node->tagName) || $Node->tagName == null;
    if ($isLeaf) {
        return $Text . $Node->textContent;
    }

    // Walk the child list left to right, threading the accumulator through.
    for ($child = $Node->firstChild; isset($child) && $child != null; $child = $child->nextSibling) {
        $Text = getTextFromNode($child, $Text);
    }

    return $Text;
}
   
// Export script: reads every file in ./ausbudget/, parses it as HTML with
// phpQuery, and streams the table data it finds to the client as a CSV
// download.
//
// Each table row is treated as a "label / value" pair: the text of the first
// <td> is the label. Pairs are collected until a row labelled "Source" is
// seen, at which point one CSV record is written with the columns listed in
// $headers (blank where no value was collected).
$dir = "./ausbudget/";
$dhandle = opendir($dir);

// Column order of the CSV output; also the set of labels that are exported.
$headers = Array("Table ID", "Portfolio", "Agency", "Program", "Scheme", "2011-2012", "2012-2013", "Difference", "Source");

// Values for the CSV record currently being assembled.
$csvrow = Array();

$fp = fopen('php://output', 'w');
if ($fp) {
    header('Content-Type: text/csv; charset=utf-8');
    header('Content-Disposition: attachment; filename="export.' . date("c") . '.csv"');
    header('Pragma: no-cache');
    header('Expires: 0');
    fputcsv($fp, $headers);

    if ($dhandle) {
        // Loop through all of the files in the directory.
        while (false !== ($fname = readdir($dhandle))) {
            if (($fname != '.') && ($fname != '..')) {
                $html = phpQuery::newDocumentHTML(file_get_contents($dir . $fname));
                phpQuery::selectDocument($html);

                foreach (pq('table')->elements as $table) {
                    // Label => value pairs collected for the current record.
                    $data = Array();

                    foreach (pq('tr', $table)->elements as $row) {
                        $rowText = pq($row)->text();

                        // Rows whose text mentions "Twitter" are skipped.
                        if (strpos($rowText, "Twitter") === false) {
                            $key = trim(pq("td:first", $row)->text());

                            // Value = text of all cells in the row with the
                            // label cell's text removed.
                            $value = trim(str_replace(pq("td:first", $row)->text(), "", pq("td", $row)->text()));

                            if ($key == "2011-2012" || $key == "2012-2013") {
                                // Keep only the part before the first "-" and
                                // strip "$", "," and newlines from it.
                                $eValue = explode("-", $value);
                                $value = trim(str_replace(Array("$", ",", "\n"), "", $eValue[0]));
                            }

                            $data[$key] = trim(str_replace(Array("\n"), "", $value));

                            // A "Source" row ends the record: write it out in
                            // header order and start a fresh one.
                            if ($key == "Source") {
                                foreach ($headers as $fieldName) {
                                    if (isset($data[$fieldName])) {
                                        $csvrow[] = $data[$fieldName];
                                    } else {
                                        $csvrow[] = "";
                                    }
                                }

                                fputcsv($fp, array_values($csvrow));
                                $data = Array();
                                $csvrow = Array();
                            }
                        }
                    }
                }
            }
        }
    }
    // Stop here so nothing after the CSV data is sent to the client.
    die;
}
?>