From: maxious Date: Mon, 09 May 2011 05:47:28 +0000 Subject: Initial import/get/scrape planning X-Git-Url: https://maxious.lambdacomplex.org/git/?p=disclosr.git&a=commitdiff&h=c6f98f179ebce5738779914fc759ae1b4e868a49 --- Initial import/get/scrape planning --- --- /dev/null +++ b/.gitmodules @@ -1,1 +1,4 @@ +[submodule "sag"] + path = sag + url = git://github.com/sbisbee/sag.git --- /dev/null +++ b/agencies.csv @@ -1,1 +1,176 @@ - +agencyName,status,website,abn,contractListURL,grantsReportingURL,annualReportURL,consultanciesURL,legalExpenditureURL,recordsListURL,FOIDocumentsURL,infoPublicationSchemeURL +Australian Institute of Family Studies,suspended,http://www.aifs.gov.au,64001053079,http://www.aifs.gov.au/institute/aifs/accountability.html,"",http://www.aifs.gov.au/institute/pubs/annualreports/annualreports.html,"","","","","" +Comsuper,suspended,http://www.comsuper.gov.au/,77310752950,http://www.comsuper.gov.au/reports/murray.html,"",http://www.comsuper.gov.au/reports/annual_reports.html,"","","","","" +Australian National Audit Office (ANAO),active,http://www.anao.gov.au,33020645631,http://www.anao.gov.au/director/tendersandcontracting/contracts.cfm,"",http://www.anao.gov.au/director/aboutus/currentannualreport.cfm,"","","","","" +CRS Australia,active,http://www.crsaustralia.gov.au,65281415387,http://www.crsaustralia.gov.au/reporting.htm#contract_listings,"",http://www.crsaustralia.gov.au/plans_and_annual_reports.htm,"","","","","" +Australian Competition and Consumer Commission,active,http://www.accc.gov.au,94410483623,http://www.accc.gov.au/content/index.phtml/itemId/860838,"",http://www.accc.gov.au/content/index.phtml/itemId/668577,http://www.accc.gov.au/content/index.phtml/itemId/402496,http://www.accc.gov.au/content/index.phtml/itemId/767987,"","","" +Workplace Authority,suspended,http://www.workplaceauthority.gov.au,37243205024,http://www.workplaceauthority.gov.au/AboutUs/CorporateInfo/Pages/ContractsAndTenders.aspx,"",http://www.workplaceauthority.gov.au/AboutUs/CorporateInfo/Pages/AnnualReport.aspx,"","","","","" +Attorney-General's Department,active,http://www.ag.gov.au/,92661124436,http://www.ag.gov.au/www/agd/agd.nsf/Page/Tendersandcontracts_SenateOrderContracts,http://www.ag.gov.au/www/agd/agd.nsf/Page/About_the_DepartmentGrants,http://www.ag.gov.au/www/agd/agd.nsf/Page/PublicationsAnnual_Reports,"","",http://www.ag.gov.au/www/agd/agd.nsf/Page/AbouttheDepartment_SenateContinuingOrderFileLists,http://www.ag.gov.au/www/agd/agd.nsf/Page/Freedom_of_Information,"" +Office of the Official Secretary to the Governor-General,active,http://www.gg.gov.au,67582329284,http://www.gg.gov.au/administration/category.php?id=7,"",http://www.gg.gov.au/administration/category.php?id=5,"","","","","" +Federal Court of Australia,active,http://www.fedcourt.gov.au/,49110847399,http://www.fedcourt.gov.au/aboutct/contracts.html,"",http://www.fedcourt.gov.au/aboutct/ar.html,"","","","","" +IP Australia,suspended,http://www.ipaustralia.gov.au,38113072755,http://www.ipaustralia.gov.au/about/files_contracts.shtml,"",http://www.innovation.gov.au/Section/AbouttheDepartment/Pages/AnnualReport200809.aspx,http://www.ipaustralia.gov.au/about/files_contracts.shtml,http://www.ipaustralia.gov.au/about/files_contracts.shtml,http://www.ipaustralia.gov.au/about/files_ipa.shtml,"","" +National Native Title Tribunal,active,http://www.nntt.gov.au,70238042351,http://www.nntt.gov.au/About-The-Tribunal/Pages/Commonwealth-Reporting-Requirements.aspx,"",http://www.nntt.gov.au/About-The-Tribunal/Pages/Commonwealth-Reporting-Requirements.aspx,"",http://www.nntt.gov.au/About-The-Tribunal/Pages/Commonwealth-Reporting-Requirements.aspx,http://www.nntt.gov.au/About-The-Tribunal/Pages/Commonwealth-Reporting-Requirements.aspx,"","" +"Department of Education, Employment and Workplace Relations",active,http://www.deewr.gov.au/Pages/default.aspx,63578775294,http://www.deewr.gov.au/Department/Publications/Pages/CorporatePublications.aspx,"",http://www.deewr.gov.au/Department/Publications/Pages/CorporatePublications.aspx,"","","","","" +Australian Prudential Regulation Authority (APRA),active,http://www.apra.gov.au,79635582658,http://www.apra.gov.au/aboutAPRA/,"",http://www.apra.gov.au/aboutAPRA/,http://www.apra.gov.au/aboutAPRA/,http://www.apra.gov.au/aboutAPRA/,http://www.apra.gov.au/aboutAPRA/,"","" +Australian Crime Commission,active,http://www.crimecommission.gov.au/index.htm,11259448410,http://www.crimecommission.gov.au/about/business_accountability/index.htm,"",http://www.crimecommission.gov.au/publications/annual_report/index.htm,"",http://www.crimecommission.gov.au/about/business_accountability/index.htm,http://www.crimecommission.gov.au/about/business_accountability/file_list.htm,"","" +Australian Radiation Protection and Nuclear Safety Agency (ARPANSA),active,http://www.arpansa.gov.au,61321195155,http://www.arpansa.gov.au/AboutUs/Corporate/contractlist.cfm,"",http://www.arpansa.gov.au/AboutUs/corporate/annualreports.cfm,"",http://www.arpansa.gov.au/AboutUs/legal.cfm,http://www.arpansa.gov.au/AboutUs/FileList.cfm,"","" +"Department of Innovation, Industry, Science and Research",active,http://www.innovation.gov.au/Pages/default.aspx,74599608295,http://www.innovation.gov.au/Section/AbouttheDepartment/Pages/SenateOrderContractsListingforthe2008-09FinancialYear.aspx,"",http://www.innovation.gov.au/General/Corp-MC/Pages/Publications.aspx#annual,"",http://www.innovation.gov.au/Section/AbouttheDepartment/Pages/DepartmentalExpenditureonLegalServices.aspx,http://www.innovation.gov.au/Section/AbouttheDepartment/Pages/SenateContinuingOrderNo8IndexedListofDepartmentalandAgencyFiles.aspx,"","" +Wheat Exports Australia,active,http://www.wea.gov.au,40485918341,http://www.wea.gov.au/AboutUs/WEA%20Contracts.htm,"",http://www.wea.gov.au/Publications/AnnualReports/AnnualReports.htm,"","",http://www.wea.gov.au/Publications/File%20Index%20to%20June%2009.pdf,"","" +Office of National Assessments,active,http://www.ona.gov.au/,87904367991,http://www.ona.gov.au/corporate.htm,"","","",http://www.ona.gov.au/corporate.htm,"","","" +Old Parliament House,active,http://moadoph.gov.au/,30620774963,http://moadoph.gov.au/about/corporate-documents/,"",http://moadoph.gov.au/about/corporate-documents/,"","",http://moadoph.gov.au/about/corporate-documents/,"","" +Murray-Darling Basin Authority,active,http://www.mdba.gov.au/,13679821382,http://www.mdba.gov.au/about/corporate_documents/contact_listings,"","","","","","","" +Administrative Appeals Tribunal,active,http://www.aat.gov.au,90680970626,http://www.aat.gov.au/TendersAndContracts/Contracts.htm,"",http://www.aat.gov.au/CorporatePublications/AnnualReport.htm,"",http://www.aat.gov.au/CorporatePublications/LegalServices.htm,http://www.aat.gov.au/CorporatePublications/IndexedList.htm,"","" +Aged Care Standards and Accreditation Agency Ltd,active,http://www.accreditation.org.au/,64079618652,"","",http://www.accreditation.org.au/about-us/annual-reports/,"","","","","" +Airservices Australia,suspended,http://www.ano.gov.au,59698720886,"","","","","","","","" +AusAid,active,http://www.ausaid.gov.au,62921558838,http://www.ausaid.gov.au/business/contracts_list.cfm,"",http://www.ausaid.gov.au/publications/pubout.cfm?ID=8691_5877_871_8496_1205&CFID=3328295&CFTOKEN=90652321,"","","",http://www.ausaid.gov.au/about/foi.cfm,"" +Austrade,active,http://www.austrade.gov.au,11764698227,http://www.austrade.gov.au/Contracts/default.aspx,"",http://www.austrade.gov.au/default.aspx?FolderID=1401,"","","","","" +Australia Council for the Arts,suspended,http://www.australiacouncil.gov.au,38392626187,"","","","","","","","" +Australian Antarctic Division,suspended,http://www.environment.gov.au,56428630676,"","","","","","","","" +Australian Broadcasting Corporation,suspended,http://abc.gov.au,52429278345,"","","","","","","","" +Australian Bureau of Statistics,active,http://www.abs.gov.au,26331428522,http://www.abs.gov.au/websitedbs/D3310114.nsf/51c9a3d36edfd0dfca256acb00118404/86158083b1e61cc5ca256ac5007eac24!OpenDocument,"",http://www.abs.gov.au/AUSSTATS/abs@.nsf/second+level+view?ReadForm&prodno=1001.0&viewtitle=Australian%20Bureau%20of%20Statistics%20--%20Annual%20Report~2009-10~Latest~27/10/2010&&tabname=Past%20Future%20Issues&prodno=1001.0&issue=2009-10&num=&view=&,"",http://www.abs.gov.au/websitedbs/D3310114.nsf/4a256353001af3ed4b2562bb00121564/f8961758d807999dca257214001e1281!OpenDocument,"",http://www.abs.gov.au/websitedbs/D3310114.nsf/4a256353001af3ed4b2562bb00121564/95e66d20b24927d7ca25747c002dc9ee!OpenDocument,"" +Australian Centre for International Agricultural Research,active,http://www.aciar.gov.au,34864955427,"","","","","","","","" +Australian Commission for Law Enforcement Integrity,active,http://www.aclei.gov.au,78796734093,http://www.aclei.gov.au/www/aclei/aclei.nsf/Page/Accountability_and_Reporting,"","http://www.aclei.gov.au/www/aclei/aclei.nsf/Page/AccountabilityandReporting_Reports,SubmissionsandSpeeches","",http://www.aclei.gov.au/www/aclei/aclei.nsf/Page/Accountability_and_Reporting,http://www.aclei.gov.au/www/aclei/aclei.nsf/AllDocs/39D8AB36FD3CC755CA25734B007CC961?OpenDocument,http://www.aclei.gov.au/www/aclei/aclei.nsf/Page/FreedomofInformation_FreedomofInformation,"" +Australian Communications and Media Authority (ACMA),suspended,http://www.acma.gov.au,55386169386,"","","","","","","","" +Australian Customs and Border Protection Service,active,http://www.customs.gov.au,66015286036,http://www.customs.gov.au/site/page4247.asp,"",http://www.customs.gov.au/site/page4283.asp,"","",http://www.customs.gov.au/site/page4530.asp,"","" +Australian Electoral Commission,suspended,http://www.aec.gov.au,21133285851,"","","","","","","","" +Australian Fair Pay Commission,suspended,http://www.fairpay.gov.au,14460763247,"","","","","","","","" +Australian Federal Police,active,http://www.afp.gov.au,17864931143,http://www.afp.gov.au/about-the-afp/accountability-requirements/senate-order-contracts.aspx,http://www.afp.gov.au/about-the-afp/accountability-requirements/grants.aspx,http://www.afp.gov.au/media-centre/publications/annual-reports/afp.aspx,http://www.afp.gov.au/about-the-afp/accountability-requirements/consultancy-services.aspx,http://www.afp.gov.au/about-the-afp/accountability-requirements/afp-report-on-legal-services-expenditure.aspx,http://www.afp.gov.au/about-the-afp/accountability-requirements/harradine-report.aspx,"","" +Australian Fisheries Management Authority,active,http://www.afma.gov.au,81098497517,http://www.afma.gov.au/resource-centre/publications-and-forms/corporate/external-reporting/,"",http://www.afma.gov.au/resource-centre/publications-and-forms/corporate/annual-report/,"",http://www.afma.gov.au/resource-centre/publications-and-forms/corporate/external-reporting/,http://www.afma.gov.au/resource-centre/publications-and-forms/corporate/external-reporting/,http://www.afma.gov.au/resource-centre/publications-and-forms/corporate/freedom-of-information/,"" +Australian Hearing Services,active,http://www.hearing.com.au/,83605426759,"","",http://www.hearing.com.au/annual-reports,"","","",http://www.centrelink.gov.au/internet/internet.nsf/legal/foi_table.htm,"" +Australian Human Rights Commission,active,http://www.hreoc.gov.au,47996232602,"","","","","","","","" +Australian Industrial Registry,suspended,http://www.business.gov.au,83674573738,"","","","","","","","" +Australian Institute of Criminology,active,http://www.aic.gov.au,63257175248,"","","","","","","","" +Australian Institute of Health and Welfare,active,http://www.aihw.gov.au,16515245497,"","","","","","","","" +Australian Institute of Marine Science,suspended,http://www.aims.gov.au,78961616230,"","","","","","","","" +Australian Law Reform Commission,active,http://www.alrc.gov.au,88913413914,"","","","","","","","" +Australian Maritime Safety Authority,suspended,http://www.amsa.gov.au,65377938320,"","","","","","","","" +Australian National Maritime Museum,suspended,http://www.anmm.gov.au,35023590988,"","","","","","","","" +Australian Nuclear Science and Technology Organisation (ANSTO),suspended,http://www.ansto.gov.au,47956969590,"","","","","","","","" +Australian Office of Financial Management,active,http://www.aofm.gov.au,13059525039,"","","","","","","","" +Australian Organ and Tissue Donation and Transplantation Authority,active,http://www.donatelife.gov.au,56253405315,"","","","","","","","" +Australian Pesticides and Veterinary Medicines Authority,active,http://www.apvma.gov.au,19495043447,"","","","","","","","" +Australian Public Service Commission,active,http://www.apsc.gov.au,99470863260,"","","","","","","","" +Australian Research Council,suspended,http://www.arc.gov.au,35201451156,"","","","","","","","" +Australian Reward Investment Alliance (ARIA),suspended,http://aria.gov.au,48882817243,"","","","","","","","" +Australian Securities and Investments Commission,active,http://www.asic.gov.au,86768265615,"","","","","","","","" +Australian Security Intelligence Organisation,active,http://blu.asio.gov.au,37467566201,"","","","","","","","" +Australian Sports Anti-Doping Authority (ASADA),suspended,http://checksubstances.asada.gov.au,91592527503,"","","","","","","","" +Australian Taxation Office,active,http://www.ato.gov.au,51824753556,"","","","","","","","" +Australian Transaction Reports and Analysis Centre (AUSTRAC),active,http://www.austrac.gov.au,50008559486,"","","","","","","","" +Australian Transport Safety Bureau,active,http://www.atsb.gov.au,65061156887,"","","","","","","","" +Australian War Memorial,active,http://www.awm.gov.au,64909221257,"","","","","","","","" +Bureau of Meteorology,suspended,http://www.bom.gov.au,92637533532,"","","","","","","","" +Cancer Australia,active,http://www.canceraustralia.gov.au,21075951918,"","","","","","","","" +Centrelink,active,http://www.centrelink.gov.au,29468422437,"","","","","","","","" +Civil Aviation Safety Authority,suspended,http://www.casa.gov.au,44808014470,"","","","","","","","" +Comcare,suspended,http://www.comcare.gov.au,41640788304,"","","","","","","","" +Commonwealth Grants Commission,suspended,http://www.cgc.gov.au,86267354017,"","","","","","","","" +Corporations and Markets Advisory Committee,active,http://www.camac.gov.au,85005260622,"","","","","","","","" +Crimtrac,active,http://www.crimtrac.gov.au,17193904699,"","","","","","","","" +CSIRO,suspended,http://www.csiro.au/,41687119230,http://www.csiro.au/org/pskc.html,"","","","","","","" +Defence Housing Australia,active,http://www.dha.gov.au,72968504934,"","","","","","","","" +Defence Materiel Organisation,active,http://www.defence.gov.au,68706814312,"","","","","","","","" +"Department of Agriculture, Fisheries and Forestry",active,http://www.daff.gov.au/,24113085695,"","","","","","","","" +"Department of Broadband, Communications and the Digital Economy",active,http://www.dbcde.gov.au,51491646726,"","","","","","","","" +Department of Climate Change and Energy Efficiency,active,http://www.climatechange.gov.au,48072377158,"","","","","","","","" +Department of Defence,active,http://www.defence.gov.au,68706814312,"","","","","","","","" +"Department of Families, Housing, Community Services and Indigenous Affairs",active,http://www.fahcsia.gov.au/,36342015855,"","","","","","","","" +Department of Finance and Deregulation,active,http://www.finance.gov.au,61970632495,"","","","","","","","" +Department of Foreign Affairs and Trade,active,http://www.dfat.gov.au,47065634525,"","","","","","","","" +Department of Health and Ageing,active,http://www.health.gov.au,83605426759,"","","","","","","","" +Department of Human Services,active,http://www.centrelink.gov.au,29468422437,"","","","","","","","" +Department of Immigration & Citizenship,active,http://www.eta.immi.gov.au,33380054835,"","","","","","","","" +Department of Infrastructure and Transport,active,http://www.infrastructure.gov.au,86267354017,"","","","","","","","" +Department of Parliamentary Services,active,http://www.aph.gov.au,52997141147,"","","","","","","","" +"Department of Regional Australia, Regional Development and Local Government",active,http://www.regional.gov.au/,37862725624,"",http://www.regional.gov.au/department/grants/index.aspx,http://www.regional.gov.au/department/annual_report/index.aspx,"","",http://www.regional.gov.au/department/file_listing/index.aspx,http://www.regional.gov.au/department/foi/index.aspx,"" +"Department of Resources, Energy and Tourism",active,http://www.ret.gov.au,71278859567,"","","","","","","","" +"Department of Sustainability, Environment, Water, Population and Communities",active,http://www.environment.gov.au,34190894983,"","","","","","","","" +Department of the House of Representatives,active,http://www.aph.gov.au,18526287740,"","","","","","","","" +Department of the Prime Minister and Cabinet,active,http://www.dpmc.gov.au,18108001191,"","","","","","","","" +Department of the Senate,active,http://www.aph.gov.au,23991641527,"","","","","","","","" +Department of the Treasury,active,http://www.treasury.gov.au,92802414793,"","","","","","","","" +Director of National Parks,suspended,http://www.environment.gov.au,13051694963,"","","","","","","","" +Equal Opportunity for Women in the Workplace Agency,suspended,http://www.eowa.gov.au,47641643874,"","","","","","","","" +Export Finance and Insurance Corporation (EFIC),active,http://www.efic.gov.au,96874024697,"","","","","","","","" +Fair Work Australia,active,http://www.fairwork.gov.au,93614579199,"","","","","","","","" +Family Court of Australia,active,http://www.ag.gov.au,63684208971,"","","","","","","","" +Federal Magistrates Court,active,http://www.fmc.gov.au,60265617271,"","","","","","","","" +Food Standards Australia New Zealand,active,http://www.foodstandards.gov.au,20537066246,"","","","","","","","" +Future Fund Management Agency,suspended,http://www.futurefund.gov.au/,53156699293,"","","","","","","","" +Geoscience Australia,suspended,http://www.ga.gov.au,80091799039,"","","","","","","","" +Grains Research and Development Corporation,active,http://www.grdc.com.au/,55611223291,"","","","","","","","" +Great Barrier Reef Marine Park Authority,suspended,http://www.gbrmpa.gov.au,12949356885,"","","","","","","","" +Insolvency and Trustee Service Australia (ITSA),active,http://www.itsa.gov.au,63384330717,"","","","","","","","" +Inspector-General of Taxation,active,http://www.igt.gov.au,51248702319,"","","","","","","","" +Medicare Australia,active,http://www.medicareaustralia.gov.au,75174030967,"","","","","","","","" +Migration Review Tribunal and Refugee Review Tribunal (MRT-RRT),active,http://www.mrt-rrt.gov.au,50760799564,"","","","","","","","" +National Archives of Australia,suspended,http://www.naa.gov.au,36889228992,"","","","","","","","" +National Blood Authority,active,http://www.nba.gov.au,87361602478,"","","","","","","","" +National Capital Authority,suspended,http://www.nationalcapital.gov.au,75149374427,"","","","","","","","" +National Competition Council,active,http://www.ncc.gov.au,56552760098,"","","","","","","","" +National Film and Sound Archive,active,http://www.nfsa.gov.au,41251017588,"","","","","","","","" +National Gallery of Australia,suspended,http://nga.gov.au,27855975449,"","","","","","","","" +National Health and Medical Research Council,active,http://www.nhmrc.gov.au,88601010284,"","","","","","","","" +National Library of Australia,suspended,http://catalogue.nla.gov.au,28346858075,"","","","","","","","" +National Museum of Australia,suspended,http://www.nma.gov.au,70592297967,"","","","","","","","" +National Offshore Petroleum Safety Authority,suspended,http://www.nopsa.gov.au,22385178289,"","","","","","","","" +National Water Commission,suspended,http://www.nwc.gov.au,94364176431,"","","","","","","","" +Office of Parliamentary Counsel,active,http://www.opc.gov.au/,41425630817,http://www.opc.gov.au/about/list_contracts.htm,"",http://www.opc.gov.au/about/documents.htm,http://www.opc.gov.au/about/list_consultancy_contracts.html,http://www.opc.gov.au/about/legal_services.htm,http://www.opc.gov.au/about/list_docs.htm,"","" +Office of the Auditing and Assurance Standards Board,active,http://www.auasb.gov.au,41127556389,"","","","","","","","" +Office of the Australian Accounting Standards Board,active,http://www.aasb.com.au/Home.aspx,92702019575,"","",http://www.aasb.com.au/About-the-AASB/AASB-annual-reports.aspx,"","","","","" +Office of the Australian Building and Construction Commissioner (ABCC),suspended,http://www.abcc.gov.au,68003725098,"","","","","","","","" +Office of the Australian Information Commissioner,active,http://www.oic.qld.gov.au,70810284665,"","","","","","","","" +Office of the Commonwealth Ombudsman,active,http://www.ombudsman.gov.au,53003678148,"","","","","","","","" +Office of the Director of Public Prosecutions,active,http://www.cdpp.gov.au,41036606436,"","","","","","","","" +Office of the Fair Work Ombudsman,active,http://www.fairwork.gov.au,43884188232,"","","","","","","","" +Office of the Inspector-General of Intelligence and Security,active,http://www.igis.gov.au/,67332668643,"","",http://www.igis.gov.au/annual_report/index.cfm,"","","","","" +Office of the Renewable Energy Regulator,suspended,http://www.environment.gov.au,68574011917,"","","","","","","","" +Private Health Insurance Ombudsman,active,http://www.pc.gov.au,61673137709,"","","","","","","","" +Productivity Commission,active,http://www.pc.gov.au,30004116223,"","","","","","","","" +Professional Services Review,active,http://www.psr.gov.au,45307308260,"","","","","","","","" +Reserve Bank of Australia,active,http://www.austrac.gov.au,50008559486,"","","","","","","","" +Royal Australian Mint,active,http://mintissue.ramint.gov.au,45852104259,"","","","","","","","" +Safe Work Australia,active,http://safeworkaustralia.gov.au,76116948344,"","","","","","","","" +Seacare,suspended,http://www.seacare.gov.au/,32745854352,"","",http://www.seacare.gov.au/publications/corporate_publications/annual_reports2,"","",http://www.seacare.gov.au/publications/corporate_publications/harradine_reports2,"","" +Social Security Appeals Tribunal,suspended,http://www.aat.gov.au,90680970626,"","","","","","","","" +Sydney Harbour Federation Trust,suspended,http://www.harbourtrust.gov.au/,14178614905,"","","","","","","","" +Therapeutic Goods Administration,active,http://www.tga.gov.au,40939406804,"","","","","","","","" +Tourism Australia,suspended,http://www.ret.gov.au,99657548712,"","","","","","","","" +Aboriginal and Torres Strait Islander Services,suspended,http://www.atsis.gov.au,13013281264,"","","","","","","","" +Australia-Japan Foundation,suspended,http://www.dfat.gov.au,47065634525,"","","","","","","","" +Australian Film Commission,suspended,http://afcarchive.screenaustralia.gov.au/,57859074040,"","",http://afcarchive.screenaustralia.gov.au/profile/annrpt/profilepage_12.aspx,"","",http://afcarchive.screenaustralia.gov.au/profile/afc_files/profilepage_21.aspx,"","" +Australian Film Television and Radio School,suspended,http://www.planning.nsw.gov.au,29003234571,"","","","","","","","" +Australian Institute of Aboriginal and Torres Strait Islander Studies (AIATSIS),suspended,http://www.aiatsis.gov.au,62020533641,"","","","","","","","" +Biosecurity Australia,suspended,http://www.aph.gov.au,27009590729,"","","","","","","","" +Child Support Agency,suspended,http://www.centrelink.gov.au,29468422437,"","","","","","","","" +Dairy Adjustment Authority,suspended,http://www.daff.gov.au/agriculture-food/meat-wool-dairy/dairy/dairy_adjustment_authority,28714145393,"","","","","","","","" +Department of Climate Change,suspended,http://www.climatechange.gov.au,83814086707,"","","","","","","","" +"Department of Communications, Information Technology and the Arts",suspended,http://www.dbcde.gov.au,51491646726,"","","","","","","","" +"Department of Education, Science and Training",suspended,http://www.dest.gov.au,51452193160,"","","","","","","","" +Department of Employment and Workplace Relations,suspended,http://www.dest.gov.au,40376417416,"","","","","","","","" +"Department of Families, Community Services & Indigenous Affairs",suspended,http://www.centrelink.gov.au,29468422437,"","","","","","","","" +Department of Finance and Administration,suspended,http://www.finance.gov.au,61970632495,"","","","","","","","" +Department of Immigration and Multicultural Affairs,suspended,http://www.immi.gov.au,33380054835,"","","","","","","","" +"Department of Industry, Tourism and Resources",suspended,http://www.business.gov.au,51835430479,"","","","","","","","" +"Department of Infrastructure, Transport, Regional Development and Local Government",suspended,http://www.business.gov.au,86267354017,"","","","","","","","" +Department of the Environment and Water Resources,suspended,http://www.environment.gov.au,34190894983,"","","","","","","","" +"Department of the Environment, Water, Heritage and the Arts",suspended,http://www.environment.gov.au,34190894983,"","","","","","","","" +Department of Transport and Regional Services,suspended,http://www.infrastructure.gov.au,86267354017,"","","","","","","","" +Emergency Management Australia,suspended,http://www.health.gov.au,83605426759,"","","","","","","","" +Export Wheat Commission,suspended,http://australia.gov.au,40485918341,"","","","","","","","" +Land and Water Australia,suspended,http://lwa.gov.au/,25602743013,"","","","","","","","" +Office of Film and Literature Classification,suspended,http://www.acma.gov.au,16001170433,"","","","","","","","" +Office of the Privacy Commissioner,suspended,http://www.privacy.gov.au,13152473225,"","","","","","","","" +Office of Workplace Services,suspended,http://deewr.gov.au,71141751477,"","","","","","","","" +Questacon,suspended,http://www.tenders.gov.au,74599608295,"","","","","","","","" +Refugee Review Tribunal,suspended,http://www.mrt-rrt.gov.au,50760799564,"","","","","","","","" +Screen Australia,suspended,http://www.screenaustralia.gov.au,46741353180,"","","","","","","","" +The National Institute of Clinical Studies Ltd,suspended,http://www.nhmrc.gov.au/nics/index.htm,60095459804,"","","","","","","","" +Workplace Ombudsman,suspended,http://www.fairwork.gov.au,43884188232,"","","","","","","","" +Department of Immigration and Citizenship,,http://www.eta.immi.gov.au,33380054835,"","","","","","","","" +Department of Education Employment and Workplace Relations,,http://www.dest.gov.au,0,"","","","","","","","" +Department of Veterans' Affairs,active,"",0,"","","","","","","","" +Department of Resources Energy and Tourism,,http://www.ret.gov.au,96127997719,"","","","","","","","" +Department of Broadband Communications and the Digital Economy,,http://www.dbcde.gov.au,51491646726,"","","","","","","","" +Department of Innovation Industry Science and Research,,http://australia.gov.au,0,"","","","","","","","" +Department of Agriculture Fisheries and Forestry,,http://www.centrelink.gov.au,29468422437,"","","","","","","","" --- /dev/null +++ b/getAgency.php @@ -1,1 +1,57 @@ +head('_design/app')->headers->_HTTP->status != "404") return true; + } + catch(Exception $e) { + //database issue + return false; + } + $ddoc = new StdClass(); + $ddoc->_id = '_design/app'; + $ddoc->views = new StdClass(); + // by abn, by name + $ddoc->views->byABN = new StdClass(); + $ddoc->views->byABN->map = "function(doc) { emit(doc.ABN, doc); };"; + $ddoc->views->byName = new StdClass(); + $ddoc->views->byName->map = "function(doc) { emit(doc.name, doc); };"; + $ddoc->views->getActive = new StdClass(); + $ddoc->views->getActive->map = 'function(doc) { if (doc.status == "active") { emit(doc._id, doc); } };'; + $ddoc->views->getSuspended = new StdClass(); + $ddoc->views->getSuspended->map = 'function(doc) { if (doc.status == "suspended") { emit(doc._id, doc); } };'; + $ddoc->views->getScrapeRequired = new StdClass(); + $ddoc->views->getScrapeRequired->map = "function(doc) { emit(doc.ABN, doc); };"; + try { + $sag->put('_design/app', $ddoc); + } + catch(Exception $e) { + /* + * A 409 status code means there was a conflict, so another client + * already created the design doc for us. This is fine. + */ + if ($e->getCode() != 409) return false; + } + return true; +} +require_once 'sag/src/Sag.php'; +$sag = new Sag(); +$sag->setDatabase("disclosr-agencies", true); +//get an agency record as json/html, search by name/abn/id +// by name = startkey="Ham"&endkey="Ham\ufff0" +// edit? +createAgencyDesignDoc(); +$rows = $sag->get('/_design/app/_view/byABN?include_docs=true')->body->rows; // &endkey='.$searchVar +foreach ($rows as $row) { + echo ""; + echo '"; + echo ""; + foreach ($row->doc as $key => $value) { + echo ""; + } // also show documents/URLs available + echo "

' . $row->doc->agencyName. "

Field NameField Value
$key$value
"; +} --- /dev/null +++ b/import.php @@ -1,1 +1,31 @@ +setDatabase("disclosr-agencies", true); +$file = "agencies.csv"; +$handle = fopen($file, "r"); +$fieldNames = Array(); +$line = 0; +while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) { + if ($line == 0) { + $fieldNames = $data; + } + else { + $agency = Array(); + $valueCount = 0; + foreach ($data as $value) { + if ($value != "") $agency[$fieldNames[$valueCount]] = $value; + $valueCount++; + } + $agency["lastScraped"] = "1/1/1970"; + $agency["scrapeDepth"] = 1; + var_dump($agency); + $sag->post($agency); + } + $line++; + if ($line % 10000 == 0) echo "$line records... \n"; +} +fclose($handle); +echo "Found a total of $line records in $file.\n"; +?> --- /dev/null +++ b/lastUpdated.php @@ -1,1 +1,2 @@ +for each agency, record when last changed (number of days too) and show a couple of URLs that were in that change --- /dev/null +++ b/rss.xml.php @@ -1,1 +1,2 @@ - +// Agency X updated Y, new files, diff of plain text/link text, +// feed for just one agency or all --- /dev/null +++ b/sag --- /dev/null +++ b/scrape.py @@ -1,1 +1,56 @@ +#http://packages.python.org/CouchDB/client.html +import couchdb +import urllib2 +couch = couchdb.Server() # Assuming localhost:5984 +# If your CouchDB server is running elsewhere, set it up like this: +# couch = couchdb.Server('http://example.com:5984/') + +# select database +agencydb = couch['disclosr-agencies'] + +for row in agencydb.view('app/getScrapeRequired'): #not recently scraped agencies view? + agency = agencydb.get(row.id) + print agency['agencyName'] + +#http://diveintopython.org/http_web_services/etags.html +class NotModifiedHandler(urllib2.BaseHandler): + def http_error_304(self, req, fp, code, message, headers): + addinfourl = urllib2.addinfourl(fp, headers, req.get_full_url()) + addinfourl.code = code + return addinfourl + +def scrapeAndStore(URL, depth, agency): + URL = "http://www.hole.fi/jajvirta/weblog/" + req = urllib2.Request(URL) + + #if there is a previous version sotred in couchdb, load caching helper tags + if etag: + req.add_header("If-None-Match", etag) + if last_modified: + req.add_header("If-Modified-Since", last_modified) + + opener = urllib2.build_opener(NotModifiedHandler()) + url_handle = opener.open(req) + headers = url_handle.info() # the addinfourls have the .info() too + etag = headers.getheader("ETag") + last_modified = headers.getheader("Last-Modified") + web_server = headers.getheader("Server") + file_size = headers.getheader("Content-Length") + mime_type = headers.getheader("Content-Type") + + if hasattr(url_handle, 'code') and url_handle.code == 304: + print "the web page has not been modified" + else: + print "error %s in downloading %s", url_handle.code, URL + #record/alert error + + #do scraping + #for each unique link + #if html mimetype + # go down X levels, + # diff with last stored attachment, store in document + #if not + # remember to save parentURL and title (link text that lead to document) + + #store as attachment epoch-filename --- /dev/null +++ b/validation.php --- /dev/null +++ b/webservers.php @@ -1,1 +1,1 @@ - +for each agency, find a scrapped document and read the webserver off it