# dependencies: http://code.google.com/p/python-twitter/
|
|
|
# info
# http://stackoverflow.com/questions/4206882/named-entity-recognition-with-preset-list-of-names-for-python-php/4207128#4207128
# http://alias-i.com/lingpipe/demos/tutorial/ne/read-me.html approximate dist
# http://streamhacker.com/2008/12/29/how-to-train-a-nltk-chunker/ more training
# http://www.postgresql.org/docs/9.1/static/pgtrgm.html
|
|
# data sources
# http://twitter.com/#!/ACTEmergencyInf instant site wide
# http://twitter.com/#!/ACTPol_Traffic
# http://esa.act.gov.au/feeds/currentincidents.xml
|
|
# source: https://gist.github.com/322906/90dea659c04570757cccf0ce1e6d26c9d06f9283
import nltk |
import nltk |
import twitter |
import twitter |
|
import tweepy |
import psycopg2 |
import psycopg2 |
|
from iniparse import INIConfig |
|
|
def insert_service_alert_sitewide(heading, message, url):
    """Placeholder for inserting a site-wide service alert.

    The current implementation does not store anything: it only prints the
    literal text "NaN". None of the arguments are used yet.

    Args:
        heading: alert heading (currently unused).
        message: alert body text (currently unused).
        url: link associated with the alert (currently unused).

    Returns:
        None.
    """
    # Parenthesized single-argument print behaves the same on Python 2 and 3.
    print("NaN")
|
|
def insert_service_alert_for_street(streets, heading, message, url):
    """Connect to the local PostgreSQL database and run the placeholder query.

    NOTE(review): despite the name, nothing is inserted yet. The query below
    reads from ``environmentdata_values`` and none of the arguments are used;
    it appears to be a placeholder until the real INSERT is written.

    Args:
        streets: street names the alert applies to (currently unused).
        heading: alert heading (currently unused).
        message: alert body text (currently unused).
        url: link associated with the alert (currently unused).

    Returns:
        A list holding the first column of every row returned by the query.

    Raises:
        SystemExit: if connecting to or querying the database fails.
    """
    # Local import: ``sys`` is not imported in the visible part of this file.
    import sys

    conn_string = "host='localhost' dbname='energymapper' user='postgres' password='snmc'"
    # print the connection string we will use to connect
    # NOTE(review): this writes the database password to stdout.
    print("Connecting to database\n ->%s" % (conn_string))

    ys = []
    conn = None
    try:
        # get a connection, if a connect cannot be made an exception will be raised here
        conn = psycopg2.connect(conn_string)

        # conn.cursor will return a cursor object, you can use this cursor to perform queries
        cursor = conn.cursor()

        # execute our Query
        cursor.execute(
            "select max(value), extract(dow from max(time)) as dow, "
            "extract(year from max(time))::text || lpad(extract(month from max(time))::text,2,'0') "
            "|| lpad(extract(month from max(time))::text,2,'0') as yearmonthweek, to_char(max(time),'J') "
            "from environmentdata_values where \"dataSourceID\"='NSWAEMODemand' "
            "group by extract(dow from time), extract(year from time), extract(week from time) "
            "order by extract(year from time), extract(week from time), extract(dow from time)"
        )

        # retrieve the records from the database
        ys = [record[0] for record in cursor.fetchall()]

        # TODO: replace the query above with a parameterized insert, e.g.
        # >>> cur.execute("INSERT INTO test (num, data) VALUES (%s, %s)", (42, 'bar'))
        # >>> cur.statusmessage
        # 'INSERT 0 1'
    except psycopg2.Error as exc:
        # Exit the script and print an error telling what happened.
        sys.exit("Database connection failed!\n ->%s" % (exc))
    finally:
        # Always release the connection, whether or not the query succeeded.
        if conn is not None:
            conn.close()

    return ys
|
|
def get_tweets(user):
    """Return the timeline for ``user`` as given by ``twitter.Api().GetUserTimeline``.

    A fresh ``twitter.Api`` client is created without credentials on every call.
    """
    return twitter.Api().GetUserTimeline(user)
|
|
|
def extract_entity_names(t):
    """Collect the text of every subtree labelled 'NE' below ``t``.

    Works with both tree APIs: objects exposing a ``label()`` method (NLTK 3)
    and objects exposing a ``node`` attribute (older NLTK releases). Anything
    without a label, such as a ``(word, tag)`` tuple leaf, yields no names.

    Args:
        t: a chunk tree, a subtree, or a leaf.

    Returns:
        A list of strings. Each string is the first element of every child of
        one 'NE' subtree, joined with single spaces, in traversal order.
    """
    # Prefer label(): on NLTK 3 merely touching ``.node`` raises
    # NotImplementedError, which hasattr() does not swallow on Python 3.
    label_getter = getattr(t, 'label', None)
    if callable(label_getter):
        label = label_getter()
    else:
        label = getattr(t, 'node', None)

    if not label:
        return []

    if label == 'NE':
        return [' '.join(child[0] for child in t)]

    entity_names = []
    for child in t:
        entity_names.extend(extract_entity_names(child))
    return entity_names
|
|
def extract_names(sample):
    """Print and return the unique named entities found in ``sample``.

    The text is split into sentences, tokenized, POS-tagged and then chunked
    with NLTK's binary named-entity chunker. The 'NE' subtrees are collected
    with ``extract_entity_names``.

    Args:
        sample: the text to analyse.

    Returns:
        A set of entity-name strings (the same set that is printed).
    """
    sentences = nltk.sent_tokenize(sample)
    tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
    tagged_sentences = [nltk.pos_tag(tokens) for tokens in tokenized_sentences]

    # NLTK 3 renamed batch_ne_chunk to ne_chunk_sents; use whichever exists.
    chunk_sentences = getattr(nltk, 'ne_chunk_sents', None) or nltk.batch_ne_chunk
    chunked_sentences = chunk_sentences(tagged_sentences, binary=True)
    # chunked/tagged may be enough to just find and match the nouns

    entity_names = []
    for tree in chunked_sentences:
        entity_names.extend(extract_entity_names(tree))

    # Print unique entity names
    unique_names = set(entity_names)
    print(unique_names)
    return unique_names
|
# Load the Twitter API credentials from the INI file; the ``with`` block
# closes the file handle once INIConfig has been built from it.
with open('/tmp/aws.ini') as config_file:
    cfg = INIConfig(config_file)

auth = tweepy.OAuthHandler(cfg.api_keys.twitter_consumer_key, cfg.api_keys.twitter_consumer_secret)
auth.set_access_token(cfg.api_keys.twitter_access_token, cfg.api_keys.twitter_access_token_secret)

api = tweepy.API(auth)

# If the authentication was successful, you should
# see the name of the account print out
print(api.me().name)

# https://github.com/tweepy/tweepy/blob/master/tweepy/api.py
print(api.user_timeline(screen_name="ACTPol_Traffic"))

# NOTE(review): this posts a status update with the text "test" every time
# the module is executed or imported.
print(api.update_status(status="test"))