# dependencies: http://code.google.com/p/python-twitter/

# info
# http://stackoverflow.com/questions/4206882/named-entity-recognition-with-preset-list-of-names-for-python-php/4207128#4207128
# http://alias-i.com/lingpipe/demos/tutorial/ne/read-me.html approximate dist
# http://streamhacker.com/2008/12/29/how-to-train-a-nltk-chunker/ more training
# http://www.postgresql.org/docs/9.1/static/pgtrgm.html

# data sources
# http://twitter.com/#!/ACTEmergencyInf instant site wide
# http://twitter.com/#!/ACTPol_Traffic
# http://esa.act.gov.au/feeds/currentincidents.xml

# source: https://gist.github.com/322906/90dea659c04570757cccf0ce1e6d26c9d06f9283

import sys

import nltk
import twitter
import psycopg2


def insert_service_alert_sitewide(heading, message, url):
    # not implemented yet; `pass` keeps the module importable
    pass


def insert_service_alert_for_street(streets, heading, message, url):
    conn_string = "host='localhost' dbname='energymapper' user='postgres' password='snmc'"
    # print the connection string we will use to connect
    print "Connecting to database\n ->%s" % (conn_string)
    try:
        # get a connection; if one cannot be made an exception is raised here
        conn = psycopg2.connect(conn_string)
        # conn.cursor() returns a cursor object used to perform queries
        cursor = conn.cursor()
        # execute our query (the second lpad() originally repeated the month
        # extract; it now takes the week, matching the "yearmonthweek" alias)
        cursor.execute("select max(value), extract(dow from max(time)) as dow, \
            extract(year from max(time))::text || lpad(extract(month from max(time))::text,2,'0') \
            || lpad(extract(week from max(time))::text,2,'0') as yearmonthweek, to_char(max(time),'J') \
            from environmentdata_values where \"dataSourceID\"='NSWAEMODemand' \
            group by extract(dow from time), extract(year from time), extract(week from time) \
            order by extract(year from time), extract(week from time), extract(dow from time)")
        # retrieve the records from the database
        records = cursor.fetchall()
        ys = []  # collect the max(value) column from each row
        for record in records:
            ys.append(record[0])
        # parameterised inserts look like this:
        # >>> cur.execute("INSERT INTO test (num, data) VALUES (%s, %s)", (42, 'bar'))
        # >>> cur.statusmessage
        # 'INSERT 0 1'
    except Exception:
        # get the most recent exception
        exceptionType, exceptionValue, exceptionTraceback = sys.exc_info()
        # exit the script and print an error telling what happened
        sys.exit("Database connection failed!\n ->%s" % (exceptionValue))


def get_tweets(user):
    tapi = twitter.Api()
    return tapi.GetUserTimeline(user)


def extract_entity_names(t):
    # recursively collect the leaves of every 'NE' (named entity) subtree
    # (note: under NLTK 3, t.node became t.label())
    entity_names = []
    if hasattr(t, 'node') and t.node:
        if t.node == 'NE':
            entity_names.append(' '.join([child[0] for child in t]))
        else:
            for child in t:
                entity_names.extend(extract_entity_names(child))
    return entity_names


def extract_names(sample):
    sentences = nltk.sent_tokenize(sample)
    tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
    tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]
    # binary=True tags every entity as 'NE' instead of PERSON/ORGANIZATION/etc.
    # (note: batch_ne_chunk was renamed ne_chunk_sents in NLTK 3)
    chunked_sentences = nltk.batch_ne_chunk(tagged_sentences, binary=True)
    # chunked/tagged may be enough to just find and match the nouns
    entity_names = []
    for tree in chunked_sentences:
        # print results per sentence
        # print extract_entity_names(tree)
        entity_names.extend(extract_entity_names(tree))
    # print all entity names
    # print entity_names
    # print unique entity names
    print set(entity_names)
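

# Hedged sketch only: the original gist never actually writes an alert
# anywhere, so this shows the shape a parameterised write could take,
# following the psycopg2 example quoted in the comments above. The helper
# name, the 'service_alerts' table, and its columns are assumptions, not
# part of the original schema.
def record_service_alert(cursor, street, heading, message, url):
    # %s placeholders let psycopg2 quote the values safely,
    # rather than interpolating them into the SQL string by hand
    cursor.execute(
        "INSERT INTO service_alerts (street, heading, message, url) "
        "VALUES (%s, %s, %s, %s)",
        (street, heading, message, url))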
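

# A minimal sketch of how the pieces above might be wired together, assuming
# the era-appropriate python-twitter API (twitter.Api() with no credentials
# answering unauthenticated timeline requests, Status objects carrying .text).
# 'ACTPol_Traffic' comes from the data-sources list at the top of this file;
# the entity names printed per tweet would be the candidate street names.
if __name__ == '__main__':
    for status in get_tweets('ACTPol_Traffic'):
        print status.text
        extract_names(status.text)  # prints the unique entity names per tweet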