HW8: Topic Modeling

In [ ]:
 
In [68]:
## =======================================================
## IMPORTING
## =======================================================
import os
def get_data_from_files(path):
    """Read every regular file directly inside ``path`` and return the texts.

    Args:
        path: Directory to read from. A trailing path separator is optional.

    Returns:
        list[str]: One string per file, decoded as ISO-8859-1 and ordered by
        file name.

    Raises:
        OSError: If ``path`` cannot be listed or a file cannot be opened.
    """
    results = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # document order (and the row order of anything built from it) stable
    # from one run to the next.
    for name in sorted(os.listdir(path)):
        file_path = os.path.join(path, name)
        # Skip sub-directories (for example Jupyter's .ipynb_checkpoints),
        # which open() cannot read as text.
        if not os.path.isfile(file_path):
            continue
        # The context manager closes the handle even if read() raises.
        with open(file_path, encoding="ISO-8859-1") as f:
            results.append(f.read())
    return results


## =======================================================
## MODELING
## =======================================================
import pandas as pd
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer
import gensim
from gensim.utils import simple_preprocess
from gensim.parsing.preprocessing import STOPWORDS

def run_lda(data, num_topics, stop_words):
    """Vectorize ``data`` with word counts, fit an LDA model and print its topics.

    Args:
        data: Iterable of raw document strings.
        num_topics: Number of topics (``n_components``) for the LDA model.
        stop_words: ``'english'``, ``None``, or a collection of words to drop.
            Non-list collections (set, frozenset, pandas Index, ...) are
            converted to a list before being handed to ``CountVectorizer``.

    Returns:
        tuple: ``(lda_model, lda, lda_vec, cv, corpus)`` where
            ``lda_model`` is the result of ``lda.fit_transform(lda_vec)``,
            ``lda`` is the fitted ``LatentDirichletAllocation`` instance,
            ``lda_vec`` is the document-term matrix from ``cv.fit_transform``,
            ``cv`` is the fitted ``CountVectorizer``, and
            ``corpus`` is a dense DataFrame of ``lda_vec`` with one column
            per vocabulary term.
    """
    # Newer scikit-learn releases validate constructor parameters and accept
    # only 'english', a list, or None for stop_words; a frozenset (as built
    # from ENGLISH_STOP_WORDS.union(...)) would be rejected.
    if stop_words is not None and not isinstance(stop_words, str):
        stop_words = list(stop_words)

    cv = CountVectorizer(stop_words=stop_words)
    lda_vec = cv.fit_transform(data)

    # get_feature_names() was removed in scikit-learn 1.2; fall back to it
    # only on old installations that lack get_feature_names_out().
    if hasattr(cv, "get_feature_names_out"):
        lda_columns = cv.get_feature_names_out()
    else:
        lda_columns = cv.get_feature_names()

    corpus = pd.DataFrame(lda_vec.toarray(), columns=lda_columns)
    lda = LatentDirichletAllocation(n_components=num_topics, max_iter=10,
                                    learning_method='online')
    lda_model = lda.fit_transform(lda_vec)
    print_topics(lda, cv)
    return lda_model, lda, lda_vec, cv, corpus


## =======================================================
## HELPERS
## =======================================================
import numpy as np
# Seed NumPy's global random number generator once, when this cell runs.
# NOTE(review): presumably this is what keeps the LDA fit repeatable, since
# run_lda passes no random_state to LatentDirichletAllocation — confirm.
np.random.seed(210)

def print_topics(model, vectorizer, top_n=10):
    """Print the ``top_n`` highest-weighted terms of every topic.

    Args:
        model: Object exposing ``components_``, iterated as one weight
            vector per topic.
        vectorizer: Object exposing ``get_feature_names_out()`` or the legacy
            ``get_feature_names()``; position ``i`` names weight ``i``.
        top_n: Number of terms printed per topic.
    """
    # Fetch the vocabulary once instead of once per printed term.
    # get_feature_names() was removed in scikit-learn 1.2, so prefer the
    # replacement and fall back only on old installations.
    if hasattr(vectorizer, "get_feature_names_out"):
        feature_names = vectorizer.get_feature_names_out()
    else:
        feature_names = vectorizer.get_feature_names()

    for idx, topic in enumerate(model.components_):
        print("Topic %d:" % (idx))
        # argsort() is ascending; the reversed slice walks the last top_n
        # indices, so the largest weight comes first.
        print([(feature_names[i], topic[i])
               for i in topic.argsort()[:-top_n - 1:-1]])
        

## =======================================================
## VISUALIZING
## =======================================================        
import pyLDAvis.sklearn as LDAvis
import pyLDAvis

def start_vis(lda, lda_vec, cv):
    """Build a pyLDAvis panel from the given objects and show it.

    Args:
        lda: First argument to ``LDAvis.prepare`` (presumably the fitted LDA
            model returned by ``run_lda`` — confirm).
        lda_vec: Second argument to ``LDAvis.prepare`` (presumably the
            document-term matrix returned by ``run_lda`` — confirm).
        cv: Third argument to ``LDAvis.prepare`` (presumably the fitted
            ``CountVectorizer`` returned by ``run_lda`` — confirm).
    """
    vis_panel = LDAvis.prepare(lda, lda_vec, cv, mds='tsne')
    pyLDAvis.show(vis_panel)
    # Disabled alternative: write the panel to an HTML file instead.
    # pyLDAvis.save_html(vis_panel, 'HW8_lda.html')
In [37]:
# Load both document sets and combine them into a single list of texts.
data_fd = get_data_from_files('110/110-f-d/')
data_fr = get_data_from_files('110/110-f-r/')
data = data_fd + data_fr

# run_lda requires num_topics and stop_words; the previous bare
# run_lda(data) call raises TypeError on a fresh kernel.
# NOTE(review): 2 topics and no stop-word list are chosen to match the
# recorded output of this cell (two topics dominated by 'the', 'of', ...) —
# confirm these were the intended baseline settings.
lda_model, lda, lda_vec, cv, corpus = run_lda(data, 2, None)
Topic 0:
[('the', 139166.20627866255), ('of', 85492.14362503793), ('to', 81466.41040767683), ('and', 78594.90040183658), ('in', 47973.701838936315), ('that', 43342.88607828087), ('for', 29249.2655364567), ('this', 28859.59783095147), ('is', 28270.074522886112), ('we', 27013.196067882127)]
Topic 1:
[('the', 1677.5842128197867), ('to', 846.5616279968173), ('and', 807.3821817461204), ('of', 773.1045091499119), ('in', 619.7655449974652), ('that', 488.29524584042053), ('we', 310.587681504484), ('for', 309.47625594198684), ('this', 294.0477824295658), ('it', 259.4750313814392)]
In [48]:
# Transpose so each row is a vocabulary term, add that term's count summed
# over all documents, then keep the terms whose total exceeds 68
# (hard-coded cut-off).
ct = corpus.T.assign(total=lambda term_rows: term_rows.sum(axis=1))
big_total = ct.loc[ct['total'] > 68]
len(big_total)
Out[48]:
3516
In [47]:
# Number of rows in ct (one per vocabulary term), for comparison with
# len(big_total).
len(ct)
Out[47]:
33688
In [53]:
# Transpose back so the retained high-count terms become column labels.
btt = big_total.T
In [63]:
# The column labels of btt are the terms whose summed count exceeded the
# cut-off; they are used as extra stop words.
additional_stopwords = btt.columns
In [64]:
from sklearn.feature_extraction import text

# Combine scikit-learn's built-in English stop words with the high-count
# terms collected in additional_stopwords; the result is a frozenset.
stop_words = text.ENGLISH_STOP_WORDS | frozenset(additional_stopwords)
In [65]:
# Display the combined stop-word set.
# NOTE(review): this renders the whole set; len(stop_words) or a small
# sample would keep the notebook output readable.
stop_words
Out[65]:
frozenset({'interesting',
           'about',
           'blind',
           '100',
           'supporters',
           'enrolled',
           'reauthorize',
           'suspend',
           'players',
           'sound',
           'force',
           'presented',
           'trust',
           'leaving',
           'disappointed',
           'isn',
           'known',
           'bankruptcy',
           'murder',
           'redeploy',
           'national',
           'gentlewoman',
           'non',
           'activity',
           'hereupon',
           'earlier',
           'organ',
           'simply',
           'teams',
           'development',
           'oppose',
           'goal',
           'lessons',
           'believes',
           'note',
           'head',
           'carson',
           'flag',
           'increased',
           'substitute',
           'joint',
           'prevented',
           'legal',
           'properly',
           'gratitude',
           'injustice',
           'false',
           'covered',
           'guard',
           'basketball',
           'why',
           'caucus',
           'lost',
           'confront',
           'factors',
           'completely',
           'valley',
           'opponents',
           'academy',
           'voting',
           'commemorating',
           'housing',
           'level',
           'clearly',
           'printed',
           'tonight',
           'map',
           'victim',
           'ignore',
           'begin',
           'gone',
           'homeowners',
           'flow',
           'liberty',
           'barbara',
           'cbc',
           'transition',
           'hard',
           'understanding',
           'connection',
           'poverty',
           'debating',
           'disability',
           'would',
           'members',
           'attack',
           'sanchez',
           'general',
           'authorizing',
           'package',
           'glenn',
           'houston',
           'pick',
           'and',
           'governments',
           'attorney',
           'commander',
           'investments',
           'hawaiians',
           'skelton',
           'hardworking',
           'hasn',
           'fires',
           'survive',
           'korean',
           'tuesday',
           'occurred',
           'flood',
           'regulations',
           'hometown',
           'statement',
           'talking',
           'longer',
           'gift',
           'when',
           'gathering',
           'nature',
           'straight',
           'drilling',
           'securities',
           'progressive',
           'happening',
           'trillion',
           'designation',
           'won',
           'honors',
           'crime',
           'relevant',
           'shown',
           'comment',
           'mary',
           'elsewhere',
           'library',
           'sell',
           'con',
           'person',
           '90',
           'park',
           'reform',
           '000',
           'debbie',
           'bombing',
           'shelf',
           'vision',
           'resources',
           '109th',
           'mention',
           'lands',
           'nato',
           'taught',
           'success',
           'rhetoric',
           'petraeus',
           'colombia',
           'myself',
           'millions',
           'demand',
           'plants',
           'inside',
           'friendly',
           'unique',
           'representing',
           'israeli',
           'convention',
           'workplace',
           'morning',
           'hirono',
           'recognition',
           'implemented',
           'arab',
           'hereby',
           'st',
           'missing',
           'explanation',
           'combined',
           'transportation',
           'corruption',
           'sources',
           'area',
           'beginning',
           'not',
           'secretary',
           'exploration',
           'whole',
           'version',
           'prescription',
           'introduced',
           'authorizes',
           'addressed',
           'listening',
           'have',
           'cross',
           'carrying',
           'touch',
           'property',
           'want',
           'engineers',
           'true',
           'training',
           'wisdom',
           'solve',
           'sons',
           'under',
           'moreover',
           'developing',
           'miles',
           'fight',
           'favor',
           'handle',
           'kinds',
           'must',
           'nobody',
           'men',
           'projected',
           'harman',
           'residents',
           'sectors',
           'centers',
           'childhood',
           'sectarian',
           'wall',
           'shut',
           'statistics',
           'study',
           'hiv',
           'seek',
           'common',
           'sitting',
           'creating',
           'detail',
           'asia',
           'framework',
           'rates',
           'religion',
           'palestinian',
           'measure',
           'push',
           'russia',
           'father',
           'estate',
           'mark',
           'author',
           'horrific',
           'medicine',
           'roosevelt',
           'town',
           'agency',
           'based',
           'bachus',
           'word',
           'dhs',
           '2007',
           'interests',
           'healthcare',
           'mandate',
           'around',
           'ownership',
           'operation',
           'physical',
           'budget',
           'offer',
           'draw',
           'real',
           'causes',
           'step',
           'lot',
           'though',
           'cover',
           'effort',
           'basis',
           'employees',
           'collective',
           'structure',
           'beautiful',
           'meetings',
           'election',
           'means',
           'foreclosure',
           'republican',
           'fitting',
           'buying',
           'demonstrated',
           'speaks',
           'that',
           'appreciate',
           'high',
           'january',
           'none',
           '18',
           'read',
           'gives',
           'accomplish',
           'screening',
           'pursuit',
           'grounds',
           'reverse',
           'sharing',
           'complex',
           'disabilities',
           'hundreds',
           'add',
           'classroom',
           'remarks',
           'peru',
           'her',
           'additional',
           'is',
           'prison',
           'watching',
           'levels',
           'whence',
           'monday',
           'va',
           '200',
           'violations',
           'sense',
           'cities',
           'fifth',
           'expanding',
           'households',
           'overseas',
           'thereupon',
           'pelosi',
           'keeping',
           '28',
           'representation',
           'star',
           'flexibility',
           '65',
           'text',
           'mess',
           'invest',
           'ways',
           'disadvantaged',
           'refused',
           'ally',
           'services',
           'performed',
           'future',
           'reflects',
           'track',
           'reducing',
           'relating',
           'question',
           'active',
           'memory',
           'plans',
           'closing',
           'road',
           'prior',
           'defend',
           'range',
           'traumatic',
           'veteran',
           'honored',
           'mexico',
           'clarke',
           'disparities',
           'severely',
           'physicians',
           'resolution',
           'group',
           'officer',
           'minority',
           'closer',
           'computer',
           'rule',
           'marines',
           'playing',
           'stem',
           'affordability',
           'personnel',
           'elderly',
           'contained',
           'interested',
           'code',
           'million',
           'tireless',
           '50',
           'families',
           'retired',
           'oversight',
           'culture',
           'fair',
           'missions',
           'responding',
           'teach',
           'eye',
           'supplies',
           'fallen',
           'agreements',
           'critical',
           'multiple',
           'including',
           'contribution',
           'blank',
           'sex',
           'reduces',
           'troubled',
           'maybe',
           'document',
           'middle',
           'borders',
           'down',
           'suffered',
           'up',
           'questions',
           'whereafter',
           'policies',
           'condolences',
           'living',
           'negotiation',
           'degrees',
           'launched',
           'neighbors',
           'once',
           'polls',
           'parties',
           'representatives',
           'growing',
           'places',
           'heritage',
           'invested',
           'mac',
           '68',
           'marked',
           'arts',
           'greece',
           'a',
           'did',
           'dropped',
           'ourselves',
           'revenues',
           'powerful',
           'reported',
           'etc',
           'or',
           'publicly',
           'sutton',
           'protect',
           'sea',
           'expansion',
           'often',
           'home',
           'vehicle',
           'technology',
           'commerce',
           'hall',
           'attention',
           'be',
           'mandates',
           'anywhere',
           'rita',
           'working',
           'requirements',
           'saddam',
           'telling',
           'on',
           'doors',
           'numbers',
           'richardson',
           'human',
           'organization',
           'loan',
           'double',
           'ideas',
           'arizona',
           'drop',
           'farmers',
           'fourth',
           'attacks',
           'promotes',
           'darfur',
           'vital',
           'timely',
           'adult',
           'urban',
           'bailout',
           '120',
           'wrote',
           'peterson',
           'i',
           'federal',
           'vouchers',
           'industry',
           'control',
           'pull',
           'started',
           'diabetes',
           'democracy',
           'sad',
           'smart',
           'rise',
           'suffer',
           'employment',
           'parks',
           'tomorrow',
           'wherever',
           'santa',
           'extended',
           'coal',
           'war',
           'specific',
           'firms',
           'immediately',
           'urgent',
           'compassion',
           'taxpayer',
           'school',
           'ensure',
           'missouri',
           'airport',
           'identity',
           'standing',
           'achieved',
           'thomas',
           'stock',
           'kids',
           'sacramento',
           'reached',
           'extra',
           'good',
           'according',
           'forced',
           'welcome',
           'tour',
           'jones',
           'accountable',
           'raise',
           'tell',
           'will',
           'combat',
           'counties',
           'communication',
           'show',
           'schools',
           'share',
           'acts',
           'scholarship',
           'sustained',
           'toward',
           'woolsey',
           'too',
           'shift',
           'totally',
           'arrested',
           'lakes',
           'traffic',
           'neglect',
           'prohibited',
           'courage',
           'cry',
           'caught',
           'fraud',
           'identify',
           'trying',
           '10',
           'events',
           'testimony',
           'half',
           'seeming',
           'respect',
           'kind',
           'focusing',
           'imperative',
           'billion',
           'targeting',
           'promised',
           'delighted',
           'casualties',
           'christmas',
           'starting',
           'voucher',
           'bipartisan',
           'literacy',
           'basic',
           'rejected',
           'extension',
           'deeply',
           'destroy',
           'party',
           'porter',
           'likely',
           'mom',
           '1995',
           'your',
           'deny',
           'expect',
           'native',
           'owners',
           'protection',
           'repeal',
           'seriously',
           'potential',
           'old',
           'fact',
           'programs',
           'veterans',
           'reason',
           '21',
           'effects',
           'criminal',
           'past',
           'primary',
           'strongly',
           'frank',
           'assistant',
           'science',
           'wind',
           'creation',
           'subprime',
           'businesses',
           'except',
           'latter',
           'conducting',
           'consensus',
           'wounded',
           'color',
           'farm',
           'case',
           'eligibility',
           'products',
           'barrel',
           'enable',
           'foxx',
           'light',
           'tried',
           'accomplishments',
           'able',
           '11',
           'early',
           'mistakes',
           'social',
           'produced',
           'conditions',
           'highest',
           'successful',
           'doc',
           'loophole',
           'huge',
           'gasoline',
           'operate',
           'priorities',
           'restrictions',
           'never',
           'pennsylvania',
           'civil',
           'earmark',
           '23',
           'unemployment',
           'commend',
           'white',
           'related',
           'discussing',
           'pursue',
           '95',
           'chicago',
           'sanctions',
           'far',
           'methods',
           'ours',
           'imports',
           'federally',
           'ongoing',
           'port',
           'europe',
           'ms',
           'nation',
           'continental',
           'looking',
           'faced',
           'ford',
           'fashion',
           'thought',
           'database',
           'blackburn',
           'using',
           'married',
           'suicide',
           'strengthening',
           'opposite',
           'qualify',
           'managing',
           'continues',
           'of',
           'additionally',
           'missile',
           'noted',
           'dollar',
           'rental',
           'called',
           'forever',
           'begun',
           'improves',
           'ie',
           'japan',
           '80',
           'outreach',
           'disastrous',
           'supports',
           'hero',
           'contracts',
           '27',
           'baby',
           'introduce',
           'target',
           'authorization',
           'process',
           'streets',
           'helps',
           'game',
           'renewable',
           'asset',
           'backlog',
           'lending',
           'drug',
           'laws',
           'years',
           'education',
           'graduates',
           'kansas',
           'much',
           'advocate',
           'problems',
           'leadership',
           'behavior',
           'agriculture',
           'condition',
           'medicare',
           'worthy',
           'sister',
           'expressed',
           'genetic',
           'earn',
           'consistent',
           'component',
           'contrast',
           'private',
           '24',
           'brown',
           'warrant',
           'essential',
           'witnessed',
           'taxes',
           'tibet',
           'compromise',
           'conduct',
           'couple',
           'hemisphere',
           'll',
           'serving',
           'choice',
           'bureaucracy',
           'nowhere',
           'types',
           'interest',
           'considered',
           'america',
           'wherein',
           'incredible',
           'hearing',
           'nearly',
           '01',
           'help',
           'fully',
           'unable',
           'april',
           'properties',
           'basically',
           'commitment',
           'student',
           'telecommunications',
           '150',
           'provisions',
           'actions',
           'affect',
           'pointed',
           'received',
           'recall',
           'memorial',
           'competitiveness',
           'proposed',
           'engaging',
           'mill',
           '180',
           'consent',
           'over',
           'ultimate',
           'neighborhoods',
           'view',
           'point',
           'judicial',
           'advice',
           'enforcement',
           'just',
           'deaths',
           'located',
           'electronic',
           'schwartz',
           'schakowsky',
           'monitor',
           'listened',
           'public',
           'commonsense',
           'television',
           'lose',
           'chemical',
           'actual',
           'out',
           'saw',
           'definition',
           'facts',
           'trucks',
           'allow',
           'compared',
           'natural',
           'negotiations',
           'entirely',
           'extent',
           'driving',
           'sacrifice',
           'bernice',
           'bridge',
           'held',
           'fired',
           'payments',
           'sure',
           'cells',
           'less',
           'final',
           'lebanon',
           'violence',
           'domestic',
           'angeles',
           'expense',
           'biofuels',
           'enacted',
           'math',
           'does',
           'edge',
           'expected',
           'assets',
           'consider',
           'continuing',
           'call',
           'ptsd',
           'camps',
           'mccarthy',
           'exercise',
           'universal',
           'design',
           'create',
           'john',
           'occurring',
           'regarding',
           'power',
           'guarantee',
           'briefly',
           'one',
           'imagine',
           'country',
           'surge',
           'katrina',
           'whose',
           'firm',
           'returned',
           'solar',
           'proven',
           'establishing',
           'outer',
           'particularly',
           'holds',
           'core',
           'diplomatic',
           'insured',
           'hopefully',
           'visit',
           'die',
           'barrels',
           'taken',
           'record',
           'ignored',
           'business',
           'church',
           'lehtinen',
           'join',
           'fewer',
           'appointed',
           'offices',
           'recognizing',
           'requires',
           'poorest',
           'senators',
           'broad',
           'escalation',
           'credit',
           'thru',
           'changed',
           'occupation',
           'waters',
           'each',
           'nor',
           'celebration',
           'included',
           'ceo',
           'sixty',
           'savings',
           'brothers',
           'contrary',
           'detection',
           'calls',
           'saying',
           'vehicles',
           'decision',
           'month',
           'together',
           'wild',
           'ran',
           'letter',
           'previously',
           'caused',
           'effect',
           'african',
           'senate',
           'personally',
           'korea',
           'union',
           'outstanding',
           'divided',
           'followed',
           'apply',
           'welfare',
           'convicted',
           'cards',
           'delays',
           'mail',
           'cost',
           'engage',
           'targeted',
           'charges',
           'biggest',
           'tragic',
           'studies',
           'shea',
           'driven',
           'spend',
           'americorps',
           'invasion',
           'gave',
           'lobbying',
           'grant',
           'leaves',
           '2004',
           'contractors',
           'employer',
           'nationwide',
           'castor',
           'letters',
           'plant',
           'five',
           '2008',
           'barriers',
           'nuclear',
           'site',
           'trial',
           'elementary',
           'cosponsors',
           'everyday',
           'saving',
           'discipline',
           'internet',
           'celebrate',
           'agreed',
           'rest',
           'legislature',
           'worldwide',
           'listen',
           'struggle',
           'rely',
           'make',
           'pump',
           'twice',
           'between',
           'decades',
           'certainly',
           'abortion',
           'transparency',
           'citizens',
           'deep',
           'vi',
           'pipeline',
           'sale',
           'above',
           'countless',
           'treasury',
           'wage',
           'cases',
           'torture',
           'whereas',
           'regulators',
           ...})
In [70]:
# Refit with 40 topics, excluding the extended stop-word set.
# NOTE(review): this rebinds `corpus`, the frame the earlier stop-word cell
# derives its per-term totals from, so re-running that cell afterwards would
# work on the already-filtered vocabulary — confirm this is intended.
lda_model, lda, lda_vec, cv, corpus = run_lda(data, 40, stop_words)
Topic 0:
[('duncan', 0.8125087034627877), ('horses', 0.636563654410237), ('olympic', 0.5533880579711062), ('staggering', 0.5527900974402409), ('sendler', 0.5516915414390462), ('roybal', 0.5495142256341471), ('stewardship', 0.539990481442148), ('truck', 0.5301725845028952), ('toys', 0.5263862471243266), ('holiness', 0.5200709595461378)]
Topic 1:
[('contraceptives', 1.1322917713596823), ('insert', 0.819779049499601), ('speculation', 0.7600676808738385), ('cuba', 0.7478983049740057), ('commodity', 0.7454970757293969), ('folio', 0.7212123338954801), ('unintended', 0.6916996599637789), ('egypt', 0.6769680911958883), ('crops', 0.6740616336872267), ('usaid', 0.6631324683368433)]
Topic 2:
[('latinos', 28.57168440672087), ('chavez', 12.259258607480422), ('ferrera', 9.087648101652317), ('cesar', 7.962559706105602), ('estrada', 7.455998703496498), ('ciudad', 7.1079346630714895), ('latinas', 6.3997410186732955), ('islander', 5.8864296194686485), ('dtv', 5.716305162191971), ('promotoras', 5.665231244414415)]
Topic 3:
[('mentoring', 18.259233803905108), ('diploma', 12.119931039716786), ('mills', 8.753096448564516), ('sandberg', 8.049337569760016), ('minneapolis', 5.941193962866121), ('ceecee', 5.642099582365889), ('cole', 4.657985207482534), ('prenatally', 4.6510876600678035), ('lyles', 4.034983475130155), ('mentor', 3.943596108889469)]
Topic 4:
[('myrick', 35.580922733254134), ('kristen', 5.9911197739881255), ('meineke', 4.869688196019243), ('ric', 4.83460324639221), ('flair', 4.825883290906731), ('pap', 4.638299324177404), ('charlotte', 4.539149178510334), ('dwi', 4.098580142448335), ('nay', 3.4015693705911643), ('cervical', 3.3282733911635085)]
Topic 5:
[('granger', 34.40785516627489), ('talkers', 11.580917506225033), ('chibitty', 6.331839594505975), ('mosteiro', 5.543053227052157), ('spencer', 5.0243453482474365), ('clarence', 4.925198805617137), ('tahmahkera', 4.019393594801073), ('comanche', 3.9835079698447), ('paulie', 2.4474625185474337), ('allan', 2.424458177320304)]
Topic 6:
[('cloning', 31.75892563335224), ('reproductive', 19.641433515491627), ('embryos', 7.4912217773644745), ('cloned', 4.575742320112252), ('civics', 4.199812504314061), ('mouse', 3.743105933702287), ('transplant', 3.6004350756332406), ('denver', 3.248981824635166), ('scnt', 3.1818250067924225), ('bans', 3.114101345365679)]
Topic 7:
[('meth', 22.684423700186834), ('dandy', 9.335012769984825), ('walker', 7.583955720790898), ('syndrome', 7.0957952389580115), ('katelyn', 7.0688930788352655), ('kohlberg', 7.050536253729905), ('prostate', 6.6429693494567585), ('jerome', 6.349921183117512), ('beavers', 6.264877551032239), ('beaver', 5.382784554770402)]
Topic 8:
[('emerson', 25.149006261270976), ('hansbrough', 4.808680212904641), ('sr', 4.38369215982008), ('hudson', 3.728690505759795), ('limbaugh', 3.62532143192573), ('tyler', 3.115717008151453), ('dissidents', 3.04996962842637), ('berry', 2.6850076258304663), ('girardeau', 2.471265356305475), ('clay', 2.452296955095839)]
Topic 9:
[('boating', 0.4269410770634471), ('contraceptives', 0.41960094062098663), ('servicemen', 0.41424908832966667), ('granger', 0.412722484643257), ('contact', 0.40247626899129196), ('egypt', 0.39226077323336433), ('emerson', 0.3827482973220052), ('murtha', 0.37703459666617034), ('jacksonville', 0.3668034787187638), ('firmly', 0.36436227810056415)]
Topic 10:
[('cloning', 0.9507235253827079), ('pensions', 0.7418722928287097), ('glider', 0.679878020668157), ('reproductive', 0.6711257184467102), ('truck', 0.6609524854245595), ('roybal', 0.6486906156757777), ('allard', 0.634440704597814), ('highways', 0.616134609956907), ('leavenworth', 0.5334253524298543), ('barracks', 0.5218447985137469)]
Topic 11:
[('wire', 34.9113046924464), ('probable', 18.747853235005373), ('scaap', 14.749274631128078), ('verify', 13.734101933366876), ('foreigner', 12.599656304445869), ('astronomy', 11.99447668205341), ('heathrow', 10.343103676932564), ('haul', 10.176220995370569), ('phones', 9.493353094544316), ('stockpile', 9.340211511286105)]
Topic 12:
[('latinos', 0.713661095497453), ('chavez', 0.48231501854879105), ('betty', 0.463533102960083), ('citizenship', 0.4517123625393424), ('juarez', 0.4044613284122105), ('permanently', 0.4007216395100808), ('toys', 0.38637148979026487), ('blessed', 0.378079600069133), ('latin', 0.3747279144869569), ('collar', 0.3727541065986048)]
Topic 13:
[('hawai', 33.22607239139987), ('kalaupapa', 19.08044249744785), ('filipino', 17.030719074824688), ('indigenous', 13.497323132112811), ('natives', 10.522756228378356), ('aloha', 9.839148731795722), ('motorcycle', 9.19808291082505), ('uh', 8.742973639888959), ('mahalo', 8.022796848697975), ('ix', 7.92816331430612)]
Topic 14:
[('paralysis', 12.509271067616044), ('reclamation', 12.368387376424483), ('paired', 12.226427578654578), ('cobra', 12.096511596680713), ('blindness', 9.835719287877977), ('blinded', 9.600726713778014), ('trains', 9.191659292828389), ('basin', 8.753802519393775), ('postpartum', 8.578995760870814), ('cervical', 8.477258038662994)]
Topic 15:
[('tsongas', 0.7017416387931157), ('militia', 0.41131485365421283), ('barrett', 0.3948687744832978), ('cite', 0.38791053512422924), ('risen', 0.3833669966299425), ('exploited', 0.3811543809549447), ('location', 0.38091374495975644), ('challenging', 0.37981116035255214), ('50th', 0.37861120397861725), ('professor', 0.3745834478792065)]
Topic 16:
[('russian', 0.482279439990327), ('filipino', 0.45942425225106254), ('sorority', 0.4385422398311869), ('johnny', 0.43188497111243596), ('cherokee', 0.41593317197606416), ('islands', 0.41571737663823544), ('mercury', 0.4134344976392453), ('embrace', 0.4108428191378143), ('11th', 0.4084278523786756), ('sends', 0.4069953547983149)]
Topic 17:
[('ali', 51.22375279533656), ('scale', 50.98692694163963), ('certification', 50.98241393489512), ('catholic', 50.89954459392632), ('negotiated', 50.817198651933644), ('engagement', 50.8029979071559), ('utilize', 50.64993363442352), ('pending', 50.64004984116555), ('conservative', 50.611953433557886), ('ed', 50.57036491423741)]
Topic 18:
[('yucca', 38.20012144567988), ('vegas', 31.753144658250513), ('las', 31.21297568337285), ('hamas', 19.22239660101952), ('justin', 14.599410804831185), ('taiwan', 14.475186602846442), ('gaza', 12.022792655153635), ('israelis', 12.007039834681802), ('shipments', 8.472651650536392), ('sderot', 8.46188338362181)]
Topic 19:
[('allard', 0.39299111477014076), ('sat', 0.36386356589910607), ('sacrificed', 0.362596708826715), ('permits', 0.3554702803213507), ('recess', 0.354419152565692), ('regulate', 0.3505653461717683), ('kalaupapa', 0.34814531736531035), ('hawai', 0.3469303728030136), ('successes', 0.3453478342228781), ('conservative', 0.34459186213729287)]
Topic 20:
[('cincinnati', 0.3959669860758346), ('reverend', 0.393620277738077), ('miami', 0.39122204377442527), ('doubled', 0.38634006659317793), ('brooklyn', 0.36028468170637207), ('pending', 0.35609057194589905), ('certification', 0.35600249099080133), ('strengthened', 0.35458207977632566), ('sudanese', 0.35428412376439317), ('pornography', 0.35115612079607034)]
Topic 21:
[('hawai', 0.5473599898575761), ('filipino', 0.4325739505972028), ('naval', 0.4226724076702524), ('scholarships', 0.37297380635056127), ('pryce', 0.37067172622659855), ('hyde', 0.36384664785139964), ('38', 0.3582530473096638), ('wednesday', 0.3551401625851505), ('uh', 0.35381640344371906), ('sudanese', 0.3527686698370079)]
Topic 22:
[('toledo', 0.6956434564900542), ('werth', 0.6021013935038799), ('gillibrand', 0.5474907787140475), ('undemocratic', 0.527281552232615), ('dressel', 0.5192023884757508), ('detective', 0.5188692973688263), ('latta', 0.518639716409579), ('citigroup', 0.4944473080166176), ('joy', 0.47832826173483695), ('ronald', 0.46636087046646346)]
Topic 23:
[('appalachian', 0.37918934247125724), ('prosecutor', 0.37756301845573725), ('trees', 0.377239144994428), ('refineries', 0.3748090099631378), ('connected', 0.37457157976960415), ('strikes', 0.370114035319386), ('latta', 0.36757450141530756), ('diagnosis', 0.36639655273466953), ('bowl', 0.3663339736591978), ('serv', 0.36305528701717527)]
Topic 24:
[('homeowner', 0.6470926388176161), ('waite', 0.5525119800750995), ('openness', 0.4442809829706434), ('surely', 0.4180211007475265), ('newly', 0.40197995059166614), ('controlling', 0.38828073290536547), ('pen', 0.3871347825966023), ('1990', 0.3850761763885205), ('leased', 0.3804677919168707), ('route', 0.37893726090169927)]
Topic 25:
[('granger', 0.49494784295758415), ('emerson', 0.4086440302078075), ('controls', 0.3793131310080471), ('attempted', 0.3488145551302645), ('quo', 0.34815385222116557), ('tie', 0.346653340365238), ('del', 0.3463394491550414), ('complaints', 0.3449642989433149), ('median', 0.34217637775045734), ('figures', 0.3412412415408793)]
Topic 26:
[('blindness', 0.5134360469700382), ('paired', 0.512944598884765), ('cobra', 0.4649087778470602), ('paralysis', 0.4573950566667391), ('postpartum', 0.45670409676179263), ('connected', 0.44889600476889746), ('madison', 0.42918522573561346), ('blinded', 0.41830489253771747), ('frustration', 0.4070137344445208), ('idiopathic', 0.40679535149084145)]
Topic 27:
[('roybal', 24.122270295710766), ('allard', 23.46089211695106), ('newborn', 10.218321809237267), ('underage', 5.798194971811154), ('1381', 4.002805807770457), ('saves', 2.2225759600472483), ('5919', 2.1979523527258684), ('preventable', 2.0579223738899386), ('absenteeism', 1.965069929024387), ('chronic', 1.8962226812250926)]
Topic 28:
[('herseth', 5.566906849265752), ('chaplain', 4.428344207311348), ('gruneich', 4.104396039498751), ('dakota', 3.267978683070249), ('legion', 2.60994563981993), ('stan', 1.7088000912376424), ('reverend', 1.664581683353139), ('sioux', 1.6218019894382145), ('reprioritizes', 0.9907263704035106), ('dakotas', 0.9745865765678978)]
Topic 29:
[('jamestown', 3.0954395196563502), ('chickahominy', 1.575585623507163), ('thomasina', 1.5146923543329511), ('colonists', 1.204535730491889), ('indians', 1.1366324286896878), ('400th', 1.0989952660745592), ('1607', 1.0559114322792902), ('nansemond', 0.9208764026235462), ('mattaponi', 0.8964088160834407), ('1294', 0.8685308996457298)]
Topic 30:
[('gillibrand', 35.52665478090988), ('upstate', 14.805786155980986), ('placid', 14.039997098518285), ('olympic', 10.961667295079398), ('hunting', 8.649737752528196), ('fishing', 6.6561142451729225), ('suzanne', 6.136443960561101), ('albany', 5.2880466622052875), ('suny', 4.0950282420299375), ('1932', 3.4643889360149305)]
Topic 31:
[('anaheim', 13.844684732848535), ('pavarotti', 12.215743677475714), ('vietnamese', 11.87219601064632), ('ana', 9.91607982179824), ('replenishment', 8.444737999403593), ('groundwater', 8.312759073095396), ('molina', 6.838120476319835), ('rowland', 6.812944127866036), ('muir', 6.419170145358405), ('bulletproof', 5.831191305954826)]
Topic 32:
[('boating', 0.7204390738323612), ('indians', 0.6117330706640481), ('cafe', 0.5809547916639097), ('lapeer', 0.543323625185996), ('turrill', 0.5342226168369453), ('nics', 0.5267741824337832), ('blessed', 0.5229595628690648), ('indigenous', 0.5127594391090068), ('hawai', 0.5126217562161469), ('mills', 0.49708252452988716)]
Topic 33:
[('legion', 0.4143770006246741), ('breakfast', 0.4116520793820419), ('milwaukee', 0.4043135139189563), ('hopes', 0.39093150785410585), ('enduring', 0.37416167262110384), ('reverend', 0.3704199149964053), ('armor', 0.3678144565745031), ('herseth', 0.3640655968433751), ('sovereign', 0.3630615651316789), ('wounds', 0.3626699257717096)]
Topic 34:
[('gators', 0.6443691180849711), ('freight', 0.5819092613713144), ('mica', 0.5549189102410074), ('poison', 0.5144500400509874), ('railroads', 0.509590158869525), ('downtown', 0.5066587105819484), ('cruise', 0.49840456177468234), ('mem', 0.468617052063636), ('jacksonville', 0.4681751171626424), ('diego', 0.46475146154890506)]
Topic 35:
[('nics', 44.10429523056637), ('glider', 21.776823715651965), ('bullying', 18.04909072313932), ('dba', 17.126232888217615), ('bowling', 14.806882770581378), ('volunteerism', 12.764673347153524), ('invigorating', 11.491203804500756), ('imaging', 10.5970978553936), ('2640', 8.89370136235541), ('wichita', 8.790737871879099)]
Topic 36:
[('tsongas', 18.11955577908706), ('barrett', 7.964388531147645), ('jimenez', 5.685900996619862), ('lowell', 5.337609411979292), ('alex', 4.748788764611502), ('militia', 3.743430179506232), ('tunnel', 2.517541363338138), ('whereabouts', 2.4340731727598217), ('concord', 2.2208451389836945), ('boundary', 2.0893137615816992)]
Topic 37:
[('myrick', 0.9304278594679118), ('akron', 0.5534511554493476), ('josh', 0.5343165139749546), ('hawai', 0.5340117150273894), ('trading', 0.48361297659996033), ('popcorn', 0.47534661720404353), ('basin', 0.45341670656839106), ('193', 0.44050947013538005), ('filipino', 0.42322927741528604), ('13th', 0.4229800997244627)]
Topic 38:
[('wyoming', 12.86132760457899), ('meth', 12.66368078806605), ('trona', 8.827824303104054), ('mcgee', 7.079098313404798), ('cubin', 6.30046602948609), ('gale', 5.642282167337626), ('6901', 3.411895823554226), ('laramie', 2.551652826636237), ('wills', 2.496086051613352), ('335', 2.3105761442080412)]
Topic 39:
[('pryce', 0.6104805380787394), ('glider', 0.5750694278227312), ('nics', 0.5699499329382702), ('volunteerism', 0.4663633400402911), ('criminals', 0.41893842748203564), ('london', 0.3852186270469489), ('tria', 0.38178270764466227), ('rep', 0.3731807027438391), ('exploited', 0.37115651506089553), ('exposure', 0.36769625827886127)]
In [71]:
# Build the pyLDAvis view of the fitted topic model.
# start_vis is defined in the notebook's first code cell, where it calls
# LDAvis.prepare(lda, lda_vec, cv, mds='tsne').
# NOTE(review): lda, lda_vec and cv must already exist in the kernel; they
# look like the model, document-term matrix and vectorizer returned by
# run_lda, but the cell that assigns them is not visible here -- confirm
# this cell still works after Restart Kernel -> Run All.
# NOTE(review): the pandas FutureWarning printed under this cell is raised
# from pyLDAvis/_prepare.py (a pd.concat call without an explicit `sort`),
# i.e. inside the library rather than in this notebook's code.
start_vis(lda, lda_vec, cv)
/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/pyLDAvis/_prepare.py:257: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

  return pd.concat([default_term_info] + list(topic_dfs))
In [ ]: