## =======================================================
## IMPORTING
## =======================================================
import os
def get_data_from_files(path):
    """Read every regular file in a directory and return their contents.

    Args:
        path: Directory to read. A trailing path separator is optional.

    Returns:
        A list with one string per file, ordered by file name. Files are
        decoded as ISO-8859-1. An empty directory yields an empty list.

    Raises:
        OSError: If ``path`` cannot be listed or a file cannot be opened.
    """
    results = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # corpus order (and anything indexed by document position) reproducible.
    for file_name in sorted(os.listdir(path)):
        # os.path.join works whether or not `path` ends with a separator,
        # unlike plain string concatenation.
        file_path = os.path.join(path, file_name)
        if not os.path.isfile(file_path):
            # Sub-directories cannot be read as documents; skip them.
            continue
        # The context manager closes the handle even if read() raises.
        with open(file_path, encoding="ISO-8859-1") as f:
            results.append(f.read())
    return results
## =======================================================
## MODELING
## =======================================================
import pandas as pd
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer
import gensim
from gensim.utils import simple_preprocess
from gensim.parsing.preprocessing import STOPWORDS
def run_lda(data, num_topics, stop_words):
    """Fit a bag-of-words LDA topic model on ``data`` and print its top terms.

    Args:
        data: Iterable of raw document strings.
        num_topics: Number of topics, passed to LatentDirichletAllocation
            as ``n_components``.
        stop_words: Words to drop during vectorisation, passed to
            CountVectorizer. May be ``None``, the string ``"english"`` or
            any collection of words (list, set, frozenset, pandas Index).

    Returns:
        A tuple ``(lda_model, lda, lda_vec, cv, corpus)``:
        ``lda_model`` is the result of ``lda.fit_transform(lda_vec)``,
        ``lda`` is the fitted LatentDirichletAllocation estimator,
        ``lda_vec`` is the count matrix produced by the vectorizer,
        ``cv`` is the fitted CountVectorizer, and ``corpus`` is a dense
        DataFrame of ``lda_vec`` with one column per vocabulary term.
    """
    # Newer scikit-learn releases validate `stop_words` and only accept
    # None, "english" or a list, so other collections (e.g. the frozenset
    # returned by ENGLISH_STOP_WORDS.union) are normalised to a list.
    if stop_words is not None and not isinstance(stop_words, str):
        stop_words = list(stop_words)

    cv = CountVectorizer(stop_words=stop_words)
    lda_vec = cv.fit_transform(data)

    # get_feature_names() was replaced by get_feature_names_out() in newer
    # scikit-learn; prefer the new accessor and fall back for old installs.
    if hasattr(cv, "get_feature_names_out"):
        lda_columns = cv.get_feature_names_out()
    else:
        lda_columns = cv.get_feature_names()

    # Dense copy of the counts; callers use it for per-term totals.
    corpus = pd.DataFrame(lda_vec.toarray(), columns=lda_columns)

    lda = LatentDirichletAllocation(n_components=num_topics, max_iter=10,
                                    learning_method='online')
    lda_model = lda.fit_transform(lda_vec)
    print_topics(lda, cv)
    return lda_model, lda, lda_vec, cv, corpus
## =======================================================
## HELPERS
## =======================================================
import numpy as np
# Seed NumPy's global random number generator with a fixed value so that
# anything drawing from it is repeatable between runs.
# NOTE(review): presumably meant to make the LDA fit reproducible, since
# run_lda passes no random_state to the estimator -- confirm.
np.random.seed(210)
def print_topics(model, vectorizer, top_n=10):
    """Print the ``top_n`` highest-weighted terms of every topic.

    For each row of ``model.components_`` this prints a ``Topic <idx>:``
    header followed by a list of ``(term, weight)`` tuples, heaviest first.

    Args:
        model: Fitted topic model exposing ``components_`` (one row of
            per-term weights per topic).
        vectorizer: Fitted vectorizer exposing ``get_feature_names_out()``
            or the older ``get_feature_names()``.
        top_n: Number of terms to print per topic.

    Returns:
        None. Output goes to stdout.
    """
    # Fetch the vocabulary once instead of once per printed term.
    # get_feature_names() was replaced by get_feature_names_out() in newer
    # scikit-learn, so prefer the new accessor and fall back for old installs.
    if hasattr(vectorizer, "get_feature_names_out"):
        feature_names = vectorizer.get_feature_names_out()
    else:
        feature_names = vectorizer.get_feature_names()

    for idx, topic in enumerate(model.components_):
        print("Topic %d:" % (idx))
        # argsort is ascending; the reversed slice picks the top_n largest
        # weights in descending order.
        top_indices = topic.argsort()[:-top_n - 1:-1]
        # Plain str/float keep the printed form stable regardless of
        # whether NumPy scalar types are returned.
        print([(str(feature_names[i]), float(topic[i])) for i in top_indices])
## =======================================================
## VISUALIZING
## =======================================================
import pyLDAvis.sklearn as LDAvis
import pyLDAvis
def start_vis(lda, lda_vec, cv):
    """Build the pyLDAvis view of a fitted LDA model and display it.

    Args:
        lda: Fitted LDA estimator.
        lda_vec: Document-term matrix the estimator was fitted on.
        cv: Fitted vectorizer that produced ``lda_vec``.

    Returns:
        None. The prepared visualisation is handed to ``pyLDAvis.show``.
    """
    # 'tsne' selects the method used to lay the topics out in 2-D.
    prepared = LDAvis.prepare(lda, lda_vec, cv, mds='tsne')
    pyLDAvis.show(prepared)
    # Alternative kept from the original, disabled:
    # pyLDAvis.save_html(prepared, 'HW8_lda.html')
# data = get_data_from_files('Dog_Hike/')
# lda_model, lda, lda_vec, cv = run_lda(data)
# Read the documents from the two corpus directories and combine them into
# a single list (all of data_fd first, then all of data_fr).
data_fd = get_data_from_files('110/110-f-d/')
data_fr = get_data_from_files('110/110-f-r/')
data = data_fd + data_fr
data
# Baseline fit. run_lda requires num_topics and stop_words; calling it with
# only `data` raises TypeError. The recorded output for this step shows two
# topics dominated by unfiltered function words ('the', 'of', ...), i.e.
# two topics and no stop-word filtering, so those values are passed here.
lda_model, lda, lda_vec, cv, corpus = run_lda(data, 2, None)
Topic 0:
[('the', 139166.20627866255), ('of', 85492.14362503793), ('to', 81466.41040767683), ('and', 78594.90040183658), ('in', 47973.701838936315), ('that', 43342.88607828087), ('for', 29249.2655364567), ('this', 28859.59783095147), ('is', 28270.074522886112), ('we', 27013.196067882127)]
Topic 1:
[('the', 1677.5842128197867), ('to', 846.5616279968173), ('and', 807.3821817461204), ('of', 773.1045091499119), ('in', 619.7655449974652), ('that', 488.29524584042053), ('we', 310.587681504484), ('for', 309.47625594198684), ('this', 294.0477824295658), ('it', 259.4750313814392)]
# corpus
# c2 = corpus.append(df.sum().rename('Total'))
# Transpose the document-term count table so that each row is a vocabulary
# term and each column is a document.
ct = corpus.T
# Row sum = total number of occurrences of the term across all documents.
ct['total'] = ct.sum(axis=1)
# Keep only the terms that occur more than 68 times in total.
big_total = ct[ct['total'] > 68]
# Number of terms above the threshold.
len(big_total)
3516
# Total number of vocabulary terms (rows of ct), for comparison with
# len(big_total).
len(ct)
33688
# Transpose back so the frequent terms become the column labels.
btt = big_total.T
# Every term whose total count exceeded the threshold is treated as an
# additional stop word.
additional_stopwords = btt.columns
from sklearn.feature_extraction import text
# Merge scikit-learn's built-in English stop words with the frequent terms;
# the result is a frozenset.
stop_words = text.ENGLISH_STOP_WORDS.union(additional_stopwords)
stop_words
frozenset({'interesting',
'about',
'blind',
'100',
'supporters',
'enrolled',
'reauthorize',
'suspend',
'players',
'sound',
'force',
'presented',
'trust',
'leaving',
'disappointed',
'isn',
'known',
'bankruptcy',
'murder',
'redeploy',
'national',
'gentlewoman',
'non',
'activity',
'hereupon',
'earlier',
'organ',
'simply',
'teams',
'development',
'oppose',
'goal',
'lessons',
'believes',
'note',
'head',
'carson',
'flag',
'increased',
'substitute',
'joint',
'prevented',
'legal',
'properly',
'gratitude',
'injustice',
'false',
'covered',
'guard',
'basketball',
'why',
'caucus',
'lost',
'confront',
'factors',
'completely',
'valley',
'opponents',
'academy',
'voting',
'commemorating',
'housing',
'level',
'clearly',
'printed',
'tonight',
'map',
'victim',
'ignore',
'begin',
'gone',
'homeowners',
'flow',
'liberty',
'barbara',
'cbc',
'transition',
'hard',
'understanding',
'connection',
'poverty',
'debating',
'disability',
'would',
'members',
'attack',
'sanchez',
'general',
'authorizing',
'package',
'glenn',
'houston',
'pick',
'and',
'governments',
'attorney',
'commander',
'investments',
'hawaiians',
'skelton',
'hardworking',
'hasn',
'fires',
'survive',
'korean',
'tuesday',
'occurred',
'flood',
'regulations',
'hometown',
'statement',
'talking',
'longer',
'gift',
'when',
'gathering',
'nature',
'straight',
'drilling',
'securities',
'progressive',
'happening',
'trillion',
'designation',
'won',
'honors',
'crime',
'relevant',
'shown',
'comment',
'mary',
'elsewhere',
'library',
'sell',
'con',
'person',
'90',
'park',
'reform',
'000',
'debbie',
'bombing',
'shelf',
'vision',
'resources',
'109th',
'mention',
'lands',
'nato',
'taught',
'success',
'rhetoric',
'petraeus',
'colombia',
'myself',
'millions',
'demand',
'plants',
'inside',
'friendly',
'unique',
'representing',
'israeli',
'convention',
'workplace',
'morning',
'hirono',
'recognition',
'implemented',
'arab',
'hereby',
'st',
'missing',
'explanation',
'combined',
'transportation',
'corruption',
'sources',
'area',
'beginning',
'not',
'secretary',
'exploration',
'whole',
'version',
'prescription',
'introduced',
'authorizes',
'addressed',
'listening',
'have',
'cross',
'carrying',
'touch',
'property',
'want',
'engineers',
'true',
'training',
'wisdom',
'solve',
'sons',
'under',
'moreover',
'developing',
'miles',
'fight',
'favor',
'handle',
'kinds',
'must',
'nobody',
'men',
'projected',
'harman',
'residents',
'sectors',
'centers',
'childhood',
'sectarian',
'wall',
'shut',
'statistics',
'study',
'hiv',
'seek',
'common',
'sitting',
'creating',
'detail',
'asia',
'framework',
'rates',
'religion',
'palestinian',
'measure',
'push',
'russia',
'father',
'estate',
'mark',
'author',
'horrific',
'medicine',
'roosevelt',
'town',
'agency',
'based',
'bachus',
'word',
'dhs',
'2007',
'interests',
'healthcare',
'mandate',
'around',
'ownership',
'operation',
'physical',
'budget',
'offer',
'draw',
'real',
'causes',
'step',
'lot',
'though',
'cover',
'effort',
'basis',
'employees',
'collective',
'structure',
'beautiful',
'meetings',
'election',
'means',
'foreclosure',
'republican',
'fitting',
'buying',
'demonstrated',
'speaks',
'that',
'appreciate',
'high',
'january',
'none',
'18',
'read',
'gives',
'accomplish',
'screening',
'pursuit',
'grounds',
'reverse',
'sharing',
'complex',
'disabilities',
'hundreds',
'add',
'classroom',
'remarks',
'peru',
'her',
'additional',
'is',
'prison',
'watching',
'levels',
'whence',
'monday',
'va',
'200',
'violations',
'sense',
'cities',
'fifth',
'expanding',
'households',
'overseas',
'thereupon',
'pelosi',
'keeping',
'28',
'representation',
'star',
'flexibility',
'65',
'text',
'mess',
'invest',
'ways',
'disadvantaged',
'refused',
'ally',
'services',
'performed',
'future',
'reflects',
'track',
'reducing',
'relating',
'question',
'active',
'memory',
'plans',
'closing',
'road',
'prior',
'defend',
'range',
'traumatic',
'veteran',
'honored',
'mexico',
'clarke',
'disparities',
'severely',
'physicians',
'resolution',
'group',
'officer',
'minority',
'closer',
'computer',
'rule',
'marines',
'playing',
'stem',
'affordability',
'personnel',
'elderly',
'contained',
'interested',
'code',
'million',
'tireless',
'50',
'families',
'retired',
'oversight',
'culture',
'fair',
'missions',
'responding',
'teach',
'eye',
'supplies',
'fallen',
'agreements',
'critical',
'multiple',
'including',
'contribution',
'blank',
'sex',
'reduces',
'troubled',
'maybe',
'document',
'middle',
'borders',
'down',
'suffered',
'up',
'questions',
'whereafter',
'policies',
'condolences',
'living',
'negotiation',
'degrees',
'launched',
'neighbors',
'once',
'polls',
'parties',
'representatives',
'growing',
'places',
'heritage',
'invested',
'mac',
'68',
'marked',
'arts',
'greece',
'a',
'did',
'dropped',
'ourselves',
'revenues',
'powerful',
'reported',
'etc',
'or',
'publicly',
'sutton',
'protect',
'sea',
'expansion',
'often',
'home',
'vehicle',
'technology',
'commerce',
'hall',
'attention',
'be',
'mandates',
'anywhere',
'rita',
'working',
'requirements',
'saddam',
'telling',
'on',
'doors',
'numbers',
'richardson',
'human',
'organization',
'loan',
'double',
'ideas',
'arizona',
'drop',
'farmers',
'fourth',
'attacks',
'promotes',
'darfur',
'vital',
'timely',
'adult',
'urban',
'bailout',
'120',
'wrote',
'peterson',
'i',
'federal',
'vouchers',
'industry',
'control',
'pull',
'started',
'diabetes',
'democracy',
'sad',
'smart',
'rise',
'suffer',
'employment',
'parks',
'tomorrow',
'wherever',
'santa',
'extended',
'coal',
'war',
'specific',
'firms',
'immediately',
'urgent',
'compassion',
'taxpayer',
'school',
'ensure',
'missouri',
'airport',
'identity',
'standing',
'achieved',
'thomas',
'stock',
'kids',
'sacramento',
'reached',
'extra',
'good',
'according',
'forced',
'welcome',
'tour',
'jones',
'accountable',
'raise',
'tell',
'will',
'combat',
'counties',
'communication',
'show',
'schools',
'share',
'acts',
'scholarship',
'sustained',
'toward',
'woolsey',
'too',
'shift',
'totally',
'arrested',
'lakes',
'traffic',
'neglect',
'prohibited',
'courage',
'cry',
'caught',
'fraud',
'identify',
'trying',
'10',
'events',
'testimony',
'half',
'seeming',
'respect',
'kind',
'focusing',
'imperative',
'billion',
'targeting',
'promised',
'delighted',
'casualties',
'christmas',
'starting',
'voucher',
'bipartisan',
'literacy',
'basic',
'rejected',
'extension',
'deeply',
'destroy',
'party',
'porter',
'likely',
'mom',
'1995',
'your',
'deny',
'expect',
'native',
'owners',
'protection',
'repeal',
'seriously',
'potential',
'old',
'fact',
'programs',
'veterans',
'reason',
'21',
'effects',
'criminal',
'past',
'primary',
'strongly',
'frank',
'assistant',
'science',
'wind',
'creation',
'subprime',
'businesses',
'except',
'latter',
'conducting',
'consensus',
'wounded',
'color',
'farm',
'case',
'eligibility',
'products',
'barrel',
'enable',
'foxx',
'light',
'tried',
'accomplishments',
'able',
'11',
'early',
'mistakes',
'social',
'produced',
'conditions',
'highest',
'successful',
'doc',
'loophole',
'huge',
'gasoline',
'operate',
'priorities',
'restrictions',
'never',
'pennsylvania',
'civil',
'earmark',
'23',
'unemployment',
'commend',
'white',
'related',
'discussing',
'pursue',
'95',
'chicago',
'sanctions',
'far',
'methods',
'ours',
'imports',
'federally',
'ongoing',
'port',
'europe',
'ms',
'nation',
'continental',
'looking',
'faced',
'ford',
'fashion',
'thought',
'database',
'blackburn',
'using',
'married',
'suicide',
'strengthening',
'opposite',
'qualify',
'managing',
'continues',
'of',
'additionally',
'missile',
'noted',
'dollar',
'rental',
'called',
'forever',
'begun',
'improves',
'ie',
'japan',
'80',
'outreach',
'disastrous',
'supports',
'hero',
'contracts',
'27',
'baby',
'introduce',
'target',
'authorization',
'process',
'streets',
'helps',
'game',
'renewable',
'asset',
'backlog',
'lending',
'drug',
'laws',
'years',
'education',
'graduates',
'kansas',
'much',
'advocate',
'problems',
'leadership',
'behavior',
'agriculture',
'condition',
'medicare',
'worthy',
'sister',
'expressed',
'genetic',
'earn',
'consistent',
'component',
'contrast',
'private',
'24',
'brown',
'warrant',
'essential',
'witnessed',
'taxes',
'tibet',
'compromise',
'conduct',
'couple',
'hemisphere',
'll',
'serving',
'choice',
'bureaucracy',
'nowhere',
'types',
'interest',
'considered',
'america',
'wherein',
'incredible',
'hearing',
'nearly',
'01',
'help',
'fully',
'unable',
'april',
'properties',
'basically',
'commitment',
'student',
'telecommunications',
'150',
'provisions',
'actions',
'affect',
'pointed',
'received',
'recall',
'memorial',
'competitiveness',
'proposed',
'engaging',
'mill',
'180',
'consent',
'over',
'ultimate',
'neighborhoods',
'view',
'point',
'judicial',
'advice',
'enforcement',
'just',
'deaths',
'located',
'electronic',
'schwartz',
'schakowsky',
'monitor',
'listened',
'public',
'commonsense',
'television',
'lose',
'chemical',
'actual',
'out',
'saw',
'definition',
'facts',
'trucks',
'allow',
'compared',
'natural',
'negotiations',
'entirely',
'extent',
'driving',
'sacrifice',
'bernice',
'bridge',
'held',
'fired',
'payments',
'sure',
'cells',
'less',
'final',
'lebanon',
'violence',
'domestic',
'angeles',
'expense',
'biofuels',
'enacted',
'math',
'does',
'edge',
'expected',
'assets',
'consider',
'continuing',
'call',
'ptsd',
'camps',
'mccarthy',
'exercise',
'universal',
'design',
'create',
'john',
'occurring',
'regarding',
'power',
'guarantee',
'briefly',
'one',
'imagine',
'country',
'surge',
'katrina',
'whose',
'firm',
'returned',
'solar',
'proven',
'establishing',
'outer',
'particularly',
'holds',
'core',
'diplomatic',
'insured',
'hopefully',
'visit',
'die',
'barrels',
'taken',
'record',
'ignored',
'business',
'church',
'lehtinen',
'join',
'fewer',
'appointed',
'offices',
'recognizing',
'requires',
'poorest',
'senators',
'broad',
'escalation',
'credit',
'thru',
'changed',
'occupation',
'waters',
'each',
'nor',
'celebration',
'included',
'ceo',
'sixty',
'savings',
'brothers',
'contrary',
'detection',
'calls',
'saying',
'vehicles',
'decision',
'month',
'together',
'wild',
'ran',
'letter',
'previously',
'caused',
'effect',
'african',
'senate',
'personally',
'korea',
'union',
'outstanding',
'divided',
'followed',
'apply',
'welfare',
'convicted',
'cards',
'delays',
'mail',
'cost',
'engage',
'targeted',
'charges',
'biggest',
'tragic',
'studies',
'shea',
'driven',
'spend',
'americorps',
'invasion',
'gave',
'lobbying',
'grant',
'leaves',
'2004',
'contractors',
'employer',
'nationwide',
'castor',
'letters',
'plant',
'five',
'2008',
'barriers',
'nuclear',
'site',
'trial',
'elementary',
'cosponsors',
'everyday',
'saving',
'discipline',
'internet',
'celebrate',
'agreed',
'rest',
'legislature',
'worldwide',
'listen',
'struggle',
'rely',
'make',
'pump',
'twice',
'between',
'decades',
'certainly',
'abortion',
'transparency',
'citizens',
'deep',
'vi',
'pipeline',
'sale',
'above',
'countless',
'treasury',
'wage',
'cases',
'torture',
'whereas',
'regulators',
...})
# Refit with 40 topics, excluding the extended stop-word set; this rebinds
# lda_model, lda, lda_vec, cv and corpus to the new fit.
lda_model, lda, lda_vec, cv, corpus = run_lda(data, 40, stop_words)
Topic 0:
[('duncan', 0.8125087034627877), ('horses', 0.636563654410237), ('olympic', 0.5533880579711062), ('staggering', 0.5527900974402409), ('sendler', 0.5516915414390462), ('roybal', 0.5495142256341471), ('stewardship', 0.539990481442148), ('truck', 0.5301725845028952), ('toys', 0.5263862471243266), ('holiness', 0.5200709595461378)]
Topic 1:
[('contraceptives', 1.1322917713596823), ('insert', 0.819779049499601), ('speculation', 0.7600676808738385), ('cuba', 0.7478983049740057), ('commodity', 0.7454970757293969), ('folio', 0.7212123338954801), ('unintended', 0.6916996599637789), ('egypt', 0.6769680911958883), ('crops', 0.6740616336872267), ('usaid', 0.6631324683368433)]
Topic 2:
[('latinos', 28.57168440672087), ('chavez', 12.259258607480422), ('ferrera', 9.087648101652317), ('cesar', 7.962559706105602), ('estrada', 7.455998703496498), ('ciudad', 7.1079346630714895), ('latinas', 6.3997410186732955), ('islander', 5.8864296194686485), ('dtv', 5.716305162191971), ('promotoras', 5.665231244414415)]
Topic 3:
[('mentoring', 18.259233803905108), ('diploma', 12.119931039716786), ('mills', 8.753096448564516), ('sandberg', 8.049337569760016), ('minneapolis', 5.941193962866121), ('ceecee', 5.642099582365889), ('cole', 4.657985207482534), ('prenatally', 4.6510876600678035), ('lyles', 4.034983475130155), ('mentor', 3.943596108889469)]
Topic 4:
[('myrick', 35.580922733254134), ('kristen', 5.9911197739881255), ('meineke', 4.869688196019243), ('ric', 4.83460324639221), ('flair', 4.825883290906731), ('pap', 4.638299324177404), ('charlotte', 4.539149178510334), ('dwi', 4.098580142448335), ('nay', 3.4015693705911643), ('cervical', 3.3282733911635085)]
Topic 5:
[('granger', 34.40785516627489), ('talkers', 11.580917506225033), ('chibitty', 6.331839594505975), ('mosteiro', 5.543053227052157), ('spencer', 5.0243453482474365), ('clarence', 4.925198805617137), ('tahmahkera', 4.019393594801073), ('comanche', 3.9835079698447), ('paulie', 2.4474625185474337), ('allan', 2.424458177320304)]
Topic 6:
[('cloning', 31.75892563335224), ('reproductive', 19.641433515491627), ('embryos', 7.4912217773644745), ('cloned', 4.575742320112252), ('civics', 4.199812504314061), ('mouse', 3.743105933702287), ('transplant', 3.6004350756332406), ('denver', 3.248981824635166), ('scnt', 3.1818250067924225), ('bans', 3.114101345365679)]
Topic 7:
[('meth', 22.684423700186834), ('dandy', 9.335012769984825), ('walker', 7.583955720790898), ('syndrome', 7.0957952389580115), ('katelyn', 7.0688930788352655), ('kohlberg', 7.050536253729905), ('prostate', 6.6429693494567585), ('jerome', 6.349921183117512), ('beavers', 6.264877551032239), ('beaver', 5.382784554770402)]
Topic 8:
[('emerson', 25.149006261270976), ('hansbrough', 4.808680212904641), ('sr', 4.38369215982008), ('hudson', 3.728690505759795), ('limbaugh', 3.62532143192573), ('tyler', 3.115717008151453), ('dissidents', 3.04996962842637), ('berry', 2.6850076258304663), ('girardeau', 2.471265356305475), ('clay', 2.452296955095839)]
Topic 9:
[('boating', 0.4269410770634471), ('contraceptives', 0.41960094062098663), ('servicemen', 0.41424908832966667), ('granger', 0.412722484643257), ('contact', 0.40247626899129196), ('egypt', 0.39226077323336433), ('emerson', 0.3827482973220052), ('murtha', 0.37703459666617034), ('jacksonville', 0.3668034787187638), ('firmly', 0.36436227810056415)]
Topic 10:
[('cloning', 0.9507235253827079), ('pensions', 0.7418722928287097), ('glider', 0.679878020668157), ('reproductive', 0.6711257184467102), ('truck', 0.6609524854245595), ('roybal', 0.6486906156757777), ('allard', 0.634440704597814), ('highways', 0.616134609956907), ('leavenworth', 0.5334253524298543), ('barracks', 0.5218447985137469)]
Topic 11:
[('wire', 34.9113046924464), ('probable', 18.747853235005373), ('scaap', 14.749274631128078), ('verify', 13.734101933366876), ('foreigner', 12.599656304445869), ('astronomy', 11.99447668205341), ('heathrow', 10.343103676932564), ('haul', 10.176220995370569), ('phones', 9.493353094544316), ('stockpile', 9.340211511286105)]
Topic 12:
[('latinos', 0.713661095497453), ('chavez', 0.48231501854879105), ('betty', 0.463533102960083), ('citizenship', 0.4517123625393424), ('juarez', 0.4044613284122105), ('permanently', 0.4007216395100808), ('toys', 0.38637148979026487), ('blessed', 0.378079600069133), ('latin', 0.3747279144869569), ('collar', 0.3727541065986048)]
Topic 13:
[('hawai', 33.22607239139987), ('kalaupapa', 19.08044249744785), ('filipino', 17.030719074824688), ('indigenous', 13.497323132112811), ('natives', 10.522756228378356), ('aloha', 9.839148731795722), ('motorcycle', 9.19808291082505), ('uh', 8.742973639888959), ('mahalo', 8.022796848697975), ('ix', 7.92816331430612)]
Topic 14:
[('paralysis', 12.509271067616044), ('reclamation', 12.368387376424483), ('paired', 12.226427578654578), ('cobra', 12.096511596680713), ('blindness', 9.835719287877977), ('blinded', 9.600726713778014), ('trains', 9.191659292828389), ('basin', 8.753802519393775), ('postpartum', 8.578995760870814), ('cervical', 8.477258038662994)]
Topic 15:
[('tsongas', 0.7017416387931157), ('militia', 0.41131485365421283), ('barrett', 0.3948687744832978), ('cite', 0.38791053512422924), ('risen', 0.3833669966299425), ('exploited', 0.3811543809549447), ('location', 0.38091374495975644), ('challenging', 0.37981116035255214), ('50th', 0.37861120397861725), ('professor', 0.3745834478792065)]
Topic 16:
[('russian', 0.482279439990327), ('filipino', 0.45942425225106254), ('sorority', 0.4385422398311869), ('johnny', 0.43188497111243596), ('cherokee', 0.41593317197606416), ('islands', 0.41571737663823544), ('mercury', 0.4134344976392453), ('embrace', 0.4108428191378143), ('11th', 0.4084278523786756), ('sends', 0.4069953547983149)]
Topic 17:
[('ali', 51.22375279533656), ('scale', 50.98692694163963), ('certification', 50.98241393489512), ('catholic', 50.89954459392632), ('negotiated', 50.817198651933644), ('engagement', 50.8029979071559), ('utilize', 50.64993363442352), ('pending', 50.64004984116555), ('conservative', 50.611953433557886), ('ed', 50.57036491423741)]
Topic 18:
[('yucca', 38.20012144567988), ('vegas', 31.753144658250513), ('las', 31.21297568337285), ('hamas', 19.22239660101952), ('justin', 14.599410804831185), ('taiwan', 14.475186602846442), ('gaza', 12.022792655153635), ('israelis', 12.007039834681802), ('shipments', 8.472651650536392), ('sderot', 8.46188338362181)]
Topic 19:
[('allard', 0.39299111477014076), ('sat', 0.36386356589910607), ('sacrificed', 0.362596708826715), ('permits', 0.3554702803213507), ('recess', 0.354419152565692), ('regulate', 0.3505653461717683), ('kalaupapa', 0.34814531736531035), ('hawai', 0.3469303728030136), ('successes', 0.3453478342228781), ('conservative', 0.34459186213729287)]
Topic 20:
[('cincinnati', 0.3959669860758346), ('reverend', 0.393620277738077), ('miami', 0.39122204377442527), ('doubled', 0.38634006659317793), ('brooklyn', 0.36028468170637207), ('pending', 0.35609057194589905), ('certification', 0.35600249099080133), ('strengthened', 0.35458207977632566), ('sudanese', 0.35428412376439317), ('pornography', 0.35115612079607034)]
Topic 21:
[('hawai', 0.5473599898575761), ('filipino', 0.4325739505972028), ('naval', 0.4226724076702524), ('scholarships', 0.37297380635056127), ('pryce', 0.37067172622659855), ('hyde', 0.36384664785139964), ('38', 0.3582530473096638), ('wednesday', 0.3551401625851505), ('uh', 0.35381640344371906), ('sudanese', 0.3527686698370079)]
Topic 22:
[('toledo', 0.6956434564900542), ('werth', 0.6021013935038799), ('gillibrand', 0.5474907787140475), ('undemocratic', 0.527281552232615), ('dressel', 0.5192023884757508), ('detective', 0.5188692973688263), ('latta', 0.518639716409579), ('citigroup', 0.4944473080166176), ('joy', 0.47832826173483695), ('ronald', 0.46636087046646346)]
Topic 23:
[('appalachian', 0.37918934247125724), ('prosecutor', 0.37756301845573725), ('trees', 0.377239144994428), ('refineries', 0.3748090099631378), ('connected', 0.37457157976960415), ('strikes', 0.370114035319386), ('latta', 0.36757450141530756), ('diagnosis', 0.36639655273466953), ('bowl', 0.3663339736591978), ('serv', 0.36305528701717527)]
Topic 24:
[('homeowner', 0.6470926388176161), ('waite', 0.5525119800750995), ('openness', 0.4442809829706434), ('surely', 0.4180211007475265), ('newly', 0.40197995059166614), ('controlling', 0.38828073290536547), ('pen', 0.3871347825966023), ('1990', 0.3850761763885205), ('leased', 0.3804677919168707), ('route', 0.37893726090169927)]
Topic 25:
[('granger', 0.49494784295758415), ('emerson', 0.4086440302078075), ('controls', 0.3793131310080471), ('attempted', 0.3488145551302645), ('quo', 0.34815385222116557), ('tie', 0.346653340365238), ('del', 0.3463394491550414), ('complaints', 0.3449642989433149), ('median', 0.34217637775045734), ('figures', 0.3412412415408793)]
Topic 26:
[('blindness', 0.5134360469700382), ('paired', 0.512944598884765), ('cobra', 0.4649087778470602), ('paralysis', 0.4573950566667391), ('postpartum', 0.45670409676179263), ('connected', 0.44889600476889746), ('madison', 0.42918522573561346), ('blinded', 0.41830489253771747), ('frustration', 0.4070137344445208), ('idiopathic', 0.40679535149084145)]
Topic 27:
[('roybal', 24.122270295710766), ('allard', 23.46089211695106), ('newborn', 10.218321809237267), ('underage', 5.798194971811154), ('1381', 4.002805807770457), ('saves', 2.2225759600472483), ('5919', 2.1979523527258684), ('preventable', 2.0579223738899386), ('absenteeism', 1.965069929024387), ('chronic', 1.8962226812250926)]
Topic 28:
[('herseth', 5.566906849265752), ('chaplain', 4.428344207311348), ('gruneich', 4.104396039498751), ('dakota', 3.267978683070249), ('legion', 2.60994563981993), ('stan', 1.7088000912376424), ('reverend', 1.664581683353139), ('sioux', 1.6218019894382145), ('reprioritizes', 0.9907263704035106), ('dakotas', 0.9745865765678978)]
Topic 29:
[('jamestown', 3.0954395196563502), ('chickahominy', 1.575585623507163), ('thomasina', 1.5146923543329511), ('colonists', 1.204535730491889), ('indians', 1.1366324286896878), ('400th', 1.0989952660745592), ('1607', 1.0559114322792902), ('nansemond', 0.9208764026235462), ('mattaponi', 0.8964088160834407), ('1294', 0.8685308996457298)]
Topic 30:
[('gillibrand', 35.52665478090988), ('upstate', 14.805786155980986), ('placid', 14.039997098518285), ('olympic', 10.961667295079398), ('hunting', 8.649737752528196), ('fishing', 6.6561142451729225), ('suzanne', 6.136443960561101), ('albany', 5.2880466622052875), ('suny', 4.0950282420299375), ('1932', 3.4643889360149305)]
Topic 31:
[('anaheim', 13.844684732848535), ('pavarotti', 12.215743677475714), ('vietnamese', 11.87219601064632), ('ana', 9.91607982179824), ('replenishment', 8.444737999403593), ('groundwater', 8.312759073095396), ('molina', 6.838120476319835), ('rowland', 6.812944127866036), ('muir', 6.419170145358405), ('bulletproof', 5.831191305954826)]
Topic 32:
[('boating', 0.7204390738323612), ('indians', 0.6117330706640481), ('cafe', 0.5809547916639097), ('lapeer', 0.543323625185996), ('turrill', 0.5342226168369453), ('nics', 0.5267741824337832), ('blessed', 0.5229595628690648), ('indigenous', 0.5127594391090068), ('hawai', 0.5126217562161469), ('mills', 0.49708252452988716)]
Topic 33:
[('legion', 0.4143770006246741), ('breakfast', 0.4116520793820419), ('milwaukee', 0.4043135139189563), ('hopes', 0.39093150785410585), ('enduring', 0.37416167262110384), ('reverend', 0.3704199149964053), ('armor', 0.3678144565745031), ('herseth', 0.3640655968433751), ('sovereign', 0.3630615651316789), ('wounds', 0.3626699257717096)]
Topic 34:
[('gators', 0.6443691180849711), ('freight', 0.5819092613713144), ('mica', 0.5549189102410074), ('poison', 0.5144500400509874), ('railroads', 0.509590158869525), ('downtown', 0.5066587105819484), ('cruise', 0.49840456177468234), ('mem', 0.468617052063636), ('jacksonville', 0.4681751171626424), ('diego', 0.46475146154890506)]
Topic 35:
[('nics', 44.10429523056637), ('glider', 21.776823715651965), ('bullying', 18.04909072313932), ('dba', 17.126232888217615), ('bowling', 14.806882770581378), ('volunteerism', 12.764673347153524), ('invigorating', 11.491203804500756), ('imaging', 10.5970978553936), ('2640', 8.89370136235541), ('wichita', 8.790737871879099)]
Topic 36:
[('tsongas', 18.11955577908706), ('barrett', 7.964388531147645), ('jimenez', 5.685900996619862), ('lowell', 5.337609411979292), ('alex', 4.748788764611502), ('militia', 3.743430179506232), ('tunnel', 2.517541363338138), ('whereabouts', 2.4340731727598217), ('concord', 2.2208451389836945), ('boundary', 2.0893137615816992)]
Topic 37:
[('myrick', 0.9304278594679118), ('akron', 0.5534511554493476), ('josh', 0.5343165139749546), ('hawai', 0.5340117150273894), ('trading', 0.48361297659996033), ('popcorn', 0.47534661720404353), ('basin', 0.45341670656839106), ('193', 0.44050947013538005), ('filipino', 0.42322927741528604), ('13th', 0.4229800997244627)]
Topic 38:
[('wyoming', 12.86132760457899), ('meth', 12.66368078806605), ('trona', 8.827824303104054), ('mcgee', 7.079098313404798), ('cubin', 6.30046602948609), ('gale', 5.642282167337626), ('6901', 3.411895823554226), ('laramie', 2.551652826636237), ('wills', 2.496086051613352), ('335', 2.3105761442080412)]
Topic 39:
[('pryce', 0.6104805380787394), ('glider', 0.5750694278227312), ('nics', 0.5699499329382702), ('volunteerism', 0.4663633400402911), ('criminals', 0.41893842748203564), ('london', 0.3852186270469489), ('tria', 0.38178270764466227), ('rep', 0.3731807027438391), ('exploited', 0.37115651506089553), ('exposure', 0.36769625827886127)]
# Visualise the most recent fit (the 40-topic model bound just above).
start_vis(lda, lda_vec, cv)
/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/pyLDAvis/_prepare.py:257: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version of pandas will change to not sort by default. To accept the future behavior, pass 'sort=False'. To retain the current behavior and silence the warning, pass 'sort=True'. return pd.concat([default_term_info] + list(topic_dfs))