## =======================================================
## IMPORTING
## =======================================================
import os
def get_data_from_files(path):
    """Read every regular file directly inside ``path`` and return the texts.

    Args:
        path: Directory that holds the documents. A trailing path separator
            is optional.

    Returns:
        A list with one string per file, decoded as ISO-8859-1 and ordered
        by file name so repeated runs see the documents in the same order.
    """
    results = []
    # os.listdir() returns entries in arbitrary order; sort for reproducibility.
    for name in sorted(os.listdir(path)):
        full_path = os.path.join(path, name)
        # Sub-directories and other non-file entries cannot be read as text.
        if not os.path.isfile(full_path):
            continue
        # The context manager closes the handle even if read() raises.
        with open(full_path, encoding="ISO-8859-1") as f:
            results.append(f.read())
    return results
## =======================================================
## MODELING
## =======================================================
import pandas as pd
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer
import gensim
from gensim.utils import simple_preprocess
from gensim.parsing.preprocessing import STOPWORDS
def run_lda(data, num_topics, stop_words):
    """Fit a bag-of-words LDA topic model on ``data`` and print its topics.

    Args:
        data: Iterable of raw document strings.
        num_topics: Number of topics, passed to LDA as ``n_components``.
        stop_words: Stop words for ``CountVectorizer``: ``'english'``,
            ``None``, or a collection of words (any non-list collection is
            converted to a list).

    Returns:
        A 5-tuple ``(lda_model, lda, lda_vec, cv, corpus)``:
        ``lda_model`` is the value returned by ``lda.fit_transform(lda_vec)``,
        ``lda`` the fitted ``LatentDirichletAllocation`` estimator,
        ``lda_vec`` the sparse count matrix produced by ``cv``,
        ``cv`` the fitted ``CountVectorizer``, and
        ``corpus`` a dense DataFrame of ``lda_vec`` with one column per term.
    """
    # Newer scikit-learn releases validate stop_words and accept only a
    # string, a list or None, so sets/frozensets/pandas Indexes are converted.
    if stop_words is not None and not isinstance(stop_words, (str, list)):
        stop_words = list(stop_words)
    cv = CountVectorizer(stop_words=stop_words)
    lda_vec = cv.fit_transform(data)
    # get_feature_names() was removed in scikit-learn 1.2; use its
    # replacement when available and fall back on older installations.
    if hasattr(cv, "get_feature_names_out"):
        lda_columns = cv.get_feature_names_out()
    else:
        lda_columns = cv.get_feature_names()
    corpus = pd.DataFrame(lda_vec.toarray(), columns=lda_columns)
    lda = LatentDirichletAllocation(n_components=num_topics, max_iter=10,
                                    learning_method='online')
    lda_model = lda.fit_transform(lda_vec)
    print_topics(lda, cv)
    return lda_model, lda, lda_vec, cv, corpus
## =======================================================
## HELPERS
## =======================================================
import numpy as np
np.random.seed(210)
def print_topics(model, vectorizer, top_n=10):
    """Print the ``top_n`` highest-weighted terms of every topic in ``model``.

    Args:
        model: Fitted topic model exposing ``components_``, one row of term
            weights per topic.
        vectorizer: Fitted vectorizer whose feature names are indexed the
            same way as the columns of ``model.components_``.
        top_n: How many terms to print per topic.

    Prints, for each topic, a ``Topic <idx>:`` line followed by a list of
    ``(term, weight)`` tuples, heaviest term first.
    """
    # Fetch the vocabulary once instead of rebuilding it for every term.
    # get_feature_names() was removed in scikit-learn 1.2, so prefer its
    # replacement and fall back on older installations.
    if hasattr(vectorizer, "get_feature_names_out"):
        feature_names = vectorizer.get_feature_names_out()
    else:
        feature_names = vectorizer.get_feature_names()
    for idx, topic in enumerate(model.components_):
        print("Topic %d:" % (idx))
        # argsort() is ascending; the reversed slice yields the indices of
        # the top_n largest weights, largest first.
        top_indices = topic.argsort()[:-top_n - 1:-1]
        # str()/float() keep the printed tuples as plain Python values
        # regardless of how NumPy renders its scalar types.
        print([(str(feature_names[i]), float(topic[i])) for i in top_indices])
## =======================================================
## VISUALIZING
## =======================================================
import pyLDAvis.sklearn as LDAvis
import pyLDAvis
def start_vis(lda, lda_vec, cv):
    """Prepare the pyLDAvis data for a fitted model and display it.

    Args:
        lda: Fitted topic model, passed to ``LDAvis.prepare``.
        lda_vec: Document-term matrix the model was fitted on.
        cv: Fitted vectorizer that produced ``lda_vec``.
    """
    vis_data = LDAvis.prepare(lda, lda_vec, cv, mds='tsne')
    pyLDAvis.show(vis_data)
    # pyLDAvis.save_html(vis_data, 'HW8_lda.html')
# data = get_data_from_files('Dog_Hike/')
# lda_model, lda, lda_vec, cv = run_lda(data)
# Load both document folders and model them as a single corpus.
data_fd = get_data_from_files('110/110-f-d/')
data_fr = get_data_from_files('110/110-f-r/')
data = data_fd + data_fr
data
# Baseline run. run_lda() requires num_topics and stop_words, so they are
# passed explicitly here.
# NOTE(review): 2 topics / no stop-word filtering is inferred from the
# recorded output of this run (two topics dominated by function words) - confirm.
lda_model, lda, lda_vec, cv, corpus = run_lda(data, 2, None)
Topic 0: [('the', 139166.20627866255), ('of', 85492.14362503793), ('to', 81466.41040767683), ('and', 78594.90040183658), ('in', 47973.701838936315), ('that', 43342.88607828087), ('for', 29249.2655364567), ('this', 28859.59783095147), ('is', 28270.074522886112), ('we', 27013.196067882127)] Topic 1: [('the', 1677.5842128197867), ('to', 846.5616279968173), ('and', 807.3821817461204), ('of', 773.1045091499119), ('in', 619.7655449974652), ('that', 488.29524584042053), ('we', 310.587681504484), ('for', 309.47625594198684), ('this', 294.0477824295658), ('it', 259.4750313814392)]
# corpus
# c2 = corpus.append(df.sum().rename('Total'))
from sklearn.feature_extraction import text

# Transpose the document-term counts so that each row is a vocabulary term,
# then total every term's count across all documents.
ct = corpus.T
ct['total'] = ct.sum(axis=1)
# Terms counted more than 68 times overall become extra stop words.
big_total = ct[ct['total'] > 68]
len(big_total)  # recorded output: 3516
len(ct)  # recorded output: 33688
# After transposing back, the column labels are the frequent terms themselves.
btt = big_total.T
additional_stopwords = btt.columns
# CountVectorizer accepts stop_words only as 'english', a list or None in
# newer scikit-learn releases, so build a (sorted, reproducible) list rather
# than a frozenset.
stop_words = sorted(text.ENGLISH_STOP_WORDS.union(additional_stopwords))
stop_words
frozenset({'interesting', 'about', 'blind', '100', 'supporters', 'enrolled', 'reauthorize', 'suspend', 'players', 'sound', 'force', 'presented', 'trust', 'leaving', 'disappointed', 'isn', 'known', 'bankruptcy', 'murder', 'redeploy', 'national', 'gentlewoman', 'non', 'activity', 'hereupon', 'earlier', 'organ', 'simply', 'teams', 'development', 'oppose', 'goal', 'lessons', 'believes', 'note', 'head', 'carson', 'flag', 'increased', 'substitute', 'joint', 'prevented', 'legal', 'properly', 'gratitude', 'injustice', 'false', 'covered', 'guard', 'basketball', 'why', 'caucus', 'lost', 'confront', 'factors', 'completely', 'valley', 'opponents', 'academy', 'voting', 'commemorating', 'housing', 'level', 'clearly', 'printed', 'tonight', 'map', 'victim', 'ignore', 'begin', 'gone', 'homeowners', 'flow', 'liberty', 'barbara', 'cbc', 'transition', 'hard', 'understanding', 'connection', 'poverty', 'debating', 'disability', 'would', 'members', 'attack', 'sanchez', 'general', 'authorizing', 'package', 'glenn', 'houston', 'pick', 'and', 'governments', 'attorney', 'commander', 'investments', 'hawaiians', 'skelton', 'hardworking', 'hasn', 'fires', 'survive', 'korean', 'tuesday', 'occurred', 'flood', 'regulations', 'hometown', 'statement', 'talking', 'longer', 'gift', 'when', 'gathering', 'nature', 'straight', 'drilling', 'securities', 'progressive', 'happening', 'trillion', 'designation', 'won', 'honors', 'crime', 'relevant', 'shown', 'comment', 'mary', 'elsewhere', 'library', 'sell', 'con', 'person', '90', 'park', 'reform', '000', 'debbie', 'bombing', 'shelf', 'vision', 'resources', '109th', 'mention', 'lands', 'nato', 'taught', 'success', 'rhetoric', 'petraeus', 'colombia', 'myself', 'millions', 'demand', 'plants', 'inside', 'friendly', 'unique', 'representing', 'israeli', 'convention', 'workplace', 'morning', 'hirono', 'recognition', 'implemented', 'arab', 'hereby', 'st', 'missing', 'explanation', 'combined', 'transportation', 'corruption', 'sources', 'area', 'beginning', 'not', 
'secretary', 'exploration', 'whole', 'version', 'prescription', 'introduced', 'authorizes', 'addressed', 'listening', 'have', 'cross', 'carrying', 'touch', 'property', 'want', 'engineers', 'true', 'training', 'wisdom', 'solve', 'sons', 'under', 'moreover', 'developing', 'miles', 'fight', 'favor', 'handle', 'kinds', 'must', 'nobody', 'men', 'projected', 'harman', 'residents', 'sectors', 'centers', 'childhood', 'sectarian', 'wall', 'shut', 'statistics', 'study', 'hiv', 'seek', 'common', 'sitting', 'creating', 'detail', 'asia', 'framework', 'rates', 'religion', 'palestinian', 'measure', 'push', 'russia', 'father', 'estate', 'mark', 'author', 'horrific', 'medicine', 'roosevelt', 'town', 'agency', 'based', 'bachus', 'word', 'dhs', '2007', 'interests', 'healthcare', 'mandate', 'around', 'ownership', 'operation', 'physical', 'budget', 'offer', 'draw', 'real', 'causes', 'step', 'lot', 'though', 'cover', 'effort', 'basis', 'employees', 'collective', 'structure', 'beautiful', 'meetings', 'election', 'means', 'foreclosure', 'republican', 'fitting', 'buying', 'demonstrated', 'speaks', 'that', 'appreciate', 'high', 'january', 'none', '18', 'read', 'gives', 'accomplish', 'screening', 'pursuit', 'grounds', 'reverse', 'sharing', 'complex', 'disabilities', 'hundreds', 'add', 'classroom', 'remarks', 'peru', 'her', 'additional', 'is', 'prison', 'watching', 'levels', 'whence', 'monday', 'va', '200', 'violations', 'sense', 'cities', 'fifth', 'expanding', 'households', 'overseas', 'thereupon', 'pelosi', 'keeping', '28', 'representation', 'star', 'flexibility', '65', 'text', 'mess', 'invest', 'ways', 'disadvantaged', 'refused', 'ally', 'services', 'performed', 'future', 'reflects', 'track', 'reducing', 'relating', 'question', 'active', 'memory', 'plans', 'closing', 'road', 'prior', 'defend', 'range', 'traumatic', 'veteran', 'honored', 'mexico', 'clarke', 'disparities', 'severely', 'physicians', 'resolution', 'group', 'officer', 'minority', 'closer', 'computer', 'rule', 'marines', 
'playing', 'stem', 'affordability', 'personnel', 'elderly', 'contained', 'interested', 'code', 'million', 'tireless', '50', 'families', 'retired', 'oversight', 'culture', 'fair', 'missions', 'responding', 'teach', 'eye', 'supplies', 'fallen', 'agreements', 'critical', 'multiple', 'including', 'contribution', 'blank', 'sex', 'reduces', 'troubled', 'maybe', 'document', 'middle', 'borders', 'down', 'suffered', 'up', 'questions', 'whereafter', 'policies', 'condolences', 'living', 'negotiation', 'degrees', 'launched', 'neighbors', 'once', 'polls', 'parties', 'representatives', 'growing', 'places', 'heritage', 'invested', 'mac', '68', 'marked', 'arts', 'greece', 'a', 'did', 'dropped', 'ourselves', 'revenues', 'powerful', 'reported', 'etc', 'or', 'publicly', 'sutton', 'protect', 'sea', 'expansion', 'often', 'home', 'vehicle', 'technology', 'commerce', 'hall', 'attention', 'be', 'mandates', 'anywhere', 'rita', 'working', 'requirements', 'saddam', 'telling', 'on', 'doors', 'numbers', 'richardson', 'human', 'organization', 'loan', 'double', 'ideas', 'arizona', 'drop', 'farmers', 'fourth', 'attacks', 'promotes', 'darfur', 'vital', 'timely', 'adult', 'urban', 'bailout', '120', 'wrote', 'peterson', 'i', 'federal', 'vouchers', 'industry', 'control', 'pull', 'started', 'diabetes', 'democracy', 'sad', 'smart', 'rise', 'suffer', 'employment', 'parks', 'tomorrow', 'wherever', 'santa', 'extended', 'coal', 'war', 'specific', 'firms', 'immediately', 'urgent', 'compassion', 'taxpayer', 'school', 'ensure', 'missouri', 'airport', 'identity', 'standing', 'achieved', 'thomas', 'stock', 'kids', 'sacramento', 'reached', 'extra', 'good', 'according', 'forced', 'welcome', 'tour', 'jones', 'accountable', 'raise', 'tell', 'will', 'combat', 'counties', 'communication', 'show', 'schools', 'share', 'acts', 'scholarship', 'sustained', 'toward', 'woolsey', 'too', 'shift', 'totally', 'arrested', 'lakes', 'traffic', 'neglect', 'prohibited', 'courage', 'cry', 'caught', 'fraud', 'identify', 'trying', 
'10', 'events', 'testimony', 'half', 'seeming', 'respect', 'kind', 'focusing', 'imperative', 'billion', 'targeting', 'promised', 'delighted', 'casualties', 'christmas', 'starting', 'voucher', 'bipartisan', 'literacy', 'basic', 'rejected', 'extension', 'deeply', 'destroy', 'party', 'porter', 'likely', 'mom', '1995', 'your', 'deny', 'expect', 'native', 'owners', 'protection', 'repeal', 'seriously', 'potential', 'old', 'fact', 'programs', 'veterans', 'reason', '21', 'effects', 'criminal', 'past', 'primary', 'strongly', 'frank', 'assistant', 'science', 'wind', 'creation', 'subprime', 'businesses', 'except', 'latter', 'conducting', 'consensus', 'wounded', 'color', 'farm', 'case', 'eligibility', 'products', 'barrel', 'enable', 'foxx', 'light', 'tried', 'accomplishments', 'able', '11', 'early', 'mistakes', 'social', 'produced', 'conditions', 'highest', 'successful', 'doc', 'loophole', 'huge', 'gasoline', 'operate', 'priorities', 'restrictions', 'never', 'pennsylvania', 'civil', 'earmark', '23', 'unemployment', 'commend', 'white', 'related', 'discussing', 'pursue', '95', 'chicago', 'sanctions', 'far', 'methods', 'ours', 'imports', 'federally', 'ongoing', 'port', 'europe', 'ms', 'nation', 'continental', 'looking', 'faced', 'ford', 'fashion', 'thought', 'database', 'blackburn', 'using', 'married', 'suicide', 'strengthening', 'opposite', 'qualify', 'managing', 'continues', 'of', 'additionally', 'missile', 'noted', 'dollar', 'rental', 'called', 'forever', 'begun', 'improves', 'ie', 'japan', '80', 'outreach', 'disastrous', 'supports', 'hero', 'contracts', '27', 'baby', 'introduce', 'target', 'authorization', 'process', 'streets', 'helps', 'game', 'renewable', 'asset', 'backlog', 'lending', 'drug', 'laws', 'years', 'education', 'graduates', 'kansas', 'much', 'advocate', 'problems', 'leadership', 'behavior', 'agriculture', 'condition', 'medicare', 'worthy', 'sister', 'expressed', 'genetic', 'earn', 'consistent', 'component', 'contrast', 'private', '24', 'brown', 'warrant', 
'essential', 'witnessed', 'taxes', 'tibet', 'compromise', 'conduct', 'couple', 'hemisphere', 'll', 'serving', 'choice', 'bureaucracy', 'nowhere', 'types', 'interest', 'considered', 'america', 'wherein', 'incredible', 'hearing', 'nearly', '01', 'help', 'fully', 'unable', 'april', 'properties', 'basically', 'commitment', 'student', 'telecommunications', '150', 'provisions', 'actions', 'affect', 'pointed', 'received', 'recall', 'memorial', 'competitiveness', 'proposed', 'engaging', 'mill', '180', 'consent', 'over', 'ultimate', 'neighborhoods', 'view', 'point', 'judicial', 'advice', 'enforcement', 'just', 'deaths', 'located', 'electronic', 'schwartz', 'schakowsky', 'monitor', 'listened', 'public', 'commonsense', 'television', 'lose', 'chemical', 'actual', 'out', 'saw', 'definition', 'facts', 'trucks', 'allow', 'compared', 'natural', 'negotiations', 'entirely', 'extent', 'driving', 'sacrifice', 'bernice', 'bridge', 'held', 'fired', 'payments', 'sure', 'cells', 'less', 'final', 'lebanon', 'violence', 'domestic', 'angeles', 'expense', 'biofuels', 'enacted', 'math', 'does', 'edge', 'expected', 'assets', 'consider', 'continuing', 'call', 'ptsd', 'camps', 'mccarthy', 'exercise', 'universal', 'design', 'create', 'john', 'occurring', 'regarding', 'power', 'guarantee', 'briefly', 'one', 'imagine', 'country', 'surge', 'katrina', 'whose', 'firm', 'returned', 'solar', 'proven', 'establishing', 'outer', 'particularly', 'holds', 'core', 'diplomatic', 'insured', 'hopefully', 'visit', 'die', 'barrels', 'taken', 'record', 'ignored', 'business', 'church', 'lehtinen', 'join', 'fewer', 'appointed', 'offices', 'recognizing', 'requires', 'poorest', 'senators', 'broad', 'escalation', 'credit', 'thru', 'changed', 'occupation', 'waters', 'each', 'nor', 'celebration', 'included', 'ceo', 'sixty', 'savings', 'brothers', 'contrary', 'detection', 'calls', 'saying', 'vehicles', 'decision', 'month', 'together', 'wild', 'ran', 'letter', 'previously', 'caused', 'effect', 'african', 'senate', 
'personally', 'korea', 'union', 'outstanding', 'divided', 'followed', 'apply', 'welfare', 'convicted', 'cards', 'delays', 'mail', 'cost', 'engage', 'targeted', 'charges', 'biggest', 'tragic', 'studies', 'shea', 'driven', 'spend', 'americorps', 'invasion', 'gave', 'lobbying', 'grant', 'leaves', '2004', 'contractors', 'employer', 'nationwide', 'castor', 'letters', 'plant', 'five', '2008', 'barriers', 'nuclear', 'site', 'trial', 'elementary', 'cosponsors', 'everyday', 'saving', 'discipline', 'internet', 'celebrate', 'agreed', 'rest', 'legislature', 'worldwide', 'listen', 'struggle', 'rely', 'make', 'pump', 'twice', 'between', 'decades', 'certainly', 'abortion', 'transparency', 'citizens', 'deep', 'vi', 'pipeline', 'sale', 'above', 'countless', 'treasury', 'wage', 'cases', 'torture', 'whereas', 'regulators', ...})
# Refit with 40 topics, this time filtering out the stop words built from the corpus.
lda_model, lda, lda_vec, cv, corpus = run_lda(
    data,
    num_topics=40,
    stop_words=stop_words,
)
Topic 0: [('duncan', 0.8125087034627877), ('horses', 0.636563654410237), ('olympic', 0.5533880579711062), ('staggering', 0.5527900974402409), ('sendler', 0.5516915414390462), ('roybal', 0.5495142256341471), ('stewardship', 0.539990481442148), ('truck', 0.5301725845028952), ('toys', 0.5263862471243266), ('holiness', 0.5200709595461378)] Topic 1: [('contraceptives', 1.1322917713596823), ('insert', 0.819779049499601), ('speculation', 0.7600676808738385), ('cuba', 0.7478983049740057), ('commodity', 0.7454970757293969), ('folio', 0.7212123338954801), ('unintended', 0.6916996599637789), ('egypt', 0.6769680911958883), ('crops', 0.6740616336872267), ('usaid', 0.6631324683368433)] Topic 2: [('latinos', 28.57168440672087), ('chavez', 12.259258607480422), ('ferrera', 9.087648101652317), ('cesar', 7.962559706105602), ('estrada', 7.455998703496498), ('ciudad', 7.1079346630714895), ('latinas', 6.3997410186732955), ('islander', 5.8864296194686485), ('dtv', 5.716305162191971), ('promotoras', 5.665231244414415)] Topic 3: [('mentoring', 18.259233803905108), ('diploma', 12.119931039716786), ('mills', 8.753096448564516), ('sandberg', 8.049337569760016), ('minneapolis', 5.941193962866121), ('ceecee', 5.642099582365889), ('cole', 4.657985207482534), ('prenatally', 4.6510876600678035), ('lyles', 4.034983475130155), ('mentor', 3.943596108889469)] Topic 4: [('myrick', 35.580922733254134), ('kristen', 5.9911197739881255), ('meineke', 4.869688196019243), ('ric', 4.83460324639221), ('flair', 4.825883290906731), ('pap', 4.638299324177404), ('charlotte', 4.539149178510334), ('dwi', 4.098580142448335), ('nay', 3.4015693705911643), ('cervical', 3.3282733911635085)] Topic 5: [('granger', 34.40785516627489), ('talkers', 11.580917506225033), ('chibitty', 6.331839594505975), ('mosteiro', 5.543053227052157), ('spencer', 5.0243453482474365), ('clarence', 4.925198805617137), ('tahmahkera', 4.019393594801073), ('comanche', 3.9835079698447), ('paulie', 2.4474625185474337), ('allan', 2.424458177320304)] 
Topic 6: [('cloning', 31.75892563335224), ('reproductive', 19.641433515491627), ('embryos', 7.4912217773644745), ('cloned', 4.575742320112252), ('civics', 4.199812504314061), ('mouse', 3.743105933702287), ('transplant', 3.6004350756332406), ('denver', 3.248981824635166), ('scnt', 3.1818250067924225), ('bans', 3.114101345365679)] Topic 7: [('meth', 22.684423700186834), ('dandy', 9.335012769984825), ('walker', 7.583955720790898), ('syndrome', 7.0957952389580115), ('katelyn', 7.0688930788352655), ('kohlberg', 7.050536253729905), ('prostate', 6.6429693494567585), ('jerome', 6.349921183117512), ('beavers', 6.264877551032239), ('beaver', 5.382784554770402)] Topic 8: [('emerson', 25.149006261270976), ('hansbrough', 4.808680212904641), ('sr', 4.38369215982008), ('hudson', 3.728690505759795), ('limbaugh', 3.62532143192573), ('tyler', 3.115717008151453), ('dissidents', 3.04996962842637), ('berry', 2.6850076258304663), ('girardeau', 2.471265356305475), ('clay', 2.452296955095839)] Topic 9: [('boating', 0.4269410770634471), ('contraceptives', 0.41960094062098663), ('servicemen', 0.41424908832966667), ('granger', 0.412722484643257), ('contact', 0.40247626899129196), ('egypt', 0.39226077323336433), ('emerson', 0.3827482973220052), ('murtha', 0.37703459666617034), ('jacksonville', 0.3668034787187638), ('firmly', 0.36436227810056415)] Topic 10: [('cloning', 0.9507235253827079), ('pensions', 0.7418722928287097), ('glider', 0.679878020668157), ('reproductive', 0.6711257184467102), ('truck', 0.6609524854245595), ('roybal', 0.6486906156757777), ('allard', 0.634440704597814), ('highways', 0.616134609956907), ('leavenworth', 0.5334253524298543), ('barracks', 0.5218447985137469)] Topic 11: [('wire', 34.9113046924464), ('probable', 18.747853235005373), ('scaap', 14.749274631128078), ('verify', 13.734101933366876), ('foreigner', 12.599656304445869), ('astronomy', 11.99447668205341), ('heathrow', 10.343103676932564), ('haul', 10.176220995370569), ('phones', 9.493353094544316), ('stockpile', 
9.340211511286105)] Topic 12: [('latinos', 0.713661095497453), ('chavez', 0.48231501854879105), ('betty', 0.463533102960083), ('citizenship', 0.4517123625393424), ('juarez', 0.4044613284122105), ('permanently', 0.4007216395100808), ('toys', 0.38637148979026487), ('blessed', 0.378079600069133), ('latin', 0.3747279144869569), ('collar', 0.3727541065986048)] Topic 13: [('hawai', 33.22607239139987), ('kalaupapa', 19.08044249744785), ('filipino', 17.030719074824688), ('indigenous', 13.497323132112811), ('natives', 10.522756228378356), ('aloha', 9.839148731795722), ('motorcycle', 9.19808291082505), ('uh', 8.742973639888959), ('mahalo', 8.022796848697975), ('ix', 7.92816331430612)] Topic 14: [('paralysis', 12.509271067616044), ('reclamation', 12.368387376424483), ('paired', 12.226427578654578), ('cobra', 12.096511596680713), ('blindness', 9.835719287877977), ('blinded', 9.600726713778014), ('trains', 9.191659292828389), ('basin', 8.753802519393775), ('postpartum', 8.578995760870814), ('cervical', 8.477258038662994)] Topic 15: [('tsongas', 0.7017416387931157), ('militia', 0.41131485365421283), ('barrett', 0.3948687744832978), ('cite', 0.38791053512422924), ('risen', 0.3833669966299425), ('exploited', 0.3811543809549447), ('location', 0.38091374495975644), ('challenging', 0.37981116035255214), ('50th', 0.37861120397861725), ('professor', 0.3745834478792065)] Topic 16: [('russian', 0.482279439990327), ('filipino', 0.45942425225106254), ('sorority', 0.4385422398311869), ('johnny', 0.43188497111243596), ('cherokee', 0.41593317197606416), ('islands', 0.41571737663823544), ('mercury', 0.4134344976392453), ('embrace', 0.4108428191378143), ('11th', 0.4084278523786756), ('sends', 0.4069953547983149)] Topic 17: [('ali', 51.22375279533656), ('scale', 50.98692694163963), ('certification', 50.98241393489512), ('catholic', 50.89954459392632), ('negotiated', 50.817198651933644), ('engagement', 50.8029979071559), ('utilize', 50.64993363442352), ('pending', 50.64004984116555), 
('conservative', 50.611953433557886), ('ed', 50.57036491423741)] Topic 18: [('yucca', 38.20012144567988), ('vegas', 31.753144658250513), ('las', 31.21297568337285), ('hamas', 19.22239660101952), ('justin', 14.599410804831185), ('taiwan', 14.475186602846442), ('gaza', 12.022792655153635), ('israelis', 12.007039834681802), ('shipments', 8.472651650536392), ('sderot', 8.46188338362181)] Topic 19: [('allard', 0.39299111477014076), ('sat', 0.36386356589910607), ('sacrificed', 0.362596708826715), ('permits', 0.3554702803213507), ('recess', 0.354419152565692), ('regulate', 0.3505653461717683), ('kalaupapa', 0.34814531736531035), ('hawai', 0.3469303728030136), ('successes', 0.3453478342228781), ('conservative', 0.34459186213729287)] Topic 20: [('cincinnati', 0.3959669860758346), ('reverend', 0.393620277738077), ('miami', 0.39122204377442527), ('doubled', 0.38634006659317793), ('brooklyn', 0.36028468170637207), ('pending', 0.35609057194589905), ('certification', 0.35600249099080133), ('strengthened', 0.35458207977632566), ('sudanese', 0.35428412376439317), ('pornography', 0.35115612079607034)] Topic 21: [('hawai', 0.5473599898575761), ('filipino', 0.4325739505972028), ('naval', 0.4226724076702524), ('scholarships', 0.37297380635056127), ('pryce', 0.37067172622659855), ('hyde', 0.36384664785139964), ('38', 0.3582530473096638), ('wednesday', 0.3551401625851505), ('uh', 0.35381640344371906), ('sudanese', 0.3527686698370079)] Topic 22: [('toledo', 0.6956434564900542), ('werth', 0.6021013935038799), ('gillibrand', 0.5474907787140475), ('undemocratic', 0.527281552232615), ('dressel', 0.5192023884757508), ('detective', 0.5188692973688263), ('latta', 0.518639716409579), ('citigroup', 0.4944473080166176), ('joy', 0.47832826173483695), ('ronald', 0.46636087046646346)] Topic 23: [('appalachian', 0.37918934247125724), ('prosecutor', 0.37756301845573725), ('trees', 0.377239144994428), ('refineries', 0.3748090099631378), ('connected', 0.37457157976960415), ('strikes', 0.370114035319386), 
('latta', 0.36757450141530756), ('diagnosis', 0.36639655273466953), ('bowl', 0.3663339736591978), ('serv', 0.36305528701717527)] Topic 24: [('homeowner', 0.6470926388176161), ('waite', 0.5525119800750995), ('openness', 0.4442809829706434), ('surely', 0.4180211007475265), ('newly', 0.40197995059166614), ('controlling', 0.38828073290536547), ('pen', 0.3871347825966023), ('1990', 0.3850761763885205), ('leased', 0.3804677919168707), ('route', 0.37893726090169927)] Topic 25: [('granger', 0.49494784295758415), ('emerson', 0.4086440302078075), ('controls', 0.3793131310080471), ('attempted', 0.3488145551302645), ('quo', 0.34815385222116557), ('tie', 0.346653340365238), ('del', 0.3463394491550414), ('complaints', 0.3449642989433149), ('median', 0.34217637775045734), ('figures', 0.3412412415408793)] Topic 26: [('blindness', 0.5134360469700382), ('paired', 0.512944598884765), ('cobra', 0.4649087778470602), ('paralysis', 0.4573950566667391), ('postpartum', 0.45670409676179263), ('connected', 0.44889600476889746), ('madison', 0.42918522573561346), ('blinded', 0.41830489253771747), ('frustration', 0.4070137344445208), ('idiopathic', 0.40679535149084145)] Topic 27: [('roybal', 24.122270295710766), ('allard', 23.46089211695106), ('newborn', 10.218321809237267), ('underage', 5.798194971811154), ('1381', 4.002805807770457), ('saves', 2.2225759600472483), ('5919', 2.1979523527258684), ('preventable', 2.0579223738899386), ('absenteeism', 1.965069929024387), ('chronic', 1.8962226812250926)] Topic 28: [('herseth', 5.566906849265752), ('chaplain', 4.428344207311348), ('gruneich', 4.104396039498751), ('dakota', 3.267978683070249), ('legion', 2.60994563981993), ('stan', 1.7088000912376424), ('reverend', 1.664581683353139), ('sioux', 1.6218019894382145), ('reprioritizes', 0.9907263704035106), ('dakotas', 0.9745865765678978)] Topic 29: [('jamestown', 3.0954395196563502), ('chickahominy', 1.575585623507163), ('thomasina', 1.5146923543329511), ('colonists', 1.204535730491889), ('indians', 
1.1366324286896878), ('400th', 1.0989952660745592), ('1607', 1.0559114322792902), ('nansemond', 0.9208764026235462), ('mattaponi', 0.8964088160834407), ('1294', 0.8685308996457298)] Topic 30: [('gillibrand', 35.52665478090988), ('upstate', 14.805786155980986), ('placid', 14.039997098518285), ('olympic', 10.961667295079398), ('hunting', 8.649737752528196), ('fishing', 6.6561142451729225), ('suzanne', 6.136443960561101), ('albany', 5.2880466622052875), ('suny', 4.0950282420299375), ('1932', 3.4643889360149305)] Topic 31: [('anaheim', 13.844684732848535), ('pavarotti', 12.215743677475714), ('vietnamese', 11.87219601064632), ('ana', 9.91607982179824), ('replenishment', 8.444737999403593), ('groundwater', 8.312759073095396), ('molina', 6.838120476319835), ('rowland', 6.812944127866036), ('muir', 6.419170145358405), ('bulletproof', 5.831191305954826)] Topic 32: [('boating', 0.7204390738323612), ('indians', 0.6117330706640481), ('cafe', 0.5809547916639097), ('lapeer', 0.543323625185996), ('turrill', 0.5342226168369453), ('nics', 0.5267741824337832), ('blessed', 0.5229595628690648), ('indigenous', 0.5127594391090068), ('hawai', 0.5126217562161469), ('mills', 0.49708252452988716)] Topic 33: [('legion', 0.4143770006246741), ('breakfast', 0.4116520793820419), ('milwaukee', 0.4043135139189563), ('hopes', 0.39093150785410585), ('enduring', 0.37416167262110384), ('reverend', 0.3704199149964053), ('armor', 0.3678144565745031), ('herseth', 0.3640655968433751), ('sovereign', 0.3630615651316789), ('wounds', 0.3626699257717096)] Topic 34: [('gators', 0.6443691180849711), ('freight', 0.5819092613713144), ('mica', 0.5549189102410074), ('poison', 0.5144500400509874), ('railroads', 0.509590158869525), ('downtown', 0.5066587105819484), ('cruise', 0.49840456177468234), ('mem', 0.468617052063636), ('jacksonville', 0.4681751171626424), ('diego', 0.46475146154890506)] Topic 35: [('nics', 44.10429523056637), ('glider', 21.776823715651965), ('bullying', 18.04909072313932), ('dba', 
17.126232888217615), ('bowling', 14.806882770581378), ('volunteerism', 12.764673347153524), ('invigorating', 11.491203804500756), ('imaging', 10.5970978553936), ('2640', 8.89370136235541), ('wichita', 8.790737871879099)] Topic 36: [('tsongas', 18.11955577908706), ('barrett', 7.964388531147645), ('jimenez', 5.685900996619862), ('lowell', 5.337609411979292), ('alex', 4.748788764611502), ('militia', 3.743430179506232), ('tunnel', 2.517541363338138), ('whereabouts', 2.4340731727598217), ('concord', 2.2208451389836945), ('boundary', 2.0893137615816992)] Topic 37: [('myrick', 0.9304278594679118), ('akron', 0.5534511554493476), ('josh', 0.5343165139749546), ('hawai', 0.5340117150273894), ('trading', 0.48361297659996033), ('popcorn', 0.47534661720404353), ('basin', 0.45341670656839106), ('193', 0.44050947013538005), ('filipino', 0.42322927741528604), ('13th', 0.4229800997244627)] Topic 38: [('wyoming', 12.86132760457899), ('meth', 12.66368078806605), ('trona', 8.827824303104054), ('mcgee', 7.079098313404798), ('cubin', 6.30046602948609), ('gale', 5.642282167337626), ('6901', 3.411895823554226), ('laramie', 2.551652826636237), ('wills', 2.496086051613352), ('335', 2.3105761442080412)] Topic 39: [('pryce', 0.6104805380787394), ('glider', 0.5750694278227312), ('nics', 0.5699499329382702), ('volunteerism', 0.4663633400402911), ('criminals', 0.41893842748203564), ('london', 0.3852186270469489), ('tria', 0.38178270764466227), ('rep', 0.3731807027438391), ('exploited', 0.37115651506089553), ('exposure', 0.36769625827886127)]
# Display the pyLDAvis view for the most recently fitted model and its count matrix.
start_vis(lda, lda_vec, cv)
/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/pyLDAvis/_prepare.py:257: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version of pandas will change to not sort by default. To accept the future behavior, pass 'sort=False'. To retain the current behavior and silence the warning, pass 'sort=True'. return pd.concat([default_term_info] + list(topic_dfs))