In [2]:
import pandas as pd
from nlpia.data.loaders import get_data
pd.options.display.width = 120
sms = get_data('sms-spam')
sms
2019-12-01 09:48:50,057 WARNING:nlpia.constants:107:            <module> Starting logger in nlpia.constants...
Out[2]:
spam text
0 0 Go until jurong point, crazy.. Available only ...
1 0 Ok lar... Joking wif u oni...
2 1 Free entry in 2 a wkly comp to win FA Cup fina...
3 0 U dun say so early hor... U c already then say...
4 0 Nah I don't think he goes to usf, he lives aro...
... ... ...
4832 1 This is the 2nd time we have tried 2 contact u...
4833 0 Will ü b going to esplanade fr home?
4834 0 Pity, * was in mood for that. So...any other s...
4835 0 The guy did some bitching but I acted like i'd...
4836 0 Rofl. Its true to its name

4837 rows × 2 columns

In [3]:
# Flagging spam with an exclamation mark
index = ['sms{}{}'.format(i, '!'*j) for (i,j) in zip(range(len(sms)), sms.spam)]
index
Out[3]:
['sms0',
 'sms1',
 'sms2!',
 'sms3',
 'sms4',
 'sms5!',
 'sms6',
 'sms7',
 'sms8!',
 'sms9!',
 'sms10',
 'sms11!',
 'sms12!',
 'sms13',
 'sms14',
 'sms15!',
 'sms16',
 'sms17',
 'sms18',
 'sms19!',
 'sms20',
 'sms21',
 'sms22',
 'sms23',
 'sms24',
 'sms25',
 'sms26',
 'sms27',
 'sms28',
 'sms29',
 'sms30',
 'sms31',
 'sms32',
 'sms33',
 'sms34!',
 'sms35',
 'sms36',
 'sms37',
 'sms38',
 'sms39',
 'sms40',
 'sms41',
 'sms42!',
 'sms43',
 'sms44',
 'sms45',
 'sms46',
 'sms47',
 'sms48',
 'sms49',
 'sms50',
 'sms51',
 'sms52',
 'sms53',
 'sms54!',
 'sms55',
 'sms56!',
 'sms57',
 'sms58',
 'sms59',
 'sms60',
 'sms61',
 'sms62',
 'sms63',
 'sms64',
 'sms65!',
 'sms66',
 'sms67!',
 'sms68!',
 'sms69',
 'sms70',
 'sms71',
 'sms72',
 'sms73',
 'sms74',
 'sms75',
 'sms76',
 'sms77',
 'sms78',
 'sms79',
 'sms80',
 'sms81',
 'sms82',
 'sms83',
 'sms84',
 'sms85',
 'sms86',
 'sms87',
 'sms88',
 'sms89',
 'sms90',
 'sms91',
 'sms92',
 'sms93!',
 'sms94',
 'sms95!',
 'sms96',
 'sms97',
 'sms98',
 'sms99',
 'sms100',
 'sms101',
 'sms102',
 'sms103',
 'sms104',
 'sms105',
 'sms106',
 'sms107',
 'sms108',
 'sms109',
 'sms110',
 'sms111',
 'sms112',
 'sms113',
 'sms114!',
 'sms115',
 'sms116',
 'sms117!',
 'sms118',
 'sms119',
 'sms120!',
 'sms121!',
 'sms122',
 'sms123!',
 'sms124',
 'sms125',
 'sms126',
 'sms127',
 'sms128',
 'sms129',
 'sms130',
 'sms131',
 'sms132',
 'sms133',
 'sms134!',
 'sms135!',
 'sms136',
 'sms137',
 'sms138',
 'sms139!',
 'sms140',
 'sms141',
 'sms142',
 'sms143',
 'sms144',
 'sms145',
 'sms146',
 'sms147!',
 'sms148',
 'sms149',
 'sms150',
 'sms151',
 'sms152',
 'sms153',
 'sms154',
 'sms155',
 'sms156',
 'sms157',
 'sms158',
 'sms159!',
 'sms160!',
 'sms161',
 'sms162',
 'sms163',
 'sms164!',
 'sms165!',
 'sms166',
 'sms167!',
 'sms168',
 'sms169',
 'sms170',
 'sms171',
 'sms172',
 'sms173',
 'sms174',
 'sms175',
 'sms176',
 'sms177',
 'sms178',
 'sms179',
 'sms180',
 'sms181',
 'sms182',
 'sms183',
 'sms184',
 'sms185',
 'sms186',
 'sms187',
 'sms188!',
 'sms189',
 'sms190',
 'sms191!',
 'sms192',
 'sms193',
 'sms194',
 'sms195',
 'sms196',
 'sms197',
 'sms198',
 'sms199',
 'sms200',
 'sms201',
 'sms202',
 'sms203',
 'sms204',
 'sms205',
 'sms206',
 'sms207',
 'sms208',
 'sms209',
 'sms210',
 'sms211',
 'sms212',
 'sms213',
 'sms214',
 'sms215',
 'sms216',
 'sms217',
 'sms218',
 'sms219',
 'sms220',
 'sms221',
 'sms222',
 'sms223',
 'sms224',
 'sms225!',
 'sms226',
 'sms227!',
 'sms228',
 'sms229',
 'sms230',
 'sms231',
 'sms232',
 'sms233',
 'sms234',
 'sms235!',
 'sms236',
 'sms237',
 'sms238',
 'sms239',
 'sms240!',
 'sms241',
 'sms242',
 'sms243',
 'sms244',
 'sms245',
 'sms246',
 'sms247',
 'sms248',
 'sms249',
 'sms250!',
 'sms251',
 'sms252',
 'sms253',
 'sms254',
 'sms255',
 'sms256',
 'sms257',
 'sms258',
 'sms259!',
 'sms260',
 'sms261',
 'sms262',
 'sms263',
 'sms264!',
 'sms265',
 'sms266',
 'sms267',
 'sms268!',
 'sms269',
 'sms270!',
 'sms271',
 'sms272',
 'sms273!',
 'sms274',
 'sms275',
 'sms276',
 'sms277',
 'sms278',
 'sms279',
 'sms280',
 'sms281',
 'sms282',
 'sms283',
 'sms284',
 'sms285',
 'sms286',
 'sms287',
 'sms288',
 'sms289',
 'sms290',
 'sms291',
 'sms292!',
 'sms293',
 'sms294',
 'sms295',
 'sms296',
 'sms297',
 'sms298',
 'sms299',
 'sms300!',
 'sms301',
 'sms302',
 'sms303',
 'sms304',
 'sms305',
 'sms306',
 'sms307',
 'sms308!',
 'sms309',
 'sms310',
 'sms311!',
 'sms312',
 'sms313',
 'sms314',
 'sms315',
 'sms316',
 'sms317',
 'sms318',
 'sms319',
 'sms320',
 'sms321',
 'sms322',
 'sms323',
 'sms324!',
 'sms325',
 'sms326',
 'sms327',
 'sms328',
 'sms329!',
 'sms330',
 'sms331',
 'sms332',
 'sms333',
 'sms334',
 'sms335',
 'sms336',
 'sms337',
 'sms338',
 'sms339',
 'sms340',
 'sms341',
 'sms342!',
 'sms343',
 'sms344',
 'sms345',
 'sms346',
 'sms347',
 'sms348',
 'sms349',
 'sms350',
 'sms351',
 'sms352!',
 'sms353',
 'sms354!',
 'sms355!',
 'sms356',
 'sms357',
 'sms358',
 'sms359',
 'sms360',
 'sms361',
 'sms362!',
 'sms363',
 'sms364!',
 'sms365',
 'sms366!',
 'sms367',
 'sms368!',
 'sms369',
 'sms370',
 'sms371',
 'sms372',
 'sms373',
 'sms374',
 'sms375',
 'sms376',
 'sms377',
 'sms378!',
 'sms379',
 'sms380',
 'sms381',
 'sms382',
 'sms383',
 'sms384',
 'sms385',
 'sms386',
 'sms387',
 'sms388',
 'sms389',
 'sms390',
 'sms391',
 'sms392',
 'sms393',
 'sms394',
 'sms395',
 'sms396',
 'sms397',
 'sms398',
 'sms399',
 'sms400',
 'sms401!',
 'sms402',
 'sms403',
 'sms404',
 'sms405',
 'sms406',
 'sms407',
 'sms408',
 'sms409',
 'sms410',
 'sms411',
 'sms412',
 'sms413!',
 'sms414',
 'sms415',
 'sms416!',
 'sms417',
 'sms418',
 'sms419',
 'sms420!',
 'sms421',
 'sms422',
 'sms423',
 'sms424',
 'sms425',
 'sms426',
 'sms427',
 'sms428!',
 'sms429!',
 'sms430!',
 'sms431',
 'sms432',
 'sms433',
 'sms434',
 'sms435!',
 'sms436',
 'sms437',
 'sms438',
 'sms439',
 'sms440',
 'sms441',
 'sms442',
 'sms443',
 'sms444!',
 'sms445',
 'sms446',
 'sms447',
 'sms448!',
 'sms449',
 'sms450',
 'sms451',
 'sms452',
 'sms453',
 'sms454',
 'sms455',
 'sms456',
 'sms457',
 'sms458',
 'sms459!',
 'sms460',
 'sms461',
 'sms462',
 'sms463!',
 'sms464!',
 'sms465',
 'sms466',
 'sms467',
 'sms468',
 'sms469',
 'sms470',
 'sms471',
 'sms472!',
 'sms473',
 'sms474',
 'sms475',
 'sms476',
 'sms477',
 'sms478!',
 'sms479!',
 'sms480',
 'sms481',
 'sms482',
 'sms483',
 'sms484',
 'sms485',
 'sms486!',
 'sms487',
 'sms488!',
 'sms489',
 'sms490',
 'sms491',
 'sms492!',
 'sms493',
 'sms494!',
 'sms495!',
 'sms496',
 'sms497',
 'sms498',
 'sms499!',
 'sms500',
 'sms501',
 'sms502',
 'sms503',
 'sms504',
 'sms505!',
 'sms506',
 'sms507',
 'sms508',
 'sms509',
 'sms510',
 'sms511!',
 'sms512',
 'sms513',
 'sms514',
 'sms515',
 'sms516',
 'sms517',
 'sms518',
 'sms519',
 'sms520',
 'sms521!',
 'sms522',
 'sms523!',
 'sms524!',
 'sms525!',
 'sms526',
 'sms527',
 'sms528',
 'sms529',
 'sms530',
 'sms531!',
 'sms532',
 'sms533!',
 'sms534!',
 'sms535!',
 'sms536!',
 'sms537',
 'sms538',
 'sms539',
 'sms540',
 'sms541',
 'sms542',
 'sms543',
 'sms544',
 'sms545',
 'sms546',
 'sms547',
 'sms548',
 'sms549',
 'sms550',
 'sms551',
 'sms552',
 'sms553',
 'sms554',
 'sms555!',
 'sms556',
 'sms557',
 'sms558',
 'sms559',
 'sms560',
 'sms561',
 'sms562!',
 'sms563',
 'sms564',
 'sms565!',
 'sms566',
 'sms567',
 'sms568',
 'sms569',
 'sms570',
 'sms571',
 'sms572',
 'sms573',
 'sms574',
 'sms575',
 'sms576',
 'sms577',
 'sms578',
 'sms579',
 'sms580',
 'sms581',
 'sms582',
 'sms583',
 'sms584',
 'sms585!',
 'sms586',
 'sms587',
 'sms588',
 'sms589',
 'sms590!',
 'sms591',
 'sms592',
 'sms593',
 'sms594!',
 'sms595',
 'sms596',
 'sms597',
 'sms598!',
 'sms599',
 'sms600',
 'sms601',
 'sms602!',
 'sms603',
 'sms604',
 'sms605',
 'sms606',
 'sms607',
 'sms608',
 'sms609',
 'sms610!',
 'sms611',
 'sms612',
 'sms613',
 'sms614',
 'sms615',
 'sms616',
 'sms617!',
 'sms618',
 'sms619',
 'sms620',
 'sms621',
 'sms622',
 'sms623',
 'sms624',
 'sms625',
 'sms626',
 'sms627',
 'sms628',
 'sms629',
 'sms630',
 'sms631',
 'sms632',
 'sms633',
 'sms634',
 'sms635',
 'sms636',
 'sms637',
 'sms638!',
 'sms639',
 'sms640',
 'sms641',
 'sms642',
 'sms643',
 'sms644',
 'sms645',
 'sms646',
 'sms647',
 'sms648',
 'sms649',
 'sms650',
 'sms651',
 'sms652',
 'sms653',
 'sms654',
 'sms655',
 'sms656',
 'sms657!',
 'sms658',
 'sms659',
 'sms660',
 'sms661',
 'sms662!',
 'sms663',
 'sms664',
 'sms665',
 'sms666',
 'sms667',
 'sms668',
 'sms669',
 'sms670',
 'sms671',
 'sms672!',
 'sms673',
 'sms674',
 'sms675',
 'sms676',
 'sms677!',
 'sms678',
 'sms679',
 'sms680',
 'sms681',
 'sms682',
 'sms683',
 'sms684',
 'sms685',
 'sms686',
 'sms687',
 'sms688',
 'sms689',
 'sms690',
 'sms691',
 'sms692',
 'sms693',
 'sms694',
 'sms695',
 'sms696',
 'sms697',
 'sms698',
 'sms699',
 'sms700!',
 'sms701',
 'sms702',
 'sms703!',
 'sms704',
 'sms705!',
 'sms706',
 'sms707',
 'sms708',
 'sms709',
 'sms710',
 'sms711',
 'sms712',
 'sms713',
 'sms714',
 'sms715!',
 'sms716',
 'sms717',
 'sms718',
 'sms719!',
 'sms720',
 'sms721',
 'sms722',
 'sms723',
 'sms724!',
 'sms725',
 'sms726',
 'sms727!',
 'sms728!',
 'sms729',
 'sms730',
 'sms731',
 'sms732',
 'sms733',
 'sms734',
 'sms735',
 'sms736',
 'sms737',
 'sms738',
 'sms739',
 'sms740',
 'sms741',
 'sms742',
 'sms743',
 'sms744!',
 'sms745',
 'sms746!',
 'sms747',
 'sms748',
 'sms749',
 'sms750',
 'sms751',
 'sms752!',
 'sms753',
 'sms754',
 'sms755',
 'sms756',
 'sms757',
 'sms758',
 'sms759',
 'sms760!',
 'sms761',
 'sms762',
 'sms763',
 'sms764',
 'sms765',
 'sms766',
 'sms767',
 'sms768',
 'sms769',
 'sms770',
 'sms771',
 'sms772',
 'sms773!',
 'sms774',
 'sms775!',
 'sms776',
 'sms777!',
 'sms778',
 'sms779',
 'sms780',
 'sms781!',
 'sms782',
 'sms783',
 'sms784!',
 'sms785',
 'sms786',
 'sms787',
 'sms788',
 'sms789',
 'sms790',
 'sms791',
 'sms792!',
 'sms793',
 'sms794',
 'sms795',
 'sms796',
 'sms797!',
 'sms798',
 'sms799',
 'sms800',
 'sms801!',
 'sms802',
 'sms803',
 'sms804',
 'sms805',
 'sms806',
 'sms807',
 'sms808',
 'sms809!',
 'sms810',
 'sms811',
 'sms812',
 'sms813',
 'sms814',
 'sms815',
 'sms816',
 'sms817',
 'sms818!',
 'sms819',
 'sms820',
 'sms821',
 'sms822',
 'sms823',
 'sms824',
 'sms825',
 'sms826',
 'sms827!',
 'sms828',
 'sms829',
 'sms830',
 'sms831',
 'sms832',
 'sms833',
 'sms834',
 'sms835',
 'sms836',
 'sms837',
 'sms838',
 'sms839',
 'sms840',
 'sms841',
 'sms842',
 'sms843',
 'sms844',
 'sms845',
 'sms846',
 'sms847',
 'sms848',
 'sms849!',
 'sms850',
 'sms851!',
 'sms852',
 'sms853',
 'sms854',
 'sms855',
 'sms856',
 'sms857',
 'sms858',
 'sms859',
 'sms860!',
 'sms861',
 'sms862!',
 'sms863',
 'sms864',
 'sms865',
 'sms866',
 'sms867',
 'sms868',
 'sms869',
 'sms870',
 'sms871',
 'sms872!',
 'sms873',
 'sms874',
 'sms875',
 'sms876!',
 'sms877',
 'sms878',
 'sms879',
 'sms880!',
 'sms881',
 'sms882!',
 'sms883',
 'sms884!',
 'sms885',
 'sms886',
 'sms887',
 'sms888',
 'sms889',
 'sms890',
 'sms891',
 'sms892',
 'sms893',
 'sms894',
 'sms895',
 'sms896',
 'sms897',
 'sms898',
 'sms899',
 'sms900',
 'sms901',
 'sms902',
 'sms903',
 'sms904',
 'sms905',
 'sms906',
 'sms907!',
 'sms908',
 'sms909',
 'sms910',
 'sms911',
 'sms912',
 'sms913',
 'sms914',
 'sms915',
 'sms916',
 'sms917',
 'sms918!',
 'sms919',
 'sms920',
 'sms921',
 'sms922',
 'sms923',
 'sms924!',
 'sms925',
 'sms926',
 'sms927',
 'sms928',
 'sms929!',
 'sms930',
 'sms931',
 'sms932',
 'sms933',
 'sms934',
 'sms935',
 'sms936',
 'sms937',
 'sms938',
 'sms939',
 'sms940',
 'sms941',
 'sms942',
 'sms943',
 'sms944',
 'sms945',
 'sms946',
 'sms947',
 'sms948',
 'sms949',
 'sms950',
 'sms951',
 'sms952',
 'sms953',
 'sms954',
 'sms955',
 'sms956',
 'sms957',
 'sms958',
 'sms959',
 'sms960',
 'sms961',
 'sms962!',
 'sms963!',
 'sms964',
 'sms965',
 'sms966',
 'sms967',
 'sms968',
 'sms969',
 'sms970',
 'sms971',
 'sms972',
 'sms973!',
 'sms974',
 'sms975',
 'sms976',
 'sms977',
 'sms978',
 'sms979',
 'sms980',
 'sms981',
 'sms982',
 'sms983!',
 'sms984',
 'sms985',
 'sms986',
 'sms987',
 'sms988',
 'sms989',
 'sms990',
 'sms991',
 'sms992',
 'sms993',
 'sms994',
 'sms995',
 'sms996',
 'sms997',
 'sms998',
 'sms999',
 ...]
In [4]:
sms.spam
Out[4]:
0       0
1       0
2       1
3       0
4       0
       ..
4832    1
4833    0
4834    0
4835    0
4836    0
Name: spam, Length: 4837, dtype: int64
In [5]:
sms = pd.DataFrame(sms.values, columns=sms.columns, index=index)
sms['spam'] = sms.spam.astype(int)
len(sms)
sms.spam.sum()
sms.head(6)
Out[5]:
spam text
sms0 0 Go until jurong point, crazy.. Available only ...
sms1 0 Ok lar... Joking wif u oni...
sms2! 1 Free entry in 2 a wkly comp to win FA Cup fina...
sms3 0 U dun say so early hor... U c already then say...
sms4 0 Nah I don't think he goes to usf, he lives aro...
sms5! 1 FreeMsg Hey there darling it's been 3 week's n...
In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.tokenize.casual import casual_tokenize
tfidf_model = TfidfVectorizer(tokenizer = casual_tokenize)
tfidf_docs = tfidf_model.fit_transform(raw_documents = sms.text).toarray()
print(tfidf_docs.shape)
sms.spam.sum()
(4837, 9232)
Out[6]:
638
In [7]:
# select only spam/ham
mask = sms.spam.astype(bool).values
mask
Out[7]:
array([False, False,  True, ..., False, False, False])
In [8]:
# calculating each column independently using axis = 0
spam_centroid = tfidf_docs[mask].mean(axis=0)
spam_centroid
Out[8]:
array([0.06377591, 0.0041675 , 0.00056204, ..., 0.        , 0.        ,
       0.        ])
In [9]:
ham_centroid = tfidf_docs[~mask].mean(axis=0)
ham_centroid
Out[9]:
array([1.98493115e-02, 6.09435187e-03, 1.77747817e-04, ...,
       6.31869803e-05, 6.31869803e-05, 6.31869803e-05])
In [10]:
# Get the line between them by subtracting one from the other
spamminess_score = tfidf_docs.dot(spam_centroid - ham_centroid)
ss = spamminess_score.round(2)
ss
Out[10]:
array([-0.01, -0.02,  0.04, ..., -0.01, -0.  ,  0.  ])
In [11]:
from sklearn.preprocessing import MinMaxScaler
sms['lda_score'] = MinMaxScaler().fit_transform(spamminess_score.reshape(-1,1))
sms['lda_predict'] = (sms.lda_score > .5).astype(int)
sms['spam lda_predict lda_score'.split()].round(2).head(6)
Out[11]:
spam lda_predict lda_score
sms0 0 0 0.23
sms1 0 0 0.18
sms2! 1 1 0.72
sms3 0 0 0.18
sms4 0 0 0.29
sms5! 1 1 0.55
In [12]:
from pugnlp.stats import Confusion
Confusion(sms['spam lda_predict'.split()])
/Users/danielcaraway/anaconda3/envs/nlpiaenv/lib/python3.6/site-packages/pugnlp/stats.py:504: UserWarning: Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access
  self.__setattr__('_hist_labels', self.sum().astype(int))
/Users/danielcaraway/anaconda3/envs/nlpiaenv/lib/python3.6/site-packages/pugnlp/stats.py:510: UserWarning: Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access
  setattr(self, '_hist_classes', self.T.sum())
Out[12]:
lda_predict 0 1
spam
0 4135 64
1 45 593
In [13]:
(1. - (sms.spam - sms.lda_predict).abs().sum() / len(sms)).round(3)
Out[13]:
0.977
In [14]:
## =======================================================
## IMPORTING
## =======================================================
import os
import pandas as pd
from nlpia.data.loaders import get_data
pd.options.display.width = 120

def get_data_from_files(path):
    directory = os.listdir(path)
    results = []
    for file in directory:
        f=open(path+file,  encoding = "ISO-8859-1")
        results.append(f.read())
        f.close()
    return results
In [15]:
sms = get_data('sms-spam')
hw8 = get_data_from_files('110/110-f-d/')
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-15-0f1ff1b26e44> in <module>
      1 sms = get_data('sms-spam')
----> 2 hw8 = get_data_from_files('110/110-f-d/')

<ipython-input-14-aca3d0b6665d> in get_data_from_files(path)
      8 
      9 def get_data_from_files(path):
---> 10     directory = os.listdir(path)
     11     results = []
     12     for file in directory:

FileNotFoundError: [Errno 2] No such file or directory: '110/110-f-d/'
In [ ]:
from nlpia.book.examples.ch04_catdog_lsa_3x6x16\
import word_topic_vectors
word_topic_vectors.T.round(1)
In [ ]:
word_topic_vectors
In [ ]:
from nlpia.book.examples.ch04_catdog_lsa_sorted\
import lsa_models, prettify_tdm
bow_svd, tfidf_svd = lsa_models()
prettify_tdm(**bow_svd)
In [ ]:
tdm = bow_svd['tdm']
In [ ]:
tdm
In [16]:
# import numpy as np
# U, s, Vt = np.linalg.svd(tdm)
# import pandas as pd
# pd.DataFrame(U, index=tdm.index).round(2)

import numpy as np
U, s, Vt = np.linalg.svd(tdm)
import pandas as pd
pd.DataFrame(U, index = tdm.index).round(2)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-16-11810bd0e083> in <module>
      5 
      6 import numpy as np
----> 7 U, s, Vt = np.linalg.svd(tdm)
      8 import pandas as pd
      9 pd.DataFrame(U, index = tdm.index).round(2)

NameError: name 'tdm' is not defined
In [17]:
cd = get_data('cats_and_dogs_sorted')
100%|██████████| 263/263 [00:00<00:00, 44447.66it/s]
In [18]:
cd
Out[18]:
array(['NYC is the Big Apple.', 'NYC is known as the Big Apple.',
       'I love NYC!', 'I wore a hat to the Big Apple party in NYC.',
       'Come to NYC. See the Big Apple!',
       'Manhattan is called the Big Apple.',
       'New York is a big city for a small cat.',
       'The lion, a big cat, is the king of the jungle.',
       'I love my pet cat.', 'I love New York City (NYC).',
       'Your dog chased my cat.', 'Bright lights, big city?',
       "Simba, in Lion King, was inspired by Bambi who wasn't even a cat.",
       'Does your dog have a dog house?',
       'The cat steered clear of the dog house.', 'I love turtles.',
       'Bengi was a small stray dog with a fluffy tan spotted coat.',
       'The woman flew to NYC with her cat.',
       'That dog is a big animal. He must eat a lot.',
       'How big is New York? Is it a big city?',
       'The dog ran through Central Park in NYC.', 'Where is NYC?',
       "The Cat's Meow", 'The dog sat on the floor.',
       'The cat chased a mouse.', 'Dogs and cats love raw meat.',
       'The cat never made eye contact.',
       'The cat chased my laser pointer.', 'I pet the cat.',
       'The cat died.', 'A dog chased the car, barking.',
       'Mom and Mormor both love turtles.',
       'The cat ate the bearded dragon.', 'A cat burglar stole my pets.',
       'I chased your dog.',
       'The lion tamer rode on top of the clown car into the ring carrying a chair.',
       'NYC is a city that never sleeps.',
       'I was in the dog house last night.', 'The cat in the window',
       'He refused to sleep in the dog house.',
       'Dogs love to smell the air rushing by in a car.',
       'Your cat is cute.', 'A cat meowed on the hot tin roof',
       'A dog chased my bike and barked loudly.',
       'The dog ate my orchids.', 'The dog dropped the ball at my feet.',
       'Dogs love to chase cars, trucks, and bikes.',
       'So I went to NYC to be born again.',
       'The man raised the lion from a cub and they still frolic in the jungle.',
       'Wolves, dogs, and puppies love to play chase.',
       'The dog rescued me from a hairy . ', 'He makes a good guard dog.',
       'The dog walked up and sniffed my leg.',
       'The cat hated getting in the car.', "The cat licked it's fur.",
       'The black cat crossed my path.', 'The post man likes our dog.',
       'The dog likes a scratch behind his ear.',
       'He pet the dog on the head.', 'Mom loves to walk through NYC.',
       'Rascal was a tabby cat.',
       'NYC is the only city where you can hardly find a typical American.',
       'My cat has long hair.', 'She was an alley cat.',
       'Can your dog do tricks?', 'Fido chased the cat up the alley.',
       'The cat chased a speedy rat.',
       "Dogs don't have much room to run in a big city.",
       'The woman took her dog on the plane.',
       'Is NYC a city or a way of life?',
       'My dog is a good boy most of the time.',
       'The cat coughed up a hair ball.', 'The cat died at the vet.',
       'The fireman rescued the cat in the tree.',
       "Cat, you ruined mom's dress!",
       'The dog whined until I pet its head.',
       'The dog chased the ball and caught it.',
       'The dog jumped up on the bed.',
       'I ran from the dog and jumped the bench.',
       'Ashley had a wiener dog that she took boating up the river.',
       'He took his life in his hands, j-walking in NYC.',
       'A stray cat played with the injured frog.',
       'The lion opened his mouth wide as she put her head into his mouth.',
       'Bengi was a movie about the adventures of a lovable stray dog.',
       'The tabby cat had a fluffy tail.',
       'The cat likes a scratch under her chin.',
       'The dog flew down the street after the bike.',
       'The man had a cat in his carry-on.',
       'The cat crossed the lane and then the sidewalk.',
       'Moon got mauled in a fight with an alley cat.',
       'A black cat crossed the sidewalk in front of me.',
       "The cat held the lizard's tail in its mouth.",
       'The raccoons ate all the cat food in the garage.',
       'The dog wash was just a hose and they hated it.',
       'There are no lions in NYC, but there are lots of house cats.',
       'The cat meowed and I pet it until it purred.',
       'Rascal was an alley cat before she became a Lane pet.',
       'A car struck the cat and we took it to the vet.',
       'A cat pounced at the lizard but came away with only its tail.',
       "An old dog can learn new tricks if there's food involved.",
       'Our Bengi was a mottled tan dog that loved to run around the yard.',
       "Animals don't drive cars, but my pet dog likes to stick his head out the window.",
       'The Cat in the Hat is not about an animal or a hat.',
       'Ursa was smart and deceptive.',
       'Are there fish in your fountain?',
       'Australian sheep dogs are smarter than the sheep.',
       'Are there fish in the pond?', 'Are there turtles in your pond?',
       'How many pets do you have at home?',
       'Where do you keep a turtle in your house?',
       'Carnivore cunning and cooperation makes them smarter than herbivores.',
       'No lone wolf would dare attack a lone moose or adult caribou.',
       'I loved frogs and the color green.',
       'Wolf puppies play with crows.',
       'Crows help wolves track down prey and wolves share the kill.',
       'Toxoplasmosis will change your mind.',
       'America is littered with Toxoplasmosis.',
       'Billy never had any pets at his house.',
       'Ursa would inch her way into the dining room sheepishly.',
       'Walter and Moon taught me how to crack video games.',
       'Mice get attracted to the smell of cats when the have Toxoplasmosis.',
       "No man is an island, unless he's a lone wolf like Walter Anderson.",
       'Alligators and wolves compete for food on Horn Island.',
       'He use duck tape to keep its mouth closed.',
       'The ranger dragged the alligator over the seawall.',
       'What about frogs in the pond?',
       'Clinton helped Clayton catch the alligator.',
       'The Inner Harbor had an alligator, some turtles and lots of fish.',
       'An alligator ate several pets and ducks before Clayton caught him.',
       'Can lizards swim under water?',
       'Char drooled with Pavlovian delight at the hotdog in my hand.',
       "Rascal hated the car because it's associated with the vet.",
       'She caught a frog with her paw.',
       'She bit the frog with her teeth.',
       'Berk, the vet, has ideas about sports games for people.',
       "You don't get a fever from Toxoplasmosis, you just get aggressive.",
       'Bear loved to hang his head out the truck window.',
       'Wild cats chase bikes and runners but not cars or trucks.',
       'Ursa used to chase her tail when she was young.',
       'Lizards, turtles, and alligators are kind-of green and slimy.',
       "The dogs licked my plate so I didn't have to wash it.",
       'Dogs wag their tail when they are happy.',
       'Ursa, a black lab, would beat her tail against the wall until it was raw.',
       'Bear lapped water from the hose with his tongue.',
       'Rascal lapped milk from her bowl, curling her tongue.',
       'Bear was bloody and panting after mauling the goats and sheep.',
       'Will cuddled with Moon and Zoe on the couch.',
       'Char and Ursa played on the green grass in the back yard.',
       'Rascal was a stray when we found her in a tree in the back yard.',
       'A black kitten crossed the road dodging cars like Frogger.',
       'Goats and sheep make great lawn mowers for a boat yard.',
       'Bear lapped out of the truck window.',
       'The Inner Harbor was our playground.',
       'The dogs were not allowed on the couch or in the dining room.',
       'Lane rescued Moon with Will power.',
       'Men become more gullible once they get Toxoplasmosis.',
       'Wolves eat deer and stay away from sheep if they smell humans.',
       'Ants get a virus that makes crawl to the tip of a blade of grass.',
       'Brian wanted to start an alligator farm.',
       'Humans harbor infectious diseases from domestic pets.',
       'Our neighbors raised baby alligators.',
       'Early humans slept in the barn with domesticated animals.',
       'Sheep and deer eat grass.', 'My brother had an aquarium.',
       'Some lizards can grow a new tail.',
       'Mormor loved turtles and had them all over her house.',
       'The litter box smells.', 'The snapping turtle won the race.',
       'Jupiter and Moon each had their own food bowl.',
       'A turtle beat the rabbit in a race.',
       'Cats and dogs sleeping together.',
       'Women become more trusting once they get Toxoplasmosis.',
       "It's Berk a vet?", 'I froze as he sniffed.',
       'I rode my bike home.', 'Mom loves to walk around Manhattan.',
       'Give me such shows — give me the streets of Manhattan!',
       'She loves dogs.', 'Is that a pet rat in your carry-on?',
       'He put a hat on his head.', 'He put his hat in the overhead bin.',
       'The car is in the garage.', 'Dogs like to chase cars.',
       'The car had a bike rack.',
       'Marc steered my bike into a parked car.',
       'A cute kitten played with its mother.',
       'Where do you live little guy?', 'Where did you come from?',
       'You sure are cute.', 'Go lie down on your bed.',
       'Be a good boy and go on home now.', "That's a good boy.",
       'It rained cats and dogs.', 'She keeps a clean house.',
       'She took the train into the city to see the ball drop.',
       'The snake chased the rat.', 'Rascal taught me empathy and care.',
       "Snakes aren't usually considered pets.",
       "She doesn't like hats in the car but I do.",
       'She rode her bike though central park wearing a hat.',
       "Animals, including pets, don't like riding in cars.",
       "Cats don't like riding into the city in a car.",
       'The rat ran into a hole in the back.',
       'The rat ate a hole in his hat.', 'How many dogs are in the city?',
       'Where is Soho? In New York City?',
       'Look at me! Look at me! Look at me NOW!',
       'It is fun to have fun. But you have to know how.',
       'Honey, it was ruined when she bought it.',
       '"He should not be here," said the fish in the pot. ',
       '"He should not be here when your mother is not.” ',
       'Speedy was too fast for Sylvester.', 'You are an animal.',
       "You're an animal!", 'Animals are not allowed on this flight.',
       'Some flights allow animals in carry-ons.',
       "Snakes aren't usually allowed on planes.",
       'The litter box is in the back of the house.',
       'Do all dogs go to heaven?', 'Cats and dogs playing together.',
       'Kittens are cute.', "Jupiter's hair stood on end.",
       'The lizard aquarium was moist.', 'Turtles need water.',
       'Cats hat water.', 'I chased the ferret with a water pistol.',
       'The ferret got struck by lightening.', 'We have a car carrier.',
       'Algernon lost his mind.',
       'Flowers for Algernon is my favorite book.',
       'The puppy played in the flower bed.', 'She brought me flowers.',
       'I kept the compost full of worms and the flowers bloomed.',
       'Her orchids and my Amaryllis bloomed the same day.',
       'Char are the flowers.', 'Marc chased rascal with a squirt gun.',
       'Rascal hid in the Cypress tree.',
       'I cried at the end of Algernon.',
       'Algernon taught me about animal consciousness, smarts.',
       'Books taught me how to read people.', 'Moon leapt into my lap.',
       'I want to be reborn as a Lane pet.', 'The giving tree gave out.',
       'Do you have a pet?', "That's a cute kitten.",
       'Old dogs can learn tricks.', 'Sit Ubu, sit.', 'Sit Char, sit.',
       'Sit Bear, sit.', 'I flew a kite.', 'It rained cats and dogs.',
       'Do dogs go to heaven?', 'What kind of pet do you have?',
       'Ursa ran a squirrel up the tree.', 'The catbird seat',
       'Lindstrom pets are spoiled.',
       "I painted Turtle's shell with nail polish.",
       'I named my pet rock Rocky.', 'Are you a vet?',
       'My flowers are blooming.',
       "A single flower grew in Benji's grave.",
       'Char chased the squirrel.',
       'I gnawed the frog legs with my teeth.'], dtype=object)
In [19]:
data = pd.DataFrame(cd)
data.to_csv('cats_and_dogs_sorted.csv')

4.4.2

In [20]:
sms = get_data('sms-spam')
In [76]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.tokenize.casual import casual_tokenize
 
tfidf = TfidfVectorizer(tokenizer=casual_tokenize)
tfidf_docs = tfidf.fit_transform(raw_documents=sms.text).toarray()
len(tfidf.vocabulary_)

tfidf_docs = pd.DataFrame(tfidf_docs)
tfidf_docs = tfidf_docs - tfidf_docs.mean()
tfidf_docs.shape

sms.spam.sum()
Out[76]:
638
In [77]:
from sklearn.decomposition import PCA
pca = PCA(n_components = 16)
pca = pca.fit(tfidf_docs)
pca_topic_vectors = pca.transform(tfidf_docs)
columns = ['topic{}'.format(i) for i in range(pca.n_components)]
pca_topic_vectors = pd.DataFrame(pca_topic_vectors, columns = columns, index = index)
pca_topic_vectors.round(3).head(6)
Out[77]:
topic0 topic1 topic2 topic3 topic4 topic5 ... topic10 topic11 topic12 topic13 topic14 topic15
sms0 0.201 0.003 0.037 0.011 -0.019 -0.053 ... 0.007 -0.008 -0.010 -0.037 -0.029 0.030
sms1 0.404 -0.094 -0.078 0.051 0.100 0.047 ... -0.005 0.039 -0.040 -0.026 0.045 -0.037
sms2! -0.030 -0.048 0.090 -0.067 0.091 -0.043 ... 0.127 0.021 -0.038 -0.009 -0.041 0.048
sms3 0.329 -0.033 -0.035 -0.016 0.052 0.056 ... 0.020 0.026 -0.069 -0.040 0.011 -0.078
sms4 0.002 0.031 0.038 0.034 -0.075 -0.093 ... 0.027 -0.008 -0.024 0.053 -0.074 -0.027
sms5! -0.016 0.059 0.014 -0.006 0.122 -0.040 ... 0.041 0.057 0.046 0.066 0.010 0.031

6 rows × 16 columns

In [41]:
# pca.components_
In [42]:
# sms
In [74]:
tfidf.vocabulary_
Out[74]:
{'go': 3807,
 'until': 8487,
 'jurong': 4675,
 'point': 6296,
 ',': 13,
 'crazy': 2549,
 '..': 21,
 'available': 1531,
 'only': 5910,
 'in': 4396,
 'bugis': 1973,
 'n': 5594,
 'great': 3894,
 'world': 8977,
 'la': 4811,
 'e': 3056,
 'buffet': 1971,
 '...': 25,
 'cine': 2277,
 'there': 8071,
 'got': 3855,
 'amore': 1296,
 'wat': 8736,
 'ok': 5874,
 'lar': 4848,
 'joking': 4642,
 'wif': 8875,
 'u': 8395,
 'oni': 5906,
 'free': 3604,
 'entry': 3195,
 '2': 471,
 'a': 1054,
 'wkly': 8933,
 'comp': 2386,
 'to': 8192,
 'win': 8890,
 'fa': 3328,
 'cup': 2608,
 'final': 3450,
 'tkts': 8180,
 '21st': 497,
 'may': 5272,
 '2005': 487,
 '.': 15,
 'text': 8020,
 '87121': 948,
 'receive': 6688,
 'question': 6574,
 '(': 9,
 'std': 7651,
 'txt': 8379,
 'rate': 6628,
 ')': 10,
 't': 7889,
 '&': 7,
 "c's": 2020,
 'apply': 1383,
 '08452810075': 115,
 'over': 6003,
 '18': 438,
 "'": 8,
 's': 6959,
 'dun': 3041,
 'say': 7034,
 'so': 7438,
 'early': 3069,
 'hor': 4207,
 'c': 2019,
 'already': 1268,
 'then': 8065,
 'nah': 5606,
 'i': 4311,
 "don't": 2948,
 'think': 8092,
 'he': 4048,
 'goes': 3819,
 'usf': 8537,
 'lives': 5004,
 'around': 1435,
 'here': 4104,
 'though': 8111,
 'freemsg': 3613,
 'hey': 4116,
 'darling': 2666,
 "it's": 4535,
 'been': 1693,
 '3': 591,
 "week's": 8788,
 'now': 5784,
 'and': 1310,
 'no': 5732,
 'word': 8967,
 'back': 1584,
 '!': 0,
 "i'd": 4312,
 'like': 4954,
 'some': 7454,
 'fun': 3677,
 'you': 9158,
 'up': 8489,
 'for': 3552,
 'it': 4533,
 'still': 7674,
 '?': 1037,
 'tb': 7955,
 'xxx': 9097,
 'chgs': 2230,
 'send': 7127,
 '£': 9216,
 '1.50': 344,
 'rcv': 6641,
 'even': 3240,
 'my': 5584,
 'brother': 1942,
 'is': 4519,
 'not': 5769,
 'speak': 7529,
 'with': 8918,
 'me': 5281,
 'they': 8083,
 'treat': 8312,
 'aids': 1214,
 'patent': 6106,
 'as': 1452,
 'per': 6148,
 'your': 9171,
 'request': 6796,
 'melle': 5315,
 'oru': 5968,
 'minnaminunginte': 5386,
 'nurungu': 5807,
 'vettam': 8599,
 'has': 4022,
 'set': 7154,
 'callertune': 2047,
 'all': 1253,
 'callers': 2046,
 'press': 6418,
 '*': 11,
 '9': 982,
 'copy': 2489,
 'friends': 3634,
 'winner': 8900,
 'valued': 8569,
 'network': 5678,
 'customer': 2620,
 'have': 4036,
 'selected': 7113,
 'receivea': 6689,
 '900': 986,
 'prize': 6450,
 'reward': 6851,
 'claim': 2283,
 'call': 2038,
 '09061701461': 263,
 'code': 2344,
 'kl341': 4771,
 'valid': 8565,
 '12': 384,
 'hours': 4226,
 'had': 3965,
 'mobile': 5441,
 '11': 371,
 'months': 5484,
 'or': 5946,
 'more': 5489,
 'r': 6590,
 'entitled': 3192,
 'update': 8495,
 'the': 8052,
 'latest': 4862,
 'colour': 2364,
 'mobiles': 5442,
 'camera': 2058,
 'co': 2333,
 'on': 5897,
 '08002986030': 99,
 "i'm": 4314,
 'gonna': 3834,
 'be': 1669,
 'home': 4176,
 'soon': 7483,
 'want': 8715,
 'talk': 7921,
 'about': 1076,
 'this': 8100,
 'stuff': 7741,
 'anymore': 1350,
 'tonight': 8235,
 'k': 4683,
 "i've": 4316,
 'cried': 2566,
 'enough': 3182,
 'today': 8199,
 'six': 7341,
 'chances': 2172,
 'cash': 2116,
 'from': 3652,
 '100': 354,
 '20,000': 482,
 'pounds': 6357,
 '>': 1035,
 'csh': 2584,
 '87575': 952,
 'cost': 2501,
 '150p': 415,
 '/': 27,
 'day': 2683,
 '6days': 827,
 '16': 431,
 '+': 12,
 'tsandcs': 8344,
 'reply': 6788,
 'hl': 4148,
 '4': 659,
 'info': 4433,
 'urgent': 8513,
 'won': 8950,
 '1': 337,
 'week': 8787,
 'membership': 5321,
 'our': 5980,
 '100,000': 355,
 'jackpot': 4564,
 ':': 1006,
 '81010': 900,
 'www.dbuk.net': 9039,
 'lccltd': 4880,
 'pobox': 6286,
 '4403ldnw1a7rw18': 696,
 'searching': 7081,
 'right': 6863,
 'words': 8968,
 'thank': 8037,
 'breather': 1912,
 'promise': 6487,
 'wont': 8958,
 'take': 7913,
 'help': 4089,
 'granted': 3883,
 'will': 8887,
 'fulfil': 3673,
 'wonderful': 8955,
 'blessing': 1802,
 'at': 1488,
 'times': 8158,
 'date': 2675,
 'sunday': 7805,
 'xxxmobilemovieclub': 9098,
 'use': 8531,
 'credit': 2556,
 'click': 2306,
 'wap': 8719,
 'link': 4977,
 'next': 5696,
 'message': 5340,
 'http://wap': 4259,
 'xxxmobilemovieclub.com': 9099,
 '=': 1031,
 'qjkgighjjgcbl': 6566,
 'oh': 5869,
 'watching': 8743,
 ':)': 1008,
 'eh': 3116,
 'remember': 6755,
 'how': 4233,
 'spell': 7545,
 'his': 4139,
 'name': 5612,
 'yes': 9137,
 'did': 2823,
 'v': 8553,
 'naughty': 5635,
 'make': 5193,
 'wet': 8828,
 'fine': 3458,
 'if': 4350,
 'that': 8045,
 '\x92': 9211,
 'way': 8753,
 'feel': 3400,
 'its': 4546,
 'gota': 3856,
 'b': 1560,
 'england': 3173,
 'macedonia': 5156,
 '-': 14,
 'dont': 2952,
 'miss': 5402,
 'goals': 3812,
 'team': 7968,
 'news': 5692,
 'ur': 8510,
 'national': 5629,
 '87077': 947,
 'eg': 3109,
 'try': 8340,
 'wales': 8695,
 'scotland': 7060,
 '4txt': 737,
 'ú1': 9220,
 '20': 481,
 'poboxox': 6287,
 '36504w45wq': 629,
 'seriously': 7147,
 '‘': 9225,
 'm': 5139,
 'going': 3823,
 'ha': 3961,
 'ü': 9221,
 'pay': 6119,
 'first': 3476,
 'when': 8840,
 'da': 2639,
 'stock': 7678,
 'comin': 2376,
 'aft': 1182,
 'finish': 3462,
 'lunch': 5121,
 'str': 7702,
 'down': 2974,
 'lor': 5058,
 'ard': 1410,
 'smth': 7422,
 'ffffffffff': 3420,
 'alright': 1269,
 'can': 2062,
 'meet': 5303,
 'sooner': 7485,
 'just': 4677,
 'forced': 3554,
 'myself': 5591,
 'eat': 3081,
 'slice': 7372,
 'really': 6670,
 'hungry': 4287,
 'tho': 8107,
 'sucks': 7778,
 'mark': 5230,
 'getting': 3767,
 'worried': 8981,
 'knows': 4782,
 'sick': 7289,
 'turn': 8362,
 'pizza': 6238,
 'lol': 5035,
 'always': 1279,
 'convincing': 2476,
 'catch': 2128,
 'bus': 1993,
 'are': 1411,
 'frying': 3660,
 'an': 1305,
 'egg': 3111,
 'tea': 7962,
 'eating': 3084,
 "mom's": 5463,
 'left': 4901,
 'dinner': 2858,
 'do': 2909,
 'love': 5081,
 "we're": 8760,
 'packing': 6032,
 'car': 2085,
 "i'll": 4313,
 'let': 4923,
 'know': 4779,
 "there's": 8074,
 'room': 6906,
 'ahhh': 1209,
 'work': 8970,
 'vaguely': 8560,
 'what': 8832,
 'does': 2923,
 'wait': 8689,
 "that's": 8048,
 'clear': 2300,
 'were': 8817,
 'sure': 7832,
 'being': 1713,
 'sarcastic': 7010,
 'why': 8868,
 'x': 9076,
 "doesn't": 2926,
 'live': 5000,
 'us': 8525,
 'yeah': 9125,
 'was': 8728,
 'apologetic': 1371,
 'fallen': 3352,
 'out': 5983,
 'she': 7199,
 'actin': 1123,
 'spoilt': 7572,
 'child': 2238,
 'caught': 2132,
 'till': 8152,
 'but': 1999,
 'we': 8757,
 "won't": 8951,
 'doing': 2938,
 'too': 8242,
 'badly': 1589,
 'cheers': 2214,
 'tell': 7985,
 'anything': 1356,
 'fear': 3391,
 'of': 5847,
 'fainting': 3344,
 'housework': 4231,
 'quick': 6577,
 'cuppa': 2610,
 'thanks': 8038,
 'subscription': 7766,
 'ringtone': 6872,
 'uk': 8415,
 'charged': 2184,
 '5': 745,
 'month': 5479,
 'please': 6265,
 'confirm': 2431,
 'by': 2016,
 'replying': 6790,
 'yup': 9192,
 'look': 5046,
 'timings': 8162,
 'msg': 5528,
 'again': 1190,
 'xuhui': 9093,
 'learn': 4892,
 '2nd': 567,
 'her': 4099,
 'lesson': 4921,
 '8am': 974,
 'oops': 5921,
 "roommate's": 6909,
 'done': 2950,
 'see': 7098,
 'letter': 4926,
 'decide': 2711,
 'hello': 4084,
 "how's": 4235,
 'saturday': 7024,
 'texting': 8027,
 "you'd": 9159,
 'decided': 2712,
 'tomo': 8224,
 'trying': 8342,
 'invite': 4493,
 'pls': 6273,
 'ahead': 1208,
 'watts': 8751,
 'wanted': 8716,
 'weekend': 8791,
 'abiola': 1072,
 'forget': 3560,
 'need': 5654,
 'crave': 2546,
 'most': 5499,
 'sweet': 7863,
 'arabian': 1407,
 'steed': 7658,
 'mmmmmm': 5431,
 'yummy': 9187,
 '07732584351': 62,
 'rodger': 6895,
 'burns': 1990,
 'tried': 8321,
 're': 6645,
 'sms': 7416,
 'nokia': 5744,
 'camcorder': 2056,
 '08000930705': 95,
 'delivery': 2750,
 'tomorrow': 8226,
 'who': 8859,
 'seeing': 7101,
 'hope': 4198,
 'man': 5203,
 'well': 8807,
 'endowed': 3163,
 'am': 1281,
 '<#>': 1024,
 'inches': 4401,
 'calls': 2053,
 'messages': 5344,
 'missed': 5405,
 "didn't": 2828,
 'get': 3760,
 'hep': 4098,
 'immunisation': 4379,
 'nigeria': 5708,
 'fair': 3345,
 'hopefully': 4201,
 'tyler': 8389,
 "can't": 2063,
 'could': 2511,
 'maybe': 5274,
 'ask': 1463,
 'bit': 1779,
 'stubborn': 7730,
 'hospital': 4214,
 'kept': 4730,
 'telling': 7986,
 'weak': 8762,
 'sucker': 7776,
 'hospitals': 4215,
 'suckers': 7777,
 'thinked': 8093,
 'time': 8154,
 'saw': 7033,
 'class': 2292,
 'gram': 3875,
 'usually': 8543,
 'runs': 6949,
 'half': 3977,
 'eighth': 3119,
 'smarter': 7395,
 'gets': 3763,
 'almost': 1264,
 'whole': 8862,
 'second': 7085,
 'fyi': 3693,
 'ride': 6862,
 'morning': 5493,
 "he's": 4050,
 'crashing': 2545,
 'place': 6240,
 'wow': 8997,
 'never': 5683,
 'realized': 6668,
 'embarassed': 3144,
 'accomodations': 1103,
 'thought': 8112,
 'liked': 4955,
 'since': 7314,
 'best': 1733,
 'seemed': 7105,
 'happy': 4011,
 '"': 1,
 'cave': 2136,
 'sorry': 7494,
 'give': 3788,
 'offered': 5855,
 'embarassing': 3145,
 'ac': 1089,
 'sptv': 7594,
 'new': 5687,
 'jersey': 4608,
 'devils': 2803,
 'detroit': 2797,
 'red': 6711,
 'wings': 8898,
 'play': 6255,
 'ice': 4330,
 'hockey': 4161,
 'correct': 2494,
 'incorrect': 4412,
 'end': 3158,
 'mallika': 5202,
 'sherawat': 7208,
 'yesterday': 9141,
 'find': 3455,
 '@': 1038,
 '<url>': 1030,
 'congrats': 2437,
 'year': 9126,
 'special': 7531,
 'cinema': 2278,
 'pass': 6094,
 'yours': 9176,
 '09061209465': 258,
 'suprman': 7830,
 'matrix': 5263,
 'starwars': 7638,
 'etc': 3230,
 'bx420': 2014,
 'ip4': 4502,
 '5we': 781,
 '150pm': 417,
 'later': 4861,
 'meeting': 5305,
 'where': 8846,
 'reached': 6652,
 'gauti': 3728,
 'sehwag': 7110,
 'odi': 5846,
 'series': 7145,
 'pick': 6211,
 '$': 5,
 'burger': 1985,
 'yourself': 9177,
 'move': 5513,
 'pain': 6039,
 'killing': 4754,
 'good': 3836,
 'joke': 4636,
 'girls': 3785,
 'situation': 7338,
 'seekers': 7102,
 'part': 6081,
 'checking': 2208,
 'iq': 4508,
 'roommates': 6910,
 'took': 8245,
 'forever': 3557,
 'come': 2371,
 'double': 2966,
 'check': 2204,
 'hair': 3972,
 'dresser': 2998,
 'said': 6980,
 'wun': 9024,
 'cut': 2624,
 'short': 7248,
 'nice': 5701,
 'pleased': 6266,
 'advise': 1165,
 'following': 3534,
 'recent': 6692,
 'review': 6849,
 'mob': 5439,
 'awarded': 1549,
 '1500': 414,
 'bonus': 1844,
 '09066364589': 306,
 'song': 7478,
 'dedicated': 2722,
 'which': 8853,
 'dedicate': 2721,
 'valuable': 8566,
 'frnds': 3643,
 'rply': 6925,
 'complimentary': 2406,
 'trip': 8322,
 'eurodisinc': 3234,
 'trav': 8304,
 'aco': 1119,
 '41': 679,
 '1000': 356,
 'dis': 2871,
 '6': 785,
 'morefrmmob': 5490,
 'shracomorsglsuplt': 7273,
 '10': 350,
 'ls1': 5103,
 '3aj': 638,
 'hear': 4062,
 'divorce': 2900,
 'barbie': 1620,
 'comes': 2373,
 "ken's": 4728,
 'plane': 6247,
 'wah': 8682,
 'lucky': 5114,
 'save': 7029,
 'money': 5470,
 'hee': 4075,
 'finished': 3464,
 'hi': 4120,
 'babe': 1574,
 'im': 4368,
 'wanna': 8713,
 'something': 7464,
 'xx': 9094,
 'performed': 6155,
 'waiting': 8692,
 'machan': 5158,
 'once': 5901,
 'thats': 8051,
 'cool': 2481,
 'gentleman': 3751,
 'dignity': 2848,
 'respect': 6816,
 'peoples': 6147,
 'very': 8598,
 'much': 5544,
 'shy': 7283,
 'pa': 6027,
 'operate': 5928,
 'after': 1183,
 'same': 6996,
 'looking': 5050,
 'job': 4623,
 "ta's": 7896,
 'earn': 3070,
 'ah': 1204,
 'stop': 7688,
 'urgnt': 8517,
 'real': 6662,
 'yo': 9152,
 'tickets': 8142,
 'one': 5903,
 'jacket': 4563,
 'used': 8532,
 'multis': 5553,
 'started': 7632,
 'requests': 6797,
 'came': 2057,
 'bed': 1686,
 'coins': 2350,
 'factory': 3335,
 'gotta': 3860,
 'nitros': 5727,
 'ela': 3124,
 'kano': 4708,
 'il': 4362,
 'download': 2975,
 'wen': 8811,
 'don': 2947,
 'stand': 7620,
 'close': 2313,
 'll': 5008,
 'another': 1332,
 'night': 5710,
 'spent': 7550,
 'late': 4858,
 'afternoon': 1185,
 'casualty': 2126,
 'means': 5291,
 "haven't": 4039,
 'any': 1346,
 'y': 9107,
 '42moro': 689,
 'includes': 4405,
 'sheets': 7203,
 'smile': 7403,
 'pleasure': 6268,
 'trouble': 8328,
 'pours': 6359,
 'rain': 6602,
 'sum': 7798,
 'hurts': 4297,
 'becoz': 1684,
 'someone': 7457,
 'loves': 5090,
 'smiling': 7407,
 'service': 7150,
 'representative': 6794,
 '0800 169 6031': 86,
 'between': 1741,
 '10am': 365,
 '9pm': 1002,
 'guaranteed': 3930,
 '5000': 755,
 'havent': 4040,
 'planning': 6251,
 'buy': 2004,
 'lido': 4937,
 '530': 767,
 'show': 7264,
 'collected': 2358,
 'simply': 7311,
 'password': 6102,
 'mix': 5421,
 '85069': 934,
 'verify': 8594,
 'usher': 8538,
 'britney': 1932,
 'fml': 3525,
 'po': 6284,
 'box': 1879,
 '5249': 764,
 'mk17': 5424,
 '92h': 990,
 '450ppw': 705,
 'telugu': 7991,
 'movie': 5516,
 'abt': 1084,
 'loads': 5014,
 'loans': 5016,
 'wk': 8928,
 'hols': 4174,
 'run': 6946,
 'forgot': 3565,
 'hairdressers': 3974,
 'appointment': 1386,
 'four': 3584,
 'shower': 7266,
 'beforehand': 1702,
 'cause': 2133,
 'prob': 6456,
 'coffee': 2345,
 'animation': 1319,
 'nothing': 5774,
 'else': 3138,
 'okay': 5877,
 'price': 6431,
 'long': 5042,
 'legal': 4904,
 'them': 8061,
 'ave': 1536,
 'ams': 1301,
 'gone': 3832,
 '4the': 735,
 'driving': 3007,
 'test': 8014,
 'yet': 9142,
 "you're": 9162,
 'mean': 5287,
 'guess': 3936,
 'gave': 3729,
 'boston': 1866,
 'men': 5326,
 'changed': 2174,
 'search': 7080,
 'location': 5019,
 'nyc': 5819,
 'cuz': 2631,
 'signin': 7299,
 'page': 6035,
 'says': 7038,
 'umma': 8423,
 'life': 4940,
 'vava': 8580,
 'lot': 5066,
 'dear': 2699,
 'wishes': 8912,
 'birthday': 1777,
 'making': 5197,
 'truly': 8335,
 'memorable': 5323,
 'aight': 1216,
 'hit': 4141,
 'would': 8993,
 'ip': 4501,
 'address': 1141,
 'considering': 2449,
 'computer': 2412,
 "isn't": 4528,
 'minecraft': 5380,
 'server': 7149,
 'grumpy': 3923,
 'old': 5889,
 'people': 6146,
 'mom': 5462,
 'better': 1738,
 'lying': 5135,
 'jokes': 4640,
 'worry': 8983,
 'busy': 1998,
 'plural': 6277,
 'noun': 5781,
 'research': 6802,
 'dinner.msg': 2859,
 'cos': 2499,
 'things': 8091,
 'scared': 7044,
 'mah': 5180,
 'loud': 5076,
 'gent': 3749,
 'contact': 2454,
 'last': 4855,
 'weekends': 8793,
 'draw': 2989,
 'shows': 7272,
 '09064012160': 282,
 'k52': 4691,
 '12hrs': 398,
 '150ppm': 419,
 'wa': 8677,
 'openin': 5925,
 'sentence': 7138,
 'formal': 3569,
 'anyway': 1360,
 'juz': 4682,
 'tt': 8348,
 'eatin': 3083,
 'puttin': 6554,
 'weight': 8798,
 'haha': 3968,
 'anythin': 1355,
 'happened': 4003,
 'entered': 3185,
 'cabin': 2025,
 "b'day": 1562,
 'boss': 1865,
 'felt': 3411,
 'askd': 1464,
 'invited': 4494,
 'apartment': 1365,
 'went': 8814,
 'specially': 7536,
 'holiday': 4171,
 'flights': 3502,
 'inc': 4399,
 'operator': 5929,
 '08712778109': 166,
 '10p': 368,
 'min': 5372,
 'goodo': 3846,
 'must': 5575,
 'friday': 3626,
 'egg-potato': 3112,
 'ratio': 6631,
 'tortilla': 8262,
 'needed': 5656,
 'hmm': 4153,
 'uncle': 8433,
 'informed': 4438,
 'paying': 6124,
 'school': 7050,
 'directly': 2865,
 'food': 3542,
 'private': 6447,
 '2004': 486,
 'account': 1107,
 'statement': 7641,
 '07742676969': 64,
 '786': 864,
 'unredeemed': 8477,
 'points': 6297,
 '08719180248': 213,
 'identifier': 4344,
 '45239': 707,
 'expires': 3307,
 '2000': 484,
 'caller': 2045,
 '5/9': 752,
 '03': 46,
 'landline': 4835,
 '09064019788': 288,
 '42wr29c': 690,
 'apples': 1381,
 'pairs': 6044,
 'malarky': 5199,
 'todays': 8205,
 'voda': 8645,
 'numbers': 5804,
 'ending': 3160,
 '7548': 856,
 '350': 624,
 'award': 1548,
 'match': 5251,
 '08712300220': 149,
 'quoting': 6589,
 '4041': 674,
 'standard': 7621,
 'rates': 6629,
 'app': 1375,
 'sao': 7004,
 'mu': 5542,
 'predict': 6392,
 "ü'll": 9222,
 'buying': 2007,
 'yetunde': 9144,
 "hasn't": 4024,
 'sent': 7137,
 'bother': 1869,
 'sending': 7129,
 'involve': 4498,
 "shouldn't": 7259,
 'imposed': 4386,
 'apologise': 1372,
 'girl': 3782,
 'del': 2740,
 'bak': 1597,
 'lucyxx': 5118,
 'tmorrow.pls': 8185,
 'accomodate': 1102,
 'answer': 1335,
 'sunshine': 7812,
 'quiz': 6584,
 'q': 6559,
 'top': 8253,
 'sony': 7480,
 'dvd': 3051,
 'player': 6257,
 'country': 2518,
 'algarve': 1245,
 'ansr': 1334,
 '82277': 907,
 'sp': 7516,
 'tyrone': 8394,
 'laid': 4827,
 'dogging': 2932,
 'locations': 5020,
 'direct': 2864,
 'join': 4631,
 "uk's": 8416,
 'largest': 4852,
 'bt': 1958,
 'txting': 8383,
 'gravel': 3888,
 '69888': 822,
 'nt': 5791,
 'ec2a': 3086,
 '31p': 611,
 '@150p': 1039,
 'haf': 3967,
 'msn': 5534,
 'yijue@hotmail.com': 9149,
 'him': 4132,
 'rooms': 6911,
 'befor': 1699,
 'activities': 1129,
 "you'll": 9161,
 'msgs': 5533,
 'chat': 2195,
 ...}
In [44]:
column_nums, terms = zip(*sorted(zip(tfidf.vocabulary_.values(), tfidf.vocabulary_.keys())))
In [49]:
terms[:5]
Out[49]:
('!', '"', '#', '#150', '#5000')
In [50]:
column_nums[:5]
Out[50]:
(0, 1, 2, 3, 4)
In [51]:
weights = pd.DataFrame(pca.components_, columns = terms, 
                       index = ['topic{}'.format(i) for i in range(16)])
In [54]:
pd.options.display.max_columns = 12
weights.head(4).round(3)
Out[54]:
! " # #150 #5000 $ ... 〨ud
topic0 -0.071 0.008 -0.001 -0.000 -0.001 0.003 ... -0.001 -0.001 -0.002 0.001 0.001 0.001
topic1 0.064 0.008 0.000 -0.000 -0.001 -0.001 ... -0.001 -0.001 0.003 0.001 0.001 0.001
topic2 0.071 0.027 0.000 0.001 0.002 0.000 ... 0.000 0.001 0.002 -0.001 -0.001 -0.001
topic3 -0.059 -0.032 -0.001 -0.000 -0.001 0.001 ... -0.000 -0.000 0.001 0.001 0.001 0.001

4 rows × 9232 columns

In [59]:
deals = weights['! ;) :) half off free crazy deal only $ 80 %'.split()].round(3) * 100
In [60]:
deals
Out[60]:
! ;) :) half off free crazy deal only $ 80 %
topic0 -7.1 0.1 -0.5 -0.0 -0.4 -2.0 -0.0 -0.1 -2.2 0.3 -0.0 -0.0
topic1 6.4 0.0 7.4 0.1 0.4 -2.3 -0.2 -0.1 -3.8 -0.1 -0.0 -0.2
topic2 7.1 0.2 -0.1 0.0 0.3 4.4 0.1 -0.1 0.7 0.0 0.0 0.1
topic3 -5.9 -0.3 -7.1 0.2 0.3 -0.2 0.0 0.1 -2.3 0.1 -0.1 -0.3
topic4 38.1 -0.1 -12.5 -0.1 -0.2 9.9 0.1 -0.2 3.0 0.3 0.1 -0.1
topic5 -26.5 0.1 -1.6 -0.3 -0.7 -1.4 -0.6 -0.2 -1.8 -0.9 0.0 0.0
topic6 -10.9 -0.5 19.9 -0.4 -0.9 -0.6 -0.2 -0.1 -1.4 -0.0 -0.0 -0.1
topic7 15.9 0.1 -18.3 0.8 0.8 -2.9 0.0 0.1 -1.8 -0.3 0.0 -0.1
topic8 35.0 0.2 5.9 -0.5 -0.5 0.3 -0.4 -0.4 3.0 -0.6 -0.0 -0.2
topic9 9.2 -0.3 18.7 1.4 -0.8 6.8 -0.5 -0.4 3.1 -0.5 -0.0 -0.0
topic10 -31.9 -0.2 -7.6 0.1 0.2 12.9 0.1 -0.0 0.2 -0.0 -0.0 -0.2
topic11 -23.0 -0.4 -15.8 -0.5 -1.2 8.1 0.0 -0.2 0.2 0.5 0.0 0.3
topic12 -22.9 -0.2 37.1 -0.1 0.2 -4.9 -0.6 0.2 3.4 0.2 -0.0 0.3
topic13 16.5 -0.2 27.3 -0.3 0.8 3.7 0.5 0.2 -2.8 -0.4 -0.0 -0.2
topic14 12.3 -0.3 21.3 -0.5 -1.2 -0.9 -0.0 0.2 4.1 -0.4 0.1 -0.4
topic15 -15.9 -0.3 1.9 1.1 -1.0 5.1 -0.4 0.5 -0.2 -0.3 0.0 -0.2
In [62]:
deals = weights['dog'.split()].round(3)
deals
Out[62]:
dog
topic0 0.000
topic1 0.002
topic2 -0.001
topic3 0.002
topic4 -0.002
topic5 -0.000
topic6 -0.001
topic7 0.000
topic8 -0.000
topic9 -0.002
topic10 -0.002
topic11 0.008
topic12 0.001
topic13 -0.002
topic14 -0.001
topic15 0.005
In [67]:
import os
def get_data_from_files(path):
    directory = os.listdir(path)
    results = []
    for file in directory:
        f=open(path+file)
        results.append(f.read())
        f.close()
    return results
data = get_data_from_files('../Documents/IST_736_TextMining/AmazonPhotoTextCorpus/')
df = pd.DataFrame(data)
all_df = df

import re
def clean_rogue_characters_2(string):
    return re.sub('[^0-9a-zA-Z.]+', ' ', string)
In [68]:
all_df['clean'] = all_df.apply(lambda x: clean_rogue_characters_2(x[0]), axis=1)
In [69]:
all_df
Out[69]:
0 clean
0 *)\niS\n\n11:23 1]\n\nQ Search\n\nIf | shine a... iS 11 23 1 Q Search If shine a white LED ligh...
1 6:51 .\n\n*)\n0\n\n® forums.nexusmods.com\n\ne... 6 51 . 0 forums.nexusmods.com ee BrettM fosaym...
2 6:52 al Se)\n\n® forums.nexusmods.com\n\n17 Ap... 6 52 al Se forums.nexusmods.com 17 Apr 2012 St...
3 4:06 aw Fe\n\n@ spokesman.com (4\n\nILULL. LL ... 4 06 aw Fe spokesman.com 4 ILULL. LL 1S SUUULI...
4 Parents do not own their children.\nNo one own... Parents do not own their children. No one owns...
5 o | changed shampoos, cut out dairy,\nlitres o... o changed shampoos cut out dairy litres of wat...
6 WEIS °2P)40p" o2ua8- yp <i\n\n \n\ndeyiaao SpJ... WEIS 2P 40p o2ua8 yp i deyiaao SpJ0M Jo 3eq su...
7 casispie:\nhugealienpie:\nthechubbynerd:\njust... casispie hugealienpie thechubbynerd just showe...
8 527k J @® 173k it, Share Oo\n\nelfmere * 16h\n... 527k J 173k it Share Oo elfmere 16h Why do you...
9 6:55 at > =)\n@ google.com (h\n= Google ©\nstr... 6 55 at google.com h Google strange women lyin...
10 1:51 =\n< Mail\n@ glassdoor.com\n\nas\nGitHub\... 1 51 Mail glassdoor.com as GitHub Policy Detai...
11 ™ ‘/fantasywriters\nu/SlinkySlang * 3h\n\nWrit... fantasywriters u SlinkySlang 3h Writing advic...
12 10:21 wil @\n\n< Mail\n\n@ jobs.capitalgroup.... 10 21 wil Mail jobs.capitalgroup.com Responsib...
13 177k ¥ @ 3.9k it, Share Oo\n\nGalacticPingvin ... 177k 3.9k it Share Oo GalacticPingvin 3h lemla...
14 a. Ricky Montgomery\n\n| am upset with my pare... a. Ricky Montgomery am upset with my parents f...
15 ig nyx5\n\ni prefer guys who make small dick j... ig nyx5 i prefer guys who make small dick joke...
16 427% @ 27 it, Share Oo\n\nmrtrouble22 % + 4h »... 427 27 it Share Oo mrtrouble22 4h Believer i r...
17 6:52 al Se)\n\n@ forums.nexusmods.com (4\n\nas... 6 52 al Se forums.nexusmods.com 4 as Soon as y...
18 11:54 a eG)\n\nsciencedaily.com\n\n- —_—_—_\n\... 11 54 a eG sciencedaily.com The medical commun...
19 @ lore-54352452524-deactivated201\n\nyou can r... lore 54352452524 deactivated201 you can reple...
20 4128 @ 8 it, Share We)\n\nDatadevourer * 3h\n\... 4128 8 it Share We Datadevourer 3h Recently st...
21 @ 53.2k § @ 176k it, Share o\n\nPyroSnakel41 «... 53.2k 176k it Share o PyroSnakel41 13h Become...
22 r/nosurf\nu/fibonacciseries * 9d\n\nTurning on... r nosurf u fibonacciseries 9d Turning on the G...
23 * Vote J ™ 10 it, Share Oo\n\nit2051229 * 36m\... Vote J 10 it Share Oo it2051229 36m In the be...
24 4:50 >\nQ Search\nNews Home Popular\n\n20k J @... 4 50 Q Search News Home Popular 20k J 15 it Sh...
25 e205 ™ 16 it, Share o\n\niseemath ° 18h\n\n| t... e205 16 it Share o iseemath 18h think you ll f...
26 @ 124k J @ 49k it, Share Oo\nGo © 234\n\n15 MO... 124k J 49k it Share Oo Go 234 15 MORE REPLIES...
27 @ 124k J @ 49k it, Share Oo\n\n10 MORE REPLIES... 124k J 49k it Share Oo 10 MORE REPLIES sudden...
28 2:07 al > a\n\n2 Messages\n\n€ Back Front? Bac... 2 07 al a 2 Messages Back Front Back NV I m no...
29 527k J @® 173k it, Share Oo\nQ ® Reply # 56k H... 527k J 173k it Share Oo Q Reply 56k H insertca...
30 @ 124k J @ 49k it, Share Oo\n\n2 MORE REPLIES\... 124k J 49k it Share Oo 2 MORE REPLIES 1 MORE ...
31 f r/stopdrinking\nu/creaturefeaturel6 * 10h * ... f r stopdrinking u creaturefeaturel6 10h 689 d...
32 6:51 al Se)\n\n@ forums.nexusmods.com (4\n\n \... 6 51 al Se forums.nexusmods.com 4 ToniPrufrock...
33 12:40 1 Fe)\n\nice;comftort ae]\niY wsyusually... 12 40 1 Fe ice comftort ae iY wsyusuallyjread ...
34 8:00 wl LTE @ )\n@ google.com (hy\n\nIt was Oc... 8 00 wl LTE google.com hy It was October 28 19...
35 86k | @ 3.5k it, Share Oo\n\nMrBOOMbabdtlc « 7... 86k 3.5k it Share Oo MrBOOMbabdtlc 7h flinty d...
36 21k J ® 2.1k it, Share Oo\n\nWw DOO! VYUNINIEN... 21k J 2.1k it Share Oo Ww DOO VYUNINIENISO WwW...
37 4 147k J @ 314 it, Share Oo\n\nmikevago ¢ 4h\n... 4 147k J 314 it Share Oo mikevago 4h One of my...
38 631 4 ™ 59 it, Share Oo\n\nPublicFigurex % » 2... 631 4 59 it Share Oo PublicFigurex 28d It s fr...
39 r/EatCheapAndHealthy\nu/chickentender1995 ° 12... r EatCheapAndHealthy u chickentender1995 12h B...
40 423 M6 it, Share Oo\n\nspecific or more detail... 423 M6 it Share Oo specific or more detailed w...
41 9:01 li LTE@ )\n<4 Clock\n\nQ Search (ED SJ o\... 9 01 li LTE 4 Clock Q Search ED SJ o Atwater V...
42 What is the most a dollar has ever gotten\nyou... What is the most a dollar has ever gotten you ...
43 2:48 at\n\n<O @\n\nAli Ho >\n\n*)\n8\n\nToday ... 2 48 at O Ali Ho 8 Today 10 40 AM Recommendati...
44 27k @ 6.2k it, Share Oo\n\nEmpurpledprose « 18... 27k 6.2k it Share Oo Empurpledprose 18h wrote ...
45 6:52 wi Fe\n4\n\n@ forums.nexusmods.com\n\nbes... 6 52 wi Fe 4 forums.nexusmods.com best arrows ...
46 4128 @ 8 it, Share We)\n\nUMNKINg ADOUL It ana... 4128 8 it Share We UMNKINg ADOUL It ana e naQ ...
47 @ 124k J @ 49k it, Share Oo\n\n5 MORE REPLIES\... 124k J 49k it Share Oo 5 MORE REPLIES HonchoM...
48 Being a pet owner is like being a\nsugar daddy... Being a pet owner is like being a sugar daddy....
49 Dana Schwartz @\n@DanaSchwartzzz\n\nBELLE: The... Dana Schwartz DanaSchwartzzz BELLE There goes ...
50 7:42 all > @)\n€ All inboxes “N\n\nParker from... 7 42 all All inboxes N Parker from Interview C...
51 stupid bumps are that painful, pus filled kind... stupid bumps are that painful pus filled kinda...
52 @ spokesman.com (4\n\nA. Jock itch is normally... spokesman.com 4 A. Jock itch is normally caus...
53 10:21\n< Mail\n\nef > &)\n\n@ jobs.capitalgrou... 10 21 Mail ef jobs.capitalgroup.com Date Oct 1...
54 47g Mo it, Share Oo\n\nVULTESIUPIILVIAVII * CI... 47g Mo it Share Oo VULTESIUPIILVIAVII CII Step...
55 a couple of scenes later when Mr. Robot is\nwe... a couple of scenes later when Mr. Robot is wea...
56 27k @ 6.2k it, Share Oo\n\nTell Rachel | said ... 27k 6.2k it Share Oo Tell Rachel said Hi. o 9 ...
57 10:01 WF\n4\n\n@ google.com\n\n= Google ©\ndea... 10 01 WF 4 google.com Google deacon fallout 4 ...
In [91]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.tokenize.casual import casual_tokenize
from sklearn.decomposition import PCA
 
def get_pca(data, index):
    tfidf = TfidfVectorizer(tokenizer=casual_tokenize)
    tfidf_docs = tfidf.fit_transform(raw_documents=data).toarray()
    # len(tfidf.vocabulary_)

    tfidf_docs = pd.DataFrame(tfidf_docs)
    tfidf_docs = tfidf_docs - tfidf_docs.mean()
    
    

    
    pca = PCA(n_components = 16)
    pca = pca.fit(tfidf_docs)
    pca_topic_vectors = pca.transform(tfidf_docs)
    columns = ['topic{}'.format(i) for i in range(pca.n_components)]
    pca_topic_vectors = pd.DataFrame(pca_topic_vectors, columns = columns, index = index)
    pca_topic_vectors.round(3).head(6)
    column_nums, terms = zip(*sorted(zip(tfidf.vocabulary_.values(), 
                                         tfidf.vocabulary_.keys())))
    weights = pd.DataFrame(pca.components_, columns = terms, 
                       index = ['topic{}'.format(i) for i in range(16)])
    return terms, tfidf.vocabulary_, weights
In [85]:
index = ['sms{}{}'.format(i, '!'*j) for (i,j) in zip(range(len(sms)), sms.spam)]
sms_terms = get_pca(sms.text, index)
In [92]:
index = ['screenshot{}'.format(i) for i in range(len(all_df))]
amazon_terms, vocab, weights = get_pca(all_df.clean, index)
weights
Out[92]:
. . . .. ... 0 00 ... z za zeasorb zenhabits zenmaster zone
topic0 0.229264 0.000309 -0.002214 -0.072994 0.005771 -0.002835 ... -0.023571 -0.007857 -0.003858 -0.005200 0.003483 -0.000979
topic1 0.035297 0.000097 -0.001342 -0.038699 -0.003630 0.007059 ... -0.009151 -0.003050 -0.002411 -0.002667 0.033287 -0.002602
topic2 0.191607 -0.001189 -0.001526 -0.068731 -0.015793 -0.001053 ... -0.018355 -0.006118 0.002594 -0.003201 0.022875 -0.000049
topic3 -0.074492 0.001327 0.001768 0.049124 0.005403 -0.011546 ... 0.014164 0.004721 0.005253 0.001884 0.066187 0.007248
topic4 -0.005874 0.002137 -0.009819 -0.010543 0.003274 -0.001666 ... 0.001710 0.000570 -0.009883 -0.000078 0.043462 -0.016142
topic5 -0.099815 -0.000812 -0.012130 0.051534 0.012976 -0.005842 ... 0.017965 0.005988 0.000202 0.005617 -0.009969 -0.003960
topic6 -0.050869 0.006584 0.017019 -0.045050 -0.000416 0.012273 ... 0.000027 0.000009 -0.030081 -0.004452 -0.007792 -0.023695
topic7 0.090240 -0.002508 0.009830 -0.027049 -0.003833 -0.026049 ... -0.013009 -0.004336 0.042769 -0.014073 -0.000115 0.039698
topic8 0.103289 0.006838 -0.003935 -0.041375 -0.000292 0.025303 ... -0.000249 -0.000083 0.032249 -0.018270 -0.006250 0.030031
topic9 0.051996 0.018029 -0.008903 0.089009 -0.002116 -0.002474 ... -0.004396 -0.001465 0.001231 0.004732 -0.018304 -0.001448
topic10 -0.034733 -0.000817 -0.015604 -0.055551 0.004881 0.015156 ... 0.016759 0.005586 0.011729 0.037515 -0.003970 0.012022
topic11 0.025973 -0.010854 0.016911 0.091862 0.000784 -0.014416 ... 0.000120 0.000040 0.003060 0.025127 0.003604 -0.013492
topic12 -0.013300 -0.018595 -0.013621 0.205099 -0.017900 0.011086 ... -0.062586 -0.020862 0.005835 0.005365 0.003303 0.004563
topic13 -0.060040 0.023455 -0.007398 -0.026656 -0.028915 -0.005444 ... -0.092498 -0.030833 0.007983 0.009303 -0.004882 -0.001985
topic14 0.022227 0.010324 -0.007269 0.009673 -0.015488 -0.027379 ... -0.046882 -0.015627 -0.011103 -0.015333 0.000165 0.000554
topic15 0.018838 0.010179 0.005162 -0.002069 0.023211 0.002405 ... 0.084030 0.028010 -0.001186 -0.013209 -0.002002 0.002364

16 rows × 2896 columns

In [93]:
weights
Out[93]:
. . . .. ... 0 00 ... z za zeasorb zenhabits zenmaster zone
topic0 0.229264 0.000309 -0.002214 -0.072994 0.005771 -0.002835 ... -0.023571 -0.007857 -0.003858 -0.005200 0.003483 -0.000979
topic1 0.035297 0.000097 -0.001342 -0.038699 -0.003630 0.007059 ... -0.009151 -0.003050 -0.002411 -0.002667 0.033287 -0.002602
topic2 0.191607 -0.001189 -0.001526 -0.068731 -0.015793 -0.001053 ... -0.018355 -0.006118 0.002594 -0.003201 0.022875 -0.000049
topic3 -0.074492 0.001327 0.001768 0.049124 0.005403 -0.011546 ... 0.014164 0.004721 0.005253 0.001884 0.066187 0.007248
topic4 -0.005874 0.002137 -0.009819 -0.010543 0.003274 -0.001666 ... 0.001710 0.000570 -0.009883 -0.000078 0.043462 -0.016142
topic5 -0.099815 -0.000812 -0.012130 0.051534 0.012976 -0.005842 ... 0.017965 0.005988 0.000202 0.005617 -0.009969 -0.003960
topic6 -0.050869 0.006584 0.017019 -0.045050 -0.000416 0.012273 ... 0.000027 0.000009 -0.030081 -0.004452 -0.007792 -0.023695
topic7 0.090240 -0.002508 0.009830 -0.027049 -0.003833 -0.026049 ... -0.013009 -0.004336 0.042769 -0.014073 -0.000115 0.039698
topic8 0.103289 0.006838 -0.003935 -0.041375 -0.000292 0.025303 ... -0.000249 -0.000083 0.032249 -0.018270 -0.006250 0.030031
topic9 0.051996 0.018029 -0.008903 0.089009 -0.002116 -0.002474 ... -0.004396 -0.001465 0.001231 0.004732 -0.018304 -0.001448
topic10 -0.034733 -0.000817 -0.015604 -0.055551 0.004881 0.015156 ... 0.016759 0.005586 0.011729 0.037515 -0.003970 0.012022
topic11 0.025973 -0.010854 0.016911 0.091862 0.000784 -0.014416 ... 0.000120 0.000040 0.003060 0.025127 0.003604 -0.013492
topic12 -0.013300 -0.018595 -0.013621 0.205099 -0.017900 0.011086 ... -0.062586 -0.020862 0.005835 0.005365 0.003303 0.004563
topic13 -0.060040 0.023455 -0.007398 -0.026656 -0.028915 -0.005444 ... -0.092498 -0.030833 0.007983 0.009303 -0.004882 -0.001985
topic14 0.022227 0.010324 -0.007269 0.009673 -0.015488 -0.027379 ... -0.046882 -0.015627 -0.011103 -0.015333 0.000165 0.000554
topic15 0.018838 0.010179 0.005162 -0.002069 0.023211 0.002405 ... 0.084030 0.028010 -0.001186 -0.013209 -0.002002 0.002364

16 rows × 2896 columns

In [94]:
deals = weights['skyrim zenhabits'.split()].round(3) * 100
In [95]:
deals
Out[95]:
skyrim zenhabits
topic0 3.6 -0.5
topic1 -0.2 -0.3
topic2 -2.9 -0.3
topic3 0.4 0.2
topic4 0.7 -0.0
topic5 2.3 0.6
topic6 -0.1 -0.4
topic7 0.1 -1.4
topic8 0.1 -1.8
topic9 -0.1 0.5
topic10 0.1 3.8
topic11 0.2 2.5
topic12 0.1 0.5
topic13 -0.1 0.9
topic14 -0.2 -1.5
topic15 -0.5 -1.3
In [98]:
sorted_vocab = sorted(vocab)
In [105]:
sorted_x = sorted(vocab.items(), key=lambda kv: kv[1], reverse=True)
In [106]:
sorted_x
Out[106]:
[('zone', 2895),
 ('zenmaster', 2894),
 ('zenhabits', 2893),
 ('zeasorb', 2892),
 ('za', 2891),
 ('z', 2890),
 ('yw', 2889),
 ('yr', 2888),
 ('yp', 2887),
 ('youtube', 2886),
 ('yourself', 2885),
 ('yours', 2884),
 ('your', 2883),
 ('younger', 2882),
 ('young', 2881),
 ('you', 2880),
 ('yotym', 2879),
 ('yonposd', 2878),
 ('yonpoad', 2877),
 ('yiym', 2876),
 ('yet', 2875),
 ('yesthisiskendra', 2874),
 ('yes', 2873),
 ('yer', 2872),
 ('yellowknife', 2871),
 ('years', 2870),
 ('year', 2869),
 ('yay', 2868),
 ('yadav', 2867),
 ('y8nouy', 2866),
 ('y', 2865),
 ('xx', 2864),
 ('xwjyoum', 2863),
 ('xuqvul', 2862),
 ('xt', 2861),
 ('xn', 2860),
 ('xanthera', 2859),
 ('x', 2858),
 ('www.vagaro.com', 2857),
 ('www.behindthevoiceactors.com', 2856),
 ('www', 2855),
 ('ww', 2854),
 ('wtf', 2853),
 ('wsyusuallyjread', 2852),
 ('wrote', 2851),
 ('wrong', 2850),
 ('written', 2849),
 ('writing', 2848),
 ('writer', 2847),
 ('write', 2846),
 ('wrapper', 2845),
 ('wow', 2844),
 ('wouldn', 2843),
 ('would', 2842),
 ('worwruayo', 2841),
 ('worth', 2840),
 ('worst', 2839),
 ('worse', 2838),
 ('worry', 2837),
 ('worrisome', 2836),
 ('world', 2835),
 ('works', 2834),
 ('workplace', 2833),
 ('working', 2832),
 ('workers', 2831),
 ('worked', 2830),
 ('workaholic', 2829),
 ('work', 2828),
 ('words', 2827),
 ('worded', 2826),
 ('word', 2825),
 ('wonderful', 2824),
 ('wondered', 2823),
 ('won', 2822),
 ('women', 2821),
 ('woman', 2820),
 ('wok', 2819),
 ('wl', 2818),
 ('without', 2817),
 ('within', 2816),
 ('with', 2815),
 ('witch', 2814),
 ('wins', 2813),
 ('window', 2812),
 ('wilson', 2811),
 ('willing', 2810),
 ('will', 2809),
 ('wild', 2808),
 ('wil', 2807),
 ('wikia', 2806),
 ('wiki', 2805),
 ('wights', 2804),
 ('wife', 2803),
 ('wi', 2802),
 ('why', 2801),
 ('whole', 2800),
 ('who', 2799),
 ('whiterose', 2798),
 ('white', 2797),
 ('while', 2796),
 ('whichever', 2795),
 ('which', 2794),
 ('whether', 2793),
 ('wherever', 2792),
 ('where', 2791),
 ('when', 2790),
 ('whatnot', 2789),
 ('whatever', 2788),
 ('what', 2787),
 ('wf', 2786),
 ('westworld', 2785),
 ('west', 2784),
 ('were', 2783),
 ('went', 2782),
 ('well', 2781),
 ('weis', 2780),
 ('weird', 2779),
 ('weight', 2778),
 ('weeks', 2777),
 ('weekly', 2776),
 ('week', 2775),
 ('wearing', 2774),
 ('weapons', 2773),
 ('wealthy', 2772),
 ('we', 2771),
 ('ways', 2770),
 ('way', 2769),
 ('water', 2768),
 ('watching', 2767),
 ('watcher', 2766),
 ('watch', 2765),
 ('wasting', 2764),
 ('waste', 2763),
 ('wasn', 2762),
 ('waslin', 2761),
 ('was', 2760),
 ('warning', 2759),
 ('wants', 2758),
 ('wanted', 2757),
 ('want', 2756),
 ('wandering', 2755),
 ('walk', 2754),
 ('waldvho', 2753),
 ('wake', 2752),
 ('wait', 2751),
 ('wa', 2750),
 ('w', 2749),
 ('vyuninieniso', 2748),
 ('vy', 2747),
 ('vwinvliine', 2746),
 ('vw', 2745),
 ('vv', 2744),
 ('vultesiupiilviavii', 2743),
 ('vote', 2742),
 ('voila', 2741),
 ('voiced', 2740),
 ('voice', 2739),
 ('vite', 2738),
 ('vinegar', 2737),
 ('village', 2736),
 ('videos', 2735),
 ('vibrant', 2734),
 ('via', 2733),
 ('very', 2732),
 ('vervw', 2731),
 ('venezuela', 2730),
 ('veggies', 2729),
 ('veggie', 2728),
 ('ve', 2727),
 ('various', 2726),
 ('varies', 2725),
 ('values', 2724),
 ('vagaro.com', 2723),
 ('vacation', 2722),
 ('v.redd.it', 2721),
 ('v', 2720),
 ('uwin', 2719),
 ('uw', 2718),
 ('utilize', 2717),
 ('utility', 2716),
 ('ut', 2715),
 ('usually', 2714),
 ('usual', 2713),
 ('using', 2712),
 ('useless', 2711),
 ('useful', 2710),
 ('used', 2709),
 ('use', 2708),
 ('usage', 2707),
 ('us', 2706),
 ('urge', 2705),
 ('ura', 2704),
 ('upset', 2703),
 ('upjnoys', 2702),
 ('update', 2701),
 ('upd', 2700),
 ('up', 2699),
 ('uoyig', 2698),
 ('uonoung', 2697),
 ('uoneorydnynu', 2696),
 ('uoizeaadoo', 2695),
 ('uoije', 2694),
 ('uo', 2693),
 ('until', 2692),
 ('unstop', 2691),
 ('unlimited', 2690),
 ('unless', 2689),
 ('university', 2688),
 ('united', 2687),
 ('unit', 2686),
 ('uni', 2685),
 ('underwent', 2684),
 ('understanding', 2683),
 ('understand', 2682),
 ('underneath', 2681),
 ('under', 2680),
 ('unaffected', 2679),
 ('unacceptably', 2678),
 ('umop', 2677),
 ('umnking', 2676),
 ('ume', 2675),
 ('ultra', 2674),
 ('ul', 2673),
 ('uiyiim', 2672),
 ('ued', 2671),
 ('udy', 2670),
 ('ud', 2669),
 ('uateamba', 2668),
 ('u', 2667),
 ('types', 2666),
 ('type', 2665),
 ('txx', 2664),
 ('txn', 2663),
 ('two', 2662),
 ('turtles', 2661),
 ('turning', 2660),
 ('turned', 2659),
 ('turn', 2658),
 ('tuesday', 2657),
 ('tt', 2656),
 ('trying', 2655),
 ('try', 2654),
 ('tropes', 2653),
 ('trope', 2652),
 ('tripled', 2651),
 ('triple', 2650),
 ('trip', 2649),
 ('trioxide', 2648),
 ('trinidad', 2647),
 ('tried', 2646),
 ('trick', 2645),
 ('tribute', 2644),
 ('trial', 2643),
 ('tremor', 2642),
 ('treatment', 2641),
 ('treated', 2640),
 ('treasure', 2639),
 ('tray', 2638),
 ('traveling', 2637),
 ('traveled', 2636),
 ('travel', 2635),
 ('transmission', 2634),
 ('traditional', 2633),
 ('track', 2632),
 ('tr', 2631),
 ('toxic', 2630),
 ('touched', 2629),
 ('toss', 2628),
 ('tors', 2627),
 ('tor', 2626),
 ('topics', 2625),
 ('topical', 2624),
 ('top', 2623),
 ('tools', 2622),
 ('took', 2621),
 ('too', 2620),
 ('tons', 2619),
 ('toniprufrock', 2618),
 ('tonight', 2617),
 ('tolnaftate', 2616),
 ('told', 2615),
 ('together', 2614),
 ('today', 2613),
 ('toasting', 2612),
 ('toastedstapler', 2611),
 ('toa', 2610),
 ('to', 2609),
 ('tna', 2608),
 ('tldr', 2607),
 ('tl', 2606),
 ('tips', 2605),
 ('tip', 2604),
 ('tinactin', 2603),
 ('times', 2602),
 ('timelines', 2601),
 ('timeline', 2600),
 ('time', 2599),
 ('till', 2598),
 ('tile', 2597),
 ('thyroid', 2596),
 ('thursday', 2595),
 ('thujew', 2594),
 ('thudly', 2593),
 ('thru', 2592),
 ('throwing', 2591),
 ('throw', 2590),
 ('throughout', 2589),
 ('through', 2588),
 ('three', 2587),
 ('thoughts', 2586),
 ('thought', 2585),
 ('though', 2584),
 ('those', 2583),
 ('thos', 2582),
 ('thoroughly', 2581),
 ('thorough', 2580),
 ('this', 2579),
 ('third', 2578),
 ('thinned', 2577),
 ('thinks', 2576),
 ('thinking', 2575),
 ('think', 2574),
 ('things', 2573),
 ('thing', 2572),
 ('thighs', 2571),
 ('they', 2570),
 ('these', 2569),
 ('there', 2568),
 ('therapy', 2567),
 ('therapist', 2566),
 ('theory', 2565),
 ('theories', 2564),
 ('then', 2563),
 ('themselves', 2562),
 ('them', 2561),
 ('their', 2560),
 ('theemotionmac', 2559),
 ('thedudeabides', 2558),
 ('thechubbynerd', 2557),
 ('the', 2556),
 ('thats', 2555),
 ('that', 2554),
 ('thanks', 2553),
 ('thank', 2552),
 ('than', 2551),
 ('text', 2550),
 ('tests', 2549),
 ('test', 2548),
 ('territories', 2547),
 ('terrifying', 2546),
 ('terrible', 2545),
 ('term', 2544),
 ('teriyaki', 2543),
 ('terestine', 2542),
 ('tend', 2541),
 ('tell', 2540),
 ('telescope', 2539),
 ('technology', 2538),
 ('technical', 2537),
 ('teams', 2536),
 ('teammate', 2535),
 ('teamed', 2534),
 ('team', 2533),
 ('teaching', 2532),
 ('teach', 2531),
 ('tea', 2530),
 ('te', 2529),
 ('tdgonex', 2528),
 ('tb', 2527),
 ('taxes', 2526),
 ('taught', 2525),
 ('target', 2524),
 ('talks', 2523),
 ('talking', 2522),
 ('talk', 2521),
 ('taking', 2520),
 ('takes', 2519),
 ('taken', 2518),
 ('take', 2517),
 ('tailings', 2516),
 ('ta', 2515),
 ('t', 2514),
 ('syuduita', 2513),
 ('systems', 2512),
 ('system', 2511),
 ('syracuse', 2510),
 ('sy', 2509),
 ('swept', 2508),
 ('suuuliiis', 2507),
 ('suunseayy', 2506),
 ('sutsodsuen', 2505),
 ('survive', 2504),
 ('surroundings', 2503),
 ('surppe', 2502),
 ('sure', 2501),
 ('surdidynur', 2500),
 ('suppose', 2499),
 ('support', 2498),
 ('supply', 2497),
 ('super', 2496),
 ('suorsuaump', 2495),
 ('sunlight', 2494),
 ('sunken', 2493),
 ('summoning', 2492),
 ('summer', 2491),
 ('sulfide', 2490),
 ('suggestions', 2489),
 ('sugar', 2488),
 ('suffering', 2487),
 ('suddenly', 2486),
 ('sudden', 2485),
 ('such', 2484),
 ('subverting', 2483),
 ('subversion', 2482),
 ('subjects', 2481),
 ('subject', 2480),
 ('sub', 2479),
 ('style', 2478),
 ('stupid', 2477),
 ('stuoyidg', 2476),
 ('stuffed', 2475),
 ('stuff', 2474),
 ('studying', 2473),
 ('study', 2472),
 ('studies', 2471),
 ('studied', 2470),
 ('stuck', 2469),
 ('structures', 2468),
 ('strongest', 2467),
 ('strong', 2466),
 ('string', 2465),
 ('striking', 2464),
 ('street', 2463),
 ('strawberry', 2462),
 ('strategic', 2461),
 ('strange', 2460),
 ('strang', 2459),
 ('straight', 2458),
 ('story', 2457),
 ('storm', 2456),
 ('storing', 2455),
 ('stored', 2454),
 ('store', 2453),
 ('stopped', 2452),
 ('stopdrinking', 2451),
 ('stop', 2450),
 ('stock', 2449),
 ('still', 2448),
 ('stewardship', 2447),
 ('stern', 2446),
 ('steps', 2445),
 ('stephen', 2444),
 ('step', 2443),
 ('stein', 2442),
 ('steam', 2441),
 ('stealth', 2440),
 ('stealing', 2439),
 ('stay', 2438),
 ('statistical', 2437),
 ('stations', 2436),
 ('station', 2435),
 ('states', 2434),
 ('statements', 2433),
 ('state', 2432),
 ('starts', 2431),
 ('starting', 2430),
 ('started', 2429),
 ('start', 2428),
 ('star', 2427),
 ('staph', 2426),
 ('standing', 2425),
 ('standby', 2424),
 ('stamina', 2423),
 ('stack', 2422),
 ('st', 2421),
 ('ssouy', 2420),
 ('ssi', 2419),
 ('ssajun', 2418),
 ('srepy', 2417),
 ('sreereeeee', 2416),
 ('spot', 2415),
 ('sponsorship', 2414),
 ('sponsored', 2413),
 ('spokesman.com', 2412),
 ('spj', 2411),
 ('spiritual', 2410),
 ('spiral', 2409),
 ('spent', 2408),
 ('spending', 2407),
 ('spend', 2406),
 ('spells', 2405),
 ('spectrum', 2404),
 ('specific', 2403),
 ('spat', 2402),
 ('spanning', 2401),
 ('space', 2400),
 ('soy', 2399),
 ('southern', 2398),
 ('sources', 2397),
 ('source', 2396),
 ('sound', 2395),
 ('soulgems', 2394),
 ('soul', 2393),
 ('sorts', 2392),
 ('sort', 2391),
 ('soothing', 2390),
 ('soon', 2389),
 ('sonpoid', 2388),
 ('song', 2387),
 ('sone', 2386),
 ('sometimes', 2385),
 ('something', 2384),
 ('someone', 2383),
 ('somehow', 2382),
 ('some', 2381),
 ('solving', 2380),
 ('solved', 2379),
 ('solve', 2378),
 ('solutions', 2377),
 ('solution', 2376),
 ('soluble', 2375),
 ('solitude', 2374),
 ('soldiering', 2373),
 ('software', 2372),
 ('soften', 2371),
 ('sodleut', 2370),
 ('so', 2369),
 ('snorlax', 2368),
 ('snok', 2367),
 ('sneaky', 2366),
 ('sneaking', 2365),
 ('sneak', 2364),
 ('smoking', 2363),
 ('smokey', 2362),
 ('smithing', 2361),
 ('smile', 2360),
 ('smelting', 2359),
 ('smart', 2358),
 ('small', 2357),
 ('slow', 2356),
 ('slipped', 2355),
 ('slinkyslang', 2354),
 ('slaves', 2353),
 ('skyrim', 2352),
 ('skydiving', 2351),
 ('sky', 2350),
 ('skuldafn', 2349),
 ('skin', 2348),
 ('skills', 2347),
 ('skesae', 2346),
 ('sjuswata', 2345),
 ('sjeypess', 2344),
 ('sj', 2343),
 ('size', 2342),
 ('siy', 2341),
 ('sixty', 2340),
 ('sity', 2339),
 ('situations', 2338),
 ('situation', 2337),
 ('sitcom', 2336),
 ('sistant', 2335),
 ('sissy', 2334),
 ('single', 2333),
 ('singing', 2332),
 ('singer', 2331),
 ('sing', 2330),
 ('sindjno', 2329),
 ('since', 2328),
 ('sinc', 2327),
 ('simulation', 2326),
 ('simpsons', 2325),
 ('simpson', 2324),
 ('simple', 2323),
 ('similar', 2322),
 ('silly', 2321),
 ('signed', 2320),
 ('signal', 2319),
 ('sign', 2318),
 ('sighted', 2317),
 ('sick', 2316),
 ('si0q994', 2315),
 ('si', 2314),
 ('shows', 2313),
 ('showering', 2312),
 ('shower', 2311),
 ('showed', 2310),
 ('show', 2309),
 ('shout', 2308),
 ('shouldn', 2307),
 ('should', 2306),
 ('shots', 2305),
 ('shot', 2304),
 ('shortly', 2303),
 ('short', 2302),
 ('shopping', 2301),
 ('shock', 2300),
 ('sho', 2299),
 ('shit', 2298),
 ('ship', 2297),
 ('shine', 2296),
 ('she', 2295),
 ('shared', 2294),
 ('share', 2293),
 ('shampoos', 2292),
 ('shampoo', 2291),
 ('shall', 2290),
 ('sgate', 2289),
 ('seyeos', 2288),
 ('several', 2287),
 ('setting', 2286),
 ('set', 2285),
 ('sesame', 2284),
 ('serrano', 2283),
 ('seriously', 2282),
 ('series', 2281),
 ('serendipitybot', 2280),
 ('serendipity', 2279),
 ('sepess', 2278),
 ('sentence', 2277),
 ('sense', 2276),
 ('senior', 2275),
 ('senate', 2274),
 ('selsun', 2273),
 ('selfish', 2272),
 ('self', 2271),
 ('selenium', 2270),
 ('seen', 2269),
 ('seems', 2268),
 ('seemed', 2267),
 ('seem', 2266),
 ('seeking', 2265),
 ('seek', 2264),
 ('seeing', 2263),
 ('see', 2262),
 ('section', 2261),
 ('second', 2260),
 ('seat', 2259),
 ('season', 2258),
 ('searched', 2257),
 ('search', 2256),
 ('se', 2255),
 ('sculpture', 2254),
 ('scrignutz', 2253),
 ('screen', 2252),
 ('screaming', 2251),
 ('scientists', 2250),
 ('scientist', 2249),
 ('sciencedaily.com', 2248),
 ('science', 2247),
 ('schwartz', 2246),
 ('school', 2245),
 ('schemes', 2244),
 ('schemer', 2243),
 ('schedules', 2242),
 ('scenes', 2241),
 ('scary', 2240),
 ('scarf', 2239),
 ('scales', 2238),
 ('scale', 2237),
 ('says', 2236),
 ('saying', 2235),
 ('say', 2234),
 ('saw', 2233),
 ('saver', 2232),
 ('save', 2231),
 ('sauce', 2230),
 ('satire', 2229),
 ('sat', 2228),
 ('sanity', 2227),
 ('san', 2226),
 ('same', 2225),
 ('sam', 2224),
 ('salmon', 2223),
 ('salesman', 2222),
 ('sales', 2221),
 ('sale', 2220),
 ('salary', 2219),
 ('salaries', 2218),
 ('sake', 2217),
 ('said', 2216),
 ('safe', 2215),
 ('sad', 2214),
 ('sacrifice', 2213),
 ('s2', 2212),
 ('s10', 2211),
 ('s1', 2210),
 ('s', 2209),
 ('ryder', 2208),
 ('ryan', 2207),
 ('running', 2206),
 ('run', 2205),
 ('ruin', 2204),
 ('rough', 2203),
 ('roseanne', 2202),
 ('rose', 2201),
 ('rop', 2200),
 ('roots', 2199),
 ('roosevelt', 2198),
 ('room', 2197),
 ('rookhunter', 2196),
 ('roles', 2195),
 ('role', 2194),
 ('robot', 2193),
 ('river', 2192),
 ('ritual', 2191),
 ('risk', 2190),
 ('ripping', 2189),
 ('rinsing', 2188),
 ('rinse', 2187),
 ('rigorous', 2186),
 ('right', 2185),
 ('rico', 2184),
 ('ricky', 2183),
 ('rich', 2182),
 ('rice', 2181),
 ('rey', 2180),
 ('revisits', 2179),
 ('reviews', 2178),
 ('reviewed', 2177),
 ('review', 2176),
 ('returned', 2175),
 ('retirement', 2174),
 ('results', 2173),
 ('result', 2172),
 ('restricted', 2171),
 ('restore', 2170),
 ('restoration', 2169),
 ('restingwitchfacials', 2168),
 ('resting', 2167),
 ('restaurant', 2166),
 ('rest', 2165),
 ('responsible', 2164),
 ('responsibilities', 2163),
 ('respected', 2162),
 ('respect', 2161),
 ('resources', 2160),
 ('resistant', 2159),
 ('researching', 2158),
 ('research', 2157),
 ('rescue', 2156),
 ('reread', 2155),
 ('requirements', 2154),
 ('required', 2153),
 ('repy', 2152),
 ('represent', 2151),
 ('repost', 2150),
 ('reports', 2149),
 ('report', 2148),
 ('reply', 2147),
 ('replies', 2146),
 ('replied', 2145),
 ('replenish', 2144),
 ('removed', 2143),
 ('remote', 2142),
 ('reminder', 2141),
 ('remind', 2140),
 ('remembered', 2139),
 ('remain', 2138),
 ('relying', 2137),
 ('rely', 2136),
 ('reload', 2135),
 ('religious', 2134),
 ('relevant', 2133),
 ('relationship', 2132),
 ('relations', 2131),
 ('related', 2130),
 ('reinforced', 2129),
 ('regarding', 2128),
 ('refused', 2127),
 ('refresher', 2126),
 ('refrain', 2125),
 ('refracting', 2124),
 ('reference', 2123),
 ('redux', 2122),
 ('reduce', 2121),
 ('redbubble', 2120),
 ('red', 2119),
 ('recursive', 2118),
 ('records', 2117),
 ('recommended', 2116),
 ('recommendations', 2115),
 ('recommend', 2114),
 ('recently', 2113),
 ('received', 2112),
 ('recc', 2111),
 ('recall', 2110),
 ('reasonable', 2109),
 ('reason', 2108),
 ('realtor', 2107),
 ('really', 2106),
 ('realize', 2105),
 ('real', 2104),
 ('ready', 2103),
 ('reading', 2102),
 ('readily', 2101),
 ('readers', 2100),
 ('read', 2099),
 ('react', 2098),
 ('re', 2097),
 ('ratio', 2096),
 ('rate', 2095),
 ('rata', 2094),
 ('ras', 2093),
 ('range', 2092),
 ('randomthoughts', 2091),
 ('ramifications', 2090),
 ('ralph', 2089),
 ('rallycoding', 2088),
 ('raise', 2087),
 ('rainbows', 2086),
 ('rain', 2085),
 ('radioactive', 2084),
 ('radio', 2083),
 ('rachel', 2082),
 ('rabbit', 2081),
 ('r', 2080),
 ('quote', 2079),
 ('quite', 2078),
 ('quirks', 2077),
 ('quick', 2076),
 ('questions', 2075),
 ('question', 2074),
 ('quest', 2073),
 ('quarter', 2072),
 ('quantitative', 2071),
 ('quantifiable', 2070),
 ('qualitative', 2069),
 ('qualify', 2068),
 ('quaiaiya', 2067),
 ('q', 2066),
 ('python', 2065),
 ('pyrosnakel', 2064),
 ('puzzle', 2063),
 ('put', 2062),
 ('push', 2061),
 ('pus', 2060),
 ('purpose', 2059),
 ('purely', 2058),
 ('puodas', 2057),
 ('pulled', 2056),
 ('pueylioyus', 2055),
 ('puerto', 2054),
 ('puejsiopun', 2053),
 ('pue', 2052),
 ('published', 2051),
 ('publicfigurex', 2050),
 ('publications', 2049),
 ('public', 2048),
 ('pto', 2047),
 ('psychedelic', 2046),
 ('psom', 2045),
 ('ps3', 2044),
 ('proven', 2043),
 ('prove', 2042),
 ('protested', 2041),
 ('protective', 2040),
 ('prospects', 2039),
 ('propylene', 2038),
 ('proprietary', 2037),
 ('proof', 2036),
 ('pronounced', 2035),
 ('prompted', 2034),
 ('promise', 2033),
 ('prologues', 2032),
 ('projects', 2031),
 ('project', 2030),
 ('progress', 2029),
 ('prograr', 2028),
 ('programming', 2027),
 ('program', 2026),
 ('professor', 2025),
 ('professional', 2024),
 ('products', 2023),
 ('product', 2022),
 ('processes', 2021),
 ('proceeded', 2020),
 ('proceed', 2019),
 ('problems', 2018),
 ('problem', 2017),
 ('probably', 2016),
 ('pro', 2015),
 ('private', 2014),
 ('prism', 2013),
 ('prioritize', 2012),
 ('prior', 2011),
 ('printout', 2010),
 ('printing', 2009),
 ('principal', 2008),
 ('princeton', 2007),
 ('princess', 2006),
 ('primary', 2005),
 ('prices', 2004),
 ('price', 2003),
 ('prevention', 2002),
 ('pretty', 2001),
 ('presenting', 2000),
 ('preparing', 1999),
 ('prepared', 1998),
 ('prefer', 1997),
 ('predict', 1996),
 ('preacher', 1995),
 ('pre', 1994),
 ('practicing', 1993),
 ('practices', 1992),
 ('practice', 1991),
 ('powerful', 1990),
 ('powered', 1989),
 ('pound', 1988),
 ('potions', 1987),
 ('potentially', 1986),
 ('potential', 1985),
 ('potatoes', 1984),
 ('pot', 1983),
 ('posted', 1982),
 ('post', 1981),
 ('possibility', 1980),
 ('posse', 1979),
 ('positive', 1978),
 ('position', 1977),
 ('portfolios', 1976),
 ('portfolio', 1975),
 ('porch', 1974),
 ('popular', 1973),
 ('poor', 1972),
 ('ponds', 1971),
 ('pon', 1970),
 ('politicians', 1969),
 ('politician', 1968),
 ('policy', 1967),
 ('points', 1966),
 ('point', 1965),
 ('pocket', 1964),
 ('pm', 1963),
 ('pling', 1962),
 ('plenty', 1961),
 ('please', 1960),
 ('playstyle', 1959),
 ('playing', 1958),
 ('play', 1957),
 ('platforms', 1956),
 ('platform', 1955),
 ('planet', 1954),
 ('places', 1953),
 ('placebo', 1952),
 ('place', 1951),
 ('pipe', 1950),
 ('pills', 1949),
 ('piens', 1948),
 ('pieces', 1947),
 ('piece', 1946),
 ('picture', 1945),
 ('picking', 1944),
 ('pick', 1943),
 ('physics', 1942),
 ('phrases', 1941),
 ('phrase', 1940),
 ('phone', 1939),
 ('phoenix', 1938),
 ('philosophy', 1937),
 ('philip', 1936),
 ('phd', 1935),
 ('ph', 1934),
 ('peutiou', 1933),
 ('pet', 1932),
 ('pessimistic', 1931),
 ('perspective', 1930),
 ('personality', 1929),
 ('personalities', 1928),
 ('person', 1927),
 ('permutations', 1926),
 ('permafrost', 1925),
 ('perks', 1924),
 ('perk', 1923),
 ('period', 1922),
 ('performing', 1921),
 ('perform', 1920),
 ('perfect', 1919),
 ('percentage', 1918),
 ('peppers', 1917),
 ('pepper', 1916),
 ('people', 1915),
 ('peay', 1914),
 ('pdf', 1913),
 ('pd', 1912),
 ('pcs', 1911),
 ('paz', 1910),
 ('paysydurosse', 1909),
 ('pay', 1908),
 ('pausing', 1907),
 ('patrick', 1906),
 ('patients', 1905),
 ('pat', 1904),
 ('pasting', 1903),
 ('past', 1902),
 ('passenger', 1901),
 ('passed', 1900),
 ('pass', 1899),
 ('particularly', 1898),
 ('participates', 1897),
 ('part', 1896),
 ...]
In [120]:
all_text = ' '.join(all_df['clean'].values)
In [123]:
len(all_text)
all_text
Out[123]:
' iS 11 23 1 Q Search If shine a white LED light through a prism would see a spectrum or would see a red line a green line and a blue line I ve been thinking about this since got some of those little window hangers that put shine little rainbows into your room by refracting dispersing sunlight but don t have a white LED light bright enough to actually see anything if shine... 3.9k 171 Share r ZenHabits w u Lightfiend 14h Question Your Feelings The simple act of thinking about your emotions automatically distances yourself from them and makes them less powerful which then enables you to more easily step back analyze and channel the emotion in a more positive and constructive way. r theemotionmac 140 1 Share r Serendipity u serendipitybot 8h Cheating cunt X Post From r ImGoingToHellForThis v.redd.it 16 1 Share ee 6 51 . 0 forums.nexusmods.com ee BrettM fosaym 617 Apr 2012 Stealth archery works great there for me. It takes a long time but find can sneak around and pick off most of the draugr one at a time without much problem especially using the Eagle Eye perk to get some good long range shots at them. At your level you ought to be able to take out a Deathlord with a couple of sneak shots or even one shot them if you have the triple bow damage sneak perk. One thing that helps is to not go until dusk to maximize your sneak potential. If you arrive in daylight just crouch as soon as you dismount and use the T key to wait for dark. You should be far enough away from the first foes to do that or have enough room to back away until you can wait. One of my characters was so sneaky the dragons wouldn t even notice him at all in the dark. finally had to shout a quick Fus to get their attention so could get the battle started. After had taken out all the nearby draugr of course. You ought to either be able to pick off a couple of the closest dragur before the first dragon shows up or be able to engage that dragon far enough away not to draw the attention of the draugr. What sort of bow and arrows are you using and what enchant do you have on the bow My light armor characters are very fond of the elven bow which has a nice weight damage ratio and can be improved to the max with only a couple of perks invested in Smithing. Use the best arrows that you can get a good supply of right now which should hopefully be Ebony or at least Glass. Elrindir in the Drunken Huntsman is the best source of arrows in all of Skyrim. He often carries 40 or more of the best arrows that a merchant will offer to a character of a given level. LD 6 52 al Se forums.nexusmods.com 17 Apr 2012 Stealth archery works great there for me. It takes a long time but find can sneak around and pick off most of the draugr one at a time without much problem especially using the Eagle Eye perk to get some good long range shots at them. At your level you ought to be able to take out a Deathlord with a couple of sneak shots or even one shot them if you have the triple bow damage sneak perk. One thing that helps is to not go until dusk to maximize your sneak potential. If you arrive in daylight just crouch as soon as you dismount and use the T key to wait for dark. You should be far enough away from the first foes to do that or have enough room to back away until you can wait. One of my characters was so sneaky the dragons wouldn t even notice him at all in the dark. finally had to shout a quick Fus to get their attention so could get the battle started. After had taken out all the nearby draugr of course. You ought to either be able to pick off a couple of the closest dragur before the first dragon shows up or be able to engage that dragon far enough away not to draw the attention of the draugr. What sort of bow and arrows are you using and what enchant do you have on the bow My light armor characters are very fond of the elven bow which has a nice weight damage ratio and can be improved to the max with only a couple of perks invested in Smithing. Use the best arrows that you can get a good supply of right now which should hopefully be Ebony or at least Glass. Elrindir in the Drunken Huntsman is the best source of arrows in all of Skyrim. He often carries 40 or more of the best arrows that a merchant will offer to a character of a given level. like Using Sho ce eee Eade anchants 4 06 aw Fe spokesman.com 4 ILULL. LL 1S SUUULIIIS aAlLLU CULILALLIS propylene glycol and cetyl alcohol which have antifungal activity. One young woman said it was as effective for her as for men. Some people benefit from applying dandruff shampoo such as Selsun Blue to the area. Lather let it stay on the affected zone for five minutes and then rinse. Selenium sulfide the active ingredient has antifungal activity. Once the inflamed skin has healed you may want to try applications of vinegar or original amber Listerine. Listerine contains herbal oils that fight fungus while vinegar makes the skin too acidic to be hospitable to the fungus. Antiperspirant on the groin area can help keep it dry and discourage fungus overgrowth as well. Q Oo  Parents do not own their children. No one owns anything it s just on loan for the duration of the lifetime. Kids are not slaves just because someone donated their genetic material kids are individuals. Kids are not carbon copies of parents adoptive parents understand this but biological parents are always too slow to catch up to this. Kids have their own life their own likes and dislikes and parents have a job of ensuring that their kids are independent enough to survive on their own. Not sure about the spiritual extent of your beliefs but kids have a purpose too. Many parents in about a second can tell what is it exactly about their kids that drives them crazy and it is my belief that kids are here to teach parent a lesson. Ifa parent is lacking discipline kids will only push this person to the limit so that this parent will involuntarily face the need to change. If a parent is being hypocritical Do as I say and not as I do kids will pick this up and mock the parent with it often to the amusement of others. If a parent is indecisive kids would put this person in many situations where this person would have to make many immediate decisions on the spot. If a person is a workaholic chances are that the child will break this parent s heart by offering this person some money to come home early to have dinner with the family. Kids are using parents to test how the world works parents are the child s world at least at the beginning and kids are initially dependent on the parents to survive. This soon changes and if parents are not adjusting they will find themselves all alone without any relationship with their kids. Ownership applies toa commodity anda child s life is not one of them. o changed shampoos cut out dairy litres of water cut out fast food but had no change on my folliculitis. Finally took antibiotic pills made it completely gone but came back worse after. Folliculitis can be bacterial or fungal idk what mine was but imagined that mine was the worse like MRSA. So searched about antiseptics agents on MRSA a methicillin resistant staph usually antibiotics dont work the kind that hospitals used to keep their stations or tools clean. Found research papers saying things like chlorhexidine inhibits mrsa which is good. Then thought about Dettol a 4.8 choloroxylenol solution that is multipurpose and remembered that my grandpa bathes in dettol so thought that it must be safe right . Then remembered a post from guy saying he used Clorox to cure his folliculitis and it worked. Then searched clorox and choloroxylenol on mrsa . There was a study that showed how those two ingredients inhibits MRSA. Both of it has a pH of 12 13 super alkali know So a week ago started showering with 1 caps full of dettol diluted with 500m of water every day and since my hair is super oily didn t mind rinsing my hair every day and my folliculitis is finally gone like completely. Now I m just gonna rinse with the solution every other day and see what happens next. Will update more on my progress Vote J Bi it Share Oo M Add acomment WEIS 2P 40p o2ua8 yp i deyiaao SpJ0M Jo 3eq suunseayy z e 7 0 Aeazze YOTyM T T edeysez za dn aur suorsuaump IZUUT I19q Os uO Aq XN si0q994 UWIN OD OM UO 10 e19d0 pueylioYUs sity asn Ud NOA sodLeUT NXT 10 TXN O1UI pausing aq ued s10 99A Te adUTS Ioye19do ay 10 uonoung Thujew du ay yw Adumu ut paysydurosse oq ued YIYM fonpoud xwjyoum ayi 01 UaTeamba st yonposd 0p oy di XUQVUL XT UIYIIM Inpod sepess nod sindjno L tT T edeysez ta ssi oT Is 9 Sutsodsuen auijedid anoA uMop Mojs 03 juem noX ssajun Aem siy 10 29A Oz Y8nouy 9321331 3 Upjnoys Noy A Ta dt2 ut Z xX TxX 203 7X x ae ae O quaiaiya Aida Si ey UOIZeAadoO paZ140 22A CUE Chae aee e si skesae Aduinu jo uoije 1 d19jnNW Oz on T ROP TA LP c z Bi2e du pd hA Le T Aez21e du pd A st onpoid jop JeYM puejsIopun Nok ams ayeur 01 peay stuoyidg Inod ul UNI UPd nod jaddius uoyig s d19 sonpoid uoneorydnynu peutiou ssouy dn Surppe Udy pue 10199A puodas B JO sjUsWaTa BY TB Aq 10399A 9UO Jo SyUDUITA UW Te surdidynur hq parepnoyes aq ura yonpoad seyeos ayy Aq indjno onyea sJeypess ayy worwruayo psom Cunjnqnooa snok pling WaldvHO casispie hugealienpie thechubbynerd just shower thoughts Contractions function almost identically to the full two word phrase but are only appropriate in some places in a sentence. It s one of the weird quirks of this language we ve. This post needs some kind of warning sign. Idid not see that coming. Some people say the English language is confusing. To which Isay... It s. That s the kind of linguist m. 527k J 173k it Share Oo elfmere 16h Why do you work in a factory when you have an honors degree in physics.... Repy 83k J DillingerRadio 12h 2 Awards worked in a Home Depot a long time ago. distinctly recall a coworker of mine who worked in the tile department and was a bit eccentric. loved talking to the guy real great personality real outgoing super knowledgeable about just about anything he put his hands on. Ina discussion we were having at one point he mentioned his education a Master s in physics. Then also that he had several other master degrees lol. definitely asked him what the hell he was doing working at a Home Depot and he turned to me and said Dillinger sometimes you gotta do what you love. He went on to mention how he just didn t enjoy working in any of the fields in which his degrees were despite loving the various subjects. He just straight up loved tile and manual labor working with his hands. He wasn t the only one either. One of my hardware workers was a pretty great old man who didn t understand the definition of break and would work despite me having to remind him he was legally required to stop throughout the day lol. asked him one time vy what he was doing working Home Depot sinc he was of retirement age and he mentioned he aniuildhin ctannad vwinvliine Annadanc ancn cinnn 6 55 at google.com h Google strange women lying in ponds distrib X ALL IMAGES SHOPPING VIDEOS NEWS Latest GIF HD Product Gy GP moistened bint monty python holy grail Sponsored 24.99 17.91 19.9 Strange Women Lying Strange Women Lying Strang In Ponds Distributing... in Ponds Distributing... In Pon Etsy Redbubble Turtles STRANGE WOMEN AAI Ca 1 51 Mail glassdoor.com as GitHub Policy Details We encourage Hubbers to build amazing things with a high level of autonomy and self direction. Work life balance is important to us which is why we offer flexible work schedules and unlimited PTO. We believe that if a job allows for it people should work wherever they re happiest. Where Hiring Remote San Francisco CA What Roles Event Sponsorship Marketing Manager Technical Support Account Manager Director of Global Sales Development Manager of Workplace Operations Creative Director Strategic Finance Analyst Field Marketing Manager Remote Senior Analyst of Data Analytics Public Relations Manager more. What Employees Say Health insurance is the best I ve ever had. Not only does GitHub absorb the costs they also contribute to the HSA account Caaimictiijabietletbfeleiaesy Match 401K  fantasywriters u SlinkySlang 3h Writing advice...from a new writer I ve been following this sub for some time now... guess since February when started my first draft. One thing see most here is critiques and opinions on first chapters or prologues. 1 500 words into a manuscript and people are seeking feedback. Please stop. For the sake of your work stop. I m 24 000 words in and haven t reread a single word but ican assure you it s mostly garbage. Ona first go through yours probably is to. But the real magic is soldiering through and finishing that first draft setting it aside for a few weeks and coming back and saying WTF did do here and then making it into something amazing. For the sake of your self esteem your sanity and the completion of your manuscript stop asking for constant feedback until you finish that first draft. 31 12 Share BEST COMMENTS thudly 2h People shouldn t use their writing as a source of comfort for their own insecurities. This is what M 4 Add acomment 10 21 wil Mail jobs.capitalgroup.com Responsibilities Work in a team of Analysts Research Director Senior Business Manager and Portfolio Managers to deliver high impact applied research in support of multi asset portfolios solutions platforms. Research develop and maintain relevant analytical frameworks and quantitative models Contribute to initiate deep dive foundational and or solutions research that cuts across many portfolios and geographies US Europe and Asia Utilize industry publications industry databases internal proprietary databases and additional internal resources as needed to compile research Bring and apply domain knowledge to the broad research topics Identify new areas for research and find creative ways to solve problems as they apply to monitoring attribution asset allocation and portfolio construction Participates in development of computing data platform effort to support and enhance research processes in a collaborative environment Skills Requirements A minimum of 5 10 years of relevant investment experience involving building asset allocation and portfolio construction solutions using multiple asset classes spanning global equity and fixed income. Proven analytical and problem solving skills with strong understanding of time series analysis portfolio theory optimization monte carlo simulation and statistical models. Demonstrates skills with the ability to develop recommendations and support assertions with relevant quantitative and qualitative data. Strong written aae Sreereeeee skills with ability 177k 3.9k it Share Oo GalacticPingvin 3h lemlang 3h atlienk 2h Not me but a female friend knew 4 6 weeks into marriage. Her husband at the time basically spent a few years prior to marriage being a bit of a salesman to everyone. Once they got behind closed doors his traditional marriage roots really came out. They both had fast moving careers and while he was ok with her making money and having a career he also expected her to cook clean take care of the dogs etc. and never lifted a finger. He would just come home and pound drinks until dinner time. Reply 40k grumblell 2h This is a good reason why you should live with someone before you get married to them it s a trial period where you learn a lot about the other person. o 9 2k 4 Piens Haed 2h And take at least one vacation together. It s amazing what you learn about someone when they are outside their usual surroundings. Add acomment a. Ricky Montgomery am upset with my parents for making me exist. u just decided to make a person one day who s gonna pay my bills me didn t ask for this ig nyx5 i prefer guys who make small dick jokes about themselves over guys who make big dick jokes about themselves cg driad got a medium dick It can talk to ghosts eA princess laya nearly spat out my tea 427 27 it Share Oo mrtrouble22 4h Believer i read one story where a woman Saw a grey carrying 2 bigfoot babies. insinuating that they were being studied by the greys same as us. o 5 44 scrignutz 2h This combination of weird lights in the sky and monsters has been happening for a long long time. It s something on Earth don t know why we assume an extra very complicated layer like they re also space beings that traveled here from other star systems etc. o 5 34 EternalFuneral88 9m Cool point I ve often wondered myself if maybe these things are Earth bound and not from outer space at all. They just hide themselves very well it would make sense that a lot of reports of abductions mention that the Greys tell them we need to take care of our planet. Could it be because it s their planet too Never know. So F Vote AjarRaccoon 4h love Lyle blackburn. got to meet him ata Bigfoot convention and he signed something for me. Go F Vite  6 52 al Se forums.nexusmods.com 4 as Soon as you dismount and use the Key to wait tor dark. You should be far enough away from the first foes to do that or have enough room to back away until you can wait. One of my characters was so sneaky the dragons wouldn t even notice him at all in the dark. finally had to shout a quick Fus to get their attention so could get the battle started. After had taken out all the nearby draugr of course. You ought to either be able to pick off a couple of the closest dragur before the first dragon shows up or be able to engage that dragon far enough away not to draw the attention of the draugr. What sort of bow and arrows are you using and what enchant do you have on the bow My light armor characters are very fond of the elven bow which has a nice weight damage ratio and can be improved to the max with only a couple of perks invested in Smithing. Use the best arrows that you can get a good supply of right now which should hopefully be Ebony or at least Glass. Elrindir in the Drunken Huntsman is the best source of arrows in all of Skyrim. He often carries 40 or more of the best arrows that a merchant will offer to a character of a given level. like using Shock damage Stamina damage enchants on my bow because it has the best all around utility against all types of foes. If you don t yet have the Dual Effect enchanting perk just go with the Shock damage. Fire damage would be better against the draugr but the dragons are likely to be resistant to it and they are your worst worry. Q oO  11 54 a eG sciencedaily.com The medical community used to think that obesity was a result of consuming too many calories. However a series of studies over the past decade has confirmed that the microbes living in our gut are not only associ ated with obesity but also are one of the causes said Hariom Yadav Ph.D. lead author of the review and as sistant professor of molecular medicine at Wake Forest School of Medicine part of Wake Forest Baptist. In the United States the percentage of children and adolescents affected by obesity has more than tripled since the 1970s according to the Centers for Disease Control and Prevention. Obesity is increasing at 2.3 rate each year among school aged children which is unacceptably high and indicates worrisome prospects for the next generation s health the article states. Yadav s manuscript published in the current issue of the journal Obesity Reviews reviewed existing studies animal and human on how the interaction between gut microbiome and immune cells can be passed from mother to baby as early as gestation and can con tribute to childhood obesity. The review also described how a mother s health diet exercise level antibiotic use birth method natural or cesarean and feeding method formula or breast milk can affect the risk of obesity in her children. This compilation of current research should be very useful for doctors nutritionists and dietitians to dis cuss with their patients because so many of these fac tors can be changed if people have enough good infor mation Yadav said. We also wanted to identify gaps in the science for future research. In addition. havi nn of the role of  lore 54352452524 deactivated201 you can replenish your health by... drinking water breathing fresh air eating a wild strawberry... you can restore your mana by... listening to folk music breathing fresh air applies to this one as well... taking a walk... nerdgasrnz suddenly feel less pessimistic about these suggestions for improving mental health purely bc it s worded like this xanthera therapy is a longterm ritual to deal with debuffs 4128 8 it Share We Datadevourer 3h Recently started using Codewar for practicing and kinda like it. Sometimes get stuck then think then try again and if it takes too much time try googling to find sone clue and then try and voila So think time is an asset for everyone and we can t rely on ourselves always we should sometimes seek for help to save some time. However it s not at all good to rely on others all the time copy pasting may help you at that very time but not always . All the best and Happy learning Reply Vote J easyncheesy lh try to clearly and logically map out intermediate and end goals. While this might sound obvious many times have to actively take this step since sometimes just dive into the problem without thinking about it and end up wasting a lot of time as spiral into problem solving. With the goals in mind then proceed to work don t spend too much time trying to get the exact and perfect solutions in my head and instead start trying different things using whatever error messages get as an in for googling how other people have approached the problem. Don t refrain from spending some time looking up what other people have done. As Eleanor Roosevelt once said Learn from the mistakes of others. You can t live long enough tr make them all yourself . v Reply Vote J  53.2k 176k it Share o PyroSnakel41 13h Become a politician and raise my pay while creating taxes I m coincidentally exempt from. Reply 120k itsnunyabusiness 11h Any year that the nation accumulates more debt all members of congress and the senate should be disqualified from getting a pay raise betcha that debt we ve been accumulating so fast would begin to slow down real fucking quick. o 9 14k owningmclovin 10h Most of the federal level politicians are already rich and the ones that arent are younger from poor backgrounds. Thos are the politicians who would be most affected. Rich tools who have been in Congress forever who get campaign donations for 10x the salary they get paid would be largely unaffected by a raise or lack there of o 9 78224 hydra3a 10h Also I m under the impression the bulk of their money is coming from sources other than their actual salaries private businesses investments bribes etc. . mean the salary of a US congressman v these days isn t even 200k. That s wealthy but not rich rich. r nosurf u fibonacciseries 9d Turning on the Greyscale filter on your phone makes your phone less attractive. use an iPhone and in color filters there s a grey scales option. heard that Daniel Gross 28 yr old ex machine learning director at Apple leaves his grey scale filter on to reduce phone usage. tried it and it really made a night and day difference for me. The phone magically becomes less attractive and don t even think of checking my phone anymore. It s been the first time in years that haven t needed to charge my phone in 3 days because it s on standby most of the times. initially thought it may be some placebo effect because I m quite a fan of Daniel Gross but it s been more than a few days and the urge to use my phone has not came back. did some research on this and it seems that there are teams of engineers that work on choosing the most vibrant and attractive color to make their products more attractive. So guarantee you that you ll use your phone less if you keep this filter on. 204 J 52 it Share Oo 3 BEST COMMENTS w Ma tna 1 1 ni Add acomment  Vote J 10 it Share Oo it2051229 36m In the beginning there s no such thing as IT but there was Computer science. There were scientist who discovered how to take advantage of electricity to perform calculations. They designed algorithms on how to solve problems efficiently. They designed programming languages to efficiently talk to computers. They try to represent the real world using computations like artificial intelligence or machine learning and they do whatever it takes to push knowledge to the next level with computers... scientists made publications of their new discovery and theories for use in the future. Now came IT in which they take advantage of these knowledge. They apply it the industry in business and whatever information related stuff through the help of computers. Of course they need to understand this knowledge before they can efficiently apply it which is why the subjects in IT and CS are crossing over. Rey 24 binjamin2 36m Computer science is heavenly focused on things like data structures computational theory systems like c and assembly software development and lots of math. Than we take electives such as a course on DB administration Al Machine learning etc. I d say that you have a lot of studying to do.. would focus on picking a7 easy language python and build some prograr. Y so you can understand how data structures work. 4 50 Q Search News Home Popular 20k J 15 it Share r BasicBulletJournals u irenic rose 21h My bullet journal has returned back to its basic form. When don t have time to make it pretty use this format that saw in The Bullet Journal Method by Ryder Caroll 314 14 it Share r DeTrashed u tdgonex 18h imgur S 1 Award oe 140 e205 16 it Share o iseemath 18h think you ll find The Princeton Companion to Mathematics is exactly what you re looking for. It is comprehensive readily available and written by respected authors in the field of maths research. Rey 384 Hacksaw 203 9h If you re looking for an overview of the history of mathematics there are plenty of videos on YouTube If you search things like History of maths . Have a binge of these types of videos until you find some of the topics you want to understand further. Not 100 sure this is what you wanted but I m drunk so hope this was helpful lol Reply Vote J EulerFanGirl 6h Factorials aren t really in the beginning of Mathematics friend j k . Reply Vote J AddemF 5h I ve been researching similar things although not restricted to mathematics. I m curious how people have generally made all kinds of inference like how they figured out star navigation and iron smelting. But I ve also been reading up on how they made mathematical inferences. Osceandriiver has a vervw oand hank that haven t  124k J 49k it Share Oo Go 234 15 MORE REPLIES HonchoMinerva 6h CJ Moki 4h HonchoMinerva 6h HonchoMinerva 7h HonchoMinerva 6h Bing Crosby advocated for salmon conservation in the North Atlantic against Denmark s overfishing in the early 1970s. The Danish government banned everything Bing Crosby in Denmark but because he was so popular there the Danish people protested the government making them overturn the Bing Crosby law and even to enact salmon conservation legislation. Bing Crosby single handedly managed to create a piece of Danish law. Repy 16k J AtrociousThoughts 2h Read this as Bill Cosby at first. Was very confused. So 53 4 M 8 MORE REPLIES Vv Q AANDE DEDI IEC  124k J 49k it Share Oo 10 MORE REPLIES suddenly satire 6h On August 15 1977 the Ohio State radio telescope detected a radio signal so powerful that an astronomer was prompted to write Wow in the margin of the printout. To date it is the strongest candidate for an alien radio transmission. Nine hours later Elvis was dead. The Wow signal was detected on August 16 1977 at 11 17pm CDT. Though Elvis was pronounced dead on August 16 1977 at 3 00pm CDT it s most likely that he died shortly after leaving his bedroom to go to the bathroom at about 8 00 am. Reply 402 lenny face you 3h 1 Award That astronomer s name Owen Wilson. o 9 F334 6 MORE REPLIES 5 MORE REPLIES HonchoMinerva 6h Add acomment 2 07 al a 2 Messages Back Front Back NV I m not home right now and was going to give you a more detailed answer but for now highly recommend cracking the coding interview as required reading. read that and her other interview book. recommend CLRS as a reference for DS A it s huge and if you can read the whole thing it is amazing but you can find a PDF online if you are sneaky. If you do C instead of java there is a book recommended to me that will find let me know and will look up the name. As for practice did a million Leetcode problems but recommend starting with firecode.io to start which is Leetcode problems but it orders them by difficulty for you and revisits ones you have problems with for reinforced learning. promise to come back and extend the list when we are less busy I m back from vacation Tuesday . Happy fri yay Pat 527k J 173k it Share Oo Q Reply 56k H insertcaffeine 14h No questions or advice here just a heartfelt Damn. Go 9 16 4 3 MORE REPLIES 75 MORE REPLIES imk 14h Rough night last night Usually asked with a big smile. was asked that all the time when was younger because had a really obvious tremor. Thing is did not drink or do drugs at all and had no idea what was wrong with me. was eventually diagnosed with Graves Disease and treated for it but I still have a bit of a tremor. Reply 196k J kinky snorlax 11h had hyperthyroidism then underwent radioactive iodine treatment and am now hypothyroid. My lips were often blue and was always pale and cold with sunken looking eyes until we got my medicine on track. People asked me all the time if was sick because looked like was dying. Thyroid shit really fucks you up and when you try to explain it no one knows what a thyroid even is. Co 36k v v  124k J 49k it Share Oo 2 MORE REPLIES 1 MORE REPLY 5 MORE REPLIES HonchoMinerva 7h HonchoMinerva 6h Google s Deepmind self learning Al AlphaZero spent 4 hours learning chess and proceeded to beat the top chess engine in the world. The particularly interesting part is that it wins by playing in a very human way. Chess engines tend to run algorithms to assess a board after a move is chosen looking at millions of moves every second and decide who s better based on a set of parameters making their play very direct. AlphaZero seemed to develop a far more human playstyle somehow seeing something less quantifiable that led to an advantage in a position. Over the past 200 years numerous chess masters studied a style of play similar to AlphaZero. That means that in 4 hours AlphaZero developed a better understanding of the game than we could over the 1000 years it has been studied. Repy 17k J thedudeabides42069 3h This is fucking scary. v o Ft 374 f r stopdrinking u creaturefeaturel6 10h 689 days 1 83 A very sad text message received from my wife. keep it as a reminder of where was and gratitude of where lam now. lL can t talk about this in front of our daughter but feel very sad and angry about your drinking. You keep saying you re going to cut back and you never do. enjoy being moderate with alcoho and don t want to give it up but feel like that s the only option left for you. You never prioritize dealing with your problem let me guess you haven t found a therapist yet . You looked like you were going to pass out in the restaurant at 69m might add and yet you got in the driver s seat of the car. had to ask to drive. You sat in the passenger seat with the food that you forgot in the car and we had to throw away. Then you fell asleep 7 30pm without a word. put her to bed and didn t even know where you had went until checked and saw you passed out in the guest bedroom. There is so much wrong with this picture. don t want to ruin our day being upset with each other but have to say how feel. This needs to change. won t continue like this. Really. Done. That was my daughter s 4th birthday 2 years ago. And now Our text messages are comprised of love notes 18k 131 Share Add acomment 6 51 al Se forums.nexusmods.com 4 ToniPrufrock 17 Apr 2012 So was getting a little bored and decided that would try to finish the main quest and go to Skuldafn and holy hell was not ready ended up using up all my potions and essentially ended up running around everywhere screaming like a sissy and trying to avoid getting killed and get to the end only to be foiled by a etistertclarge group of draugrs while was trying to work out the first puzzle. Not exactly the most dignified way to go. faced 2 dragons from the offset outside each of which refused to go near me unless they had a whole posse of Drauger deathlords and wights and whatnot. The leap up in difficulty was huge as soon as touched down in Skuldafn and within moments had exhausted my supply of potions and soulgems to charge my weapons up with. Methinks shall have to reload and try again later when I m better prepared if can but shall have to face it all eventually. Does anyone who had completed the main quest have any tips am playing on a PS3 and am a level 42 Khajiit in nightinggale armour who s primary skills are sneak and archery. However in fighting normal combat use one handed and restoration magic for the other. My conjuration spells never seem to last long. Sneaking did prove effective...until the damn dragons turn up. Which alerted everyone to me. Fighting the dragons was next to impossible when the Draugrs teamed up and started summoning storm atronachs. Q Oo  12 40 1 Fe ice comftort ae iY wsyusuallyjread a chapter ofa book ras S lightsjout by 8 30 www. mystalk.net SY You tell em Angela angelamartin... Images may be subject to copyright. Learn More Related images 8 00 wl LTE google.com hy It was October 28 1977 his third birthday and Phoenix and his family were aboard a cargo ship bound for Miami from Venezuela. His parents had just abandoned their lives as followers of a notorious religious cult the Children of God which was led by a charismatic former preacher named David Berg who called himself Moses. Phoenix s parents who spent much of the late 1960s wandering the West Coast in a VW microbus had become missionaries traveling around the southern U.S. Venezuela and Puerto Rico and giving birth to Rain Joaquin and Liberty along the way. To sing about God Rain and first born River went busking on the street. The organization made Phoenix s parents the archbishops of Venezuela and Trinidad. A PSYCHEDELIC ME 86k 3.5k it Share Oo MrBOOMbabdtlc 7h flinty day off 6h LovesMeSomeRedhead 6h She said yes and we went out to dinner and saw a movie together. Dropping her off we kissed on her parents front porch for what seemed like forever. Her dad thought so too because he started flicking the porch lights on and off. Good times. Repy 99 JF antwoord food 3h Standing on your mama s porch you told me that you d wait forever oooh and when you held my hand knew it was now or never those were the best days of our lives. Go 9 2k y Mudders Milk Man 3h Fun fact Bryan Adams the singer of Summer of 69 was born in November 1959. An interviewer asked him how the song made sense given that Adams would have been 9 years old in the summer of 1969. Adams replied who says it s about the year 69 a a A mm  21k J 2.1k it Share Oo Ww DOO VYUNINIENISO WwW i fuckin luv it mate 17h Happy8Day 17h Mr RandomThoughts 18h fivekilometer22 18h Skydiving. It was fine. But one and done for me. Reply 4829 DangerousPuhson 17h Same. Skydiving timeline nerves nerves nerves super nerves INTENSITYYYYYYYYYYYYYYYYY LOUDNESSSSSSSSSS sudden crotch pain dealing with excess adrenaline in a calm situation bumpy landing hobble away with all the sound muted for the next few hours. o 9 Fey lisping lynx 17h This is super helpful descriptions like that should be added to some encyclopedia articles to give people some idea of how a certain thing feels. o 3224 Add acomment 4 147k J 314 it Share Oo mikevago 4h One of my favorite things about both shows is that the dad isn t in the Ralph Kramden model that s influenced every sitcom dad up to Homer Simpson. ie. the selfish short sighted schemer who grudgingly does the right thing in the end because deep deep deep down he has a heart of gold. Hank and Bob are more or less the opposite of that they essentially want to be left out of everyone else s schemes and get dragged in anyway. Reply 28k HJ action lawyer comics 2h Funny story the modern trope of the mom is the smart one and the dad is a useless oaf is actually a subversion of an even older trope. Back in the day the stern cardigan wearing pipe smoking life lessons teaching at the end of each episode dad was the archetype every show used. Then shows like Roseanne and The Simpsons became popular by subverting those tropes and everyone copied them and now here we are. o 9 18 4 DatalnTheStone lh Thats actually very interesting. Go 48 4 Add acomment 631 4 59 it Share Oo PublicFigurex 28d It s fried rice. Kinda Chicken mushrooms broccolini carrots onion pepper and a serrano and loooots of garlic. Get yer mis in it s place. steam the carrots and broccoli for a few minutes set that in the veggie bowl. Get a wok ripping hot toss in some oil and hey it smokey. Get the mushrooms in there and toss in some soy sauce and whatever other sauce you like and cook them until the sauce is mostly gone. Set those aside. Get the onions and peppers in there soften them up a bit before adding the garlic toasting it and throwing those in the veggie bowl too. usually marinate my chicken in soy sauce sesame oil and gochuchuang. Once all the veggies are cooked toss the chicken in and cook it till it s juuuust done. use chicken thighs and cut them in to 6 or so pieces depending on size. set all the cooked stuff aside and then toss in some cold rice. Lots of oil and soy sauce and basically just fry the rice for a minute before the rest goes back in. Toss it all together and then add whatever sauce it might still need at that point. Y keep teriyaki adam liaw has a great method r EatCheapAndHealthy u chickentender1995 12h Best tips have for eating cheap and healthy Food Health Fitness 1. Buy your meats on sale and freeze what you won t immediately use. I m sure this has been posted about many many times. But it s seriously a life saver. Today literally pulled out a corned beef from the freezer that s been in there since the sales after St. Patrick s Day and chucked it in the crock pot with cabbage potatoes celery carrots onion and water. All the veggies coincidentally came from last weeks food drive See number 2 so tonight s dinner is amazing and thoroughly cheap. 2. Look into your local food drives and see if you qualify for any. Where live there s one that s on the third Thursday of every month at the school and you don t need proof of income for it. Fresh veggies some cupboard items sometimes milk and eggs all sorts of things. It varies from month to month with what s in season and whatnot. know a lot of people don t get things from food drives because they feel they don t need it there s other people who are worse off. Which 100 understand the monthly one at the school is the only one we do for the following reason. used to work there as a janitor my fianc is dayshift 783 57 Share Add acomment 423 M6 it Share Oo specific or more detailed we probably wouldn t be talking now and you never would have heard of this treasure hunt because it would have all been solved by 1990. The fact that it is difficult and a lot of icons have been removed add to the lore of this hunt. predict one and only one more casque will ever be found and I m okay with that possibility. GO SRepy 234 md28usmc 21d It s like the metal art sculpture KRYPTOS at the CIA headquarters which has yet to be fully solved o 5 54 lilibie 10h thank you that was an interesting little rabbit hole. oO 9 F Vote rookhunter 22d It s a feature of doing a very old hunt that the author thought would be solved in a year. The landmarks as we have seen on some locations are gone and I m sure the casques are in a horrible condition. like to think of this quest as a 4th dimensional treasure hunt. You will need to look in the past via old media and archived records to really hunt these down. Some I m sure are no longer accessible because of the time tr y has passed. 9 01 li LTE 4 Clock Q Search ED SJ o Atwater Village Topics 23 neighborhoods opies a Courtney Morris Atwater Village Lead Vv Need a good Facial FOUND THE PLACE Y all have found THE best facial in LA and it s just around the corner from Atwater Village. If you know me you know that make most of my connections through dog rescue. found my realtor because she adopted a dog from me. know a good accountant because they adopted a dog from me. have a good printing company because they adopted a dog from me and NOW HAVE THE BEST SKIN BECAUSE SHE ADOPTED A DOG FROM ME. So she s a dog lover AND a skin healer. A in my books. She s wonderful and her prices are MORE than reasonable. Just wanted to share the good word Go get some pampering. You deserve it. All her products are cruelty free and non toxic and she s all about striking balance between clinical actives and botanicals for the best results. I ve gone 3 times and could not be happier. https www.vagaro.com restingwitchfacials Resting Witch Facials In Glendale CA ... vagaro.com 1 day ago Thank CO Reply Le 5 2 O Home For Sale Businesses Notifications More What is the most a dollar has ever gotten you EEae TLDR A wife three beautiful children and frankly my life as know it. Story time Back in Jr. High liked a girl and flirted with her quite a bit. One time during a band trip we stopped at a gas station and she bought me a pack of gum. tried to pay her back 1 but she refused. So slipped the 1 in her pocket. She then slipped the dollar in my backpack and so began the back and forth with the 1 bill. We found silly ways to give it back and forth. mailed it to her house. She stuffed it ina gum wrapper and offered me a piece. then decided that would ask her out on this 1. wrote Will you go out with me on it and put it in a note and gave it to her. She said yes of course it would be a terrible story otherwise suppose . About four years later still had the same dollar kept away. On our anniversary wrote Will you marry me on the bottom of the dollar. We have been married for 15 years and have three awesome kids. We still have that dollar stored away. Y 2 48 at O Ali Ho 8 Today 10 40 AM Recommendations Ask to be recommended Received Given 4 As Ali s professor at the Syracuse University graduate program in data science can attest to Ali s enthusiasm problem solving skills and analytical acumen. Ali and a teammate did an outstanding job preparing and presenting their data aggregation analysis ..see More Gregory Block PhD MBA ne Adjunct Professor Principal Big Data Engineer Oct 10 2019 Gregory taught Alison Super nice recc from block Today 1 57 PM ins. Overall you have d well. A grade of 97 o t dazzled me with your iess. This is GOOD GR We dazzled her eo c oc 27k 6.2k it Share Oo Empurpledprose 18h wrote this elsewhere but i think it s t terestine terrifying enough to repost Almost directly underneath the city of Yellowknife Northwest Territories Canada there is a little over a quarter of a million tons of arsenic trioxide which must remain forever frozen by man made glycol based cooling technology in order not to leak and potentially kill everyone. On. Earth. This incredible amount of carcinogen came from tailings from the depleted Giant gold mine which closed down recently after about sixty years in operation. Oh and didja know Arsenic is extremely water soluble and in that amount would be enough to kill every human on earth several times over pro rata . No Yellowknife is not far north enough for the ground to experience permafrost. And yes the infrastructure is crumbling. source Repy 29 J Zenmaster366 16h If the infrastructure fails catastrophically due to a natural disaster or just failure to maintain the system what are the ramifications Is it just a this area and 20 miles around it are fucked forever thing or is it a well the arsen . hit the ocean so all life is going to die horribly Y thing  6 52 wi Fe 4 forums.nexusmods.com best arrows that a merchant will offer to a character of a given level. like using Shock damage Stamina damage enchants on my bow because it has the best all around utility against all types of foes. If you don t yet have the Dual Effect enchanting perk just go with the Shock damage. Fire damage would be better against the draugr but the dragons are likely to be resistant to it and they are your worst worry. ToniPrufrock 17 Apr 2012 Hi Brett and thanks so much for the advice. I ve got the eagle eye perk and pretty much rely on it. Good tip about the darkness think shall have to reload stock up and do that. Especially since my main problem was that a dragon swept in straight away. lol And somehow completely missed Elrindir. I ve been getting the arrows from the Solitude Fletcher s mainly and they only ever seem to stock dwarven and elvish arrows and all of one ebony arrow every time go in. have two main bows an ebony bow charmed with soul capture and the nightingale bow that when fully charged will do 20 points frost damage. My enchanting isn t high enough for that perk think. Or I ve spent my perks elsewhere. But a shock damage bow would probably do well think I ll take your advice and try to craft one. Because of not being able to get better arrows I ve been relying on dwarven ones and elvish ones but do have the 3 x damage bonus on sneak shots since my sneak is at 98. Q oO  4128 8 it Share We UMNKINg ADOUL It ana e naQ UP WaSLIN d IO OF UME as spiral into problem solving. With the goals in mind then proceed to work don t spend too much time trying to get the exact and perfect solutions in my head and instead start trying different things using whatever error messages get as an in for googling how other people have approached the problem. Don t refrain from spending some time looking up what other people have done. As Eleanor Roosevelt once said Learn from the mistakes of others. You can t live long enough to make them all yourself . Reply Vote J toastedstapler 1h jchevertonwynne first you need to make sure that you understand the big picture what exactly is the problem asking you to do from there you should be able to break it down into a few medium scale steps. these might be the size of a class method or function it might be worth writing some tests for your code to make sure that each block does what you expect it to do this could just be something as simple as some assert Statements. it ll help you keep track of what you break as you make changes Reply Vote J Add acomment  124k J 49k it Share Oo 5 MORE REPLIES HonchoMinerva 7h HonchoMinerva 6h Google s Deepmind self learning Al AlphaZero spent 4 hours learning chess and proceeded to beat the top chess engine in the world. The particularly interesting part is that it wins by playing in a very human way. Chess engines tend to run algorithms to assess a board after a move is chosen looking at millions of moves every second and decide who s better based on a set of parameters making their play very direct. AlphaZero seemed to develop a far more human playstyle somehow seeing something less quantifiable that led to an advantage in a position. Over the past 200 years numerous chess masters studied a style of play similar to AlphaZero. That means that in 4 hours AlphaZero developed a better understanding of the game than we could over the 1000 years it has been studied. Repy 17k J thedudeabides42069 3h This is fucking scary. eo 4 344 4 MORE REPLIES WA 18 MORE REPLIES Being a pet owner is like being a sugar daddy. You waste all of your money on keeping them happy and the only thing they do is look cute and give you attention sometimes. EN Ricky Montgomery am upset with my parents for making me exist. u just decided to make a person one day who s gonna pay my bills me didn t ask for this 9 5 15 7 42 PM a Jack In The Jungle JACKRYAN S2 of Jack Ryan brings the unstop Watch Now x Pre eRe etic EUs. e Dana Schwartz DanaSchwartzzz BELLE There goes the baker with his tray like always BAKER well there goes Belle singing her DAILY MEAN SONG about us 6 15 17 19 26 7 42 all All inboxes N Parker from Interview Cake 5 00PM To yesthisiskendra gmail.com Interview Cake Weekly Problem 267 Recursive String Permutations Time for this week s practice coding interview question Recursive String Permutations This problem will be free for 2 weeks then it ll go back to being available only in our full course. Do the problem this week and we ll review it together next week Let s review last week s question Parenthesis Matching The trick to many parsing questions like this is using a stack to track which brackets phrases etc are open as you go. So next time you get a parsing question one of your first thoughts should be use a stack In this problem we can realize our stack would only hold characters. So instead of storing each of those characters in a stack we can store the number of items our stack would be holding. That gets us from O n space to O 1 space. Oo aA stupid bumps are that painful pus filled kinda like acne but not for a year now. My hair thinned bcs of it I m sad but it is what it is. Also to anyone suffering with this find a cure ASAP bcs now have some bald lines really embarrassing especially for a girl and don t want others to go thru what went thru. So changed shampoos cut out dairy drank litres of water cut out fast food but had no change on my folliculitis. Finally took antibiotic pills made it completely gone but came back worse after. Folliculitis can be bacterial or fungal idk what mine was but imagined that mine was the worse like MRSA. So searched about antiseptics agents on MRSA a methicillin resistant staph usually antibiotics dont work the kind that hospitals used to keep their stations or tools clean. Found research papers saying things like chlorhexidine inhibits mrsa which is good. Then thought about Dettol a 4.8 choloroxylenol solution that is multipurpose and remembered that my grandpa bathes in dettol so thought that it must be safe right . Then remembered a post from guy saying he used Clorox to cure his folliculitis and it worked. Then searched clorox and choloroxylenol on mrsa . There was a study that showed how those two ingredients inhibits MRSA. Both of it has a pH of 12 13 super alkali know Vote J Bi it Share Oo Add acomment  spokesman.com 4 A. Jock itch is normally caused by a fungal infection. Neosporin which contains topical antibiotics is not likely to cure a fungus. Lotrimin Ultra contains butenafine an antifungal ingredient so it should have helped. You may want to alternate it with other OTC antifungal products such as clotrimazole Lotrimin AF Mycelex miconazole Micatin Zeasorb AF or tolnaftate Aftate Tinactin . Many readers of this column report that a skin cleanser called Cetaphil can be very helpful against chronic jock itch. It is soothing and contains propylene glycol and cetyl alcohol which have antifungal activity. One young woman said it was as effective for her as for men. Q Oo  10 21 Mail ef jobs.capitalgroup.com Date Oct 11 2019 Location Los Angeles CA US 90071 Company Capital Group Capital Group Background Capital s investment organization culture is collegial honest and intellectually rigorous. All investment group associates are expected to contribute as individuals and as a member ofateam. A hallmark of Capital s philosophy is an overriding focus on the long term whether regarding investment results relationship management or the stewardship of the company itself. The culture of Capital Group is built on a foundation of shared values that influence associates decision making and the way they interact with clients investors and one another e Integrity Accountability Rigorous analysis Collaboration Humility Long term focus Consistency Respect for individuals Solutions Unit Background Capital Group s Solutions Unit the Unit is the investment team responsible for performing research and making investment recommendations in support of Capital Group s multi asset portfolio solutions effort. The multi asset portfolio solutions include but is not limited to American Funds and Insurance Series based 40 Act fund of funds and solutions Global target date and model platforms PCS model portfolios institutional solutions overseen by Capital Group. 47g Mo it Share Oo VULTESIUPIILVIAVII CII Stephen Grider 100 . I ve been a professional React developer for 6 years full time with equity not a contract dev or freelancer moved to Engineering Manager then decided to get back to coding full time as our Lead Software Engineer. took Stephen Grider s modern react redux 2019 course just as a refresher and it was insanely good. then took about 4 of his other courses in on react redux and node. It was kind of a review for me but the way he explains the topics are just awesome for memorizing how they work and the different ways to use them. He also talks about different advanced css and html things along the way. It s just a general great intro to front end react development and by the end you ll be very confident. There s also a pretty thorough hooks section at the end. Just keep in mind if he starts you doing something funky it s because he wants to show you why that way is broken or why it would fail and then shows you the best practices after he explains it all. It s honestly amazing and I d hire a junior dev if he took those courses because they set you up to make your own projects and are worth every minute and every dollar. Check out the curriculums for his courseson Y rallycoding a couple of scenes later when Mr. Robot is wearing the scarf and hat protective mode. Protective of fake elliot who is only a month old sub theory the reason for this is something to do with what Whiterose s project actually does which think is not some stein sgate type time travel thing but actually instead more like Westworld or Person Of Interest where they re creating people and not stealing them from alternate timelines or dimensions. think the young girl that interviewed angela was actually angela and that s why Angela went 100 in beleiving it think that philip price has also seen some proof it s real and that s the only reason he was willing to sacrifice his daughter since he clearly doesn t care for the game anymore. TL DR You re not elliot you re the alter think is the full quote that we didn t get to hear. think the entire time we ve been assuming we the watcher the friend said hello to was a character created by the elliot we ve been watching on screen when he first says hello friend at 00 01 of S1 E1 and the main part of this theory is that assumption is wrong. The watcher is actually whichever of elliott s personalities Sam Esmail thinks we need to see the perspective of and the narration isn t for us.. it s for those personalities. Meta as hell. bravo sam. 121 43 Share Add acomment 27k 6.2k it Share Oo Tell Rachel said Hi. o 9 3324 2 MORE REPLIES 20 MORE REPLIES Empurpledprose 18h wrote this elsewhere but i think it s i terestine terrifying enough to repost Almost directly underneath the city of Yellowknife Northwest Territories Canada there is a little over a quarter of a million tons of arsenic trioxide which must remain forever frozen by man made glycol based cooling technology in order not to leak and potentially kill everyone. On. Earth. This incredible amount of carcinogen came from tailings from the depleted Giant gold mine which closed down recently after about sixty years in operation. Oh and didja know Arsenic is extremely water soluble and in that amount would be enough to kill every human on earth several times over pro rata . No Yellowknife is not far north enough for the ground to experience permafrost. And yes the infrastructure is crumbling. source Reply 29 J Zenmaster366 16h If the infrastructure fails catastrophically due 10 01 WF 4 google.com Google deacon fallout 4 voice actor x ALL NEWS IMAGES VIDEOS MAPS Your related activity Only you can see this www.behindthevoiceactors.com Ryan Alosio Ryan Alosio is an actor who voiced Deacon and H2 22 in Fallout 4. Fandom fallout wiki Ryan Al... Ryan Alosio Fallout Wiki FANDOM powered by Wikia Q Oo '
In [129]:
from collections import Counter, OrderedDict
dict_all_text = Counter(all_text.split(' '))
dict_all_text

sorted_dict = sorted(dict_all_text.items(), key=lambda kv: kv[1], reverse=True)
In [130]:
sorted_dict
Out[130]:
[('the', 344),
 ('and', 275),
 ('to', 245),
 ('a', 241),
 ('of', 219),
 ('it', 128),
 ('in', 125),
 ('that', 106),
 ('you', 102),
 ('is', 95),
 ('s', 84),
 ('for', 77),
 ('on', 64),
 ('be', 56),
 ('with', 54),
 ('was', 54),
 ('have', 52),
 ('are', 52),
 ('as', 50),
 ('t', 47),
 ('my', 45),
 ('this', 44),
 ('at', 42),
 ('or', 41),
 ('but', 41),
 ('from', 39),
 ('all', 37),
 ('can', 36),
 ('not', 35),
 ('I', 33),
 ('Share', 33),
 ('like', 32),
 ('about', 31),
 ('time', 31),
 ('your', 29),
 ('they', 29),
 ('we', 29),
 ('which', 28),
 ('one', 28),
 ('what', 28),
 ('he', 28),
 ('some', 27),
 ('get', 27),
 ('4', 27),
 ('would', 26),
 ('just', 26),
 ('their', 26),
 ('J', 26),
 ('Oo', 25),
 ('has', 24),
 ('do', 23),
 ('so', 23),
 ('up', 23),
 ('been', 22),
 ('by', 22),
 ('had', 22),
 ('enough', 21),
 ('out', 21),
 ('who', 21),
 ('me', 21),
 ('1', 20),
 ('The', 20),
 ('there', 20),
 ('It', 20),
 ('because', 20),
 ('', 19),
 ('.', 19),
 ('You', 18),
 ('very', 18),
 ('only', 18),
 ('best', 18),
 ('her', 18),
 ('how', 18),
 ('an', 18),
 ('them', 17),
 ('then', 17),
 ('more', 17),
 ('back', 17),
 ('first', 17),
 ('o', 17),
 ('if', 16),
 ('bow', 16),
 ('arrows', 16),
 ('MORE', 16),
 ('ve', 15),
 ('using', 15),
 ('should', 15),
 ('will', 15),
 ('people', 15),
 ('parents', 15),
 ('make', 15),
 ('his', 15),
 ('m', 15),
 ('other', 15),
 ('Reply', 15),
 ('think', 15),
 ('If', 14),
 ('don', 14),
 ('use', 14),
 ('This', 14),
 ('9', 14),
 ('years', 14),
 ('REPLIES', 14),
 ('problem', 13),
 ('damage', 13),
 ('So', 13),
 ('work', 13),
 ('every', 13),
 ('good', 12),
 ('go', 12),
 ('things', 12),
 ('know', 12),
 ('Add', 12),
 ('acomment', 12),
 ('10', 12),
 ('A', 12),
 ('when', 12),
 ('2', 12),
 ('Q', 11),
 ('6', 11),
 ('sneak', 11),
 ('even', 11),
 ('One', 11),
 ('thing', 11),
 ('until', 11),
 ('now', 11),
 ('research', 11),
 ('day', 11),
 ('Vote', 11),
 ('also', 11),
 ('those', 10),
 ('u', 10),
 ('long', 10),
 ('find', 10),
 ('most', 10),
 ('able', 10),
 ('take', 10),
 ('far', 10),
 ('away', 10),
 ('said', 10),
 ('try', 10),
 ('made', 10),
 ('5', 10),
 ('our', 10),
 ('6h', 10),
 ('see', 9),
 ('since', 9),
 ('into', 9),
 ('r', 9),
 ('less', 9),
 ('me.', 9),
 ('much', 9),
 ('couple', 9),
 ('keep', 9),
 ('kids', 9),
 ('need', 9),
 ('many', 9),
 ('Then', 9),
 ('We', 9),
 ('3h', 9),
 ('making', 9),
 ('solutions', 9),
 ('HonchoMinerva', 9),
 ('around', 8),
 ('dragons', 8),
 ('him', 8),
 ('My', 8),
 ('He', 8),
 ('too', 8),
 ('being', 8),
 ('came', 8),
 ('were', 8),
 ('In', 8),
 ('re', 8),
 ('something', 8),
 ('over', 8),
 ('these', 8),
 ('better', 8),
 ('learning', 8),
 ('phone', 8),
 ('Capital', 8),
 ('got', 7),
 ('actually', 7),
 ('off', 7),
 ('draugr', 7),
 ('level', 7),
 ('T', 7),
 ('characters', 7),
 ('attention', 7),
 ('could', 7),
 ('before', 7),
 ('dragon', 7),
 ('right', 7),
 ('want', 7),
 ('understand', 7),
 ('life', 7),
 ('sure', 7),
 ('parent', 7),
 ('person', 7),
 ('where', 7),
 ('food', 7),
 ('mine', 7),
 ('thought', 7),
 ('didn', 7),
 ('i', 7),
 ('3', 7),
 ('That', 7),
 ('why', 7),
 ('Manager', 7),
 ('2h', 7),
 ('asset', 7),
 ('really', 7),
 ('human', 7),
 ('8', 7),
 ('end', 7),
 ('looking', 7),
 ('ll', 7),
 ('after', 7),
 ('little', 6),
 ('great', 6),
 ('pick', 6),
 ('perk', 6),
 ('dark.', 6),
 ('shows', 6),
 ('What', 6),
 ('source', 6),
 ('often', 6),
 ('40', 6),
 ('here', 6),
 ('cut', 6),
 ('water', 6),
 ('no', 6),
 ('used', 6),
 ('saying', 6),
 ('There', 6),
 ('super', 6),
 ('full', 6),
 ('Repy', 6),
 ('real', 6),
 ('asked', 6),
 ('went', 6),
 ('old', 6),
 ('few', 6),
 ('did', 6),
 ('investment', 6),
 ('They', 6),
 ('never', 6),
 ('lot', 6),
 ('Go', 6),
 ('against', 6),
 ('than', 6),
 ('each', 6),
 ('next', 6),
 ('trying', 6),
 ('whatever', 6),
 ('down', 6),
 ('Y', 6),
 ('15', 6),
 ('18h', 6),
 ('AlphaZero', 6),
 ('chess', 6),
 ('sauce', 6),
 ('light', 5),
 ('through', 5),
 ('put', 5),
 ('way.', 5),
 ('16', 5),
 ('forums.nexusmods.com', 5),
 ('without', 5),
 ('shots', 5),
 ('ought', 5),
 ('least', 5),
 ('offer', 5),
 ('character', 5),
 ('given', 5),
 ('antifungal', 5),
 ('may', 5),
 ('help', 5),
 ('own', 5),
 ('always', 5),
 ('Not', 5),
 ('took', 5),
 ('gone', 5),
 ('worse', 5),
 ('kind', 5),
 ('week', 5),
 ('started', 5),
 ('e', 5),
 ('01', 5),
 ('O', 5),
 ('Home', 5),
 ('amazing', 5),
 ('But', 5),
 ('problems', 5),
 ('portfolio', 5),
 ('data', 5),
 ('skills', 5),
 ('spent', 5),
 ('And', 5),
 ('am', 5),
 ('decided', 5),
 ('pay', 5),
 ('yet', 5),
 ('review', 5),
 ('it.', 5),
 ('All', 5),
 ('might', 5),
 ('instead', 5),
 ('v', 5),
 ('forever', 5),
 ('days', 5),
 ('d', 5),
 ('hours', 5),
 ('give', 5),
 ('based', 5),
 ('set', 5),
 ('found', 5),
 ('main', 5),
 ('She', 5),
 ('dad', 5),
 ('dog', 5),
 ('she', 5),
 ('Ryan', 5),
 ('stack', 5),
 ('Group', 5),
 ('11', 4),
 ('room', 4),
 ('Apr', 4),
 ('2012', 4),
 ('takes', 4),
 ('them.', 4),
 ('soon', 4),
 ('wait', 4),
 ('wouldn', 4),
 ('finally', 4),
 ('course.', 4),
 ('nice', 4),
 ('perks', 4),
 ('supply', 4),
 ('merchant', 4),
 ('level.', 4),
 ('52', 4),
 ('al', 4),
 ('17', 4),
 ('glycol', 4),
 ('such', 4),
 ('skin', 4),
 ('contains', 4),
 ('while', 4),
 ('No', 4),
 ('Kids', 4),
 ('exactly', 4),
 ('say', 4),
 ('break', 4),
 ('money', 4),
 ('dinner', 4),
 ('themselves', 4),
 ('fast', 4),
 ('MRSA.', 4),
 ('searched', 4),
 ('resistant', 4),
 ('inhibits', 4),
 ('mrsa', 4),
 ('choloroxylenol', 4),
 ('remembered', 4),
 ('must', 4),
 ('cure', 4),
 ('two', 4),
 ('12', 4),
 ('mind', 4),
 ('7', 4),
 ('x', 4),
 ('To', 4),
 ('bit', 4),
 ('doing', 4),
 ('working', 4),
 ('sometimes', 4),
 ('pretty', 4),
 ('Google', 4),
 ('high', 4),
 ('self', 4),
 ('ever', 4),
 ('does', 4),
 ('haven', 4),
 ('weeks', 4),
 ('support', 4),
 ('develop', 4),
 ('deep', 4),
 ('apply', 4),
 ('theory', 4),
 ('friend', 4),
 ('etc.', 4),
 ('live', 4),
 ('ask', 4),
 ('4h', 4),
 ('well', 4),
 ('Shock', 4),
 ('likely', 4),
 ('obesity', 4),
 ('past', 4),
 ('part', 4),
 ('year', 4),
 ('health', 4),
 ('method', 4),
 ('feel', 4),
 ('everyone', 4),
 ('times', 4),
 ('different', 4),
 ('As', 4),
 ('getting', 4),
 ('10h', 4),
 ('ones', 4),
 ('isn', 4),
 ('advantage', 4),
 ('course', 4),
 ('100', 4),
 ('124k', 4),
 ('49k', 4),
 ('7h', 4),
 ('Bing', 4),
 ('Crosby', 4),
 ('1977', 4),
 ('later', 4),
 ('Back', 4),
 ('going', 4),
 ('last', 4),
 ('still', 4),
 ('play', 4),
 ('away.', 4),
 ('way', 4),
 ('yes', 4),
 ('17h', 4),
 ('nerves', 4),
 ('Get', 4),
 ('toss', 4),
 ('month', 4),
 ('dollar', 4),
 ('wrote', 4),
 ('Ali', 4),
 ('Yellowknife', 4),
 ('kill', 4),
 ('amount', 4),
 ('infrastructure', 4),
 ('Search', 3),
 ('line', 3),
 ('thinking', 3),
 ('anything', 3),
 ('14h', 3),
 ('yourself', 3),
 ('makes', 3),
 ('51', 3),
 ('works', 3),
 ('especially', 3),
 ('dismount', 3),
 ('foes', 3),
 ('wait.', 3),
 ('sneaky', 3),
 ('notice', 3),
 ('shout', 3),
 ('quick', 3),
 ('Fus', 3),
 ('battle', 3),
 ('started.', 3),
 ('After', 3),
 ('taken', 3),
 ('nearby', 3),
 ('either', 3),
 ('closest', 3),
 ('dragur', 3),
 ('engage', 3),
 ('draw', 3),
 ('draugr.', 3),
 ('sort', 3),
 ('enchant', 3),
 ('armor', 3),
 ('fond', 3),
 ('elven', 3),
 ('weight', 3),
 ('ratio', 3),
 ('improved', 3),
 ('max', 3),
 ('invested', 3),
 ('Smithing.', 3),
 ('Use', 3),
 ('hopefully', 3),
 ('Ebony', 3),
 ('Glass.', 3),
 ('Elrindir', 3),
 ('Drunken', 3),
 ('Huntsman', 3),
 ('Skyrim.', 3),
 ('carries', 3),
 ('Se', 3),
 ('Fe', 3),
 ('activity.', 3),
 ('young', 3),
 ('woman', 3),
 ('Some', 3),
 ('let', 3),
 ('Once', 3),
 ('someone', 3),
 ('job', 3),
 ('second', 3),
 ('tell', 3),
 ('drives', 3),
 ('face', 3),
 ('others.', 3),
 ('child', 3),
 ('come', 3),
 ('home', 3),
 ('early', 3),
 ('world', 3),
 ('beginning', 3),
 ('any', 3),
 ('changed', 3),
 ('antibiotic', 3),
 ('completely', 3),
 ('fungal', 3),
 ('usually', 3),
 ('antibiotics', 3),
 ('tools', 3),
 ('good.', 3),
 ('solution', 3),
 ('dettol', 3),
 ('post', 3),
 ('guy', 3),
 ('folliculitis', 3),
 ('hair', 3),
 ('Now', 3),
 ('gonna', 3),
 ('Will', 3),
 ('M', 3),
 ('Aq', 3),
 ('du', 3),
 ('ae', 3),
 ('c', 3),
 ('word', 3),
 ('language', 3),
 ('16h', 3),
 ('Depot', 3),
 ('having', 3),
 ('several', 3),
 ('man', 3),
 ('google.com', 3),
 ('Mail', 3),
 ('us', 3),
 ('CA', 3),
 ('Director', 3),
 ('new', 3),
 ('manuscript', 3),
 ('For', 3),
 ('probably', 3),
 ('BEST', 3),
 ('multi', 3),
 ('portfolios', 3),
 ('relevant', 3),
 ('analytical', 3),
 ('US', 3),
 ('industry', 3),
 ('knowledge', 3),
 ('topics', 3),
 ('ways', 3),
 ('development', 3),
 ('experience', 3),
 ('understanding', 3),
 ('analysis', 3),
 ('everyone.', 3),
 ('closed', 3),
 ('cook', 3),
 ('care', 3),
 ('time.', 3),
 ('reason', 3),
 ('upset', 3),
 ('dick', 3),
 ('big', 3),
 ('talk', 3),
 ('read', 3),
 ('story', 3),
 ('grey', 3),
 ('studied', 3),
 ('space', 3),
 ('F', 3),
 ('types', 3),
 ('enchanting', 3),
 ('damage.', 3),
 ('oO', 3),
 ('However', 3),
 ('Yadav', 3),
 ('children', 3),
 ('school', 3),
 ('wanted', 3),
 ('science', 3),
 ('googling', 3),
 ('rely', 3),
 ('start', 3),
 ('messages', 3),
 ('done.', 3),
 ('Learn', 3),
 ('raise', 3),
 ('fucking', 3),
 ('filter', 3),
 ('night', 3),
 ('times.', 3),
 ('products', 3),
 ('IT', 3),
 ('algorithms', 3),
 ('Al', 3),
 ('focus', 3),
 ('saw', 3),
 ('helpful', 3),
 ('similar', 3),
 ('Danish', 3),
 ('August', 3),
 ('radio', 3),
 ('00', 3),
 ('recommend', 3),
 ('coding', 3),
 ('interview', 3),
 ('look', 3),
 ('million', 3),
 ('interesting', 3),
 ('playing', 3),
 ('seemed', 3),
 ('somehow', 3),
 ('led', 3),
 ('game', 3),
 ('sad', 3),
 ('front', 3),
 ('daughter', 3),
 ('add', 3),
 ('quest', 3),
 ('shall', 3),
 ('42', 3),
 ('More', 3),
 ('Phoenix', 3),
 ('porch', 3),
 ('Adams', 3),
 ('model', 3),
 ('carrots', 3),
 ('oil', 3),
 ('soy', 3),
 ('chicken', 3),
 ('veggies', 3),
 ('goes', 3),
 ('Today', 3),
 ('hunt', 3),
 ('solved', 3),
 ('Atwater', 3),
 ('THE', 3),
 ('adopted', 3),
 ('girl', 3),
 ('2019', 3),
 ('20', 3),
 ('question', 3),
 ('Unit', 3),
 ('react', 3),
 ('elliot', 3),
 ('Alosio', 3),
 ('23', 2),
 ('shine', 2),
 ('white', 2),
 ('LED', 2),
 ('blue', 2),
 ('3.9k', 2),
 ('w', 2),
 ('Your', 2),
 ('simple', 2),
 ('powerful', 2),
 ('step', 2),
 ('140', 2),
 ('X', 2),
 ('ee', 2),
 ('0', 2),
 ('Stealth', 2),
 ('archery', 2),
 ('Eagle', 2),
 ('Eye', 2),
 ('range', 2),
 ('At', 2),
 ('Deathlord', 2),
 ('shot', 2),
 ('triple', 2),
 ('perk.', 2),
 ('helps', 2),
 ('dusk', 2),
 ('maximize', 2),
 ('potential.', 2),
 ('arrive', 2),
 ('daylight', 2),
 ('crouch', 2),
 ('key', 2),
 ('spokesman.com', 2),
 ('propylene', 2),
 ('cetyl', 2),
 ('alcohol', 2),
 ('effective', 2),
 ('men.', 2),
 ('affected', 2),
 ('minutes', 2),
 ('ingredient', 2),
 ('vinegar', 2),
 ('fungus', 2),
 ('fungus.', 2),
 ('area', 2),
 ('well.', 2),
 ('children.', 2),
 ('slow', 2),
 ('this.', 2),
 ('too.', 2),
 ('Many', 2),
 ('push', 2),
 ('change.', 2),
 ('Do', 2),
 ('heart', 2),
 ('initially', 2),
 ('changes', 2),
 ('relationship', 2),
 ('kids.', 2),
 ('applies', 2),
 ('shampoos', 2),
 ('dairy', 2),
 ('litres', 2),
 ('change', 2),
 ('folliculitis.', 2),
 ('Finally', 2),
 ('pills', 2),
 ('after.', 2),
 ('Folliculitis', 2),
 ('bacterial', 2),
 ('idk', 2),
 ('imagined', 2),
 ('antiseptics', 2),
 ('agents', 2),
 ('MRSA', 2),
 ('methicillin', 2),
 ('staph', 2),
 ('dont', 2),
 ('hospitals', 2),
 ('stations', 2),
 ('clean.', 2),
 ('Found', 2),
 ('papers', 2),
 ('chlorhexidine', 2),
 ('Dettol', 2),
 ('4.8', 2),
 ('multipurpose', 2),
 ('grandpa', 2),
 ('bathes', 2),
 ('safe', 2),
 ('Clorox', 2),
 ('worked.', 2),
 ('clorox', 2),
 ('study', 2),
 ('showed', 2),
 ('ingredients', 2),
 ('Both', 2),
 ('pH', 2),
 ('13', 2),
 ('alkali', 2),
 ('ago', 2),
 ('Bi', 2),
 ('Jo', 2),
 ('z', 2),
 ('edeysez', 2),
 ('dn', 2),
 ('aq', 2),
 ('ued', 2),
 ('Te', 2),
 ('ay', 2),
 ('ut', 2),
 ('st', 2),
 ('nod', 2),
 ('Is', 2),
 ('Oz', 2),
 ('203', 2),
 ('pd', 2),
 ('Le', 2),
 ('ayy', 2),
 ('thoughts', 2),
 ('function', 2),
 ('weird', 2),
 ('needs', 2),
 ('527k', 2),
 ('173k', 2),
 ('12h', 2),
 ('worked', 2),
 ('ago.', 2),
 ('tile', 2),
 ('loved', 2),
 ('talking', 2),
 ('on.', 2),
 ('point', 2),
 ('mentioned', 2),
 ('degrees', 2),
 ('lol.', 2),
 ('hell', 2),
 ('mention', 2),
 ('enjoy', 2),
 ('despite', 2),
 ('straight', 2),
 ('required', 2),
 ('stop', 2),
 ('ALL', 2),
 ('IMAGES', 2),
 ('VIDEOS', 2),
 ('NEWS', 2),
 ('python', 2),
 ('holy', 2),
 ('Strange', 2),
 ('Women', 2),
 ('Lying', 2),
 ('Ponds', 2),
 ('Distributing...', 2),
 ('GitHub', 2),
 ('build', 2),
 ('Work', 2),
 ('balance', 2),
 ('Where', 2),
 ('Remote', 2),
 ('Marketing', 2),
 ('Global', 2),
 ('Analyst', 2),
 ('Senior', 2),
 ('Data', 2),
 ('Health', 2),
 ('contribute', 2),
 ('following', 2),
 ('sub', 2),
 ('guess', 2),
 ('draft.', 2),
 ('words', 2),
 ('stop.', 2),
 ('sake', 2),
 ('24', 2),
 ('single', 2),
 ('mostly', 2),
 ('magic', 2),
 ('aside', 2),
 ('coming', 2),
 ('asking', 2),
 ('finish', 2),
 ('COMMENTS', 2),
 ('People', 2),
 ('writing', 2),
 ('21', 2),
 ('jobs.capitalgroup.com', 2),
 ('team', 2),
 ('Research', 2),
 ('maintain', 2),
 ('quantitative', 2),
 ('dive', 2),
 ('publications', 2),
 ('databases', 2),
 ('internal', 2),
 ('needed', 2),
 ('solve', 2),
 ('allocation', 2),
 ('construction', 2),
 ('equity', 2),
 ('solving', 2),
 ('series', 2),
 ('ability', 2),
 ('recommendations', 2),
 ('written', 2),
 ('knew', 2),
 ('Her', 2),
 ('basically', 2),
 ('marriage', 2),
 ('both', 2),
 ('expected', 2),
 ('married', 2),
 ('learn', 2),
 ('2k', 2),
 ('vacation', 2),
 ('together.', 2),
 ('outside', 2),
 ('Ricky', 2),
 ('Montgomery', 2),
 ('exist.', 2),
 ('bills', 2),
 ('guys', 2),
 ('jokes', 2),
 ('medium', 2),
 ('same', 2),
 ('lights', 2),
 ('Earth', 2),
 ('star', 2),
 ('systems', 2),
 ('bound', 2),
 ('all.', 2),
 ('sense', 2),
 ('love', 2),
 ('Stamina', 2),
 ('enchants', 2),
 ('utility', 2),
 ('foes.', 2),
 ('Dual', 2),
 ('Effect', 2),
 ('Fire', 2),
 ('worst', 2),
 ('worry.', 2),
 ('54', 2),
 ('studies', 2),
 ('gut', 2),
 ('author', 2),
 ('professor', 2),
 ('medicine', 2),
 ('Wake', 2),
 ('Forest', 2),
 ('Disease', 2),
 ('Obesity', 2),
 ('current', 2),
 ('journal', 2),
 ('between', 2),
 ('passed', 2),
 ('mother', 2),
 ('birth', 2),
 ('natural', 2),
 ('milk', 2),
 ('research.', 2),
 ('lore', 2),
 ('by...', 2),
 ('breathing', 2),
 ('fresh', 2),
 ('air', 2),
 ('eating', 2),
 ('suddenly', 2),
 ('4128', 2),
 ('kinda', 2),
 ('again', 2),
 ('others', 2),
 ('Happy', 2),
 ('lh', 2),
 ('clearly', 2),
 ('sound', 2),
 ('obvious', 2),
 ('spiral', 2),
 ('solving.', 2),
 ('With', 2),
 ('goals', 2),
 ('proceed', 2),
 ('spend', 2),
 ('exact', 2),
 ('perfect', 2),
 ('head', 2),
 ('error', 2),
 ('approached', 2),
 ('problem.', 2),
 ('Don', 2),
 ('refrain', 2),
 ('spending', 2),
 ('Eleanor', 2),
 ('Roosevelt', 2),
 ('once', 2),
 ('mistakes', 2),
 ('tr', 2),
 ('creating', 2),
 ('coincidentally', 2),
 ('11h', 2),
 ('debt', 2),
 ('politicians', 2),
 ('rich', 2),
 ('younger', 2),
 ('salary', 2),
 ('Also', 2),
 ('attractive.', 2),
 ('color', 2),
 ('heard', 2),
 ('Daniel', 2),
 ('Gross', 2),
 ('28', 2),
 ('machine', 2),
 ('scale', 2),
 ('tried', 2),
 ('attractive', 2),
 ('anymore.', 2),
 ('charge', 2),
 ('quite', 2),
 ('36m', 2),
 ('Computer', 2),
 ('designed', 2),
 ('efficiently', 2),
 ('computers.', 2),
 ('related', 2),
 ('stuff', 2),
 ('Of', 2),
 ('Rey', 2),
 ('structures', 2),
 ('314', 2),
 ('S', 2),
 ('Award', 2),
 ('Mathematics', 2),
 ('available', 2),
 ('maths', 2),
 ('videos', 2),
 ('lol', 2),
 ('234', 2),
 ('salmon', 2),
 ('conservation', 2),
 ('Denmark', 2),
 ...]
In [125]:
test = Counter(all_text.split(' '))
test
Out[125]:
Counter({'': 19,
         'iS': 1,
         '11': 4,
         '23': 2,
         '1': 20,
         'Q': 11,
         'Search': 3,
         'If': 14,
         'shine': 2,
         'a': 241,
         'white': 2,
         'LED': 2,
         'light': 5,
         'through': 5,
         'prism': 1,
         'would': 26,
         'see': 9,
         'spectrum': 1,
         'or': 41,
         'red': 1,
         'line': 3,
         'green': 1,
         'and': 275,
         'blue': 2,
         'I': 33,
         've': 15,
         'been': 22,
         'thinking': 3,
         'about': 31,
         'this': 44,
         'since': 9,
         'got': 7,
         'some': 27,
         'of': 219,
         'those': 10,
         'little': 6,
         'window': 1,
         'hangers': 1,
         'that': 106,
         'put': 5,
         'rainbows': 1,
         'into': 9,
         'your': 29,
         'room': 4,
         'by': 22,
         'refracting': 1,
         'dispersing': 1,
         'sunlight': 1,
         'but': 41,
         'don': 14,
         't': 47,
         'have': 52,
         'bright': 1,
         'enough': 21,
         'to': 245,
         'actually': 7,
         'anything': 3,
         'if': 16,
         'shine...': 1,
         '3.9k': 2,
         '171': 1,
         'Share': 33,
         'r': 9,
         'ZenHabits': 1,
         'w': 2,
         'u': 10,
         'Lightfiend': 1,
         '14h': 3,
         'Question': 1,
         'Your': 2,
         'Feelings': 1,
         'The': 20,
         'simple': 2,
         'act': 1,
         'emotions': 1,
         'automatically': 1,
         'distances': 1,
         'yourself': 3,
         'from': 39,
         'them': 17,
         'makes': 3,
         'less': 9,
         'powerful': 2,
         'which': 28,
         'then': 17,
         'enables': 1,
         'you': 102,
         'more': 17,
         'easily': 1,
         'step': 2,
         'back': 17,
         'analyze': 1,
         'channel': 1,
         'the': 344,
         'emotion': 1,
         'in': 125,
         'positive': 1,
         'constructive': 1,
         'way.': 5,
         'theemotionmac': 1,
         '140': 2,
         'Serendipity': 1,
         'serendipitybot': 1,
         '8h': 1,
         'Cheating': 1,
         'cunt': 1,
         'X': 2,
         'Post': 1,
         'From': 1,
         'ImGoingToHellForThis': 1,
         'v.redd.it': 1,
         '16': 5,
         'ee': 2,
         '6': 11,
         '51': 3,
         '.': 19,
         '0': 2,
         'forums.nexusmods.com': 5,
         'BrettM': 1,
         'fosaym': 1,
         '617': 1,
         'Apr': 4,
         '2012': 4,
         'Stealth': 2,
         'archery': 2,
         'works': 3,
         'great': 6,
         'there': 20,
         'for': 77,
         'me.': 9,
         'It': 20,
         'takes': 4,
         'long': 10,
         'time': 31,
         'find': 10,
         'can': 36,
         'sneak': 11,
         'around': 8,
         'pick': 6,
         'off': 7,
         'most': 10,
         'draugr': 7,
         'one': 28,
         'at': 42,
         'without': 5,
         'much': 9,
         'problem': 13,
         'especially': 3,
         'using': 15,
         'Eagle': 2,
         'Eye': 2,
         'perk': 6,
         'get': 27,
         'good': 12,
         'range': 2,
         'shots': 5,
         'them.': 4,
         'At': 2,
         'level': 7,
         'ought': 5,
         'be': 56,
         'able': 10,
         'take': 10,
         'out': 21,
         'Deathlord': 2,
         'with': 54,
         'couple': 9,
         'even': 11,
         'shot': 2,
         'triple': 2,
         'bow': 16,
         'damage': 13,
         'perk.': 2,
         'One': 11,
         'thing': 11,
         'helps': 2,
         'is': 95,
         'not': 35,
         'go': 12,
         'until': 11,
         'dusk': 2,
         'maximize': 2,
         'potential.': 2,
         'arrive': 2,
         'daylight': 2,
         'just': 26,
         'crouch': 2,
         'as': 50,
         'soon': 4,
         'dismount': 3,
         'use': 14,
         'T': 7,
         'key': 2,
         'wait': 4,
         'dark.': 6,
         'You': 18,
         'should': 15,
         'far': 10,
         'away': 10,
         'first': 17,
         'foes': 3,
         'do': 23,
         'wait.': 3,
         'my': 45,
         'characters': 7,
         'was': 54,
         'so': 23,
         'sneaky': 3,
         'dragons': 8,
         'wouldn': 4,
         'notice': 3,
         'him': 8,
         'all': 37,
         'finally': 4,
         'had': 22,
         'shout': 3,
         'quick': 3,
         'Fus': 3,
         'their': 26,
         'attention': 7,
         'could': 7,
         'battle': 3,
         'started.': 3,
         'After': 3,
         'taken': 3,
         'nearby': 3,
         'course.': 4,
         'either': 3,
         'closest': 3,
         'dragur': 3,
         'before': 7,
         'dragon': 7,
         'shows': 6,
         'up': 23,
         'engage': 3,
         'draw': 3,
         'draugr.': 3,
         'What': 6,
         'sort': 3,
         'arrows': 16,
         'are': 52,
         'what': 28,
         'enchant': 3,
         'on': 64,
         'My': 8,
         'armor': 3,
         'very': 18,
         'fond': 3,
         'elven': 3,
         'has': 24,
         'nice': 4,
         'weight': 3,
         'ratio': 3,
         'improved': 3,
         'max': 3,
         'only': 18,
         'perks': 4,
         'invested': 3,
         'Smithing.': 3,
         'Use': 3,
         'best': 18,
         'supply': 4,
         'right': 7,
         'now': 11,
         'hopefully': 3,
         'Ebony': 3,
         'least': 5,
         'Glass.': 3,
         'Elrindir': 3,
         'Drunken': 3,
         'Huntsman': 3,
         'source': 6,
         'Skyrim.': 3,
         'He': 8,
         'often': 6,
         'carries': 3,
         '40': 6,
         'merchant': 4,
         'will': 15,
         'offer': 5,
         'character': 5,
         'given': 5,
         'level.': 4,
         'LD': 1,
         '52': 4,
         'al': 4,
         'Se': 3,
         '17': 4,
         'like': 32,
         'Using': 1,
         'Sho': 1,
         'ce': 1,
         'eee': 1,
         'Eade': 1,
         'anchants': 1,
         '4': 27,
         '06': 1,
         'aw': 1,
         'Fe': 3,
         'spokesman.com': 2,
         'ILULL.': 1,
         'LL': 1,
         '1S': 1,
         'SUUULIIIS': 1,
         'aAlLLU': 1,
         'CULILALLIS': 1,
         'propylene': 2,
         'glycol': 4,
         'cetyl': 2,
         'alcohol': 2,
         'antifungal': 5,
         'activity.': 3,
         'young': 3,
         'woman': 3,
         'said': 10,
         'it': 128,
         'effective': 2,
         'her': 18,
         'men.': 2,
         'Some': 3,
         'people': 15,
         'benefit': 1,
         'applying': 1,
         'dandruff': 1,
         'shampoo': 1,
         'such': 4,
         'Selsun': 1,
         'Blue': 1,
         'area.': 1,
         'Lather': 1,
         'let': 3,
         'stay': 1,
         'affected': 2,
         'zone': 1,
         'five': 1,
         'minutes': 2,
         'rinse.': 1,
         'Selenium': 1,
         'sulfide': 1,
         'active': 1,
         'ingredient': 2,
         'Once': 3,
         'inflamed': 1,
         'skin': 4,
         'healed': 1,
         'may': 5,
         'want': 7,
         'try': 10,
         'applications': 1,
         'vinegar': 2,
         'original': 1,
         'amber': 1,
         'Listerine.': 1,
         'Listerine': 1,
         'contains': 4,
         'herbal': 1,
         'oils': 1,
         'fight': 1,
         'fungus': 2,
         'while': 4,
         'too': 8,
         'acidic': 1,
         'hospitable': 1,
         'fungus.': 2,
         'Antiperspirant': 1,
         'groin': 1,
         'area': 2,
         'help': 5,
         'keep': 9,
         'dry': 1,
         'discourage': 1,
         'overgrowth': 1,
         'well.': 2,
         'Oo': 25,
         'Parents': 1,
         'own': 5,
         'children.': 2,
         'No': 4,
         'owns': 1,
         's': 84,
         'loan': 1,
         'duration': 1,
         'lifetime.': 1,
         'Kids': 4,
         'slaves': 1,
         'because': 20,
         'someone': 3,
         'donated': 1,
         'genetic': 1,
         'material': 1,
         'kids': 9,
         'individuals.': 1,
         'carbon': 1,
         'copies': 1,
         'parents': 15,
         'adoptive': 1,
         'understand': 7,
         'biological': 1,
         'always': 5,
         'slow': 2,
         'catch': 1,
         'this.': 2,
         'life': 7,
         'likes': 1,
         'dislikes': 1,
         'job': 3,
         'ensuring': 1,
         'independent': 1,
         'survive': 1,
         'own.': 1,
         'Not': 5,
         'sure': 7,
         'spiritual': 1,
         'extent': 1,
         'beliefs': 1,
         'purpose': 1,
         'too.': 2,
         'Many': 2,
         'second': 3,
         'tell': 3,
         'exactly': 4,
         'drives': 3,
         'crazy': 1,
         'belief': 1,
         'here': 6,
         'teach': 1,
         'parent': 7,
         'lesson.': 1,
         'Ifa': 1,
         'lacking': 1,
         'discipline': 1,
         'push': 2,
         'person': 7,
         'limit': 1,
         'involuntarily': 1,
         'face': 3,
         'need': 9,
         'change.': 2,
         'being': 8,
         'hypocritical': 1,
         'Do': 2,
         'say': 4,
         'mock': 1,
         'amusement': 1,
         'others.': 3,
         'indecisive': 1,
         'many': 9,
         'situations': 1,
         'where': 7,
         'make': 15,
         'immediate': 1,
         'decisions': 1,
         'spot.': 1,
         'workaholic': 1,
         'chances': 1,
         'child': 3,
         'break': 4,
         'heart': 2,
         'offering': 1,
         'money': 4,
         'come': 3,
         'home': 3,
         'early': 3,
         'dinner': 4,
         'family.': 1,
         'test': 1,
         'how': 18,
         'world': 3,
         'beginning': 3,
         'initially': 2,
         'dependent': 1,
         'survive.': 1,
         'This': 14,
         'changes': 2,
         'adjusting': 1,
         'they': 29,
         'themselves': 4,
         'alone': 1,
         'any': 3,
         'relationship': 2,
         'kids.': 2,
         'Ownership': 1,
         'applies': 2,
         'toa': 1,
         'commodity': 1,
         'anda': 1,
         'o': 17,
         'changed': 3,
         'shampoos': 2,
         'cut': 6,
         'dairy': 2,
         'litres': 2,
         'water': 6,
         'fast': 4,
         'food': 7,
         'no': 6,
         'change': 2,
         'folliculitis.': 2,
         'Finally': 2,
         'took': 5,
         'antibiotic': 3,
         'pills': 2,
         'made': 10,
         'completely': 3,
         'gone': 5,
         'came': 8,
         'worse': 5,
         'after.': 2,
         'Folliculitis': 2,
         'bacterial': 2,
         'fungal': 3,
         'idk': 2,
         'mine': 7,
         'imagined': 2,
         'MRSA.': 4,
         'So': 13,
         'searched': 4,
         'antiseptics': 2,
         'agents': 2,
         'MRSA': 2,
         'methicillin': 2,
         'resistant': 4,
         'staph': 2,
         'usually': 3,
         'antibiotics': 3,
         'dont': 2,
         'work': 13,
         'kind': 5,
         'hospitals': 2,
         'used': 6,
         'stations': 2,
         'tools': 3,
         'clean.': 2,
         'Found': 2,
         'research': 11,
         'papers': 2,
         'saying': 6,
         'things': 12,
         'chlorhexidine': 2,
         'inhibits': 4,
         'mrsa': 4,
         'good.': 3,
         'Then': 9,
         'thought': 7,
         'Dettol': 2,
         '4.8': 2,
         'choloroxylenol': 4,
         'solution': 3,
         'multipurpose': 2,
         'remembered': 4,
         'grandpa': 2,
         'bathes': 2,
         'dettol': 3,
         'must': 4,
         'safe': 2,
         'post': 3,
         'guy': 3,
         'he': 28,
         'Clorox': 2,
         'cure': 4,
         'his': 15,
         'folliculitis': 3,
         'worked.': 2,
         'clorox': 2,
         'There': 6,
         'study': 2,
         'showed': 2,
         'two': 4,
         'ingredients': 2,
         'Both': 2,
         'pH': 2,
         '12': 4,
         '13': 2,
         'super': 6,
         'alkali': 2,
         'know': 12,
         'week': 5,
         'ago': 2,
         'started': 5,
         'showering': 1,
         'caps': 1,
         'full': 6,
         'diluted': 1,
         '500m': 1,
         'every': 13,
         'day': 11,
         'hair': 3,
         'oily': 1,
         'didn': 7,
         'mind': 4,
         'rinsing': 1,
         'completely.': 1,
         'Now': 3,
         'm': 15,
         'gonna': 3,
         'rinse': 1,
         'other': 15,
         'happens': 1,
         'next.': 1,
         'Will': 3,
         'update': 1,
         'progress': 1,
         'Vote': 11,
         'J': 26,
         'Bi': 2,
         'M': 3,
         'Add': 12,
         'acomment': 12,
         'WEIS': 1,
         '2P': 1,
         '40p': 1,
         'o2ua8': 1,
         'yp': 1,
         'i': 7,
         'deyiaao': 1,
         'SpJ0M': 1,
         'Jo': 2,
         '3eq': 1,
         'suunseayy': 1,
         'z': 2,
         'e': 5,
         '7': 4,
         'Aeazze': 1,
         'YOTyM': 1,
         'edeysez': 2,
         'za': 1,
         'dn': 2,
         'aur': 1,
         'suorsuaump': 1,
         'IZUUT': 1,
         'I19q': 1,
         'Os': 1,
         'uO': 1,
         'Aq': 3,
         'XN': 1,
         'si0q994': 1,
         'UWIN': 1,
         'OD': 1,
         'OM': 1,
         'UO': 1,
         '10': 12,
         'e19d0': 1,
         'pueylioYUs': 1,
         'sity': 1,
         'asn': 1,
         'Ud': 1,
         'NOA': 1,
         'sodLeUT': 1,
         'NXT': 1,
         'TXN': 1,
         'O1UI': 1,
         'pausing': 1,
         'aq': 2,
         'ued': 2,
         's10': 1,
         '99A': 1,
         'Te': 2,
         'adUTS': 1,
         'Ioye19do': 1,
         'ay': 2,
         'uonoung': 1,
         'Thujew': 1,
         'du': 3,
         'yw': 1,
         'Adumu': 1,
         'ut': 2,
         'paysydurosse': 1,
         'oq': 1,
         'YIYM': 1,
         'fonpoud': 1,
         'xwjyoum': 1,
         'ayi': 1,
         '01': 5,
         'UaTeamba': 1,
         'st': 2,
         'yonposd': 1,
         '0p': 1,
         'oy': 1,
         'di': 1,
         'XUQVUL': 1,
         'XT': 1,
         'UIYIIM': 1,
         'Inpod': 1,
         'sepess': 1,
         'nod': 2,
         'sindjno': 1,
         'L': 1,
         'tT': 1,
         'ta': 1,
         'ssi': 1,
         'oT': 1,
         'Is': 2,
         '9': 14,
         'Sutsodsuen': 1,
         'auijedid': 1,
         'anoA': 1,
         'uMop': 1,
         'Mojs': 1,
         '03': 1,
         'juem': 1,
         'noX': 1,
         'ssajun': 1,
         'Aem': 1,
         'siy': 1,
         '29A': 1,
         'Oz': 2,
         'Y8nouy': 1,
         '9321331': 1,
         '3': 7,
         'Upjnoys': 1,
         'Noy': 1,
         'A': 12,
         'Ta': 1,
         'dt2': 1,
         'Z': 1,
         'xX': 1,
         'TxX': 1,
         '203': 2,
         '7X': 1,
         'x': 4,
         'ae': 3,
         'O': 5,
         'quaiaiya': 1,
         'Aida': 1,
         'Si': 1,
         'ey': 1,
         'UOIZeAadoO': 1,
         'paZ140': 1,
         '22A': 1,
         'CUE': 1,
         'Chae': 1,
         'aee': 1,
         'si': 1,
         'skesae': 1,
         'Aduinu': 1,
         'jo': 1,
         'uoije': 1,
         'd19jnNW': 1,
         'ROP': 1,
         'TA': 1,
         'LP': 1,
         'c': 3,
         'Bi2e': 1,
         'pd': 2,
         'hA': 1,
         'Le': 2,
         'Aez21e': 1,
         'onpoid': 1,
         'jop': 1,
         'JeYM': 1,
         'puejsIopun': 1,
         'Nok': 1,
         'ams': 1,
         'ayeur': 1,
         'peay': 1,
         'stuoyidg': 1,
         'Inod': 1,
         'ul': 1,
         'UNI': 1,
         'UPd': 1,
         'jaddius': 1,
         'uoyig': 1,
         'd19': 1,
         'sonpoid': 1,
         'uoneorydnynu': 1,
         'peutiou': 1,
         'ssouy': 1,
         'Surppe': 1,
         'Udy': 1,
         'pue': 1,
         '10199A': 1,
         'puodas': 1,
         'B': 1,
         'JO': 1,
         'sjUsWaTa': 1,
         'BY': 1,
         'TB': 1,
         '10399A': 1,
         '9UO': 1,
         'SyUDUITA': 1,
         'UW': 1,
         'surdidynur': 1,
         'hq': 1,
         'parepnoyes': 1,
         'ura': 1,
         'yonpoad': 1,
         'seyeos': 1,
         'ayy': 2,
         'indjno': 1,
         'onyea': 1,
         'sJeypess': 1,
         'worwruayo': 1,
         'psom': 1,
         'Cunjnqnooa': 1,
         'snok': 1,
         'pling': 1,
         'WaldvHO': 1,
         'casispie': 1,
         'hugealienpie': 1,
         'thechubbynerd': 1,
         'shower': 1,
         'thoughts': 2,
         'Contractions': 1,
         'function': 2,
         'almost': 1,
         'identically': 1,
         'word': 3,
         'phrase': 1,
         'appropriate': 1,
         'places': 1,
         'sentence.': 1,
         'weird': 2,
         'quirks': 1,
         'language': 3,
         'we': 29,
         've.': 1,
         'needs': 2,
         'warning': 1,
         'sign.': 1,
         'Idid': 1,
         'coming.': 1,
         'English': 1,
         'confusing.': 1,
         'To': 4,
         'Isay...': 1,
         's.': 1,
         'That': 7,
         'linguist': 1,
         'm.': 1,
         '527k': 2,
         '173k': 2,
         'elfmere': 1,
         '16h': 3,
         'Why': 1,
         'factory': 1,
         'when': 12,
         'an': 18,
         'honors': 1,
         'degree': 1,
         'physics....': 1,
         'Repy': 6,
         '83k': 1,
         'DillingerRadio': 1,
         '12h': 2,
         '2': 12,
         'Awards': 1,
         'worked': 2,
         'Home': 5,
         'Depot': 3,
         'ago.': 2,
         'distinctly': 1,
         'recall': 1,
         'coworker': 1,
         'who': 21,
         'tile': 2,
         'department': 1,
         'bit': 4,
         'eccentric.': 1,
         'loved': 2,
         'talking': 2,
         'real': 6,
         'personality': 1,
         'outgoing': 1,
         'knowledgeable': 1,
         'hands': 1,
         'on.': 2,
         'Ina': 1,
         'discussion': 1,
         'were': 8,
         'having': 3,
         'point': 2,
         'mentioned': 2,
         'education': 1,
         'Master': 1,
         'physics.': 1,
         'also': 11,
         'several': 3,
         'master': 1,
         'degrees': 2,
         'lol.': 2,
         'definitely': 1,
         'asked': 6,
         'hell': 2,
         'doing': 4,
         'working': 4,
         'turned': 1,
         'me': 21,
         'Dillinger': 1,
         'sometimes': 4,
         'gotta': 1,
         'love.': 1,
         'went': 6,
         'mention': 2,
         'enjoy': 2,
         'fields': 1,
         'despite': 2,
         'loving': 1,
         'various': 1,
         'subjects.': 1,
         'straight': 2,
         'manual': 1,
         'labor': 1,
         'hands.': 1,
         'wasn': 1,
         'either.': 1,
         'hardware': 1,
         'workers': 1,
         'pretty': 4,
         'old': 6,
         'man': 3,
         'definition': 1,
         'remind': 1,
         'legally': 1,
         'required': 2,
         'stop': 2,
         'throughout': 1,
         'vy': 1,
         'sinc': 1,
         'retirement': 1,
         'age': 1,
         'aniuildhin': 1,
         'ctannad': 1,
         'vwinvliine': 1,
         'Annadanc': 1,
         'ancn': 1,
         'cinnn': 1,
         '55': 1,
         'google.com': 3,
         'h': 1,
         'Google': 4,
         'strange': 1,
         'women': 1,
         'lying': 1,
         'ponds': 1,
         'distrib': 1,
         'ALL': 2,
         'IMAGES': 2,
         'SHOPPING': 1,
         'VIDEOS': 2,
         'NEWS': 2,
         'Latest': 1,
         'GIF': 1,
         'HD': 1,
         'Product': 1,
         'Gy': 1,
         'GP': 1,
         'moistened': 1,
         'bint': 1,
         'monty': 1,
         'python': 2,
         'holy': 2,
         'grail': 1,
         'Sponsored': 1,
         '24.99': 1,
         '17.91': 1,
         '19.9': 1,
         'Strange': 2,
         'Women': 2,
         'Lying': 2,
         'Strang': 1,
         'In': 8,
         'Ponds': 2,
         'Distributing...': 2,
         'Pon': 1,
         'Etsy': 1,
         'Redbubble': 1,
         'Turtles': 1,
         'STRANGE': 1,
         'WOMEN': 1,
         'AAI': 1,
         'Ca': 1,
         'Mail': 3,
         'glassdoor.com': 1,
         'GitHub': 2,
         'Policy': 1,
         'Details': 1,
         'We': 9,
         'encourage': 1,
         'Hubbers': 1,
         'build': 2,
         'amazing': 5,
         'high': 4,
         'autonomy': 1,
         'self': 4,
         'direction.': 1,
         'Work': 2,
         'balance': 2,
         'important': 1,
         'us': 3,
         'why': 7,
         'flexible': 1,
         'schedules': 1,
         'unlimited': 1,
         'PTO.': 1,
         'believe': 1,
         'allows': 1,
         'wherever': 1,
         're': 8,
         'happiest.': 1,
         'Where': 2,
         ...})
In [133]:
from sklearn.decomposition import TruncatedSVD
def get_lsa(data, index):
    tfidf = TfidfVectorizer(tokenizer=casual_tokenize)
    tfidf_docs = tfidf.fit_transform(raw_documents=data).toarray()
    # len(tfidf.vocabulary_)

    tfidf_docs = pd.DataFrame(tfidf_docs)
    tfidf_docs = tfidf_docs - tfidf_docs.mean()
    
    

#     columns = ['topic{}'.format(i) for i in range(pca.n_components)]    
#     pca = PCA(n_components = 16)
#     pca = pca.fit(tfidf_docs)
#     pca_topic_vectors = pca.transform(tfidf_docs)
#     columns = ['topic{}'.format(i) for i in range(pca.n_components)]
#     pca_topic_vectors = pd.DataFrame(pca_topic_vectors, columns = columns, index = index)
#     pca_topic_vectors.round(3).head(6)
#     column_nums, terms = zip(*sorted(zip(tfidf.vocabulary_.values(), 
#                                          tfidf.vocabulary_.keys())))
#     weights = pd.DataFrame(pca.components_, columns = terms, 
#                        index = ['topic{}'.format(i) for i in range(16)])
    
    svd = TruncatedSVD(n_components=16, n_iter=100)
    svd_topic_vectors = svd.fit_transform(tfidf_docs.values)
    svd_topic_vectors = pd.DataFrame(svd_topic_vectors, columns = columns, index = index)
    svd_topic_vectors.round(3).head(6)
    return terms, tfidf.vocabulary_, weights



tfidf_docs = pd.DataFrame(tfidf_docs)
tfidf_docs = tfidf_docs - tfidf_docs.mean()
    
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
~/anaconda3/envs/nlpiaenv/lib/python3.6/site-packages/pandas/core/internals/managers.py in create_block_manager_from_blocks(blocks, axes)
   1680 
-> 1681         mgr = BlockManager(blocks, axes)
   1682         mgr._consolidate_inplace()

~/anaconda3/envs/nlpiaenv/lib/python3.6/site-packages/pandas/core/internals/managers.py in __init__(self, blocks, axes, do_integrity_check)
    142         if do_integrity_check:
--> 143             self._verify_integrity()
    144 

~/anaconda3/envs/nlpiaenv/lib/python3.6/site-packages/pandas/core/internals/managers.py in _verify_integrity(self)
    344             if block._verify_integrity and block.shape[1:] != mgr_shape[1:]:
--> 345                 construction_error(tot_items, block.shape[1:], self.axes)
    346         if len(self.items) != tot_items:

~/anaconda3/envs/nlpiaenv/lib/python3.6/site-packages/pandas/core/internals/managers.py in construction_error(tot_items, block_shape, axes, e)
   1718     raise ValueError(
-> 1719         "Shape of passed values is {0}, indices imply {1}".format(passed, implied)
   1720     )

ValueError: Shape of passed values is (4837, 16), indices imply (58, 16)

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-133-1e1bbf590300> in <module>
      5 svd = TruncatedSVD(n_components=16, n_iter=100)
      6 svd_topic_vectors = svd.fit_transform(tfidf_docs.values)
----> 7 svd_topic_vectors = pd.DataFrame(svd_topic_vectors, columns = columns, index = index)
      8 svd_topic_vectors.round(3).head(6)

~/anaconda3/envs/nlpiaenv/lib/python3.6/site-packages/pandas/core/frame.py in __init__(self, data, index, columns, dtype, copy)
    438                 mgr = init_dict({data.name: data}, index, columns, dtype=dtype)
    439             else:
--> 440                 mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy)
    441 
    442         # For data is list-like, or Iterable (will consume into list)

~/anaconda3/envs/nlpiaenv/lib/python3.6/site-packages/pandas/core/internals/construction.py in init_ndarray(values, index, columns, dtype, copy)
    211         block_values = [values]
    212 
--> 213     return create_block_manager_from_blocks(block_values, [columns, index])
    214 
    215 

~/anaconda3/envs/nlpiaenv/lib/python3.6/site-packages/pandas/core/internals/managers.py in create_block_manager_from_blocks(blocks, axes)
   1686         blocks = [getattr(b, "values", b) for b in blocks]
   1687         tot_items = sum(b.shape[0] for b in blocks)
-> 1688         construction_error(tot_items, blocks[0].shape[1:], axes, e)
   1689 
   1690 

~/anaconda3/envs/nlpiaenv/lib/python3.6/site-packages/pandas/core/internals/managers.py in construction_error(tot_items, block_shape, axes, e)
   1717         raise ValueError("Empty data passed with indices specified.")
   1718     raise ValueError(
-> 1719         "Shape of passed values is {0}, indices imply {1}".format(passed, implied)
   1720     )
   1721 

ValueError: Shape of passed values is (4837, 16), indices imply (58, 16)
In [132]:
len(columns)
Out[132]:
16
In [ ]: