dailylog 2020-04-03
To be converted to a daily script; the sketches below show how the URL and output name could be parameterized by date.
import camelot
import requests
import pandas as pd
url = "https://www.who.int/docs/default-source/coronaviruse/situation-reports/20200403-sitrep-74-covid-19-mp.pdf?"
# url = 'http://www.hrecos.org//images/Data/forweb/HRTVBSH.Metadata.pdf'
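# Toward the daily-script goal, a minimal sketch (commented out, assumptions flagged):
# the WHO situation-report URL embeds the date (YYYYMMDD) and a report number. The
# numbering rule below (days since 2020-01-20) matches sitrep 74 on 2020-04-03 but is
# an assumption, not a verified WHO convention -- check before relying on it.
# from datetime import date
# report_date = date.today()
# sitrep_num = (report_date - date(2020, 1, 20)).days  # assumed numbering scheme
# url = (
#     "https://www.who.int/docs/default-source/coronaviruse/situation-reports/"
#     f"{report_date:%Y%m%d}-sitrep-{sitrep_num}-covid-19-mp.pdf"
# )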
r = requests.get(url, stream=True)
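# My addition (assuming we want to fail fast): stop here if the request failed,
# otherwise an error page would silently be written out as metadata_3.pdf.
r.raise_for_status()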
# CLI equivalent (the camelot CLI takes a saved file path, not r.content):
# camelot --format csv --output ./foo.csv --pages 1-end lattice metadata_3.pdf
chunk_size = 2000
# Stream the PDF to disk in chunk_size-byte pieces
with open('metadata_3.pdf', 'wb') as fd:
    for chunk in r.iter_content(chunk_size):
        fd.write(chunk)
tables = camelot.read_pdf('metadata_3.pdf', pages='1-end')
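# Optional diagnostic (a sketch, not part of the original run): each Camelot table
# carries a parsing_report with an extraction-accuracy score, which helps spot pages
# where the default lattice flavor missed the table structure.
for t in tables:
    print(t.parsing_report)  # e.g. {'accuracy': ..., 'whitespace': ..., 'order': ..., 'page': ...}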
# Keep only tables with at least 7 columns; wider tables get column 1 dropped and
# their columns renumbered so all frames align before concatenation.
big_df = []
for table in tables:
    df = table.df
    print('*****', df.shape)
    if df.shape[1] >= 7:
        num = df.shape[1]
        df.columns = list(range(num))
        if num > 7:
            print('+++++++', num)
            print(df.head())
            smdf = df.drop(1, axis=1)
            smdf.columns = list(range(smdf.shape[1]))
            big_df.append(smdf)
        else:
            big_df.append(df)
frame = pd.concat(big_df, axis=0, ignore_index=True)
frame.to_csv('frametest_9.csv', index=False)
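# Toward the daily script (a sketch; the dated file name is an illustrative assumption):
# writing the output under a per-date name keeps successive runs from overwriting each other.
# from datetime import date
# frame.to_csv(f"frametest_{date.today():%Y%m%d}.csv", index=False)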
# print(frame)
# frame[0]