In [2]:
import ast
import json
import re
import urllib
import urllib.request

from bs4 import BeautifulSoup
In [116]:
def _parse_product(raw):
    """Decode a ``data-product`` attribute value without executing it.

    The value is tried as JSON first; if that fails it is parsed as a Python
    literal. Unlike ``eval`` neither path can run code embedded in the page.

    Raises:
        ValueError, SyntaxError: if ``raw`` is neither JSON nor a Python literal.
    """
    try:
        return json.loads(raw)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return ast.literal_eval(raw)


def get_shirts(category):
    """Scrape one J.Crew women's category listing and return its item keys.

    Downloads the listing page for ``category``, finds every ``div`` with
    class ``product-tile`` and, for each tile that carries a ``data-product``
    attribute, builds ``"<id>_<color>"`` from that attribute's ``id`` and
    ``color`` fields. Tiles without the attribute are printed and skipped.

    Args:
        category: Path segment inserted into the listing URL, e.g. ``'shoes'``.

    Returns:
        list of str: one ``"<id>_<color>"`` key per tile, in page order.
        Repeated keys are kept.

    Raises:
        urllib.error.URLError: if the page cannot be fetched.
        ValueError, SyntaxError: if a ``data-product`` value cannot be decoded.
        KeyError: if a decoded value lacks ``id`` or ``color``.
    """
    url = "https://www.jcrew.com/c/womens_category/"+category+"?Npge=1&Nrpp=1000"
    # Context manager closes the HTTP response even if read() fails.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, 'html.parser')

    items = []
    for tile in soup.findAll("div", {"class": "product-tile"}):
        raw = tile.get('data-product')
        if raw is None:
            # Surface tiles that lack product metadata so they can be inspected.
            print(tile)
            continue
        product = _parse_product(raw)
        items.append(str(product['id']) + '_' + str(product['color']))
    return items
In [133]:
# Category path segments for the women's listing URL built in get_shirts.
categories = [
    'shirts_tops',
    'pants',
    'denim_jeans',
    'dressesandjumpsuits',
    'shoes',
]
In [134]:
# Collect one "<id>_<color>" key per product tile on the 'shoes' listing page.
items = get_shirts('shoes')
In [135]:
# Number of keys collected; get_shirts does not de-duplicate, so repeats are counted.
len(items)
Out[135]:
308
In [136]:
# Display every collected key (the list contains repeated entries).
items
Out[136]:
['AJ767_BK0001',
 'AJ920_YL5596',
 'AK042_BL8133',
 'AJ768_PK5652',
 'L5473_YL5596',
 'L5472_BK0001',
 'L5472_BK0001',
 'AJ839_WX9602',
 'AJ143_EB0226',
 'AJ755_BK0001',
 'AJ755_BK0001',
 'AJ757_EB4970',
 'AJ758_WX9647',
 'AJ756_BR6669',
 'AJ756_BR6669',
 'AJ151_EB0226',
 'AJ759_YL5612',
 'AJ843_BK0001',
 'AJ843_BK0001',
 'AJ843_BK0001',
 'AJ843_BK0001',
 'AK039_PK6065',
 'AK040_EB4977',
 'AK041_EB0226',
 'AJ845_PK5561',
 'AJ845_PK5561',
 'AK043_NA6434',
 'AK043_NA6434',
 'AJ838_BK0001',
 'AJ838_BK0001',
 'AJ838_BK0001',
 'AL166_PK5652',
 'AK796_BL8133',
 'AK796_BL8133',
 'AJ793_YL5596',
 'AJ793_YL5596',
 'AK685_EB5818',
 'AK685_EB5818',
 'L5198_BK0001',
 'L5198_BK0001',
 'L5198_BK0001',
 'L5197_EG7663',
 'L5484_YL5596',
 'L5483_BK0001',
 'L5483_BK0001',
 'L5425_EB2713',
 'L5471_EB2710',
 'L5467_EB2711',
 'K5912_EB0936',
 'M0284_BK0001',
 'M0283_BK0001',
 'K5858_EB0928',
 'H7174_EG6433',
 'H7174_EG6433',
 'H7174_EG6433',
 'M0438_EF2044',
 'M0438_EF2044',
 'AJ814_BK0001',
 'AJ814_BK0001',
 'AJ814_BK0001',
 'AJ814_BK0001',
 'AJ147_EB0226',
 'AJ813_BK0001',
 'AJ813_BK0001',
 'AJ815_BL8133',
 'AJ815_BL8133',
 'AJ745_BK0001',
 'AJ745_BK0001',
 'AJ745_BK0001',
 'AJ745_BK0001',
 'AJ816_BR6129',
 'AJ816_BR6129',
 'AJ823_BR6129',
 'AJ823_BR6129',
 'AJ823_BR6129',
 'L5458_null',
 'L5458_null',
 'AJ808_WX9647',
 'AJ811_WX9597',
 'AL939_BK0001',
 'AL939_BK0001',
 'AB095_BR6923',
 'AB095_BR6923',
 'K9477_BK0001',
 'K9477_BK0001',
 'L3368_EB0226',
 'H5523_BK0001',
 'H5523_BK0001',
 'H5523_BK0001',
 'H5523_BK0001',
 'J8497_BK0001',
 'J8497_BK0001',
 'L5490_NA6434',
 'H5486_BK0001',
 'AJ737_WX9645',
 'AJ802_EB4970',
 'AJ800_WX9647',
 'AJ747_BL5494',
 'AJ747_BL5494',
 'AJ748_WX9647',
 'AJ742_WX9597',
 'AK360_EB0226',
 'AK359_NA6445',
 'AJ746_WX9619',
 'A4969_BK0001',
 'A4969_BK0001',
 'A4969_BK0001',
 'K7890_EB0226',
 'AJ752_EB4939',
 'K9476_EB0226',
 'J8205_BK0001',
 'J8205_BK0001',
 'J8205_BK0001',
 'J8205_BK0001',
 'J8205_BK0001',
 'J8205_BK0001',
 'J8205_BK0001',
 'J8205_BK0001',
 'J8206_EF7730',
 'AB064_BK0001',
 'AB064_BK0001',
 'AB080_EB0226',
 'AJ164_EB0226',
 'AK038_NA6488',
 'AK038_NA6488',
 'AJ166_EB0226',
 'AE925_BK0001',
 'AE922_EB0226',
 'AL820_EB5809',
 'AL820_EB5809',
 'AL906_EB5809',
 'AL906_EB5809',
 'L5483_BK0001',
 'L5483_BK0001',
 'L5484_YL5596',
 'G8128_BK0001',
 'G8128_BK0001',
 'G8128_BK0001',
 'L5424_EB2713',
 'L5425_EB2713',
 'AB093_EB0226',
 'AL821_EB5810',
 'AM080_EB6540',
 'L7197_EB1877',
 'L6712_EB1880',
 'AM135_EB6539',
 'AI254_EB5986',
 'AI255_EB5502',
 'AI256_EB5987',
 'AI257_EB5499',
 'AO579_EB7138',
 'AI291_EB6192',
 'AI291_EB6192',
 'AI258_EB5987',
 'AD996_EB3676',
 'AD996_EB3676',
 'E8592_EF9319',
 'E8592_EF9319',
 'AI280_EB5817',
 'AI280_EB5817',
 'AI279_EB5817',
 'AI169_EB6201',
 'AI170_EB6202',
 'AG179_EB5285',
 'AG179_EB5285',
 'AG180_EB5287',
 'M0478_null',
 'K7501_EB1678',
 'AC246_EB3828',
 'L3433_EB2314',
 'J9122_EG9262',
 'M0476_EB3670',
 'M0477_EB3669',
 'K0724_EG5387',
 'G8302_EF0173',
 'G8302_EF0173',
 'H7498_EG5849',
 'H7498_EG5849',
 'H7498_EG5849',
 'H7498_EG5849',
 'L8741_EB3181',
 'L8737_EB3180',
 'K1104_EG5387',
 'J4393_PK0171',
 'J4369_WT0001',
 'J4395_WT0001',
 'J4392_BK0001',
 'J4370_GY0007',
 'J4394_BK0001',
 'L3603_DM0574',
 'J4368_GY0007',
 'J4308_BK0001',
 'M0633_EB3673',
 'L7979_WC6066',
 'L5490_NA6434',
 'AJ166_EB0226',
 '64498_BK0001',
 'L2637_YL5596',
 'L0335_EB0226',
 'L9358_BR0968',
 'L3690_BL0021',
 'L3688_BL0021',
 'L3687_BK0001',
 'AJ153_EB0226',
 'AE851_EB0226',
 'AB101_BK0001',
 'AB101_BK0001',
 'AE853_BK0001',
 'AE854_GY6841',
 'AE855_EG7920',
 'AE909_BK0001',
 'AB114_EB0226',
 'K2741_BK0001',
 'K2741_BK0001',
 'AE958_BK0001',
 'AE958_BK0001',
 'K2716_BK0001',
 'AE920_BK0001',
 'AB115_BR6558',
 'AB111_NA6434',
 'AB129_BK0001',
 'AB113_EB0226',
 'AB944_BK0001',
 'J9988_BK0001',
 'AB131_BK0001',
 'AB131_BK0001',
 'AC329_BK0001',
 'AB108_NA6434',
 'AC328_BR6948',
 'AH101_EB4680',
 'AH101_EB4680',
 'K0055_BK0001',
 'K0043_EG6849',
 'AB078_NA6445',
 'AB133_BK0001',
 'AC041_BK0001',
 'AC041_BK0001',
 'K5335_EB0226',
 'AE998_YL5602',
 'F8444_BR5735',
 'F8444_BR5735',
 'F8444_BR5735',
 'H1891_BR5735',
 'H1891_BR5735',
 'AD458_EB3683',
 'B4254_BK0001',
 'J9522_BK0001',
 'E2599_EF4127',
 'M2732_BK0001',
 'M2733_BR0866',
 'AK041_EB0226',
 'AK040_EB4977',
 'AK039_PK6065',
 'AJ840_EB4956',
 'AJ841_EB4970',
 'AJ732_WX9501',
 'AK556_BK0001',
 'L5458_null',
 'L5458_null',
 'AB095_BR6923',
 'AB095_BR6923',
 'AB093_EB0226',
 'J8497_BK0001',
 'J8497_BK0001',
 'L9357_null',
 'L9259_null',
 'AJ811_WX9597',
 'AJ808_WX9647',
 'AJ823_BR6129',
 'AJ823_BR6129',
 'AJ823_BR6129',
 'AL939_BK0001',
 'AL939_BK0001',
 'AL940_BR6755',
 'AL941_GY6589',
 'AJ745_BK0001',
 'AJ745_BK0001',
 'AJ745_BK0001',
 'AJ745_BK0001',
 'AB071_BK0001',
 'H5523_BK0001',
 'H5523_BK0001',
 'H5523_BK0001',
 'H5523_BK0001',
 'J8497_BK0001',
 'J8497_BK0001',
 'L1310_BK5307',
 'L1310_BK5307',
 'L1310_BK5307',
 'L1310_BK5307',
 'L1310_BK5307',
 'L1310_BK5307',
 'L1310_BK5307',
 'L8728_BL0004',
 'L8728_BL0004',
 'L8728_BL0004',
 'L8728_BL0004',
 'L8728_BL0004',
 'AE916_EB0226',
 'K4997_BR6269',
 'K4997_BR6269',
 'L8296_null',
 'M3898_null',
 'M3899_null',
 'M2734_EC5461',
 'M2735_EC8687',
 'M2736_WC6066',
 'M2620_EF2044']
In [185]:
import urllib.request

category = 'shoes'
# `items` holds repeated keys; dict.fromkeys drops the repeats while keeping
# first-seen order, so each image is fetched and written exactly once instead
# of being re-downloaded over the same filename.
for item in dict.fromkeys(items):
    # Image-facade URL asking for a 160x160 JPEG rendition of this key.
    url = 'https://www.jcrew.com/s7-img-facade/' + item + '?fmt=jpeg&qlt=90,0&resMode=sharp&op_usm=.1,0,0,0&crop=0,0,0,0&wid=160&hei=160'
    # Saved into the current working directory as "<category>_<id>_<color>.jpeg".
    filename = category + '_'+ item + '.jpeg'
    urllib.request.urlretrieve(url, filename)
In [285]:
import matplotlib.image
# Read the sample JPEG from the working directory with matplotlib's image reader.
read_img = matplotlib.image.imread('test_shoe.jpeg')
In [284]:
# NOTE(review): this cell's execution count (284) is lower than the load cell's (285),
# so the output shown may come from an earlier assignment of read_img — re-run in order.
read_img
Out[284]:
array([[[248, 248, 248],
        [248, 248, 248],
        [248, 248, 248],
        ...,
        [248, 248, 248],
        [248, 248, 248],
        [248, 248, 248]],

       [[248, 248, 248],
        [248, 248, 248],
        [248, 248, 248],
        ...,
        [248, 248, 248],
        [248, 248, 248],
        [248, 248, 248]],

       [[248, 248, 248],
        [248, 248, 248],
        [248, 248, 248],
        ...,
        [248, 248, 248],
        [248, 248, 248],
        [248, 248, 248]],

       ...,

       [[248, 248, 248],
        [248, 248, 248],
        [248, 248, 248],
        ...,
        [248, 248, 248],
        [248, 248, 248],
        [248, 248, 248]],

       [[248, 248, 248],
        [248, 248, 248],
        [248, 248, 248],
        ...,
        [248, 248, 248],
        [248, 248, 248],
        [248, 248, 248]],

       [[248, 248, 248],
        [248, 248, 248],
        [248, 248, 248],
        ...,
        [248, 248, 248],
        [248, 248, 248],
        [248, 248, 248]]], dtype=uint8)
In [286]:
# Read the PNG version of the sample with the same reader. The output below is float32
# (e.g. 0.972549), whereas the JPEG output earlier in the notebook is uint8 (e.g. 248).
read_img = matplotlib.image.imread('test_shoe.png')
read_img
Out[286]:
array([[[0.972549, 0.972549, 0.972549],
        [0.972549, 0.972549, 0.972549],
        [0.972549, 0.972549, 0.972549],
        ...,
        [0.972549, 0.972549, 0.972549],
        [0.972549, 0.972549, 0.972549],
        [0.972549, 0.972549, 0.972549]],

       [[0.972549, 0.972549, 0.972549],
        [0.972549, 0.972549, 0.972549],
        [0.972549, 0.972549, 0.972549],
        ...,
        [0.972549, 0.972549, 0.972549],
        [0.972549, 0.972549, 0.972549],
        [0.972549, 0.972549, 0.972549]],

       [[0.972549, 0.972549, 0.972549],
        [0.972549, 0.972549, 0.972549],
        [0.972549, 0.972549, 0.972549],
        ...,
        [0.972549, 0.972549, 0.972549],
        [0.972549, 0.972549, 0.972549],
        [0.972549, 0.972549, 0.972549]],

       ...,

       [[0.972549, 0.972549, 0.972549],
        [0.972549, 0.972549, 0.972549],
        [0.972549, 0.972549, 0.972549],
        ...,
        [0.972549, 0.972549, 0.972549],
        [0.972549, 0.972549, 0.972549],
        [0.972549, 0.972549, 0.972549]],

       [[0.972549, 0.972549, 0.972549],
        [0.972549, 0.972549, 0.972549],
        [0.972549, 0.972549, 0.972549],
        ...,
        [0.972549, 0.972549, 0.972549],
        [0.972549, 0.972549, 0.972549],
        [0.972549, 0.972549, 0.972549]],

       [[0.972549, 0.972549, 0.972549],
        [0.972549, 0.972549, 0.972549],
        [0.972549, 0.972549, 0.972549],
        ...,
        [0.972549, 0.972549, 0.972549],
        [0.972549, 0.972549, 0.972549],
        [0.972549, 0.972549, 0.972549]]], dtype=float32)
In [268]:
from PIL import Image
# Open the sample PNG with Pillow. The commented-out lines are alternative input
# files and follow-up steps (1-bit conversion, preview) that are currently disabled.
# image = Image.open("jcrew/shoes_M2620_EF2044.png")
# image = Image.open("jcrew/shoes_M2620_EF2044_bw.png")
image = Image.open("test_shoe.png")
# image = Image.open("test_shoe.jpeg")
# image = image.convert('1')
# image.show()
In [269]:
import numpy as np
# Convert the opened image into a NumPy array (the output below shows 3-channel uint8 values).
np_im = np.array(image)
In [270]:
# Disabled: scaling to 0-1 floats. Because this line is commented out, `bw` is not
# defined by this cell.
# bw = np_im/255.0
# Inspect the raw pixel array.
np_im
Out[270]:
array([[[248, 248, 248],
        [248, 248, 248],
        [248, 248, 248],
        ...,
        [248, 248, 248],
        [248, 248, 248],
        [248, 248, 248]],

       [[248, 248, 248],
        [248, 248, 248],
        [248, 248, 248],
        ...,
        [248, 248, 248],
        [248, 248, 248],
        [248, 248, 248]],

       [[248, 248, 248],
        [248, 248, 248],
        [248, 248, 248],
        ...,
        [248, 248, 248],
        [248, 248, 248],
        [248, 248, 248]],

       ...,

       [[248, 248, 248],
        [248, 248, 248],
        [248, 248, 248],
        ...,
        [248, 248, 248],
        [248, 248, 248],
        [248, 248, 248]],

       [[248, 248, 248],
        [248, 248, 248],
        [248, 248, 248],
        ...,
        [248, 248, 248],
        [248, 248, 248],
        [248, 248, 248]],

       [[248, 248, 248],
        [248, 248, 248],
        [248, 248, 248],
        ...,
        [248, 248, 248],
        [248, 248, 248],
        [248, 248, 248]]], dtype=uint8)
In [254]:
import matplotlib.pyplot as plt
In [255]:
# NOTE(review): `bw` is only assigned in a later cell (`bw = image`), so on a fresh
# top-to-bottom run this cell raises NameError — confirm the intended cell order.
plt.imshow(bw, cmap=plt.cm.binary)
plt.show()
In [271]:
import imageio
# Disabled variant that requested a grayscale read:
# image = imageio.imread(r'test_shoe.png', as_gray=True)
# Re-read the sample PNG with imageio. This rebinds `image`, replacing the Pillow
# object assigned earlier in the notebook.
image = imageio.imread(r'test_shoe.png')
In [272]:
# Inspect the array returned by imageio (the output below shows uint8 values).
image
Out[272]:
Array([[[248, 248, 248],
        [248, 248, 248],
        [248, 248, 248],
        ...,
        [248, 248, 248],
        [248, 248, 248],
        [248, 248, 248]],

       [[248, 248, 248],
        [248, 248, 248],
        [248, 248, 248],
        ...,
        [248, 248, 248],
        [248, 248, 248],
        [248, 248, 248]],

       [[248, 248, 248],
        [248, 248, 248],
        [248, 248, 248],
        ...,
        [248, 248, 248],
        [248, 248, 248],
        [248, 248, 248]],

       ...,

       [[248, 248, 248],
        [248, 248, 248],
        [248, 248, 248],
        ...,
        [248, 248, 248],
        [248, 248, 248],
        [248, 248, 248]],

       [[248, 248, 248],
        [248, 248, 248],
        [248, 248, 248],
        ...,
        [248, 248, 248],
        [248, 248, 248],
        [248, 248, 248]],

       [[248, 248, 248],
        [248, 248, 248],
        [248, 248, 248],
        ...,
        [248, 248, 248],
        [248, 248, 248],
        [248, 248, 248]]], dtype=uint8)
In [273]:
# Plain alias: no conversion happens here, `bw` refers to the same array as `image`.
bw = image
In [274]:
# Inspect `bw`; it shows the same values as `image` because it is only an alias.
bw
Out[274]:
Array([[[248, 248, 248],
        [248, 248, 248],
        [248, 248, 248],
        ...,
        [248, 248, 248],
        [248, 248, 248],
        [248, 248, 248]],

       [[248, 248, 248],
        [248, 248, 248],
        [248, 248, 248],
        ...,
        [248, 248, 248],
        [248, 248, 248],
        [248, 248, 248]],

       [[248, 248, 248],
        [248, 248, 248],
        [248, 248, 248],
        ...,
        [248, 248, 248],
        [248, 248, 248],
        [248, 248, 248]],

       ...,

       [[248, 248, 248],
        [248, 248, 248],
        [248, 248, 248],
        ...,
        [248, 248, 248],
        [248, 248, 248],
        [248, 248, 248]],

       [[248, 248, 248],
        [248, 248, 248],
        [248, 248, 248],
        ...,
        [248, 248, 248],
        [248, 248, 248],
        [248, 248, 248]],

       [[248, 248, 248],
        [248, 248, 248],
        [248, 248, 248],
        ...,
        [248, 248, 248],
        [248, 248, 248],
        [248, 248, 248]]], dtype=uint8)
In [275]:
# Render the unscaled (0-255) array.
plt.imshow(bw)
plt.show()
In [276]:
# Scale pixel values from the 0-255 range to floats in 0-1 (248 -> 0.9725...).
bw2 = bw/255.0
In [277]:
# Render the 0-1 scaled array.
plt.imshow(bw2)
plt.show()
In [227]:
# NOTE(review): this cell's execution count (227) is lower than that of the cell
# assigning bw2 (276), so the output shown comes from an earlier run.
bw2
Out[227]:
Array([[[0.97254902, 0.97254902, 0.97254902],
        [0.97254902, 0.97254902, 0.97254902],
        [0.97254902, 0.97254902, 0.97254902],
        ...,
        [0.97254902, 0.97254902, 0.97254902],
        [0.97254902, 0.97254902, 0.97254902],
        [0.97254902, 0.97254902, 0.97254902]],

       [[0.97254902, 0.97254902, 0.97254902],
        [0.97254902, 0.97254902, 0.97254902],
        [0.97254902, 0.97254902, 0.97254902],
        ...,
        [0.97254902, 0.97254902, 0.97254902],
        [0.97254902, 0.97254902, 0.97254902],
        [0.97254902, 0.97254902, 0.97254902]],

       [[0.97254902, 0.97254902, 0.97254902],
        [0.97254902, 0.97254902, 0.97254902],
        [0.97254902, 0.97254902, 0.97254902],
        ...,
        [0.97254902, 0.97254902, 0.97254902],
        [0.97254902, 0.97254902, 0.97254902],
        [0.97254902, 0.97254902, 0.97254902]],

       ...,

       [[0.97254902, 0.97254902, 0.97254902],
        [0.97254902, 0.97254902, 0.97254902],
        [0.97254902, 0.97254902, 0.97254902],
        ...,
        [0.97254902, 0.97254902, 0.97254902],
        [0.97254902, 0.97254902, 0.97254902],
        [0.97254902, 0.97254902, 0.97254902]],

       [[0.97254902, 0.97254902, 0.97254902],
        [0.97254902, 0.97254902, 0.97254902],
        [0.97254902, 0.97254902, 0.97254902],
        ...,
        [0.97254902, 0.97254902, 0.97254902],
        [0.97254902, 0.97254902, 0.97254902],
        [0.97254902, 0.97254902, 0.97254902]],

       [[0.97254902, 0.97254902, 0.97254902],
        [0.97254902, 0.97254902, 0.97254902],
        [0.97254902, 0.97254902, 0.97254902],
        ...,
        [0.97254902, 0.97254902, 0.97254902],
        [0.97254902, 0.97254902, 0.97254902],
        [0.97254902, 0.97254902, 0.97254902]]])
In [219]:
# NOTE(review): stale output (execution count 219). It shows a 2-D float32 array, unlike
# the 3-channel result above — presumably from the disabled as_gray=True read; confirm or clear.
bw2
Out[219]:
Array([[0.972549, 0.972549, 0.972549, ..., 0.972549, 0.972549, 0.972549],
       [0.972549, 0.972549, 0.972549, ..., 0.972549, 0.972549, 0.972549],
       [0.972549, 0.972549, 0.972549, ..., 0.972549, 0.972549, 0.972549],
       ...,
       [0.972549, 0.972549, 0.972549, ..., 0.972549, 0.972549, 0.972549],
       [0.972549, 0.972549, 0.972549, ..., 0.972549, 0.972549, 0.972549],
       [0.972549, 0.972549, 0.972549, ..., 0.972549, 0.972549, 0.972549]],
      dtype=float32)
In [ ]: