- requests、bs4、pandas、openpyxlをインストール(pandasはDataFrame用、openpyxlはto_excelでのxlsx出力に必要)
$ pip install requests beautifulsoup4 pandas openpyxl
- コードを書く
"""Scrape solar panel listings from solar-off.com and compute price per watt.

Each listing page is parsed into one record per product; the records are
written to ``output.xlsx`` together with a price/power column.
"""
import re
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

BASE_URL = 'https://www.solar-off.com/'

# Listing pages scraped when the file is run as a script.
LISTING_URLS = (
    'https://www.solar-off.com/shopbrand/solarpanel/',
    'https://www.solar-off.com/shopbrand/solarpanel/page2/recommend/',
)

# Column order of the output; matches the key order of each record.
RECORD_FIELDS = (
    'url', 'title', 'material', 'power', 'face',
    'manufacture', 'model', 'price',
)

# requests waits indefinitely unless a timeout is given.
REQUEST_TIMEOUT = 30

# Title layout: 【<material><power>W[ <face>発電]】<maker> 太陽光パネル <model>
# Separators accept both an ASCII space and an ideographic (full-width)
# space. NOTE(review): which of the two the site actually emits could not
# be confirmed from the original pattern, so both are allowed.
TITLE_RE = re.compile(
    r'【(?P<material>[^0-9]+)(?P<power>[0-9]+)W'
    r'(?:[ \u3000](?P<face>.*)発電)?】'
    r'(?P<manufacture>.*)[ \u3000]太陽光パネル[ \u3000]'
    r'(?P<model>[a-zA-Z0-9-]+)'
)
PRICE_RE = re.compile(r'([0-9,]+)円\(税込\)')


def _parse_title(title):
    """Split a product title into material/power/face/manufacture/model.

    Every field is None when the title does not follow the expected layout.
    """
    m = TITLE_RE.match(title)
    if m is None:
        return dict.fromkeys(
            ('material', 'power', 'face', 'manufacture', 'model'))
    return {
        'material': m.group('material'),
        'power': float(m.group('power')),
        # No "<face>発電" part in the title: fall back to '片面'.
        'face': m.group('face') or '片面',
        'manufacture': m.group('manufacture'),
        'model': m.group('model'),
    }


def _parse_price(text):
    """Return the tax-included price as a float, or None if not recognised."""
    m = PRICE_RE.match(text)
    return float(m.group(1).replace(',', '')) if m else None


def parse_solaroff(url):
    """Fetch one listing page and return its products as a list of dicts.

    Args:
        url: URL of a listing page.

    Returns:
        A list of dicts with the keys in ``RECORD_FIELDS``. Fields that
        cannot be extracted are None. Items lacking a ``.price`` or
        ``.name`` element are skipped.

    Raises:
        requests.RequestException: on a network failure, a timeout, or a
            non-success HTTP status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of silently parsing an error page into no records.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    records = []
    for li in soup.select('.category-list li'):
        price_tag = li.select_one('.price')
        name_tag = li.select_one('.name')
        if price_tag is None or name_tag is None:
            continue

        link = name_tag.select_one('a')
        href = link.get('href') if link is not None else None
        # re.match anchors at the start, so surrounding whitespace from
        # get_text() must be removed before matching.
        title = name_tag.get_text().strip()

        records.append({
            # urljoin handles relative, root-relative and absolute hrefs.
            'url': urljoin(BASE_URL, href) if href else None,
            'title': title,
            **_parse_title(title),
            'price': _parse_price(price_tag.get_text().strip()),
        })
    return records


def main():
    """Scrape all listing pages and write the result to output.xlsx."""
    records = []
    for listing_url in LISTING_URLS:
        records += parse_solaroff(listing_url)

    # Explicit columns keep the price/power lookup below valid even when
    # no record was scraped.
    df = pd.DataFrame(records, columns=list(RECORD_FIELDS))
    # Drop products for which any field could not be extracted.
    df = df.dropna()
    # Price per watt (yen / W).
    df['コスパ'] = df['price'] / df['power']
    df.to_excel('output.xlsx')


if __name__ == '__main__':
    main()
- 実行する
$ python solar-off.py
$ open output.xlsx
- 見る
これがW単価49.5円で最安だった。注文50枚からだったけど。