ソーラーオフで最もコスパの高い太陽光パネルを探す

  1. requests、beautifulsoup4、pandas、openpyxlをインストール(pandasとopenpyxlはExcelファイルの出力に使う)
$ pip install requests
$ pip install beautifulsoup4
$ pip install pandas openpyxl
  1. コードを書く
import re
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup


# Root against which product links found on a listing page are resolved.
_SOLAROFF_BASE_URL = 'https://www.solar-off.com/'

# Product title layout:
#   【<material><power>W[ <face>発電]】<manufacture> 太陽光パネル <model>
# Separators are matched with \s so that both half-width and full-width
# spaces are accepted.
_TITLE_PATTERN = re.compile(
    r'【(?P<material>[^0-9]+)(?P<power>[0-9]+)W(?:\s(?P<face>.*)発電)?】'
    r'(?P<manufacture>.*)\s太陽光パネル\s(?P<model>[a-zA-Z0-9-]+)'
)

# Price text layout: "<digits with thousands separators>円(税込)".
_PRICE_PATTERN = re.compile(r'([0-9,]+)円\(税込\)')


def parse_solaroff(url):
    """Scrape one solar-off.com listing page into a list of product records.

    Args:
        url: URL of the listing page to download.

    Returns:
        A list with one dict per ``.category-list li`` element that has both
        a ``.price`` element and a ``.name`` link. Each dict has the keys
        ``url``, ``title``, ``material``, ``power``, ``face``,
        ``manufacture``, ``model`` and ``price``. ``power`` and ``price`` are
        floats. Fields that cannot be parsed out of the title or price text
        are ``None``; ``face`` defaults to ``'片面'`` when the title matches
        but has no ``…発電`` part.

    Raises:
        requests.RequestException: if the page cannot be downloaded, the
            request times out, or the server answers with an HTTP error
            status.
    """
    # Without a timeout requests may block forever on a stalled connection.
    response = requests.get(url, timeout=30)
    # Fail loudly instead of silently parsing an error page into zero records.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    records = []
    for li in soup.select('.category-list li'):
        price_tag = li.select_one('.price')
        if price_tag is None:
            continue

        # Entries without a product link are skipped, like entries without
        # a price, instead of crashing on a missing element.
        name_tag = li.select_one('.name')
        link_tag = name_tag.select_one('a') if name_tag is not None else None
        href = link_tag.get('href') if link_tag is not None else None
        if href is None:
            continue

        # urljoin copes with root-relative ("/shopdetail/...") and absolute
        # hrefs, which plain string concatenation would mangle.
        product_url = urljoin(_SOLAROFF_BASE_URL, href)

        title = name_tag.get_text()
        # search() rather than match(): get_text() may return leading
        # whitespace in front of the opening bracket.
        title_match = _TITLE_PATTERN.search(title)
        if title_match:
            material = title_match.group('material')
            power = float(title_match.group('power'))
            face = title_match.group('face') or '片面'
            manufacture = title_match.group('manufacture')
            model = title_match.group('model')
        else:
            material = power = face = manufacture = model = None

        price_match = _PRICE_PATTERN.search(price_tag.get_text())
        if price_match:
            price = float(price_match.group(1).replace(',', ''))
        else:
            price = None

        records.append({
            'url': product_url,
            'title': title,
            'material': material,
            'power': power,
            'face': face,
            'manufacture': manufacture,
            'model': model,
            'price': price,
        })
    return records


if __name__ == '__main__':
    # Listing pages to scrape.
    page_urls = [
        "https://www.solar-off.com/shopbrand/solarpanel/",
        "https://www.solar-off.com/shopbrand/solarpanel/page2/recommend/",
    ]

    records = []
    for page_url in page_urls:
        records.extend(parse_solaroff(page_url))

    # Explicit columns keep the frame well-formed even when nothing was
    # scraped; otherwise df['price'] below raises KeyError on an empty frame.
    columns = [
        'url', 'title', 'material', 'power',
        'face', 'manufacture', 'model', 'price',
    ]
    df = pd.DataFrame(records, columns=columns)
    # Drop products whose title or price could not be parsed.
    df = df.dropna()

    # Price per unit of power (price divided by the "W" figure in the title).
    df['コスパ'] = df['price'] / df['power']
    df.to_excel('output.xlsx')
  1. 実行する
$ python solar-off.py
$ open output.xlsx
  1. 見る

  2. これがW単価49.5円で最安だった。注文50枚からだったけど。

www.solar-off.com