Запросить помощь по вопросам веб-скраппинга

Я пытался найти данные на двух веб-сайтах, но столкнулся с проблемами. Я буду очень рад, если кто-нибудь сможет помочь в решении проблемы

1.https://online.capitalcube.com/ На сайте необходимо войти в систему. Я придумал следующий код после просмотра обучающих материалов на youtube в течение последних 2 дней.

from bs4 import BeautifulSoup
import pandas as pd
import requests

URL = 'https://online.capitalcube.com/'
LOGIN_ROUTE = '/login'

import requests

headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:93.0) Gecko/20100101 Firefox/93.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'none',
    'Sec-Fetch-User': '?1',
    'TE': 'trailers',
}

s = requests.session()

login_payload = {
    'email': '<intentionally removed it>',
    'password': '<intentionally removed it>'
}

login_req = s.post(URL + LOGIN_ROUTE, headers = headers, data = login_payload)

print(login_req.status_code)

Ошибка, которую я получаю, выглядит следующим образом

  1. The other website I am trying is stockedge.com I have come up with the following code
import requests
from bs4 import BeautifulSoup
import pandas as pd
import requests

headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:93.0) Gecko/20100101 Firefox/93.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Upgrade-Insecure-Requests': '1',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'none',
    'Sec-Fetch-User': '?1',
    'Connection': 'keep-alive',
}

ticker = 'hdfc-bank/5051?'
urls = {}
urls['balancesheet consolidated'] = f"https://web.stockedge.com/share/{ticker}section=balance-sheet"
urls['balancesheet standalone'] = f"https://web.stockedge.com/share/{ticker}section=balance-sheet&statement-type=standalone"
urls['profitloss consolidated'] = f"https://web.stockedge.com/share/{ticker}section=profit-loss&statement-type=consolidated"
urls['profitloss standalone'] = f"https://web.stockedge.com/share/{ticker}section=profit-loss&statement-type=standalone"
urls['cashflow consolidated'] = f"https://web.stockedge.com/share/{ticker}section=cash-flow"
urls['cashflow standalone'] = f"https://web.stockedge.com/share/{ticker}section=cash-flow&statement-type=standalone"
urls['quarterlyresults consolidated'] = f"https://web.stockedge.com/share/{ticker}section=results"
urls['quarterlyresults standalone'] = f"https://web.stockedge.com/share/{ticker}section=results&active-statement-type=Standalone"
urls['shareholding pattern'] = f"https://web.stockedge.com/share/{ticker}section=pattern"
urls['return ratios'] = f"https://web.stockedge.com/share/{ticker}section=ratios&ratio-id=roe"
urls['efficiency ratios'] = f"https://web.stockedge.com/share/{ticker}section=ratios&ratio-id=roe&ratio-category=efficiencyratios"
urls['growth ratios'] = f"https://web.stockedge.com/share/{ticker}section=ratios&ratio-id=roe&ratio-category=growthratios"
urls['solvency ratios'] = f"https://web.stockedge.com/share/{ticker}section=ratios&ratio-id=net_sales_growth&ratio-category=solvencyratios"
urls['cashflow ratios'] = f"https://web.stockedge.com/share/{ticker}section=ratios&ratio-id=net_sales_growth&ratio-category=cashflowratios"
urls['valuation ratios'] = f"https://web.stockedge.com/share/{ticker}section=ratios&ratio-id=net_sales_growth&ratio-category=valuationratios"

xlwriter = pd.ExcelWriter(f'financial statements ({ticker}).xlsx', engine='xlsxwriter')

for key in urls.keys():
    response = requests.get(urls[key], headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')
    df = pd.read_html(str(soup), attrs={'class': 'background md list-md hydrated'})[0]
    df.to_excel(xlwriter, sheet_name=key, index=False)

xlwriter.save()

Ошибка, которую я получаю

runfile('/Users/rafatsiddiqui/Downloads/scientificProject/Company Financial Webscrape.py', wdir='/Users/rafatsiddiqui/Downloads/scientificProject') Traceback (последний последний вызов): Файл "", строка 1, in File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_bundle/pydev_umd.py", line 198, in runfile pydev_imports.execfile(filename, global_vars, local_vars) # выполнить сценарий Файл "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_imps/_pydev_execfile.py", строка 18, в execfile exec(compile(contents+"\n", file, 'exec'), glob, loc) Файл "/Users/rafatsiddiqui/Downloads/scientificProject/Company Financial Webscrape.py", строка 36, in xlwriter = pd.ExcelWriter(f'финансовые отчеты ({ticker}).xlsx', engine='xlsxwriter') Файл "/Users/rafatsiddiqui/opt/anaconda3/envs/scientificProject/lib/python3.9/site-packages/pandas/io/excel/_xlsxwriter.py", строка 191, in init. super().init( Файл "/Users/rafatsiddiqui/opt/anaconda3/envs/scientificProject/lib/python3.9/site-packages/pandas/io/excel/_base.py", строка 925, in init self.handles = get_handle( Файл "/Users/rafatsiddiqui/opt/anaconda3/envs/scientificProject/lib/python3.9/site-packages/pandas/io/common.py", строка 711, in get_handle handle = open(handle, ioargs.mode) FileNotFoundError: [Errno 2] Нет такого файла или каталога: 'financial statements (hdfc-bank/5051?).xlsx'

Вернуться на верх