Python

WB Parser profitable for goods

TiHostLiza · Jun 15, 2025

Код
(фиксы от 22.08.25)
- Не шлет дубли
- Чекает все разделы
import asyncio
from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError
from bs4 import BeautifulSoup
import re
from urllib.parse import urlparse, urlunparse, parse_qs, urlencode
import telebot
import os

# File that receives cashback products which do NOT reach the profitability threshold.
OUTPUT_FILENAME = "wildberries_cashback_products.txt"
# File that receives cashback products which reach the profitability threshold.
PROFITABLE_OUTPUT_FILENAME = "wildberries_profitable_products.txt"
# User-Agent string passed to the Playwright browser context.
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36'
# Minimum cashback-to-price ratio, in percent, for a product to count as profitable.
PROFITABILITY_THRESHOLD = 30.0 # Profit percentage (30%)

# --- Telegram Bot Configuration ---
TELEGRAM_BOT_TOKEN = "" # paste the bot token here
TELEGRAM_CHAT_ID = "" # paste the channel ID/username here

# Bot client created at import time; used to send notifications.
bot = telebot.TeleBot(TELEGRAM_BOT_TOKEN)

# --- global set of unique products ---
# Product URLs already handled during this run; used to skip duplicates.
processed_urls = set()

def clean_text(text):
    """Collapse every run of whitespace in *text* to a single space.

    Leading and trailing whitespace is removed as well.
    """
    # str.split() with no arguments already discards leading/trailing
    # whitespace, so no separate strip() is required.
    return ' '.join(text.split())

def _extract_amount(text):
    """Return the first number found in *text* as an int, or None.

    Digit groups separated by whitespace (for example "1 200") are joined,
    so a thousands separator does not truncate the value.
    """
    match = re.search(r'\d[\d\s]*', text)
    if not match:
        return None
    return int(re.sub(r'\s+', '', match.group(0)))


def parse_product_card_sync(card_html_soup):
    """Extract cashback, link, name and price from one product card.

    Args:
        card_html_soup: parsed card element; it must provide ``select_one``
            (the caller passes elements selected by BeautifulSoup).

    Returns:
        A dict with the keys 'url', 'name', 'full_price_str',
        'full_price_num', 'cashback_str' and 'cashback_num', or None when
        the card has no cashback badge, no usable link, or parsing failed.
        A missing price is reported as 'N/A' / 0.
    """
    try:
        cashback_tag = card_html_soup.select_one('span.feedbacks-points-sum')
        if not cashback_tag:
            return None

        # Cashback is parsed the same way as the price so that values such
        # as "1 100" are read as 1100 rather than 1.
        cashback_amount_num = _extract_amount(cashback_tag.get_text(strip=True))
        if cashback_amount_num is None:
            return None

        link_tag = card_html_soup.select_one('a.product-card__link.j-card-link')
        href = link_tag.attrs.get('href') if link_tag else None
        if not href:
            return None
        # urljoin handles absolute, root-relative, relative and
        # protocol-relative ("//host/path") links uniformly.
        from urllib.parse import urljoin
        product_url = urljoin("https://www.wildberries.ru/", href)

        img_tag = card_html_soup.select_one('img.j-thumbnail')
        product_name = 'N/A'
        if img_tag and 'alt' in img_tag.attrs:
            product_name = clean_text(img_tag['alt'])

        full_price_str = 'N/A'
        full_price_num = 0
        full_price_tag = card_html_soup.select_one('ins.price__lower-price')
        if full_price_tag:
            price_value = _extract_amount(full_price_tag.get_text(strip=True))
            if price_value is not None:
                full_price_num = price_value
                full_price_str = str(price_value)

        return {
            'url': product_url, 'name': product_name,
            'full_price_str': full_price_str, 'full_price_num': full_price_num,
            'cashback_str': str(cashback_amount_num), 'cashback_num': cashback_amount_num,
        }
    except Exception as e:
        # Best-effort boundary: one malformed card must not abort the page.
        print(f"[!] Ошибка при парсинге карточки товара: {e}")
        return None

def scrape_page_html_content(page_html):
    """Parse a catalogue page and collect its cashback products.

    Args:
        page_html: HTML text of the page.

    Returns:
        A tuple ``(products, had_cards)``: the list of dicts returned by
        ``parse_product_card_sync`` for cards it accepted, and a bool that
        tells whether the page contained any product card at all.
    """
    soup = BeautifulSoup(page_html, 'lxml')
    product_card_elements = soup.select('div.product-card__wrapper')
    parsed_cards = (parse_product_card_sync(card) for card in product_card_elements)
    found_products_with_cashback = [data for data in parsed_cards if data]
    return found_products_with_cashback, len(product_card_elements) > 0

async def fetch_page_content_with_playwright(context, url_to_fetch):
    """Load *url_to_fetch* in a new tab and return the page HTML.

    Args:
        context: Playwright browser context used to open the tab.
        url_to_fetch: address of the page to load.

    Returns:
        The page HTML, or None when the page timed out or any other error
        occurred (the error is printed, never raised).
    """
    page = None
    try:
        # Opening the tab is inside the try block so that a failure here is
        # reported like any other load error instead of escaping.
        page = await context.new_page()
        print(f" Переход на: {url_to_fetch}")
        await page.goto(url_to_fetch, wait_until="domcontentloaded", timeout=60000)
        await page.wait_for_selector('div.product-card__wrapper, div.skeleton-card__wrapper, .catalog-page__not-found, .search-page__text-empty', timeout=25000)
        return await page.content()
    except PlaywrightTimeoutError:
        print(f"[!] Тайм-аут на {url_to_fetch}")
        return None
    except Exception as e:
        print(f"[!] Ошибка при загрузке {url_to_fetch}: {e}")
        return None
    finally:
        if page is not None:
            try:
                await page.close()
            except Exception as e:
                # A failed close must not replace the result computed above.
                print(f"[!] Не удалось закрыть вкладку: {e}")

def construct_next_page_url(base_category_url, page_number):
    """Build the URL of page *page_number* of a catalogue listing.

    The existing query parameters are kept; ``page`` is overwritten,
    ``sort=popular`` is added only when no sort is present, and
    ``ffeedbackpoints=1`` is always set.

    Args:
        base_category_url: catalogue URL, with or without a query string.
        page_number: page index to request.

    Returns:
        The rebuilt URL as a string.
    """
    parsed_original_url = urlparse(base_category_url)
    query_params = parse_qs(parsed_original_url.query)
    query_params['page'] = [str(page_number)]
    query_params.setdefault('sort', ['popular'])
    query_params['ffeedbackpoints'] = ['1']
    new_query_string = urlencode(query_params, doseq=True)
    return urlunparse(parsed_original_url._replace(query=new_query_string))

async def send_telegram_notification(product_info, is_profitable):
    """Send a product summary to the configured Telegram chat.

    The blocking ``bot.send_message`` call is executed through
    ``asyncio.to_thread``. Any exception raised while sending is printed
    and swallowed.

    Args:
        product_info: mapping with the keys 'name', 'full_price_str',
            'cashback_str' and 'url'.
        is_profitable: when truthy, the "(ВЫГОДНЫЙ)" marker is added to the
            first line of the message.
    """
    message_text = (
        f"Новый товар с кэшбэком {'(ВЫГОДНЫЙ)' if is_profitable else ''}:\n"
        f"Название: {product_info['name']}\n"
        f"Цена: {product_info['full_price_str']} ₽\n"
        f"Кэшбэк: {product_info['cashback_str']} ₽\n"
        f"Ссылка: {product_info['url']}"
    )
    try:
        await asyncio.to_thread(bot.send_message, TELEGRAM_CHAT_ID, message_text)
    except Exception as e:
        print(f"[!!!] Ошибка Telegram: {e}")

async def process_paginated_category(context, base_url):
    """Walk the pages of one catalogue listing and record new cashback items.

    Pages are requested one after another, starting from page 1. Three
    pages in a row that either failed to load or contained no product
    cards end the walk. Every product URL not yet in ``processed_urls`` is
    appended to one of the two output files; profitable products also
    trigger a Telegram notification.

    Args:
        context: Playwright browser context used to load pages.
        base_url: catalogue URL to paginate.

    Returns:
        Number of new (not previously seen) products handled.
    """
    current_page = 1
    handled = 0
    blank_streak = 0

    while True:
        target_url = construct_next_page_url(base_url, current_page)
        print(f"\nОбрабатываю страницу {current_page}: {target_url}")
        html = await fetch_page_content_with_playwright(context, target_url)

        if html:
            cashback_items, has_cards = scrape_page_html_content(html)
            stop_notice = " [*] Три страницы без карточек подряд — конец категории."
        else:
            cashback_items, has_cards = [], False
            stop_notice = " [*] Три пустые страницы подряд — останавливаюсь."

        if not has_cards:
            blank_streak += 1
            if blank_streak >= 3:
                print(stop_notice)
                break
            current_page += 1
            continue

        blank_streak = 0  # cards were found, so the streak is reset

        if not cashback_items:
            print(" [-] Товары с кэшбэком не найдены.")
        else:
            print(f" [+] Найдено {len(cashback_items)} товаров с кэшбэком")
            for item in cashback_items:
                # Skip products already handled earlier in this run.
                if item['url'] in processed_urls:
                    continue
                processed_urls.add(item['url'])
                handled += 1

                price = item['full_price_num']
                is_profitable = (
                    price > 0
                    and (item['cashback_num'] / price) * 100 >= PROFITABILITY_THRESHOLD
                )

                if is_profitable:
                    destination = PROFITABLE_OUTPUT_FILENAME
                else:
                    destination = OUTPUT_FILENAME
                with open(destination, 'a', encoding='utf-8') as f:
                    f.write(f"{item['url']} | {item['name']} | {item['full_price_str']} | {item['cashback_str']}\n")

                if is_profitable:
                    await send_telegram_notification(item, is_profitable)

        current_page += 1
        await asyncio.sleep(2)
    return handled

async def main():
    """Ask for catalogue links, scrape each of them and print the total.

    Links come either from ``links.txt`` (one per line) or from a single
    URL typed by the user. Both output files are recreated with a header
    before scraping starts. A failure while processing one link is printed
    and the remaining links are still processed.
    """
    use_links_file = input("Хотите использовать файл links.txt? (y/n): ").strip().lower()
    if use_links_file == 'y':
        try:
            with open("links.txt", 'r', encoding='utf-8') as f:
                urls_to_process = [line.strip() for line in f if line.strip()]
        except FileNotFoundError:
            print("[!] Файл links.txt не найден рядом со скриптом.")
            return
    else:
        initial_url = input("Введите ссылку: ").strip()
        urls_to_process = [initial_url] if initial_url else []

    if not urls_to_process:
        # Nothing to scrape: leave earlier result files untouched.
        print("[!] Не указано ни одной ссылки — нечего обрабатывать.")
        return

    # Initialise the output files.
    header_line = "Ссылка | Название | Цена (₽) | Кэшбэк (₽)\n" + "="*100 + "\n"
    for filename in (OUTPUT_FILENAME, PROFITABLE_OUTPUT_FILENAME):
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(header_line)

    total_items_found = 0

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            context = await browser.new_context(user_agent=USER_AGENT)
            for url in urls_to_process:
                print(f"\n===== Обработка ссылки: {url} =====")
                try:
                    total_items_found += await process_paginated_category(context, url)
                except Exception as e:
                    # One broken category must not stop the whole run.
                    print(f"[!] Ошибка при обработке {url}: {e}")
            await context.close()
        finally:
            await browser.close()

    print(f"\nСканирование завершено. Найдено товаров: {total_items_found}")

# Start the scraper only when the file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())

Python
import asyncio

from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError

from bs4 import BeautifulSoup

import re

from urllib.parse import urlparse, urlunparse, parse_qs, urlencode

import telebot

import os



# File that receives cashback products which do NOT reach the profitability threshold.
OUTPUT_FILENAME = "wildberries_cashback_products.txt"

# File that receives cashback products which reach the profitability threshold.
PROFITABLE_OUTPUT_FILENAME = "wildberries_profitable_products.txt"

# User-Agent string passed to the Playwright browser context.
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36'

# Minimum cashback-to-price ratio, in percent, for a product to count as profitable.
PROFITABILITY_THRESHOLD = 30.0 # Profit percentage (30%)



# --- Telegram Bot Configuration ---

TELEGRAM_BOT_TOKEN = ""  # paste the bot token here

TELEGRAM_CHAT_ID = ""    # paste the channel ID/username here



# Bot client created at import time; used to send notifications.
bot = telebot.TeleBot(TELEGRAM_BOT_TOKEN)



# --- global set of unique products ---
# Product URLs already handled during this run; used to skip duplicates.

processed_urls = set()



def clean_text(text):
    """Collapse every run of whitespace in *text* to a single space.

    Leading and trailing whitespace is removed as well.
    """
    words = text.split()
    return ' '.join(words)



def _extract_amount(text):
    """Return the first number found in *text* as an int, or None.

    Digit groups separated by whitespace (for example "1 200") are joined,
    so a thousands separator does not truncate the value.
    """
    match = re.search(r'\d[\d\s]*', text)
    if not match:
        return None
    return int(re.sub(r'\s+', '', match.group(0)))


def parse_product_card_sync(card_html_soup):
    """Extract cashback, link, name and price from one product card.

    Args:
        card_html_soup: parsed card element; it must provide ``select_one``
            (the caller passes elements selected by BeautifulSoup).

    Returns:
        A dict with the keys 'url', 'name', 'full_price_str',
        'full_price_num', 'cashback_str' and 'cashback_num', or None when
        the card has no cashback badge, no usable link, or parsing failed.
        A missing price is reported as 'N/A' / 0.
    """
    try:
        cashback_tag = card_html_soup.select_one('span.feedbacks-points-sum')
        if not cashback_tag:
            return None

        # Cashback is parsed the same way as the price so that values such
        # as "1 100" are read as 1100 rather than 1.
        cashback_amount_num = _extract_amount(cashback_tag.get_text(strip=True))
        if cashback_amount_num is None:
            return None

        link_tag = card_html_soup.select_one('a.product-card__link.j-card-link')
        href = link_tag.attrs.get('href') if link_tag else None
        if not href:
            return None
        # urljoin handles absolute, root-relative, relative and
        # protocol-relative ("//host/path") links uniformly.
        from urllib.parse import urljoin
        product_url = urljoin("https://www.wildberries.ru/", href)

        img_tag = card_html_soup.select_one('img.j-thumbnail')
        product_name = 'N/A'
        if img_tag and 'alt' in img_tag.attrs:
            product_name = clean_text(img_tag['alt'])

        full_price_str = 'N/A'
        full_price_num = 0
        full_price_tag = card_html_soup.select_one('ins.price__lower-price')
        if full_price_tag:
            price_value = _extract_amount(full_price_tag.get_text(strip=True))
            if price_value is not None:
                full_price_num = price_value
                full_price_str = str(price_value)

        return {
            'url': product_url, 'name': product_name,
            'full_price_str': full_price_str, 'full_price_num': full_price_num,
            'cashback_str': str(cashback_amount_num), 'cashback_num': cashback_amount_num,
        }
    except Exception as e:
        # Best-effort boundary: one malformed card must not abort the page.
        print(f"[!] Ошибка при парсинге карточки товара: {e}")
        return None



def scrape_page_html_content(page_html):
    """Parse a catalogue page and collect its cashback products.

    Args:
        page_html: HTML text of the page.

    Returns:
        A tuple ``(products, had_cards)``: the list of dicts returned by
        ``parse_product_card_sync`` for cards it accepted, and a bool that
        tells whether the page contained any product card at all.
    """
    cards = BeautifulSoup(page_html, 'lxml').select('div.product-card__wrapper')
    parsed_cards = (parse_product_card_sync(card) for card in cards)
    accepted = [data for data in parsed_cards if data]
    return accepted, len(cards) > 0



async def fetch_page_content_with_playwright(context, url_to_fetch):
    """Load *url_to_fetch* in a new tab and return the page HTML.

    Args:
        context: Playwright browser context used to open the tab.
        url_to_fetch: address of the page to load.

    Returns:
        The page HTML, or None when the page timed out or any other error
        occurred (the error is printed, never raised).
    """
    page = None
    try:
        # Opening the tab is inside the try block so that a failure here is
        # reported like any other load error instead of escaping.
        page = await context.new_page()
        print(f"  Переход на: {url_to_fetch}")
        await page.goto(url_to_fetch, wait_until="domcontentloaded", timeout=60000)
        await page.wait_for_selector('div.product-card__wrapper, div.skeleton-card__wrapper, .catalog-page__not-found, .search-page__text-empty', timeout=25000)
        return await page.content()
    except PlaywrightTimeoutError:
        print(f"[!] Тайм-аут на {url_to_fetch}")
        return None
    except Exception as e:
        print(f"[!] Ошибка при загрузке {url_to_fetch}: {e}")
        return None
    finally:
        if page is not None:
            try:
                await page.close()
            except Exception as e:
                # A failed close must not replace the result computed above.
                print(f"[!] Не удалось закрыть вкладку: {e}")



def construct_next_page_url(base_category_url, page_number):
    """Build the URL of page *page_number* of a catalogue listing.

    The existing query parameters are kept; ``page`` is overwritten,
    ``sort=popular`` is added only when no sort is present, and
    ``ffeedbackpoints=1`` is always set.

    Args:
        base_category_url: catalogue URL, with or without a query string.
        page_number: page index to request.

    Returns:
        The rebuilt URL as a string.
    """
    parts = urlparse(base_category_url)
    params = parse_qs(parts.query)
    params['page'] = [str(page_number)]
    params.setdefault('sort', ['popular'])
    params['ffeedbackpoints'] = ['1']
    rebuilt = parts._replace(query=urlencode(params, doseq=True))
    return urlunparse(rebuilt)



async def send_telegram_notification(product_info, is_profitable):
    """Send a product summary to the configured Telegram chat.

    The blocking ``bot.send_message`` call is executed through
    ``asyncio.to_thread``. Any exception raised while sending is printed
    and swallowed.

    Args:
        product_info: mapping with the keys 'name', 'full_price_str',
            'cashback_str' and 'url'.
        is_profitable: when truthy, the "(ВЫГОДНЫЙ)" marker is added to the
            first line of the message.
    """
    parts = [
        f"Новый товар с кэшбэком {'(ВЫГОДНЫЙ)' if is_profitable else ''}:\n",
        f"Название: {product_info['name']}\n",
        f"Цена: {product_info['full_price_str']} ₽\n",
        f"Кэшбэк: {product_info['cashback_str']} ₽\n",
        f"Ссылка: {product_info['url']}",
    ]
    text = ''.join(parts)
    try:
        await asyncio.to_thread(bot.send_message, TELEGRAM_CHAT_ID, text)
    except Exception as e:
        print(f"[!!!] Ошибка Telegram: {e}")



async def process_paginated_category(context, base_url):
    """Walk the pages of one catalogue listing and record new cashback items.

    Pages are requested one after another, starting from page 1. Three
    pages in a row that either failed to load or contained no product
    cards end the walk. Every product URL not yet in ``processed_urls`` is
    appended to one of the two output files; profitable products also
    trigger a Telegram notification.

    Args:
        context: Playwright browser context used to load pages.
        base_url: catalogue URL to paginate.

    Returns:
        Number of new (not previously seen) products handled.
    """
    current_page = 1
    handled = 0
    blank_streak = 0

    while True:
        target_url = construct_next_page_url(base_url, current_page)
        print(f"\nОбрабатываю страницу {current_page}: {target_url}")
        html = await fetch_page_content_with_playwright(context, target_url)

        # A failed load and a page without cards are counted the same way;
        # only the final message differs.
        if html:
            cashback_items, has_cards = scrape_page_html_content(html)
            stop_notice = "  [*] Три страницы без карточек подряд — конец категории."
        else:
            cashback_items, has_cards = [], False
            stop_notice = "  [*] Три пустые страницы подряд — останавливаюсь."

        if not has_cards:
            blank_streak += 1
            if blank_streak >= 3:
                print(stop_notice)
                break
            current_page += 1
            continue

        blank_streak = 0  # cards were found, so the streak is reset

        if not cashback_items:
            print("  [-] Товары с кэшбэком не найдены.")
        else:
            print(f"  [+] Найдено {len(cashback_items)} товаров с кэшбэком")
            for item in cashback_items:
                # Skip products already handled earlier in this run.
                if item['url'] in processed_urls:
                    continue
                processed_urls.add(item['url'])
                handled += 1

                price = item['full_price_num']
                is_profitable = (
                    price > 0
                    and (item['cashback_num'] / price) * 100 >= PROFITABILITY_THRESHOLD
                )

                if is_profitable:
                    destination = PROFITABLE_OUTPUT_FILENAME
                else:
                    destination = OUTPUT_FILENAME
                with open(destination, 'a', encoding='utf-8') as f:
                    f.write(f"{item['url']} | {item['name']} | {item['full_price_str']} | {item['cashback_str']}\n")

                if is_profitable:
                    await send_telegram_notification(item, is_profitable)

        current_page += 1
        await asyncio.sleep(2)
    return handled



async def main():
    """Ask for catalogue links, scrape each of them and print the total.

    Links come either from ``links.txt`` (one per line) or from a single
    URL typed by the user. Both output files are recreated with a header
    before scraping starts. A failure while processing one link is printed
    and the remaining links are still processed.
    """
    use_links_file = input("Хотите использовать файл links.txt? (y/n): ").strip().lower()
    if use_links_file == 'y':
        try:
            with open("links.txt", 'r', encoding='utf-8') as f:
                urls_to_process = [line.strip() for line in f if line.strip()]
        except FileNotFoundError:
            print("[!] Файл links.txt не найден рядом со скриптом.")
            return
    else:
        initial_url = input("Введите ссылку: ").strip()
        urls_to_process = [initial_url] if initial_url else []

    if not urls_to_process:
        # Nothing to scrape: leave earlier result files untouched.
        print("[!] Не указано ни одной ссылки — нечего обрабатывать.")
        return

    # Initialise the output files.
    header_line = "Ссылка | Название | Цена (₽) | Кэшбэк (₽)\n" + "="*100 + "\n"
    for filename in (OUTPUT_FILENAME, PROFITABLE_OUTPUT_FILENAME):
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(header_line)

    total_items_found = 0

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            context = await browser.new_context(user_agent=USER_AGENT)
            for url in urls_to_process:
                print(f"\n===== Обработка ссылки: {url} =====")
                try:
                    total_items_found += await process_paginated_category(context, url)
                except Exception as e:
                    # One broken category must not stop the whole run.
                    print(f"[!] Ошибка при обработке {url}: {e}")
            await context.close()
        finally:
            await browser.close()

    print(f"\nСканирование завершено. Найдено товаров: {total_items_found}")



# Start the scraper only when the file is executed as a script.
if __name__ == "__main__":

    asyncio.run(main())
Зависимости:

pip install playwright beautifulsoup4 lxml pyTelegramBotAPI
playwright install chromium

Настройки:

Процент выгоды — объясню на живом примере:
Ставишь 30% — в Telegram-бота будут приходить товары, у которых кэшбэк не меньше 30% от цены. Пример: платье за 1000 ₽ с кэшбэком 300 ₽.
Ставишь 110% — придёт платье за 1000 ₽, но с кэшбэком от 1100 ₽.

Процесс работы:

Можно создать файл links.txt рядом со скриптом и перечислить в нём нужные разделы (по одной ссылке на строку)

Можно парсить вручную одну категорию или главную страницу

Что приходит в канал/чат

morphosed · Jun 15, 2025

Проверка пройдена

Y4sperMaglot · Jun 15, 2025

Выглядит круто, красава

onlycash_pay · Jun 16, 2025

ебать ты придумал , красавчик

ЧАЛЛИК · Jun 24, 2025

ВБ лев

Lexax5l6 · Jul 24, 2025

Работает криво, парсит только с главной страницы. Если указать категорию, то непонятно, что он парсит. Скорее всего в фильтр добавляется в ссылке (за отзыв). Тогда зачем дальше парсить, если товаров не найдено. Если появляется ошибка, скрипт останавливает. Взять за основу, переделать, будет норм

Automatic raising of topics via API | Python

Authorized Railway for freebie, luxury, for the elite

Random VKontakte status

Autostatus VKontakte with your audio recordings

Telegram channel event notifications (e.g. new/left members)

Pyfragment - async Python SDK for Fragment (Stars, Premium, Ads, Marketplace)

AsyncPayments | Accept payments in your projects.

Telegram bot: Gemini retells all voice messages that you receive in DM

Fake session of any device in Telegram

Flutter + Python password manager

Telegram | Getting StringSession by Auth Key and DC ID with the subsequent entrance to the Telethon account

Script for viewing/cleaning VK conversations

Autostatus in VK and autobio in Telegram with the current Spotify song

Automatically send gifts to yourself from autoregistrations

A script that recovers your deleted VKontakte messages

Script for downloading all VK music + with cover + with text MusixMatch or Genius

Quick country change on multiple VK accounts

Cleaning VK friends: Deleting all those with whom there are no mutual friends

First library Fragment.com Full Free

Notifications that the user is online in VK

DonationAlerts notifications in Telegram V2

Automatically invite a participant to a VK conversation as soon as possible

[Free] Plugins for Funpay Cardinal - Lot Cleaner, AutoTicket, Chat Sync, GPT and 6 more pieces

Bot template VKontakte

Script for those who distribute free VK gifts

Script for those who complete the "Active 90" trophy

YouTube Video Archives | Python

Telegram bot for drawings

Telegram bot template

PyPlayerokAPI - SDK for working with Playerok

Python

WB Parser profitable for goods

Automatic raising of topics via API | Python

Authorized Railway for freebie, luxury, for the elite

Random VKontakte status

Autostatus VKontakte with your audio recordings

Telegram channel event notifications (e.g. new/left members)

Pyfragment - async Python SDK for Fragment (Stars, Premium, Ads, Marketplace)

AsyncPayments | Accept payments in your projects.

Telegram bot: Gemini retells all voice messages that you receive in DM

Fake session of any device in Telegram

Flutter + Python password manager

Telegram | Getting StringSession by Auth Key and DC ID with the subsequent entrance to the Telethon account

Script for viewing/cleaning VK conversations

Autostatus in VK and autobio in Telegram with the current Spotify song

Automatically send gifts to yourself from autoregistrations

A script that recovers your deleted VKontakte messages

Script for downloading all VK music + with cover + with text MusixMatch or Genius

Quick country change on multiple VK accounts

Cleaning VK friends: Deleting all those with whom there are no mutual friends

First library Fragment.com Full Free

Notifications that the user is online in VK

DonationAlerts notifications in Telegram V2

Automatically invite a participant to a VK conversation as soon as possible

[Free] Plugins for Funpay Cardinal - Lot Cleaner, AutoTicket, Chat Sync, GPT and 6 more pieces

Bot template VKontakte

Script for those who distribute free VK gifts

Script for those who complete the "Active 90" trophy

YouTube Video Archives | Python

Telegram bot for drawings

Telegram bot template

PyPlayerokAPI - SDK for working with Playerok

Python WB Parser profitable for goods

Python

WB Parser profitable for goods