利用 Playwright 爬取 BOSS 直聘岗位信息

介绍

实训课上做的

老师用的是 Selenium,我用的是 Playwright

感觉也还行,可以作为学习使用

[图片:WX20230622-224123@2x]

环境

Python 3.10+(更早的版本我没有测试过;代码里的 list[list[Any]] 返回值注解至少需要 Python 3.9)

playwright

pandas

playwright安装方式

python环境安装好后:

命令行:

pip install playwright

等待完成后继续:

playwright install

等待完成即可

pandas安装方式

命令行:

pip install pandas

等待完成即可

注意:代码最后用 to_excel 导出 .xlsx 文件,pandas 写 .xlsx 还需要额外安装 openpyxl:

pip install openpyxl

代码

import time
from typing import List, Any

from playwright.sync_api import Playwright, sync_playwright, expect

import pandas as pd

# def SearchWrok():

# CSS selectors for the fields read from each job card, listed in the order
# the values appear in every output row.
_JOB_CARD_SELECTORS = (
    '.company-name',               # company name
    '.job-name',                   # job title
    '.job-area',                   # job location
    '.salary',                     # salary
    '.job-info .tag-list',         # tag list inside the job-info section
    '.company-tag-list',           # company tag list
    '.job-card-footer .tag-list',  # tag list inside the card footer
    '.info-desc',                  # description line of the card
)


def _text_or_none(card, selector):
    """Return the text of the first *selector* match inside *card*, or None if absent."""
    node = card.query_selector(selector)
    return None if node is None else node.text_content()


def _collect_rows(page, keyword, page_count):
    """Visit result pages 1..page_count on *page* and return one row per job card."""
    rows = []
    # Result pages are numbered from 1, so the upper bound must be inclusive.
    for page_no in range(1, page_count + 1):
        page.goto(f"https://www.zhipin.com/web/geek/job?query={keyword}&city=101100100&page={page_no}")
        # Fixed pause before reading the DOM; presumably gives the listing
        # time to render -- kept as in the original.
        time.sleep(10)
        for card in page.query_selector_all(".job-card-wrapper"):
            rows.append([_text_or_none(card, selector) for selector in _JOB_CARD_SELECTORS])
    return rows


def run(playwright: Playwright, jobname: str, pageNum: int) -> list[list[Any]]:
    """Scrape job cards from BOSS Zhipin search results.

    Launches a visible (non-headless) Chromium window, opens the search
    result pages 1 through ``pageNum`` (inclusive) for ``jobname`` with the
    fixed city code 101100100, and reads eight text fields from every
    ``.job-card-wrapper`` element found.

    Args:
        playwright: A started Playwright instance used to launch Chromium.
        jobname: Search keyword. It is percent-encoded before being placed
            in the URL so characters such as ``#``, ``+``, ``&`` or spaces
            cannot corrupt the query string.
        pageNum: Number of result pages to visit. Values below 1 visit no
            pages and yield an empty list.

    Returns:
        One list per job card, holding the text of the elements named in
        ``_JOB_CARD_SELECTORS`` in that order. A field whose element is
        missing from the card is ``None`` instead of aborting the scrape.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    keyword = quote(str(jobname), safe='')

    browser = playwright.chromium.launch(headless=False)
    try:
        context = browser.new_context()
        try:
            return _collect_rows(context.new_page(), keyword, pageNum)
        finally:
            # Always release the context, even if navigation or parsing failed.
            context.close()
    finally:
        # Always shut the browser down so no Chromium process is left behind.
        browser.close()


# Script entry: scrape the configured search and export the rows to Excel.
with sync_playwright() as playwright:

    # Search keyword handed to run().
    jobname = '数据分析'

    # Page limit handed to run().
    pageNum = 3

    # One header per value in each scraped row, in the same order.
    column_titles = [
        '公司名称',
        '工作名称',
        '工作区域',
        '薪酬',
        '职位要求',
        '公司信息',
        '技能要求',
        '公司福利',
    ]

    scraped_rows = run(playwright, jobname, pageNum)
    jobs_table = pd.DataFrame(scraped_rows, columns=column_titles)

    # Show the table on stdout, then write it to an .xlsx file in the
    # current working directory.
    print(jobs_table)
    jobs_table.to_excel('数据分析师.xlsx')

 

 

 

 

阅读剩余
THE END