【Python爬虫】利用爬虫抓取双色球开奖号码,获取完整数据,简洁45行代码实现,更新时间2023-06-28
作者:mmseoamin日期:2023-12-05

链接:https://pan.baidu.com/s/18oE308_NVNPaCOACw_H5Hw?pwd=abc1 

利用爬虫抓取双色球开奖号码,获取完整数据,简洁45行代码实现,更新时间2023-06-28

这是网上的数据,怎么将它爬取下来

【Python爬虫】利用爬虫抓取双色球开奖号码,获取完整数据,简洁45行代码实现,更新时间2023-06-28,第1张

它将只爬取最近30期的双色球开奖号码,并将结果写入到名为 "双色球开奖结果.csv" 的文件中。

import requests
import os
from bs4 import BeautifulSoup
def download(url, page):
    """Fetch one draw-result page and append its ball numbers to the CSV.

    Args:
        url: Address of the result page for a single draw.
        page: Draw identifier, used as the CSV row label and in the
            progress message.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        IndexError: Propagated from ``write_to_excel`` when the page yields
            fewer than seven ball entries.
    """
    # A timeout keeps one stalled connection from hanging the whole crawl.
    response = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    # get_text() also works when an <li> wraps its number in nested tags,
    # where ``.string`` would yield None.
    ball = [li.get_text(strip=True) for li in soup.select('div.ball_box01 ul li')]
    write_to_excel(page, ball)
    print(f"第{page}期开奖结果录入完成")
def write_to_excel(page, ball):
    """Append one draw as a row of '双色球开奖结果.csv' in the working directory.

    The row is ``第<page>期`` followed by the first seven entries of ``ball``,
    comma separated and terminated by a newline.

    Args:
        page: Draw identifier placed in the first column.
        ball: Sequence holding at least seven ball values.

    Raises:
        IndexError: If ``ball`` has fewer than seven entries. The check runs
            before the file is opened, so a bad row never creates or touches
            the CSV.
    """
    if len(ball) < 7:
        raise IndexError(
            f"expected at least 7 ball numbers, got {len(ball)}"
        )
    row = ','.join(str(number) for number in ball[:7])
    # The context manager closes the handle even if the write itself fails.
    with open('双色球开奖结果.csv', 'a', encoding='utf_8_sig') as f:
        f.write(f'第{page}期,{row}\n')
def turn_page(recent_count=30):
    """Read the draw index page and download the first ``recent_count`` draws.

    Args:
        recent_count: How many entries to take from the start of the draw
            selector list. Defaults to 30, the original fixed value.

    Raises:
        requests.RequestException: If the index page (or any draw page
            fetched by ``download``) cannot be retrieved.
    """
    index_url = "http://kaijiang.500.com/ssq.shtml"
    # A timeout prevents an unresponsive server from blocking forever.
    response = requests.get(index_url, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    page_links = soup.select("div.iSelectList a")
    # Take the leading entries of the selector list.
    # NOTE(review): presumably the list is newest-first — confirm on the site.
    for link in page_links[:recent_count]:
        href = link.get('href')
        if not href:
            # An anchor without a target cannot be downloaded; skip it.
            continue
        download(href, link.get_text(strip=True))
def main():
    """Delete any existing result CSV, then run the crawl."""
    csv_path = '双色球开奖结果.csv'
    # Rows are appended, so a leftover file would accumulate duplicates.
    stale_output = os.path.exists(csv_path)
    if stale_output:
        os.remove(csv_path)
    turn_page()


if __name__ == '__main__':
    main()

【Python爬虫】利用爬虫抓取双色球开奖号码,获取完整数据,简洁45行代码实现,更新时间2023-06-28,第2张 【Python爬虫】利用爬虫抓取双色球开奖号码,获取完整数据,简洁45行代码实现,更新时间2023-06-28,第3张

生成的表格打开验证数据正确性

非常好,下面先用随机森林回归模型(注意:这还不是神经网络)进行预测

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
# Load the scraped draws. The CSV is written without a header row, so
# header=None keeps the first draw from being consumed as column names;
# utf_8_sig matches the encoding the scraper writes with.
data = pd.read_csv('双色球开奖结果.csv', header=None, encoding='utf_8_sig')
# Order rows oldest -> newest by the draw number embedded in column 0
# (labels look like "第23073期").
# NOTE(review): assumes every label contains digits — confirm for other CSVs.
issue = data.iloc[:, 0].astype(str).str.extract(r'(\d+)', expand=False).astype(int)
data = data.loc[issue.sort_values(kind='stable').index].reset_index(drop=True)
# Columns 1-6 hold the six red balls of each draw.
reds = data.iloc[:, 1:7]
# Pair each draw with the one that follows it. Using the same rows for
# features and labels would only teach the model the identity mapping.
features = reds.iloc[:-1].to_numpy()
labels = reds.iloc[1:].to_numpy()
# Build the random forest regressor.
model = RandomForestRegressor(n_estimators=100, random_state=1)
# Fit on (draw, next draw) pairs.
model.fit(features, labels)
# Feed the latest draw to predict the red balls of the next one.
next_features = model.predict(reds.iloc[[-1]].to_numpy())
next_features = np.round(next_features).astype(int)
# Print the predicted red balls.
print("预测的红色球号码:", next_features)

这个预测方式好像有点简单了,下面再用多层感知机(MLP)做一次预测

import pandas as pd
import numpy as np
from sklearn.neural_network import MLPRegressor
# Load the scraped draws. The CSV is written without a header row, so
# header=None keeps the first draw from being consumed as column names;
# utf_8_sig matches the encoding the scraper writes with.
data = pd.read_csv('双色球开奖结果.csv', header=None, encoding='utf_8_sig')
# Order rows oldest -> newest by the draw number embedded in column 0
# (labels look like "第23073期").
# NOTE(review): assumes every label contains digits — confirm for other CSVs.
issue = data.iloc[:, 0].astype(str).str.extract(r'(\d+)', expand=False).astype(int)
data = data.loc[issue.sort_values(kind='stable').index].reset_index(drop=True)
# Columns 1-6 hold the six red balls of each draw.
reds = data.iloc[:, 1:7]
# Pair each draw with the one that follows it. Using the same rows for
# features and labels would only teach the model the identity mapping.
features = reds.iloc[:-1].to_numpy()
labels = reds.iloc[1:].to_numpy()
# Build the multi-layer perceptron regressor (one hidden layer of 100 units).
model = MLPRegressor(hidden_layer_sizes=(100,), random_state=1)
# Fit on (draw, next draw) pairs.
model.fit(features, labels)
# Feed the latest draw to predict the red balls of the next one.
next_features = model.predict(reds.iloc[[-1]].to_numpy())
next_features = np.round(next_features).astype(int)
# Print the predicted red balls.
print("预测的红色球号码:", next_features)

这两段代码预测的号码不一样,是因为使用的模型不同。另外,每期开奖都是相互独立的随机事件,历史号码并不能真正预测下一期,所以结果没有实际参考价值,随便玩玩吧。

再做一个模拟双色球游戏,如果选择的号码中了,会显示绿色。

【Python爬虫】利用爬虫抓取双色球开奖号码,获取完整数据,简洁45行代码实现,更新时间2023-06-28,第4张

【Python爬虫】利用爬虫抓取双色球开奖号码,获取完整数据,简洁45行代码实现,更新时间2023-06-28,第5张

import tkinter as tk
import random
class DoubleColorBallGame:
    """Tkinter simulation of the double-colour-ball lottery.

    The window shows 33 red-ball buttons and 16 blue-ball buttons. The player
    toggles any number of red balls and at most one blue ball, then presses
    the draw button; a random draw of six red balls and one blue ball is
    shown, and every selected ball that was drawn turns green.
    """

    def __init__(self, master):
        """Build all widgets inside ``master`` (a Tk root or toplevel)."""
        self.master = master
        self.master.title("双色球游戏")
        self.master.geometry("600x400")
        self.red_balls = []  # red-ball buttons, index = number - 1
        self.blue_balls = []  # blue-ball buttons, index = number - 1
        self.selected_red_balls = []
        self.selected_blue_ball = None
        # Background of an untouched button, captured from the first button
        # created. The literal "SystemButtonFace" is a Windows system colour
        # name that is not available on every platform.
        self.default_bg = None
        self.create_red_ball_buttons()
        self.create_blue_ball_buttons()
        self.create_draw_button()
        self.create_clear_button()
        self.create_result_label()

    def _remember_default_bg(self, button):
        """Record the platform's default button background once."""
        if self.default_bg is None:
            self.default_bg = button.cget("bg")

    def _reset_button_colors(self):
        """Paint every ball button with the default background."""
        for button in self.red_balls + self.blue_balls:
            button.config(bg=self.default_bg)

    @staticmethod
    def _draw_numbers():
        """Return ``(reds, blue)``: six distinct sorted reds in 1-33, one blue in 1-16."""
        reds = sorted(random.sample(range(1, 34), 6))
        blue = random.randint(1, 16)
        return reds, blue

    def create_red_ball_buttons(self):
        """Lay out red-ball buttons 1-33 in rows of 11."""
        red_frame = tk.Frame(self.master)
        red_frame.pack(pady=10)
        for i in range(1, 34):
            # ``i=i`` binds the current number; a plain closure would see 33.
            button = tk.Button(red_frame, text=str(i), width=2, command=lambda i=i: self.select_red_ball(i))
            button.grid(row=(i - 1) // 11, column=(i - 1) % 11, padx=2, pady=2)
            self._remember_default_bg(button)
            self.red_balls.append(button)

    def create_blue_ball_buttons(self):
        """Lay out blue-ball buttons 1-16 in a single row."""
        blue_frame = tk.Frame(self.master)
        blue_frame.pack(pady=10)
        for i in range(1, 17):
            button = tk.Button(blue_frame, text=str(i), width=2, command=lambda i=i: self.select_blue_ball(i))
            button.grid(row=0, column=i - 1, padx=2, pady=2)
            self._remember_default_bg(button)
            self.blue_balls.append(button)

    def create_draw_button(self):
        """Add the button that triggers a draw."""
        draw_frame = tk.Frame(self.master)
        draw_frame.pack(pady=10)
        draw_button = tk.Button(draw_frame, text="开奖", command=self.generate_draw_result)
        draw_button.pack()

    def create_clear_button(self):
        """Add the button that clears the current selection."""
        clear_frame = tk.Frame(self.master)
        clear_frame.pack(pady=10)
        clear_button = tk.Button(clear_frame, text="清除", command=self.clear_selection)
        clear_button.pack()

    def create_result_label(self):
        """Add the label that displays the drawn numbers."""
        self.result_label = tk.Label(self.master, text="")
        self.result_label.pack(pady=20)

    def select_red_ball(self, number):
        """Toggle red ball ``number`` (1-33) in the selection."""
        button = self.red_balls[number - 1]
        if number in self.selected_red_balls:
            self.selected_red_balls.remove(number)
            button.config(bg=self.default_bg)
        else:
            self.selected_red_balls.append(number)
            button.config(bg="red")

    def select_blue_ball(self, number):
        """Select blue ball ``number`` (1-16), or deselect it if already chosen."""
        previous = self.selected_blue_ball
        if previous is not None:
            # Only one blue ball may be selected, so un-highlight the old one.
            self.blue_balls[previous - 1].config(bg=self.default_bg)
        if previous == number:
            self.selected_blue_ball = None
        else:
            self.selected_blue_ball = number
            self.blue_balls[number - 1].config(bg="blue")

    def generate_draw_result(self):
        """Draw random numbers, display them, and mark the player's hits green."""
        red_draw, blue_draw = self._draw_numbers()
        self.result_label.config(
            text="红球:" + " ".join(str(ball) for ball in red_draw) + "\n蓝球:" + str(blue_draw))
        # Wipe marks left by an earlier draw.
        self._reset_button_colors()
        # Hits turn green; misses keep their selection colour so the buttons
        # still reflect what is stored in the selection state.
        for number in self.selected_red_balls:
            hit = number in red_draw
            self.red_balls[number - 1].config(bg="green" if hit else "red")
        if self.selected_blue_ball is not None:
            hit = self.selected_blue_ball == blue_draw
            self.blue_balls[self.selected_blue_ball - 1].config(bg="green" if hit else "blue")

    def clear_selection(self):
        """Forget the selection and reset all button colours and the result text."""
        self.selected_red_balls = []
        self.selected_blue_ball = None
        self._reset_button_colors()
        self.result_label.config(text="")
if __name__ == "__main__":
    # Create the main window, attach the game UI, then hand control to Tk.
    window = tk.Tk()
    app = DoubleColorBallGame(window)
    window.mainloop()
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
# Load the scraped draws. The CSV is written without a header row, so
# header=None keeps the first draw from being consumed as column names;
# utf_8_sig matches the encoding the scraper writes with.
data = pd.read_csv('双色球开奖结果.csv', header=None, encoding='utf_8_sig')
# Order rows oldest -> newest by the draw number embedded in column 0
# (labels look like "第23073期"), so row i+1 is the draw that follows row i.
# NOTE(review): assumes every label contains digits — confirm for other CSVs.
issue = data.iloc[:, 0].astype(str).str.extract(r'(\d+)', expand=False).astype(int)
data = data.loc[issue.sort_values(kind='stable').index].reset_index(drop=True)
# Columns 1-7 are the six red balls plus the blue ball; convert to a tensor.
numbers = data.iloc[:, 1:8].values.astype(float)
numbers = torch.tensor(numbers, dtype=torch.float32)
# Scale every column by 33, the largest red-ball number. The blue ball
# (1-16) shares the same factor and is multiplied back by 33 after prediction.
numbers /= 33.0
# Inputs: the six red balls of every draw except the last one.
train_data = numbers[:-1, :6]
# Targets: the red balls of the draw that follows each input row.
train_red_balls = numbers[1:, :6]
# Targets: the blue ball of the draw that follows each input row.
train_blue_balls = numbers[1:, 6]
# Prediction model: two independent linear heads over the same input.
class PredictNeuronNet(nn.Module):
    """Linear model that maps one input vector to red-ball and blue-ball outputs."""

    def __init__(self, input_dim):
        """Create both heads; ``input_dim`` is the size of each input vector."""
        super().__init__()
        # Six outputs, one per red ball.
        self.predict_red_balls = nn.Linear(in_features=input_dim, out_features=6)
        # A single output for the blue ball.
        self.predict_blue_ball = nn.Linear(in_features=input_dim, out_features=1)

    def forward(self, x):
        """Return ``(red_balls, blue_ball)`` computed from ``x`` by the two heads."""
        return self.predict_red_balls(x), self.predict_blue_ball(x)
# Training loop
def train_predict_neuron_model(model, criterion, optimizer, num_epochs):
    """Run ``num_epochs`` full-batch updates on the module-level training tensors.

    Args:
        model: Callable returning ``(red_balls, blue_ball)`` for ``train_data``.
        criterion: Loss function applied separately to the red and blue outputs.
        optimizer: Optimizer that updates the model parameters.
        num_epochs: Number of update steps to perform.

    Reads the globals ``train_data``, ``train_red_balls`` and
    ``train_blue_balls``; prints the summed loss every 10th epoch.
    """
    for epoch in range(num_epochs):
        # Drop gradients from the previous step before the new backward pass.
        optimizer.zero_grad()
        pred_red, pred_blue = model(train_data)
        # view(-1) flattens the blue prediction to line up with the blue targets.
        loss = criterion(pred_red, train_red_balls) + criterion(pred_blue.view(-1), train_blue_balls)
        loss.backward()
        optimizer.step()
        if (epoch + 1) % 10:
            continue
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
# Script entry point
if __name__ == '__main__':
    input_dim = 6  # each input row carries six red-ball values
    model = PredictNeuronNet(input_dim)
    # Mean-squared-error loss, optimised with plain SGD.
    criterion = nn.MSELoss()
    optimizer = optim.SGD(model.parameters(), lr=0.01)
    # Train the model.
    num_epochs = 1000
    train_predict_neuron_model(model, criterion, optimizer, num_epochs)
    # Predict from the red balls in the last row of ``numbers``.
    with torch.no_grad():
        test_data = numbers[-1, :6].view(1, -1)  # reshape to (1, 6)
        red_balls, blue_ball = model(test_data)
    # Undo the divide-by-33 scaling applied to the data.
    red_balls = red_balls * 33.0
    blue_ball = blue_ball * 33.0
    for heading, prediction in (
        ("Predicted numbers for the next period (Red Balls):", red_balls),
        ("Predicted number for the next period (Blue Ball):", blue_ball),
    ):
        print(heading)
        print(prediction)