# 统计学习方法-逻辑斯蒂回归笔记

Logistic回归本质上还是一个基于条件概率的判别模型，但它利用了Sigmoid函数将任意的输入映射到[0,1]区间内

Logistic回归的优缺点，也有人整理过（见"逻辑回归算法的优缺点"），其中有个缺点比较明显：当特征空间很大或需要处理大量多类特征或变量时，效果不是很好

class logisticRegression:
    """Binary logistic regression trained with batch gradient descent."""

    def sigmoid(self, z):
        """Map z (scalar or ndarray) into (0, 1) via the logistic function.

        z is clipped to [-500, 500] before exponentiation so that
        np.exp never overflows; at double precision the clipped result
        is indistinguishable from the exact value for |z| > ~37.
        """
        return 1 / (1 + np.exp(-np.clip(z, -500, 500)))

    def gradient_descent(self, X, y, item=1000, alpha=0.001):
        """Fit the weight vector by minimizing the negative log-likelihood.

        The log-likelihood derivative is negated, so plain gradient
        descent (not ascent) is applied, using matrix operations.

        Parameters
        ----------
        X : (n_samples, n_features) design matrix; a bias column of ones
            is assumed to already be appended.
        y : (n_samples, 1) column of 0/1 labels.
        item : number of gradient-descent iterations (default 1000).
        alpha : learning rate (default 0.001).

        Returns
        -------
        (n_features, 1) weight column vector.
        """
        w = np.ones((X.shape[1], 1))
        for _ in range(item):
            h = self.sigmoid(np.dot(X, w))
            # Gradient of the negative log-likelihood: X^T (h - y).
            error = h - y
            w -= alpha * np.dot(X.T, error)
        return w

    def predict(self, w, test):
        """Classify one sample: 1 if P(y=1 | test) >= 0.5, else 0.

        Computes w·x and thresholds the sigmoid output per Eq. 6.3
        of the referenced textbook.
        """
        p1 = self.sigmoid(np.dot(test, w))
        if p1 >= 0.5:
            return 1
        else:
            return 0

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# NOTE(review): `dataset` is never defined in this file — this raises a
# NameError as written. It must be loaded (e.g. pd.read_csv(...).values)
# before this point; confirm against the original notebook/source.
# NOTE: np.mat is deprecated in modern NumPy; np.asarray plus an explicit
# reshape of y to a column would be the modern equivalent.
dataset = np.mat(dataset)
X = dataset[:, :-1]   # all columns but the last are features
y = dataset[:, -1]    # last column holds the 0/1 labels
# Append a constant column of ones so the bias term is folded into w.
X = np.insert(X, X.shape[1], values=1, axis=1)

train_X, test_X, train_y, test_y = train_test_split(
    X, y, test_size=0.2, random_state=123456)

LR = logisticRegression()
w = LR.gradient_descent(train_X, train_y, alpha=0.001, item=1000)

# Count misclassified test samples and print the error rate in percent.
error = 0
for i in range(len(test_X)):
    predict_y = LR.predict(w, test_X[i, :])
    if predict_y != test_y[i]:
        error += 1
print(error / len(test_y) * 100)

Logistic Regression 的前世今生（理论篇）