Heim >Backend-Entwicklung >Python-Tutorial >Verifizierungscode für die Python-Identifikation

Verifizierungscode für die Python-Identifikation

php中世界最好的语言Original: 2018-04-09 10:42:052589Durchsuche

Dieses Mal bringe ich Ihnen den Python-Erkennungs-Verifizierungscode. Was sind die Vorsichtsmaßnahmen für den Python-Erkennungs-Verifizierungscode? Das Folgende ist ein praktischer Fall, werfen wir einen Blick darauf.

Zusätzlich zur Verwendung des herkömmlichen PIL-Pakets zur Bildverarbeitung und der anschließenden Verwendung von Pytessert + OCR zur Identifizierung von Unfällen können Sie auch Tessorflow-Training zur Identifizierung von Verifizierungscodes verwenden.

Der größte Teil des Codes in diesem Artikel ist mit nur wenigen Änderungen nachgedruckt.

Der Code läuft in einer Linux-Umgebung und Tessorflow unterstützt Python 2.7 für Windows nicht.

gen_captcha.py-Code.

#coding=utf-8
from captcha.image import ImageCaptcha # pip install captcha
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import random
# 验证码中的字符, 就不用汉字了
number = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
alphabet = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u',
      'v', 'w', 'x', 'y', 'z']
ALPHABET = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U',
      'V', 'W', 'X', 'Y', 'Z']
'''
number=['0','1','2','3','4','5','6','7','8','9']
alphabet =[]
ALPHABET =[]
'''
# 验证码一般都无视大小写；验证码长度4个字符
def random_captcha_text(char_set=number + alphabet + ALPHABET, captcha_size=4):
  captcha_text = []
  for i in range(captcha_size):
    c = random.choice(char_set)
    captcha_text.append(c)
  return captcha_text
# 生成字符对应的验证码
def gen_captcha_text_and_image():
  while(1):
    image = ImageCaptcha()
    captcha_text = random_captcha_text()
    captcha_text = ''.join(captcha_text)
    captcha = image.generate(captcha_text)
    #image.write(captcha_text, captcha_text + '.jpg') # 写到文件
    captcha_image = Image.open(captcha)
    #captcha_image.show()
    captcha_image = np.array(captcha_image)
    if captcha_image.shape==(60,160,3):
      break
  return captcha_text, captcha_image
if name == 'main':
  # 测试
  text, image = gen_captcha_text_and_image()
  print image
  gray = np.mean(image, -1)
  print gray
  print image.shape
  print gray.shape
  f = plt.figure()
  ax = f.add_subplot(111)
  ax.text(0.1, 0.9, text, ha='center', va='center', transform=ax.transAxes)
  plt.imshow(image)
  plt.show()

train.py-Code.

#coding=utf-8
from gen_captcha import gen_captcha_text_and_image
from gen_captcha import number
from gen_captcha import alphabet
from gen_captcha import ALPHABET
import numpy as np
import tensorflow as tf
"""
text, image = gen_captcha_text_and_image()
print "验证码图像channel:", image.shape # (60, 160, 3)
# 图像大小
IMAGE_HEIGHT = 60
IMAGE_WIDTH = 160
MAX_CAPTCHA = len(text)
print  "验证码文本最长字符数", MAX_CAPTCHA # 验证码最长4字符; 我全部固定为4,可以不固定. 如果验证码长度小于4，用'_'补齐
"""
IMAGE_HEIGHT = 60
IMAGE_WIDTH = 160
MAX_CAPTCHA = 4
# 把彩色图像转为灰度图像（色彩对识别验证码没有什么用）
def convert2gray(img):
  if len(img.shape) > 2:
    gray = np.mean(img, -1)
    # 上面的转法较快，正规转法如下
    # r, g, b = img[:,:,0], img[:,:,1], img[:,:,2]
    # gray = 0.2989 * r + 0.5870 * g + 0.1140 * b
    return gray
  else:
    return img
"""
cnn在图像大小是2的倍数时性能最高, 如果你用的图像大小不是2的倍数，可以在图像边缘补无用像素。
np.pad(image,((2,3),(2,2)), 'constant', constant_values=(255,)) # 在图像上补2行，下补3行，左补2行，右补2行
"""
# 文本转向量
char_set = number + alphabet + ALPHABET + ['_'] # 如果验证码长度小于4, '_'用来补齐
CHAR_SET_LEN = len(char_set)
def text2vec(text):
  text_len = len(text)
  if text_len > MAX_CAPTCHA:
    raise ValueError('验证码最长4个字符')
  vector = np.zeros(MAX_CAPTCHA * CHAR_SET_LEN)
  def char2pos(c):
    if c == '_':
      k = 62
      return k
    k = ord(c) - 48
    if k > 9:
      k = ord(c) - 55
      if k > 35:
        k = ord(c) - 61
        if k > 61:
          raise ValueError('No Map')
    return k
  for i, c in enumerate(text):
    #print text
    idx = i * CHAR_SET_LEN + char2pos(c)
    #print i,CHAR_SET_LEN,char2pos(c),idx
    vector[idx] = 1
  return vector
#print text2vec('1aZ_')
# 向量转回文本
def vec2text(vec):
  char_pos = vec.nonzero()[0]
  text = []
  for i, c in enumerate(char_pos):
    char_at_pos = i # c/63
    char_idx = c % CHAR_SET_LEN
    if char_idx < 10:
      char_code = char_idx + ord(&#39;0&#39;)
    elif char_idx < 36:
      char_code = char_idx - 10 + ord(&#39;A&#39;)
    elif char_idx < 62:
      char_code = char_idx - 36 + ord(&#39;a&#39;)
    elif char_idx == 62:
      char_code = ord(&#39;_&#39;)
    else:
      raise ValueError(&#39;error&#39;)
    text.append(chr(char_code))
  return "".join(text)
"""
#向量（大小MAX_CAPTCHA*CHAR_SET_LEN）用0,1编码 每63个编码一个字符，这样顺利有，字符也有
vec = text2vec("F5Sd")
text = vec2text(vec)
print(text) # F5Sd
vec = text2vec("SFd5")
text = vec2text(vec)
print(text) # SFd5
"""
# 生成一个训练batch
def get_next_batch(batch_size=128):
  batch_x = np.zeros([batch_size, IMAGE_HEIGHT * IMAGE_WIDTH])
  batch_y = np.zeros([batch_size, MAX_CAPTCHA * CHAR_SET_LEN])
  # 有时生成图像大小不是(60, 160, 3)
  def wrap_gen_captcha_text_and_image():
    while True:
      text, image = gen_captcha_text_and_image()
      if image.shape == (60, 160, 3):
        return text, image
  for i in range(batch_size):
    text, image = wrap_gen_captcha_text_and_image()
    image = convert2gray(image)
    batch_x[i, :] = image.flatten() / 255 # (image.flatten()-128)/128 mean为0
    batch_y[i, :] = text2vec(text)
  return batch_x, batch_y
####################################################################
X = tf.placeholder(tf.float32, [None, IMAGE_HEIGHT * IMAGE_WIDTH])
Y = tf.placeholder(tf.float32, [None, MAX_CAPTCHA * CHAR_SET_LEN])
keep_prob = tf.placeholder(tf.float32) # dropout
# 定义CNN
def crack_captcha_cnn(w_alpha=0.01, b_alpha=0.1):
  x = tf.reshape(X, shape=[-1, IMAGE_HEIGHT, IMAGE_WIDTH, 1])
  # w_c1_alpha = np.sqrt(2.0/(IMAGE_HEIGHT*IMAGE_WIDTH)) #
  # w_c2_alpha = np.sqrt(2.0/(3*3*32))
  # w_c3_alpha = np.sqrt(2.0/(3*3*64))
  # w_d1_alpha = np.sqrt(2.0/(8*32*64))
  # out_alpha = np.sqrt(2.0/1024)
  # 3 conv layer
  w_c1 = tf.Variable(w_alpha * tf.random_normal([3, 3, 1, 32]))
  b_c1 = tf.Variable(b_alpha * tf.random_normal([32]))
  conv1 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(x, w_c1, strides=[1, 1, 1, 1], padding=&#39;SAME&#39;), b_c1))
  conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding=&#39;SAME&#39;)
  conv1 = tf.nn.dropout(conv1, keep_prob)
  w_c2 = tf.Variable(w_alpha * tf.random_normal([3, 3, 32, 64]))
  b_c2 = tf.Variable(b_alpha * tf.random_normal([64]))
  conv2 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv1, w_c2, strides=[1, 1, 1, 1], padding=&#39;SAME&#39;), b_c2))
  conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding=&#39;SAME&#39;)
  conv2 = tf.nn.dropout(conv2, keep_prob)
  w_c3 = tf.Variable(w_alpha * tf.random_normal([3, 3, 64, 64]))
  b_c3 = tf.Variable(b_alpha * tf.random_normal([64]))
  conv3 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv2, w_c3, strides=[1, 1, 1, 1], padding=&#39;SAME&#39;), b_c3))
  conv3 = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding=&#39;SAME&#39;)
  conv3 = tf.nn.dropout(conv3, keep_prob)
  # Fully connected layer
  w_d = tf.Variable(w_alpha * tf.random_normal([8 * 32 * 40, 1024]))
  b_d = tf.Variable(b_alpha * tf.random_normal([1024]))
  dense = tf.reshape(conv3, [-1, w_d.get_shape().as_list()[0]])
  dense = tf.nn.relu(tf.add(tf.matmul(dense, w_d), b_d))
  dense = tf.nn.dropout(dense, keep_prob)
  w_out = tf.Variable(w_alpha * tf.random_normal([1024, MAX_CAPTCHA * CHAR_SET_LEN]))
  b_out = tf.Variable(b_alpha * tf.random_normal([MAX_CAPTCHA * CHAR_SET_LEN]))
  out = tf.add(tf.matmul(dense, w_out), b_out)
  # out = tf.nn.softmax(out)
  return out
# 训练
def train_crack_captcha_cnn():
  import time
  start_time=time.time()
  output = crack_captcha_cnn()
  # loss
  #loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(output, Y))
  loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=output, labels=Y))
  # 最后一层用来分类的softmax和sigmoid有什么不同？
  # optimizer 为了加快训练 learning_rate应该开始大，然后慢慢衰
  optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
  predict = tf.reshape(output, [-1, MAX_CAPTCHA, CHAR_SET_LEN])
  max_idx_p = tf.argmax(predict, 2)
  max_idx_l = tf.argmax(tf.reshape(Y, [-1, MAX_CAPTCHA, CHAR_SET_LEN]), 2)
  correct_pred = tf.equal(max_idx_p, max_idx_l)
  accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
  saver = tf.train.Saver()
  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    step = 0
    while True:
      batch_x, batch_y = get_next_batch(64)
      _, loss_ = sess.run([optimizer, loss], feed_dict={X: batch_x, Y: batch_y, keep_prob: 0.75})
      print time.strftime(&#39;%Y-%m-%d %H:%M:%S&#39;,time.localtime(time.time())),step, loss_
      # 每100 step计算一次准确率
      if step % 100 == 0:
        batch_x_test, batch_y_test = get_next_batch(100)
        acc = sess.run(accuracy, feed_dict={X: batch_x_test, Y: batch_y_test, keep_prob: 1.})
        print u&#39;***************************************************************第%s次的准确率为%s&#39;%(step, acc)
        # 如果准确率大于50%,保存模型,完成训练
        if acc > 0.9:         ##我这里设了0.9，设得越大训练要花的时间越长，如果设得过于接近1，很难达到。如果使用cpu，花的时间很长，cpu占用很高电脑发烫。
          saver.save(sess, "crack_capcha.model", global_step=step)
          print time.time()-start_time
          break
      step += 1
train_crack_captcha_cnn()

Testcode:

output = crack_captcha_cnn()
saver = tf.train.Saver()
sess = tf.Session()
saver.restore(sess, tf.train.latest_checkpoint('.'))
while(1):
  
  text, image = gen_captcha_text_and_image()
  image = convert2gray(image)
  image = image.flatten() / 255
  predict = tf.argmax(tf.reshape(output, [-1, MAX_CAPTCHA, CHAR_SET_LEN]), 2)
  text_list = sess.run(predict, feed_dict={X: [image], keep_prob: 1})
  predict_text = text_list[0].tolist()
  vector = np.zeros(MAX_CAPTCHA * CHAR_SET_LEN)
  i = 0
  for t in predict_text:
    vector[i * 63 + t] = 1
    i += 1
    # break
  print("正确: {} 预测: {}".format(text, vec2text(vector)))

Wenn Sie die Codewirkung schnell testen möchten, legen Sie nicht zu viele Zeichen im Bestätigungscode fest, z. B. 0123 reicht aus.

Ich glaube, dass Sie die Methode beherrschen, nachdem Sie den Fall in diesem Artikel gelesen haben. Weitere spannende Informationen finden Sie in anderen verwandten Artikeln auf der chinesischen PHP-Website!

Empfohlene Lektüre:

So implementieren Sie die Mahalanobis-Distanz in Python

So stapeln Sie TXT-Dateien in das DataFrame-Format ein Python

Das obige ist der detaillierte Inhalt vonVerifizierungscode für die Python-Identifikation. Für weitere Informationen folgen Sie bitte anderen verwandten Artikeln auf der PHP chinesischen Website!

Stellungnahme：

Der Inhalt dieses Artikels wird freiwillig von Internetnutzern beigesteuert und das Urheberrecht liegt beim ursprünglichen Autor. Diese Website übernimmt keine entsprechende rechtliche Verantwortung. Wenn Sie Inhalte finden, bei denen der Verdacht eines Plagiats oder einer Rechtsverletzung besteht, wenden Sie sich bitte an admin@php.cn

Vorheriger Artikel：So schreiben Sie Variablen Zeile für Zeile in Python in das TXT-FormatNächster Artikel：So schreiben Sie Variablen Zeile für Zeile in Python in das TXT-Format

In Verbindung stehende Artikel

Mehr sehen