前言
ThreadPoolExecutor是Python对Thread做的进一步的封装,它使得多线程开发变得更简单。另外,它可以让多线程和多进程编码接口一致
一、ThreadPoolExecutor线程池使用示例:
from concurrent.futures import ThreadPoolExecutor,as_completed,wait,FIRST_COMPLETED
#线程池,为什么要用线程池
#例如要在主线程中获取一个某一个线程的状态或者某一个任务的状态以及返回值
#当一个线程完成的时候主线程能立即知道
#futures可以让多线程和多进程编码接口一致
import time
def get_html(times):
    """Simulate fetching a page by sleeping, then report and return the delay.

    Args:
        times: Number of seconds to sleep (passed to ``time.sleep``).

    Returns:
        The same ``times`` value, so the caller can tell which task finished.
    """
    time.sleep(times)
    print("get page {} success".format(times))
    return times
# max_workers caps how many threads of the pool may run at the same time.
executor = ThreadPoolExecutor(max_workers=2)

# Using the executor as a context manager shuts the pool down when the block
# is left, even if one of the tasks raises.
with executor:
    # submit() hands a callable to the pool and returns a Future immediately.
    # task1 = executor.submit(get_html, (3))
    # task2 = executor.submit(get_html, (2))

    # Submit one task per entry and keep the futures to read results later.
    urls = [3, 2, 4]
    all_task = [executor.submit(get_html, url) for url in urls]

    # With FIRST_COMPLETED, wait() returns as soon as any single task is
    # done, so the message must not claim that every task has finished.
    wait(all_task, return_when=FIRST_COMPLETED)
    print("at least one task has finished.")

    # as_completed() yields each future as soon as it has completed.
    for future in as_completed(all_task):
        data = future.result()
        print("get {} page".format(data))

    # Alternative: executor.map() yields the results in the order of `urls`.
    # for data in executor.map(get_html, urls):
    #     print("get {} page".format(data))

    # # done() reports whether a task has completed.
    # print(task1.done())
    # # cancel() can cancel a task that has not started running yet.
    # print(task2.cancel())
    # time.sleep(3)
    # print(task1.done())
    #
    # # result() returns the task's outcome (the function's return value).
    # print(task1.result())

1.submit()方法
在上面的示例程序中,executor = ThreadPoolExecutor(max_workers=2) 表示创建一个最多同时运行2个工作线程的线程池,实例对象executor就是它的管理者。executor的submit()方法会把要执行的任务(函数及其参数)放入线程池的任务队列,并立即返回一个Future对象。与直接使用Thread类的多线程编程不同,调用者不需要自己调用start()方法:提交任务时,若线程池中已有的线程数量小于max_workers,线程池会自动创建并启动一个新的工作线程来执行队列中的任务
submit()和_adjust_thread_count()方法的源码如下:
# Queue fn(*args, **kwargs) for execution and return the Future that will
# hold its outcome. Raises RuntimeError once the executor has been shut down.
# (No docstring here: it is copied from the base class right below.)
def submit(self, fn, *args, **kwargs):
    # Hold the shutdown lock while checking the flag and enqueueing the work.
    with self._shutdown_lock:
        if self._shutdown:
            raise RuntimeError('cannot schedule new futures after shutdown')

        f = _base.Future()
        # Bundle the future with the call it represents.
        w = _WorkItem(f, fn, args, kwargs)

        self._work_queue.put(w)
        # Start another worker thread if the pool is not yet at capacity.
        self._adjust_thread_count()
        # The caller gets the future back without waiting for the call to run.
        return f
submit.__doc__ = _base.Executor.submit.__doc__
def _adjust_thread_count(self):
    """Start one more worker thread if the pool is below ``_max_workers``."""
    # Passed to weakref.ref() below as its callback; it puts a None sentinel
    # on the work queue. The queue is bound as a default argument so the
    # callback does not need a reference to the executor itself.
    def weakref_cb(_, q=self._work_queue):
        q.put(None)

    num_threads = len(self._threads)
    if num_threads < self._max_workers:
        # Fewer threads than the pool may hold: create and start a new one.
        thread_name = '%s_%d' % (self._thread_name_prefix or self,
                                 num_threads)
        # The thread runs _worker(), which takes work items off _work_queue.
        # It only receives a weak reference to the executor.
        t = threading.Thread(name=thread_name, target=_worker,
                             args=(weakref.ref(self, weakref_cb),
                                   self._work_queue))
        t.daemon = True
        t.start()
        self._threads.add(t)
        _threads_queues[t] = self._work_queue
shutdown的wait参数为True时会将主线程阻塞,本质上是调用了每个子线程的join()方法,源码如下:
# Excerpt of the shutdown() source; unrelated code is elided with `...`.
class ThreadPoolExecutor(_base.Executor):
    ...

    def shutdown(self, wait=True):
        """Shut the pool down; with ``wait`` true, block until workers exit."""
        ...
        if wait:
            # self._threads is the collection holding the worker threads.
            for t in self._threads:
                # Joining each worker is what blocks the calling thread.
                t.join()
2. _WorkItem类
源码如下:
class _WorkItem(object):
    """One queued call: a callable, its arguments, and the future to fill."""

    def __init__(self, future, fn, args, kwargs):
        self.future = future    # Future that receives the result or exception
        self.fn = fn            # callable submitted to the pool
        self.args = args        # positional arguments for the callable
        self.kwargs = kwargs    # keyword arguments for the callable

    def run(self):
        """Execute the call and store its outcome on the future."""
        # A false return value means the future must not be run (for
        # example because it was cancelled), so the call is skipped.
        if not self.future.set_running_or_notify_cancel():
            return

        try:
            result = self.fn(*self.args, **self.kwargs)
        except BaseException as exc:
            # Hand the failure to whoever waits on the future.
            self.future.set_exception(exc)
            # Drop the local reference to this work item; CPython's source
            # notes this breaks a reference cycle with 'exc'.
            self = None
        else:
            # Publish the return value on the future.
            self.future.set_result(result)
def _worker(executor_reference, work_queue):
    """Thread target: run queued work items until told to exit.

    Args:
        executor_reference: Callable returning the owning executor, or None
            once it is gone (a ``weakref.ref`` in ``_adjust_thread_count``).
        work_queue: Queue of work items; ``None`` is a wake-up sentinel.
    """
    try:
        while True:
            work_item = work_queue.get(block=True)
            if work_item is not None:
                # Execute the submitted callable.
                work_item.run()
                # Do not keep the finished item alive while blocked in get().
                del work_item
                continue

            # A None sentinel arrived: decide whether this worker should exit.
            executor = executor_reference()
            # Exit if the interpreter is shutting down (module-level
            # `_shutdown` flag of concurrent.futures.thread), the owning
            # executor has been collected, or it has been shut down.
            # Without this guard the worker would quit on the first sentinel
            # and the `del executor` below could never be reached.
            if _shutdown or executor is None or executor._shutdown:
                # Pass the sentinel on so the other workers wake up too.
                work_queue.put(None)
                return
            # Release the strong reference before blocking on the queue again.
            del executor
    except BaseException:
        _base.LOGGER.critical('Exception in worker', exc_info=True)
3. Future类
submit()方法会返回Future对象,Future类里的几个主要方法是:
(1)cancel()方法:当任务既不在运行中、也尚未结束时,可将future对象取消并返回True;若任务正在运行或已经结束(无论成功还是抛出异常),则无法取消,返回False
源码如下:
def cancel(self):
    """Try to cancel the future.

    Returns:
        False if the call is running or has already finished; True if the
        future was already cancelled or has been cancelled by this call.
    """
    with self._condition:
        if self._state in [RUNNING, FINISHED]:
            return False

        if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
            return True

        self._state = CANCELLED
        # Wake every thread blocked on this condition.
        self._condition.notify_all()

    # Callbacks run after the condition lock has been released.
    self._invoke_callbacks()
    return True
(2) add_done_callback()方法:接收一个可调用对象,当任务执行完成或被取消后,以该future对象为参数调用它;如果添加回调时任务已经结束,则立即调用
源码如下:
def add_done_callback(self, fn):
    """Attaches a callable that will be called when the future finishes.

    Args:
        fn: Callable invoked with this future as its only argument. If the
            future is already cancelled or finished, it is called right away.
    """
    with self._condition:
        if self._state not in [CANCELLED, CANCELLED_AND_NOTIFIED, FINISHED]:
            # Not done yet: remember the callback for later.
            self._done_callbacks.append(fn)
            return
    # Already done: call immediately, outside the condition lock.
    fn(self)
(3) result()方法:获取线程执行的结果
源码如下:
def result(self, timeout=None):
    """Return the result of the call that the future represents.

    Args:
        timeout: Seconds to wait for the result; None waits without limit.

    Raises:
        CancelledError: If the future was cancelled.
        TimeoutError: If the future is still not done after the wait.
    """
    with self._condition:
        if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
            raise CancelledError()
        elif self._state == FINISHED:
            return self.__get_result()

        # Not done yet: block until notified or until the timeout expires.
        self._condition.wait(timeout)

        # Re-check the state after waking up.
        if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
            raise CancelledError()
        elif self._state == FINISHED:
            return self.__get_result()
        else:
            raise TimeoutError()
此外,Future还提供了获取线程状态的几个方法:
cancelled()方法:当线程状态是CANCELLED或者CANCELLED_AND_NOTIFIED,返回True;running()方法:当线程状态是RUNNING,返回True;done()方法:当线程状态是CANCELLED,CANCELLED_AND_NOTIFIED或者FINISHED,返回True。
本文介绍了Python的ThreadPoolExecutor线程池,它是对多线程的封装,简化了编程。主要内容包括submit()方法的使用,线程池的管理,_WorkItem类,以及Future类的cancel()、add_done_callback()和result()等方法。
1892

被折叠的 条评论
为什么被折叠?



