others - python 如何使用 PyQt5 QWebEngineView对html进行"渲染"处理

如何使用 PyQt5 v5.6 的 QWebEngineView"渲染"HTML?下面是基于 QWebPage(QtWebKit)的实现:


def render(source_html):
    """Fully render HTML, JavaScript and all.

    Loads ``source_html`` into a QtWebKit ``QWebPage``, runs the Qt event
    loop until the page emits ``loadFinished``, and returns the main
    frame's markup as it stands at that moment.

    Args:
        source_html: HTML markup to load into the page.

    Returns:
        The value of ``mainFrame().toHtml()`` captured in the
        ``loadFinished`` handler.
    """
    import sys

    from PyQt5.QtWidgets import QApplication
    from PyQt5.QtWebKitWidgets import QWebPage

    class Render(QWebPage):
        """Page that loads HTML and blocks until loading has finished."""

        def __init__(self, html):
            self.html = None
            # The application object is created before the page's own Qt
            # initialisation runs.
            self.app = QApplication(sys.argv)
            QWebPage.__init__(self)
            self.loadFinished.connect(self._loadFinished)
            self.mainFrame().setHtml(html)
            # Runs the event loop; it ends when _loadFinished() calls quit().
            self.app.exec_()

        def _loadFinished(self, result):
            # With QWebPage, toHtml() hands the markup back directly, so it
            # can be stored before the event loop is stopped.
            self.html = self.mainFrame().toHtml()
            self.app.quit()

    return Render(source_html).html



import requests


# Download the raw page source first, then print what render() returns.
# dummy_url is a placeholder; it is not defined in the code shown here.
response = requests.get(dummy_url)
sample_html = response.text
print(render(sample_html))



下面是我使用 QWebEngineView 的尝试。首先,在 Ubuntu 上安装并配置 PyQt5 v5.6:


# Install the PyQt5 wheel from PyPI into the per-user site-packages.
# NOTE(review): the command below does not pin a version, so it installs
# whatever release PyPI currently serves -- confirm that this is v5.6.


pip3 install --user pyqt5



# Link the missing resources: create symlinks inside PyQt5/Qt/libexec/ for
# the ICU data file, the three qtwebengine_resources*.pak files and the
# qtwebengine_locales directory. The targets are relative paths, so each
# link resolves from the libexec/ directory it is created in.


ln -s ../resources/icudtl.dat ../resources/qtwebengine_resources.pak ../resources/qtwebengine_resources_100p.pak ../resources/qtwebengine_resources_200p.pak ../translations/qtwebengine_locales ~/.local/lib/python3.5/site-packages/PyQt5/Qt/libexec/



接下来是 Python 代码。下面的实现无法正常工作:


def render(source_html):
    """Fully render HTML, JavaScript and all.

    First attempt at porting the renderer to ``QWebEngineView``: the HTML
    is loaded into the view and, once ``loadFinished`` fires, the page is
    asked for its markup through the callback-based ``toHtml``.

    Args:
        source_html: HTML markup to load into the view.

    Returns:
        The ``html`` attribute of the helper view. It starts as ``None``
        and is only replaced if the ``toHtml`` callback gets to run.
    """
    import sys

    from PyQt5.QtWidgets import QApplication
    from PyQt5.QtWebEngineWidgets import QWebEngineView

    class Render(QWebEngineView):
        """View that loads HTML and requests the page markup afterwards."""

        def __init__(self, html):
            self.html = None
            # The application object is created before the view's own Qt
            # initialisation runs.
            self.app = QApplication(sys.argv)
            QWebEngineView.__init__(self)
            self.loadFinished.connect(self._loadFinished)
            self.setHtml(html)
            self.app.exec_()

        def _loadFinished(self, result):
            # what's going on here? how can I get the HTML from toHtml?
            self.page().toHtml(self.callable)
            # NOTE: quit() is issued right after the toHtml() request, i.e.
            # without waiting for self.callable to be invoked. This ordering
            # is kept on purpose: it is the behaviour this listing is
            # asking about.
            self.app.quit()

        def callable(self, data):
            # Receives the page markup from toHtml().
            self.html = data

    return Render(source_html).html



import requests


# Fetch the unrendered page, then print the outcome of render().
# dummy_url is a placeholder; it is not defined in the code shown here.
page_response = requests.get(dummy_url)
sample_html = page_response.text
print(render(sample_html))



我怎样才能得到HTML?

时间:

新的 QWebEngine 接口考虑到了底层 Chromium 引擎是异步的,因此我们必须把异步 API 转换成同步 API。

看起来是这样的:


def render(source_html):
    """Fully render HTML, JavaScript and all.

    Loads ``source_html`` into a ``QWebEngineView`` and turns the
    callback-based ``toHtml`` into a blocking call by pumping Qt events
    until the callback has stored the markup.

    Args:
        source_html: HTML markup to load into the view.

    Returns:
        The markup passed to the ``toHtml`` callback.
    """
    import sys

    from PyQt5.QtCore import QEventLoop
    from PyQt5.QtWidgets import QApplication
    from PyQt5.QtWebEngineWidgets import QWebEngineView

    class Render(QWebEngineView):
        """View that blocks in its constructor until the HTML is available."""

        def __init__(self, html):
            self.html = None
            # The application object is created before the view's own Qt
            # initialisation runs.
            self.app = QApplication(sys.argv)
            QWebEngineView.__init__(self)
            self.loadFinished.connect(self._loadFinished)
            self.setHtml(html)
            # The flag combination never changes, so build it once.
            wait_flags = (
                QEventLoop.ExcludeUserInputEvents
                | QEventLoop.ExcludeSocketNotifiers
                | QEventLoop.WaitForMoreEvents
            )
            # Keep processing events until _callable() has replaced the
            # None placeholder with the page markup.
            while self.html is None:
                self.app.processEvents(wait_flags)
            self.app.quit()

        def _callable(self, data):
            # Receives the page markup from toHtml(); ends the wait loop.
            self.html = data

        def _loadFinished(self, result):
            # Request the markup; the result arrives later via _callable().
            self.page().toHtml(self._callable)

    return Render(source_html).html



import requests


# Get the page source over HTTP, render it, and print the result.
# dummy_url is a placeholder; it is not defined in the code shown here.
sample_html = requests.get(dummy_url).text
rendered_html = render(sample_html)
print(rendered_html)



正如你所指出的,Qt 5.4 依赖异步调用。其实不需要使用循环,你唯一的错误是在 toHtml 调用完成之前就调用了 quit。


def render(source_html):
    """Fully render HTML, JavaScript and all.

    Loads ``source_html`` into a ``QWebEngineView``, runs the Qt event
    loop, and stops it only after the ``toHtml`` callback has delivered
    the page markup.

    Args:
        source_html: HTML markup to load into the view.

    Returns:
        The markup passed to the ``toHtml`` callback.
    """
    import sys

    from PyQt5.QtWidgets import QApplication
    from PyQt5.QtWebEngineWidgets import QWebEngineView

    class Render(QWebEngineView):
        """View that runs the event loop until the markup has been stored."""

        def __init__(self, html):
            self.html = None
            # The application object is created before the view's own Qt
            # initialisation runs.
            self.app = QApplication(sys.argv)
            QWebEngineView.__init__(self)
            self.loadFinished.connect(self._loadFinished)
            self.setHtml(html)
            # Runs the event loop; it ends when _callable() calls quit().
            self.app.exec_()

        def _loadFinished(self, result):
            # This is an async call, you need to wait for this
            # to be called before closing the app
            self.page().toHtml(self._callable)

        def _callable(self, data):
            self.html = data
            # Data has been stored, it's safe to quit the app
            self.app.quit()

    return Render(source_html).html



import requests


# Retrieve the page source, pass it through render(), and print the output.
# dummy_url is a placeholder; it is not defined in the code shown here.
http_response = requests.get(dummy_url)
sample_html = http_response.text
print(render(sample_html))



稍微修改一下:


def render(url):
    """Fully render HTML, JavaScript and all.

    Loads the page at ``url`` in a ``QWebEngineView`` and pumps Qt events
    until the callback-based ``toHtml`` has delivered the page markup.

    Args:
        url: Address of the page to load; it is wrapped in a ``QUrl``.

    Returns:
        The markup passed to the ``toHtml`` callback.
    """
    import sys

    from PyQt5.QtCore import QEventLoop, QUrl
    from PyQt5.QtWidgets import QApplication
    from PyQt5.QtWebEngineWidgets import QWebEngineView

    class Render(QWebEngineView):
        """View that blocks in its constructor until the HTML is available."""

        def __init__(self, url):
            self.html = None
            # The application object is created before the view's own Qt
            # initialisation runs.
            self.app = QApplication(sys.argv)
            QWebEngineView.__init__(self)
            self.loadFinished.connect(self._loadFinished)
            self.load(QUrl(url))
            # The flag combination never changes, so build it once.
            wait_flags = (
                QEventLoop.ExcludeUserInputEvents
                | QEventLoop.ExcludeSocketNotifiers
                | QEventLoop.WaitForMoreEvents
            )
            # Keep processing events until _callable() has replaced the
            # None placeholder with the page markup.
            while self.html is None:
                self.app.processEvents(wait_flags)
            self.app.quit()

        def _callable(self, data):
            # Receives the page markup from toHtml(); ends the wait loop.
            self.html = data

        def _loadFinished(self, result):
            # Request the markup; the result arrives later via _callable().
            self.page().toHtml(self._callable)

    return Render(url).html



# dummy_url is a placeholder; it is not defined in the code shown here.
rendered_html = render(dummy_url)
print(rendered_html)



...