- cocos2dx 2.x
- 遍历UI树时,在节点的draw方法中执行绘制指令
- cocos2dx 3.x
- 遍历UI树时,在节点的draw方法中生成绘制指令,并加入到Renderer管理的绘制队列中。
改进的好处:
- 1、绘制逻辑从UI树中剥离
- 2、应用程序级的视口裁剪
- OpenGL ES会在图元装配阶段进行图元丢弃或裁剪,但依然执行了绘制命令。而改进之后的逻辑则从应用程序就进行了裁剪工作,减少了对GPU的占用。
- 3、自动批绘制auto batching
- 如果“相邻”元素使用相同的纹理,相同的着色器等,我们可以只调用一次绘制命令。减少绘制次数(draw calls)提高性能。
- 4、更好的自定义绘制(扩展性)
整个绘制模块的结构图如下:
点击查看【processon】
整个绘制流程可以总结为3个阶段:
- 生成绘制命令
- 在UI树遍历时在每个元素中生成其绘制命令,并插入到当前的RenderQueue中。
- 绘制命令排序
- 对绘制栈中RenderQueue的元素按globalZOrder的升序排序。
- 首先,排序GLOBALZ_NEG(globalZOrder<0)的RenderQueue。
- 然后,GLOBALZ_ZERO(globalZOrder=0)的RenderQueue不参与排序,保持插入时的顺序(即UI树遍历顺序)。
- 最后,排序GLOBALZ_POS(globalZOrder>0)的RenderQueue。
- 执行绘制命令
- 管理绘制栈
- 由RenderQueue组成,新加入的RenderCommand总是插入到栈顶的RenderQueue中。index=0的RenderQueue为主Queue,在Renderer构造时便创建,常驻于内存,每次渲染都是从这里开始。
- 执行绘制命令
- 每条RenderCommand的最终执行都在这里完成。
- auto batching也是在这里完成的。
数据结构
点击查看【processon】

## 遍历指令
```cpp
//——————————————Renderer void Renderer::render(){ _isRendering = true; if (_glViewAssigned){
for (auto &renderqueue : _renderGroups)renderqueue.sort(); // 见下面RenderQueue解释// 从index=0的主RenderQueue开始遍历,如果遇到GroupCommand,则递归遍历// GroupCommand执行的RenderQueue。visitRenderQueue(_renderGroups[0]);}......
}
/// Processes the commands of one RenderQueue group by group, in the order
/// GLOBALZ_NEG, GLOBALZ_ZERO, GLOBALZ_POS.
void Renderer::visitRenderQueue(RenderQueue& queue)
{
    // *********************************************
    // *** First draw the RenderCommands with globalZOrder < 0
    // *********************************************
    const auto& zNegQueue = queue.getSubQueue(RenderQueue::QUEUE_GROUP::GLOBALZ_NEG);
    if (zNegQueue.size() > 0)
    {
        for (const auto& zNegNext : zNegQueue)
        {
            // This is where the OpenGL commands really start being executed.
            processRenderCommand(zNegNext);
        }
    }

    // *********************************************
    // *** Draw the RenderCommands with globalZOrder = 0
    // *********************************************
    const auto& zZeroQueue = queue.getSubQueue(RenderQueue::QUEUE_GROUP::GLOBALZ_ZERO);
    if (zZeroQueue.size() > 0)
    {
        for (const auto& zZeroNext : zZeroQueue)
        {
            processRenderCommand(zZeroNext);
        }
    }

    // *********************************************
    // *** Draw the RenderCommands with globalZOrder > 0
    // *********************************************
    const auto& zPosQueue = queue.getSubQueue(RenderQueue::QUEUE_GROUP::GLOBALZ_POS);
    if (zPosQueue.size() > 0)
    {
        for (const auto& zPosNext : zPosQueue)
        {
            processRenderCommand(zPosNext);
        }
    }
}
<a name="X43Q1"></a>## 执行指令```cppvoid Renderer::processRenderCommand(RenderCommand* command){auto commandType = command->getType();if( RenderCommand::Type::TRIANGLES_COMMAND == commandType){// flush other queuesflush3D();auto cmd = static_cast<TrianglesCommand*>(command);// flush own queue when buffer is fullif(_filledVertex + cmd->getVertexCount() > VBO_SIZE || _filledIndex + cmd->getIndexCount() > INDEX_VBO_SIZE){CCASSERT(cmd->getVertexCount()>= 0 && cmd->getVertexCount() < VBO_SIZE, "VBO for vertex is not big enough, please break the data down or use customized render command");CCASSERT(cmd->getIndexCount()>= 0 && cmd->getIndexCount() < INDEX_VBO_SIZE, "VBO for index is not big enough, please break the data down or use customized render command");drawBatchedTriangles();}// queue it_queuedTriangleCommands.push_back(cmd);_filledIndex += cmd->getIndexCount();_filledVertex += cmd->getVertexCount();}else if (RenderCommand::Type::MESH_COMMAND == commandType){flush2D();auto cmd = static_cast<MeshCommand*>(command);if (cmd->isSkipBatching() || _lastBatchedMeshCommand == nullptr || _lastBatchedMeshCommand->getMaterialID() != cmd->getMaterialID()){flush3D();CCGL_DEBUG_INSERT_EVENT_MARKER("RENDERER_MESH_COMMAND");if(cmd->isSkipBatching()){// XXX: execute() will call bind() and unbind()// but unbind() shouldn't be call if the next command is a MESH_COMMAND with Material.// Once most of cocos2d-x moves to Pass/StateBlock, only bind() should be used.cmd->execute();}else{cmd->preBatchDraw();cmd->batchDraw();_lastBatchedMeshCommand = cmd;}}else{CCGL_DEBUG_INSERT_EVENT_MARKER("RENDERER_MESH_COMMAND");cmd->batchDraw();}}else if(RenderCommand::Type::GROUP_COMMAND == commandType){flush();int renderQueueID = ((GroupCommand*) command)->getRenderQueueID();CCGL_DEBUG_PUSH_GROUP_MARKER("RENDERER_GROUP_COMMAND");visitRenderQueue(_renderGroups[renderQueueID]);CCGL_DEBUG_POP_GROUP_MARKER();}else if(RenderCommand::Type::CUSTOM_COMMAND == commandType){flush();auto cmd = 
static_cast<CustomCommand*>(command);CCGL_DEBUG_INSERT_EVENT_MARKER("RENDERER_CUSTOM_COMMAND");cmd->execute();}else if(RenderCommand::Type::BATCH_COMMAND == commandType){flush();auto cmd = static_cast<BatchCommand*>(command);CCGL_DEBUG_INSERT_EVENT_MARKER("RENDERER_BATCH_COMMAND");cmd->execute();}else if(RenderCommand::Type::PRIMITIVE_COMMAND == commandType){flush();auto cmd = static_cast<PrimitiveCommand*>(command);CCGL_DEBUG_INSERT_EVENT_MARKER("RENDERER_PRIMITIVE_COMMAND");cmd->execute();}else{CCLOGERROR("Unknown commands in renderQueue");}}
RenderQueue重复利用
当renderer绘制栈完成一次绘制,注意,不会清空绘制栈,而是清空绘制栈中所有RenderQueue的元素,这样可以减少内存分配操作。
// ************************************************// ********** Renderer.cpp// ************************************************void Renderer::render(){_isRendering = true;if (_glViewAssigned){for (auto &renderqueue : _renderGroups){renderqueue.sort(); //排序}visitRenderQueue(_renderGroups[0]);}clean(); // 绘制完成,清理绘制栈_isRendering = false;}void Renderer::clean(){// Clear render groupfor (size_t j = 0, size = _renderGroups.size() ; j < size; j++){// std::vector<RenderQueue> _renderGroups;_renderGroups[j].clear(); // 清空RenderQueue}}//--------------------------------GroupCommandGroupCommand::GroupCommand(){//由于GroupCommandManager分配一个空的闲置的RenderQueue_renderQueueID = Director::getInstance()->getRenderer()->getGroupCommandManager()->getGroupID();}GroupCommand::~GroupCommand(){//回收RenderQueue,因为是在构造析构中完成分配和回收,所以这是自动化的。Director::getInstance()->getRenderer()->getGroupCommandManager()->releaseGroupID(_renderQueueID);}//--------------------------------GroupCommandManagerint GroupCommandManager::getGroupID(){//std::unordered_map<int, bool> _groupMapping; 每个RenderQueue的是否正在被使用//std::vector<int> _unusedIDs; 当前可以使用的RenderQueue//重复使用已有的RenderQueueif (!_unusedIDs.empty()){int groupID = *_unusedIDs.rbegin();_unusedIDs.pop_back();_groupMapping[groupID] = true;return groupID;}//当前没有可用的RenderQueue则创建Queue并压入栈顶。int newID = Director::getInstance()->getRenderer()->createRenderQueue();_groupMapping[newID] = true;return newID;}void GroupCommandManager::releaseGroupID(int groupID){//回收RenderQueue,groupID指的是Queue。_groupMapping[groupID] = false;_unusedIDs.push_back(groupID);}
RenderQueue
RenderQueue的数据结构见上图,按GroupCommand来理解RenderQueue,因为GroupCommand就是执行RenderQueue。
class RenderQueue {public:// 队列中的指令按如下进行分组:enum QUEUE_GROUP{GLOBALZ_NEG = 0, // globalZ < 0OPAQUE_3D = 1, // globalZ = 0的不透明3D objectTRANSPARENT_3D = 2, // globalZ = 0的透明3D objectGLOBALZ_ZERO = 3, // globalZ = 0的2D objectGLOBALZ_POS = 4, // globalZ > 0的2D objectQUEUE_COUNT = 5,};public:// 根据command的globalZoder是>0,<0,==0来将command插入到对应的queue,void push_back(RenderCommand* command);ssize_t size() const; //全部的命令数void sort() {// 排序绘制顺序,只排序globalZ!=0的Command// globalZoder=0分组就按插入时的顺序,也即是UI树遍历顺序。std::stable_sort(std::begin(_commands[QUEUE_GROUP::TRANSPARENT_3D]), std::end(_commands[QUEUE_GROUP::TRANSPARENT_3D]), compare3DCommand);std::stable_sort(std::begin(_commands[QUEUE_GROUP::GLOBALZ_NEG]), std::end(_commands[QUEUE_GROUP::GLOBALZ_NEG]), compareRenderCommand);std::stable_sort(std::begin(_commands[QUEUE_GROUP::GLOBALZ_POS]), std::end(_commands[QUEUE_GROUP::GLOBALZ_POS]), compareRenderCommand);}protected://RenderQueue的数据结构std::vector<RenderCommand*> _commands[QUEUE_COUNT];};
自动批绘制:Auto Batching
用OpenGL的一段伪代码解释:
```cpp
// 根据顶点数据vertices、顶点索引数据indices绘制数个三角形
void drawTriangles(vertices, indices)
{
    setupVAO_VBO(vertices, indices); // 构建VAO、VBO

    // *******************
    // **** 我们把以下状态数据称为materialID
    // *******************
    glBindTexture(textureID);
    shader.use();
    shader.setUniforms(...);
    glEnable(GL_BLEND);
    glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);

    // draw call
    glDrawElements(GL_TRIANGLES, vertexCount, GL_UNSIGNED_INT, (GLvoid*)(0*sizeof(GLuint)));
}

void main()
{
    // *********************************************
    // ******* 需求:要绘制10批顶点数据的三角形,他们的materialID相同
    // *********************************************

    // *********************************************
    // ******** 普通方法
    // *********************************************
    while(i<10)
    {
        drawTriangles(......); // 执行10次glDrawElements,也即是10次draw calls
    }

    // *********************************************
    // ******** auto batching
    // *********************************************
    verticesAll, indicesAll = gatherVertexData(...); // 将这10批顶点数据集中起来
    drawTriangles(verticesAll, indicesAll);          // 1次draw call,完成全部绘制
}
```
对于有OpenGL基础的同学理解起来非常简单。
要满足自动批绘制有几个条件:
// 每条TrianglesCommand对应一个materialID void TrianglesCommand::generateMaterialID(){ struct { void* glProgramState; GLuint textureId; GLenum blendSrc; GLenum blendDst; } hashMe;
memset(&hashMe, 0, sizeof(hashMe));//我们可以看到,_materialID是一个hash值,因子包括:hashMe.textureId = _textureID; // 纹理IDhashMe.blendSrc = _blendType.src; // 混合hashMe.blendDst = _blendType.dst;hashMe.glProgramState = _glProgramState; // shader程序状态(glProgram、uniforms/attributes)_materialID = XXH32((const void*)&hashMe, sizeof(hashMe), 0);
}
在Renderer中执行指令的逻辑:```cppvoid Renderer::processRenderCommand(RenderCommand* command){auto commandType = command->getType();if( RenderCommand::Type::TRIANGLES_COMMAND == commandType){......auto cmd = static_cast<TrianglesCommand*>(command);// VBO(Vertex Buffer Object)满了,立即执行绘制if(_filledVertex + cmd->getVertexCount() > VBO_SIZE || _filledIndex + cmd->getIndexCount() > INDEX_VBO_SIZE){......drawBatchedTriangles(); //立即执行队列中的TriangleCommand,}// VBO还没有满,则放入TriangleCommand的队列中,直接执行下一个command,// 如果下一个command不是TriangleCommand,则立即flush,执行队列中的TriangleCommand// 如果下一个还是TriangleCommand,则依然加入TriangleCommand队列_queuedTriangleCommands.push_back(cmd);......}else if (RenderCommand::Type::MESH_COMMAND == commandType){flush2D();......}else if(RenderCommand::Type::GROUP_COMMAND == commandType){flush();......}else if(RenderCommand::Type::CUSTOM_COMMAND == commandType){flush();......}else if(RenderCommand::Type::BATCH_COMMAND == commandType){flush();......}else if(RenderCommand::Type::PRIMITIVE_COMMAND == commandType){flush();......}}void Renderer::flush(){flush2D();flush3D();}void Renderer::flush2D(){flushTriangles();}void Renderer::flushTriangles(){drawBatchedTriangles();}void Renderer::drawBatchedTriangles(){//_queuedTriangleCommands保存的是TriangleCommand//这些Command是在此之前连续相邻的TriangleCommandif(_queuedTriangleCommands.empty())return;// 假设_queuedTriangleCommands = [// command1, //matrialID = 1// command2, //matrialID = 2// command3, //matrialID = 2// command4, //matrialID = 3// command5, //matrialID = 3// command6, //matrialID = 4// ]// 最终绘制指令是// [// command1, //matrialID = 1// command3, //matrialID = 2// command5, //matrialID = 3// command6, //matrialID = 4// ]......}
自动裁剪:Auto Culling
在UI树遍历阶段,cocos就会对一些元素进行位置计算,如果发现在屏幕外,就不会发送绘制指令到绘制栈中。
void Node::visit(......){// quick return if not visible. children won't be drawn.if (!_visible) //元素不可见,直接跳过遍历,不会发送任何绘制指令,子元素也不会遍历{return;}draw(......);......}void Sprite::draw(......){#if CC_USE_CULLING //是否开启自动裁剪,自动裁剪是需要每帧计算位置的,可能会降低性能//如果你确定不会有出现在屏幕外的情况,可关闭自动裁剪,提高性能。// Don't calculate the culling if the transform was not updated......else if (visitingCamera == defaultCamera) {_insideBounds = ((flags & FLAGS_TRANSFORM_DIRTY) || visitingCamera->isViewProjectionUpdated()) ?renderer->checkVisibility(transform, _contentSize) : _insideBounds;//_insideBounds:是否在屏幕内//FLAGS_TRANSFORM_DIRTY:模型视图变换矩阵如果改变了即元素位置发生改变才会进行位置计算。//renderer->checkVisibility,计算元素可见性。}......if(_insideBounds)#endif{_trianglesCommand.init(......);renderer->addCommand(&_trianglesCommand); //发送绘制指令......}}
