| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277 |
- <?php
- namespace addons\RfDevTool\common\queues;
- use Yii;
- use yii\base\BaseObject;
- use yii\queue\JobInterface;
- use common\helpers\StringHelper;
- use common\models\common\Provinces;
- use linslin\yii2\curl\Curl;
- use QL\QueryList;
- use addons\RfDevTool\common\models\ProvinceGatherLog;
/**
 * Queue job that scrapes the child regions listed on one parent region's page,
 * stores them as Provinces rows and queues a follow-up job for every child that
 * still has deeper levels to collect.
 *
 * @package addons\RfDevTool\common\queues
 * @author jianyan74 <751393839@qq.com>
 */
class ProvinceChildJob extends BaseObject implements JobInterface
{
    /**
     * @var string Base URL that is prepended to every child link built by this job.
     */
    public $baseUrl;

    /**
     * @var int A child job is queued for a saved region only while that region's
     *          level + 1 does not exceed this value.
     */
    public $maxLevel;

    /**
     * @var array Parent region data. This job reads its `id`, `level`, `tree`,
     *            `chlidLink` and (optional) `code` keys.
     */
    public $parent;

    /**
     * @var int Identifier handed on unchanged to child jobs, retries and log rows.
     */
    public $job_id;

    /**
     * Remaining number of retries.
     *
     * @var int
     */
    public $reconnection = 5;

    /**
     * @var int Depth handled by this job; selects the entry of $range to use and
     *          is increased by one for every child job.
     */
    public $level = 2;

    /**
     * Path prefix used when building child links.
     *
     * @var string|null
     */
    public $chlidPrefix;

    /**
     * CSS range selectors, keyed by level (city, county, town and village tables).
     *
     * @var string[]
     */
    public $range = [
        2 => 'table.citytable td+td',
        3 => 'table.countytable td+td',
        4 => 'table.towntable td+td',
        5 => 'table.villagetable .villagetr',
    ];

    /**
     * Entry point called by the queue worker.
     *
     * @param \yii\queue\Queue $queue
     * @return mixed|void
     */
    public function execute($queue)
    {
        /** @var QueryList $ql */
        $ql = QueryList::getInstance();

        // Register an `http` method on the QueryList object.
        // NOTE(review): inside the closure `$this` is used as the QueryList
        // instance (setHtml() is not defined on this job), so bind() presumably
        // rebinds the closure — confirm against the QueryList documentation.
        $ql->bind('http', function ($url) {
            $curl = new Curl();
            // A failed request may not yield a string; normalise it so the
            // mbstring calls below always receive one.
            $html = (string) $curl->get($url);
            $encoding = mb_detect_encoding($html, ['ASCII', 'UTF-8', 'GB2312', 'GBK', 'BIG5']);
            // mb_detect_encoding() returns false when nothing matches; only
            // convert when a source encoding is actually known.
            if ($encoding !== false) {
                $html = mb_convert_encoding($html, 'UTF-8', $encoding);
            }
            $this->setHtml($html);

            return $this;
        });

        // Second segment of the parent's link, cast once so every comparison
        // below is a plain integer comparison.
        $parentCode = isset($this->parent['code'][1]) ? (int) $this->parent['code'][1] : null;

        /******************************** villages ********************************/
        // Dongguan, Zhongshan and Danzhou have towns directly below the city, so
        // the children of those towns are villages even though $level is below 5.
        $parentIsDirectTown = $parentCode !== null && (
            ($parentCode >= 441900000 && $parentCode < 443000000)
            || ($parentCode >= 460400000 && $parentCode < 460500000)
        );

        if ($parentIsDirectTown || $this->level >= 5) {
            $this->getVillage($ql);

            return;
        }

        /******************************** city / county / town ********************************/
        $level = $this->level;
        // Same three cities: their page lists towns, so use the selector of the
        // next level.
        if ($parentCode !== null && in_array($parentCode, [4419, 4420, 4604], true)) {
            $level += 1;
        }

        $this->getCityCountyTown($ql, $level);
    }

    /**
     * Scrapes the villages listed on the parent's page and stores them.
     *
     * @param QueryList $ql
     */
    public function getVillage($ql)
    {
        $data = $ql->rules([
            'id' => ['td:first', 'text'],
            'title' => ['td:nth-child(3)', 'text'],
        ])->http($this->parent['chlidLink'])->range($this->range[5])->query()->getData()->all();

        // No rows found: the fetch probably failed, so schedule a retry.
        if (empty($data)) {
            $this->reconnection();

            return;
        }

        foreach ($data as $datum) {
            if (empty($datum['id']) && empty($datum['title'])) {
                continue;
            }

            $datum['level'] = $this->parent['level'] + 1;
            $datum['pid'] = $this->parent['id'];
            $datum['tree'] = $this->parent['tree'] . $datum['pid'] . '-';

            $this->saveProvince($datum);
        }
    }

    /**
     * Scrapes the cities, counties or towns listed on the parent's page, stores
     * them and queues a child job for each one that has deeper levels to collect.
     *
     * @param QueryList $ql
     * @param int $level key of $range whose selector is used for this page
     */
    public function getCityCountyTown($ql, $level)
    {
        $data = $ql->rules([
            'title' => ['a', 'text'],
            'link' => ['a', 'href'],
        ])->http($this->parent['chlidLink'])->range($this->range[$level])->query()->getData()->all();

        // No rows found: the fetch probably failed, so schedule a retry.
        if (empty($data)) {
            $this->reconnection();

            return;
        }

        // Ids scraped at level 2 get a '00' suffix appended.
        $codeSuffix = $this->level == 2 ? '00' : '';

        foreach ($data as $datum) {
            // The id and the child URL are both derived from the link, so a row
            // without one cannot be processed.
            if (empty($datum['link'])) {
                continue;
            }

            $code = StringHelper::replace('.html', '', $datum['link']);
            $datum['code'] = explode('/', $code);
            // A link without a "<dir>/<code>" shape carries no region code.
            if (!isset($datum['code'][1])) {
                continue;
            }

            // Path prefix for the links of this row's own children.
            $chlidPrefix = empty($this->chlidPrefix)
                ? $datum['code'][0]
                : $this->chlidPrefix . '/' . $datum['code'][0];

            $datum['id'] = $datum['code'][1] . $codeSuffix;
            $datum['level'] = $this->parent['level'] + 1;
            $datum['pid'] = $this->parent['id'];
            $datum['tree'] = $this->parent['tree'] . $datum['pid'] . '-';
            $datum['chlidPrefix'] = $chlidPrefix;
            $datum['chlidLink'] = $this->baseUrl . $chlidPrefix . '/' . $datum['code'][1] . '.html';

            $this->saveProvince($datum);

            if ($datum['level'] + 1 <= $this->maxLevel) {
                $this->createJob($datum);
            }
        }
    }

    /**
     * Inserts or updates the Provinces row identified by $datum['id'] and reports
     * validation/save failures through Yii::error() instead of dropping them.
     *
     * @param array $datum scraped row; must contain an `id` key
     */
    protected function saveProvince(array $datum)
    {
        $model = Provinces::findOne(['id' => $datum['id']]);
        if (!$model) {
            $model = new Provinces();
        }

        $model->attributes = $datum;
        if (!$model->save()) {
            Yii::error(Yii::$app->services->base->analysisErr($model->getFirstErrors()));
        }
    }

    /**
     * Re-queues this job with one retry less, delayed by 60 seconds, or logs a
     * final failure once no retries are left.
     */
    protected function reconnection()
    {
        if ($this->reconnection <= 0) {
            $this->log('采集彻底失败');

            return;
        }

        $queue = new ProvinceChildJob([
            'parent' => $this->parent,
            'baseUrl' => $this->baseUrl,
            // Carry the prefix over: without it the retried job would build its
            // child links from an empty prefix.
            'chlidPrefix' => $this->chlidPrefix,
            'maxLevel' => $this->maxLevel,
            'level' => $this->level,
            'job_id' => $this->job_id,
            'reconnection' => $this->reconnection - 1,
        ]);

        // Run again after a 60 second delay.
        $messageId = Yii::$app->queue->delay(1 * 60)->push($queue);
        $this->log('采集失败,等待重试时间60秒', $messageId);
    }

    /**
     * Writes one ProvinceGatherLog row describing the current job state.
     *
     * @param string $remark text stored in the log row
     * @param int|string $message_id id returned by the queue for the re-queued job, 0 if none
     */
    protected function log($remark, $message_id = 0)
    {
        $model = new ProvinceGatherLog();
        $model->data = $this->parent;
        $model->url = $this->baseUrl;
        $model->max_level = $this->maxLevel;
        $model->level = $this->level;
        $model->job_id = $this->job_id;
        $model->message_id = $message_id;
        $model->reconnection = $this->reconnection;
        $model->remark = $remark;

        // Save exactly once and report a failure.
        if (!$model->save()) {
            Yii::error(Yii::$app->services->base->analysisErr($model->getFirstErrors()));
        }
    }

    /**
     * Queues a job that scrapes the children of the given region.
     *
     * @param array $datum saved region row; becomes the child job's parent
     */
    protected function createJob($datum)
    {
        $queue = new ProvinceChildJob([
            'parent' => $datum,
            'baseUrl' => $this->baseUrl,
            'chlidPrefix' => $datum['chlidPrefix'],
            'maxLevel' => $this->maxLevel,
            'level' => $this->level + 1,
            'job_id' => $this->job_id,
        ]);

        Yii::$app->queue->push($queue);
    }
}
|