($width) AND $width .= 'px';
$style = " style=\"width: $width\"";
}
$value = $value ? $value : date('H:i');
$s = " ";
return $s;
}
/**
 * Date form-field helper, e.g. form_date('start', '2018-07-05').
 *
 * An empty $value falls back to the current date formatted as Y-m-d, and a
 * numeric $width gets a "px" suffix before being turned into an inline style.
 *
 * NOTE(review): as written, the returned string is a single space and neither
 * $style nor $value is interpolated into it. Presumably the input markup was
 * lost from this literal; confirm against the upstream source.
 *
 * @param string $name  field name (not referenced by the visible body)
 * @param mixed  $value date string; any falsy value means "today"
 * @param mixed  $width FALSE for no width, otherwise a number or CSS length
 * @return string
 */
function form_date($name, $value = 0, $width = FALSE)
{
    $style = '';
    if ($width !== FALSE) {
        if (is_numeric($width)) {
            $width .= 'px';
        }
        $style = " style=\"width: $width\"";
    }

    if (!$value) {
        $value = date('Y-m-d');
    }

    $html = " ";
    return $html;
}
/**用法
*
* echo form_radio_yes_no('radio1', 0);
* echo form_checkbox('aaa', array('无', '有'), 0);
*
* echo form_radio_yes_no('aaa', 0);
* echo form_radio('aaa', array('无', '有'), 0);
* echo form_radio('aaa', array('a'=>'aaa', 'b'=>'bbb', 'c'=>'ccc', ), 'b');
*
* echo form_select('aaa', array('a'=>'aaa', 'b'=>'bbb', 'c'=>'ccc', ), 'a');
*/
?>组保留的标签 余下为需要删除的标签
unset($oldtag[$key]);
}
}
}
if (!empty($oldtag)) {
$tagids = array();
foreach ($oldtag as $tagid => $tagname) {
$tagids[] = $tagid;
}
well_oldtag_delete($tagids, $tid);
}
$r = well_tag_process($tid, $fid, $create_tag, $tagarr);
return $r;
}
/**
 * Delete old tags and the thread bindings that go with them.
 *
 * Looks up the given tags, deletes those whose count is 1, deletes the
 * tag-thread rows found for $tid, and finally decrements the count of the
 * remaining tags by one.
 *
 * NOTE(review): the tag-thread lookup is keyed by $tid only and limited to the
 * number of tags found, not filtered by the tag ids. Verify that
 * well_tag_thread_find_by_tid returns the intended rows.
 *
 * @param array $tagids ids of the tags being removed from the thread
 * @param int   $tid    thread id
 * @return void
 */
function well_oldtag_delete($tagids, $tid)
{
    $taglist = well_tag_find_by_tagids($tagids, 1, count($tagids));

    $remove = array();    // tags used by this thread only: delete outright
    $decrement = array(); // tags still used elsewhere: count - 1
    $found = 0;
    foreach ($taglist as $tag) {
        $found += 1;
        if (1 == $tag['count']) {
            $remove[] = $tag['tagid'];
            continue;
        }
        $decrement[] = $tag['tagid'];
    }

    if (!empty($remove)) {
        well_tag_delete($remove);
    }

    $bindings = well_tag_thread_find_by_tid($tid, 1, $found);
    if ($bindings) {
        $ids = array();
        foreach ($bindings as $row) {
            $ids[] = $row['id'];
        }
        well_tag_thread_delete($ids);
    }

    if (!empty($decrement)) {
        well_tag_update($decrement, array('count-' => 1));
    }
}
/**
 * Process the tags submitted for a thread and return the tag map as JSON.
 *
 * Each new tag name is sanitised, looked up (or created), bound to the thread
 * and added to $tagarr. A thread holds at most 5 tags in total, so only
 * (5 - count($tagarr)) new tags are accepted, never fewer than zero.
 *
 * Changes from the previous behaviour:
 *  - the quota is clamped to 0 when more than 5 tags are already kept
 *    (it used to reset to 5 in that case);
 *  - names that are empty after sanitising no longer consume quota;
 *  - a tag that is already in $tagarr (kept, or added earlier in this call)
 *    is skipped instead of being bound and counted a second time.
 *
 * @param int   $tid      thread id; an empty value returns '' immediately
 * @param int   $fid      forum id (not referenced by this function body)
 * @param array $new_tags newly submitted tag names
 * @param array $tagarr   kept old tags, assumed to be tagid => name
 *                        (the same shape this function writes)
 * @return string JSON of the resulting tagid => name map, or '' when empty
 */
function well_tag_process($tid, $fid, $new_tags = array(), $tagarr = array())
{
    if (empty($tid)) return '';

    // Store the newly submitted tags
    if ($new_tags) {
        $threadarr = array(); // tag-thread rows to insert
        $tagids = array();    // existing tags whose count must be incremented
        $size = 5;            // maximum number of tags per thread
        $quota = max(0, $size - count($tagarr));
        $accepted = 0;

        foreach ($new_tags as $name) {
            // Nothing more can be accepted; skip the remaining sanitising work.
            if ($accepted >= $quota) break;

            $name = trim($name);
            $name = stripslashes($name);
            $name = strip_tags($name);
            $name = str_replace(array(' ', '#', "@", "$", "%", "^", '&', '·', '<', '>', ';', '`', '~', '!', '¥', '……', ';', '?', '?', '-', '—', '_', '=', '+', '.', '{', '}', '|', ':', ':', '、', '/', '。', '[', ']', '【', '】', '‘', ' ', ' ', ' ', ' ', ' '), '', $name);
            $name = htmlspecialchars($name, ENT_QUOTES);

            // Same truthiness test as before: '' and '0' are not usable names.
            if (!$name) continue;

            // Look the tag up by name
            $read = well_tag_read_name($name);
            if ($read) {
                // Already attached to this thread: do not bind or count twice.
                if (isset($tagarr[$read['tagid']])) continue;
                // Existing tag: count + 1
                $tagids[] = $read['tagid'];
            } else {
                // Unknown tag: create it with an initial count of 1
                $tagid = well_tag_create(array('name' => $name, 'count' => 1));
                FALSE === $tagid and message(-1, lang('create_failed'));
                $read = array('tagid' => $tagid, 'name' => $name);
            }

            ++$accepted;
            $threadarr[] = array('tagid' => $read['tagid'], 'tid' => $tid);
            $tagarr[$read['tagid']] = $read['name'];
        }

        !empty($threadarr) and tag_thread_big_insert($threadarr);
        !empty($tagids) and well_tag_update($tagids, array('count+' => 1));
    }

    return empty($tagarr) ? '' : xn_json_encode($tagarr);
}
?>return $r;
}
/**
 * Fetch one page of friend links.
 *
 * Delegates to link__find() with an empty condition and an order-by of
 * array('rank' => -1) (presumably rank descending; confirm in link__find).
 *
 * @param int $page     page number
 * @param int $pagesize number of rows per page
 * @return mixed whatever link__find() returns
 */
function link_find($page = 1, $pagesize = 100)
{
    $cond = array();
    $orderby = array('rank' => -1);

    return link__find($cond, $orderby, $page, $pagesize);
}
/**
 * Delete one friend link by id and clear the cached link list.
 *
 * @param mixed $id link id; an empty value is rejected without touching storage
 * @return mixed FALSE when $id is empty, otherwise the result of link__delete()
 */
function link_delete($id)
{
    if (!empty($id)) {
        $result = link__delete(array('id' => $id));
        // The cached list would otherwise still contain the deleted row.
        link_delete_cache();
        return $result;
    }

    return FALSE;
}
//--------------------------kv + cache--------------------------
/**
 * Return the friend links, served from the 'friends_link' website setting
 * when it is non-empty and loaded through link_find() (then cached) otherwise.
 *
 * NOTE(review): the cache key does not include $page/$pagesize, so once a
 * list is cached every call returns it regardless of the arguments.
 *
 * @param int $page     page number used only on a cache miss
 * @param int $pagesize page size used only on a cache miss
 * @return mixed the cached value or the result of link_find()
 */
function link_get($page = 1, $pagesize = 100)
{
    $links = website_get('friends_link');
    if (!empty($links)) {
        return $links;
    }

    // Cache miss: load from storage and remember a non-empty result.
    $links = link_find($page, $pagesize);
    if ($links) {
        website_set('friends_link', $links);
    }

    return $links;
}
/**
 * Clear the cached friend-link list by overwriting the 'friends_link'
 * website setting with an empty string.
 *
 * @return bool always TRUE
 */
function link_delete_cache()
{
    $cleared = '';
    website_set('friends_link', $cleared);

    return TRUE;
}
?> $v = implode(",", $v);
$temp[] = $v;
}
// 去掉重复的字符串,也就是重复的一维数组
$temp = array_unique($temp);
// 再将拆开的数组重新组装
$output = array();
foreach ($temp as $k => $v) {
if ($stkeep) $k = $starr[$k];
if ($ndformat) {
$temparr = explode(",", $v);
foreach ($temparr as $ndkey => $ndval) $output[$k][$ndarr[$ndkey]] = $ndval;
} else $output[$k] = explode(",", $v);
}
return $output;
}
/**
 * Merge two two-dimensional arrays row by row.
 *
 * For every row of $array1 the row stored under the same outer key in $array2
 * is merged in with array_merge(). The result is indexed by the row's $key
 * column when the row has one, otherwise appended with a numeric index.
 *
 * A row of $array1 with no matching array row in $array2 is kept on its own
 * (it used to be passed to array_merge() together with NULL, which is a
 * TypeError on PHP 8 and yields NULL on PHP 7).
 *
 * NOTE(review): array_merge() lets the values from $array2 overwrite equal
 * string keys of $array1, although the original comment said the first array
 * wins. The existing behaviour is kept; confirm which one callers expect.
 *
 * @param array  $array1 rows that drive the iteration
 * @param array  $array2 rows merged in, matched by outer key
 * @param string $key    column whose value becomes the index of the result
 * @return array|null NULL when either input is empty
 */
function array2_merge($array1, $array2, $key = '')
{
    if (empty($array1) || empty($array2)) return NULL;

    $arr = array();
    foreach ($array1 as $k => $v) {
        $extra = (isset($array2[$k]) && is_array($array2[$k])) ? $array2[$k] : array();
        $row = array_merge($v, $extra);

        if (isset($v[$key])) {
            $arr[$v[$key]] = $row;
        } else {
            $arr[] = $row;
        }
    }

    return $arr;
}
/**
 * Reorder a two-dimensional array following the order of another one.
 *
 * $array1 must be indexed by the value of its $key column. $array2 is walked
 * in order: when a row's $key value identifies a row of $array1 carrying the
 * same $key value, that $array1 row is emitted under that index; otherwise
 * the $array2 row itself is appended.
 *
 * The lookup into $array1 is guarded with isset(). Previously a missing row
 * raised undefined-index warnings and, for values loosely equal to NULL
 * (such as 0), stored a NULL row in the result.
 *
 * @param array  $array1 rows to be reordered, indexed by their $key value
 * @param array  $array2 rows whose order (by $key value) is followed
 * @param string $key    column shared by both arrays
 * @return array|null NULL when either input is empty
 */
function array2_sort_key($array1, $array2, $key = '')
{
    if (empty($array1) || empty($array2)) return NULL;

    $arr = array();
    foreach ($array2 as $v) {
        if (isset($v[$key])) {
            $id = $v[$key];
            if (isset($array1[$id][$key]) && $id == $array1[$id][$key]) {
                $arr[$id] = $array1[$id];
                continue;
            }
        }
        // No matching row in $array1: keep the row from $array2.
        $arr[] = $v;
    }

    return $arr;
}
?>
selenium+edge+python on mac-软件玩家 - 软件改变生活!
一、简介
本文主要介绍Selenium的最简单的使用,看懂了去拓展会So ez。 Selenium:一种浏览器脚本驱动工具,实现用电脑模拟人操作浏览器网页,对网页自动化操作。
二、环境准备
安装python,配置环境变量(python和python Scripts的)
安装或更新pip
使用pip工具安装第三方库:pip install -i https://pypi.tuna.tsinghua.edu.cn/simple selenium
下载并部署你浏览器对应的驱动。
创建project,导入Python SDK
三、EASY准备
Edge浏览器采用Chromium内核,并将支持所有受支持的Windows版本以及macOS等平台。 这次我使用的是Edge
1、使用驱动获取指定页面元素
1)使用Service指定驱动路径
service = Service( '/usr/local/bin/msedgedriver' )
service. start( )
driver = webdriver. Remote( service. service_url)
driver. get( 'https://www.baidu.com/' )
2)给驱动导入用户配置option
#驱动路径
chromedriver = "/usr/local/bin/msedgedriver.exe"
#将驱动对应环境的映像对象 给到os
os. environ[ "webdriver.chrome.driver" ] = chromedriver
#初始化配置
option = webdriver. ChromeOptions( )
#配置加入我们的用户配置文件
option. add_argument( '--user-data-dir=/usr/local/bin/config' )
#将配置应用到驱动
driver = webdriver. Chrome( chromedriver, chrome_options= option)
2、获取完元素,定位元素
1)八种定位元素的方法(子)
通过id定位元素:find_element_by_id(“id_value”) 通过name定位元素:find_element_by_name(“name_value”) 通过tag_name定位元素:find_element_by_tag_name(“tag_name_value”) 通过class_name定位元素:find_element_by_class_name(“class_name”) 通过css定位元素:find_element_by_css_selector() 通过xpath定位元素:find_element_by_xpath(“xpath”) 通过link_text定位元素:find_element_by_link_text(“text_value”) 通过partial_link_text定位元素:find_element_by_partial_link_text()
def find_element_by_id ( self, id_) :
return self. find_element( by= By. ID, value= id_)
def find_element_by_xpath ( self, xpath) :
return self. find_element( by= By. XPATH, value= xpath)
def find_element_by_link_text ( self, link_text) :
return self. find_element( by= By. LINK_TEXT, value= link_text)
def find_element_by_partial_link_text ( self, link_text) :
return self. find_element( by= By. PARTIAL_LINK_TEXT, value= link_text)
def find_element_by_name ( self, name) :
return self. find_element( by= By. NAME, value= name)
def find_element_by_tag_name ( self, name) :
return self. find_element( by= By. TAG_NAME, value= name)
def find_element_by_class_name ( self, name) :
return self. find_element( by= By. CLASS_NAME, value= name)
def find_element_by_css_selector ( self, css_selector) :
return self. find_element( by= By. CSS_SELECTOR, value= css_selector)
2)定位元素(父):find_element 和 find_elements
其实可以一眼看出上面八种定位方法最终都调用的是find_element方法
driver. find_element( By. ID, "kw" )
driver. find_elements( By. TAG_NAME, "input" )
find_element:好处是方法名不会写死,定位方式可以通过参数传递,在一些框架中使用时会更加灵活一些。
定位方式与By参数的对应关系:id → By.ID;name → By.NAME;class_name → By.CLASS_NAME;tag_name → By.TAG_NAME;link_text → By.LINK_TEXT;partial_link_text → By.PARTIAL_LINK_TEXT;css_selector → By.CSS_SELECTOR;xpath → By.XPATH
每种 find_element()方法,包括find_element_by_id(),在查找元素时,如果定位语句不唯一,能够查到多个元素的话,默认只返回页面中出现的第一个。也就是说定位不唯一,那得到的元素可能就不是你想要的。区别就是带s的会返回匹配元素的列表,八种基本定位也会拓展出带s的八种,例:find_elements_by_id
以下是返回的元素对象结果:
/usr/local/bin/python3.9 /Users/zjk/IdeaProjects/test_Python/src/web_driver/driver.py
< selenium.webdriver.remote.webelement.WebElement ( session= "dc4f0bbd53be7da2abf796001cde4c77" , element = "155b0372-b50e-4faf-8354-025988caa340" ) >
Process finished with exit code 0
3、驱动对页面操作
driver. back( ) # 返回上一页(左箭头)
driver. forward( ) # 去下一页(右箭头)
driver. refresh( ) # 刷新当前网页
driver. close( ) # 关闭当前窗口
# .submit() 模拟键盘的回车键
driver. find_element_by_link_text( '新闻' ) . submit( )
driver. execute( Command. QUIT)
driver. quit( ) # = .execute(Command.QUIT) + .stop_client()
. . . 很多
四、CASE
1、遍历爬取页面内所有图片属性
from selenium import webdriver
from time import sleep
from selenium. webdriver. chrome. service import Service
service = Service( '/usr/local/bin/msedgedriver' )
service. start( )
driver = webdriver. Remote( service. service_url)
driver. get( 'https://www.baidu.com' )
for img in driver. find_elements_by_tag_name( "img" ) :
print ( 'text: ' + img. text)
print ( 'size: ' + str ( img. size) )
print ( 'tag_name: ' + img. tag_name)
sleep( 2 )
driver. stop_client( )
# driver.quit()
本文标签:
edge selenium Mac python
发表评论