Skip to content

Commit

Permalink
Merge pull request #1 from youqingxiaozhua/dev
Browse files Browse the repository at this point in the history
Test Complete
  • Loading branch information
youqingxiaozhua authored Sep 19, 2019
2 parents 2f8b691 + 5ac19ee commit 78a8b1a
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 20 deletions.
2 changes: 1 addition & 1 deletion LoginUCAS.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def _sep_init(self):

@classmethod
def _read_username_and_password(cls):
with codecs.open(r'./private.txt', "r", 'utf-8') as f:
with codecs.open('./private.txt', "r", "utf-8") as f:
username = password = None
for i, line in enumerate(f):
if i == 0:
Expand Down
Binary file removed easy_use/main.exe
Binary file not shown.
4 changes: 0 additions & 4 deletions easy_use/private.txt

This file was deleted.

29 changes: 15 additions & 14 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

class UCASCourse(object):
def __init__(self, time_out=5):
self.__BEAUTIFULSOUPPARSE = 'html5lib' # or use 'lxml'
self.__BEAUTIFULSOUPPARSE = 'html.parser' # or use 'lxml'
self.semester = None
self.save_base_path, self.semester = UCASCourse._read_info_from_file()
self.session = None
Expand All @@ -33,7 +33,7 @@ def _init_session(self):

@classmethod
def _read_info_from_file(cls):
with codecs.open(r'./private.txt', "r", "utf-8") as f:
with codecs.open('./private.txt', "r", "utf-8") as f:
save_base_path = semester = None
for i, line in enumerate(f):
if i < 2: continue
Expand All @@ -52,20 +52,13 @@ def _get_course_page(self):
url = "http://course.ucas.ac.cn/portal/plogin?Identity=" + code
self.headers['Host'] = "course.ucas.ac.cn"
html = self.session.get(url, headers=self.headers).text
url = 'http://course.ucas.ac.cn' + \
BeautifulSoup(html, self.__BEAUTIFULSOUPPARSE).find('frame', title='mainFrame')['src']
html = self.session.get(url, headers=self.headers).text
url = BeautifulSoup(html, self.__BEAUTIFULSOUPPARSE).find('a', class_='icon-sakai-membership')['href']
html = self.session.get(url, headers=self.headers).text
url = BeautifulSoup(html, self.__BEAUTIFULSOUPPARSE).find('iframe')['src']
html = self.session.get(url, headers=self.headers).text
return html

def _parse_course_list(self):
# 获取课程的所有URL
html = self._get_course_page()
self.course_list = ['http://course.ucas.ac.cn/portal/site/' + x for x in
re.findall(r'http://course.ucas.ac.cn/portal/site/([\S]+)"', html)]
re.findall(r'http://course.ucas.ac.cn/portal/site/([\d]+)"', html)]

def _get_all_resource_url(self):
# 从课程的所有URL中获取对应的所有课件
Expand All @@ -76,17 +69,19 @@ def _get_all_resource_url(self):

def _get_resource_url(self, base_url, _path='', source_name=None):
html = self.session.get(base_url, headers=self.headers).text
tds = BeautifulSoup(html, self.__BEAUTIFULSOUPPARSE).find_all('td')
tds = BeautifulSoup(html, self.__BEAUTIFULSOUPPARSE).find_all('li')
if not source_name:
source_name = BeautifulSoup(html, self.__BEAUTIFULSOUPPARSE).find('h2').text
source_name = BeautifulSoup(html, self.__BEAUTIFULSOUPPARSE).find('h3').text
if self.semester and source_name.find(self.semester) == -1: return # download only current semester
res = set()
for td in tds:
url = td.find('a')
if not url: continue
url = urllib.parse.unquote(url['href'])
if url == '../': continue
if 'Folder' in td.text: # directory
# if 'Folder' in td.text: # directory
if 'folder' in td.attrs['class']: # directory
# folder_name = td.text
self._get_resource_url(base_url + url, _path + '/' + url, source_name)
if url.startswith('http:__'): # Fix can't download when given a web link. eg: 计算机算法分析与设计
try:
Expand Down Expand Up @@ -126,7 +121,11 @@ def _download_file(self, param):
except requests.exceptions.ConnectionError as e:
print('Error-----------文件下载失败,服务器长时间无响应: ', save_path)

size_mb = int(r.headers.get('Content-Length')) / (1024 ** 2)
try:
# HTML file does not have Content Length attr
size_mb = int(r.headers.get('Content-Length')) / (1024 ** 2)
except TypeError:
size_mb = 0.33 # html文件直接指定大小 :)
try:
print('Start download {dic_name} >> {sub_directory}{filename} {size_mb:.2f}MB'.format(**locals()))
with open(save_path, 'wb') as f:
Expand All @@ -145,6 +144,8 @@ def start(self):


if __name__ == '__main__':
base_path = os.path.dirname(os.path.abspath(__file__))
os.chdir(base_path)
start = datetime.now()
s = UCASCourse()
s.start()
Expand Down
4 changes: 3 additions & 1 deletion readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ ps:

需要全部的环境(包括python),见下方环境要求,以及参考对应的安装方法

> 可以设置alias实现快速调用,或者添加计划任务每天自动同步

### private文件说明
Expand All @@ -40,8 +41,8 @@ private中,各行表示意义如下:
- python 3.5.2
- requests 2.11
- BeautifulSoup
- PIL
- 可选环境:
- PIL
- Tesseract-OCR

### 安装方法
Expand All @@ -64,6 +65,7 @@ private中,各行表示意义如下:
- **觉得好用点个star吧~**

## 更新说明
- 更新适配到2019年秋季
- 新增登录网址,不用验证码
- 修复因微软 CMD 下编码不一致导致的程序崩溃
- 支持最新验证码登录(校内校外不一致)
Expand Down

0 comments on commit 78a8b1a

Please sign in to comment.