Skip to content

Commit cebb8e2

Browse files
committed
chore: add comments
1 parent 696e877 commit cebb8e2

File tree

1 file changed

+19
-2
lines changed

1 file changed

+19
-2
lines changed

xingdumper.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@
55
from argparse import RawTextHelpFormatter
66
from datetime import datetime
77

8+
# optionally hard-code your session cookie here so that it persists between runs
89
LOGIN_COOKIE = "<INSERT-YOUR-XING-LOGIN-COOKIE-VALUE>"
910

11+
# transliterate German umlauts and ß to their ASCII equivalents
1012
special_char_map = {ord('ä'):'ae', ord('ü'):'ue', ord('ö'):'oe', ord('ß'):'ss'}
1113

1214
format_examples = '''
@@ -32,8 +34,16 @@
3234
if args.cookie:
3335
LOGIN_COOKIE = args.cookie
3436

35-
mailformat = args.email_format if args.email_format else False
36-
count = args.count if args.count and args.count < 3000 else 2999
37+
if (args.email_format):
38+
mailformat = args.email_format
39+
else:
40+
mailformat = False
41+
42+
if (args.count and args.count < 3000):
43+
count = args.count
44+
else:
45+
# according to XING, the result window must be less than 3000
46+
count = 2999
3747

3848
api = "https://www.xing.com/xing-one/api"
3949
headers = {'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', 'Content-type': 'application/json'}
@@ -43,12 +53,14 @@
4353
try:
4454
_, _, company = url.partition('pages/')
4555

56+
# retrieve company id from the api
4657
postdata1 = {"operationName":"EntitySubpage","variables":{"id":company,"moduleType":"employees"},"query":"query EntitySubpage($id: SlugOrID!, ) { entityPageEX(id: $id) { ... on EntityPage { slug title context { companyId } } } }"}
4758
r = requests.post(api, data=json.dumps(postdata1), headers=headers, cookies=cookies_dict, timeout=200)
4859
response1 = r.json()
4960
companyID = response1["data"]["entityPageEX"]["context"]["companyId"]
5061
companyTitle = response1["data"]["entityPageEX"]["title"]
5162

63+
# retrieve employee information from the api based on previously obtained company id
5264
postdata2 = {"operationName":"Employees","variables":{"consumer":"","id":companyID,"first":count,"query":{"consumer":"web.entity_pages.employees_subpage","sort":"CONNECTION_DEGREE"}},"query":"query Employees($id: SlugOrID!, $first: Int, $after: String, $query: CompanyEmployeesQueryInput!, $consumer: String! = \"\", $includeTotalQuery: Boolean = false) { company(id: $id) { id totalEmployees: employees(first: 0, query: {consumer: $consumer}) @include(if: $includeTotalQuery) { total } employees(first: $first, after: $after, query: $query) { total edges { node { profileDetails { id firstName lastName displayName gender pageName location { displayLocation } occupations { subline } } } } } } }"}
5365
r2 = requests.post(api, data=json.dumps(postdata2), headers=headers, cookies=cookies_dict, timeout=200)
5466
response2 = r2.json()
@@ -79,6 +91,7 @@
7991
print("[i] Email Format: " + mailformat)
8092
print()
8193

94+
# loop over employees
8295
for emp in response2['data']['company']['employees']['edges']:
8396
pd = emp['node']['profileDetails']
8497
firstname = pd['firstName']
@@ -107,21 +120,25 @@
107120
employee_entry['email'] = mailformat.format(firstname_clean, lastname_clean)
108121

109122
if args.full:
123+
# dump additional contact details for each employee. These are most often "None", so the API is not queried for this data by default
110124
postdata3 = {"operationName":"getXingId","variables":{"profileId":pagename},"query":"query getXingId($profileId: SlugOrID!, $actionsFilter: [AvailableAction!]) { profileModules(id: $profileId) { __typename xingIdModule(actionsFilter: $actionsFilter) { xingId { status { localizationValue __typename } __typename } __typename ...xingIdContactDetails } } } fragment xingIdContactDetails on XingIdModule { contactDetails { business { email fax { phoneNumber } mobile { phoneNumber } phone { phoneNumber } } __typename } __typename }"}
111125
r3 = requests.post(api, data=json.dumps(postdata3), headers=headers, cookies=cookies_dict, timeout=200)
112126
r3data = r3.json()
113127
try:
128+
# try to extract contact details
114129
contact = r3data['data']['profileModules']['xingIdModule']['contactDetails']['business']
115130
employee_entry['business_email'] = contact.get('email', 'None')
116131
employee_entry['fax'] = contact.get('fax', {}).get('phoneNumber', 'None')
117132
employee_entry['mobile'] = contact.get('mobile', {}).get('phoneNumber', 'None')
118133
employee_entry['phone'] = contact.get('phone', {}).get('phoneNumber', 'None')
119134
except:
135+
# if contact details are missing in the API response, set to 'None'
120136
employee_entry['business_email'] = employee_entry['fax'] = employee_entry['mobile'] = employee_entry['phone'] = 'None'
121137

122138
employees.append(employee_entry)
123139

124140
if not args.output_json and not args.output_csv:
141+
# print employee information as semicolon-separated values (CSV-style)
125142
print("Firstname;Lastname;" + ("Email;" if mailformat else "") + "Position;Gender;Location;" + ("E-Mail;Fax;Mobile;Phone;" if args.full else "") + "Profile")
126143
for emp in employees:
127144
values = [emp['firstname'], emp['lastname']]

0 commit comments

Comments
 (0)