|
5 | 5 | from argparse import RawTextHelpFormatter
|
6 | 6 | from datetime import datetime
|
7 | 7 |
|
| 8 | +# optionally hard-code your XING session cookie here so it persists between runs (overridden by args.cookie when given) |
8 | 9 | LOGIN_COOKIE = "<INSERT-YOUR-XING-LOGIN-COOKIE-VALUE>"
|
9 | 10 |
|
| 11 | +# transliteration map for German umlauts and ß to their ASCII equivalents |
10 | 12 | special_char_map = {ord('ä'):'ae', ord('ü'):'ue', ord('ö'):'oe', ord('ß'):'ss'}
|
11 | 13 |
|
12 | 14 | format_examples = '''
|
|
32 | 34 | if args.cookie:
|
33 | 35 | LOGIN_COOKIE = args.cookie
|
34 | 36 |
|
35 |
| -mailformat = args.email_format if args.email_format else False |
36 |
| -count = args.count if args.count and args.count < 3000 else 2999 |
| 37 | +if (args.email_format): |
| 38 | + mailformat = args.email_format |
| 39 | +else: |
| 40 | + mailformat = False |
| 41 | + |
| 42 | +if (args.count and args.count < 3000): |
| 43 | + count = args.count |
| 44 | +else: |
| 45 | + # according to XING, the result window must be less than 3000 |
| 46 | + count = 2999 |
37 | 47 |
|
38 | 48 | api = "https://www.xing.com/xing-one/api"
|
39 | 49 | headers = {'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', 'Content-type': 'application/json'}
|
|
43 | 53 | try:
|
44 | 54 | _, _, company = url.partition('pages/')
|
45 | 55 |
|
| 56 | + # retrieve company id from the api |
46 | 57 | postdata1 = {"operationName":"EntitySubpage","variables":{"id":company,"moduleType":"employees"},"query":"query EntitySubpage($id: SlugOrID!, ) { entityPageEX(id: $id) { ... on EntityPage { slug title context { companyId } } } }"}
|
47 | 58 | r = requests.post(api, data=json.dumps(postdata1), headers=headers, cookies=cookies_dict, timeout=200)
|
48 | 59 | response1 = r.json()
|
49 | 60 | companyID = response1["data"]["entityPageEX"]["context"]["companyId"]
|
50 | 61 | companyTitle = response1["data"]["entityPageEX"]["title"]
|
51 | 62 |
|
| 63 | + # retrieve employee information from the api based on previously obtained company id |
52 | 64 | postdata2 = {"operationName":"Employees","variables":{"consumer":"","id":companyID,"first":count,"query":{"consumer":"web.entity_pages.employees_subpage","sort":"CONNECTION_DEGREE"}},"query":"query Employees($id: SlugOrID!, $first: Int, $after: String, $query: CompanyEmployeesQueryInput!, $consumer: String! = \"\", $includeTotalQuery: Boolean = false) { company(id: $id) { id totalEmployees: employees(first: 0, query: {consumer: $consumer}) @include(if: $includeTotalQuery) { total } employees(first: $first, after: $after, query: $query) { total edges { node { profileDetails { id firstName lastName displayName gender pageName location { displayLocation } occupations { subline } } } } } } }"}
|
53 | 65 | r2 = requests.post(api, data=json.dumps(postdata2), headers=headers, cookies=cookies_dict, timeout=200)
|
54 | 66 | response2 = r2.json()
|
|
79 | 91 | print("[i] Email Format: " + mailformat)
|
80 | 92 | print()
|
81 | 93 |
|
| 94 | + # loop over employees |
82 | 95 | for emp in response2['data']['company']['employees']['edges']:
|
83 | 96 | pd = emp['node']['profileDetails']
|
84 | 97 | firstname = pd['firstName']
|
|
107 | 120 | employee_entry['email'] = mailformat.format(firstname_clean, lastname_clean)
|
108 | 121 |
|
109 | 122 | if args.full:
|
| 123 | + # fetch additional contact details for each employee; these are usually "None", so the API is only queried for them when args.full is set |
110 | 124 | postdata3 = {"operationName":"getXingId","variables":{"profileId":pagename},"query":"query getXingId($profileId: SlugOrID!, $actionsFilter: [AvailableAction!]) { profileModules(id: $profileId) { __typename xingIdModule(actionsFilter: $actionsFilter) { xingId { status { localizationValue __typename } __typename } __typename ...xingIdContactDetails } } } fragment xingIdContactDetails on XingIdModule { contactDetails { business { email fax { phoneNumber } mobile { phoneNumber } phone { phoneNumber } } __typename } __typename }"}
|
111 | 125 | r3 = requests.post(api, data=json.dumps(postdata3), headers=headers, cookies=cookies_dict, timeout=200)
|
112 | 126 | r3data = r3.json()
|
113 | 127 | try:
|
| 128 | + # try to extract contact details |
114 | 129 | contact = r3data['data']['profileModules']['xingIdModule']['contactDetails']['business']
|
115 | 130 | employee_entry['business_email'] = contact.get('email', 'None')
|
116 | 131 | employee_entry['fax'] = contact.get('fax', {}).get('phoneNumber', 'None')
|
117 | 132 | employee_entry['mobile'] = contact.get('mobile', {}).get('phoneNumber', 'None')
|
118 | 133 | employee_entry['phone'] = contact.get('phone', {}).get('phoneNumber', 'None')
|
119 | 134 | except:
|
| 135 | + # if contact details are missing in the API response, set to 'None' |
120 | 136 | employee_entry['business_email'] = employee_entry['fax'] = employee_entry['mobile'] = employee_entry['phone'] = 'None'
|
121 | 137 |
|
122 | 138 | employees.append(employee_entry)
|
123 | 139 |
|
124 | 140 | if not args.output_json and not args.output_csv:
|
| 141 | + # print employee information as semicolon-separated values (CSV-style) |
125 | 142 | print("Firstname;Lastname;" + ("Email;" if mailformat else "") + "Position;Gender;Location;" + ("E-Mail;Fax;Mobile;Phone;" if args.full else "") + "Profile")
|
126 | 143 | for emp in employees:
|
127 | 144 | values = [emp['firstname'], emp['lastname']]
|
|
0 commit comments