furas
diff --git a/‎__scraping__/bit.do/README.md
Lines changed: 48 additions & 0 deletions b/‎__scraping__/bit.do/README.md
Lines changed: 48 additions & 0 deletions
diff --git a/‎__scraping__/bit.do/main.py
Lines changed: 49 additions & 0 deletions b/‎__scraping__/bit.do/main.py
Lines changed: 49 additions & 0 deletions
diff --git a/‎__scraping__/spotifychart.com/main.py
Lines changed: 35 additions & 0 deletions b/‎__scraping__/spotifychart.com/main.py
Lines changed: 35 additions & 0 deletions
diff --git a/‎csv/incorrectly-save-csv/README.md
Lines changed: 10 additions & 0 deletions b/‎csv/incorrectly-save-csv/README.md
Lines changed: 10 additions & 0 deletions
diff --git a/‎csv/incorrectly-save-csv/main.py
Lines changed: 17 additions & 0 deletions b/‎csv/incorrectly-save-csv/main.py
Lines changed: 17 additions & 0 deletions
diff --git a/‎csv/incorrectly-save-csv/sample.csv
Lines changed: 4 additions & 0 deletions b/‎csv/incorrectly-save-csv/sample.csv
Lines changed: 4 additions & 0 deletions
diff --git a/‎csv/incorrectly-save-csv/temp.csv
Lines changed: 4 additions & 0 deletions b/‎csv/incorrectly-save-csv/temp.csv
Lines changed: 4 additions & 0 deletions
diff --git a/‎decode-encode/macosx-linux/main-unzip.py
Lines changed: 35 additions & 0 deletions b/‎decode-encode/macosx-linux/main-unzip.py
Lines changed: 35 additions & 0 deletions
diff --git a/‎decode-encode/macosx-linux/main.py
Lines changed: 61 additions & 0 deletions b/‎decode-encode/macosx-linux/main.py
Lines changed: 61 additions & 0 deletions
diff --git a/‎pandas/replace-nan-with-mean-in-groups/main.py
Lines changed: 76 additions & 0 deletions b/‎pandas/replace-nan-with-mean-in-groups/main.py
Lines changed: 76 additions & 0 deletions
diff --git a/‎tkinter/__canvas__/canvas-random-move-tag-bind-duck-hunt/README.md
Lines changed: 4 additions & 0 deletions b/‎tkinter/__canvas__/canvas-random-move-tag-bind-duck-hunt/README.md
Lines changed: 4 additions & 0 deletions
diff --git a/‎tkinter/__canvas__/canvas-random-move-tag-bind-duck-hunt/images/tkinter-duck-hunt.png
5.95 KB b/‎tkinter/__canvas__/canvas-random-move-tag-bind-duck-hunt/images/tkinter-duck-hunt.png
5.95 KB
@@ -0,0 +1,48 @@
+This seems to be the minimal working code.
+
+It needs the header `'X-Requested-With'` because it is an AJAX/XHR request.
+
+It needs `permasession`, but the first `GET` doesn't send it, so it is probably generated on the page with JavaScript. However, it works for me with the same `permasession` all the time.
+
+Maybe later it will need a new/fresh `permasession`.
+
+The spaces in `" site2 "` are required.
+
+```python
+import requests
+
+headers={
+    'X-Requested-With': 'XMLHttpRequest', # need it
+}
+
+data = {
+    'action': 'shorten',
+    'url': 'https://onet.pl',
+    'url2': ' site2 ', # need spaces
+    'url_hash': None,
+    'url_stats_is_private': 0,
+    'permasession': '1555801674|ole2ky65f9', # need it
+}
+
+r = requests.post('http://bit.do/mod_perl/url-shortener.pl', headers=headers, data=data)
+
+print(r.status_code)
+print(r.json())
+```
+
+It needed neither `requests.Session()`, nor a `User-Agent` header, nor a `GET` request at the start.
+
+---
+
+**EDIT:** the value `1555801674` in `'permasession': '1555801674|ole2ky65f9'` is a timestamp with the then-current date and time.
+
+```python
+import datetime
+
+datetime.datetime.fromtimestamp(1555801674)
+
+# result (local time): datetime.datetime(2019, 4, 21, 1, 7, 54)
+```
+
+Maybe `ole2ky65f9` is also a timestamp, but stored as a shortened value.
+
@@ -0,0 +1,49 @@
+
+# date: 2019.04.21
+# https://stackoverflow.com/a/55778640/1832058
+
+import requests
+
+# not need Sessions
+s = requests.Session()
+s.headers.update({
+    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+    'Accept-Encoding': 'gzip, deflate',
+    'Accept-Language': 'pl,en-US;q=0.7,en;q=0.3',
+    'Cache-Control': 'no-cache',
+    'Connection': 'keep-alive',
+})
+
+#r = s.get('http://bit.do/')
+#print(r.status_code)
+#print(r.cookies)
+
+
+# ------------------------------------
+
+headers={
+    'X-Requested-With': 'XMLHttpRequest', # need it
+    #'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0',
+    #'Cookie': 'permasession=1555801674|ole2ky65f9', #
+}
+
+data = {
+    'action': 'shorten',
+    'url': 'https://onet.pl',
+    'url2': ' site2 ', # need spaces
+    'url_hash': None,
+    'url_stats_is_private': 0,
+    'permasession': '1555801674|ole2ky65f9', # need it
+}
+
+r = requests.post('http://bit.do/mod_perl/url-shortener.pl', headers=headers, data=data)
+print(r.status_code)
+print(r.json())
+
+
+
+import datetime
+
+datetime.datetime.fromtimestamp(1555801674)
+
+
@@ -0,0 +1,35 @@
+# date: 2019.04.16
+# https://stackoverflow.com/questions/55699472/web-scraping-python-indexing-issue-for-dataframe/55700180#55700180
+
+import requests
+from bs4 import BeautifulSoup
+import pandas as pd
+
+base_url = 'https://spotifycharts.com/regional/global/daily/'
+
+r = requests.get(base_url)
+
+soup = BeautifulSoup(r.text, 'html.parser')
+
+chart = soup.find('table', {'class': 'chart-table'})
+tbody = chart.find('tbody')
+
+all_rows = []
+
+for tr in tbody.find_all('tr'):
+
+    rank_text = tr.find('td', {'class': 'chart-table-position'}).text
+
+    artist_text = tr.find('td', {'class': 'chart-table-track'}).find('span').text
+    artist_text = artist_text.replace('by ','').strip()
+
+    title_text = tr.find('td', {'class': 'chart-table-track'}).find('strong').text
+
+    streams_text = tr.find('td', {'class': 'chart-table-streams'}).text
+
+    all_rows.append([rank_text, artist_text, title_text, streams_text])
+
+# after `for` loop
+
+df = pd.DataFrame(all_rows, columns=['Rank','Artist','Title','Streams'])
+print(df)#.head(15))
@@ -0,0 +1,10 @@
+`sample.csv` is an incorrectly saved CSV file.
+
+Probably someone created one string with all the items of a row and used `csv` to save it.
+But `csv` saved it as a single column with one long string, not as many columns.
+
+The example uses `csv` to read it again and writes it back as a normal file.
+This way it removes the `"` on both sides of the long string,
+and it converts each double `""` to a single `"`.
+
+Now it is a correct CSV file and `pandas.read_csv()` can read it without problems.
@@ -0,0 +1,17 @@
+import csv
+import pandas as pd
+
+
+f1 = open('sample.csv')
+f2 = open('temp.csv', 'w')
+reader = csv.reader(f1)
+for row in reader:
+    f2.write(row[0] + '\n')
+f2.close()
+f1.close()
+
+
+df = pd.read_csv('temp.csv')
+
+print(len(df.columns))
+print(df)
@@ -0,0 +1,4 @@
+"Store code,""Biz"",""Add"",""Labels"",""TotalSe"",""DirectSe"",""DSe"",""TotalVe"",""SeVe"",""MaVe"",""Totalac"",""Webact"",""Dions"",""Ps"""
+",,,,""Numsearching"",""Numsearchingbusiness"",""Numcatprod"",""Numview"",""Numviewed"",""Numviewed2"",""Numaction"",""Numwebsite"",""Numreques"",""Numcall"""
+"Nora,""Ora"",""Sgo, Mp, 2000"",,111,44,33,121,1232,53411,4,5,3,3"
+"mc11,""21 old"",""tjis that place, somewher, Netherlands, 2434"",,3245,325,52454,3432,243,4353,343,23,23,18"
@@ -0,0 +1,4 @@
+Store code,"Biz","Add","Labels","TotalSe","DirectSe","DSe","TotalVe","SeVe","MaVe","Totalac","Webact","Dions","Ps"
+,,,,"Numsearching","Numsearchingbusiness","Numcatprod","Numview","Numviewed","Numviewed2","Numaction","Numwebsite","Numreques","Numcall"
+Nora,"Ora","Sgo, Mp, 2000",,111,44,33,121,1232,53411,4,5,3,3
+mc11,"21 old","tjis that place, somewher, Netherlands, 2434",,3245,325,52454,3432,243,4353,343,23,23,18
@@ -0,0 +1,35 @@
+
+# date: 2019.04.18
+# Bartlomiej 'furas' Burek
+#
+# https://stackoverflow.com/questions/16467479/normalizing-unicode
+#
+#
+
+import os
+import zipfile
+import unicodedata
+import argparse
+
+parser = argparse.ArgumentParser()
+parser.add_argument('filename', help='zip file with MAC OS X names')
+args = parser.parse_args()
+
+def convert(name):
+    name = name.encode('cp437').decode('utf-8')
+    name = unicodedata.normalize('NFC', name)
+    return name
+
+if args.filename:
+    z = zipfile.ZipFile(args.filename)
+    for item in z.filelist:
+        #if not item.filename.startswith('__MACOSX'):
+        new_name = convert(item.filename)
+        print(new_name)
+        if item.is_dir():
+            os.makedirs(new_name, exist_ok=True)
+        else:
+            with open(new_name, 'wb') as f:
+                f.write(z.read(item))
+
+
@@ -0,0 +1,61 @@
+
+# date: 2019.04.18
+# Bartlomiej 'furas' Burek
+#
+# https://stackoverflow.com/questions/16467479/normalizing-unicode
+# https://www.pythonsheets.com/notes/python-unicode.html
+#
+
+import os
+import zipfile
+import unicodedata
+from unidecode import unidecode
+import ftfy
+
+
+def test(data):
+    text, expected = data
+
+    text2 = text.encode('cp437').decode('utf-8')
+
+    text3 = unidecode(text2)
+    text4 = unicodedata.normalize('NFC', text2)
+
+    text5 = unidecode(text4)
+
+    print('                                text:', text, '| len:', len(text))
+    print('                            expected:', expected, '  | len:', len(expected))
+    print('                    text == expected:', text == expected)
+    print('-------------------------------------')
+    print('text.encode("cp437").decode("utf-8"):', text2, '  | len:', len(text2), '| expected:', text2 == expected)
+    print('                      unicode(text2):', text3, '  | len:', len(text3), '| expected:', text3 == expected)
+    print('-------------------------------------')
+    print(' unicodedata.normalize("NFC", text2):', text4, '  | len:', len(text4), '| expected:', text4 == expected)
+    print('                      unicode(text4):', text5, '  | len:', len(text5), '| expected:', text5 == expected)
+    print('-------------------------------------')
+    print('                 ftfy.fix_text(text):', ftfy.fix_text(text))
+    print('-------------------------------------')
+
+a1 = 'a╠¿'
+
+a2 = a1.encode('cp437').decode('utf-8')
+a4 = unidecode(a2)
+a3 = unicodedata.normalize('NFC', a2)
+
+a5 = unidecode(a3)
+print(a1, a2, len(a2), a3, len(a3), a4, a5)
+
+
+examples = [
+    ('a╠¿', 'ą'),
+    ('e╠¿', 'ę'),
+    ('z╠ü', 'ż'),
+    ('┼é',  'ł'),
+#    'z╠üle'
+]
+
+for data in examples:
+    test(data)
+    print('----------------------------------------------------------------')
+
+
@@ -0,0 +1,76 @@
+
+# date: 2019.04.16
+# https://stackoverflow.com/questions/55699046/filling-specific-missing-value-in-python?noredirect=1#comment98080392_55699046
+
+job_title = '''ANALYST, BRAND DEVELOPMENT
+ANESTHESIOLOGIST
+ANESTHESIOLOGIST
+BUSINESS INTELLIGENCE ANALYSTS
+CIVIL ENGINEER
+CIVIL ENGINEER
+COMPUTER PROGRAMMER
+COMPUTER PROGRAMMER ANALYST
+COMPUTER SYSTEM ANALYST
+COMPUTER SYSTEM ANALYST
+COMPUTER SYSTEMS ANAGLYST
+COMPUTER SYSTEMS ANALYST
+CONSULTANT
+CORPORATE COMMUNICATIONS SPECIALIST
+COUNSELOR
+DESIGN
+ELEMENTARY CO-TEACHER
+FASHION MODEL
+FIELD ENGINEER
+FINANCIAL ANALYST
+FINANCIAL SENIOR ANALYST
+FINANCIAL SPECIALIST'''.split('\n')
+
+# drop duplicated titles; a set doesn't keep the original order
+job_title = list(set(job_title))
+
+# --- create random data with some NaN
+import random
+
+data = []
+
+for _ in range(1):
+    for item in job_title:
+        data.append( (item, None))
+
+for _ in range(2):    
+    for item in job_title:
+        data.append( (item, random.randint(10000,100000)))    
+
+random.shuffle(data)
+
+# --- get mean salary for JOB_TITLE ---
+
+import pandas as pd
+
+df = pd.DataFrame(data, columns=['JOB_TITLE', 'SALARY'])
+
+rows_with_na = df['SALARY'].isna()
+
+print('\n--- before ---\n')
+print(df[ rows_with_na ])
+
+print('\n--- mean ---\n')
+groups = df.groupby(['JOB_TITLE'])
+
+
+# it doesn't work as I expected - it doesn't change data in original `df`
+# (or i would say I expected this will not work but I still hoped it will work :)
+
+for idx, grp in groups:
+    mean = grp['SALARY'].mean()
+    print('mean:', mean, idx)
+    print(grp['SALARY'].fillna(mean)) 
+    print('---')
+
+# this works
+#df['SALARY'] = groups.transform(lambda x: x.fillna(x.mean()))
+#df['SALARY'] = groups.transform(lambda x: x.fillna(x.mean()))['SALARY']
+df['SALARY'] = groups['SALARY'].transform(lambda x: x.fillna(x.mean()))
+    
+print('\n--- after ---\n')
+print(df[ rows_with_na ])
+
@@ -0,0 +1,4 @@
+
+Image: 
+
+![#1](images/tkinter-duck-hunt.png?raw=true)
-Original file line number
+Diff line change
@@ @@ -0,0 +1,4 @@ @@
++
 +Image:
++
 +![#1](images/tkinter-duck-hunt.png?raw=true)