-
Notifications
You must be signed in to change notification settings - Fork 1
/
strings.py
303 lines (205 loc) · 9.78 KB
/
strings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
'''Strings'''
# String methods
# -----------------------------------------------------------------------------
# dir(str) lists the attribute and method names available on the str type.
print(dir(str))
# Sample output. NOTE: this listing does not include removeprefix and
# removesuffix, which are used later in this file (added in Python 3.9), so a
# current interpreter will print a longer list than the one shown here.
# ['__add__', '__class__', '__contains__', '__delattr__', '__dir__', '__doc__',
# '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__',
# '__getnewargs__', '__gt__', '__hash__', '__init__', '__init_subclass__',
# '__iter__', '__le__', '__len__', '__lt__', '__mod__', '__mul__', '__ne__',
# '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__rmod__', '__rmul__',
# '__setattr__', '__sizeof__', '__str__', '__subclasshook__', 'capitalize',
# 'casefold', 'center', 'count', 'encode', 'endswith', 'expandtabs', 'find',
# 'format', 'format_map', 'index', 'isalnum', 'isalpha', 'isdecimal', 'isdigit',
# 'isidentifier', 'islower', 'isnumeric', 'isprintable', 'isspace', 'istitle',
# 'isupper', 'join', 'ljust', 'lower', 'lstrip', 'maketrans', 'partition',
# 'replace', 'rfind', 'rindex', 'rjust', 'rpartition', 'rsplit', 'rstrip',
# 'split', 'splitlines', 'startswith', 'strip', 'swapcase', 'title',
# 'translate', 'upper', 'zfill']
# String Literals
# -----------------------------------------------------------------------------
# Single and double quotes are interchangeable; both lines create the same str.
my_string = 'Some content'
my_string = "Some content"
# Concatenate strings with '+'. If the line is long you can continue it on the
# next line with a trailing '\':
long_string1 = 'A long time ago, ' + 'in a galaxy far, far away... ' + \
'blah blah bla...'
# That being said, many feel the use of \ should be minimized. When possible,
# use parentheses instead. Adjacent string literals are joined automatically,
# so when the pieces fall on separate lines you don't need '+':
long_string2 = ('A long time ago, ' + 'in a galaxy far, far away... '
'blah blah bla...')
long_string3 = ('A long time ago, '
'in a galaxy far, far away... '
'blah blah bla...')
print(long_string1)
print(long_string2)
print(long_string3)
# A long time ago, in a galaxy far, far away... blah blah bla...
# A long time ago, in a galaxy far, far away... blah blah bla...
# A long time ago, in a galaxy far, far away... blah blah bla...
# You can also use triple quotes for long strings, but I think these are
# best reserved for docstrings (see documenting_naming.py). When using triple
# quotes, every line break and space inside the quotes is included in the
# output.
long_string = """A long time ago,
in a galaxy far, far away...
It is a period of civil war."""
print(long_string)
# A long time ago,
# in a galaxy far, far away...
# It is a period of civil war.
# Convert to a string with str()
# -----------------------------------------------------------------------------
# str() is a built-in function, not a method. '+' only joins str to str, so
# the float has to be converted before it can be concatenated.
text = 'The number is '
number = 4.5 * 3.25
print(text + str(number)) # The number is 14.625
# Slice with [start : end : step]
# -----------------------------------------------------------------------------
# start is inclusive and end is exclusive. Omitted values default to the ends
# of the string, negative indexes count from the end, and a negative step
# walks the string backwards.
letters = 'abcdefghijk'
print(letters[:3]) # abc
print(letters[-3:]) # ijk
print(letters[:6:2]) # ace
print(letters[1:8:3]) # beh
print(letters[::-1]) # kjihgfedcba
# Break a string up into a list with .split()
# -----------------------------------------------------------------------------
# With no argument, .split() separates on runs of whitespace.
test_string = 'Yeti Bigfoot Loch Ness Unicorn...'
test_list = test_string.split()
print(test_list) # ['Yeti', 'Bigfoot', 'Loch', 'Ness', 'Unicorn...']
print(type(test_list)) # <class 'list'>
# Break a large string up into a list of lines with .splitlines()
# -----------------------------------------------------------------------------
# The literal below starts with a line break right after the opening quotes,
# so element 0 of the resulting list is an empty string and the text lines
# start at index 1. That is why index 3 is the third line of text.
long_string = '''
A long time ago, in a galaxy far, far away...
It is a period of civil war. Rebel
spaceships, striking from a hidden
base, have won their first victory
against the evil Galactic Empire. '''
splitline_list = long_string.splitlines()
print(splitline_list[3]) # spaceships, striking from a hidden
print(type(splitline_list)) # <class 'list'>
# Break a string up into a tuple with .partition()
# -----------------------------------------------------------------------------
# .partition() splits at the first occurrence of the separator and returns a
# 3-tuple: (text before, the separator itself, text after).
filename = 'image.png'
test = filename.partition('.')
print(test) # ('image', '.', 'png')
# Join a list or tuple into a string with .join()
# -----------------------------------------------------------------------------
# The string that .join() is called on (here a single space) is the separator
# placed between the items. test_list is the list built by .split() earlier.
test_string = ' '.join(test_list)
print(test_string) # Yeti Bigfoot Loch Ness Unicorn...
print(type(test_string)) # <class 'str'>
# len(), in, .startswith(), .endswith(), .find(), .rfind(), .count(),
# .isalnum()
# -----------------------------------------------------------------------------
# len() is a built-in function and 'in' is an operator; the rest are string
# methods.
print(len(test_string)) # 33
print(test_string.startswith('Yeti')) # True
print(test_string.endswith('Yeti')) # False
print('Bigfoot' in test_string) # True
# Find the offset of the first occurrence of a word
word = 'Ness'
print(test_string.find(word)) # 18
# Find the offset of the last occurrence of a word
# (same answer as .find() here because the word occurs only once)
print(test_string.rfind(word)) # 18
# Find the total number of occurrences
print(test_string.count(word)) # 1
# Check if all characters are letters or numbers only (T/F).
# False here because the string contains spaces and periods.
print(test_string.isalnum()) # False
# Remove and replace: .lstrip(), .rstrip(), .strip(), .replace()
# -----------------------------------------------------------------------------
# Remove characters from the beginning with .lstrip(), from the end with
# .rstrip(), or from both ends with .strip(). The value of name after each
# step is shown in the trailing comment:
name = '- Raja -'
name = name.lstrip('-') # ' Raja -'
name = name.rstrip('-') # ' Raja '
name = name.strip(' ') # 'Raja'
# Replace characters or words. You can also pass a count as a third argument
# to limit the number of replacements, e.g. test_string.replace(' ', '-', 2)
name = 'Yello'
print(name.replace('Y', 'H')) # Hello
test_string = test_string.replace(' ', ', ')
print(test_string) # Yeti, Bigfoot, Loch, Ness, Unicorn...
test_string = test_string.replace('Unicorn', 'Dragon')
print(test_string) # Yeti, Bigfoot, Loch, Ness, Dragon...
# Remove a prefix or suffix with removeprefix(), removesuffix()
# -----------------------------------------------------------------------------
# Python 3.9 added str.removeprefix(prefix) and str.removesuffix(suffix) to
# easily remove an unneeded prefix or a suffix from a string.
og_string = 'test_foo_abc'
print(og_string.removeprefix('test_'))
# foo_abc
print(og_string.removesuffix('_abc'))
# test_foo
# Note that while these look like lstrip() and rstrip(), they are not.
# The main difference is that the parameters of removeprefix() and
# removesuffix() are treated as substrings, while the parameters of lstrip()
# and rstrip() are treated as a set of characters. This results in the
# following:
# 1. lstrip() and rstrip() will remove characters in any order:
og_string = 'ab_python'
print(og_string.lstrip('ba'))
# _python
print(og_string.removeprefix('ba'))
# ab_python
# 2. lstrip() and rstrip() will remove duplicates of the characters:
og_string = 'ababbbaab_python'
print(og_string.lstrip('ba'))
# _python
print(og_string.removeprefix('ba'))
# ababbbaab_python
# 3. If no parameter is passed in, lstrip() and rstrip() will remove
# whitespace. Omitting the parameter in removeprefix() and removesuffix()
# results in a TypeError.
og_string = ' python'
print(og_string.lstrip())
# python
# Change Case
# -----------------------------------------------------------------------------
# Each method returns a new string. test_string is rebound to the result every
# time, so each call below works on the output of the previous one.
test_string = test_string.lower()
print(test_string) # yeti, bigfoot, loch, ness, dragon...
test_string = test_string.upper()
print(test_string) # YETI, BIGFOOT, LOCH, NESS, DRAGON...
test_string = test_string.capitalize()
print(test_string) # Yeti, bigfoot, loch, ness, dragon...
test_string = test_string.title()
print(test_string) # Yeti, Bigfoot, Loch, Ness, Dragon...
test_string = test_string.swapcase()
print(test_string) # yETI, bIGFOOT, lOCH, nESS, dRAGON...
# NOTE: .lower() and .upper() are particularly useful when you're searching a
# string and don't care about the case, or when you receive input and don't
# want to handle upper- and lower-case variations separately. Lower-casing
# the text first makes the membership test case-insensitive:
if 'bigfoot' in test_string.lower():
    print("He's there")  # He's there
# NOTE: title() doesn't handle apostrophes very well: it treats the apostrophe
# as a word boundary and capitalizes the letter after it. Use capwords()
# instead:
# NOTE(review): imports normally live at the top of the file; this one is kept
# next to its only use in this walkthrough.
from string import capwords
example = "I'm super fun."
print(example.title()) # I'M Super Fun.
print(capwords(example)) # I'm Super Fun.
# Alignment
# -----------------------------------------------------------------------------
# Each method pads the string with spaces to the requested total width (20).
# example is the 14-character string "I'm super fun." defined just above, so
# six spaces of padding are added in total.
test1 = example.center(20)
test2 = example.ljust(20)
test3 = example.rjust(20)
test_list = [test1, test2, test3]
print(test_list)
# ["   I'm super fun.   ", "I'm super fun.      ", "      I'm super fun."]
# Raw string literals
# -----------------------------------------------------------------------------
# An r prefix turns off escape processing, so a sequence such as \t stays as a
# backslash followed by 't' instead of becoming a tab. A raw string still
# cannot END in a single backslash (the closing quote would be escaped), which
# is why the example below ends with a backslash plus a trailing space. Be
# aware that doubling a backslash inside a raw string keeps BOTH characters.
raw_str = (r"The 'r' at the start of a string before the quotation mark "
           r"tells python it's a raw string, so any escape characters like "
           r"backslash \t will be ignored... unless it's at the end of the "
           r"string - then you need to do a double backslash or a space. "
           r"This can come into play when creating raw strings for "
           r"directory pathnames like C:\ ")
print(raw_str)
# The 'r' at the start of a string before the quotation mark tells python it's
# a raw string, so any escape characters like backslash \t will be ignored...
# unless it's at the end of the string - then you need to do a double backslash
# or a space. This can come into play when creating raw strings for directory
# pathnames like C:\
# Note: chaining methods
# -----------------------------------------------------------------------------
# One thing that I'm clear on now, but that for some reason was never really
# pointed out in my early training, is that you can 'chain' methods together.
# This is really helpful for shortening code; just make sure it's not at the
# cost of readability. In my early days I remember seeing long, complex
# chains and finding it hard to understand what was going on.
# The calls run left to right, each on the result of the previous one: strip
# the surrounding whitespace, lower-case the text, then swap the extension.
name = ' Raja.JPEG '
name = name.strip().lower().replace('jpeg', 'png')
print(name)
# raja.png
# See also: formatting.py