Last active
October 2, 2019 17:33
-
-
Save ixe013/1f81cadeb0ecc8225d491ca568454863 to your computer and use it in GitHub Desktop.
Looks for a sequence of text (a prompt) inside a set of lines cut at arbitrary points, with or without newlines
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import itertools | |
# Test cases grouped under the name "passed". Each value is the ordered list of
# byte chunks that fake_read() hands out, one chunk per simulated read.
tests_passed = {
    'simple end of line': [b'One\nSSH>', b'no!'],
    'simple begining of line': [b'SSH>'],
    'simple empty lines': [b'\n', b'\n', b'SSH>', b'no!'],
    'last character cut': [b'One\nSSH', b'>'],
    'last two characters cut': [b'One\nSS', b'H>'],
    'cut after first': [b'One\nS', b'SH>'],
}
# Test cases grouped under the name "to do". Each value is the ordered list of
# byte chunks that fake_read() hands out, one chunk per simulated read.
tests_to_do = {
    'cut by a new line': [
        b'One\nS\n',
        b'SH>',
        b'this should show\n',
        b'SSH>',
    ],
    'all cut': [
        b'One\nS',
        b'S',
        b'H',
        b'>',
        b'no!',
    ],
    'false alsert': [
        b'One\nSS H>',
        b'\nSSH>',
    ],
    'false alsert on two lines': [
        b'One\nSS',
        # The comma after this chunk matters: without it Python concatenates the
        # adjacent bytes literals and the case delivers two chunks, not three.
        b' H> ',
        b'Yes\nSSH>',
    ],
    'false alert all cut': [
        b'One\nS',
        b'S',
        b'H',
        b' >',
        b'SSH>',
    ],
}
# Every test case in one mapping: the "passed" group first, then the "to do" group.
tests = {**tests_passed, **tests_to_do}
prompts_that_work = []

# Prompt patterns run against every test case.
# In new_test(), '.' stands for any single character and '*' is a wildcard.
prompts = [
    # literal
    'SSH>',
    # wildcard in the middle
    'SS*>', 'S*H>', 'S*>',
    # wildcard at the start
    '*>', '*SH>', '*S*>',
    # wildcard at the end
    'SSH*', 'S.H*',
    # single-character placeholders only
    '....',
    #'...*' This one does not work...
]
def fake_read(name):
    """Yield, in order, the byte chunks stored under ``tests[name]``.

    Each yielded chunk plays the role of one read from a stream.
    """
    yield from tests[name]
def position_of_next_character_to_match(prompt, marker):
    """Return the index of the first non-``*`` character of *prompt* after *marker*.

    If no such character exists, *marker* itself is used. Either way the
    result is capped at ``len(prompt)``.
    """
    non_wildcards = (
        index
        for index in range(marker + 1, len(prompt))
        if prompt[index] != '*'
    )
    return min(len(prompt), next(non_wildcards, marker))
def forward_to_character_following(iterator, character):
    """Consume *iterator* up to and including the first item equal to ``ord(character)``.

    The same iterator object is returned, so its next item is the one that
    follows the match. If there is no match, the iterator is fully consumed.
    """
    # next() stops pulling items as soon as the filter yields its first match.
    next((item for item in iterator if item == ord(character)), None)
    return iterator
def forward_to_begining_of_new_line(iterator):
    """Advance *iterator* past the next newline byte and return it.

    Delegates to forward_to_character_following() with ``'\\n'`` as the target.
    """
    newline = '\n'
    return forward_to_character_following(iterator, newline)
def find_wildcard_stop_character(marker):
    """Return the next item of *marker* that is not ``'*'``.

    A run of stars collapses into one, making SS******> the same as SS*>
    (both match SSH>). Returns ``None`` when *marker* runs out first, which
    means the prompt ends with a wildcard.
    """
    stop_character = next(marker, None)
    while stop_character == '*':
        stop_character = next(marker, None)
    return stop_character
def eat_next_character(iterator):
    """Pull one item from *iterator*.

    Returns ``(item, True)`` when an item was available and ``(None, False)``
    when the iterator is exhausted. The flag lets the caller's ``while`` loop
    know whether to keep going.
    """
    try:
        return next(iterator), True
    except StopIteration:
        return None, False
def new_test(name, prompt):
    """Read the chunks of test case *name* until *prompt* has been seen.

    Chunks come from ``fake_read(name)`` and are scanned byte by byte. In
    *prompt*, ``'.'`` matches any single character and ``'*'`` is a wildcard
    that skips input until the prompt character following it shows up. When a
    character fails to match outside wildcard mode, the rest of the current
    line is skipped, because the prompt is expected to start a line.

    *prompt* must not be empty: its first character is fetched with a bare
    ``next()``.

    Returns the text of every chunk read so far (decoded as UTF-8), split on
    ``'\\n'``. Reading stops at the chunk in which the prompt completed, at an
    empty chunk, or once the chunks run out without a match.
    """
    received_chunks = []
    reader = fake_read(name)
    # A default here keeps an empty chunk list from raising StopIteration.
    data = next(reader, None)

    marker = iter(prompt)
    looking_for = next(marker)
    match_anything = looking_for == '.'

    if looking_for == '*':
        # Make a copy of the iterator. It also serves as a flag
        # that we are in wildcard mode,
        marker, saved_marker = itertools.tee(marker)
        # and start looking for the remainder of the prompt
        # as if it was what we were looking for all along.
        looking_for = find_wildcard_stop_character(marker)
    else:
        saved_marker = None

    partial_match = False

    while data:
        received_chunks.append(data)

        # For every character in the chunk
        iterator = iter(data)

        # Possible states:
        # 0: Looking for the next character after a new line
        # 1: Looking for the next character in the prompt string
        # 2: Looking for the next character that would stop the non-greedy wildcard search
        loop = True
        byte = next(iterator)

        # We cannot use a for loop because we might have to test a character twice
        while loop:
            # Is the current character the one we are looking for? It can be
            #  - the exact character
            #  - any character, because the prompt specification had a . in it
            if chr(byte) == looking_for or match_anything:
                partial_match = True
                try:
                    # Advance in the prompt optimistically
                    looking_for = next(marker)
                    # Will we match the next character as is?
                    match_anything = looking_for == '.'
                    # If we were in wildcard mode, it's over because we found the
                    # character that made us break out of it, but maybe there is
                    # another wildcard right after it.
                    if looking_for == '*':
                        # We start a new search for the remainder of the prompt
                        partial_match = False
                        marker, saved_marker = itertools.tee(marker)
                        looking_for = find_wildcard_stop_character(marker)
                except StopIteration:
                    # No more prompt characters to find: found it!
                    data = None  # Signal to get out of the outer while loop
                    break  # Get out of the inner loop (skips the while-else)

                # If we reach here, we haven't found it all yet, continue
                byte, loop = eat_next_character(iterator)

            elif saved_marker:
                # looking_for is the character that gets us out of the wildcard
                # loop, so if we get here this is just any other character.
                # Rewind the prompt to the position saved at the wildcard.
                marker, saved_marker = itertools.tee(saved_marker)
                # The saved position may already be the end of the prompt
                # (the prompt ends with a wildcard).
                try:
                    looking_for = next(marker)
                except StopIteration:
                    # found it!
                    data = None  # Signal to get out of the outer while loop
                    break  # Get out of the inner loop (skips the while-else)

                # We failed to match this character, but maybe it is the start of
                # a new match. Let's *not* advance the input and try it again, now
                # that the prompt iterator has been reset.
                if not partial_match:
                    byte, loop = eat_next_character(iterator)
                else:
                    # But that's a one time thing
                    partial_match = False

            else:
                # Start searching from the beginning of the prompt again
                marker = iter(prompt)
                looking_for = next(marker)
                match_anything = looking_for == '.'
                partial_match = False

                # If the character that did not match happens to be a new line,
                # we must simply move on to the next character. Otherwise the call
                # to forward_to_begining_of_new_line would skip valid characters
                # after the \n while looking for another one. For example, when
                # parsing "\nSSH>\n" it would skip over the SSH> we are looking for!
                if chr(byte) != '\n':
                    byte, loop = eat_next_character(forward_to_begining_of_new_line(iterator))
                else:
                    byte, loop = eat_next_character(iterator)
        else:
            # We read the whole chunk without completing the prompt. Fetch the
            # next one; None (no more chunks) ends the outer loop instead of
            # letting StopIteration escape from this function.
            data = next(reader, None)

    lines = b''.join(received_chunks).decode('utf-8').split('\n')

    return lines
# Run every prompt pattern against every test case and print what was read.
for name in tests:
    for prompt in prompts:
        # The label is printed before new_test() runs, so it is visible even
        # if the run raises.
        print(f"Result for [{name},{prompt}] ", end='')
        outcome = new_test(name, prompt)
        print(f"{outcome}")
        print('---------------------------------')
        print()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment