Merge pull request #22 from isi-nlp/develop
v0.7.1
thammegowda authored Oct 19, 2022
2 parents 149415f + 89e2576 commit 141ee5d
Showing 13 changed files with 3,513 additions and 57 deletions.
81 changes: 78 additions & 3 deletions docs/50-serve.adoc
@@ -51,8 +51,8 @@ NOTE: batch decoding is yet to be supported. The current decoder decodes only on
An example POST request:

[source,bash]
----
curl --data "source=Comment allez-vous?" --data "source=Bonne journée" http://localhost:6060/translate
----

[source,json]
----
{
@@ -63,11 +63,86 @@ An example POST request:
"translation": [
"How are you?",
"Have a nice day"
]
],
"dec_args": {
"beam_size": 4,
"lp_alpha": 0.6,
"max_len": 50,
"num_hyp": 1
},
"score": [
-6,
-3
],
"time": 4.5281,
"time_unit": "s",
}
----
You can also use the GET method, as in `http://localhost:6060/translate?source=text1&source=text2`,
after properly URL-encoding `text1` and `text2`; for instance, see the sketch below. This should only be used for quick testing in your web browser.
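
A quick test that lets `curl` do the URL encoding for you (a usage sketch, reusing the endpoint documented above):

[source,bash]
----
curl --get --data-urlencode "source=Comment allez-vous?" \
     --data-urlencode "source=Bonne journée" \
     http://localhost:6060/translate
----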

=== Advanced Decoder Args
You may pass the following optional arguments to the API:

* `beam_size` - Number of beams to use for beam-search decoding
* `num_hyp` - Number of hypotheses to return in the response
* `max_len` - Maximum output length (relative to source length) to wait for the end-of-sequence token
* `lp_alpha` - Length-penalty alpha

All these arguments take their default values from conf.yml, but you may also set them at runtime via arguments to the REST API.
Example:
Example:

[source,bash]
----
curl --data "source=Comment allez-vous?" --data "source=Bonne journée" "http://localhost:6060/translate?beam_size=6&num_hyp=4&lp_alpha=0.0"
----

[source,json]
----
{
"dec_args": {
"beam_size": 6,
"lp_alpha": 0,
"max_len": 50,
"num_hyp": 4
},
"source": [
"Comment allez @-@ vous ?",
"Bonne journée"
],
"time": 6.4446,
"time_unit": "s",
"translation": [
[
"How do you do, sir?.",
"- How are you? - Fine.",
"How do you do?",
"How do you do?"
],
[
"Have a nice day.",
"Have a good day",
"Good day",
"Have a good day.."
]
],
"score": [
[
-8.3406,
-8.3871,
-9.1363,
-9.1478
],
[
-3.7928,
-3.8259,
-3.8653,
-3.8789
]
  ]
}
----


=== Google Analytics Integration

2 changes: 1 addition & 1 deletion docs/index.html
3,244 changes: 3,244 additions & 0 deletions docs/v0.7.1/index.html


1 change: 1 addition & 0 deletions docs/versions.adoc
@@ -1,6 +1,7 @@
= RTG Docs Versions
:hide-uri-scheme:

* link:v0.7.1[v0.7.1]
* link:v0.7[v0.7]
* link:v0.6.1[v0.6.1]
* link:v0.6.0[v0.6.0]
5 changes: 4 additions & 1 deletion docs/versions.html
@@ -447,6 +447,9 @@ <h1>RTG Docs Versions</h1>
<div class="ulist">
<ul>
<li>
<p><a href="v0.7.1">v0.7.1</a></p>
</li>
<li>
<p><a href="v0.7">v0.7</a></p>
</li>
<li>
@@ -480,7 +483,7 @@ <h2 id="_releases">Releases</h2>
</div>
<div id="footer">
<div id="footer-text">
Last updated 2022-03-15 16:24:58 -0700
Last updated 2022-10-18 18:21:25 -0700
</div>
</div>
</body>
2 changes: 1 addition & 1 deletion rtg/__init__.py
@@ -1,4 +1,4 @@
__version__ = '0.7'
__version__ = '0.7.1'


import os
6 changes: 6 additions & 0 deletions rtg/distrib/__init__.py
@@ -34,6 +34,7 @@ class DistribTorch:
    visible_devices: str = get_env('CUDA_VISIBLE_DEVICES', '')
    max_norm = 10
    fp16 = False  # Manually enable by calling enable_fp16()
    fp16_dtype = torch.float16
    grad_accum = 1  # grad accumulation over these many batches

    _scaler = None
@@ -67,6 +68,11 @@ def init_trainer_args(self, args: dict):
        self.enable_fp16()

    def enable_fp16(self):
        try:
            self.fp16_dtype = torch.bfloat16  # prefer bfloat16 if this build supports it
            log.info('BFLOAT16 may be supported; trying to upgrade')
        except Exception:
            log.info('BFLOAT16 is not supported')
        if not self.fp16:
            self.fp16 = True
            self._scaler = GradScaler(enabled=self.fp16)
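
For illustration, a more explicit capability check is possible; this is a sketch, not the committed code, and it assumes `torch.cuda.is_bf16_supported()` is available (PyTorch 1.10+):

[source,python]
----
import torch

# pick bfloat16 only when the CUDA device actually supports it; else stay on float16
if torch.cuda.is_available() and torch.cuda.is_bf16_supported():
    fp16_dtype = torch.bfloat16
else:
    fp16_dtype = torch.float16
----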
Expand Down
57 changes: 32 additions & 25 deletions rtg/module/decoder.py
@@ -396,34 +396,41 @@ def out_vocab(self) -> Field:
        else:  # all others go from source as input to target as output
            return self.exp.tgt_vocab

    def decode_sentence(self, line: str, max_len=20, prepared=False, add_bos=False, **args) -> List[StrHypothesis]:

        line = line.strip()
        if prepared:
            in_seq = [int(t) for t in line.split()]
            if add_bos and in_seq[0] != self.bos_val:
                in_seq.insert(0, self.bos_val)
            if in_seq[-1] != self.eos_val:
                in_seq.append(self.eos_val)
        else:
            in_seq = self.inp_vocab.encode_as_ids(line, add_eos=True, add_bos=add_bos)
        in_seqs = tensor(in_seq, dtype=torch.long).view(1, -1)
        in_lens = tensor([len(in_seq)], dtype=torch.long)
        if self.debug:
            greedy_score, greedy_out = self.greedy_decode(in_seqs, in_lens, max_len, **args)[0]
            greedy_out = self.out_vocab.decode_ids(greedy_out, trunc_eos=True)
            log.debug(f'Greedy : score: {greedy_score:.4f} :: {greedy_out}')

        beams: List[List[Hypothesis]] = self.beam_decode(in_seqs, in_lens, max_len, **args)
        beams = beams[0]  # first sentence, the only one we passed to it as input
    def decode_sentences(self, sources: List[str], max_len=20, prepared=False, add_bos=False, **args) -> List[List[StrHypothesis]]:
        batch_size = len(sources)
        assert batch_size > 0
        src_seqs, in_lens = [], []
        for source in sources:
            line = source.strip()
            if prepared:
                in_seq = [int(t) for t in line.split()]
                if add_bos and in_seq[0] != self.bos_val:
                    in_seq.insert(0, self.bos_val)
                if in_seq[-1] != self.eos_val:
                    in_seq.append(self.eos_val)
            else:
                in_seq = self.inp_vocab.encode_as_ids(line, add_eos=True, add_bos=add_bos)
            src_seqs.append(in_seq)
            in_lens.append(len(in_seq))
        # pad seqs for batching; use a separate name so the max_len decoding budget is not clobbered
        max_src_len = max(in_lens)
        in_seqs = torch.full((batch_size, max_src_len), fill_value=self.exp.src_vocab.pad_idx, dtype=torch.long)
        for i, src_seq in enumerate(src_seqs):
            in_seqs[i, :len(src_seq)] = torch.tensor(src_seq, dtype=torch.long)
        in_lens = tensor(in_lens, dtype=torch.long)

        batch_beams: List[List[Hypothesis]] = self.beam_decode(in_seqs, in_lens, max_len, **args)
        result = []
        for i, (score, beam_toks) in enumerate(beams):
            out = self.out_vocab.decode_ids(beam_toks, trunc_eos=True)
            if self.debug:
                log.debug(f"Beam {i}: score:{score:.4f} :: {out}")
            result.append((score, out))
        for beams in batch_beams:
            result.append([])
            for score, beam_toks in beams:
                out = self.out_vocab.decode_ids(beam_toks, trunc_eos=True)
                result[-1].append((score, out))
        return result

    def decode_sentence(self, line: str, *args, **kwargs) -> List[StrHypothesis]:
        return self.decode_sentences([line], *args, **kwargs)[0]

    def decode_visualize(self, line: str, target=None, max_len=20, reduction=None, **args):
        line = line.strip()
        assert hasattr(self.model, 'cache_attn'), f'{type(self.model)} does not have cache_attn feature'
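
A minimal usage sketch of the new batched `decode_sentences` API above (hypothetical setup: `decoder` is a `Decoder` already loaded from an experiment; argument names follow the hunk above):

[source,python]
----
sources = ["Comment allez-vous ?", "Bonne journée"]
batch_hyps = decoder.decode_sentences(sources, beam_size=4, num_hyp=2, max_len=50)
for src, hyps in zip(sources, batch_hyps):
    for score, text in hyps:  # each hypothesis is a (score, translation) pair
        print(f'{src}\t{float(score):.4f}\t{text}')
----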
3 changes: 1 addition & 2 deletions rtg/module/tfmnmt.py
@@ -783,8 +783,7 @@ def train(self, steps: int, check_point: int, batch_size: int,
            y_seqs_out = batch.y_seqs
            y_seqs_in = torch.cat([bos_step, batch.y_seqs], dim=1)
            y_mask = batch.make_autoreg_mask(y_seqs_in)

            with autocast(enabled=dtorch.fp16, dtype=torch.bfloat16):
            with autocast(enabled=dtorch.fp16, dtype=dtorch.fp16_dtype):
                loss = self._train_step(take_step, x_mask, x_seqs, y_mask, y_seqs_in, y_seqs_out)  # norm=max_toks

            if stopper and take_step:
55 changes: 43 additions & 12 deletions rtg/serve/app.py
@@ -7,6 +7,8 @@
import sys
import platform
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
import copy
import time

import numpy as np
import rtg
@@ -25,7 +27,7 @@
app = Flask(__name__)
app.config['JSON_AS_ASCII'] = False

bp = Blueprint('nmt', __name__, template_folder='templates')
bp = Blueprint('nmt', __name__, template_folder='templates', static_folder='static')

sys_info = {
    'RTG Version': rtg.__version__,
@@ -35,15 +37,15 @@
    'Platform Version': platform.version(),
    'Processor': platform.processor(),
    'CPU Memory Used': max_RSS()[1],
    'GPU': '[unavailable]',
    'Cuda': '[unavailable]',
}
if torch.cuda.is_available():
    sys_info['GPU'] = str(torch.cuda.get_device_properties(rtg.device))
    sys_info['Cuda'] = str(torch.cuda.get_device_properties(rtg.device))
    sys_info['Cuda Version'] = torch.version.cuda
else:
    log.warning("CUDA unavailable")

log.info(f"System Info: ${sys_info}")
log.info(f"System Info: {sys_info}")


def render_template(*args, **kwargs):
@@ -92,6 +94,7 @@ def attach_translate_route(cli_args):

@bp.route("/translate", methods=["POST", "GET"])
def translate():
start_t = time.time()
if request.method not in ("POST", "GET"):
return "GET and POST are supported", 400
if request.method == 'GET':
@@ -102,17 +105,43 @@ def translate():
            sources = [sources]
        if not sources:
            return "Please submit 'source' parameter", 400
        prep = request.args.get('prep', "True").lower() in ("true", "yes", "y", "t")
        prep = request.values.get('prep', "True").lower() in ("true", "yes", "y", "t", "1")
        if prep:
            sources = [src_prep(sent) for sent in sources]
        translations = []
        for source in sources:
            translated = decoder.decode_sentence(source, **dec_args)[0][1]
            if prep:
                translated = tgt_postp(translated)
            translations.append(translated)

        res = dict(source=sources, translation=translations)
        _dec_args = copy.deepcopy(dec_args)
        num_hyp = _dec_args['num_hyp'] = int(request.values.get('num_hyp') or '1')
        if num_hyp < 1 or num_hyp > 20:
            return f'{num_hyp=} is invalid; expected range [1, 20]', 400

        if 'lp_alpha' in request.values:
            _dec_args['lp_alpha'] = float(request.values['lp_alpha'])
        if 'max_len' in request.values:
            _dec_args['max_len'] = int(request.values['max_len'])
        if 'beam_size' in request.values:
            _dec_args['beam_size'] = int(request.values['beam_size'])
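        # widen the beam when necessary so it can return num_hyp distinct hypotheses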
        if num_hyp > _dec_args.get('beam_size', 1):
            _dec_args['beam_size'] = num_hyp

        translations = []
        scores = []
        batch_outs = decoder.decode_sentences(sources, **_dec_args)
        for outs in batch_outs:
            if not num_hyp or num_hyp == 1:  # return only one-best as str; old api
                score, translated = outs[0]
                scores.append(round(score.item()))
                if prep:
                    translated = tgt_postp(translated)
                translations.append(translated)
            else:  # n-best as arr; new api
                scores.append([round(score.item(), FLOAT_POINTS) for score, _ in outs])
                hyps = [hyp for _, hyp in outs]
                if prep:
                    hyps = [tgt_postp(hyp) for hyp in hyps]
                translations.append(hyps)
        res = dict(source=sources, translation=translations, score=scores,
                   dec_args=_dec_args,
                   time=time.time() - start_t, time_unit='s')
        return jsonify(res)

@bp.route("/visual", methods=["POST", "GET"])
@@ -170,8 +199,10 @@ def parse_args():
# uwsgi can take CLI args too
# uwsgi --http 127.0.0.1:5000 --module rtg.serve.app:app --pyargv "rtgv0.5-768d9L6L-512K64K-datav1"
cli_args = parse_args()

attach_translate_route(cli_args)
app.register_blueprint(bp, url_prefix=cli_args.get('base'))

if cli_args.pop('debug'):
    app.debug = True

